diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /contrib/libs/antlr3_cpp_runtime | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'contrib/libs/antlr3_cpp_runtime')
54 files changed, 17816 insertions, 0 deletions
diff --git a/contrib/libs/antlr3_cpp_runtime/.yandex_meta/devtools.copyrights.report b/contrib/libs/antlr3_cpp_runtime/.yandex_meta/devtools.copyrights.report new file mode 100644 index 0000000000..f83a49ca54 --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/.yandex_meta/devtools.copyrights.report @@ -0,0 +1,77 @@ +# File format ($ symbol means the beginning of a line): +# +# $ # this message +# $ # ======================= +# $ # comments (all commentaries should starts with some number of spaces and # symbol) +# ${action} {license id} {license text hash} +# $BELONGS ./ya/make/file/relative/path/1/ya.make ./ya/make/2/ya.make +# ${all_file_action} filename +# $ # user commentaries (many lines) +# $ generated description - files with this license, license text... (some number of lines that starts with some number of spaces, do not modify) +# ${action} {license spdx} {license text hash} +# $BELONGS ./ya/make/file/relative/path/3/ya.make +# ${all_file_action} filename +# $ # user commentaries +# $ generated description +# $ ... +# +# You can modify action, all_file_action and add commentaries +# Available actions: +# keep - keep license in contrib and use in credits +# skip - skip license +# remove - remove all files with this license +# rename - save license text/links into licenses texts file, but not store SPDX into LINCENSE macro. 
You should store correct license id into devtools.license.spdx.txt file +# +# {all file action} records will be generated when license text contains filename that exists on filesystem (in contrib directory) +# We suppose that that files can contain some license info +# Available all file actions: +# FILE_IGNORE - ignore file (do nothing) +# FILE_INCLUDE - include all file data into licenses text file +# ======================= + +KEEP COPYRIGHT_SERVICE_LABEL 487f5db7281296f7145a1268d0f46773 +BELONGS ya.make + License text: + // Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + include/antlr3.hpp [5:5] + include/antlr3baserecognizer.hpp [11:11] + include/antlr3bitset.hpp [12:12] + include/antlr3collections.hpp [5:5] + include/antlr3commontoken.hpp [15:15] + include/antlr3commontree.hpp [9:9] + include/antlr3commontreeadaptor.hpp [9:9] + include/antlr3commontreenodestream.hpp [9:9] + include/antlr3cyclicdfa.hpp [10:10] + include/antlr3debugeventlistener.hpp [19:19] + include/antlr3defs.hpp [8:8] + include/antlr3errors.hpp [5:5] + include/antlr3exception.hpp [19:19] + include/antlr3filestream.hpp [5:5] + include/antlr3input.hpp [12:12] + include/antlr3interfaces.hpp [11:11] + include/antlr3intstream.hpp [35:35] + include/antlr3lexer.hpp [34:34] + include/antlr3memory.hpp [5:5] + include/antlr3parser.hpp [10:10] + include/antlr3recognizersharedstate.hpp [18:18] + include/antlr3rewriterulesubtreestream.hpp [5:5] + include/antlr3rewriteruletokenstream.hpp [5:5] + include/antlr3rewritestreams.hpp [5:5] + include/antlr3tokenstream.hpp [10:10] + include/antlr3treeparser.hpp [5:5] + +KEEP COPYRIGHT_SERVICE_LABEL 945f656078309035caf01d2420f95535 +BELONGS ya.make + License text: + * Copyright 2001-2004 Unicode, Inc. 
+ Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + include/antlr3convertutf.hpp [2:2] diff --git a/contrib/libs/antlr3_cpp_runtime/.yandex_meta/devtools.licenses.report b/contrib/libs/antlr3_cpp_runtime/.yandex_meta/devtools.licenses.report new file mode 100644 index 0000000000..a75720d825 --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/.yandex_meta/devtools.licenses.report @@ -0,0 +1,138 @@ +# File format ($ symbol means the beginning of a line): +# +# $ # this message +# $ # ======================= +# $ # comments (all commentaries should starts with some number of spaces and # symbol) +# ${action} {license spdx} {license text hash} +# $BELONGS ./ya/make/file/relative/path/1/ya.make ./ya/make/2/ya.make +# ${all_file_action} filename +# $ # user commentaries (many lines) +# $ generated description - files with this license, license text... (some number of lines that starts with some number of spaces, do not modify) +# ${action} {license spdx} {license text hash} +# $BELONGS ./ya/make/file/relative/path/3/ya.make +# ${all_file_action} filename +# $ # user commentaries +# $ generated description +# $ ... +# +# You can modify action, all_file_action and add commentaries +# Available actions: +# keep - keep license in contrib and use in credits +# skip - skip license +# remove - remove all files with this license +# rename - save license text/links into licenses texts file, but not store SPDX into LINCENSE macro. 
You should store correct license id into devtools.license.spdx.txt file +# +# {all file action} records will be generated when license text contains filename that exists on filesystem (in contrib directory) +# We suppose that that files can contain some license info +# Available all file actions: +# FILE_IGNORE - ignore file (do nothing) +# FILE_INCLUDE - include all file data into licenses text file +# ======================= + +SKIP LicenseRef-scancode-unknown-license-reference 1ea2485bc5ae3a83ea15a16e65f88cb5 +BELONGS ya.make + License text: + /// Produce a DOT (see graphviz freeware suite) from a base tree + Scancode info: + Original SPDX id: LicenseRef-scancode-unknown-license-reference + Score : 100.00 + Match type : REFERENCE + Links : https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/unknown-license-reference.LICENSE + Files with this license: + include/antlr3commontreeadaptor.hpp [175:175] + +KEEP Unicode-Mappings 21a868edc7974974e055dab130756d20 +BELONGS ya.make + Note: matched license text is too long. Read it in the source files. + Scancode info: + Original SPDX id: LicenseRef-scancode-unicode-mappings + Score : 99.00 + Match type : NOTICE + Links : https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/unicode-mappings.LICENSE + Files with this license: + include/antlr3convertutf.hpp [4:20] + +KEEP BSD-3-Clause 34338f662d2c64c570a2231b4cf8b4d6 +BELONGS ya.make + License text: + * as covered by the BSD license under which ANTLR is issued. You can cut the code + Scancode info: + Original SPDX id: BSD-3-Clause + Score : 99.00 + Match type : REFERENCE + Links : http://www.opensource.org/licenses/BSD-3-Clause, https://spdx.org/licenses/BSD-3-Clause + Files with this license: + include/antlr3collections.hpp [96:96] + +KEEP BSD-3-Clause b88430f0378e11c82525d52e66d2eea4 +BELONGS ya.make + Note: matched license text is too long. Read it in the source files. 
+ Scancode info: + Original SPDX id: BSD-3-Clause + Score : 100.00 + Match type : TEXT + Links : http://www.opensource.org/licenses/BSD-3-Clause, https://spdx.org/licenses/BSD-3-Clause + Files with this license: + include/antlr3.hpp [9:29] + include/antlr3baserecognizer.hpp [16:36] + include/antlr3bitset.hpp [17:37] + include/antlr3collections.hpp [10:30] + include/antlr3commontoken.hpp [20:40] + include/antlr3commontree.hpp [14:34] + include/antlr3commontreeadaptor.hpp [14:34] + include/antlr3commontreenodestream.hpp [14:34] + include/antlr3cyclicdfa.hpp [15:35] + include/antlr3debugeventlistener.hpp [24:44] + include/antlr3defs.hpp [13:33] + include/antlr3errors.hpp [10:30] + include/antlr3exception.hpp [24:44] + include/antlr3filestream.hpp [10:30] + include/antlr3input.hpp [17:37] + include/antlr3interfaces.hpp [15:35] + include/antlr3intstream.hpp [40:60] + include/antlr3lexer.hpp [39:59] + include/antlr3memory.hpp [10:30] + include/antlr3parser.hpp [15:35] + include/antlr3recognizersharedstate.hpp [23:43] + include/antlr3rewriterulesubtreestream.hpp [10:30] + include/antlr3rewriteruletokenstream.hpp [10:30] + include/antlr3rewritestreams.hpp [10:30] + include/antlr3tokenstream.hpp [15:35] + include/antlr3treeparser.hpp [10:30] + +KEEP BSD-3-Clause d377dec7c659f1f9f14adbdefe822aa2 +BELONGS ya.make + License text: + // [The "BSD licence"] + Scancode info: + Original SPDX id: BSD-3-Clause + Score : 99.00 + Match type : REFERENCE + Links : http://www.opensource.org/licenses/BSD-3-Clause, https://spdx.org/licenses/BSD-3-Clause + Files with this license: + include/antlr3.hpp [4:4] + include/antlr3baserecognizer.hpp [10:10] + include/antlr3bitset.hpp [11:11] + include/antlr3collections.hpp [4:4] + include/antlr3commontoken.hpp [14:14] + include/antlr3commontree.hpp [8:8] + include/antlr3commontreeadaptor.hpp [8:8] + include/antlr3commontreenodestream.hpp [8:8] + include/antlr3cyclicdfa.hpp [9:9] + include/antlr3debugeventlistener.hpp [18:18] + include/antlr3defs.hpp 
[7:7] + include/antlr3errors.hpp [4:4] + include/antlr3exception.hpp [18:18] + include/antlr3filestream.hpp [4:4] + include/antlr3input.hpp [11:11] + include/antlr3interfaces.hpp [10:10] + include/antlr3intstream.hpp [34:34] + include/antlr3lexer.hpp [33:33] + include/antlr3memory.hpp [4:4] + include/antlr3parser.hpp [9:9] + include/antlr3recognizersharedstate.hpp [17:17] + include/antlr3rewriterulesubtreestream.hpp [4:4] + include/antlr3rewriteruletokenstream.hpp [4:4] + include/antlr3rewritestreams.hpp [4:4] + include/antlr3tokenstream.hpp [9:9] + include/antlr3treeparser.hpp [4:4] diff --git a/contrib/libs/antlr3_cpp_runtime/.yandex_meta/licenses.list.txt b/contrib/libs/antlr3_cpp_runtime/.yandex_meta/licenses.list.txt new file mode 100644 index 0000000000..7b99c403e9 --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/.yandex_meta/licenses.list.txt @@ -0,0 +1,58 @@ +====================BSD-3-Clause==================== + * as covered by the BSD license under which ANTLR is issued. You can cut the code + + +====================BSD-3-Clause==================== +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +====================BSD-3-Clause==================== +// [The "BSD licence"] + + +====================COPYRIGHT==================== + * Copyright 2001-2004 Unicode, Inc. + + +====================COPYRIGHT==================== +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + + +====================Unicode-Mappings==================== + * Disclaimer + * + * This source code is provided as is by Unicode, Inc. No claims are + * made as to fitness for any particular purpose. No warranties of any + * kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. If this file has been + * purchased on magnetic or optical media from Unicode, Inc., the + * sole remedy for any claim will be exchange of defective media + * within 90 days of receipt. + * + * Limitations on Rights to Redistribute This Code + * + * Unicode, Inc. hereby grants the right to freely use the information + * supplied in this file in the creation of products supporting the + * Unicode Standard, and to make copies of this file in any form + * for internal or external distribution as long as this notice + * remains attached. 
diff --git a/contrib/libs/antlr3_cpp_runtime/antlr3.cpp b/contrib/libs/antlr3_cpp_runtime/antlr3.cpp new file mode 100644 index 0000000000..d64127a6bb --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/antlr3.cpp @@ -0,0 +1 @@ +#include "include/antlr3.hpp" diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3.hpp new file mode 100644 index 0000000000..d8cccb4aac --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3.hpp @@ -0,0 +1,89 @@ +#ifndef _ANTLR3_HPP +#define _ANTLR3_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <cassert> +#include <cstddef> // ptrdiff_t +#include <cstdint> // uint32_t, ... +#include <cstdio> // stderr (TODO remove fprintf(stderr) +#include <cstdlib> // malloc, calloc +#include <cstring> + +#include <algorithm> +#include <deque> +#include <exception> +#include <iostream> +#include <limits> +#include <map> +#include <memory> +#include <new> +#include <set> +#include <sstream> +#include <string> +#include <vector> + +#include "antlr3defs.hpp" + +/* Pre declare the typedefs for all the interfaces, then + * they can be inter-dependant and we will let the linker + * sort it out for us. + */ +#include "antlr3interfaces.hpp" + +// Include the unicode.org conversion library header. 
+// +#include "antlr3convertutf.hpp" + +#include "antlr3errors.hpp" +#include "antlr3memory.hpp" + +#include "antlr3collections.hpp" +#include "antlr3recognizersharedstate.hpp" +#include "antlr3baserecognizer.hpp" +#include "antlr3bitset.hpp" +#include "antlr3commontoken.hpp" +#include "antlr3commontree.hpp" +#include "antlr3commontreeadaptor.hpp" +#include "antlr3cyclicdfa.hpp" +#include "antlr3debugeventlistener.hpp" +#include "antlr3exception.hpp" +#include "antlr3filestream.hpp" +#include "antlr3intstream.hpp" +#include "antlr3input.hpp" +#include "antlr3tokenstream.hpp" +#include "antlr3commontreenodestream.hpp" +#include "antlr3lexer.hpp" +#include "antlr3parser.hpp" +//#include "antlr3rewritestreams.hpp" +#include "antlr3rewriteruletokenstream.hpp" +#include "antlr3rewriterulesubtreestream.hpp" +#include "antlr3traits.hpp" +#include "antlr3treeparser.hpp" + +#endif diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3.inl new file mode 100644 index 0000000000..b2d223398d --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3.inl @@ -0,0 +1,9 @@ +namespace antlr3 { + +//static +ANTLR_INLINE void GenericStream::displayRecognitionError( const StringType& str ) +{ + fprintf(stderr, "%s", str.c_str() ); // write str verbatim to stderr; it is data, never a printf format string +} + +} diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3baserecognizer.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3baserecognizer.hpp new file mode 100644 index 0000000000..0374b3a1ea --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3baserecognizer.hpp @@ -0,0 +1,509 @@ +/** \file + * Defines the basic structure to support recognizing by either a lexer, + * parser, or tree parser. + * \addtogroup BaseRecognizer + * @{ + */ +#ifndef _ANTLR3_BASERECOGNIZER_HPP +#define _ANTLR3_BASERECOGNIZER_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved.
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +namespace antlr3 { + +/** \brief Base tracking context structure for all types of + * recognizers. 
+ */ +template< class ImplTraits, class StreamType > +class BaseRecognizer : public ImplTraits::AllocPolicyType +{ +public: + typedef typename ImplTraits::AllocPolicyType AllocPolicyType; + typedef typename StreamType::IntStreamType IntStreamType; + typedef typename ComponentTypeFinder<ImplTraits, StreamType>::ComponentType SuperType; + typedef typename StreamType::UnitType UnitType; + typedef typename ImplTraits::template ExceptionBaseType<StreamType> ExceptionBaseType; + typedef typename ImplTraits::BitsetType BitsetType; + typedef typename ImplTraits::BitsetListType BitsetListType; + typedef typename ImplTraits::StringType StringType; + typedef typename ImplTraits::template RecognizerSharedStateType<StreamType> RecognizerSharedStateType; + typedef typename ImplTraits::DebugEventListenerType DebugEventListenerType; + typedef typename ImplTraits::LexerType LexerType; + typedef typename ImplTraits::ParserType ParserType; + typedef typename ImplTraits::TreeParserType TreeParserType; + + typedef typename AllocPolicyType::template StackType<StringType> StringStackType; + typedef typename AllocPolicyType::template ListType<StringType> StringListType; + +private: + /// A pointer to the shared recognizer state, such that multiple + /// recognizers can use the same inputs streams and so on (in + /// the case of grammar inheritance for instance. + /// + RecognizerSharedStateType* m_state; + + /// If set to something other than NULL, then this structure is + /// points to an instance of the debugger interface. 
In general, the + /// debugger is only referenced internally in recovery/error operations + /// so that it does not cause overhead by having to check this pointer + /// in every function/method + /// + DebugEventListenerType* m_debugger; + + +public: + BaseRecognizer(ANTLR_UINT32 sizeHint, RecognizerSharedStateType* state); + + SuperType* get_super(); + RecognizerSharedStateType* get_state() const; + DebugEventListenerType* get_debugger() const; + void set_state( RecognizerSharedStateType* state ); + void set_debugger( DebugEventListenerType* debugger ); + + /// Match current input symbol against ttype. Upon error, do one token + /// insertion or deletion if possible. + /// To turn off single token insertion or deletion error + /// recovery, override mismatchRecover() and have it call + /// plain mismatch(), which does not recover. Then any error + /// in a rule will cause an exception and immediate exit from + /// rule. Rule would recover by resynchronizing to the set of + /// symbols that can follow rule ref. + /// + const UnitType* match(ANTLR_UINT32 ttype, BitsetListType* follow); + + /// Consumes the next token, whatever it is, and resets the recognizer state + /// so that it is not in error. + /// + /// \param recognizer + /// Recognizer context pointer + /// + void matchAny(); + + /// function that decides if the token ahead of the current one is the + /// one we were looking for, in which case the current one is very likely extraneous + /// and can be reported that way. + /// + bool mismatchIsUnwantedToken(IntStreamType* input, ANTLR_UINT32 ttype); + + /// function that decides if the current token is one that can logically + /// follow the one we were looking for, in which case the one we were looking for is + /// probably missing from the input. + /// + bool mismatchIsMissingToken(IntStreamType* input, BitsetListType* follow); + + /// Factor out what to do upon token mismatch so tree parsers can behave + /// differently.
Override and call mismatchRecover(input, ttype, follow) + /// to get single token insertion and deletion. Use this to turn off + /// single token insertion and deletion. Override mismatchRecover + /// to call this instead. + /// + /// \remark mismatch only works for parsers and must be overridden for anything else. + /// + void mismatch(ANTLR_UINT32 ttype, BitsetListType* follow); + + /// Report a recognition problem. + /// + /// This method sets errorRecovery to indicate the parser is recovering + /// not parsing. Once in recovery mode, no errors are generated. + /// To get out of recovery mode, the parser must successfully match + /// a token (after a resync). So it will go: + /// + /// 1. error occurs + /// 2. enter recovery mode, report error + /// 3. consume until token found in resynch set + /// 4. try to resume parsing + /// 5. next match() will reset errorRecovery mode + /// + /// If you override, make sure to update errorCount if you care about that. + /// + void reportError(); + void reportError( ClassForwarder<LexerType> ); + template<typename CompType> + void reportError( ClassForwarder<CompType> ); + + /** Function that is called to display a recognition error message. You may + * override this function independently of (*reportError)() above as that function calls + * this one to do the actual exception printing. + */ + void displayRecognitionError(ANTLR_UINT8** tokenNames); + + /// Get number of recognition errors (lexer, parser, tree parser). Each + /// recognizer tracks its own number. So parser and lexer each have + /// separate count. Does not count the spurious errors found between + /// an error and next valid token match + /// + /// \see reportError() + /// + ANTLR_UINT32 getNumberOfSyntaxErrors(); + + /** Function that recovers from an error found in the input stream. + * Generally, this will be a #ANTLR3_EXCEPTION_NOVIABLE_ALT but it could also + * be from a mismatched token that the (*match)() could not recover from. 
+ */ + void recover(); + + /** function that is a hook to listen to token consumption during error recovery. + * This is mainly used by the debug parser to send events to the listener. + */ + void beginResync(); + + /** function that is a hook to listen to token consumption during error recovery. + * This is mainly used by the debug parser to send events to the listener. + */ + void endResync(); + + /** function that is a hook to listen to token consumption during error recovery. + * This is mainly used by the debug parser to send events to the listener. + */ + void beginBacktrack(ANTLR_UINT32 level); + + /** function that is a hook to listen to token consumption during error recovery. + * This is mainly used by the debug parser to send events to the listener. + */ + void endBacktrack(ANTLR_UINT32 level, bool successful); + + /// Compute the error recovery set for the current rule. + /// Documentation below is from the Java implementation. + /// + /// During rule invocation, the parser pushes the set of tokens that can + /// follow that rule reference on the stack; this amounts to + /// computing FIRST of what follows the rule reference in the + /// enclosing rule. This local follow set only includes tokens + /// from within the rule; i.e., the FIRST computation done by + /// ANTLR stops at the end of a rule. + // + /// EXAMPLE + // + /// When you find a "no viable alt exception", the input is not + /// consistent with any of the alternatives for rule r. The best + /// thing to do is to consume tokens until you see something that + /// can legally follow a call to r *or* any rule that called r. + /// You don't want the exact set of viable next tokens because the + /// input might just be missing a token--you might consume the + /// rest of the input looking for one of the missing tokens. 
+ /// + /// Consider grammar: + /// + /// a : '[' b ']' + /// | '(' b ')' + /// ; + /// b : c '^' INT ; + /// c : ID + /// | INT + /// ; + /// + /// At each rule invocation, the set of tokens that could follow + /// that rule is pushed on a stack. Here are the various "local" + /// follow sets: + /// + /// FOLLOW(b1_in_a) = FIRST(']') = ']' + /// FOLLOW(b2_in_a) = FIRST(')') = ')' + /// FOLLOW(c_in_b) = FIRST('^') = '^' + /// + /// Upon erroneous input "[]", the call chain is + /// + /// a -> b -> c + /// + /// and, hence, the follow context stack is: + /// + /// depth local follow set after call to rule + /// 0 <EOF> a (from main()) + /// 1 ']' b + /// 3 '^' c + /// + /// Notice that ')' is not included, because b would have to have + /// been called from a different context in rule a for ')' to be + /// included. + /// + /// For error recovery, we cannot consider FOLLOW(c) + /// (context-sensitive or otherwise). We need the combined set of + /// all context-sensitive FOLLOW sets--the set of all tokens that + /// could follow any reference in the call chain. We need to + /// resync to one of those tokens. Note that FOLLOW(c)='^' and if + /// we resync'd to that token, we'd consume until EOF. We need to + /// sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. + /// In this case, for input "[]", LA(1) is in this set so we would + /// not consume anything and after printing an error rule c would + /// return normally. It would not find the required '^' though. + /// At this point, it gets a mismatched token error and throws an + /// exception (since LA(1) is not in the viable following token + /// set). The rule exception handler tries to recover, but finds + /// the same recovery set and doesn't consume anything. Rule b + /// exits normally returning to rule a. Now it finds the ']' (and + /// with the successful match exits errorRecovery mode). 
+ /// + /// So, you can see that the parser walks up call chain looking + /// for the token that was a member of the recovery set. + /// + /// Errors are not generated in errorRecovery mode. + /// + /// ANTLR's error recovery mechanism is based upon original ideas: + /// + /// "Algorithms + Data Structures = Programs" by Niklaus Wirth + /// + /// and + /// + /// "A note on error recovery in recursive descent parsers": + /// http://portal.acm.org/citation.cfm?id=947902.947905 + /// + /// Later, Josef Grosch had some good ideas: + /// + /// "Efficient and Comfortable Error Recovery in Recursive Descent + /// Parsers": + /// ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip + /// + /// Like Grosch I implemented local FOLLOW sets that are combined + /// at run-time upon error to avoid overhead during parsing. + /// + BitsetType* computeErrorRecoverySet(); + + /// Compute the context-sensitive FOLLOW set for current rule. + /// Documentation below is from the Java runtime. + /// + /// This is the set of token types that can follow a specific rule + /// reference given a specific call chain. You get the set of + /// viable tokens that can possibly come next (look ahead depth 1) + /// given the current call chain. Contrast this with the + /// definition of plain FOLLOW for rule r: + /// + /// FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)} + /// + /// where x in T* and alpha, beta in V*; T is set of terminals and + /// V is the set of terminals and non terminals. In other words, + /// FOLLOW(r) is the set of all tokens that can possibly follow + /// references to r in *any* sentential form (context). At + /// runtime, however, we know precisely which context applies as + /// we have the call chain. We may compute the exact (rather + /// than covering superset) set of following tokens. + /// + /// For example, consider grammar: + /// + /// stat : ID '=' expr ';' // FOLLOW(stat)=={EOF} + /// | "return" expr '.'
+ /// ; + /// expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'} + /// atom : INT // FOLLOW(atom)=={'+',')',';','.'} + /// | '(' expr ')' + /// ; + /// + /// The FOLLOW sets are all inclusive whereas context-sensitive + /// FOLLOW sets are precisely what could follow a rule reference. + /// For input input "i=(3);", here is the derivation: + /// + /// stat => ID '=' expr ';' + /// => ID '=' atom ('+' atom)* ';' + /// => ID '=' '(' expr ')' ('+' atom)* ';' + /// => ID '=' '(' atom ')' ('+' atom)* ';' + /// => ID '=' '(' INT ')' ('+' atom)* ';' + /// => ID '=' '(' INT ')' ';' + /// + /// At the "3" token, you'd have a call chain of + /// + /// stat -> expr -> atom -> expr -> atom + /// + /// What can follow that specific nested ref to atom? Exactly ')' + /// as you can see by looking at the derivation of this specific + /// input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}. + /// + /// You want the exact viable token set when recovering from a + /// token mismatch. Upon token mismatch, if LA(1) is member of + /// the viable next token set, then you know there is most likely + /// a missing token in the input stream. "Insert" one by just not + /// throwing an exception. + /// + BitsetType* computeCSRuleFollow(); + + /// Compute the current followset for the input stream. + /// + BitsetType* combineFollows(bool exact); + + /// Attempt to recover from a single missing or extra token. + /// + /// EXTRA TOKEN + /// + /// LA(1) is not what we are looking for. If LA(2) has the right token, + /// however, then assume LA(1) is some extra spurious token. Delete it + /// and LA(2) as if we were doing a normal match(), which advances the + /// input. + /// + /// MISSING TOKEN + /// + /// If current token is consistent with what could come after + /// ttype then it is ok to "insert" the missing token, else throw + /// exception For example, Input "i=(3;" is clearly missing the + /// ')'. 
When the parser returns from the nested call to expr, it + /// will have the call chain: + /// + /// stat -> expr -> atom + /// + /// and it will be trying to match the ')' at this point in the + /// derivation: + /// + /// => ID '=' '(' INT ')' ('+' atom)* ';' + /// ^ + /// match() will see that ';' doesn't match ')' and report a + /// mismatched token error. To recover, it sees that LA(1)==';' + /// is in the set of tokens that can follow the ')' token + /// reference in rule atom. It can assume that you forgot the ')'. + /// + /// The exception that was passed in, in the Java implementation, is + /// stored in the recognizer exception stack in the C version. To 'throw' it we set the + /// error flag and rules cascade back when this is set. + /// + const UnitType* recoverFromMismatchedToken( ANTLR_UINT32 ttype, BitsetListType* follow); + + /** Function that recovers from a mismatched set in the token stream, in a similar manner + * to recoverFromMismatchedToken() + */ + const UnitType* recoverFromMismatchedSet(BitsetListType* follow); + + /** common routine to handle single token insertion for recovery functions. + */ + /// This code is factored out from mismatched token and mismatched set + /// recovery. It handles "single token insertion" error recovery for + /// both. No tokens are consumed to recover from insertions. Return + /// true if recovery was possible else return false. + /// + bool recoverFromMismatchedElement(BitsetListType* follow); + + /** function that consumes input until the next token matches + * the given token. + */ + void consumeUntil(ANTLR_UINT32 tokenType); + + /** function that consumes input until the next token matches + * one in the given set. + */ + void consumeUntilSet(BitsetType* set); + + /** function that returns an ANTLR3_LIST of the strings that identify + * the rules in the parser that got you to this point. Can be overridden by installing your + * own function set.
+ * + * \todo Document how to override invocation stack functions. + */ + StringStackType getRuleInvocationStack(); + StringStackType getRuleInvocationStackNamed(ANTLR_UINT8* name); + + /** function that converts an ANTLR3_LIST of tokens to an ANTLR3_LIST of + * string token names. As this is mostly used in string template processing it may not be useful + * in the C runtime. + */ + StringListType toStrings( const StringListType& ); + + /** function to return whether the rule has parsed input starting at the supplied + * start index before. If the rule has not parsed input starting from the supplied start index, + * then it will return ANTLR3_MEMO_RULE_UNKNOWN. If it has parsed from the supplied start point + * then it will return the point where it last stopped parsing after that start point. + */ + ANTLR_MARKER getRuleMemoization( ANTLR_INTKEY ruleIndex, + ANTLR_MARKER ruleParseStart); + + /** function that determines whether the rule has parsed input at the current index + * in the input stream + */ + bool alreadyParsedRule(ANTLR_MARKER ruleIndex); + + /** Function that records whether the rule has parsed the input at a + * current position successfully or not. + */ + void memoize(ANTLR_MARKER ruleIndex, + ANTLR_MARKER ruleParseStart); + + /// Function that returns the current input symbol. + /// This is placed into any label for the associated token ref; e.g., x=ID. Token + /// and tree parsers need to return different objects. Rather than test + /// for input stream type or change the IntStream interface, I use + /// a simple method to ask the recognizer to tell me what the current + /// input symbol is. + /// + /// This is ignored for lexers and the lexer implementation of this + /// function should return NULL.
+ /// + const UnitType* getCurrentInputSymbol(IntStreamType* istream); + const UnitType* getCurrentInputSymbol(IntStreamType* istream, ClassForwarder<LexerType>); + const UnitType* getCurrentInputSymbol(IntStreamType* istream, ClassForwarder<ParserType>); + const UnitType* getCurrentInputSymbol(IntStreamType* istream, ClassForwarder<TreeParserType>); + + /// Conjure up a missing token during error recovery. + /// + /// The recognizer attempts to recover from single missing + /// symbols. But, actions might refer to that missing symbol. + /// For example, x=ID {f($x);}. The action clearly assumes + /// that there has been an identifier matched previously and that + /// $x points at that token. If that token is missing, but + /// the next token in the stream is what we want we assume that + /// this token is missing and we keep going. Because we + /// have to return some token to replace the missing token, + /// we have to conjure one up. This method gives the user control + /// over the tokens returned for missing tokens. Mostly, + /// you will want to create something special for identifier + /// tokens. For literals such as '{' and ',', the default + /// action in the parser or tree parser works. It simply creates + /// a CommonToken of the appropriate type. The text will be the token. + /// If you change what tokens must be created by the lexer, + /// override this method to create the appropriate tokens. + /// + UnitType* getMissingSymbol( IntStreamType* istream, ExceptionBaseType* e, + ANTLR_UINT32 expectedTokenType, + BitsetListType* follow); + + /** Function that returns whether the supplied grammar function + * will parse the current input stream or not. This is the way that syntactic + * predicates are evaluated. Unlike java, C is perfectly happy to invoke code + * via a pointer to a function (hence that's what all the ANTLR3 C interfaces + * do. 
+ */ + template<typename Predicate> + bool synpred( ClassForwarder<Predicate> ); + + //In place of exConstruct, just directly instantiate the Exception Object + + /** Reset the recognizer + */ + void reset(); + void reset( ClassForwarder<LexerType> ); + template<typename CompType> + void reset( ClassForwarder<CompType> ); + + void exConstruct(); + + ~BaseRecognizer(); + +}; + +} + +#include "antlr3baserecognizer.inl" + +/// @} +/// + +#endif /* _ANTLR3_BASERECOGNIZER_H */ + diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3baserecognizer.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3baserecognizer.inl new file mode 100644 index 0000000000..b0c3fe8d51 --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3baserecognizer.inl @@ -0,0 +1,920 @@ +namespace antlr3 { + +template< class ImplTraits, class StreamType > +BaseRecognizer<ImplTraits, StreamType>::BaseRecognizer(ANTLR_UINT32 sizeHint, + RecognizerSharedStateType* state) +{ + m_debugger = NULL; + + // If we have been supplied with a pre-existing recognizer state + // then we just install it, otherwise we must create one from scratch + // + if (state == NULL) + { + m_state = new RecognizerSharedStateType(); + m_state->set_sizeHint( sizeHint ); + } + else + { + // Install the one we were given, and do not reset it here + // as it will either already have been initialized or will + // be in a state that needs to be preserved. 
+ // + m_state = state; + } +} + +template< class ImplTraits, class StreamType > +ANTLR_INLINE typename BaseRecognizer<ImplTraits, StreamType>::SuperType* BaseRecognizer<ImplTraits, StreamType>::get_super() +{ + return static_cast<SuperType*>(this); +} + +template< class ImplTraits, class StreamType > +ANTLR_INLINE typename BaseRecognizer<ImplTraits, StreamType>::RecognizerSharedStateType* BaseRecognizer<ImplTraits, StreamType>::get_state() const +{ + return m_state; +} +template< class ImplTraits, class StreamType > +ANTLR_INLINE typename BaseRecognizer<ImplTraits, StreamType>::DebugEventListenerType* BaseRecognizer<ImplTraits, StreamType>::get_debugger() const +{ + return m_debugger; +} +template< class ImplTraits, class StreamType > +ANTLR_INLINE void BaseRecognizer<ImplTraits, StreamType>::set_state( RecognizerSharedStateType* state ) +{ + m_state = state; +} +template< class ImplTraits, class StreamType > +ANTLR_INLINE void BaseRecognizer<ImplTraits, StreamType>::set_debugger( DebugEventListenerType* debugger ) +{ + m_debugger = debugger; +} + +template< class ImplTraits, class StreamType > +const typename BaseRecognizer<ImplTraits, StreamType>::UnitType* +BaseRecognizer<ImplTraits, StreamType>::match(ANTLR_UINT32 ttype, BitsetListType* follow) +{ + SuperType* super = static_cast<SuperType*>(this); + IntStreamType* is = super->get_istream(); + + // Pick up the current input token/node for assignment to labels + // + const UnitType* matchedSymbol = this->getCurrentInputSymbol(is); + + //if (is->LA(1) == ttype) + if (matchedSymbol->get_type() == ttype) + { + // The token was the one we were told to expect + // + is->consume(); // Consume that token from the stream + m_state->set_errorRecovery(false); // Not in error recovery now (if we were) + m_state->set_failed(false); // The match was a success + return matchedSymbol; // We are done + } + + // We did not find the expected token type, if we are backtracking then + // we just set the failed flag and return. 
+ // + if ( m_state->get_backtracking() > 0) + { + // Backtracking is going on + // + m_state->set_failed(true); + return matchedSymbol; + } + + // We did not find the expected token and there is no backtracking + // going on, so we mismatch, which creates an exception in the recognizer exception + // stack. + // + matchedSymbol = this->recoverFromMismatchedToken(ttype, follow); + return matchedSymbol; + +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer<ImplTraits, StreamType>::matchAny() +{ + SuperType* super = static_cast<SuperType*>(this); + IntStreamType* is = super->get_istream(); + + is->consume(); + m_state->set_errorRecovery(false); + m_state->set_failed(false); + return; +} + +template< class ImplTraits, class StreamType > +bool BaseRecognizer<ImplTraits, StreamType>::mismatchIsUnwantedToken(IntStreamType* is, ANTLR_UINT32 ttype) +{ + ANTLR_UINT32 nextt = is->LA(2); + + if (nextt == ttype) + { + if(m_state->get_exception() != NULL) + m_state->get_exception()->set_expecting(nextt); + return true; // This token is unknown, but the next one is the one we wanted + } + else + return false; // Neither this token, nor the one following is the one we wanted +} + +template< class ImplTraits, class StreamType > +bool BaseRecognizer<ImplTraits, StreamType>::mismatchIsMissingToken(IntStreamType* is, BitsetListType* follow) +{ + bool retcode; + BitsetType* followClone; + BitsetType* viableTokensFollowingThisRule; + + if (follow == NULL) + { + // There is no information about the tokens that can follow the last one + // hence we must say that the current one we found is not a member of the + // follow set and does not indicate a missing token. We will just consume this + // single token and see if the parser works it out from there. + // + return false; + } + + followClone = NULL; + viableTokensFollowingThisRule = NULL; + + // The C bitset maps are laid down at compile time by the + // C code generation. 
Hence we cannot remove things from them + // and so on. So, in order to remove EOR (if we need to) then + // we clone the static bitset. + // + followClone = follow->bitsetLoad(); + if (followClone == NULL) + return false; + + // Compute what can follow this grammar reference + // + if (followClone->isMember( ImplTraits::CommonTokenType::EOR_TOKEN_TYPE)) + { + // EOR can follow, but if we are not the start symbol, we + // need to remove it. + // + followClone->remove(ImplTraits::CommonTokenType::EOR_TOKEN_TYPE); + + // Now compute the visiable tokens that can follow this rule, according to context + // and make them part of the follow set. + // + viableTokensFollowingThisRule = this->computeCSRuleFollow(); + followClone->borInPlace(viableTokensFollowingThisRule); + } + + /// if current token is consistent with what could come after set + /// then we know we're missing a token; error recovery is free to + /// "insert" the missing token + /// + /// BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR + /// in follow set to indicate that the fall of the start symbol is + /// in the set (EOF can follow). 
+ /// + if ( followClone->isMember(is->LA(1)) + || followClone->isMember(ImplTraits::CommonTokenType::EOR_TOKEN_TYPE) + ) + { + retcode = true; + } + else + { + retcode = false; + } + + if (viableTokensFollowingThisRule != NULL) + { + delete viableTokensFollowingThisRule; + } + if (followClone != NULL) + { + delete followClone; + } + + return retcode; +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer<ImplTraits, StreamType>::mismatch(ANTLR_UINT32 ttype, BitsetListType* follow) +{ + this->get_super()->mismatch( ttype, follow ); +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer<ImplTraits, StreamType>::reportError() +{ + this->reportError( ClassForwarder<SuperType>() ); +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer<ImplTraits, StreamType>::reportError( ClassForwarder<LexerType> ) +{ + // Indicate this recognizer had an error while processing. + // + m_state->inc_errorCount(); + + this->displayRecognitionError(m_state->get_tokenNames()); +} + +template< class ImplTraits, class StreamType > +template<typename CompType> +void BaseRecognizer<ImplTraits, StreamType>::reportError(ClassForwarder<CompType> ) +{ + // Invoke the debugger event if there is a debugger listening to us + // + if ( m_debugger != NULL) + { + m_debugger->recognitionException( m_state->get_exception() ); + } + + if ( m_state->get_errorRecovery() == true) + { + // Already in error recovery so don't display another error while doing so + // + return; + } + + // Signal we are in error recovery now + // + m_state->set_errorRecovery(true); + + // Indicate this recognizer had an error while processing. + // + m_state->inc_errorCount(); + + // Call the error display routine + // + this->displayRecognitionError( m_state->get_tokenNames() ); +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer<ImplTraits, StreamType>::displayRecognitionError(ANTLR_UINT8** tokenNames) +{ + // Retrieve some info for easy reading. 
+ // + ExceptionBaseType* ex = m_state->get_exception(); + StringType ttext; + + // See if there is a 'filename' we can use + // + SuperType* super = static_cast<SuperType*>(this); + super->displayRecognitionError(tokenNames, ex); +} + +template< class ImplTraits, class StreamType > +ANTLR_UINT32 BaseRecognizer<ImplTraits, StreamType>::getNumberOfSyntaxErrors() +{ + return m_state->get_errorCount(); +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer<ImplTraits, StreamType>::recover() +{ + SuperType* super = static_cast<SuperType*>(this); + IntStreamType* is = super->get_parser_istream(); + // Are we about to repeat the same error? + // + if ( m_state->get_lastErrorIndex() == is->index()) + { + // The last error was at the same token index point. This must be a case + // where LT(1) is in the recovery token set so nothing is + // consumed. Consume a single token so at least to prevent + // an infinite loop; this is a failsafe. + // + is->consume(); + } + + // Record error index position + // + m_state->set_lastErrorIndex( is->index() ); + + // Work out the follows set for error recovery + // + BitsetType* followSet = this->computeErrorRecoverySet(); + + // Call resync hook (for debuggers and so on) + // + this->beginResync(); + + // Consume tokens until we have resynced to something in the follows set + // + this->consumeUntilSet(followSet); + + // End resync hook + // + this->endResync(); + + // Destroy the temporary bitset we produced. 
+ // + delete followSet; + + // Reset the inError flag so we don't re-report the exception + // + m_state->set_error(false); + m_state->set_failed(false); +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer<ImplTraits, StreamType>::beginResync() +{ + if (m_debugger != NULL) + { + m_debugger->beginResync(); + } +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer<ImplTraits, StreamType>::endResync() +{ + if (m_debugger != NULL) + { + m_debugger->endResync(); + } +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer<ImplTraits, StreamType>::beginBacktrack(ANTLR_UINT32 level) +{ + if (m_debugger != NULL) + { + m_debugger->beginBacktrack(level); + } +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer<ImplTraits, StreamType>::endBacktrack(ANTLR_UINT32 level, bool /*successful*/) +{ + if (m_debugger != NULL) + { + m_debugger->endBacktrack(level); + } +} + +template< class ImplTraits, class StreamType > +typename BaseRecognizer<ImplTraits, StreamType>::BitsetType* BaseRecognizer<ImplTraits, StreamType>::computeErrorRecoverySet() +{ + return this->combineFollows(false); +} + +template< class ImplTraits, class StreamType > +typename BaseRecognizer<ImplTraits, StreamType>::BitsetType* BaseRecognizer<ImplTraits, StreamType>::computeCSRuleFollow() +{ + return this->combineFollows(false); +} + +template< class ImplTraits, class StreamType > +typename BaseRecognizer<ImplTraits, StreamType>::BitsetType* BaseRecognizer<ImplTraits, StreamType>::combineFollows(bool exact) +{ + BitsetType* followSet; + BitsetType* localFollowSet; + ANTLR_UINT32 top; + ANTLR_UINT32 i; + + top = static_cast<ANTLR_UINT32>( m_state->get_following().size() ); + + followSet = new BitsetType(0); + localFollowSet = NULL; + + for (i = top; i>0; i--) + { + localFollowSet = m_state->get_following().at(i-1).bitsetLoad(); + + if (localFollowSet != NULL) + { + followSet->borInPlace(localFollowSet); + + if (exact == true) + { + if 
(localFollowSet->isMember( ImplTraits::CommonTokenType::EOR_TOKEN_TYPE) == false) + { + // Only leave EOR in the set if at top (start rule); this lets us know + // if we have to include the follow(start rule); I.E., EOF + // + if (i>1) + { + followSet->remove(ImplTraits::CommonTokenType::EOR_TOKEN_TYPE); + } + } + else + { + break; // Cannot see End Of Rule from here, just drop out + } + } + delete localFollowSet; + localFollowSet = NULL; + } + } + + if (localFollowSet != NULL) + { + delete localFollowSet; + } + return followSet; +} + +template< class ImplTraits, class StreamType > +const typename BaseRecognizer<ImplTraits, StreamType>::UnitType* +BaseRecognizer<ImplTraits, StreamType>::recoverFromMismatchedToken( ANTLR_UINT32 ttype, BitsetListType* follow) +{ + SuperType* super = static_cast<SuperType*>(this); + IntStreamType* is = super->get_parser_istream(); + const UnitType* matchedSymbol; + + // If the next token after the one we are looking at in the input stream + // is what we are looking for then we remove the one we have discovered + // from the stream by consuming it, then consume this next one along too as + // if nothing had happened. + // + if ( this->mismatchIsUnwantedToken( is, ttype) == true) + { + // Create an exception if we need one + // + new ANTLR_Exception<ImplTraits, UNWANTED_TOKEN_EXCEPTION, StreamType>(this, ""); + + // Call resync hook (for debuggers and so on) + // + if (m_debugger != NULL) + { + m_debugger->beginResync(); + } + + // "delete" the extra token + // + this->beginResync(); + is->consume(); + this->endResync(); + // End resync hook + // + if (m_debugger != NULL) + { + m_debugger->endResync(); + } + + // Print out the error after we consume so that ANTLRWorks sees the + // token in the exception. + // + this->reportError(); + + // Return the token we are actually matching + // + matchedSymbol = this->getCurrentInputSymbol(is); + + // Consume the token that the rule actually expected to get as if everything + // was hunky dory. 
+ // + is->consume(); + + m_state->set_error(false); // Exception is not outstanding any more + + return matchedSymbol; + } + + // Single token deletion (Unwanted above) did not work + // so we see if we can insert a token instead by calculating which + // token would be missing + // + if ( this->mismatchIsMissingToken(is, follow)) + { + // We can fake the missing token and proceed + // + new ANTLR_Exception<ImplTraits, MISSING_TOKEN_EXCEPTION, StreamType>(this, ""); + matchedSymbol = this->getMissingSymbol( is, m_state->get_exception(), ttype, follow); + m_state->get_exception()->set_token( matchedSymbol ); + m_state->get_exception()->set_expecting(ttype); + + // Print out the error after we insert so that ANTLRWorks sees the + // token in the exception. + // + this->reportError(); + + m_state->set_error(false); // Exception is not outstanding any more + + return matchedSymbol; + } + + // Create an exception if we need one + // + new ANTLR_Exception<ImplTraits, RECOGNITION_EXCEPTION, StreamType>(this, ""); + + // Neither deleting nor inserting tokens allows recovery + // must just report the exception. + // + m_state->set_error(true); + return NULL; +} + +template< class ImplTraits, class StreamType > +const typename BaseRecognizer<ImplTraits, StreamType>::UnitType* +BaseRecognizer<ImplTraits, StreamType>::recoverFromMismatchedSet(BitsetListType* follow) +{ + SuperType* super = static_cast<SuperType*>(this); + IntStreamType* is = super->get_parser_istream(); + const UnitType* matchedSymbol; + + if (this->mismatchIsMissingToken(is, follow) == true) + { + // We can fake the missing token and proceed + // + new ANTLR_Exception<ImplTraits, MISSING_TOKEN_EXCEPTION, StreamType>(this); + matchedSymbol = this->getMissingSymbol(is, m_state->get_exception(), follow); + m_state->get_exception()->set_token(matchedSymbol); + + // Print out the error after we insert so that ANTLRWorks sees the + // token in the exception. 
+ // + this->reportError(); + + m_state->set_error(false); // Exception is not outstanding any more + + return matchedSymbol; + } + + // TODO - Single token deletion like in recoverFromMismatchedToken() + // + m_state->set_error(true); + m_state->set_failed(true); + return NULL; +} + +template< class ImplTraits, class StreamType > +bool BaseRecognizer<ImplTraits, StreamType>::recoverFromMismatchedElement(BitsetListType* followBits) +{ + SuperType* super = static_cast<SuperType*>(this); + IntStreamType* is = super->get_parser_istream(); + + BitsetType* follow = followBits->load(); + BitsetType* viableToksFollowingRule; + + if (follow == NULL) + { + /* The follow set is NULL, which means we don't know what can come + * next, so we "hit and hope" by just signifying that we cannot + * recover, which will just cause the next token to be consumed, + * which might dig us out. + */ + return false; + } + + /* We have a bitmap for the follow set, hence we can compute + * what can follow this grammar element reference. + */ + if (follow->isMember( ImplTraits::CommonTokenType::EOR_TOKEN_TYPE) == true) + { + /* First we need to know which of the available tokens are viable + * to follow this reference. + */ + viableToksFollowingRule = this->computeCSRuleFollow(); + + /* Remove the EOR token, which we do not wish to compute with + */ + follow->remove( ImplTraits::CommonTokenType::EOR_TOKEN_TYPE); + delete viableToksFollowingRule; + /* We now have the computed set of what can follow the current token + */ + } + + /* We can now see if the current token works with the set of tokens + * that could follow the current grammar reference. If it looks like it + * is consistent, then we can "insert" that token by not throwing + * an exception and assuming that we saw it. 
+ */ + if ( follow->isMember(is->LA(1)) == true) + { + /* report the error, but don't cause any rules to abort and stuff + */ + this->reportError(); + if (follow != NULL) + { + delete follow; + } + m_state->set_error(false); + m_state->set_failed(false); + return true; /* Success in recovery */ + } + + if (follow != NULL) + { + delete follow; + } + + /* We could not find anything viable to do, so this is going to + * cause an exception. + */ + return false; +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer<ImplTraits, StreamType>::consumeUntil(ANTLR_UINT32 tokenType) +{ + SuperType* super = static_cast<SuperType*>(this); + IntStreamType* is = super->get_parser_istream(); + + // What do have at the moment? + // + ANTLR_UINT32 ttype = is->LA(1); + + // Start eating tokens until we get to the one we want. + // + while (ttype != ImplTraits::CommonTokenType::TOKEN_EOF && ttype != tokenType) + { + is->consume(); + ttype = is->LA(1); + } +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer<ImplTraits, StreamType>::consumeUntilSet(BitsetType* set) +{ + ANTLR_UINT32 ttype; + SuperType* super = static_cast<SuperType*>(this); + IntStreamType* is = super->get_parser_istream(); + + // What do have at the moment? + // + ttype = is->LA(1); + + // Start eating tokens until we get to one we want. + // + while (ttype != ImplTraits::CommonTokenType::TOKEN_EOF && set->isMember(ttype) == false) + { + is->consume(); + ttype = is->LA(1); + } + +} + +template< class ImplTraits, class StreamType > +ANTLR_MARKER BaseRecognizer<ImplTraits, StreamType>::getRuleMemoization( ANTLR_INTKEY ruleIndex, ANTLR_MARKER ruleParseStart) +{ + /* The rule memos are an ANTLR3_LIST of ANTLR3_LIST. 
+ */ + typedef IntTrie<ImplTraits, ANTLR_MARKER> RuleListType; + typedef TrieEntry<ImplTraits, std::shared_ptr<RuleListType>> EntryType; + typedef TrieEntry<ImplTraits, ANTLR_MARKER> SubEntryType; + ANTLR_MARKER stopIndex; + EntryType* entry; + + /* See if we have a list in the ruleMemos for this rule, and if not, then create one + * as we will need it eventually if we are being asked for the memo here. + */ + entry = m_state->get_ruleMemo()->get(ruleIndex); + + if (entry == NULL) + { + /* Did not find it, so create a new one for it, with a bit depth based on the + * size of the input stream. We need the bit depth to incorporate the number if + * bits required to represent the largest possible stop index in the input, which is the + * last character. An int stream is free to return the largest 64 bit offset if it has + * no idea of the size, but you should remember that this will cause the leftmost + * bit match algorithm to run to 63 bits, which will be the whole time spent in the trie ;-) + */ + m_state->get_ruleMemo()->add( ruleIndex, std::make_shared<RuleListType>(63) ); + + /* We cannot have a stopIndex in a trie we have just created of course + */ + return MEMO_RULE_UNKNOWN; + } + + std::shared_ptr<RuleListType> ruleList = entry->get_data(); + + /* See if there is a stop index associated with the supplied start index. + */ + stopIndex = 0; + + SubEntryType* sub_entry = ruleList->get(ruleParseStart); + if (sub_entry != NULL) + { + stopIndex = sub_entry->get_data(); + } + + if (stopIndex == 0) + { + return MEMO_RULE_UNKNOWN; + } + + return stopIndex; +} + +template< class ImplTraits, class StreamType > +bool BaseRecognizer<ImplTraits, StreamType>::alreadyParsedRule(ANTLR_MARKER ruleIndex) +{ + SuperType* super = static_cast<SuperType*>(this); + IntStreamType* is = super->get_istream(); + + /* See if we have a memo marker for this. 
+ */ + ANTLR_MARKER stopIndex = this->getRuleMemoization( ruleIndex, is->index() ); + + if (stopIndex == MEMO_RULE_UNKNOWN) + { + return false; + } + + if (stopIndex == MEMO_RULE_FAILED) + { + m_state->set_failed(true); + } + else + { + is->seek(stopIndex+1); + } + + /* If here then the rule was executed for this input already + */ + return true; +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer<ImplTraits, StreamType>::memoize(ANTLR_MARKER ruleIndex, ANTLR_MARKER ruleParseStart) +{ + /* The rule memos are an ANTLR3_LIST of ANTLR3_LIST. + */ + typedef IntTrie<ImplTraits, ANTLR_MARKER> RuleListType; + typedef TrieEntry<ImplTraits, std::shared_ptr<RuleListType>> EntryType; + EntryType* entry; + ANTLR_MARKER stopIndex; + SuperType* super = static_cast<SuperType*>(this); + IntStreamType* is = super->get_istream(); + + stopIndex = (m_state->get_failed() == true) ? MEMO_RULE_FAILED : is->index() - 1; + + entry = m_state->get_ruleMemo()->get(ruleIndex); + + if (entry != NULL) + { + std::shared_ptr<RuleListType> ruleList = entry->get_data(); + + /* If we don't already have this entry, append it. The memoize trie does not + * accept duplicates so it won't add it if already there and we just ignore the + * return code as we don't care if it is there already. 
+ */ + ruleList->add(ruleParseStart, stopIndex); + } +} + +template< class ImplTraits, class StreamType > +const typename BaseRecognizer<ImplTraits, StreamType>::UnitType* +BaseRecognizer<ImplTraits, StreamType>::getCurrentInputSymbol( IntStreamType* istream ) +{ + return this->getCurrentInputSymbol( istream, ClassForwarder<SuperType>() ); +} + +template< class ImplTraits, class StreamType > +const typename BaseRecognizer<ImplTraits, StreamType>::UnitType* +BaseRecognizer<ImplTraits, StreamType>::getCurrentInputSymbol(IntStreamType* /*istream*/, ClassForwarder<LexerType>) +{ + return NULL; +} + +template< class ImplTraits, class StreamType > +const typename BaseRecognizer<ImplTraits, StreamType>::UnitType* +BaseRecognizer<ImplTraits, StreamType>::getCurrentInputSymbol(IntStreamType* istream, ClassForwarder<ParserType>) +{ + typedef typename ImplTraits::TokenStreamType TokenStreamType; + TokenStreamType* token_stream = static_cast<TokenStreamType*>(istream); + return token_stream->LT(1); +} + +template< class ImplTraits, class StreamType > +const typename BaseRecognizer<ImplTraits, StreamType>::UnitType* +BaseRecognizer<ImplTraits, StreamType>::getCurrentInputSymbol(IntStreamType* istream, ClassForwarder<TreeParserType>) +{ + typedef typename ImplTraits::TreeNodeStreamType TreeNodeStreamType; + TreeNodeStreamType* ctns = static_cast<TreeNodeStreamType*>(istream); + return ctns->LT(1); +} + + +template< class ImplTraits, class StreamType > +typename BaseRecognizer<ImplTraits, StreamType>::UnitType* BaseRecognizer<ImplTraits, StreamType>::getMissingSymbol( IntStreamType* istream, + ExceptionBaseType* e, + ANTLR_UINT32 expectedTokenType, + BitsetListType* follow) +{ + return this->get_super()->getMissingSymbol( istream, e, expectedTokenType, follow ); +} + + +template< class ImplTraits, class StreamType > + template<typename Predicate> +bool BaseRecognizer<ImplTraits, StreamType>::synpred(ClassForwarder<Predicate> pred) +{ + ANTLR_MARKER start; + SuperType* super = 
static_cast<SuperType*>(this); + IntStreamType* is = super->get_istream(); + + /* Begin backtracking so we can get back to where we started after trying out + * the syntactic predicate. + */ + start = is->mark(); + m_state->inc_backtracking(); + + /* Try the syntactical predicate + */ + this->get_super()->synpred( pred ); + + /* Reset + */ + is->rewind(start); + m_state->dec_backtracking(); + + if ( m_state->get_failed() == true) + { + /* Predicate failed + */ + m_state->set_failed(false); + return false; + } + else + { + /* Predicate was successful + */ + m_state->set_failed(false); + return true; + } +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer<ImplTraits, StreamType>::exConstruct() +{ + this->get_super()->exConstruct(); +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer<ImplTraits, StreamType>::reset() +{ + this->reset( ClassForwarder<SuperType>() ); +} + +template< class ImplTraits, class StreamType > +template< typename CompType > +void BaseRecognizer<ImplTraits, StreamType>::reset( ClassForwarder<CompType> ) +{ + typedef typename RecognizerSharedStateType::RuleMemoType RuleMemoType; + m_state->get_following().clear(); + + // Reset the state flags + // + m_state->set_errorRecovery(false); + m_state->set_lastErrorIndex(-1); + m_state->set_failed(false); + m_state->set_errorCount(0); + m_state->set_backtracking(0); + + if (m_state->get_ruleMemo() != NULL) + { + delete m_state->get_ruleMemo(); + m_state->set_ruleMemo( new RuleMemoType(15) ); /* 16 bit depth is enough for 32768 rules! 
*/ + } +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer<ImplTraits, StreamType>::reset( ClassForwarder<LexerType> ) +{ + m_state->set_token_present( false ); + m_state->set_type( ImplTraits::CommonTokenType::TOKEN_INVALID ); + m_state->set_channel( TOKEN_DEFAULT_CHANNEL ); + m_state->set_tokenStartCharIndex( -1 ); + m_state->set_tokenStartCharPositionInLine(-1); + m_state->set_tokenStartLine( -1 ); + m_state->set_text(""); +} + +template< class ImplTraits, class StreamType > +BaseRecognizer<ImplTraits, StreamType>::~BaseRecognizer() +{ + // Did we have a state allocated? + // + if (m_state != NULL) + { + // Free any rule memoization we set up + // + if (m_state->get_ruleMemo() != NULL) + { + delete m_state->get_ruleMemo(); + m_state->set_ruleMemo(NULL); + } + + + // Free any exception space we have left around + // + ExceptionBaseType* thisE = m_state->get_exception(); + if (thisE != NULL) + { + delete thisE; + } + + // Free the shared state memory + // + delete m_state; + } + + // Free the actual recognizer space + // +} + + + +} diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3bitset.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3bitset.hpp new file mode 100644 index 0000000000..68eab69568 --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3bitset.hpp @@ -0,0 +1,222 @@ +/** + * \file + * Defines the basic structures of an ANTLR3 bitset. this is a C version of the + * cut down Bitset class provided with the java version of antlr 3. + * + * + */ +#ifndef _ANTLR3_BITSET_HPP +#define _ANTLR3_BITSET_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
namespace antlr3 {

/** Number of bits in one element (word) of a bitset.
 */
static const ANTLR_UINT32 ANTLR_BITSET_BITS = 64;

/** Number of bits in a nibble.
 */
static const ANTLR_UINT32 ANTLR_BITSET_NIBBLE = 4;

/** log2 of ANTLR_BITSET_BITS, i.e. 2^ANTLR_BITSET_LOG_BITS == ANTLR_BITSET_BITS.
 */
static const ANTLR_UINT32 ANTLR_BITSET_LOG_BITS = 6;

/** We will often need to do a mod operation (i mod nbits).
 *  For powers of two, this mod operation is the same as:
 *   - (i & (nbits-1)).
 *
 *  Since mod is relatively slow, we use an easily
 *  precomputed mod mask to do the mod instead.
 */
static const ANTLR_UINT32 ANTLR_BITSET_MOD_MASK = ANTLR_BITSET_BITS - 1;

/// A (pointer, length) pair describing an array of bit words. The class itself
/// never allocates or frees the array it points to.
template <class ImplTraits>
class BitsetList : public ImplTraits::AllocPolicyType
{
public:
    typedef typename ImplTraits::AllocPolicyType AllocPolicyType;
    typedef typename ImplTraits::BitsetType BitsetType;

private:
    /// Pointer to the array of bit words for this bit set. The original comment
    /// describes the elements as 64 bit values and notes that the pointer may
    /// refer to static data laid down by the code generator, in which case it
    /// must be copied before it is manipulated (e.g. for followset calculations).
    ///
    ANTLR_BITWORD* m_bits;

    /// Length of the bit set in ANTLR_BITWORD units.
    ///
    ANTLR_UINT32 m_length;

public:
    /// Creates an empty list: null word pointer, zero length.
    BitsetList();

    /// Wraps an existing word array; the pointer is stored, not copied.
    BitsetList( ANTLR_BITWORD* bits, ANTLR_UINT32 length );

    ANTLR_BITWORD* get_bits() const;
    ANTLR_UINT32 get_length() const;
    void set_bits( ANTLR_BITWORD* bits );
    void set_length( ANTLR_UINT32 length );

    ///
    /// \brief
    /// Creates a new bitset object (allocated with new) and copies the words
    /// of this list into it, growing the new bitset when it is too small.
    ///
    /// \returns
    /// The newly allocated bitset.
    ///
    /// \remarks
    /// The C runtime equivalent was a stdarg function terminated by -1; this
    /// C++ member takes no arguments and reads the words from this list.
    /// NOTE(review): presumably the caller is responsible for deleting the result.
    ///
    BitsetType* bitsetLoad();

    /// Creates a new Bitset (allocated with new) and copies this list's words
    /// into it with memcpy.
    BitsetType* bitsetCopy();

};

/// A growable set of non-negative integers stored as an array of bit words
/// held in a BitsetListType member.
template <class ImplTraits>
class Bitset : public ImplTraits::AllocPolicyType
{
public:
    typedef typename ImplTraits::AllocPolicyType AllocPolicyType;
    typedef typename AllocPolicyType::template ListType<ANTLR_UINT32> IntListType;
    typedef typename ImplTraits::BitsetListType BitsetListType;

private:
    /// The actual bits themselves
    ///
    BitsetListType m_blist;

public:
    /// Creates a bitset with room for at least nbits bits; the implementation
    /// never allocates fewer than 8 words.
    Bitset( ANTLR_UINT32 nbits=0 );
    Bitset( const Bitset& bitset );

    /// Returns a newly allocated bitset holding a copy of this set's words.
    Bitset* clone() const;

    /// Returns a newly allocated bitset that is the union of this set and bitset2.
    Bitset* bor(Bitset* bitset2);

    BitsetListType& get_blist();

    /// ORs the words of bitset2 into this set; a NULL bitset2 is ignored.
    void borInPlace(Bitset* bitset2);

    /// Number of bits that are turned on in this set.
    ANTLR_UINT32 size() const;

    /// Turns the given bit on, growing the word array when needed.
    void add(ANTLR_INT32 bit);

    /// Reallocates the word array to newSize words, keeping the existing words.
    void grow(ANTLR_INT32 newSize);

    bool equals(Bitset* bitset2) const;
    bool isMember(ANTLR_UINT32 bit) const;

    /// Capacity of the set in bits (number of words times bits per word).
    ANTLR_UINT32 numBits() const;

    /// Turns the given bit off; bits beyond the current capacity are ignored.
    void remove(ANTLR_UINT32 bit);

    /// True when no bit is turned on.
    bool isNilNode() const;

    /** Produce an integer list of all the bits that are turned on
     *  in this bitset. Used for error processing in the main as the bitset
     *  represents a number of integer tokens which we use for follow sets
     *  and so on.
     *
     *  The first entry holds size() + 1, i.e. the total number of entries in
     *  the returned array including the first entry itself; the numbers of the
     *  bits that are turned on follow in ascending order.
     */
    ANTLR_INT32* toIntList() const;

    ///
    /// \brief
    /// Creates a new bitset (allocated with new) and adds the given bit(s) to it.
    ///
    /// \param[in] bit
    /// Bit number (0 based) to add to the set.
    ///
    /// \returns
    /// A new bit set with the specified element(s) added into it.
    ///
    /// \remarks
    /// C++ doesn't like variable length arguments, so the stdarg function of the
    /// C runtime is replaced by overloads taking one and two bits.
    ///
    static Bitset* BitsetOf(ANTLR_INT32 bit);
    static Bitset* BitsetOf(ANTLR_INT32 bit1, ANTLR_INT32 bit2);

    ///
    /// \brief
    /// Creates a new default-sized bitset (allocated with new) and adds every
    /// element of the supplied list to it.
    ///
    /// \param[in] list
    /// The bit numbers to add to the set.
    ///
    /// \returns
    /// A new bit set containing all of the listed elements.
    ///
    static Bitset* BitsetFromList(const IntListType& list);

    /// Frees the word array through the allocation policy.
    ~Bitset();

private:
    /// Grows the word array so that the given bit fits, at least doubling it.
    void growToInclude(ANTLR_INT32 bit);

    /// Mask selecting the given bit inside its word.
    static ANTLR_UINT64 BitMask(ANTLR_UINT32 bitNumber);

    /// Number of words needed to hold bit numbers 0..bit.
    static ANTLR_UINT32 NumWordsToHold(ANTLR_UINT32 bit);

    /// Index of the word that holds the given bit.
    static ANTLR_UINT32 WordNumber(ANTLR_UINT32 bit);

    /// Worker used by bor(): ORs the words of bitset2 into this set.
    void bitsetORInPlace(Bitset* bitset2);

};

}

#include "antlr3bitset.inl"

#endif

// ---- antlr3bitset.inl: inline member definitions ----

namespace antlr3 {

/// Empty list: no word array, zero length.
template <class ImplTraits>
ANTLR_INLINE BitsetList<ImplTraits>::BitsetList()
{
    m_bits = NULL;
    m_length = 0;
}

/// Wraps the given word array; only the pointer and the length are stored.
template <class ImplTraits>
ANTLR_INLINE BitsetList<ImplTraits>::BitsetList( ANTLR_BITWORD* bits, ANTLR_UINT32 length )
{
    m_bits = bits;
    m_length = length;
}

template <class ImplTraits>
ANTLR_INLINE ANTLR_BITWORD* BitsetList<ImplTraits>::get_bits() const
{
    return m_bits;
}

template <class ImplTraits>
ANTLR_INLINE ANTLR_UINT32 BitsetList<ImplTraits>::get_length() const
{
    return m_length;
}
+ +template <class ImplTraits> +ANTLR_INLINE void BitsetList<ImplTraits>::set_bits( ANTLR_BITWORD* bits ) +{ + m_bits = bits; +} + +template <class ImplTraits> +ANTLR_INLINE void BitsetList<ImplTraits>::set_length( ANTLR_UINT32 length ) +{ + m_length = length; +} + +template <class ImplTraits> +typename BitsetList<ImplTraits>::BitsetType* BitsetList<ImplTraits>::bitsetLoad() +{ + // Allocate memory for the bitset structure itself + // the input parameter is the bit number (0 based) + // to include in the bitset, so we need at at least + // bit + 1 bits. If any arguments indicate a + // a bit higher than the default number of bits (0 means default size) + // then Add() will take care + // of it. + // + BitsetType* bitset = new BitsetType(); + + if (this != NULL) + { + // Now we can add the element bits into the set + // + ANTLR_UINT32 count=0; + while (count < m_length) + { + if( bitset->get_blist().get_length() <= count) + bitset->grow(count+1); + + typename ImplTraits::BitsetListType& blist = bitset->get_blist(); + blist.m_bits[count] = *(m_bits+count); + count++; + } + } + + // return the new bitset + // + return bitset; +} + +template <class ImplTraits> +typename BitsetList<ImplTraits>::BitsetType* BitsetList<ImplTraits>::bitsetCopy() +{ + BitsetType* bitset; + ANTLR_UINT32 numElements = m_length; + + // Avoid memory thrashing at the expense of a few more bytes + // + if (numElements < 8) + numElements = 8; + + // Allocate memory for the bitset structure itself + // + bitset = new Bitset<ImplTraits>(numElements); + memcpy(bitset->get_blist().get_bits(), m_bits, numElements * sizeof(ANTLR_BITWORD)); + + // All seems good + // + return bitset; +} + +template <class ImplTraits> +Bitset<ImplTraits>::Bitset( ANTLR_UINT32 numBits ) +{ + // Avoid memory thrashing at the up front expense of a few bytes + if (numBits < (8 * ANTLR_BITSET_BITS)) + numBits = 8 * ANTLR_BITSET_BITS; + + // No we need to allocate the memory for the number of bits asked for + // in multiples of 
ANTLR3_UINT64. + // + ANTLR_UINT32 numelements = ((numBits -1) >> ANTLR_BITSET_LOG_BITS) + 1; + + m_blist.set_bits( (ANTLR_BITWORD*) AllocPolicyType::alloc0(numelements * sizeof(ANTLR_BITWORD))); + + m_blist.set_length( numelements ); +} + +template <class ImplTraits> +Bitset<ImplTraits>::Bitset( const Bitset& bitset ) + :m_blist(bitset.m_blist) +{ +} + +template <class ImplTraits> +ANTLR_INLINE Bitset<ImplTraits>* Bitset<ImplTraits>::clone() const +{ + Bitset* bitset; + + // Allocate memory for the bitset structure itself + // + bitset = new Bitset( ANTLR_BITSET_BITS * m_blist.get_length() ); + + // Install the actual bits in the source set + // + memcpy(bitset->m_blist.get_bits(), m_blist.get_bits(), + m_blist.get_length() * sizeof(ANTLR_BITWORD) ); + + // All seems good + // + return bitset; +} + +template <class ImplTraits> +Bitset<ImplTraits>* Bitset<ImplTraits>::bor(Bitset* bitset2) +{ + Bitset* bitset; + + if (this == NULL) + return bitset2->clone(); + + if (bitset2 == NULL) + return this->clone(); + + // Allocate memory for the newly ordered bitset structure itself. + // + bitset = this->clone(); + bitset->bitsetORInPlace(bitset2); + return bitset; +} + +template <class ImplTraits> +void Bitset<ImplTraits>::borInPlace(Bitset* bitset2) +{ + ANTLR_UINT32 minimum; + + if (bitset2 == NULL) + return; + + // First make sure that the target bitset is big enough + // for the new bits to be ored in. 
+ // + if ( m_blist.get_length() < bitset2->m_blist.get_length() ) + this->growToInclude( bitset2->m_blist.get_length() * sizeof(ANTLR_BITWORD) ); + + // Or the miniimum number of bits after any resizing went on + // + if ( m_blist.get_length() < bitset2->m_blist.get_length() ) + minimum = m_blist.get_length(); + else + minimum = bitset2->m_blist.get_length(); + + ANTLR_BITWORD* bits1 = m_blist.get_bits(); + ANTLR_BITWORD* bits2 = bitset2->m_blist.get_bits(); + for (ANTLR_UINT32 i = minimum; i > 0; i--) + bits1[i-1] |= bits2[i-1]; +} + +template <class ImplTraits> +ANTLR_UINT32 Bitset<ImplTraits>::size() const +{ + ANTLR_UINT32 degree; + ANTLR_INT32 i; + ANTLR_INT8 bit; + + // TODO: Come back to this, it may be faster to & with 0x01 + // then shift right a copy of the 4 bits, than shift left a constant of 1. + // But then again, the optimizer might just work this out + // anyway. + // + degree = 0; + ANTLR_BITWORD* bits = m_blist.get_bits(); + for (i = m_blist.get_length() - 1; i>= 0; i--) + { + if (bits[i] != 0) + { + for(bit = ANTLR_BITSET_BITS - 1; bit >= 0; bit--) + { + if((bits[i] & (((ANTLR_BITWORD)1) << bit)) != 0) + { + degree++; + } + } + } + } + return degree; +} + +template <class ImplTraits> +ANTLR_INLINE void Bitset<ImplTraits>::add(ANTLR_INT32 bit) +{ + ANTLR_UINT32 word = Bitset::WordNumber(bit); + + if (word >= m_blist.get_length() ) + this->growToInclude(bit); + + ANTLR_BITWORD* bits = m_blist.get_bits(); + bits[word] |= Bitset::BitMask(bit); +} + +template <class ImplTraits> +void Bitset<ImplTraits>::grow(ANTLR_INT32 newSize) +{ + ANTLR_BITWORD* newBits; + + // Space for newly sized bitset - TODO: come back to this and use realloc?, it may + // be more efficient... + // + newBits = (ANTLR_BITWORD*) AllocPolicyType::alloc0(newSize * sizeof(ANTLR_BITWORD) ); + if ( m_blist.get_bits() != NULL) + { + // Copy existing bits + // + memcpy( newBits, m_blist.get_bits(), m_blist.get_length() * sizeof(ANTLR_BITWORD) ); + + // Out with the old bits... 
de de de derrr + // + AllocPolicyType::free( m_blist.get_bits() ); + } + + // In with the new bits... keerrrang. + // + m_blist.set_bits(newBits); + m_blist.set_length(newSize); +} + +template <class ImplTraits> +bool Bitset<ImplTraits>::equals(Bitset* bitset2) const +{ + ANTLR_UINT32 minimum; + ANTLR_UINT32 i; + + if (this == NULL || bitset2 == NULL) + return false; + + // Work out the minimum comparison set + // + if ( m_blist.get_length() < bitset2->m_blist.get_length() ) + minimum = m_blist.get_length(); + else + minimum = bitset2->m_blist.get_length(); + + // Make sure explict in common bits are equal + // + for (i = minimum - 1; i < minimum ; i--) + { + ANTLR_BITWORD* bits1 = m_blist.get_bits(); + ANTLR_BITWORD* bits2 = bitset2->m_blist.get_bits(); + if ( bits1[i] != bits2[i]) + return false; + } + + // Now make sure the bits of the larger set are all turned + // off. + // + if ( m_blist.get_length() > minimum) + { + for (i = minimum ; i < m_blist.get_length(); i++) + { + ANTLR_BITWORD* bits = m_blist.get_bits(); + if(bits[i] != 0) + return false; + } + } + else if (bitset2->m_blist.get_length() > minimum) + { + ANTLR_BITWORD* bits = m_blist.get_bits(); + for (i = minimum; i < bitset2->m_blist.get_length(); i++) + { + if ( bits[i] != 0 ) + return false; + } + } + + return true; +} + +template <class ImplTraits> +bool Bitset<ImplTraits>::isMember(ANTLR_UINT32 bit) const +{ + ANTLR_UINT32 wordNo = Bitset::WordNumber(bit); + + if (wordNo >= m_blist.get_length()) + return false; + + ANTLR_BITWORD* bits = m_blist.get_bits(); + if ( (bits[wordNo] & Bitset::BitMask(bit)) == 0) + return false; + else + return true; +} + +template <class ImplTraits> +ANTLR_INLINE ANTLR_UINT32 Bitset<ImplTraits>::numBits() const +{ + return m_blist.get_length() << ANTLR_BITSET_LOG_BITS; +} + +template <class ImplTraits> +ANTLR_INLINE typename ImplTraits::BitsetListType& Bitset<ImplTraits>::get_blist() +{ + return m_blist; +} + +template <class ImplTraits> +ANTLR_INLINE void 
Bitset<ImplTraits>::remove(ANTLR_UINT32 bit) +{ + ANTLR_UINT32 wordNo = Bitset::WordNumber(bit); + + if (wordNo < m_blist.get_length()) + { + ANTLR_BITWORD* bits = m_blist.get_bits(); + bits[wordNo] &= ~(Bitset::BitMask(bit)); + } +} + +template <class ImplTraits> +ANTLR_INLINE bool Bitset<ImplTraits>::isNilNode() const +{ + ANTLR_UINT32 i; + ANTLR_BITWORD* bits = m_blist.get_bits(); + for (i = m_blist.get_length() -1 ; i < m_blist.get_length(); i--) + { + if(bits[i] != 0) + return false; + } + return true; +} + +template <class ImplTraits> +ANTLR_INT32* Bitset<ImplTraits>::toIntList() const +{ + ANTLR_UINT32 numInts; // How many integers we will need + ANTLR_UINT32 numBits; // How many bits are in the set + ANTLR_UINT32 i; + ANTLR_UINT32 index; + + ANTLR_INT32* intList; + + numInts = this->size() + 1; + numBits = this->numBits(); + + intList = (ANTLR_INT32*) AllocPolicyType::alloc(numInts * sizeof(ANTLR_INT32)); + + intList[0] = numInts; + + // Enumerate the bits that are turned on + // + for (i = 0, index = 1; i<numBits; i++) + { + if (this->isMember(i) == true) + intList[index++] = i; + } + + // Result set + // + return intList; +} + +template <class ImplTraits> +ANTLR_INLINE Bitset<ImplTraits>::~Bitset() +{ + if (m_blist.get_bits() != NULL) + AllocPolicyType::free(m_blist.get_bits()); + return; +} + +template <class ImplTraits> +void Bitset<ImplTraits>::growToInclude(ANTLR_INT32 bit) +{ + ANTLR_UINT32 bl; + ANTLR_UINT32 nw; + + bl = (m_blist.get_length() << 1); + nw = Bitset::NumWordsToHold(bit); + + if (bl > nw) + this->grow(bl); + else + this->grow(nw); +} + +template <class ImplTraits> +ANTLR_INLINE ANTLR_UINT64 Bitset<ImplTraits>::BitMask(ANTLR_UINT32 bitNumber) +{ + return ((ANTLR_UINT64)1) << (bitNumber & (ANTLR_BITSET_MOD_MASK)); +} + +template <class ImplTraits> +ANTLR_INLINE ANTLR_UINT32 Bitset<ImplTraits>::NumWordsToHold(ANTLR_UINT32 bit) +{ + return (bit >> ANTLR_BITSET_LOG_BITS) + 1; +} + +template <class ImplTraits> +ANTLR_INLINE ANTLR_UINT32 
Bitset<ImplTraits>::WordNumber(ANTLR_UINT32 bit) +{ + return bit >> ANTLR_BITSET_LOG_BITS; +} + +template <class ImplTraits> +void Bitset<ImplTraits>::bitsetORInPlace(Bitset* bitset2) +{ + ANTLR_UINT32 minimum; + ANTLR_UINT32 i; + + if (bitset2 == NULL) + return; + + // First make sure that the target bitset is big enough + // for the new bits to be ored in. + // + if ( m_blist.get_length() < bitset2->m_blist.get_length() ) + this->growToInclude( bitset2->m_blist.get_length() * sizeof(ANTLR_BITWORD) ); + + // Or the miniimum number of bits after any resizing went on + // + if ( m_blist.get_length() < bitset2->m_blist.get_length() ) + minimum = m_blist.get_length(); + else + minimum = bitset2->m_blist.get_length(); + + ANTLR_BITWORD* bits1 = m_blist.get_bits(); + ANTLR_BITWORD* bits2 = bitset2->m_blist.get_bits(); + for (i = minimum; i > 0; i--) + bits1[i-1] |= bits2[i-1]; +} + +template <class ImplTraits> +Bitset<ImplTraits>* Bitset<ImplTraits>::BitsetOf(ANTLR_INT32 bit) +{ + // Allocate memory for the bitset structure itself + // the input parameter is the bit number (0 based) + // to include in the bitset, so we need at at least + // bit + 1 bits. If any arguments indicate a + // a bit higher than the default number of bits (0 menas default size) + // then Add() will take care + // of it. + // + Bitset<ImplTraits>* bitset = new Bitset<ImplTraits>(0); + bitset->add(bit); + return bitset; +} + +template <class ImplTraits> +Bitset<ImplTraits>* Bitset<ImplTraits>::BitsetOf(ANTLR_INT32 bit1, ANTLR_INT32 bit2) +{ + Bitset<ImplTraits>* bitset = Bitset<ImplTraits>::BitsetOf(bit1); + bitset->add(bit2); + return bitset; +} + +//static +template <class ImplTraits> +Bitset<ImplTraits>* Bitset<ImplTraits>::BitsetFromList(const IntListType& list) +{ + // We have no idea what exactly is in the list + // so create a default bitset and then just add stuff + // as we enumerate. 
+ // + Bitset<ImplTraits>* bitset = new Bitset<ImplTraits>(0); + for( int i = 0; i < list.size(); ++i ) + bitset->add( list[i] ); + + return bitset; +} + +} diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3collections.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3collections.hpp new file mode 100644 index 0000000000..7551c243d0 --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3collections.hpp @@ -0,0 +1,283 @@ +#ifndef ANTLR3COLLECTIONS_HPP +#define ANTLR3COLLECTIONS_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
namespace antlr3 {

/* -------------- TRIE Interfaces ---------------- */

/** Structure that holds the payload entry in an ANTLR3_INT_TRIE or ANTLR3_STRING_TRIE
 */
template< class ImplTraits, class DataType >
class TrieEntry : public ImplTraits::AllocPolicyType
{
public:
    typedef typename ImplTraits::AllocPolicyType AllocPolicy;

private:
    DataType m_data;        /* The payload stored for the key */
    TrieEntry* m_next;      /* Allows duplicate entries for same key in insertion order */

public:
    /// Stores a copy of data and the link to the next entry for the same key.
    TrieEntry(const DataType& data, TrieEntry* next);
    DataType& get_data();
    const DataType& get_data() const;
    TrieEntry* get_next() const;
    void set_next( TrieEntry* next );
};

/** Structure that defines an element/node in an ANTLR_INT_TRIE
 */
template< class ImplTraits, class DataType >
class IntTrieNode : public ImplTraits::AllocPolicyType
{
public:
    typedef TrieEntry<ImplTraits, DataType> TrieEntryType;
    typedef TrieEntryType BucketsType;

private:
    ANTLR_UINT32 m_bitNum;      /**< This is the left/right bit index for traversal along the nodes */
    ANTLR_INTKEY m_key;         /**< This is the actual key that the entry represents if it is a terminal node */
    BucketsType* m_buckets;     /**< This is the data bucket(s) that the key indexes, which may be NULL */
    IntTrieNode* m_leftN;       /**< Pointer to the left node from here when sKey & bitNum = 0 */
    IntTrieNode* m_rightN;      /**< Pointer to the right node from here when sKey & bitNum = 1 */

public:
    IntTrieNode();
    ~IntTrieNode();

    ANTLR_UINT32 get_bitNum() const;
    ANTLR_INTKEY get_key() const;
    BucketsType* get_buckets() const;
    IntTrieNode* get_leftN() const;
    IntTrieNode* get_rightN() const;
    void set_bitNum( ANTLR_UINT32 bitNum );
    void set_key( ANTLR_INTKEY key );
    void set_buckets( BucketsType* buckets );
    void set_leftN( IntTrieNode* leftN );
    void set_rightN( IntTrieNode* rightN );
};

/** Structure that defines an ANTLR3_INT_TRIE. For this particular implementation,
 *  as you might expect, the key is turned into a "string" by looking at bit(key, depth)
 *  of the integer key. Using 64 bit keys gives us a depth limit of 64 (or bit 0..63)
 *  and potentially a huge trie. This is the algorithm for a Patricia Trie.
 *  Note also that this trie [can] accept multiple entries for the same key and is
 *  therefore a kind of elastic bucket patricia trie.
 *
 *  If you find this code useful, please feel free to 'steal' it for any purpose
 *  as covered by the BSD license under which ANTLR is issued. You can cut the code
 *  but as the ANTLR library is only about 50K (Windows Vista), you might find it
 *  easier to just link the library. Please keep all comments and licenses and so on
 *  in any version of this you create of course.
 *
 *  Jim Idle.
 *
 */
/// Non-template base of IntTrie that provides the shared lookup tables.
class IntTrieBase
{
public:
    /// Table of 256 entries mapping a byte value to the index of its highest set bit.
    static const ANTLR_UINT8* get_bitIndex();

    /// Table of 64 entries where entry n is the 64 bit value with only bit n set.
    static const ANTLR_UINT64* get_bitMask();
};

template< class ImplTraits, class DataType >
class IntTrie : public ImplTraits::AllocPolicyType, public IntTrieBase
{
public:
    typedef TrieEntry<ImplTraits, DataType> TrieEntryType;
    typedef IntTrieNode<ImplTraits, DataType> IntTrieNodeType;

private:
    IntTrieNodeType* m_root;        /* Root node of this integer trie */
    IntTrieNodeType* m_current;     /* Used to traverse the TRIE with the next() method */
    ANTLR_UINT32 m_count;           /* Current entry count */
    bool m_allowDups;               /* Whether this trie accepts duplicate keys */

public:
    /* INT TRIE Implementation of depth 64 bits, being the number of bits
     * in a 64 bit integer.
     */
    IntTrie( ANTLR_UINT32 depth );

    /** Search the int Trie and return a pointer to the first bucket indexed
     *  by the key if it is contained in the trie, otherwise NULL.
     */
    TrieEntryType* get( ANTLR_INTKEY key);
    bool del( ANTLR_INTKEY key);

    /** Add an entry into the INT trie.
     *  Basically we descend the trie as we do when searching it, which will
     *  locate the only node in the trie that can be reached by the bit pattern of the
     *  key. If the key is actually at that node, then if the trie accepts duplicates
     *  we add the supplied data in a new chained bucket to that data node. If it does
     *  not accept duplicates then we merely return FALSE in case the caller wants to know
     *  whether the key was already in the trie.
     *  If the node we locate is not the key we are looking to add, then we insert a new node
     *  into the trie with a bit index of the leftmost differing bit and the left or right
     *  node pointing to itself or the data node we are inserting 'before'.
     */
    bool add( ANTLR_INTKEY key, const DataType& data );
    ~IntTrie();
};

/**
 * A topological sort system that given a set of dependencies of a node m on node n,
 * can sort them in dependency order. This is a generally useful utility object
 * that does not care what the things are it is sorting. Generally the set
 * to be sorted will be numeric indexes into some other structure such as an ANTLR3_VECTOR.
 * I have provided a sort method that given ANTLR3_VECTOR as an input will sort
 * the vector entries in place, as well as a sort method that just returns an
 * array of the sorted node indexes, in case you are not sorting ANTLR3_VECTORS but
 * some set of your own device.
 *
 * Of the two main algorithms that could be used, I chose to use the depth first
 * search for unvisited nodes as a) This runs in linear time, and b) it is what
 * we used in the ANTLR Tool to perform a topological sort of the input grammar files
 * based on their dependencies.
 */
template<class ImplTraits>
class Topo : public ImplTraits::AllocPolicyType
{
public:
    typedef typename ImplTraits::BitsetType BitsetType;
    typedef typename ImplTraits::AllocPolicyType AllocPolicyType;

private:
    /**
     * A vector of vectors of edges, built by calling the addEdge method()
     * to indicate that node number n depends on node number m. Each entry in the vector
     * contains a bitset, which has a bit index set for each node upon which the
     * entry node depends.
     */
    BitsetType** m_edges;

    /**
     * A vector used to build up the sorted output order. Note that
     * as the vector contains UINT32 then the maximum node index is
     * 'limited' to 2^32, as nodes should be zero based.
     */
    ANTLR_UINT32* m_sorted;

    /**
     * A vector used to detect cycles in the edge dependencies. It is used
     * as a stack and each time we descend a node to one of its edges we
     * add the node into this stack. If we find a node that we have already
     * visited in the stack, then it means there was a cycle such as 9->8->1->9
     * as the only way a node can be on the stack is if we are currently
     * descending from it as we remove it from the stack as we exit from
     * descending its dependencies
     */
    ANTLR_UINT32* m_cycle;

    /**
     * A flag that indicates the algorithm found a cycle in the edges
     * such as 9->8->1->9
     * If this flag is set after you have called one of the sort routines
     * then the detected cycle will be contained in the cycle array; see
     * m_cycleMark for the extent of the cycle within that array.
     */
    bool m_hasCycle;

    /**
     * A watermark used to accumulate potential cycles in the cycle array.
     * This should be zero when we are done. Check hasCycle after calling one
     * of the sort methods and if it is true then you can find the cycle
     * in cycle[0]...cycle[cycleMark-1]
     */
    ANTLR_UINT32 m_cycleMark;

    /**
     * One more than the largest node index that is contained in edges/sorted.
     */
    ANTLR_UINT32 m_limit;

    /**
     * The set of visited nodes as determined by a set entry in
     * the bitmap.
     */
    BitsetType* m_visited;

public:
    Topo();

    /**
     * A method that adds an edge from one node to another. An edge
     * of n -> m indicates that node n is dependent on node m. Note that
     * while building these edges, it is perfectly OK to add nodes out of
     * sequence. So, if you have edges:
     *
     * 3 -> 0
     * 2 -> 1
     * 1 -> 3
     *
     * Then you can add them in that order and so add node 3 before nodes 2 and 1
     *
     */
    void addEdge(ANTLR_UINT32 edge, ANTLR_UINT32 dependency);


    /**
     * A method that returns a pointer to an array of sorted node indexes.
     * The array is sorted in topological sorted order. Note that the array
     * is only as large as the largest node index you created an edge for. This means
     * that if you had an input of 32 nodes, but that largest node with an edge
     * was 16, then the returned array will be the sorted order of the first 16
     * nodes and the last 16 nodes of your array are basically fine as they are
     * as they had no dependencies and do not need any particular sort order.
     *
     * NB: If the structure that contains the array is freed, then the sorted
     * array will be freed too so you should use the value of limit to
     * make a long term copy of this array if you do not want to keep the topo
     * structure around as well.
     */
    ANTLR_UINT32* sortToArray();

    /**
     * A method that sorts the supplied ANTLR3_VECTOR in place based
     * on the previously supplied edge data.
     */
    template<typename DataType>
    void sortVector( typename ImplTraits::template VectorType<DataType>& v);

    /// Depth first search step for the given node.
    /// NOTE(review): the implementation is outside this view; presumably it
    /// is the traversal used by the sort methods - confirm before relying on it.
    void DFS(ANTLR_UINT32 node);

    /**
     * A method to free this structure and any associated memory.
     */
    ~Topo();
};

}

#include "antlr3collections.inl"

#endif

// ---- antlr3collections.inl: inline member definitions ----

namespace antlr3 {

/// Copies the payload and records the next entry in the chain for the same key.
template< class ImplTraits, class DataType >
ANTLR_INLINE TrieEntry<ImplTraits, DataType>::TrieEntry(const DataType& data, TrieEntry* next)
    :m_data(data)
{
    m_next = next;
}
template< class ImplTraits, class DataType >
ANTLR_INLINE DataType& TrieEntry<ImplTraits, DataType>::get_data()
{
    return m_data;
}
template< class ImplTraits, class DataType >
ANTLR_INLINE const DataType& TrieEntry<ImplTraits, DataType>::get_data() const
{
    return m_data;
}
template< class ImplTraits, class DataType >
ANTLR_INLINE TrieEntry<ImplTraits, DataType>* TrieEntry<ImplTraits, DataType>::get_next() const
{
    return m_next;
}

template< class ImplTraits, class DataType >
ANTLR_INLINE void TrieEntry<ImplTraits, DataType>::set_next( TrieEntry* next )
{
    m_next = next;
}

// Plain accessors for the IntTrieNode fields.
template< class ImplTraits, class DataType >
ANTLR_INLINE ANTLR_UINT32 IntTrieNode<ImplTraits, DataType>::get_bitNum() const
{
    return m_bitNum;
}
template< class ImplTraits, class DataType >
ANTLR_INLINE ANTLR_INTKEY IntTrieNode<ImplTraits, DataType>::get_key() const
{
    return m_key;
}
template< class ImplTraits, class DataType >
ANTLR_INLINE typename IntTrieNode<ImplTraits, DataType>::BucketsType* IntTrieNode<ImplTraits, DataType>::get_buckets() const
{
    return m_buckets;
}
template< class ImplTraits, class DataType >
ANTLR_INLINE IntTrieNode<ImplTraits, DataType>* IntTrieNode<ImplTraits, DataType>::get_leftN() const
{
    return m_leftN;
}
template< class ImplTraits, class DataType >
ANTLR_INLINE IntTrieNode<ImplTraits, DataType>* IntTrieNode<ImplTraits, DataType>::get_rightN() const
{
    return m_rightN;
}
template< class ImplTraits, class DataType >
ANTLR_INLINE void IntTrieNode<ImplTraits, DataType>::set_bitNum( ANTLR_UINT32 bitNum )
{
    m_bitNum = bitNum;
}
template< class ImplTraits, class DataType >
ANTLR_INLINE void IntTrieNode<ImplTraits, DataType>::set_key( ANTLR_INTKEY key )
{
    m_key = key;
}
template< class ImplTraits, class DataType >
ANTLR_INLINE void IntTrieNode<ImplTraits, DataType>::set_buckets( BucketsType* buckets )
{
    m_buckets = buckets;
}
template< class ImplTraits, class DataType >
ANTLR_INLINE void IntTrieNode<ImplTraits, DataType>::set_leftN( IntTrieNode* leftN )
{
    m_leftN = leftN;
}
template< class ImplTraits, class DataType >
ANTLR_INLINE void IntTrieNode<ImplTraits, DataType>::set_rightN( IntTrieNode* rightN )
{
    m_rightN = rightN;
}

/// Returns a 256 entry table: for a byte value v >= 1 the entry is the index
/// (0..7) of the highest bit set in v; entry 0 is padding.
ANTLR_INLINE const ANTLR_UINT8* IntTrieBase::get_bitIndex()
{
    static ANTLR_UINT8 bitIndex[256] =
    {
        0,                                                  // 0 - Just for padding
        0,                                                  // 1
        1, 1,                                               // 2..3
        2, 2, 2, 2,                                         // 4..7
        3, 3, 3, 3, 3, 3, 3, 3,                             // 8+
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,     // 16+
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,     // 32+
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,     // 64+
        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,     // 128+
        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
    };
    return bitIndex;
}

/// Returns a 64 entry table in which entry n is the 64 bit mask with only bit n set.
ANTLR_INLINE const ANTLR_UINT64* IntTrieBase::get_bitMask()
{
    static ANTLR_UINT64 bitMask[64] =
    {
        0x0000000000000001ULL, 0x0000000000000002ULL, 0x0000000000000004ULL, 0x0000000000000008ULL,
        0x0000000000000010ULL, 0x0000000000000020ULL, 0x0000000000000040ULL, 0x0000000000000080ULL,
        0x0000000000000100ULL, 0x0000000000000200ULL, 0x0000000000000400ULL, 0x0000000000000800ULL,
        0x0000000000001000ULL, 0x0000000000002000ULL, 0x0000000000004000ULL, 0x0000000000008000ULL,
        0x0000000000010000ULL, 0x0000000000020000ULL, 0x0000000000040000ULL, 0x0000000000080000ULL,
        0x0000000000100000ULL, 0x0000000000200000ULL, 0x0000000000400000ULL, 0x0000000000800000ULL,
        0x0000000001000000ULL, 0x0000000002000000ULL, 0x0000000004000000ULL, 0x0000000008000000ULL,
        0x0000000010000000ULL, 0x0000000020000000ULL, 0x0000000040000000ULL, 0x0000000080000000ULL,
        0x0000000100000000ULL, 0x0000000200000000ULL, 0x0000000400000000ULL, 0x0000000800000000ULL,
        0x0000001000000000ULL, 0x0000002000000000ULL, 0x0000004000000000ULL, 0x0000008000000000ULL,
        0x0000010000000000ULL, 0x0000020000000000ULL, 0x0000040000000000ULL, 0x0000080000000000ULL,
        0x0000100000000000ULL, 0x0000200000000000ULL, 0x0000400000000000ULL, 0x0000800000000000ULL,
        0x0001000000000000ULL, 0x0002000000000000ULL, 0x0004000000000000ULL, 0x0008000000000000ULL,
        0x0010000000000000ULL, 0x0020000000000000ULL, 0x0040000000000000ULL, 0x0080000000000000ULL,
        0x0100000000000000ULL, 0x0200000000000000ULL, 0x0400000000000000ULL, 0x0800000000000000ULL,
        0x1000000000000000ULL, 0x2000000000000000ULL, 0x4000000000000000ULL, 0x8000000000000000ULL
    };

    return bitMask;
}
+ */ + m_allowDups = false; + m_current = NULL; +} + +template< class ImplTraits, class DataType > +IntTrie<ImplTraits, DataType>::~IntTrie() +{ + /* Descend from the root and free all the nodes + */ + delete m_root; + + /* the nodes are all gone now, so we need only free the memory + * for the structure itself + */ +} + +template< class ImplTraits, class DataType > +typename IntTrie<ImplTraits, DataType>::TrieEntryType* IntTrie<ImplTraits, DataType>::get( ANTLR_INTKEY key) +{ + IntTrieNodeType* thisNode; + IntTrieNodeType* nextNode; + + if (m_count == 0) + return NULL; /* Nothing in this trie yet */ + + /* Starting at the root node in the trie, compare the bit index + * of the current node with its next child node (starts left from root). + * When the bit index of the child node is greater than the bit index of the current node + * then by definition (as the bit index decreases as we descent the trie) + * we have reached a 'backward' pointer. A backward pointer means we + * have reached the only node that can be reached by the bits given us so far + * and it must either be the key we are looking for, or if not then it + * means the entry was not in the trie, and we return NULL. A backward pointer + * points back in to the tree structure rather than down (deeper) within the + * tree branches. + */ + thisNode = m_root; /* Start at the root node */ + nextNode = thisNode->get_leftN(); /* Examine the left node from the root */ + + /* While we are descending the tree nodes... + */ + const ANTLR_UINT64* bitMask = this->get_bitMask(); + while( thisNode->get_bitNum() > nextNode->get_bitNum() ) + { + /* Next node now becomes the new 'current' node + */ + thisNode = nextNode; + + /* We now test the bit indicated by the bitmap in the next node + * in the key we are searching for. The new next node is the + * right node if that bit is set and the left node it is not. 
+ */ + if (key & bitMask[nextNode->get_bitNum()]) + { + nextNode = nextNode->get_rightN(); /* 1 is right */ + } + else + { + nextNode = nextNode->get_leftN(); /* 0 is left */ + } + } + + /* Here we have reached a node where the bitMap index is lower than + * its parent. This means it is pointing backward in the tree and + * must therefore be a terminal node, being the only point than can + * be reached with the bits seen so far. It is either the actual key + * we wanted, or if that key is not in the trie it is another key + * that is currently the only one that can be reached by those bits. + * That situation would obviously change if the key was to be added + * to the trie. + * + * Hence it only remains to test whether this is actually the key or not. + */ + if (nextNode->get_key() == key) + { + /* This was the key, so return the entry pointer + */ + return nextNode->get_buckets(); + } + else + { + return NULL; /* That key is not in the trie (note that we set the pointer to -1 if no payload) */ + } +} + +template< class ImplTraits, class DataType > +bool IntTrie<ImplTraits, DataType>::del( ANTLR_INTKEY /*key*/) +{ + IntTrieNodeType* p; + + p = m_root; + + return false; + +} + +template< class ImplTraits, class DataType > +bool IntTrie<ImplTraits, DataType>::add( ANTLR_INTKEY key, const DataType& data ) +{ + IntTrieNodeType* thisNode; + IntTrieNodeType* nextNode; + IntTrieNodeType* entNode; + ANTLR_UINT32 depth; + TrieEntryType* newEnt; + TrieEntryType* nextEnt; + ANTLR_INTKEY xorKey; + + /* Cache the bit depth of this trie, which is always the highest index, + * which is in the root node + */ + depth = m_root->get_bitNum(); + + thisNode = m_root; /* Start with the root node */ + nextNode = m_root->get_leftN(); /* And assume we start to the left */ + + /* Now find the only node that can be currently reached by the bits in the + * key we are being asked to insert. 
+ */ + const ANTLR_UINT64* bitMask = this->get_bitMask(); + while (thisNode->get_bitNum() > nextNode->get_bitNum() ) + { + /* Still descending the structure, next node becomes current. + */ + thisNode = nextNode; + + if (key & bitMask[nextNode->get_bitNum()]) + { + /* Bit at the required index was 1, so travers the right node from here + */ + nextNode = nextNode->get_rightN(); + } + else + { + /* Bit at the required index was 0, so we traverse to the left + */ + nextNode = nextNode->get_leftN(); + } + } + /* Here we have located the only node that can be reached by the + * bits in the requested key. It could in fact be that key or the node + * we need to use to insert the new key. + */ + if (nextNode->get_key() == key) + { + /* We have located an exact match, but we will only append to the bucket chain + * if this trie accepts duplicate keys. + */ + if (m_allowDups ==true) + { + /* Yes, we are accepting duplicates + */ + newEnt = new TrieEntryType(data, NULL); + + /* We want to be able to traverse the stored elements in the order that they were + * added as duplicate keys. We might need to revise this opinion if we end up having many duplicate keys + * as perhaps reverse order is just as good, so long as it is ordered. + */ + nextEnt = nextNode->get_buckets(); + while (nextEnt->get_next() != NULL) + { + nextEnt = nextEnt->get_next(); + } + nextEnt->set_next(newEnt); + + m_count++; + return true; + } + else + { + /* We found the key is already there and we are not allowed duplicates in this + * trie. + */ + return false; + } + } + + /* Here we have discovered the only node that can be reached by the bits in the key + * but we have found that this node is not the key we need to insert. We must find the + * the leftmost bit by which the current key for that node and the new key we are going + * to insert, differ. 
While this nested series of ifs may look a bit strange, experimentation + * showed that it allows a machine code path that works well with predicated execution + */ + xorKey = (key ^ nextNode->get_key() ); /* Gives 1 bits only where they differ then we find the left most 1 bit*/ + + /* Most common case is a 32 bit key really + */ + const ANTLR_UINT8* bitIndex = this->get_bitIndex(); +#ifdef ANTLR_USE_64BIT + if (xorKey & 0xFFFFFFFF00000000) + { + if (xorKey & 0xFFFF000000000000) + { + if (xorKey & 0xFF00000000000000) + { + depth = 56 + bitIndex[((xorKey & 0xFF00000000000000)>>56)]; + } + else + { + depth = 48 + bitIndex[((xorKey & 0x00FF000000000000)>>48)]; + } + } + else + { + if (xorKey & 0x0000FF0000000000) + { + depth = 40 + bitIndex[((xorKey & 0x0000FF0000000000)>>40)]; + } + else + { + depth = 32 + bitIndex[((xorKey & 0x000000FF00000000)>>32)]; + } + } + } + else +#endif + { + if (xorKey & 0x00000000FFFF0000) + { + if (xorKey & 0x00000000FF000000) + { + depth = 24 + bitIndex[((xorKey & 0x00000000FF000000)>>24)]; + } + else + { + depth = 16 + bitIndex[((xorKey & 0x0000000000FF0000)>>16)]; + } + } + else + { + if (xorKey & 0x000000000000FF00) + { + depth = 8 + bitIndex[((xorKey & 0x0000000000000FF00)>>8)]; + } + else + { + depth = bitIndex[xorKey & 0x00000000000000FF]; + } + } + } + + /* We have located the leftmost differing bit, indicated by the depth variable. So, we know what + * bit index we are to insert the new entry at. There are two cases, being where the two keys + * differ at a bit position that is not currently part of the bit testing, where they differ on a bit + * that is currently being skipped in the indexed comparisons, and where they differ on a bit + * that is merely lower down in the current bit search. If the bit index went bit 4, bit 2 and they differ + * at bit 3, then we have the "skipped" bit case. But if that chain was Bit 4, Bit 2 and they differ at bit 1 + * then we have the easy bit <pun>. 
+ * + * So, set up to descend the tree again, but this time looking for the insert point + * according to whether we skip the bit that differs or not. + */ + thisNode = m_root; + entNode = m_root->get_leftN(); + + /* Note the slight difference in the checks here to cover both cases + */ + while (thisNode->get_bitNum() > entNode->get_bitNum() && entNode->get_bitNum() > depth) + { + /* Still descending the structure, next node becomes current. + */ + thisNode = entNode; + + if (key & bitMask[entNode->get_bitNum()]) + { + /* Bit at the required index was 1, so traverse the right node from here + */ + entNode = entNode->get_rightN(); + } + else + { + /* Bit at the required index was 0, so we traverse to the left + */ + entNode = entNode->get_leftN(); + } + } + + /* We have located the correct insert point for this new key, so we need + * to allocate our entry and insert it etc. + */ + nextNode = new IntTrieNodeType(); + + /* Build a new entry block for the new node + */ + newEnt = new TrieEntryType(data, NULL); + + /* Install it + */ + nextNode->set_buckets(newEnt); + nextNode->set_key(key); + nextNode->set_bitNum( depth ); + + /* Work out the right and left pointers for this new node, which involve + * terminating with the current found node either right or left according + * to whether the current index bit is 1 or 0 + */ + if (key & bitMask[depth]) + { + nextNode->set_leftN(entNode); /* Terminates at previous position */ + nextNode->set_rightN(nextNode); /* Terminates with itself */ + } + else + { + nextNode->set_rightN(entNode); /* Terminates at previous position */ + nextNode->set_leftN(nextNode); /* Terminates with itself */ + } + + /* Finally, we need to change the pointers at the node we located + * for inserting. If the key bit at its index is set then the right + * pointer for that node becomes the newly created node, otherwise the left + * pointer does. 
+ */ + if (key & bitMask[thisNode->get_bitNum()] ) + { + thisNode->set_rightN( nextNode ); + } + else + { + thisNode->set_leftN(nextNode); + } + + /* Et voila + */ + m_count++; + return true; +} + +template< class ImplTraits, class DataType > +IntTrieNode<ImplTraits, DataType>::IntTrieNode() +{ + m_bitNum = 0; + m_key = 0; + m_buckets = NULL; + m_leftN = NULL; + m_rightN = NULL; +} + +template< class ImplTraits, class DataType > +IntTrieNode<ImplTraits, DataType>::~IntTrieNode() +{ + TrieEntryType* thisEntry; + TrieEntryType* nextEntry; + + /* If this node has a left pointer that is not a back pointer + * then recursively call to free this + */ + if ( m_bitNum > m_leftN->get_bitNum()) + { + /* We have a left node that needs descending, so do it. + */ + delete m_leftN; + } + + /* The left nodes from here should now be dealt with, so + * we need to descend any right nodes that are not back pointers + */ + if ( m_bitNum > m_rightN->get_bitNum() ) + { + /* There are some right nodes to descend and deal with. + */ + delete m_rightN; + } + + /* Now all the children are dealt with, we can destroy + * this node too + */ + thisEntry = m_buckets; + + while (thisEntry != NULL) + { + nextEntry = thisEntry->get_next(); + + /* Now free the data for this bucket entry + */ + delete thisEntry; + thisEntry = nextEntry; /* See if there are any more to free */ + } + + /* The bucket entry is now gone, so we can free the memory for + * the entry itself. + */ + + /* And that should be it for everything under this node and itself + */ +} + +/** + * Allocate and initialize a new ANTLR3 topological sorter, which can be + * used to define edges that identify numerical node indexes that depend on other + * numerical node indexes, which can then be sorted topologically such that + * any node is sorted after all its dependent nodes. 
+ * + * Use: + * + * /verbatim + + pANTLR3_TOPO topo; + topo = antlr3NewTopo(); + + if (topo == NULL) { out of memory } + + topo->addEdge(topo, 3, 0); // Node 3 depends on node 0 + topo->addEdge(topo, 0, 1); // Node - depends on node 1 + topo->sortVector(topo, myVector); // Sort the vector in place (node numbers are the vector entry numbers) + + * /verbatim + */ +template<class ImplTraits> +Topo<ImplTraits>::Topo() +{ + // Initialize variables + // + m_visited = NULL; // Don't know how big it is yet + m_limit = 1; // No edges added yet + m_edges = NULL; // No edges added yet + m_sorted = NULL; // Nothing sorted at the start + m_cycle = NULL; // No cycles at the start + m_cycleMark = 0; // No cycles at the start + m_hasCycle = false; // No cycle at the start +} + +// Topological sorter +// +template<class ImplTraits> +void Topo<ImplTraits>::addEdge(ANTLR_UINT32 edge, ANTLR_UINT32 dependency) +{ + ANTLR_UINT32 i; + ANTLR_UINT32 maxEdge; + BitsetType* edgeDeps; + + if (edge>dependency) + { + maxEdge = edge; + } + else + { + maxEdge = dependency; + } + // We need to add an edge to says that the node indexed by 'edge' is + // dependent on the node indexed by 'dependency' + // + + // First see if we have enough room in the edges array to add the edge? 
+ // + if ( m_edges == NULL) + { + // We don't have any edges yet, so create an array to hold them + // + m_edges = AllocPolicyType::alloc0(sizeof(BitsetType*) * (maxEdge + 1)); + + // Set the limit to what we have now + // + m_limit = maxEdge + 1; + } + else if (m_limit <= maxEdge) + { + // WE have some edges but not enough + // + m_edges = AllocPolicyType::realloc(m_edges, sizeof(BitsetType*) * (maxEdge + 1)); + + // Initialize the new bitmaps to ;indicate we have no edges defined yet + // + for (i = m_limit; i <= maxEdge; i++) + { + *((m_edges) + i) = NULL; + } + + // Set the limit to what we have now + // + m_limit = maxEdge + 1; + } + + // If the edge was flagged as depending on itself, then we just + // do nothing as it means this routine was just called to add it + // in to the list of nodes. + // + if (edge == dependency) + { + return; + } + + // Pick up the bit map for the requested edge + // + edgeDeps = *((m_edges) + edge); + + if (edgeDeps == NULL) + { + // No edges are defined yet for this node + // + edgeDeps = new BitsetType(0); + *((m_edges) + edge) = edgeDeps; + } + + // Set the bit in the bitmap that corresponds to the requested + // dependency. + // + edgeDeps->add(dependency); + + // And we are all set + // + return; + +} + +/** + * Given a starting node, descend its dependent nodes (ones that it has edges + * to) until we find one without edges. Having found a node without edges, we have + * discovered the bottom of a depth first search, which we can then ascend, adding + * the nodes in order from the bottom, which gives us the dependency order. + */ +template<class ImplTraits> +void Topo<ImplTraits>::DFS(ANTLR_UINT32 node) +{ + BitsetType* edges; + + // Guard against a revisit and check for cycles + // + if (m_hasCycle == true) + { + return; // We don't do anything else if we found a cycle + } + + if ( m_visited->isMember(node)) + { + // Check to see if we found a cycle. 
To do this we search the + // current cycle stack and see if we find this node already in the stack. + // + ANTLR_UINT32 i; + + for (i=0; i< m_cycleMark; i++) + { + if ( m_cycle[i] == node) + { + // Stop! We found a cycle in the input, so rejig the cycle + // stack so that it only contains the cycle and set the cycle flag + // which will tell the caller what happened + // + ANTLR_UINT32 l; + + for (l = i; l < m_cycleMark; l++) + { + m_cycle[l - i] = m_cycle[l]; // Move to zero base in the cycle list + } + + // Recalculate the limit + // + m_cycleMark -= i; + + // Signal disaster + // + m_hasCycle = true; + } + } + return; + } + + // So far, no cycles have been found and we have not visited this node yet, + // so this node needs to go into the cycle stack before we continue + // then we will take it out of the stack once we have descended all its + // dependencies. + // + m_cycle[m_cycleMark++] = node; + + // First flag that we have visited this node + // + m_visited->add(node); + + // Now, if this node has edges, then we want to ensure we visit + // them all before we drop through and add this node into the sorted + // list. + // + edges = *((m_edges) + node); + if (edges != NULL) + { + // We have some edges, so visit each of the edge nodes + // that have not already been visited. + // + ANTLR_UINT32 numBits; // How many bits are in the set + ANTLR_UINT32 i; + ANTLR_UINT32 range; + + numBits = edges->numBits(); + range = edges->size(); // Number of set bits + + // Stop if we exahust the bit list or have checked the + // number of edges that this node refers to (so we don't + // check bits at the end that cannot possibly be set). 
+ // + for (i=0; i<= numBits && range > 0; i++) + { + if (edges->isMember(i)) + { + range--; // About to check another one + + // Found an edge, make sure we visit and descend it + // + this->DFS(i); + } + } + } + + // At this point we will have visited all the dependencies + // of this node and they will be ordered (even if there are cycles) + // So we just add the node into the sorted list at the + // current index position. + // + m_sorted[m_limit++] = node; + + // Remove this node from the cycle list if we have not detected a cycle + // + if (m_hasCycle == false) + { + m_cycleMark--; + } + + return; +} + +template<class ImplTraits> +ANTLR_UINT32* Topo<ImplTraits>::sortToArray() +{ + ANTLR_UINT32 v; + ANTLR_UINT32 oldLimit; + + // Guard against being called with no edges defined + // + if (m_edges == NULL) + { + return 0; + } + // First we need a vector to populate with enough + // entries to accomodate the sorted list and another to accomodate + // the maximum cycle we could detect which is all nodes such as 0->1->2->3->0 + // + m_sorted = AllocPolicyType::alloc( m_limit * sizeof(ANTLR_UINT32) ); + m_cycle = AllocPolicyType::alloc( m_limit * sizeof(ANTLR_UINT32)); + + // Next we need an empty bitset to show whether we have visited a node + // or not. This is the bit that gives us linear time of course as we are essentially + // dropping through the nodes in depth first order and when we get to a node that + // has no edges, we pop back up the stack adding the nodes we traversed in reverse + // order. + // + m_visited = new BitsetType(0); + + // Now traverse the nodes as if we were just going left to right, but + // then descend each node unless it has already been visited. 
+ // + oldLimit = m_limit; // Number of nodes to traverse linearly + m_limit = 0; // Next entry in the sorted table + + for (v = 0; v < oldLimit; v++) + { + // If we did not already visit this node, then descend it until we + // get a node without edges or arrive at a node we have already visited. + // + if (m_visited->isMember(v) == false) + { + // We have not visited this one so descend it + // + this->DFS(v); + } + + // Break the loop if we detect a cycle as we have no need to go any + // further + // + if (m_hasCycle == true) + { + break; + } + } + + // Reset the limit to the number we recorded as if we hit a + // cycle, then limit will have stopped at the node where we + // discovered the cycle, but in order to free the edge bitmaps + // we need to know how many we may have allocated and traverse them all. + // + m_limit = oldLimit; + + // Having traversed all the nodes we were given, we + // are guaranteed to have ordered all the nodes or detected a + // cycle. + // + return m_sorted; +} + +template<class ImplTraits> + template<typename DataType> +void Topo<ImplTraits>::sortVector( typename ImplTraits::template VectorType<DataType>& v ) +{ + // To sort a vector, we first perform the + // sort to an array, then use the results to reorder the vector + // we are given. This is just a convenience routine that allows you to + // sort the children of a tree node into topological order before or + // during an AST walk. This can be useful for optimizations that require + // dag reorders and also when the input stream defines thigns that are + // interdependent and you want to walk the list of the generated trees + // for those things in topological order so you can ignore the interdependencies + // at that point. 
+ // + ANTLR_UINT32 i; + + // Used as a lookup index to find the current location in the vector of + // the vector entry that was originally at position [0], [1], [2] etc + // + ANTLR_UINT32* vIndex; + + // Sort into an array, then we can use the array that is + // stored in the topo + // + if (this->sortToArray() == 0) + { + return; // There were no edges + } + + if (m_hasCycle == true) + { + return; // Do nothing if we detected a cycle + } + + // Ensure that the vector we are sorting is at least as big as the + // the input sequence we were adsked to sort. It does not matter if it is + // bigger as thaat probably just means that nodes numbered higher than the + // limit had no dependencies and so can be left alone. + // + if (m_limit > v.size() ) + { + // We can only sort the entries that we have dude! The caller is + // responsible for ensuring the vector is the correct one and is the + // correct size etc. + // + m_limit = v.size(); + } + // We need to know the locations of each of the entries + // in the vector as we don't want to duplicate them in a new vector. We + // just use an indirection table to get the vector entry for a particular sequence + // acording to where we moved it last. Then we can just swap vector entries until + // we are done :-) + // + vIndex = AllocPolicyType::alloc(m_limit * sizeof(ANTLR_UINT32)); + + // Start index, each vector entry is located where you think it is + // + for (i = 0; i < m_limit; i++) + { + vIndex[i] = i; + } + + // Now we traverse the sorted array and moved the entries of + // the vector around according to the sort order and the indirection + // table we just created. The index telsl us where in the vector the + // original element entry n is now located via vIndex[n]. + // + for (i=0; i < m_limit; i++) + { + ANTLR_UINT32 ind; + + // If the vector entry at i is already the one that it + // should be, then we skip moving it of course. 
+ // + if (vIndex[m_sorted[i]] == i) + { + continue; + } + + // The vector entry at i, should be replaced with the + // vector entry indicated by topo->sorted[i]. The vector entry + // at topo->sorted[i] may have already been swapped out though, so we + // find where it is now and move it from there to i. + // + ind = vIndex[m_sorted[i]]; + std::swap( v[i], v[ind] ); + + // Update our index. The element at i is now the one we wanted + // to be sorted here and the element we swapped out is now the + // element that was at i just before we swapped it. If you are lost now + // don't worry about it, we are just reindexing on the fly is all. + // + vIndex[m_sorted[i]] = i; + vIndex[i] = ind; + } + + // Having traversed all the entries, we have sorted the vector in place. + // + AllocPolicyType::free(vIndex); + return; +} + +template<class ImplTraits> +Topo<ImplTraits>::~Topo() +{ + ANTLR_UINT32 i; + + // Free the result vector + // + if (m_sorted != NULL) + { + AllocPolicyType::free(m_sorted); + } + + // Free the visited map + // + if (m_visited != NULL) + { + delete m_visited; + } + + // Free any edgemaps + // + if (m_edges != NULL) + { + Bitset<AllocPolicyType>* edgeList; + + for (i=0; i<m_limit; i++) + { + edgeList = *((m_edges) + i); + if (edgeList != NULL) + { + delete edgeList; + } + } + + AllocPolicyType::free( m_edges ); + } + m_edges = NULL; + + // Free any cycle map + // + if (m_cycle != NULL) + { + AllocPolicyType::free(m_cycle); + } +} + + +} diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3commontoken.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3commontoken.hpp new file mode 100644 index 0000000000..51fa3954ab --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3commontoken.hpp @@ -0,0 +1,254 @@ +/** \file + * \brief Defines the interface for a common token. + * + * All token streams should provide their tokens using an instance + * of this common token. 
A custom pointer is provided, where you may attach + * a further structure to enhance the common token if you feel the need + * to do so. The C runtime will assume that a token provides implementations + * of the interface functions, but all of them may be replaced by your own + * implementation if you require it. + */ +#ifndef _ANTLR3_COMMON_TOKEN_HPP +#define _ANTLR3_COMMON_TOKEN_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +namespace antlr3 { + +/** The definition of an ANTLR3 common token structure, which all implementations + * of a token stream should provide, installing any further structures in the + * custom pointer element of this structure. + * + * \remark + * Token streams are in essence provided by lexers or other programs that serve + * as lexers. + */ + +template<class ImplTraits> +class CommonToken : public ImplTraits::AllocPolicyType +{ +public: + /* Base token types, which all lexer/parser tokens come after in sequence. + */ + enum TOKEN_TYPE : ANTLR_UINT32 + { + /** Indicator of an invalid token + */ + TOKEN_INVALID = 0 + , EOR_TOKEN_TYPE + /** Imaginary token type to cause a traversal of child nodes in a tree parser + */ + , TOKEN_DOWN + /** Imaginary token type to signal the end of a stream of child nodes. + */ + , TOKEN_UP + /** First token that can be used by users/generated code + */ + , MIN_TOKEN_TYPE = TOKEN_UP + 1 + + /** End of file token + */ +#ifndef _MSC_VER + , TOKEN_EOF = std::numeric_limits<ANTLR_UINT32>::max() +#else + , TOKEN_EOF = 0xFFFFFFFF +#endif + }; + + typedef typename ImplTraits::TokenIntStreamType TokenIntStreamType; + typedef typename ImplTraits::StringType StringType; + typedef typename ImplTraits::InputStreamType InputStreamType; + typedef typename ImplTraits::StreamDataType StreamDataType; + typedef typename ImplTraits::TokenUserDataType UserDataType; + +private: + /** The actual type of this token + */ + ANTLR_UINT32 m_type; + + /** The virtual channel that this token exists in. + */ + ANTLR_UINT32 m_channel; + + mutable StringType m_tokText; + + /** The offset into the input stream that the line in which this + * token resides starts. 
+ */ + const StreamDataType* m_lineStart; + + /** The line number in the input stream where this token was derived from + */ + ANTLR_UINT32 m_line; + + /** The character position in the line that this token was derived from + */ + ANTLR_INT32 m_charPositionInLine; + + /** Pointer to the input stream that this token originated in. + */ + InputStreamType* m_input; + + /** What the index of this token is, 0, 1, .., n-2, n-1 tokens + */ + ANTLR_MARKER m_index; + + /** The character offset in the input stream where the text for this token + * starts. + */ + ANTLR_MARKER m_startIndex; + + /** The character offset in the input stream where the text for this token + * stops. + */ + ANTLR_MARKER m_stopIndex; + +public: + CommonToken(); + CommonToken(ANTLR_UINT32 type); + CommonToken(TOKEN_TYPE type); + CommonToken( const CommonToken& ctoken ); + + ~CommonToken() {} + + CommonToken& operator=( const CommonToken& ctoken ); + bool operator==( const CommonToken& ctoken ) const; + bool operator<( const CommonToken& ctoken ) const; + + InputStreamType* get_input() const; + ANTLR_MARKER get_index() const; + void set_index( ANTLR_MARKER index ); + void set_input( InputStreamType* input ); + + /* ============================== + * API + */ + + /** Function that returns the text pointer of a token, use + * toString() if you want a pANTLR3_STRING version of the token. + */ + StringType const & getText() const; + + /** Pointer to a function that 'might' be able to set the text associated + * with a token. Imaginary tokens such as an ANTLR3_CLASSIC_TOKEN may actually + * do this, however many tokens such as ANTLR3_COMMON_TOKEN do not actaully have + * strings associated with them but just point into the current input stream. These + * tokens will implement this function with a function that errors out (probably + * drastically. + */ + void set_tokText( const StringType& text ); + + /** Pointer to a function that 'might' be able to set the text associated + * with a token. 
Imaginary tokens such as an ANTLR3_CLASSIC_TOKEN may actually + * do this, however many tokens such as ANTLR3_COMMON_TOKEN do not actully have + * strings associated with them but just point into the current input stream. These + * tokens will implement this function with a function that errors out (probably + * drastically. + */ + void setText(ANTLR_UINT8* text); + void setText(const char* text); + + /** Pointer to a function that returns the token type of this token + */ + ANTLR_UINT32 get_type() const; + ANTLR_UINT32 getType() const; + + /** Pointer to a function that sets the type of this token + */ + void set_type(ANTLR_UINT32 ttype); + + /** Pointer to a function that gets the 'line' number where this token resides + */ + ANTLR_UINT32 get_line() const; + + /** Pointer to a function that sets the 'line' number where this token reside + */ + void set_line(ANTLR_UINT32 line); + + /** Pointer to a function that gets the offset in the line where this token exists + */ + ANTLR_INT32 get_charPositionInLine() const; + ANTLR_INT32 getCharPositionInLine() const; + + /** Pointer to a function that sets the offset in the line where this token exists + */ + void set_charPositionInLine(ANTLR_INT32 pos); + + /** Pointer to a function that gets the channel that this token was placed in (parsers + * can 'tune' to these channels. + */ + ANTLR_UINT32 get_channel() const; + + /** Pointer to a function that sets the channel that this token should belong to + */ + void set_channel(ANTLR_UINT32 channel); + + /** Pointer to a function that returns an index 0...n-1 of the token in the token + * input stream. + */ + ANTLR_MARKER get_tokenIndex() const; + + /** Pointer to a function that can set the token index of this token in the token + * input stream. + */ + void set_tokenIndex(ANTLR_MARKER tokenIndex); + + /** Pointer to a function that gets the start index in the input stream for this token. 
+ */ + ANTLR_MARKER get_startIndex() const; + + /** Pointer to a function that sets the start index in the input stream for this token. + */ + void set_startIndex(ANTLR_MARKER index); + + /** Pointer to a function that gets the stop index in the input stream for this token. + */ + ANTLR_MARKER get_stopIndex() const; + + /** Pointer to a function that sets the stop index in the input stream for this token. + */ + void set_stopIndex(ANTLR_MARKER index); + const StreamDataType* get_lineStart() const; + void set_lineStart( const StreamDataType* lineStart ); + + /** Pointer to a function that returns this token as a text representation that can be + * printed with embedded control codes such as \n replaced with the printable sequence "\\n" + * This also yields a string structure that can be used more easily than the pointer to + * the input stream in certain situations. + */ + StringType toString() const; + + UserDataType UserData; +}; + +} + +#include "antlr3commontoken.inl" + +#endif diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3commontoken.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3commontoken.inl new file mode 100644 index 0000000000..3277b3def1 --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3commontoken.inl @@ -0,0 +1,328 @@ +namespace antlr3 { + +template<class ImplTraits> +CommonToken<ImplTraits>::CommonToken() +{ + m_type = 0; + m_channel = 0; + m_lineStart = NULL; + m_line = 0; + m_charPositionInLine = 0; + m_input = NULL; + m_index = 0; + m_startIndex = 0; + m_stopIndex = 0; +} + +template<class ImplTraits> +CommonToken<ImplTraits>::CommonToken(ANTLR_UINT32 type) +{ + m_type = type; + m_channel = 0; + m_lineStart = NULL; + m_line = 0; + m_charPositionInLine = 0; + m_input = NULL; + m_index = 0; + m_startIndex = 0; + m_stopIndex = 0; +} + +template<class ImplTraits> +CommonToken<ImplTraits>::CommonToken(TOKEN_TYPE type) +{ + m_type = type; + m_channel = 0; + m_lineStart = NULL; + m_line = 0; + m_charPositionInLine = 0; + 
m_input = NULL; + m_index = 0; + m_startIndex = 0; + m_stopIndex = 0; +} + +template<class ImplTraits> +CommonToken<ImplTraits>::CommonToken( const CommonToken& ctoken ) + :m_tokText( ctoken.m_tokText ) + ,UserData(ctoken.UserData) +{ + m_type = ctoken.m_type; + m_channel = ctoken.m_channel; + m_lineStart = ctoken.m_lineStart; + m_line = ctoken.m_line; + m_charPositionInLine = ctoken.m_charPositionInLine; + m_input = ctoken.m_input; + m_index = ctoken.m_index; + m_startIndex = ctoken.m_startIndex; + m_stopIndex = ctoken.m_stopIndex; +} + +template<class ImplTraits> +CommonToken<ImplTraits>& CommonToken<ImplTraits>::operator=( const CommonToken& ctoken ) +{ + UserData = ctoken.UserData; + m_type = ctoken.m_type; + m_channel = ctoken.m_channel; + m_lineStart = ctoken.m_lineStart; + m_line = ctoken.m_line; + m_charPositionInLine = ctoken.m_charPositionInLine; + m_input = ctoken.m_input; + m_index = ctoken.m_index; + m_startIndex = ctoken.m_startIndex; + m_stopIndex = ctoken.m_stopIndex; + + m_tokText = ctoken.m_tokText; + return *this; +} + +template<class ImplTraits> +ANTLR_INLINE bool CommonToken<ImplTraits>::operator<( const CommonToken& ctoken ) const +{ + return (m_index < ctoken.m_index); +} + +template<class ImplTraits> +bool CommonToken<ImplTraits>::operator==( const CommonToken& ctoken ) const +{ + return ( (m_type == ctoken.m_type) && + (m_channel == ctoken.m_channel) && + (m_lineStart == ctoken.m_lineStart) && + (m_line == ctoken.m_line) && + (m_charPositionInLine == ctoken.m_charPositionInLine) && + (m_input == ctoken.m_input) && + (m_index == ctoken.m_index) && + (m_startIndex == ctoken.m_startIndex) && + (m_stopIndex == ctoken.m_stopIndex) ); +} + +template<class ImplTraits> +ANTLR_INLINE typename CommonToken<ImplTraits>::InputStreamType* CommonToken<ImplTraits>::get_input() const +{ + return m_input; +} + +template<class ImplTraits> +ANTLR_INLINE ANTLR_MARKER CommonToken<ImplTraits>::get_index() const +{ + return m_index; +} + +template<class 
ImplTraits> +ANTLR_INLINE void CommonToken<ImplTraits>::set_index( ANTLR_MARKER index ) +{ + m_index = index; +} + +template<class ImplTraits> +void CommonToken<ImplTraits>::set_input( InputStreamType* input ) +{ + m_input = input; +} + +template<class ImplTraits> +typename CommonToken<ImplTraits>::StringType const & +CommonToken<ImplTraits>::getText() const +{ + static const StringType EOF_STRING("<EOF>"); + static const StringType EMPTY_STRING(""); + + if ( !m_tokText.empty() ) + return m_tokText; + + // EOF is a special case + // + if ( m_type == TOKEN_EOF) + { + return EOF_STRING; + } + + // We had nothing installed in the token, create a new string + // from the input stream + // + if ( m_input != NULL) + { + return m_tokText = m_input->substr( this->get_startIndex(), this->get_stopIndex() ); + } + // Nothing to return, there is no input stream + // + return EMPTY_STRING; +} + +template<class ImplTraits> +ANTLR_INLINE void CommonToken<ImplTraits>::set_tokText( const StringType& text ) +{ + m_tokText = text; +} + +template<class ImplTraits> +ANTLR_INLINE void CommonToken<ImplTraits>::setText(ANTLR_UINT8* text) +{ + if( text == NULL ) + m_tokText.clear(); + else + m_tokText = (const char*) text; +} + +template<class ImplTraits> +ANTLR_INLINE void CommonToken<ImplTraits>::setText(const char* text) +{ + if( text == NULL ) + m_tokText.clear(); + else + m_tokText = text; +} + +template<class ImplTraits> +ANTLR_INLINE ANTLR_UINT32 CommonToken<ImplTraits>::get_type() const +{ + return m_type; +} + +template<class ImplTraits> +ANTLR_INLINE ANTLR_UINT32 CommonToken<ImplTraits>::getType() const +{ + return m_type; +} + +template<class ImplTraits> +ANTLR_INLINE void CommonToken<ImplTraits>::set_type(ANTLR_UINT32 ttype) +{ + m_type = ttype; +} + +template<class ImplTraits> +ANTLR_INLINE ANTLR_UINT32 CommonToken<ImplTraits>::get_line() const +{ + return m_line; +} + +template<class ImplTraits> +ANTLR_INLINE void CommonToken<ImplTraits>::set_line(ANTLR_UINT32 line) +{ + 
m_line = line; +} + +template<class ImplTraits> +ANTLR_INLINE ANTLR_INT32 CommonToken<ImplTraits>::get_charPositionInLine() const +{ + return m_charPositionInLine; +} + +template<class ImplTraits> +ANTLR_INLINE ANTLR_INT32 CommonToken<ImplTraits>::getCharPositionInLine() const +{ + return this->get_charPositionInLine(); +} + +template<class ImplTraits> +ANTLR_INLINE void CommonToken<ImplTraits>::set_charPositionInLine(ANTLR_INT32 pos) +{ + m_charPositionInLine = pos; +} + +template<class ImplTraits> +ANTLR_INLINE ANTLR_UINT32 CommonToken<ImplTraits>::get_channel() const +{ + return m_channel; +} + +template<class ImplTraits> +ANTLR_INLINE void CommonToken<ImplTraits>::set_channel(ANTLR_UINT32 channel) +{ + m_channel = channel; +} + +template<class ImplTraits> +ANTLR_INLINE ANTLR_MARKER CommonToken<ImplTraits>::get_tokenIndex() const +{ + return m_index; +} + +template<class ImplTraits> +ANTLR_INLINE void CommonToken<ImplTraits>::set_tokenIndex(ANTLR_MARKER tokenIndex) +{ + m_index = tokenIndex; +} + +template<class ImplTraits> +ANTLR_INLINE ANTLR_MARKER CommonToken<ImplTraits>::get_startIndex() const +{ + return (m_startIndex == -1) ? 
(ANTLR_MARKER)(m_input->get_data()) : m_startIndex; +} + +template<class ImplTraits> +ANTLR_INLINE void CommonToken<ImplTraits>::set_startIndex(ANTLR_MARKER index) +{ + m_startIndex = index; +} + +template<class ImplTraits> +ANTLR_INLINE ANTLR_MARKER CommonToken<ImplTraits>::get_stopIndex() const +{ + return m_stopIndex; +} + +template<class ImplTraits> +ANTLR_INLINE void CommonToken<ImplTraits>::set_stopIndex(ANTLR_MARKER index) +{ + m_stopIndex = index; +} + +template<class ImplTraits> +ANTLR_INLINE const typename CommonToken<ImplTraits>::StreamDataType* CommonToken<ImplTraits>::get_lineStart() const +{ + return m_lineStart; +} + +template<class ImplTraits> +ANTLR_INLINE void CommonToken<ImplTraits>::set_lineStart( const StreamDataType* lineStart ) +{ + m_lineStart = lineStart; +} + +template<class ImplTraits> +typename CommonToken<ImplTraits>::StringType CommonToken<ImplTraits>::toString() const +{ + StringType text; + typedef typename ImplTraits::StringStreamType StringStreamType; + StringStreamType outtext; + + text = this->getText(); + + if (text.empty()) + return ""; + + /* Now we use our handy dandy string utility to assemble the + * the reporting string + * return "[@"+getTokenIndex()+","+start+":"+stop+"='"+txt+"',<"+type+">"+channelStr+","+line+":"+getCharPositionInLine()+"]"; + */ + outtext << "[Index: "; + outtext << (int)this->get_tokenIndex(); + outtext << " (Start: "; + outtext << (int)this->get_startIndex(); + outtext << "-Stop: "; + outtext << (int)this->get_stopIndex(); + outtext << ") ='"; + outtext << text; + outtext << "', type<"; + outtext << (int)m_type; + outtext << "> "; + + if (this->get_channel() > TOKEN_DEFAULT_CHANNEL) + { + outtext << "(channel = "; + outtext << (int)this->get_channel(); + outtext << ") "; + } + + outtext << "Line: "; + outtext << (int)this->get_line(); + outtext << " LinePos:"; + outtext << (int)this->get_charPositionInLine(); + outtext << "]"; + + return outtext.str(); +} + +} diff --git 
a/contrib/libs/antlr3_cpp_runtime/include/antlr3commontree.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3commontree.hpp new file mode 100644 index 0000000000..2a5e61f94f --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3commontree.hpp @@ -0,0 +1,152 @@ +/** Interface for an ANTLR3 common tree which is what gets + * passed around by the AST producing parser. + */ + +#ifndef _ANTLR3_COMMON_TREE_HPP +#define _ANTLR3_COMMON_TREE_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +namespace antlr3 { + +template<class ImplTraits> +class CommonTree : public ImplTraits::AllocPolicyType +{ +public: + typedef typename ImplTraits::AllocPolicyType AllocPolicyType; + typedef typename ImplTraits::StringType StringType; + typedef typename ImplTraits::CommonTokenType CommonTokenType; + typedef typename ImplTraits::TreeType TreeType; + typedef typename ImplTraits::TreeTypePtr TreeTypePtr; + //typedef CommonTree TokenType; + typedef typename AllocPolicyType::template VectorType<TreeTypePtr> ChildrenType; + typedef typename AllocPolicyType::template ListType<TreeTypePtr> ChildListType; + typedef typename ImplTraits::TreeUserDataType UserDataType; +protected: + /// The list of all the children that belong to this node. They are not part of the node + /// as they belong to the common tree node that implements this. + /// + ChildrenType m_children; + + + /// Start token index that encases this tree + /// + ANTLR_MARKER m_startIndex; + + /// End token that encases this tree + /// + ANTLR_MARKER m_stopIndex; + + /// A single token, this is the payload for the tree + /// + const CommonTokenType* m_token; + + /// Points to the node that has this node as a child. + /// If this is NULL, then this is the root node. + /// + CommonTree* m_parent; + + /// What index is this particular node in the child list it + /// belongs to? 
+ /// + ANTLR_INT32 m_childIndex; + +public: + CommonTree(); + CommonTree( const CommonTokenType* token ); + CommonTree( const CommonTree* token ); + CommonTree( const CommonTree& ctree ); + ~CommonTree(); + + const CommonTokenType* get_token() const; + void set_token(CommonTokenType const*); + + ChildrenType& get_children(); + const ChildrenType& get_children() const; + ANTLR_INT32 get_childIndex() const; + TreeType* get_parent() const; + + ANTLR_MARKER get_startIndex() const; + void set_startIndex(ANTLR_MARKER index); + + ANTLR_MARKER get_stopIndex() const; + void set_stopIndex(ANTLR_MARKER index); + + void set_parent( TreeType* parent); + void set_childIndex( ANTLR_INT32 ); + + void addChild(TreeTypePtr& child); + /// Add all elements of the supplied list as children of this node + /// + void addChildren(const ChildListType& kids); + + TreeTypePtr deleteChild(ANTLR_UINT32 i); + /// Delete children from start to stop and replace with t even if t is + /// a list (nil-root tree). Num of children can increase or decrease. + /// For huge child lists, inserting children can force walking rest of + /// children to set their child index; could be slow. 
+ /// + void replaceChildren(ANTLR_INT32 startChildIndex, ANTLR_INT32 stopChildIndex, TreeTypePtr t); + + // clone itself + CommonTree* dupNode() const; + // clone itself in pre-allocated storage + CommonTree* dupNode(void *) const; + + + ANTLR_UINT32 get_charPositionInLine() const; + ANTLR_UINT32 get_line() const; + + TreeTypePtr& getChild(ANTLR_UINT32 i); + ANTLR_UINT32 getChildCount() const; + ANTLR_UINT32 getType(); + TreeTypePtr& getFirstChildWithType(ANTLR_UINT32 type); + + StringType getText(); + bool isNilNode(); + void setChild(ANTLR_UINT32 i, TreeTypePtr child); + StringType toStringTree(); + StringType toString(); + void freshenParentAndChildIndexes(); + void freshenParentAndChildIndexes(ANTLR_UINT32 offset); + void freshenParentAndChildIndexesDeeply(); + void freshenParentAndChildIndexesDeeply(ANTLR_UINT32 offset); + // Prepare tree node to be re-used + void reuse(); + + UserDataType UserData; +}; + +} + +#include "antlr3commontree.inl" + +#endif + + diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3commontree.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3commontree.inl new file mode 100644 index 0000000000..7000ca4fd2 --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3commontree.inl @@ -0,0 +1,520 @@ +namespace antlr3 { + +template<class ImplTraits> +CommonTree<ImplTraits>::CommonTree() +{ + m_startIndex = -1; + m_stopIndex = -1; + m_childIndex = -1; + m_token = NULL; + m_parent = NULL; +} + +template<class ImplTraits> +CommonTree<ImplTraits>::CommonTree( const CommonTree& ctree ) + :m_children( ctree.m_children) + ,UserData(ctree.UserData) +{ + m_startIndex = ctree.m_startIndex; + m_stopIndex = ctree.m_stopIndex; + m_childIndex = ctree.m_childIndex; + m_token = ctree.m_token; + m_parent = ctree.m_parent; +} + +template<class ImplTraits> +CommonTree<ImplTraits>::CommonTree( const CommonTokenType* token ) +{ + m_startIndex = -1; + m_stopIndex = -1; + m_childIndex = -1; + m_token = token; + m_parent = NULL; +} + 
+template<class ImplTraits> +CommonTree<ImplTraits>::CommonTree( const CommonTree* tree ) + :UserData(tree->UserData) +{ + m_startIndex = tree->get_startIndex(); + m_stopIndex = tree->get_stopIndex(); + m_childIndex = -1; + m_token = tree->get_token(); + m_parent = NULL; +} + +template<class ImplTraits> +const typename CommonTree<ImplTraits>::CommonTokenType* CommonTree<ImplTraits>::get_token() const +{ + return m_token; +} + +template<class ImplTraits> +void CommonTree<ImplTraits>::set_token(typename CommonTree<ImplTraits>::CommonTokenType const* token) +{ + m_token = token; +} + +template<class ImplTraits> +typename CommonTree<ImplTraits>::ChildrenType& CommonTree<ImplTraits>::get_children() +{ + return m_children; +} + +template<class ImplTraits> +const typename CommonTree<ImplTraits>::ChildrenType& CommonTree<ImplTraits>::get_children() const +{ + return m_children; +} + +template<class ImplTraits> +void CommonTree<ImplTraits>::addChild(TreeTypePtr& child) +{ + if (child == NULL) + return; + + ChildrenType& child_children = child->get_children(); + //ChildrenType& tree_children = this->get_children(); + + if (child->isNilNode() == true) + { + if ( !child_children.empty() && child_children == m_children ) + { + // TODO: Change to exception rather than ANTLR3_FPRINTF? + fprintf(stderr, "ANTLR3: An attempt was made to add a child list to itself!\n"); + return; + } + + // Add all of the children's children to this list + // + if ( !child_children.empty() ) + { + if (!m_children.empty()) + { + // Need to copy(append) the children + for(auto i = child_children.begin(); i != child_children.end(); ++i) + { + // ANTLR3 lists can be sparse, unlike Array Lists (TODO: really?) 
+ if ((*i) != NULL) + { + m_children.push_back(std::move(*i)); + // static_cast to possible subtype (if TreeType trait defined) + TreeType* tree = static_cast<TreeType*>(this); + m_children.back()->set_parent(tree); + m_children.back()->set_childIndex(m_children.size() - 1); + } + } + } else { + // We are build ing the tree structure here, so we need not + // worry about duplication of pointers as the tree node + // factory will only clean up each node once. So we just + // copy in the child's children pointer as the child is + // a nil node (has not root itself). + // + m_children.swap( child_children ); + this->freshenParentAndChildIndexes(); + } + } + } + else + { + // Tree we are adding is not a Nil and might have children to copy + m_children.push_back( std::move(child) ); + // static_cast to possible subtype (if TreeType trait defined) + TreeType* tree = static_cast<TreeType*>(this); + m_children.back()->set_parent(tree); + m_children.back()->set_childIndex(m_children.size() - 1); + } +} + +template<class ImplTraits> +void CommonTree<ImplTraits>::addChildren(const ChildListType& kids) +{ + for( typename ChildListType::const_iterator iter = kids.begin(); + iter != kids.end(); ++iter ) + { + this->addChild( *iter ); + } +} + +template<class ImplTraits> +typename CommonTree<ImplTraits>::TreeTypePtr CommonTree<ImplTraits>::deleteChild(ANTLR_UINT32 i) +{ + if( m_children.empty() ) + return NULL; + TreeTypePtr killed = m_children.erase( m_children.begin() + i); + this->freshenParentAndChildIndexes(i); + return killed; +} + +template<class ImplTraits> +void CommonTree<ImplTraits>::replaceChildren(ANTLR_INT32 startChildIndex, ANTLR_INT32 stopChildIndex, TreeTypePtr newTree) +{ + + ANTLR_INT32 numNewChildren; // Tracking variable + ANTLR_INT32 delta; // Difference in new vs existing count + + if ( m_children.empty() ) + { + fprintf(stderr, "replaceChildren call: Indexes are invalid; no children in list for %s", this->get_text().c_str() ); + // TODO throw here + 
return; + } + // How many nodes will go away + ANTLR_INT32 replacingHowMany = stopChildIndex - startChildIndex + 1; + ANTLR_INT32 replacingWithHowMany; // How many nodes will replace them + + // Either use the existing list of children in the supplied nil node, or build a vector of the + // tree we were given if it is not a nil node, then we treat both situations exactly the same + // + ChildrenType newChildren; + ChildrenType &newChildrenRef(newChildren); + + if (newTree->isNilNode()) + { + newChildrenRef = newTree->get_children(); + } else { + newChildrenRef.push_back(newTree); + } + + // Initialize + replacingWithHowMany = newChildrenRef.size(); + numNewChildren = newChildrenRef.size(); + delta = replacingHowMany - replacingWithHowMany; + + // If it is the same number of nodes, then do a direct replacement + // + if (delta == 0) + { + ANTLR_INT32 j = 0; + for (ANTLR_INT32 i = startChildIndex; i <= stopChildIndex; i++) + { + TreeType *child = newChildrenRef.at(j); + m_children[i] = child; + TreeType* tree = static_cast<TreeType*>(this); + child->set_parent(tree); + child->set_childIndex(i); + j++; + } + } + else if (delta > 0) + { + // Less nodes than there were before + // reuse what we have then delete the rest + for (ANTLR_UINT32 j = 0; j < numNewChildren; j++) + { + m_children[ startChildIndex + j ] = newChildrenRef.at(j); + } + // We just delete the same index position until done + ANTLR_UINT32 indexToDelete = startChildIndex + numNewChildren; + for (ANTLR_UINT32 j = indexToDelete; j <= stopChildIndex; j++) + { + m_children.erase( m_children.begin() + indexToDelete); + } + this->freshenParentAndChildIndexes(startChildIndex); + } + else + { + // More nodes than there were before + // Use what we can, then start adding + for (ANTLR_UINT32 j = 0; j < replacingHowMany; j++) + { + m_children[ startChildIndex + j ] = newChildrenRef.at(j); + } + + for (ANTLR_UINT32 j = replacingHowMany; j < replacingWithHowMany; j++) + { + m_children.push_back( newChildrenRef.at(j) 
); + } + + this->freshenParentAndChildIndexes(startChildIndex); + } +} + +template<class ImplTraits> +CommonTree<ImplTraits>* CommonTree<ImplTraits>::dupNode() const +{ + return new CommonTree<ImplTraits>(this); +} + +template<class ImplTraits> +CommonTree<ImplTraits>* CommonTree<ImplTraits>::dupNode(void *p) const +{ + return new (p) CommonTree<ImplTraits>(this); +} + +template<class ImplTraits> +ANTLR_UINT32 CommonTree<ImplTraits>::get_charPositionInLine() const +{ + if(m_token == NULL || (m_token->get_charPositionInLine() == 0) ) + { + if(m_children.empty()) + return 0; + if(m_children.front()) + return m_children.front()->get_charPositionInLine(); + return 0; + } + return m_token->get_charPositionInLine(); +} + +template<class ImplTraits> +typename CommonTree<ImplTraits>::TreeTypePtr& CommonTree<ImplTraits>::getChild(ANTLR_UINT32 i) +{ + static TreeTypePtr nul; + if ( m_children.empty() || i >= m_children.size() ) + { + // TODO throw here should not happen + return nul; + } + return m_children.at(i); +} + +template<class ImplTraits> +void CommonTree<ImplTraits>::set_childIndex( ANTLR_INT32 i) +{ + m_childIndex = i; +} + +template<class ImplTraits> +ANTLR_INT32 CommonTree<ImplTraits>::get_childIndex() const +{ + return m_childIndex; +} + +template<class ImplTraits> +ANTLR_UINT32 CommonTree<ImplTraits>::getChildCount() const +{ + return static_cast<ANTLR_UINT32>( m_children.size() ); +} + +template<class ImplTraits> +typename CommonTree<ImplTraits>::TreeType* CommonTree<ImplTraits>::get_parent() const +{ + return m_parent; +} + +template<class ImplTraits> +void CommonTree<ImplTraits>::set_parent( TreeType* parent) +{ + m_parent = parent; +} + +template<class ImplTraits> +ANTLR_MARKER CommonTree<ImplTraits>::get_startIndex() const +{ + if( m_startIndex==-1 && m_token!=NULL) + return m_token->get_tokenIndex(); + return m_startIndex; +} + +template<class ImplTraits> +void CommonTree<ImplTraits>::set_startIndex( ANTLR_MARKER index) +{ + m_startIndex = index; +} + 
+template<class ImplTraits> +ANTLR_MARKER CommonTree<ImplTraits>::get_stopIndex() const +{ + if( m_stopIndex==-1 && m_token!=NULL) + return m_token->get_tokenIndex(); + return m_stopIndex; +} + +template<class ImplTraits> +void CommonTree<ImplTraits>::set_stopIndex( ANTLR_MARKER index) +{ + m_stopIndex = index; +} + +template<class ImplTraits> +ANTLR_UINT32 CommonTree<ImplTraits>::getType() +{ + if (m_token == NULL) + return CommonTokenType::TOKEN_INVALID; + else + return m_token->get_type(); +} + +template<class ImplTraits> +typename CommonTree<ImplTraits>::TreeTypePtr& CommonTree<ImplTraits>::getFirstChildWithType(ANTLR_UINT32 type) +{ + ANTLR_UINT32 i; + std::size_t cs; + + TreeTypePtr t; + if ( !m_children.empty() ) + { + cs = m_children.size(); + for (i = 0; i < cs; i++) + { + t = m_children[i]; + if (t->getType() == type) + { + return t; + } + } + } + return NULL; +} + +template<class ImplTraits> +ANTLR_UINT32 CommonTree<ImplTraits>::get_line() const +{ + if(m_token == NULL || m_token->get_line() == 0) + { + if ( m_children.empty()) + return 0; + if ( m_children.front()) + return m_children.front()->get_line(); + return 0; + } + return m_token->get_line(); +} + +template<class ImplTraits> +typename CommonTree<ImplTraits>::StringType CommonTree<ImplTraits>::getText() +{ + return this->toString(); +} + +template<class ImplTraits> +bool CommonTree<ImplTraits>::isNilNode() +{ + // This is a Nil tree if it has no payload (Token in our case) + if(m_token == NULL) + return true; + else + return false; +} + +template<class ImplTraits> +void CommonTree<ImplTraits>::setChild(ANTLR_UINT32 i, TreeTypePtr child) +{ + if( child==NULL) + return; + + if( child->isNilNode()) + { + // TODO: throw IllegalArgumentException + return; + } + + if( m_children.size() <= i ) + m_children.resize(i+1); + + m_children[i] = child; + TreeType* tree = static_cast<TreeType*>(this); + child->set_parent(tree); + child->set_childIndex(i); +} + +template<class ImplTraits> +typename 
CommonTree<ImplTraits>::StringType CommonTree<ImplTraits>::toStringTree() +{ + StringType retval; + + if( m_children.empty() ) + return this->toString(); + + /* Need a new string with nothing at all in it. + */ + if(this->isNilNode() == false) + { + retval.append("("); + retval.append(this->toString()); + retval.append(" "); + } + + if ( !m_children.empty()) + { + retval.append( m_children.front()->toStringTree()); + for (auto i = std::next(m_children.begin()); i != m_children.end(); ++i) + { + retval.append(" "); + retval.append((*i)->toStringTree()); + } + } + + if (this->isNilNode() == false) + { + retval.append(")"); + } + return retval; +} + +template<class ImplTraits> +typename CommonTree<ImplTraits>::StringType CommonTree<ImplTraits>::toString() +{ + if( this->isNilNode()) + return StringType("nil"); + return m_token->toString(); +} + +template<class ImplTraits> +void CommonTree<ImplTraits>::freshenParentAndChildIndexes() +{ + this->freshenParentAndChildIndexes(0); +} + +template<class ImplTraits> +void CommonTree<ImplTraits>::freshenParentAndChildIndexes(ANTLR_UINT32 offset) +{ +// ANTLR_UINT32 count = this->getChildCount(); + // Loop from the supplied index and set the indexes and parent +// for (ANTLR_UINT32 c = offset; c < count; c++) +// { +// TreeTypePtr child = this->getChild(c); +// child->set_childIndex(c); +// child->set_parent(this); +// } + // Loop from the supplied index and set the indexes and parent + auto i = m_children.begin(); + int c = offset; + if(offset) + std::advance( i, offset ); + for(; i != m_children.end(); ++i, ++c) + { + (*i)->set_childIndex(c); + TreeType* tree = static_cast<TreeType*>(this); + (*i)->set_parent(tree); + } +} + +template<class ImplTraits> +void CommonTree<ImplTraits>::freshenParentAndChildIndexesDeeply() +{ + this->freshenParentAndChildIndexes(0); +} + +template<class ImplTraits> +void CommonTree<ImplTraits>::freshenParentAndChildIndexesDeeply(ANTLR_UINT32 offset) +{ + ANTLR_UINT32 count = this->getChildCount(); 
+ for (ANTLR_UINT32 c = offset; c < count; c++) { + TreeTypePtr child = getChild(c); + child->set_childIndex(c); + child->set_parent(this); + child->freshenParentAndChildIndexesDeeply(); + } +} + +template<class ImplTraits> +void CommonTree<ImplTraits>::reuse() +{ + m_startIndex = -1; + m_stopIndex = -1; + m_childIndex = -1; + m_token = NULL; + m_parent = NULL; + + ChildrenType empty; + m_children.swap(empty); +} + +template<class ImplTraits> +CommonTree<ImplTraits>::~CommonTree() +{ +} + +} diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3commontreeadaptor.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3commontreeadaptor.hpp new file mode 100644 index 0000000000..c32968ac92 --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3commontreeadaptor.hpp @@ -0,0 +1,248 @@ +/** \file + * Definition of the ANTLR3 common tree adaptor. + */ + +#ifndef _ANTLR3_COMMON_TREE_ADAPTOR_HPP +#define _ANTLR3_COMMON_TREE_ADAPTOR_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +namespace antlr3 { + +template <typename ImplTraits> class CommonTreeStore; + +/** Helper class for unique_ptr. Implements deleter for instances of unique_ptr + While building AST tree dangling pointers are automatically put back into pool + */ +template <typename ImplTraits> +class CommonResourcePoolManager +{ +public: + typedef typename ImplTraits::TreeType TreeType; + CommonResourcePoolManager(CommonTreeStore<ImplTraits> * pool); + CommonResourcePoolManager(); + + ~CommonResourcePoolManager(); + + void operator()(TreeType* releasedResource) const; +private: + CommonTreeStore<ImplTraits> * m_pool; +}; + +template <class ImplTraits> +class CommonTreeStore +{ +public: + typedef typename ImplTraits::TreeType TreeType; + typedef typename ImplTraits::CommonTokenType CommonTokenType; + typedef CommonResourcePoolManager<ImplTraits> ResourcePoolManagerType; + typedef std::unique_ptr<TreeType, CommonResourcePoolManager<ImplTraits> > TreeTypePtr; + //typedef typename ImplTraits::TreeTypePtr TreeTypePtr; + + CommonTreeStore(); + + TreeTypePtr create(); + CommonTokenType* createToken(); + CommonTokenType* createToken(const CommonTokenType* fromToken); + + // Return special kind of NULL pointer wrapped into TreeTypePtr + TreeTypePtr + null() + { + return TreeTypePtr(NULL, m_manager); + } + + std::size_t size() const + { + return m_treeStore.size(); + } + + template <class T> + bool contains(const std::vector<T> &vec, const T &value) + { + return 
std::find(vec.begin(), vec.end(), value) != vec.end(); + } + +protected: + template<typename> friend class CommonResourcePoolManager; + template<typename> friend class CommonTreeAdaptor; + + void reuse(TreeType* releasedResource); + + std::vector<TreeType *> m_recycleBin; + std::vector<std::unique_ptr<TreeType> > m_treeStore; + std::vector<std::unique_ptr<CommonTokenType> > m_tokenStore; + ResourcePoolManagerType m_manager; +}; + +template<class ImplTraits> +class CommonTreeAdaptor + : public ImplTraits::AllocPolicyType + , public CommonTreeStore<ImplTraits> +{ +public: + typedef typename ImplTraits::StringType StringType; + typedef typename ImplTraits::TreeType TreeType; + typedef typename ImplTraits::TreeTypePtr TreeTypePtr; + typedef typename TreeType::ChildrenType ChildrenType; + + typedef TreeType TokenType; + typedef typename ImplTraits::CommonTokenType CommonTokenType; + typedef typename ImplTraits::DebugEventListenerType DebuggerType; + typedef typename ImplTraits::TokenStreamType TokenStreamType; + typedef CommonTreeStore<ImplTraits> TreeStoreType; + +public: + //The parameter is there only to provide uniform constructor interface + CommonTreeAdaptor(DebuggerType* dbg = nullptr); + + TreeTypePtr nilNode(); + TreeTypePtr dupTree( const TreeTypePtr& tree); + TreeTypePtr dupTree( const TreeType* tree); + + TreeTypePtr dupNode(const TreeTypePtr& treeNode); + TreeTypePtr dupNode(const TreeType* treeNode); + + void addChild( TreeTypePtr& t, TreeTypePtr& child); + void addChild( TreeTypePtr& t, TreeTypePtr&& child); + void addChildToken( TreeTypePtr& t, CommonTokenType* child); + void setParent( TreeTypePtr& child, TreeType* parent); + TreeType* getParent( TreeTypePtr& child); + + TreeTypePtr errorNode( CommonTokenType* tnstream, const CommonTokenType* startToken, const CommonTokenType* stopToken); + bool isNilNode( TreeTypePtr& t); + + TreeTypePtr becomeRoot( TreeTypePtr& newRoot, TreeTypePtr& oldRoot); + TreeTypePtr becomeRoot( TreeTypePtr&& newRoot, 
TreeTypePtr& oldRoot); + TreeTypePtr becomeRootToken(CommonTokenType* newRoot, TreeTypePtr& oldRoot); + TreeTypePtr rulePostProcessing( TreeTypePtr& root); + + TreeTypePtr create( CommonTokenType const* payload); + TreeTypePtr create( ANTLR_UINT32 tokenType, const CommonTokenType* fromToken); + TreeTypePtr create( ANTLR_UINT32 tokenType, const CommonTokenType* fromToken, const char* text); + TreeTypePtr create( ANTLR_UINT32 tokenType, const CommonTokenType* fromToken, StringType const& text); + TreeTypePtr create( ANTLR_UINT32 tokenType, const char* text); + TreeTypePtr create( ANTLR_UINT32 tokenType, StringType const& text); + + CommonTokenType* createToken( ANTLR_UINT32 tokenType, const char* text); + CommonTokenType* createToken( ANTLR_UINT32 tokenType, StringType const& text); + CommonTokenType* createToken( const CommonTokenType* fromToken); + + ANTLR_UINT32 getType( TreeTypePtr& t); + StringType getText( TreeTypePtr& t); + + TreeTypePtr& getChild( TreeTypePtr& t, ANTLR_UINT32 i); + void setChild( TreeTypePtr& t, ANTLR_UINT32 i, TreeTypePtr& child); + void deleteChild( TreeTypePtr& t, ANTLR_UINT32 i); + void setChildIndex( TreeTypePtr& t, ANTLR_INT32 i); + ANTLR_INT32 getChildIndex( TreeTypePtr& t); + + ANTLR_UINT32 getChildCount( TreeTypePtr&); + ANTLR_UINT64 getUniqueID( TreeTypePtr&); + + CommonTokenType* getToken( TreeTypePtr& t); + + void setTokenBoundaries( TreeTypePtr& t, const CommonTokenType* startToken, const CommonTokenType* stopToken); + ANTLR_MARKER getTokenStartIndex( TreeTypePtr& t); + ANTLR_MARKER getTokenStopIndex( TreeTypePtr& t); + + /// Produce a DOT (see graphviz freeware suite) from a base tree + /// + StringType makeDot( TreeTypePtr& theTree); + + /// Replace from start to stop child index of parent with t, which might + /// be a list. Number of children may be different + /// after this call. + /// + /// If parent is null, don't do anything; must be at root of overall tree. + /// Can't replace whatever points to the parent externally. 
Do nothing. + /// + void replaceChildren( TreeTypePtr parent, ANTLR_INT32 startChildIndex, + ANTLR_INT32 stopChildIndex, TreeTypePtr t); + + ~CommonTreeAdaptor(); + +protected: + TreeTypePtr dupTreeImpl( const TreeType* root, TreeType* parent); + + void defineDotNodes(TreeTypePtr t, const StringType& dotSpec); + void defineDotEdges(TreeTypePtr t, const StringType& dotSpec); +}; + +//If someone can override the CommonTreeAdaptor at the compile time, that will be +//inherited here. Still you can choose to override the DebugTreeAdaptor, if you wish to +//change the DebugTreeAdaptor +template<class ImplTraits> +class DebugTreeAdaptor : public ImplTraits::CommonTreeAdaptorType +{ +public: + //DebugEventListener implements functionality through virtual functions + //the template parameter is required for pointing back at the adaptor + typedef typename ImplTraits::DebugEventListener DebuggerType; + typedef typename ImplTraits::TreeType TreeType; + typedef typename ImplTraits::TreeTypePtr TreeTypePtr; + typedef typename ImplTraits::CommonTokenType CommonTokenType; + typedef typename ImplTraits::CommonTreeAdaptorType super; + +private: + /// If set to something other than NULL, then this structure is + /// points to an instance of the debugger interface. 
In general, the + /// debugger is only referenced internally in recovery/error operations + /// so that it does not cause overhead by having to check this pointer + /// in every function/method + /// + DebuggerType* m_debugger; + +public: + DebugTreeAdaptor( DebuggerType* debugger ); + void setDebugEventListener( DebuggerType* debugger); + TreeTypePtr nilNode(); + void addChild(TreeTypePtr& t, TreeTypePtr& child); + void addChildToken(TreeTypePtr& t, CommonTokenType* child); + TreeTypePtr becomeRoot( TreeTypePtr& newRootTree, TreeTypePtr& oldRootTree ); + TreeTypePtr becomeRootToken( CommonTokenType* newRoot, TreeTypePtr& oldRoot); + + TreeTypePtr createTypeToken(ANTLR_UINT32 tokenType, CommonTokenType* fromToken); + TreeTypePtr createTypeTokenText(ANTLR_UINT32 tokenType, CommonTokenType* fromToken, ANTLR_UINT8* text); + TreeTypePtr createTypeText( ANTLR_UINT32 tokenType, ANTLR_UINT8* text); + + TreeTypePtr dupTree( const TreeTypePtr& tree); + TreeTypePtr dupTree( const TreeType* tree); + + /// Sends the required debugging events for duplicating a tree + /// to the debugger. 
+ /// + void simulateTreeConstruction(TreeTypePtr& tree); +}; + +} + +#include "antlr3commontreeadaptor.inl" + +#endif diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3commontreeadaptor.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3commontreeadaptor.inl new file mode 100644 index 0000000000..6e191b2a15 --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3commontreeadaptor.inl @@ -0,0 +1,972 @@ +namespace antlr3 { + +template <typename ImplTraits> +CommonResourcePoolManager<ImplTraits>::CommonResourcePoolManager(CommonTreeStore<ImplTraits> * pool) + : m_pool(pool) +{} + +template <typename ImplTraits> +CommonResourcePoolManager<ImplTraits>::CommonResourcePoolManager() + : m_pool(NULL) +{} + +template <typename ImplTraits> +CommonResourcePoolManager<ImplTraits>::~CommonResourcePoolManager() +{}; + +template <typename ImplTraits> +void +CommonResourcePoolManager<ImplTraits>::operator()(TreeType* releasedResource) const +{ + if (releasedResource && m_pool) + m_pool->reuse(releasedResource); +} + +template <class ImplTraits> +CommonTreeStore<ImplTraits>::CommonTreeStore() + : m_manager(this) +{} + +template <class ImplTraits> +typename CommonTreeStore<ImplTraits>::TreeTypePtr +CommonTreeStore<ImplTraits>::create() +{ + if (m_recycleBin.empty()) + { + TreeTypePtr retval = TreeTypePtr(new TreeType, m_manager); + m_treeStore.push_back(std::unique_ptr<TreeType>(retval.get())); + return retval; + } else { + TreeType* resource = m_recycleBin.back(); + m_recycleBin.pop_back(); + return TreeTypePtr(resource, m_manager); + } +} + +template <class ImplTraits> +typename CommonTreeStore<ImplTraits>::CommonTokenType* +CommonTreeStore<ImplTraits>::createToken() +{ + CommonTokenType* retval = new CommonTokenType; + m_tokenStore.push_back(std::unique_ptr<CommonTokenType>(retval)); + return retval; +} + +template <class ImplTraits> +typename CommonTreeStore<ImplTraits>::CommonTokenType* +CommonTreeStore<ImplTraits>::createToken( const CommonTokenType* 
fromToken) +{ + CommonTokenType* retval = new CommonTokenType(*fromToken); + m_tokenStore.push_back(std::unique_ptr<CommonTokenType>(retval)); + return retval; +} + +template <class ImplTraits> +void +CommonTreeStore<ImplTraits>::reuse(TreeType* releasedResource) +{ + if (contains(m_recycleBin, releasedResource)) + { + throw std::string("Grrr double reuse"); + } + releasedResource->reuse(); + m_recycleBin.push_back(releasedResource); +} + +template<class ImplTraits> +CommonTreeAdaptor<ImplTraits>::CommonTreeAdaptor(DebuggerType*) +{} + +template<class ImplTraits> +typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr +CommonTreeAdaptor<ImplTraits>::nilNode() +{ + return this->create(NULL); +} + +template<class ImplTraits> +typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr +CommonTreeAdaptor<ImplTraits>::dupTree( const TreeType* tree) +{ + if (tree == NULL) + return NULL; + return std::move(this->dupTreeImpl(tree, NULL)); +} + +template<class ImplTraits> +typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr +CommonTreeAdaptor<ImplTraits>::dupTree( const TreeTypePtr& tree) +{ + if (tree == NULL) + return NULL; + return std::move(dupTreeImpl(tree.get(), NULL)); +} + +template<class ImplTraits> +typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr +CommonTreeAdaptor<ImplTraits>::dupTreeImpl( const TreeType *root, TreeType* parent) +{ + TreeTypePtr newTree(dupNode(root)); + + // Ensure new subtree root has parent/child index set + // + this->setChildIndex( newTree, root->get_childIndex() ); + this->setParent(newTree, parent); + + ChildrenType const& r_children = root->get_children(); + for (auto i = r_children.begin(); i != r_children.end(); ++i) + { + // add child's clone + this->addChild(newTree, dupTreeImpl(i->get(), newTree.get())); + } + + return newTree; +} + +template<class ImplTraits> +void CommonTreeAdaptor<ImplTraits>::addChild( TreeTypePtr& t, TreeTypePtr& child) +{ + if (t != NULL && child != NULL) + { + t->addChild(child); + } +} + +template<class ImplTraits> 
+void CommonTreeAdaptor<ImplTraits>::addChild( TreeTypePtr& t, TreeTypePtr&& child) +{ + if (t != NULL && child != NULL) + { + t->addChild(child); + } +} + +template<class ImplTraits> +void CommonTreeAdaptor<ImplTraits>::addChildToken( TreeTypePtr& t, CommonTokenType* child) +{ + if (t != NULL && child != NULL) + { + this->addChild(t, this->create(child)); + } +} + +template<class ImplTraits> +void CommonTreeAdaptor<ImplTraits>::setParent( TreeTypePtr& child, TreeType* parent) +{ + child->set_parent(parent); +} + +template<class ImplTraits> +typename CommonTreeAdaptor<ImplTraits>::TreeType* +CommonTreeAdaptor<ImplTraits>::getParent( TreeTypePtr& child) +{ + if ( child==NULL ) + return NULL; + return child->getParent(); +} + +template<class ImplTraits> +typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr +CommonTreeAdaptor<ImplTraits>::errorNode( CommonTokenType*, + const CommonTokenType*, + const CommonTokenType*) +{ + // Use the supplied common tree node stream to get another tree from the factory + // TODO: Look at creating the erronode as in Java, but this is complicated by the + // need to track and free the memory allocated to it, so for now, we just + // want something in the tree that isn't a NULL pointer. + // + return this->create( CommonTokenType::TOKEN_INVALID, "Tree Error Node"); + +} + +template<class ImplTraits> +bool CommonTreeAdaptor<ImplTraits>::isNilNode( TreeTypePtr& t) +{ + return t->isNilNode(); +} + +template<class ImplTraits> +typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr +CommonTreeAdaptor<ImplTraits>::becomeRoot( TreeTypePtr& newRootTree, TreeTypePtr& oldRootTree) +{ + /* Protect against tree rewrites if we are in some sort of error + * state, but have tried to recover. In C we can end up with a null pointer + * for a tree that was not produced. + */ + if (newRootTree == NULL) + { + return std::move(oldRootTree); + } + + /* root is just the new tree as is if there is no + * current root tree. 
+ */ + if (oldRootTree == NULL) + { + return std::move(newRootTree); + } + + /* Produce ^(nil real-node) + */ + if (newRootTree->isNilNode()) + { + if (newRootTree->getChildCount() > 1) + { + /* TODO: Handle tree exceptions + */ + fprintf(stderr, "More than one node as root! TODO: Create tree exception handling\n"); + return std::move(newRootTree); + } + + /* The new root is the first child, keep track of the original newRoot + * because if it was a Nil Node, then we can reuse it now. + */ + TreeTypePtr saveRoot = std::move(newRootTree); + newRootTree = std::move(saveRoot->getChild(0)); + + // Will Reclaim the old nilNode() saveRoot here + } + + /* Add old root into new root. addChild takes care of the case where oldRoot + * is a flat list (nill rooted tree). All children of oldroot are added to + * new root. + */ + newRootTree->addChild(oldRootTree); + + /* Always returns new root structure + */ + return std::move(newRootTree); +} + +template<class ImplTraits> +typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr +CommonTreeAdaptor<ImplTraits>::becomeRoot( TreeTypePtr&& newRootTree, TreeTypePtr& oldRootTree) +{ + /* Protect against tree rewrites if we are in some sort of error + * state, but have tried to recover. In C we can end up with a null pointer + * for a tree that was not produced. + */ + if (newRootTree == NULL) + { + return std::move(oldRootTree); + } + + /* root is just the new tree as is if there is no + * current root tree. + */ + if (oldRootTree == NULL) + { + return std::move(newRootTree); + } + + /* Produce ^(nil real-node) + */ + if (newRootTree->isNilNode()) + { + if (newRootTree->getChildCount() > 1) + { + /* TODO: Handle tree exceptions + */ + fprintf(stderr, "More than one node as root! TODO: Create tree exception handling\n"); + return std::move(newRootTree); + } + + /* The new root is the first child, keep track of the original newRoot + * because if it was a Nil Node, then we can reuse it now. 
+ */ + TreeTypePtr saveRoot = std::move(newRootTree); + newRootTree = std::move(saveRoot->getChild(0)); + + // will Reclaim the old nilNode() here saveRoot. + } + + /* Add old root into new root. addChild takes care of the case where oldRoot + * is a flat list (nill rooted tree). All children of oldroot are added to + * new root. + */ + newRootTree->addChild(oldRootTree); + + /* Always returns new root structure + */ + return std::move(newRootTree); +} + +template<class ImplTraits> +typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr +CommonTreeAdaptor<ImplTraits>::becomeRootToken( CommonTokenType* newRoot, TreeTypePtr& oldRoot) +{ + return this->becomeRoot(this->create(newRoot), oldRoot); +} + +template<class ImplTraits> +typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr +CommonTreeAdaptor<ImplTraits>::create( CommonTokenType const* payload) +{ + TreeTypePtr retval = TreeStoreType::create(); + retval->set_token(payload); + return retval; +} + +template<class ImplTraits> +typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr +CommonTreeAdaptor<ImplTraits>::create( ANTLR_UINT32 tokenType, const CommonTokenType* fromToken) +{ + /* Create the new token */ + auto newToken = this->createToken(fromToken); + /* Set the type of the new token to that supplied */ + newToken->set_type(tokenType); + /* Return a new node based upon this token */ + return this->create(newToken); +} + +template<class ImplTraits> +typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr +CommonTreeAdaptor<ImplTraits>::create( ANTLR_UINT32 tokenType, const CommonTokenType* fromToken, const char* text) +{ + if (fromToken == NULL) + return create(tokenType, text); + /* Create the new token */ + auto newToken = this->createToken(fromToken); + /* Set the type of the new token to that supplied */ + newToken->set_type(tokenType); + /* Set the text of the token accordingly */ + newToken->setText(text); + /* Return a new node based upon this token */ + return this->create(newToken); +} + +template<class 
ImplTraits> +typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr +CommonTreeAdaptor<ImplTraits>::create( ANTLR_UINT32 tokenType, const CommonTokenType* fromToken, typename CommonTreeAdaptor<ImplTraits>::StringType const& text) +{ + if (fromToken == NULL) + return create(tokenType, text); + /* Create the new token */ + auto newToken = this->createToken(fromToken); + /* Set the type of the new token to that supplied */ + newToken->set_type(tokenType); + /* Set the text of the token accordingly */ + newToken->set_tokText(text); + /* Return a new node based upon this token */ + return this->create(newToken); +} + +template<class ImplTraits> +typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr +CommonTreeAdaptor<ImplTraits>::create( ANTLR_UINT32 tokenType, const char* text) +{ + auto fromToken = this->createToken(tokenType, text); + return this->create(fromToken); +} + +template<class ImplTraits> +typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr +CommonTreeAdaptor<ImplTraits>::create( ANTLR_UINT32 tokenType, typename CommonTreeAdaptor<ImplTraits>::StringType const& text) +{ + auto fromToken = this->createToken(tokenType, text); + return this->create(fromToken); +} + +template<class ImplTraits> +typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr +CommonTreeAdaptor<ImplTraits>::dupNode(const TreeType* treeNode) +{ + if (treeNode == NULL) + return TreeStoreType::null(); + TreeTypePtr retval(TreeStoreType::create()); + treeNode->dupNode(retval.get()); + return retval; +} + +template<class ImplTraits> +typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr +CommonTreeAdaptor<ImplTraits>::dupNode(const TreeTypePtr& treeNode) +{ + if (treeNode == NULL) + return TreeStoreType::null(); + TreeTypePtr retval(TreeStoreType::create()); + treeNode->dupNode(retval.get()); + return retval; +} + +template<class ImplTraits> +ANTLR_UINT32 CommonTreeAdaptor<ImplTraits>::getType( TreeTypePtr& t) +{ + if ( t==NULL) + return CommonTokenType::TOKEN_INVALID; + return t->getType(); +} + 
+template<class ImplTraits> +typename CommonTreeAdaptor<ImplTraits>::StringType CommonTreeAdaptor<ImplTraits>::getText( TreeTypePtr& t) +{ + return t->getText(); +} + +template<class ImplTraits> +typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr& CommonTreeAdaptor<ImplTraits>::getChild( TreeTypePtr& t, ANTLR_UINT32 i) +{ + if ( t==NULL ) + return NULL; + return t->getChild(i); +} + +template<class ImplTraits> +void CommonTreeAdaptor<ImplTraits>::setChild( TreeTypePtr& t, ANTLR_UINT32 i, TreeTypePtr& child) +{ + t->setChild(i, child); +} + +template<class ImplTraits> +void CommonTreeAdaptor<ImplTraits>::deleteChild( TreeTypePtr& t, ANTLR_UINT32 i) +{ + t->deleteChild(i); +} + +template<class ImplTraits> +void CommonTreeAdaptor<ImplTraits>::setChildIndex( TreeTypePtr& t, ANTLR_INT32 i) +{ + if( t!= NULL) + t->set_childIndex(i); +} + +template<class ImplTraits> +ANTLR_INT32 CommonTreeAdaptor<ImplTraits>::getChildIndex( TreeTypePtr& t) +{ + if ( t==NULL ) + return 0; + return t->getChildIndex(); +} + +template<class ImplTraits> +ANTLR_UINT32 CommonTreeAdaptor<ImplTraits>::getChildCount( TreeTypePtr& t) +{ + if ( t==NULL ) + return 0; + return t->getChildCount(); +} + +template<class ImplTraits> +ANTLR_UINT64 CommonTreeAdaptor<ImplTraits>::getUniqueID( TreeTypePtr& node ) +{ + return reinterpret_cast<ANTLR_UINT64>(node); +} + +template<class ImplTraits> +typename CommonTreeAdaptor<ImplTraits>::CommonTokenType* +CommonTreeAdaptor<ImplTraits>::createToken( ANTLR_UINT32 tokenType, const char* text) +{ + CommonTokenType* newToken = TreeStoreType::createToken(); + newToken->set_tokText( text ); + newToken->set_type(tokenType); + return newToken; +} + +template<class ImplTraits> +typename CommonTreeAdaptor<ImplTraits>::CommonTokenType* +CommonTreeAdaptor<ImplTraits>::createToken( ANTLR_UINT32 tokenType, typename CommonTreeAdaptor<ImplTraits>::StringType const& text) +{ + CommonTokenType* newToken = TreeStoreType::createToken(); + newToken->set_tokText( text ); + 
newToken->set_type(tokenType); + return newToken; +} + +template<class ImplTraits> +typename CommonTreeAdaptor<ImplTraits>::CommonTokenType* +CommonTreeAdaptor<ImplTraits>::createToken( const CommonTokenType* fromToken) +{ + CommonTokenType* newToken = TreeStoreType::createToken(fromToken); + return newToken; +} + +template<class ImplTraits> +typename CommonTreeAdaptor<ImplTraits>::CommonTokenType* +CommonTreeAdaptor<ImplTraits>::getToken( TreeTypePtr& t) +{ + return t->getToken(); +} + +template<class ImplTraits> +void CommonTreeAdaptor<ImplTraits>::setTokenBoundaries( TreeTypePtr& t, const CommonTokenType* startToken, const CommonTokenType* stopToken) +{ + ANTLR_MARKER start = 0; + ANTLR_MARKER stop = 0; + + if (t == NULL) + return; + + if ( startToken != NULL) + start = startToken->get_tokenIndex(); + + if ( stopToken != NULL) + stop = stopToken->get_tokenIndex(); + + t->set_startIndex(start); + t->set_stopIndex(stop); +} + +template<class ImplTraits> +ANTLR_MARKER CommonTreeAdaptor<ImplTraits>::getTokenStartIndex( TreeTypePtr& t) +{ + if ( t==NULL ) + return -1; + return t->get_tokenStartIndex(); +} + +template<class ImplTraits> +ANTLR_MARKER CommonTreeAdaptor<ImplTraits>::getTokenStopIndex( TreeTypePtr& t) +{ + if ( t==NULL ) + return -1; + return t->get_tokenStopIndex(); +} + +template<class ImplTraits> +typename CommonTreeAdaptor<ImplTraits>::StringType CommonTreeAdaptor<ImplTraits>::makeDot( TreeTypePtr& theTree) +{ + // The string we are building up + // + StringType dotSpec; + char buff[64]; + StringType text; + + dotSpec = "digraph {\n\n" + "\tordering=out;\n" + "\tranksep=.4;\n" + "\tbgcolor=\"lightgrey\"; node [shape=box, fixedsize=false, fontsize=12, fontname=\"Helvetica-bold\", fontcolor=\"blue\"\n" + "\twidth=.25, height=.25, color=\"black\", fillcolor=\"white\", style=\"filled, solid, bold\"];\n\n" + "\tedge [arrowsize=.5, color=\"black\", style=\"bold\"]\n\n"; + + if (theTree == NULL) + { + // No tree, so create a blank spec + // + 
dotSpec->append("n0[label=\"EMPTY TREE\"]\n"); + return dotSpec; + } + + sprintf(buff, "\tn%p[label=\"", theTree); + dotSpec.append(buff); + text = this->getText(theTree); + for (std::size_t j = 0; j < text.size(); j++) + { + switch(text[j]) + { + case '"': + dotSpec.append("\\\""); + break; + + case '\n': + dotSpec.append("\\n"); + break; + + case '\r': + dotSpec.append("\\r"); + break; + + default: + dotSpec += text[j]; + break; + } + } + dotSpec->append("\"]\n"); + + // First produce the node defintions + // + this->defineDotNodes(theTree, dotSpec); + dotSpec.append("\n"); + this->defineDotEdges(theTree, dotSpec); + + // Terminate the spec + // + dotSpec.append("\n}"); + + // Result + // + return dotSpec; +} + +template<class ImplTraits> +void CommonTreeAdaptor<ImplTraits>::replaceChildren( TreeTypePtr parent, ANTLR_INT32 startChildIndex, ANTLR_INT32 stopChildIndex, TreeTypePtr t) +{ + if (parent != NULL) + parent->replaceChildren(startChildIndex, stopChildIndex, t); +} + +template<class ImplTraits> +CommonTreeAdaptor<ImplTraits>::~CommonTreeAdaptor() +{ +#ifdef ANTLR3_DEBUG + std::cout << "SZ" << TreeStoreType::size() << std::endl; + std::cout << "RZ" << TreeStoreType::m_recycleBin.size() << std::endl; + + auto i = TreeStoreType::m_treeStore.begin(); + + std::cout + << ' ' + << "Node " << '\t' << "Parent " << '\t' << "Type" << '\t' << "toStringTree" << std::endl; + + for(; i != TreeStoreType::m_treeStore.end(); ++i) + { + std::cout + << (TreeStoreType::contains(TreeStoreType::m_recycleBin, i->get()) ? '*' : ' ') + << i->get() << '\t' + << (const void *) (*i)->get_parent() << '\t' + << (*i)->getType() << '\t' + << (*i)->getChildCount() << '\t' + << (*i)->toStringTree() << '\t' + << std::endl; + } +#endif +} + +template<class ImplTraits> +void CommonTreeAdaptor<ImplTraits>::defineDotNodes(TreeTypePtr t, const StringType& dotSpec) +{ + // How many nodes are we talking about? 
+ // + int nCount; + int i; + TreeTypePtr child; + char buff[64]; + StringType text; + int j; + + // Count the nodes + // + nCount = this->getChildCount(t); + + if (nCount == 0) + { + // This will already have been included as a child of another node + // so there is nothing to add. + // + return; + } + + // For each child of the current tree, define a node using the + // memory address of the node to name it + // + for (i = 0; i<nCount; i++) + { + + // Pick up a pointer for the child + // + child = this->getChild(t, i); + + // Name the node + // + sprintf(buff, "\tn%p[label=\"", child); + dotSpec->append(buff); + text = this->getText(child); + for (j = 0; j < text.size(); j++) + { + switch(text[j]) + { + case '"': + dotSpec.append("\\\""); + break; + + case '\n': + dotSpec.append("\\n"); + break; + + case '\r': + dotSpec.append("\\r"); + break; + + default: + dotSpec += text[j]; + break; + } + } + dotSpec.append("\"]\n"); + + // And now define the children of this child (if any) + // + this->defineDotNodes(child, dotSpec); + } + + // Done + // + return; +} + +template<class ImplTraits> +void CommonTreeAdaptor<ImplTraits>::defineDotEdges(TreeTypePtr t, const StringType& dotSpec) +{ + // How many nodes are we talking about? + // + int nCount; + if (t == NULL) + { + // No tree, so do nothing + // + return; + } + + // Count the nodes + // + nCount = this->getChildCount(t); + + if (nCount == 0) + { + // This will already have been included as a child of another node + // so there is nothing to add. 
+ // + return; + } + + // For each child, define an edge from this parent, then process + // and children of this child in the same way + // + for (int i=0; i<nCount; i++) + { + TreeTypePtr child; + char buff[128]; + StringType text; + + // Next child + // + child = this->getChild(t, i); + + // Create the edge relation + // + sprintf(buff, "\t\tn%p -> n%p\t\t// ", t, child); + + dotSpec.append(buff); + + // Document the relationship + // + text = this->getText(t); + for (std::size_t j = 0; j < text.size(); j++) + { + switch(text[j]) + { + case '"': + dotSpec.append("\\\""); + break; + + case '\n': + dotSpec.append("\\n"); + break; + + case '\r': + dotSpec.append("\\r"); + break; + + default: + dotSpec += text[j]; + break; + } + } + + dotSpec.append(" -> "); + + text = this->getText(child); + for (std::size_t j = 0; j < text.size(); j++) + { + switch(text[j]) + { + case '"': + dotSpec.append("\\\""); + break; + + case '\n': + dotSpec.append("\\n"); + break; + + case '\r': + dotSpec.append("\\r"); + break; + + default: + dotSpec += text[j]; + break; + } + } + dotSpec.append("\n"); + + // Define edges for this child + // + this->defineDotEdges(child, dotSpec); + } + + // Done + // + return; +} + +template<class ImplTraits> +typename CommonTreeAdaptor<ImplTraits>::TreeTypePtr CommonTreeAdaptor<ImplTraits>::rulePostProcessing( TreeTypePtr& root) +{ + TreeTypePtr saveRoot = std::move(root); + + if (saveRoot != NULL && saveRoot->isNilNode()) + { + if (saveRoot->getChildCount() == 0) + { + return TreeTypePtr(NULL, root.get_deleter()); + } + else if (saveRoot->getChildCount() == 1) + { + TreeTypePtr newRoot = std::move(saveRoot->getChild(0)); + newRoot->set_parent(NULL); + newRoot->set_childIndex(-1); + + // The root we were given was a nil node, with one child, which means it has + // been abandoned and would be lost in the node factory. 
+ // saveRoot will be releases and put back into factory + // + return newRoot; + } + } + return saveRoot; +} + +template<class ImplTraits> +DebugTreeAdaptor<ImplTraits>::DebugTreeAdaptor( DebuggerType* debugger ) +{ + m_debugger = debugger; +} + +template<class ImplTraits> +void DebugTreeAdaptor<ImplTraits>::setDebugEventListener( DebuggerType* debugger) +{ + m_debugger = debugger; +} + +template<class ImplTraits> +typename DebugTreeAdaptor<ImplTraits>::TreeTypePtr DebugTreeAdaptor<ImplTraits>::nilNode() +{ + TreeTypePtr t = this->create(NULL); + m_debugger->createNode(t); + return t; +} + +template<class ImplTraits> +void DebugTreeAdaptor<ImplTraits>::addChild(TreeTypePtr& t, TreeTypePtr& child) +{ + if (t != NULL && child != NULL) + { + t->addChild(child); + m_debugger->addChild(t, child); + } +} + +template<class ImplTraits> +void DebugTreeAdaptor<ImplTraits>::addChildToken(TreeTypePtr& t, CommonTokenType* child) +{ + TreeTypePtr tc; + if (t != NULL && child != NULL) + { + tc = this->create(child); + this->addChild(t, tc); + m_debugger->addChild(t, tc); + } +} + +template<class ImplTraits> +typename DebugTreeAdaptor<ImplTraits>::TreeTypePtr DebugTreeAdaptor<ImplTraits>::becomeRoot( TreeTypePtr& newRootTree, TreeTypePtr& oldRootTree ) +{ + TreeTypePtr t = super::becomeRoot(newRootTree, oldRootTree); + m_debugger->becomeRoot(newRootTree, oldRootTree); + return t; +} + +template<class ImplTraits> +typename DebugTreeAdaptor<ImplTraits>::TreeTypePtr DebugTreeAdaptor<ImplTraits>::becomeRootToken(CommonTokenType* newRoot, TreeTypePtr& oldRoot) +{ + TreeTypePtr t = super::becomeRoot(this->create(newRoot), oldRoot); + m_debugger->becomeRoot(t, oldRoot); + return t; +} + +template<class ImplTraits> +typename DebugTreeAdaptor<ImplTraits>::TreeTypePtr DebugTreeAdaptor<ImplTraits>::createTypeToken(ANTLR_UINT32 tokenType, CommonTokenType* fromToken) +{ + TreeTypePtr t; + t = this->createTypeToken(tokenType, fromToken); + m_debugger->createNode(t); + return t; +} + 
+template<class ImplTraits> +typename DebugTreeAdaptor<ImplTraits>::TreeTypePtr DebugTreeAdaptor<ImplTraits>::createTypeTokenText(ANTLR_UINT32 tokenType, CommonTokenType* fromToken, ANTLR_UINT8* text) +{ + TreeTypePtr t; + t = this->createTypeTokenText(tokenType, fromToken, text); + m_debugger->createNode(t); + return t; +} + +template<class ImplTraits> +typename DebugTreeAdaptor<ImplTraits>::TreeTypePtr DebugTreeAdaptor<ImplTraits>::createTypeText( ANTLR_UINT32 tokenType, ANTLR_UINT8* text) +{ + TreeTypePtr t; + t = this->createTypeText(tokenType, text); + m_debugger->createNode(t); + return t; +} + +template<class ImplTraits> +typename DebugTreeAdaptor<ImplTraits>::TreeTypePtr DebugTreeAdaptor<ImplTraits>::dupTree( const TreeTypePtr& tree) +{ + TreeTypePtr t; + + // Call the normal dup tree mechanism first + // + t = this->dupTreeImpl(tree, NULL); + + // In order to tell the debugger what we have just done, we now + // simulate the tree building mechanism. THis will fire + // lots of debugging events to the client and look like we + // duped the tree.. + // + this->simulateTreeConstruction( t); + + return t; +} + +template<class ImplTraits> +typename DebugTreeAdaptor<ImplTraits>::TreeTypePtr DebugTreeAdaptor<ImplTraits>::dupTree( const TreeType* tree) +{ + TreeTypePtr t; + + // Call the normal dup tree mechanism first + // + t = this->dupTreeImpl(tree, NULL); + + // In order to tell the debugger what we have just done, we now + // simulate the tree building mechanism. THis will fire + // lots of debugging events to the client and look like we + // duped the tree.. 
+ // + this->simulateTreeConstruction( t); + + return t; +} + +template<class ImplTraits> +void DebugTreeAdaptor<ImplTraits>::simulateTreeConstruction(TreeTypePtr& tree) +{ + ANTLR_UINT32 n; + ANTLR_UINT32 i; + TreeTypePtr child; + + // Send the create node event + // + m_debugger->createNode(tree); + + n = this->getChildCount(tree); + for (i = 0; i < n; i++) + { + child = this->getChild(tree, i); + this->simulateTreeConstruction(child); + m_debugger->addChild(tree, child); + } +} + +} diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3commontreenodestream.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3commontreenodestream.hpp new file mode 100644 index 0000000000..3adf02e933 --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3commontreenodestream.hpp @@ -0,0 +1,317 @@ +/// \file +/// Definition of the ANTLR3 common tree node stream. +/// + +#ifndef _ANTLR_COMMON_TREE_NODE_STREAM__HPP +#define _ANTLR_COMMON_TREE_NODE_STREAM__HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

namespace antlr3 {

/// Tree node stream that flattens a tree into a vector of nodes, with DOWN
/// and UP navigation nodes bracketing each node's children, and serves
/// lookahead (_LT / LB) and absolute access (get) from that vector.
///
template<class ImplTraits>
class CommonTreeNodeStream : public ImplTraits::TreeNodeIntStreamType
{
public:
    enum Constants
    {
        /// Token buffer initial size settings (will auto increase)
        ///
        DEFAULT_INITIAL_BUFFER_SIZE = 100
        , INITIAL_CALL_STACK_SIZE = 10
    };

    typedef typename ImplTraits::TreeType TreeType;
    typedef typename ImplTraits::TreeTypePtr TreeTypePtr;
    typedef TreeType UnitType;
    typedef typename ImplTraits::StringType StringType;
    typedef typename ImplTraits::StringStreamType StringStreamType;
    typedef typename ImplTraits::TreeAdaptorType TreeAdaptorType;
    typedef typename ImplTraits::TreeNodeIntStreamType IntStreamType;
    typedef typename ImplTraits::AllocPolicyType AllocPolicyType;
    typedef typename AllocPolicyType::template VectorType<TreeTypePtr> NodesType;
    typedef typename AllocPolicyType::template VectorType< TreeWalkState<ImplTraits> > MarkersType;
    typedef typename AllocPolicyType::template StackType< ANTLR_INT32 > NodeStackType;
    typedef typename ImplTraits::TreeParserType ComponentType;
    typedef typename ImplTraits::CommonTokenType CommonTokenType;
    typedef typename ImplTraits::TreeNodeIntStreamType BaseType;

public:
    /// Dummy tree node that indicates a descent into a child
    /// tree. Initialized by a call to create a new interface.
    ///
    TreeType m_DOWN;

    /// Dummy tree node that indicates an ascent back up to a parent
    /// tree. Initialized by a call to create a new interface.
    ///
    TreeType m_UP;

    /// Dummy tree node that indicates the termination point of the
    /// tree. Initialized by a call to create a new interface.
    ///
    TreeType m_EOF_NODE;

    /// Dummy node that is returned if we need to indicate an invalid node
    /// for any reason.
    ///
    TreeType m_INVALID_NODE;

    /// The complete mapping from stream index to tree node.
    /// This buffer includes pointers to DOWN, UP, and EOF nodes.
    /// It is built upon ctor invocation. The elements are type
    /// Object as we don't know what the trees look like.
    ///
    /// Load upon first need of the buffer so we can set token types
    /// of interest for reverseIndexing. Slows us down a wee bit to
    /// do all of the if p==-1 testing everywhere though, though in C
    /// you won't really be able to measure this.
    ///
    /// Must be freed when the tree node stream is torn down.
    ///
    NodesType m_nodes;

    /// Which tree are we navigating ?
    ///
    TreeTypePtr m_root;

    /// Pointer to tree adaptor interface that manipulates/builds
    /// the tree.
    ///
    TreeAdaptorType* m_adaptor;

    /// As we walk down the nodes, we must track parent nodes so we know
    /// where to go after walking the last child of a node. When visiting
    /// a child, push current node and current index (current index
    /// is first stored in the tree node structure to avoid two stacks).
    ///
    NodeStackType m_nodeStack;

    /// The current index into the nodes vector of the current tree
    /// we are parsing and possibly rewriting.
    ///
    ANTLR_INT32 m_p;

    /// Which node are we currently visiting?
    ///
    TreeTypePtr m_currentNode;

    /// Which node did we last visit? Used for LT(-1)
    ///
    TreeTypePtr m_previousNode;

    /// Which child are we currently visiting? If -1 we have not visited
    /// this node yet; next consume() request will set currentIndex to 0.
    ///
    ANTLR_INT32 m_currentChildIndex;

    /// What node index did we just consume? i=0..n-1 for n node trees.
    /// IntStream.next is hence 1 + this value. Size will be same.
    ///
    ANTLR_MARKER m_absoluteNodeIndex;

    /// Buffer tree node stream for use with LT(i). This list grows
    /// to fit new lookahead depths, but consume() wraps like a circular
    /// buffer.
    ///
    TreeTypePtr* m_lookAhead;

    /// Number of elements available in the lookahead buffer at any point in
    /// time. This is the current size of the array.
    ///
    ANTLR_UINT32 m_lookAheadLength;

    /// lookAhead[head] is the first symbol of lookahead, LT(1).
    ///
    ANTLR_UINT32 m_head;

    /// Add new lookahead at lookahead[tail]. tail wraps around at the
    /// end of the lookahead buffer so tail could be less than head.
    ///
    ANTLR_UINT32 m_tail;

    /// Calls to mark() may be nested so we have to track a stack of
    /// them. The marker is an index into this stack. Index 0 is
    /// the first marker. This is a List<TreeWalkState>
    ///
    MarkersType m_markers;

    /// Indicates whether this node stream was derived from a prior
    /// node stream to be used by a rewriting tree parser for instance.
    /// If this flag is set to ANTLR_TRUE, then when this stream is
    /// closed it will not free the root tree as this tree always
    /// belongs to the originating node stream.
    ///
    bool m_isRewriter;

    /// If set to ANTLR_TRUE then the navigation nodes UP, DOWN are
    /// duplicated rather than reused within the tree.
    ///
    bool m_uniqueNavigationNodes;

public:
    // INTERFACE
    //
    CommonTreeNodeStream( ANTLR_UINT32 hint );
    CommonTreeNodeStream( const CommonTreeNodeStream& ctn );
    CommonTreeNodeStream( TreeTypePtr tree, ANTLR_UINT32 hint );

    void init( ANTLR_UINT32 hint );
    ~CommonTreeNodeStream();

    /// Get tree node at current input pointer + i ahead where i=1 is next node.
    /// i<0 indicates nodes in the past. So LT(-1) is previous node, but
    /// implementations are not required to provide results for k < -1.
    /// LT(0) is undefined. For i>=n, return null.
    /// Return NULL for LT(0) and any index that results in an absolute address
    /// that is negative (beyond the start of the list).
    ///
    /// This is analogous to the LT() method of the TokenStream, but this
    /// returns a tree node instead of a token. Makes code gen identical
    /// for both parser and tree grammars. :)
    ///
    TreeTypePtr _LT(ANTLR_INT32 k);

    /// Where is this stream pulling nodes from? This is not the name, but
    /// the object that provides node objects.
    ///
    TreeTypePtr getTreeSource();

    /// What adaptor can tell me how to interpret/navigate nodes and
    /// trees. E.g., get text of a node.
    ///
    TreeAdaptorType* getTreeAdaptor();

    /// As we flatten the tree, we use UP, DOWN nodes to represent
    /// the tree structure. When debugging we need unique nodes
    /// so we have to instantiate new ones. When doing normal tree
    /// parsing, it's slow and a waste of memory to create unique
    /// navigation nodes. Default should be false;
    ///
    void set_uniqueNavigationNodes(bool uniqueNavigationNodes);

    /// Return the text of the whole tree, starting at the root.
    ///
    StringType toString();

    /// Return the text of all nodes from start to stop, inclusive.
    /// If the stream does not buffer all the nodes then it can still
    /// walk recursively from start until stop. You can always return
    /// null or "" too, but users should not access $ruleLabel.text in
    /// an action of course in that case.
    ///
    StringType toStringSS(TreeTypePtr start, TreeTypePtr stop);

    /// Return the text of all nodes from start to stop, inclusive, into the
    /// supplied buffer.
    /// If the stream does not buffer all the nodes then it can still
    /// walk recursively from start until stop. You can always return
    /// null or "" too, but users should not access $ruleLabel.text in
    /// an action of course in that case.
    ///
    void toStringWork(TreeTypePtr start, TreeTypePtr stop, StringType& buf);

    /// Get a tree node at an absolute index i; 0..n-1.
    /// If you don't want to buffer up nodes, then this method makes no
    /// sense for you.
    ///
    TreeTypePtr get(ANTLR_INT32 i);

    // REWRITING TREES (used by tree parser)

    /// Replace from start to stop child index of parent with t, which might
    /// be a list. Number of children may be different
    /// after this call. The stream is notified because it is walking the
    /// tree and might need to know you are monkeying with the underlying
    /// tree. Also, it might be able to modify the node stream to avoid
    /// restreaming for future phases.
    ///
    /// If parent is null, don't do anything; must be at root of overall tree.
    /// Can't replace whatever points to the parent externally. Do nothing.
    ///
    void replaceChildren(TreeTypePtr parent, ANTLR_INT32 startChildIndex,
                         ANTLR_INT32 stopChildIndex, TreeTypePtr t);

    /// Look backwards k nodes from the current position in the node buffer.
    ///
    TreeTypePtr LB(ANTLR_INT32 k);

    /// As we flatten the tree, we use UP, DOWN nodes to represent
    /// the tree structure. When debugging we need unique nodes
    /// so instantiate new ones when uniqueNavigationNodes is true.
    ///
    void addNavigationNode(ANTLR_UINT32 ttype);

    TreeTypePtr newDownNode();

    TreeTypePtr newUpNode();

    bool hasUniqueNavigationNodes() const;

    ANTLR_UINT32 getLookaheadSize();

    void push(ANTLR_INT32 index);

    ANTLR_INT32 pop();

    void reset();

    void fillBufferRoot();
    void fillBuffer(TreeTypePtr t);

};

/** This structure is used to save the state information in the treenodestream
 * when walking ahead with cyclic DFA or for syntactic predicates,
 * we need to record the state of the tree node stream. This
 * class wraps up the current state of the CommonTreeNodeStream.
 * Calling mark() will push another of these on the markers stack.
+ */ +template<class ImplTraits> +class TreeWalkState : public ImplTraits::AllocPolicyType +{ +public: + typedef typename ImplTraits::TreeType TreeType; + typedef typename ImplTraits::TreeTypePtr TreeTypePtr; + +private: + ANTLR_UINT32 m_currentChildIndex; + ANTLR_MARKER m_absoluteNodeIndex; + TreeTypePtr m_currentNode; + TreeTypePtr m_previousNode; + ANTLR_UINT32 m_nodeStackSize; + TreeTypePtr m_lookAhead; + ANTLR_UINT32 m_lookAheadLength; + ANTLR_UINT32 m_tail; + ANTLR_UINT32 m_head; + + +}; + +} + +#include "antlr3commontreenodestream.inl" + +#endif diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3commontreenodestream.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3commontreenodestream.inl new file mode 100644 index 0000000000..096e15db11 --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3commontreenodestream.inl @@ -0,0 +1,422 @@ +namespace antlr3 { + +template<class ImplTraits> +CommonTreeNodeStream<ImplTraits>::CommonTreeNodeStream(ANTLR_UINT32 hint) +{ + this->init(hint); +} + +template<class ImplTraits> +void CommonTreeNodeStream<ImplTraits>::init( ANTLR_UINT32 hint ) +{ + m_root = NULL; + m_adaptor = new TreeAdaptorType; + // Create the node list map + // + if (hint == 0) + hint = DEFAULT_INITIAL_BUFFER_SIZE; + m_nodes.reserve( DEFAULT_INITIAL_BUFFER_SIZE ); + + m_p = -1; + m_currentNode = NULL; + m_previousNode = NULL; + m_currentChildIndex = 0; + m_absoluteNodeIndex = 0; + m_lookAhead = NULL; + m_lookAheadLength = 0; + m_head = 0; + m_tail = 0; + m_uniqueNavigationNodes = false; + m_isRewriter = false; + + CommonTokenType* token = new CommonTokenType(CommonTokenType::TOKEN_UP); + token->set_tokText( "UP" ); + m_UP.set_token( token ); + + token = new CommonTokenType(CommonTokenType::TOKEN_DOWN); + token->set_tokText( "DOWN" ); + m_DOWN.set_token( token ); + + token = new CommonTokenType(CommonTokenType::TOKEN_EOF); + token->set_tokText( "EOF" ); + m_EOF_NODE.set_token( token ); + + token = new 
CommonTokenType(CommonTokenType::TOKEN_INVALID); + token->set_tokText( "INVALID" ); + m_EOF_NODE.set_token( token ); +} + +template<class ImplTraits> +CommonTreeNodeStream<ImplTraits>::CommonTreeNodeStream( const CommonTreeNodeStream& ctn ) +{ + m_root = ctn.m_root; + m_adaptor = ctn.m_adaptor; + m_nodes.reserve( DEFAULT_INITIAL_BUFFER_SIZE ); + m_nodeStack = ctn.m_nodeStack; + m_p = -1; + m_currentNode = NULL; + m_previousNode = NULL; + m_currentChildIndex = 0; + m_absoluteNodeIndex = 0; + m_lookAhead = NULL; + m_lookAheadLength = 0; + m_head = 0; + m_tail = 0; + m_uniqueNavigationNodes = false; + m_isRewriter = true; + + m_UP.set_token( ctn.m_UP.get_token() ); + m_DOWN.set_token( ctn.m_DOWN.get_token() ); + m_EOF_NODE.set_token( ctn.m_EOF_NODE.get_token() ); + m_INVALID_NODE.set_token( ctn.m_INVALID_NODE.get_token() ); +} + +template<class ImplTraits> +CommonTreeNodeStream<ImplTraits>::CommonTreeNodeStream( TreeTypePtr tree, ANTLR_UINT32 hint ) +{ + this->init(hint); + m_root = tree; +} + +template<class ImplTraits> +CommonTreeNodeStream<ImplTraits>::~CommonTreeNodeStream() +{ + // If this is a rewrting stream, then certain resources + // belong to the originating node stream and we do not + // free them here. 
+ // + if ( m_isRewriter != true) + { + delete m_adaptor; + + m_nodeStack.clear(); + + delete m_INVALID_NODE.get_token(); + delete m_EOF_NODE.get_token(); + delete m_DOWN.get_token(); + delete m_UP.get_token(); + } + + m_nodes.clear(); +} + +template<class ImplTraits> +typename CommonTreeNodeStream<ImplTraits>::TreeTypePtr CommonTreeNodeStream<ImplTraits>::_LT(ANTLR_INT32 k) +{ + if ( m_p == -1) + { + this->fillBufferRoot(); + } + + if (k < 0) + { + return this->LB(-k); + } + else if (k == 0) + { + return &(m_INVALID_NODE); + } + + // k was a legitimate request, + // + if (( m_p + k - 1) >= (ANTLR_INT32)(m_nodes.size())) + { + return &(m_EOF_NODE); + } + + return m_nodes[ m_p + k - 1 ]; +} + +template<class ImplTraits> +typename CommonTreeNodeStream<ImplTraits>::TreeTypePtr CommonTreeNodeStream<ImplTraits>::getTreeSource() +{ + return m_root; +} + +template<class ImplTraits> +typename CommonTreeNodeStream<ImplTraits>::TreeAdaptorType* CommonTreeNodeStream<ImplTraits>::getTreeAdaptor() +{ + return m_adaptor; +} + +template<class ImplTraits> +void CommonTreeNodeStream<ImplTraits>::set_uniqueNavigationNodes(bool uniqueNavigationNodes) +{ + m_uniqueNavigationNodes = uniqueNavigationNodes; +} + +template<class ImplTraits> +typename CommonTreeNodeStream<ImplTraits>::StringType CommonTreeNodeStream<ImplTraits>::toString() +{ + return this->toStringSS(m_root, NULL); +} + +template<class ImplTraits> +typename CommonTreeNodeStream<ImplTraits>::StringType CommonTreeNodeStream<ImplTraits>::toStringSS(TreeTypePtr start, TreeTypePtr stop) +{ + StringType buf; + this->toStringWork(start, stop, buf); + return buf; +} + +template<class ImplTraits> +void CommonTreeNodeStream<ImplTraits>::toStringWork(TreeTypePtr start, TreeTypePtr stop, StringType& str) +{ + ANTLR_UINT32 n; + ANTLR_UINT32 c; + StringStreamType buf; + + if (!start->isNilNode() ) + { + StringType text; + + text = start->toString(); + + if (text.empty()) + { + buf << ' '; + buf << start->getType(); + } + else + buf << 
text; + } + + if (start == stop) + { + return; /* Finished */ + } + + n = start->getChildCount(); + + if (n > 0 && ! start->isNilNode() ) + { + buf << ' '; + buf << CommonTokenType::TOKEN_DOWN; + } + + for (c = 0; c<n ; c++) + { + TreeTypePtr child; + + child = start->getChild(c); + this->toStringWork(child, stop, buf); + } + + if (n > 0 && ! start->isNilNode() ) + { + buf << ' '; + buf << CommonTokenType::TOKEN_UP; + } + str = buf.str(); +} + +template<class ImplTraits> +typename CommonTreeNodeStream<ImplTraits>::TreeTypePtr CommonTreeNodeStream<ImplTraits>::get(ANTLR_INT32 k) +{ + if( m_p == -1 ) + { + this->fillBufferRoot(); + } + + return m_nodes[k]; +} + +template<class ImplTraits> +void CommonTreeNodeStream<ImplTraits>::replaceChildren(TreeTypePtr parent, + ANTLR_INT32 startChildIndex, + ANTLR_INT32 stopChildIndex, + TreeTypePtr t) +{ + if (parent != NULL) + { + TreeAdaptorType* adaptor; + adaptor = this->getTreeAdaptor(); + adaptor->replaceChildren(parent, startChildIndex, stopChildIndex, t); + } +} + +template<class ImplTraits> +typename CommonTreeNodeStream<ImplTraits>::TreeTypePtr CommonTreeNodeStream<ImplTraits>::LB(ANTLR_INT32 k) +{ + if ( k==0) + { + return &(m_INVALID_NODE); + } + + if ( (m_p - k) < 0) + { + return &(m_INVALID_NODE); + } + + return m_nodes[ m_p - k ]; +} + +template<class ImplTraits> +void CommonTreeNodeStream<ImplTraits>::addNavigationNode(ANTLR_UINT32 ttype) +{ + TreeTypePtr node; + + node = NULL; + + if (ttype == CommonTokenType::TOKEN_DOWN) + { + if (this->hasUniqueNavigationNodes() == true) + { + node = this->newDownNode(); + } + else + { + node = &m_DOWN; + } + } + else + { + if (this->hasUniqueNavigationNodes() == true) + { + node = this->newUpNode(); + } + else + { + node = &m_UP; + } + } + + // Now add the node we decided upon. 
+ // + m_nodes.push_back(node); +} + +template<class ImplTraits> +typename CommonTreeNodeStream<ImplTraits>::TreeTypePtr CommonTreeNodeStream<ImplTraits>::newDownNode() +{ + TreeTypePtr dNode; + CommonTokenType* token; + + token = new CommonTokenType(CommonTokenType::TOKEN_DOWN); + token->set_tokText("DOWN"); + dNode = new TreeType(token); + return &dNode; +} + +template<class ImplTraits> +typename CommonTreeNodeStream<ImplTraits>::TreeTypePtr CommonTreeNodeStream<ImplTraits>::newUpNode() +{ + TreeTypePtr uNode; + CommonTokenType* token; + + token = new CommonTokenType(CommonTokenType::TOKEN_UP); + token->set_tokText("UP"); + uNode = new TreeType(token); + return &uNode; + +} + +template<class ImplTraits> +bool CommonTreeNodeStream<ImplTraits>::hasUniqueNavigationNodes() const +{ + return m_uniqueNavigationNodes; +} + +template<class ImplTraits> +ANTLR_UINT32 CommonTreeNodeStream<ImplTraits>::getLookaheadSize() +{ + return m_tail < m_head + ? (m_lookAheadLength - m_head + m_tail) + : (m_tail - m_head); +} + +template<class ImplTraits> +void CommonTreeNodeStream<ImplTraits>::push(ANTLR_INT32 index) +{ + m_nodeStack.push(m_p); // Save current index + this->seek(index); +} + +template<class ImplTraits> +ANTLR_INT32 CommonTreeNodeStream<ImplTraits>::pop() +{ + ANTLR_INT32 retVal; + + retVal = m_nodeStack.top(); + m_nodeStack.pop(); + this->seek(retVal); + return retVal; +} + +template<class ImplTraits> +void CommonTreeNodeStream<ImplTraits>::reset() +{ + if ( m_p != -1) + { + m_p = 0; + } + BaseType::m_lastMarker = 0; + + + // Free and reset the node stack only if this is not + // a rewriter, which is going to reuse the originating + // node streams node stack + // + if (m_isRewriter != true) + m_nodeStack.clear(); +} + +template<class ImplTraits> +void CommonTreeNodeStream<ImplTraits>::fillBufferRoot() +{ + // Call the generic buffer routine with the root as the + // argument + // + this->fillBuffer(m_root); + m_p = 0; // Indicate we are at buffer start +} + 
+template<class ImplTraits> +void CommonTreeNodeStream<ImplTraits>::fillBuffer(TreeTypePtr t) +{ + bool nilNode; + ANTLR_UINT32 nCount; + ANTLR_UINT32 c; + + nilNode = m_adaptor->isNilNode(t); + + // If the supplied node is not a nil (list) node then we + // add in the node itself to the vector + // + if (nilNode == false) + { + m_nodes.push_back(t); + } + + // Only add a DOWN node if the tree is not a nil tree and + // the tree does have children. + // + nCount = t->getChildCount(); + + if (nilNode == false && nCount>0) + { + this->addNavigationNode( CommonTokenType::TOKEN_DOWN); + } + + // We always add any children the tree contains, which is + // a recursive call to this function, which will cause similar + // recursion and implement a depth first addition + // + for (c = 0; c < nCount; c++) + { + this->fillBuffer( m_adaptor->getChild(t, c)); + } + + // If the tree had children and was not a nil (list) node, then we + // we need to add an UP node here to match the DOWN node + // + if (nilNode == false && nCount > 0) + { + this->addNavigationNode(CommonTokenType::TOKEN_UP); + } +} + + + +} + diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3convertutf.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3convertutf.hpp new file mode 100644 index 0000000000..7f4b7e0d25 --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3convertutf.hpp @@ -0,0 +1,143 @@ +/* + * Copyright 2001-2004 Unicode, Inc. + * + * Disclaimer + * + * This source code is provided as is by Unicode, Inc. No claims are + * made as to fitness for any particular purpose. No warranties of any + * kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. If this file has been + * purchased on magnetic or optical media from Unicode, Inc., the + * sole remedy for any claim will be exchange of defective media + * within 90 days of receipt. + * + * Limitations on Rights to Redistribute This Code + * + * Unicode, Inc. 
hereby grants the right to freely use the information + * supplied in this file in the creation of products supporting the + * Unicode Standard, and to make copies of this file in any form + * for internal or external distribution as long as this notice + * remains attached. + */ + +/* --------------------------------------------------------------------- + + Conversions between UTF32, UTF-16, and UTF-8. Header file. + + Several functions are included here, forming a complete set of + conversions between the three formats. UTF-7 is not included + here, but is handled in a separate source file. + + Each of these routines takes pointers to input buffers and output + buffers. The input buffers are const. + + Each routine converts the text between *sourceStart and sourceEnd, + putting the result into the buffer between *targetStart and + targetEnd. Note: the end pointers are *after* the last item: e.g. + *(sourceEnd - 1) is the last item. + + The return result indicates whether the conversion was successful, + and if not, whether the problem was in the source or target buffers. + (Only the first encountered problem is indicated.) + + After the conversion, *sourceStart and *targetStart are both + updated to point to the end of last text successfully converted in + the respective buffers. + + Input parameters: + sourceStart - pointer to a pointer to the source buffer. + The contents of this are modified on return so that + it points at the next thing to be converted. + targetStart - similarly, pointer to pointer to the target buffer. + sourceEnd, targetEnd - respectively pointers to the ends of the + two buffers, for overflow checking only. + + These conversion functions take a ConversionFlags argument. When this + flag is set to strict, both irregular sequences and isolated surrogates + will cause an error. When the flag is set to lenient, both irregular + sequences and isolated surrogates are converted. 
+ + Whether the flag is strict or lenient, all illegal sequences will cause + an error return. This includes sequences such as: <F4 90 80 80>, <C0 80>, + or <A0> in UTF-8, and values above 0x10FFFF in UTF-32. Conformant code + must check for illegal sequences. + + When the flag is set to lenient, characters over 0x10FFFF are converted + to the replacement character; otherwise (when the flag is set to strict) + they constitute an error. + + Output parameters: + The value "sourceIllegal" is returned from some routines if the input + sequence is malformed. When "sourceIllegal" is returned, the source + value will point to the illegal value that caused the problem. E.g., + in UTF-8 when a sequence is malformed, it points to the start of the + malformed sequence. + + Author: Mark E. Davis, 1994. + Rev History: Rick McGowan, fixes & updates May 2001. + Fixes & updates, Sept 2001. + +------------------------------------------------------------------------ */ + +/* --------------------------------------------------------------------- + The following 4 definitions are compiler-specific. + The C standard does not guarantee that wchar_t has at least + 16 bits, so wchar_t is no less portable than unsigned short! + All should be unsigned values to avoid sign extension during + bit mask & shift operations. +------------------------------------------------------------------------ */ + + +// Changes for ANTLR3 - Jim Idle, January 2008. +// builtin types defined for Unicode types changed to +// aliases for the types that are system determined by +// ANTLR at compile time. 
+// +// typedef unsigned long UTF32; /* at least 32 bits */ +// typedef unsigned short UTF16; /* at least 16 bits */ +// typedef unsigned char UTF8; /* typically 8 bits */ +// typedef unsigned char Boolean; /* 0 or 1 */ + +#ifndef _ANTLR3_CONVERTUTF_H +#define _ANTLR3_CONVERTUTF_H + +namespace antlr3 { + +typedef ANTLR_UINT32 UTF32; /* at least 32 bits */ +typedef ANTLR_UINT16 UTF16; /* at least 16 bits */ +typedef ANTLR_UINT8 UTF8; /* typically 8 bits */ + +/* Some fundamental constants */ +#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD +#define UNI_MAX_BMP (UTF32)0x0000FFFF +#define UNI_MAX_UTF16 (UTF32)0x0010FFFF +#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF +#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF + +#define UNI_SUR_HIGH_START (UTF32)0xD800 +#define UNI_SUR_HIGH_END (UTF32)0xDBFF +#define UNI_SUR_LOW_START (UTF32)0xDC00 +#define UNI_SUR_LOW_END (UTF32)0xDFFF +#define halfShift ((UTF32)10) +#define halfBase ((UTF32)0x0010000UL) +#define halfMask ((UTF32)0x3FFUL) + +enum ConversionResult { + conversionOK, /* conversion successful */ + sourceExhausted, /* partial character in source, but hit end */ + targetExhausted, /* insuff. 
room in target for conversion */ + sourceIllegal /* source sequence is illegal/malformed */ +}; + +enum ConversionFlags { + strictConversion = 0, + lenientConversion +} ; + + + +} + +#endif + +/* --------------------------------------------------------------------- */ diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3cyclicdfa.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3cyclicdfa.hpp new file mode 100644 index 0000000000..2129552f1a --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3cyclicdfa.hpp @@ -0,0 +1,95 @@ +/// Definition of a cyclic dfa structure such that it can be +/// initialized at compile time and have only a single +/// runtime function that can deal with all cyclic dfa +/// structures and show Java how it is done ;-) +/// +#ifndef ANTLR3_CYCLICDFA_HPP +#define ANTLR3_CYCLICDFA_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +namespace antlr3 { + +template<class ImplTraits, class CtxType> +class CyclicDFA : public ImplTraits::AllocPolicyType +{ +public: + typedef typename CtxType::StreamType StreamType; + typedef typename CtxType::ExceptionBaseType ExceptionBaseType; + typedef typename ImplTraits::template RecognizerType<StreamType> RecognizerType; + typedef typename StreamType::IntStreamType IntStreamType; + typedef typename StreamType::TokenType TokenType; + typedef TokenType CommonTokenType; + typedef CtxType ContextType; + +private: + /// Decision number that a particular static structure + /// represents. 
+ /// + const ANTLR_INT32 m_decisionNumber; + + /// What this decision represents + /// + const ANTLR_UCHAR* m_description; + const ANTLR_INT32* const m_eot; + const ANTLR_INT32* const m_eof; + const ANTLR_INT32* const m_min; + const ANTLR_INT32* const m_max; + const ANTLR_INT32* const m_accept; + const ANTLR_INT32* const m_special; + const ANTLR_INT32* const *const m_transition; + +public: + CyclicDFA( ANTLR_INT32 decisionNumber + , const ANTLR_UCHAR* description + , const ANTLR_INT32* const eot + , const ANTLR_INT32* const eof + , const ANTLR_INT32* const min + , const ANTLR_INT32* const max + , const ANTLR_INT32* const accept + , const ANTLR_INT32* const special + , const ANTLR_INT32* const *const transition ); + CyclicDFA( const CyclicDFA& cdfa ); + CyclicDFA& operator=( const CyclicDFA& dfa); + + ANTLR_INT32 specialStateTransition(CtxType * ctx, RecognizerType* recognizer, IntStreamType* is, ANTLR_INT32 s); + ANTLR_INT32 specialTransition(CtxType * ctx, RecognizerType* recognizer, IntStreamType* is, ANTLR_INT32 s); + + template<typename SuperType> + ANTLR_INT32 predict(CtxType* ctx, RecognizerType* recognizer, IntStreamType* is, SuperType& super); + +private: + void noViableAlt(RecognizerType* rec, ANTLR_UINT32 s); +}; + +} + +#include "antlr3cyclicdfa.inl" + +#endif diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3cyclicdfa.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3cyclicdfa.inl new file mode 100644 index 0000000000..61d15bfcf2 --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3cyclicdfa.inl @@ -0,0 +1,204 @@ +namespace antlr3 { + +template<class ImplTraits, class CtxType> +CyclicDFA<ImplTraits, CtxType>::CyclicDFA( ANTLR_INT32 decisionNumber + , const ANTLR_UCHAR* description + , const ANTLR_INT32* const eot + , const ANTLR_INT32* const eof + , const ANTLR_INT32* const min + , const ANTLR_INT32* const max + , const ANTLR_INT32* const accept + , const ANTLR_INT32* const special + , const ANTLR_INT32* const *const transition 
) + :m_decisionNumber(decisionNumber) + , m_eot(eot) + , m_eof(eof) + , m_min(min) + , m_max(max) + , m_accept(accept) + , m_special(special) + , m_transition(transition) +{ + m_description = description; +} + +template<class ImplTraits, class CtxType> +CyclicDFA<ImplTraits, CtxType>::CyclicDFA( const CyclicDFA& dfa ) +{ + m_decisionNumber = dfa.m_decisionNumber; + m_description = dfa.m_description; + m_eot = dfa.m_eot; + m_eof = dfa.m_eof; + m_min = dfa.m_min; + m_max = dfa.m_max; + m_accept = dfa.m_accept; + m_special = dfa.m_special; + m_transition = dfa.m_transition; +} + +template<class ImplTraits, class CtxType> +CyclicDFA<ImplTraits, CtxType>& CyclicDFA<ImplTraits, CtxType>::operator=( const CyclicDFA& dfa) +{ + m_decisionNumber = dfa.m_decisionNumber; + m_description = dfa.m_description; + m_eot = dfa.m_eot; + m_eof = dfa.m_eof; + m_min = dfa.m_min; + m_max = dfa.m_max; + m_accept = dfa.m_accept; + m_special = dfa.m_special; + m_transition = dfa.m_transition; + return *this; +} + +template<class ImplTraits, class CtxType> +ANTLR_INT32 CyclicDFA<ImplTraits, CtxType>::specialStateTransition(CtxType * , + RecognizerType* , + IntStreamType* , ANTLR_INT32 ) +{ + return -1; +} + +template<class ImplTraits, class CtxType> +ANTLR_INT32 CyclicDFA<ImplTraits, CtxType>::specialTransition(CtxType * /*ctx*/, + RecognizerType* /*recognizer*/, + IntStreamType* /*is*/, ANTLR_INT32 /*s*/) +{ + return 0; +} + +template<class ImplTraits, class CtxType> + template<typename SuperType> +ANTLR_INT32 CyclicDFA<ImplTraits, CtxType>::predict(CtxType * ctx, + RecognizerType* recognizer, + IntStreamType* is, SuperType& super) +{ + ANTLR_MARKER mark; + ANTLR_INT32 s; + ANTLR_INT32 specialState; + ANTLR_INT32 c; + + mark = is->mark(); /* Store where we are right now */ + s = 0; /* Always start with state 0 */ + + for (;;) + { + /* Pick out any special state entry for this state + */ + specialState = m_special[s]; + + /* Transition the special state and consume an input token + */ + if 
(specialState >= 0) + { + s = super.specialStateTransition(ctx, recognizer, is, specialState); + + // Error? + // + if (s<0) + { + // If the predicate/rule raised an exception then we leave it + // in tact, else we have an NVA. + // + if (recognizer->get_state()->get_error() != true) + { + this->noViableAlt(recognizer, s); + } + is->rewind(mark); + return 0; + } + is->consume(); + continue; + } + + /* Accept state? + */ + if (m_accept[s] >= 1) + { + is->rewind(mark); + return m_accept[s]; + } + + /* Look for a normal transition state based upon the input token element + */ + c = is->LA(1); + + /* Check against min and max for this state + */ + if (c>= m_min[s] && c <= m_max[s]) + { + ANTLR_INT32 snext; + + /* What is the next state? + */ + snext = m_transition[s][c - m_min[s]]; + + if (snext < 0) + { + /* Was in range but not a normal transition + * must check EOT, which is like the else clause. + * eot[s]>=0 indicates that an EOT edge goes to another + * state. + */ + if ( m_eot[s] >= 0) + { + s = m_eot[s]; + is->consume(); + continue; + } + this->noViableAlt(recognizer, s); + is->rewind(mark); + return 0; + } + + /* New current state - move to it + */ + s = snext; + is->consume(); + continue; + } + /* EOT Transition? + */ + if ( m_eot[s] >= 0) + { + s = m_eot[s]; + is->consume(); + continue; + } + /* EOF transition to accept state? + */ + if ( c == ImplTraits::CommonTokenType::TOKEN_EOF && m_eof[s] >= 0) + { + is->rewind(mark); + return m_accept[m_eof[s]]; + } + + /* No alt, so bomb + */ + this->noViableAlt(recognizer, s); + is->rewind(mark); + return 0; + } +} + +template<class ImplTraits, class CtxType> +void CyclicDFA<ImplTraits, CtxType>::noViableAlt(RecognizerType* rec, ANTLR_UINT32 s) +{ + // In backtracking mode, we just set the failed flag so that the + // alt can just exit right now. If we are parsing though, then + // we want the exception to be raised. 
+ // + if (rec->get_state()->get_backtracking() > 0) + { + rec->get_state()->set_failed(true); + } + else + { + ANTLR_Exception<ImplTraits, NO_VIABLE_ALT_EXCEPTION, StreamType>* ex + = new ANTLR_Exception<ImplTraits, NO_VIABLE_ALT_EXCEPTION, StreamType>( rec, (const char*)m_description ); + ex->set_decisionNum( m_decisionNumber ); + ex->set_state(s); + } +} + +} diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3debugeventlistener.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3debugeventlistener.hpp new file mode 100644 index 0000000000..bc5ea00a3b --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3debugeventlistener.hpp @@ -0,0 +1,399 @@ +/** + * \file + * The definition of all debugging events that a recognizer can trigger. + * + * \remark + * From the java implementation by Terence Parr... + * I did not create a separate AST debugging interface as it would create + * lots of extra classes and DebugParser has a dbg var defined, which makes + * it hard to change to ASTDebugEventListener. I looked hard at this issue + * and it is easier to understand as one monolithic event interface for all + * possible events. Hopefully, adding ST debugging stuff won't be bad. Leave + * for future. 4/26/2006. + */ + +#ifndef ANTLR3_DEBUG_EVENT_LISTENER_HPP +#define ANTLR3_DEBUG_EVENT_LISTENER_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. 
The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +namespace antlr3 { + +/// Default debugging port +/// +#define DEFAULT_DEBUGGER_PORT 0xBFCC; + +/** The ANTLR3 debugging interface for communicating with ANLTR Works. Function comments + * mostly taken from the Java version. + */ + +template<class ImplTraits> +class DebugEventListener : public ImplTraits::AllocPolicyType +{ +public: + typedef typename ImplTraits::TreeType TreeType; + typedef typename ImplTraits::TreeTypePtr TreeTypePtr; + typedef typename ImplTraits::StringType StringType; + typedef typename ImplTraits::CommonTokenType CommonTokenType; + typedef typename ImplTraits::TreeAdaptorType TreeAdaptorType; + +private: + /// The port number which the debug listener should listen on for a connection + /// + ANTLR_UINT32 m_port; + + /// The socket structure we receive after a successful accept on the serverSocket + /// + SOCKET m_socket; + + /** The version of the debugging protocol supported by the providing + * instance of the debug event listener. 
+ */ + int m_PROTOCOL_VERSION; + + /// The name of the grammar file that we are debugging + /// + StringType m_grammarFileName; + + /// Indicates whether we have already connected or not + /// + bool m_initialized; + + /// Used to serialize the values of any particular token we need to + /// send back to the debugger. + /// + StringType m_tokenString; + + + /// Allows the debug event system to access the adapter in use + /// by the recognizer, if this is a tree parser of some sort. + /// + TreeAdaptorType* m_adaptor; + + +public: + /// Wait for a connection from the debugger and initiate the + /// debugging session. + /// + virtual bool handshake(); + + /** The parser has just entered a rule. No decision has been made about + * which alt is predicted. This is fired AFTER init actions have been + * executed. Attributes are defined and available etc... + */ + virtual void enterRule( const char * grammarFileName, const char * ruleName); + + /** Because rules can have lots of alternatives, it is very useful to + * know which alt you are entering. This is 1..n for n alts. + */ + virtual void enterAlt( int alt); + + /** This is the last thing executed before leaving a rule. It is + * executed even if an exception is thrown. This is triggered after + * error reporting and recovery have occurred (unless the exception is + * not caught in this rule). This implies an "exitAlt" event. + */ + virtual void exitRule( const char * grammarFileName, const char * ruleName); + + /** Track entry into any (...) subrule other EBNF construct + */ + virtual void enterSubRule( int decisionNumber); + + virtual void exitSubRule( int decisionNumber); + + /** Every decision, fixed k or arbitrary, has an enter/exit event + * so that a GUI can easily track what LT/consume events are + * associated with prediction. You will see a single enter/exit + * subrule but multiple enter/exit decision events, one for each + * loop iteration. 
+ */ + virtual void enterDecision( int decisionNumber); + + virtual void exitDecision( int decisionNumber); + + /** An input token was consumed; matched by any kind of element. + * Trigger after the token was matched by things like match(), matchAny(). + */ + virtual void consumeToken( CommonTokenType* t); + + /** An off-channel input token was consumed. + * Trigger after the token was matched by things like match(), matchAny(). + * (unless of course the hidden token is first stuff in the input stream). + */ + virtual void consumeHiddenToken( CommonTokenType* t); + + /** Somebody (anybody) looked ahead. Note that this actually gets + * triggered by both LA and LT calls. The debugger will want to know + * which Token object was examined. Like consumeToken, this indicates + * what token was seen at that depth. A remote debugger cannot look + * ahead into a file it doesn't have so LT events must pass the token + * even if the info is redundant. + */ + virtual void LT( int i, CommonTokenType* t); + + /** The parser is going to look arbitrarily ahead; mark this location, + * the token stream's marker is sent in case you need it. + */ + virtual void mark( ANTLR_MARKER marker); + + /** After an arbitrarily long lookahead as with a cyclic DFA (or with + * any backtrack), this informs the debugger that stream should be + * rewound to the position associated with marker. + */ + virtual void rewind( ANTLR_MARKER marker); + + /** Rewind to the input position of the last marker. + * Used currently only after a cyclic DFA and just + * before starting a sem/syn predicate to get the + * input position back to the start of the decision. + * Do not "pop" the marker off the state. mark(i) + * and rewind(i) should balance still. 
+ */ + virtual void rewindLast(); + + virtual void beginBacktrack( int level); + + virtual void endBacktrack( int level, bool successful); + + /** To watch a parser move through the grammar, the parser needs to + * inform the debugger what line/charPos it is passing in the grammar. + * For now, this does not know how to switch from one grammar to the + * other and back for island grammars etc... + * + * This should also allow breakpoints because the debugger can stop + * the parser whenever it hits this line/pos. + */ + virtual void location( int line, int pos); + + /** A recognition exception occurred such as NoViableAltException. I made + * this a generic event so that I can alter the exception hierarchy later + * without having to alter all the debug objects. + * + * Upon error, the stack of enter rule/subrule must be properly unwound. + * If no viable alt occurs it is within an enter/exit decision, which + * also must be rewound. Even the rewind for each mark must be unwound. + * In the Java target this is pretty easy using try/finally, if a bit + * ugly in the generated code. The rewind is generated in DFA.predict() + * actually so no code needs to be generated for that. For languages + * w/o this "finally" feature (C++?), the target implementor will have + * to build an event stack or something. + * + * Across a socket for remote debugging, only the RecognitionException + * data fields are transmitted. The token object or whatever that + * caused the problem was the last object referenced by LT. The + * immediately preceding LT event should hold the unexpected Token or + * char. 
+ * + * Here is a sample event trace for grammar: + * + * b : C ({;}A|B) // {;} is there to prevent A|B becoming a set + * | D + * ; + * + * The sequence for this rule (with no viable alt in the subrule) for + * input 'c c' (there are 3 tokens) is: + * + * commence + * LT(1) + * enterRule b + * location 7 1 + * enter decision 3 + * LT(1) + * exit decision 3 + * enterAlt1 + * location 7 5 + * LT(1) + * consumeToken [c/<4>,1:0] + * location 7 7 + * enterSubRule 2 + * enter decision 2 + * LT(1) + * LT(1) + * recognitionException NoViableAltException 2 1 2 + * exit decision 2 + * exitSubRule 2 + * beginResync + * LT(1) + * consumeToken [c/<4>,1:1] + * LT(1) + * endResync + * LT(-1) + * exitRule b + * terminate + */ + template<typename ExceptionBaseType> + void recognitionException( ExceptionBaseType* ) {} + + /** Indicates the recognizer is about to consume tokens to resynchronize + * the parser. Any consume events from here until the recovered event + * are not part of the parse--they are dead tokens. + */ + virtual void beginResync(); + + /** Indicates that the recognizer has finished consuming tokens in order + * to resynchronize. There may be multiple beginResync/endResync pairs + * before the recognizer comes out of errorRecovery mode (in which + * multiple errors are suppressed). This will be useful + * in a gui where you want to probably grey out tokens that are consumed + * but not matched to anything in grammar. Anything between + * a beginResync/endResync pair was tossed out by the parser. + */ + virtual void endResync(); + + /** A semantic predicate was evaluate with this result and action text + */ + virtual void semanticPredicate( bool result, const char * predicate); + + /** Announce that parsing has begun. Not technically useful except for + * sending events over a socket. A GUI for example will launch a thread + * to connect and communicate with a remote parser. The thread will want + * to notify the GUI when a connection is made. 
ANTLR parsers + * trigger this upon entry to the first rule (the ruleLevel is used to + * figure this out). + */ + virtual void commence(); + + /** Parsing is over; successfully or not. Mostly useful for telling + * remote debugging listeners that it's time to quit. When the rule + * invocation level goes to zero at the end of a rule, we are done + * parsing. + */ + virtual void terminate(); + + /// Retrieve acknowledge response from the debugger. in fact this + /// response is never used at the moment. So we just read whatever + /// is in the socket buffer and throw it away. + /// + virtual void ack(); + + // T r e e P a r s i n g + + /** Input for a tree parser is an AST, but we know nothing for sure + * about a node except its type and text (obtained from the adaptor). + * This is the analog of the consumeToken method. The ID is usually + * the memory address of the node. + * If the type is UP or DOWN, then + * the ID is not really meaningful as it's fixed--there is + * just one UP node and one DOWN navigation node. + * + * Note that unlike the Java version, the node type of the C parsers + * is always fixed as pANTLR3_BASE_TREE because all such structures + * contain a super pointer to their parent, which is generally COMMON_TREE and within + * that there is a super pointer that can point to a user type that encapsulates it. + * Almost akin to saying that it is an interface pointer except we don't need to + * know what the interface is in full, just those bits that are the base. + * @param t + */ + virtual void consumeNode( TreeTypePtr t); + + /** The tree parser looked ahead. If the type is UP or DOWN, + * then the ID is not really meaningful as it's fixed--there is + * just one UP node and one DOWN navigation node. + */ + virtual void LTT( int i, TreeTypePtr t); + + + // A S T E v e n t s + + /** A nil was created (even nil nodes have a unique ID... + * they are not "null" per se). 
As of 4/28/2006, this + * seems to be uniquely triggered when starting a new subtree + * such as when entering a subrule in automatic mode and when + * building a tree in rewrite mode. + * + * If you are receiving this event over a socket via + * RemoteDebugEventSocketListener then only t.ID is set. + */ + virtual void nilNode( TreeTypePtr t); + + /** If a syntax error occurs, recognizers bracket the error + * with an error node if they are building ASTs. This event + * notifies the listener that this is the case + */ + virtual void errorNode( TreeTypePtr t); + + /** Announce a new node built from token elements such as type etc... + * + * If you are receiving this event over a socket via + * RemoteDebugEventSocketListener then only t.ID, type, text are + * set. + */ + virtual void createNode( TreeTypePtr t); + + /** Announce a new node built from an existing token. + * + * If you are receiving this event over a socket via + * RemoteDebugEventSocketListener then only node.ID and token.tokenIndex + * are set. + */ + virtual void createNodeTok( TreeTypePtr node, CommonTokenType* token); + + /** Make a node the new root of an existing root. See + * + * Note: the newRootID parameter is possibly different + * than the TreeAdaptor.becomeRoot() newRoot parameter. + * In our case, it will always be the result of calling + * TreeAdaptor.becomeRoot() and not root_n or whatever. + * + * The listener should assume that this event occurs + * only when the current subrule (or rule) subtree is + * being reset to newRootID. + * + * If you are receiving this event over a socket via + * RemoteDebugEventSocketListener then only IDs are set. + * + * @see org.antlr.runtime.tree.TreeAdaptor.becomeRoot() + */ + virtual void becomeRoot( TreeTypePtr newRoot, TreeTypePtr oldRoot); + + /** Make childID a child of rootID. + * + * If you are receiving this event over a socket via + * RemoteDebugEventSocketListener then only IDs are set. 
+ * + * @see org.antlr.runtime.tree.TreeAdaptor.addChild() + */ + virtual void addChild( TreeTypePtr root, TreeTypePtr child); + + /** Set the token start/stop token index for a subtree root or node. + * + * If you are receiving this event over a socket via + * RemoteDebugEventSocketListener then only t.ID is set. + */ + virtual void setTokenBoundaries( TreeTypePtr t, ANTLR_MARKER tokenStartIndex, ANTLR_MARKER tokenStopIndex); + + /// Free up the resources allocated to this structure + /// + virtual ~DebugEventListener(); +}; + +} + +#endif + diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3defs.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3defs.hpp new file mode 100644 index 0000000000..7f332981e6 --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3defs.hpp @@ -0,0 +1,114 @@ +/** \file + * Basic type and constant definitions for ANTLR3 Runtime. + */ +#ifndef _ANTLR3DEFS_HPP +#define _ANTLR3DEFS_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// not used in C++ target (kept for "historical" reasons, the generated code still uses this) +#define ANTLR_SIZE_HINT 0U + +/* Work out what operating system/compiler this is. We just do this once + * here and use an internal symbol after this. + */ +#ifdef _WIN64 +# define ANTLR_USE_64BIT +#endif + +#ifdef _WIN32 + +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif + +/* Allow VC 8 (vs2005) and above to use 'secure' versions of various functions such as sprintf + */ +#ifndef _CRT_SECURE_NO_DEPRECATE +#define _CRT_SECURE_NO_DEPRECATE +#endif + +#ifndef NOMINMAX +#define NOMINMAX +#endif +#include <winsock2.h> + +#define ANTLR_INLINE __inline + +typedef FILE * ANTLR_FDSC; + +typedef struct sockaddr_in ANTLR_SOCKADDRT, * pANTLR_SOCKADDRT; // Type used for socket address declaration +typedef struct sockaddr ANTLR_SOCKADDRC, * pANTLR_SOCKADDRC; // Type used for cast on accept() + +#define ANTLR_CLOSESOCKET closesocket + +#else // Un*x + +#ifdef __LP64__ +#define ANTLR_USE_64BIT +#endif + +#define ANTLR_INLINE inline + +typedef int SOCKET; +typedef FILE * ANTLR_FDSC; + +#endif + +// Standard integer types (since C++11) (should work with MSVC 2010/2013, gcc, clang) +// +typedef std::int32_t ANTLR_CHAR; +typedef std::uint32_t ANTLR_UCHAR; + +typedef std::int8_t ANTLR_INT8; +typedef std::int16_t ANTLR_INT16; +typedef std::int32_t ANTLR_INT32; +typedef std::int64_t ANTLR_INT64; + +typedef std::uint8_t ANTLR_UINT8; +typedef std::uint16_t 
ANTLR_UINT16; +typedef std::uint32_t ANTLR_UINT32; +typedef std::uint64_t ANTLR_UINT64; +typedef std::uint64_t ANTLR_BITWORD; + +#ifdef ANTLR_USE_64BIT +#define ANTLR_UINT64_CAST(ptr) (ANTLR_UINT64)(ptr)) +#define ANTLR_UINT32_CAST(ptr) (ANTLR_UINT32)((ANTLR_UINT64)(ptr)) +typedef ANTLR_INT64 ANTLR_MARKER; +typedef ANTLR_UINT64 ANTLR_INTKEY; +#else +#define ANTLR_UINT64_CAST(ptr) (ANTLR_UINT64)((ANTLR_UINT32)(ptr)) +#define ANTLR_UINT32_CAST(ptr) (ANTLR_UINT32)(ptr) +typedef ANTLR_INT32 ANTLR_MARKER; +typedef ANTLR_UINT32 ANTLR_INTKEY; +#endif + +#define ANTLR_UINT64_LIT(lit) lit##ULL + +#endif /* _ANTLR3DEFS_H */ diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3errors.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3errors.hpp new file mode 100644 index 0000000000..0413d87e39 --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3errors.hpp @@ -0,0 +1,43 @@ +#ifndef _ANTLR3ERRORS_HPP +#define _ANTLR3ERRORS_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#define ANTLR_SUCCESS 0 +#define ANTLR_FAIL 1 + +/** Indicates end of character stream and is an invalid Unicode code point. */ +#define ANTLR_CHARSTREAM_EOF 0xFFFFFFFF + +/** Indicates memoizing on a rule failed. + */ +#define MEMO_RULE_FAILED 0xFFFFFFFE +#define MEMO_RULE_UNKNOWN 0xFFFFFFFF + +#endif /* _ANTLR3ERRORS_H */ diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3exception.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3exception.hpp new file mode 100644 index 0000000000..9bc18a0432 --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3exception.hpp @@ -0,0 +1,207 @@ +/** \file + * Contains the definition of a basic ANTLR3 exception structure created + * by a recognizer when errors are found/predicted. + + * Two things to be noted for C++ Target: + a) This is not the C++ Exception. Consider this just as yet another class. This + has to be like this because there is a inbuilt recovery and hence there is a try..catch + block for every new token. This is not how C++ Exceptions work.Still there is exception support, as we are handling things like OutofMemory by + throwing exceptions + + b) There is no use in implementing templates here, as all the exceptions are grouped in + one container and hence needs virtual functions. But this would occur only when there is + a exception/ while deleting base recognizer. 
So shouldn't incur the overhead in normal operation + */ +#ifndef _ANTLR3_EXCEPTION_HPP +#define _ANTLR3_EXCEPTION_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/** Base structure for an ANTLR3 exception tracker.
 *
 *  Holds everything that is recorded about a recognition error (message,
 *  stream name, position, offending token/node/character, expectations) and
 *  links exceptions into a singly linked chain through m_nextException.
 *  The class is abstract: concrete exceptions are instances of
 *  ANTLR_Exception, which supply the name, type and reporting functions.
 */

template<class ImplTraits, class StreamType>
class ANTLR_ExceptionBase
{
public:
	typedef typename StreamType::UnitType TokenType;
	typedef typename StreamType::IntStreamType IntStreamType;
	typedef typename ImplTraits::AllocPolicyType AllocPolicyType;
	typedef typename ImplTraits::StringType StringType;
	typedef typename ImplTraits::StringStreamType StringStreamType;
	typedef typename ImplTraits::BitsetType BitsetType;
	typedef typename ImplTraits::BitsetListType BitsetListType;
	typedef typename ImplTraits::template ExceptionBaseType<StreamType> ExceptionBaseType;

protected:
	/** The printable message that goes with this exception, in your preferred
	 *  encoding format. ANTLR just uses ASCII by default but you can ignore these
	 *  messages or convert them to another format or whatever of course. They are
	 *  really internal messages that you then decide how to print out in a form that
	 *  the users of your product will understand, as they are unlikely to know what
	 *  to do with "Recognition exception at: [[TOK_GERUND..... " ;-)
	 */
	StringType m_message;

	/** Name of the file/input source for reporting. Note that this may be empty!!
	 */
	StringType m_streamName;

	/** Indicates the index of the 'token' we were looking at when the
	 *  exception occurred.
	 */
	ANTLR_MARKER m_index;

	/** Indicates what the current token/tree was when the error occurred. Since not
	 *  all input streams will be able to retrieve the nth token, we track it here
	 *  instead. This is for parsers, and even tree parsers may set this.
	 *
	 *  The exception owns this object: set_token() deletes the previously
	 *  stored token and the destructor deletes the current one.
	 */
	const TokenType* m_token;

	/** Pointer to the next exception in the chain (if any).
	 *  The destructor deletes every exception reachable through this link.
	 */
	ExceptionBaseType* m_nextException;

	/** Indicates the token we were expecting to see next when the error occurred
	 */
	ANTLR_UINT32 m_expecting;

	/** Indicates a set of tokens that we were expecting to see one of when the
	 *  error occurred. It is a following bitset list, so you can load it and
	 *  use ->toIntList() on it to generate an array of integer tokens that it
	 *  represents.
	 */
	BitsetListType* m_expectingSet;

	/** If this is a tree parser exception then the node is set to point to the node
	 *  that caused the issue.
	 */
	TokenType* m_node;

	/** The current character when an error occurred - for lexers.
	 */
	ANTLR_UCHAR m_c;

	/** Track the line at which the error occurred in case this is
	 *  generated from a lexer. We need to track this since the
	 *  unexpected char doesn't carry the line info.
	 */
	ANTLR_UINT32 m_line;

	/** Character position in the line where the error occurred.
	 */
	ANTLR_INT32 m_charPositionInLine;

	/** Decision number for NVE (presumably the no-viable-alternative
	 *  exception - TODO confirm against the recognizer code).
	 */
	ANTLR_UINT32 m_decisionNum;

	/** State for NVE (see m_decisionNum).
	 */
	ANTLR_UINT32 m_state;

	/** Rule name for failed predicate exception
	 */
	StringType m_ruleName;

	/** Pointer to the input stream that this exception occurred in.
	 */
	IntStreamType* m_input;

public:
	// Plain accessors. Note that get_message(), get_streamName() and
	// get_ruleName() are non-const and hand out a mutable reference.
	StringType& get_message();
	StringType& get_streamName();
	ANTLR_MARKER get_index() const;
	const TokenType* get_token() const;
	ExceptionBaseType* get_nextException() const;
	ANTLR_UINT32 get_expecting() const;
	BitsetListType* get_expectingSet() const;
	TokenType* get_node() const;
	ANTLR_UCHAR get_c() const;
	ANTLR_UINT32 get_line() const;
	ANTLR_INT32 get_charPositionInLine() const;
	ANTLR_UINT32 get_decisionNum() const;
	ANTLR_UINT32 get_state() const;
	StringType& get_ruleName();
	IntStreamType* get_input() const;

	// Plain mutators. set_token() additionally deletes the token that was
	// stored before (see m_token).
	void set_message( const StringType& message );
	void set_streamName( const StringType& streamName );
	void set_index( ANTLR_MARKER index );
	void set_token( const TokenType* token );
	void set_nextException( ExceptionBaseType* nextException );
	void set_expecting( ANTLR_UINT32 expecting );
	void set_expectingSet( BitsetListType* expectingSet );
	void set_node( TokenType* node );
	void set_c( ANTLR_UCHAR c );
	void set_line( ANTLR_UINT32 line );
	void set_charPositionInLine( ANTLR_INT32 charPositionInLine );
	void set_decisionNum( ANTLR_UINT32 decisionNum );
	void set_state( ANTLR_UINT32 state );
	void set_ruleName( const StringType& ruleName );
	void set_input( IntStreamType* input );
	StringType getDescription() const;

	// Interface implemented by ANTLR_Exception.
	virtual StringType getName() const = 0;
	virtual ANTLR_UINT32 getType() const = 0;
	virtual void print() const = 0;
	virtual void displayRecognitionError( ANTLR_UINT8** tokenNames, StringStreamType& str ) const = 0;

	/** Deletes the rest of the exception chain and the owned token. */
	virtual ~ANTLR_ExceptionBase();

protected:
	/** Stores the message and zero/NULL-initialises every other scalar and
	 *  pointer member. Protected: only derived exceptions are constructed.
	 */
	ANTLR_ExceptionBase(const StringType& message);
};


/** Concrete exception whose kind is fixed at compile time by the
 *  ExceptionType template argument Ex.
 *
 *  The constructor fills the exception from the recognizer and installs it as
 *  the recognizer state's current exception, chaining any exception that was
 *  already outstanding behind it.
 */
template<class ImplTraits, ExceptionType Ex, class StreamType>
class ANTLR_Exception : public ImplTraits::template ExceptionBaseType<StreamType>
{
public:
	typedef typename ImplTraits::StringType StringType;
	typedef typename ImplTraits::StringStreamType StringStreamType;
	typedef typename ImplTraits::BitsetType BitsetType;
	typedef typename ImplTraits::template ExceptionBaseType<StreamType> BaseType;

public:
	template<typename BaseRecognizerType>
	ANTLR_Exception(BaseRecognizerType* recognizer, const StringType& message);

	const StringType& get_name() const;
	/** Returns the Java-style class name that corresponds to Ex. */
	virtual StringType getName() const;
	/** Returns Ex converted to ANTLR_UINT32. */
	virtual ANTLR_UINT32 getType() const;
	/** Writes the message (or the exception number if there is no message) to stderr. */
	virtual void print() const;
	/** Appends a description of the error, chosen by Ex, to str_stream. */
	virtual void displayRecognitionError( ANTLR_UINT8** tokenNames, StringStreamType& str_stream) const;
};
0; + m_token = NULL; + m_expecting = 0; + m_expectingSet = NULL; + m_node = NULL; + m_c = 0; + m_line = 0; + m_charPositionInLine = 0; + m_decisionNum = 0; + m_state = 0; + m_nextException = NULL; +} + +template<class ImplTraits, class StreamType> +ANTLR_INLINE typename ANTLR_ExceptionBase<ImplTraits, StreamType>::StringType& ANTLR_ExceptionBase<ImplTraits, StreamType>::get_message() +{ + return m_message; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE typename ANTLR_ExceptionBase<ImplTraits, StreamType>::StringType& ANTLR_ExceptionBase<ImplTraits, StreamType>::get_streamName() +{ + return m_streamName; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_MARKER ANTLR_ExceptionBase<ImplTraits, StreamType>::get_index() const +{ + return m_index; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE const typename ANTLR_ExceptionBase<ImplTraits, StreamType>::TokenType* ANTLR_ExceptionBase<ImplTraits, StreamType>::get_token() const +{ + return m_token; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE typename ANTLR_ExceptionBase<ImplTraits, StreamType>::ExceptionBaseType* ANTLR_ExceptionBase<ImplTraits, StreamType>::get_nextException() const +{ + return m_nextException; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_UINT32 ANTLR_ExceptionBase<ImplTraits, StreamType>::get_expecting() const +{ + return m_expecting; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE typename ANTLR_ExceptionBase<ImplTraits, StreamType>::BitsetListType* ANTLR_ExceptionBase<ImplTraits, StreamType>::get_expectingSet() const +{ + return m_expectingSet; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE typename ANTLR_ExceptionBase<ImplTraits, StreamType>::TokenType* ANTLR_ExceptionBase<ImplTraits, StreamType>::get_node() const +{ + return m_node; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_UCHAR ANTLR_ExceptionBase<ImplTraits, StreamType>::get_c() const +{ + return 
m_c; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_UINT32 ANTLR_ExceptionBase<ImplTraits, StreamType>::get_line() const +{ + return m_line; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_INT32 ANTLR_ExceptionBase<ImplTraits, StreamType>::get_charPositionInLine() const +{ + return m_charPositionInLine; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_UINT32 ANTLR_ExceptionBase<ImplTraits, StreamType>::get_decisionNum() const +{ + return m_decisionNum; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_UINT32 ANTLR_ExceptionBase<ImplTraits, StreamType>::get_state() const +{ + return m_state; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE typename ANTLR_ExceptionBase<ImplTraits, StreamType>::StringType& ANTLR_ExceptionBase<ImplTraits, StreamType>::get_ruleName() +{ + return m_ruleName; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE typename ANTLR_ExceptionBase<ImplTraits, StreamType>::IntStreamType* ANTLR_ExceptionBase<ImplTraits, StreamType>::get_input() const +{ + return m_input; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_message( const StringType& message ) +{ + m_message = message; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_streamName( const StringType& streamName ) +{ + m_streamName = streamName; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_index( ANTLR_MARKER index ) +{ + m_index = index; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_token( const TokenType* token ) +{ + if (m_token) + delete m_token; + m_token = token; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_nextException( 
ExceptionBaseType* nextException ) +{ + m_nextException = nextException; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_expecting( ANTLR_UINT32 expecting ) +{ + m_expecting = expecting; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_expectingSet( BitsetListType* expectingSet ) +{ + m_expectingSet = expectingSet; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_node( TokenType* node ) +{ + m_node = node; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_c( ANTLR_UCHAR c ) +{ + m_c = c; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_line( ANTLR_UINT32 line ) +{ + m_line = line; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_charPositionInLine( ANTLR_INT32 charPositionInLine ) +{ + m_charPositionInLine = charPositionInLine; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_decisionNum( ANTLR_UINT32 decisionNum ) +{ + m_decisionNum = decisionNum; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_state( ANTLR_UINT32 state ) +{ + m_state = state; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_ruleName( const StringType& ruleName ) +{ + m_ruleName = ruleName; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void ANTLR_ExceptionBase<ImplTraits, StreamType>::set_input( IntStreamType* input ) +{ + m_input = input; +} + + +template<class ImplTraits, ExceptionType Ex, class StreamType> + template<typename BaseRecognizerType> +ANTLR_Exception<ImplTraits, 
Ex, StreamType>::ANTLR_Exception(BaseRecognizerType* recognizer, const StringType& message) + :BaseType( message ) +{ + recognizer->get_super()->fillExceptionData( this ); + BaseType::m_input = recognizer->get_super()->get_istream(); + BaseType::m_nextException = recognizer->get_state()->get_exception(); /* So we don't leak the memory */ + recognizer->get_state()->set_exception(this); + recognizer->get_state()->set_error( true ); /* Exception is outstanding */ +} + +template<class ImplTraits, ExceptionType Ex, class StreamType> +ANTLR_UINT32 ANTLR_Exception<ImplTraits, Ex, StreamType>::getType() const +{ + return static_cast<ANTLR_UINT32>(Ex); +} + +template<class ImplTraits, ExceptionType Ex, class StreamType> +void ANTLR_Exception<ImplTraits, Ex, StreamType>::print() const +{ + /* Ensure valid pointer + */ + /* Number if no message, else the message + */ + if ( BaseType::m_message.empty() ) + { + fprintf(stderr, "ANTLR3_EXCEPTION number %d (%08X).\n", Ex, Ex); + } + else + { + fprintf(stderr, "ANTLR3_EXCEPTION: %s\n", BaseType::m_message.c_str() ); + } +} + +template<class ImplTraits, ExceptionType Ex, class StreamType> +typename ANTLR_Exception<ImplTraits, Ex, StreamType>::StringType + ANTLR_Exception<ImplTraits, Ex, StreamType>::getName() const +{ + const char* exArray[] = { + "org.antlr.runtime.RecognitionException" + , "org.antlr.runtime.MismatchedTokenException" + , "org.antlr.runtime.NoViableAltException" + , "org.antlr.runtime.MismatchedSetException" + , "org.antlr.runtime.EarlyExitException" + , "org.antlr.runtime.FailedPredicateException" + , "org.antlr.runtime.MismatchedTreeNodeException" + , "org.antlr.runtime.tree.RewriteEarlyExitException" + , "org.antlr.runtime.UnwantedTokenException" + , "org.antlr.runtime.MissingTokenException" + }; + return StringType(exArray[Ex]); +} + +template<class ImplTraits, ExceptionType Ex, class StreamType> +void ANTLR_Exception<ImplTraits, Ex, StreamType>::displayRecognitionError( ANTLR_UINT8** tokenNames, + 
StringStreamType& str_stream ) const +{ + switch( Ex ) + { + case RECOGNITION_EXCEPTION: + // Indicates that the recognizer received a token + // in the input that was not predicted. This is the basic exception type + // from which all others are derived. So we assume it was a syntax error. + // You may get this if there are not more tokens and more are needed + // to complete a parse for instance. + // + str_stream << " : syntax error...\n"; + break; + case UNWANTED_TOKEN_EXCEPTION: + // Indicates that the recognizer was fed a token which seesm to be + // spurious input. We can detect this when the token that follows + // this unwanted token would normally be part of the syntactically + // correct stream. Then we can see that the token we are looking at + // is just something that should not be there and throw this exception. + // + if (tokenNames == NULL) + { + str_stream << " : Extraneous input..."; + } + else + { + if ( BaseType::m_expecting == ImplTraits::CommonTokenType::TOKEN_EOF) + { + str_stream << " : Extraneous input - expected <EOF>\n"; + } + else + { + str_stream << " : Extraneous input - expected " + << tokenNames[ BaseType::m_expecting] << " ...\n"; + } + } + break; + case MISSING_TOKEN_EXCEPTION: + // Indicates that the recognizer detected that the token we just + // hit would be valid syntactically if preceeded by a particular + // token. Perhaps a missing ';' at line end or a missing ',' in an + // expression list, and such like. 
+ // + if (tokenNames == NULL) + { + str_stream << " : Missing token (" + << BaseType::m_expecting << ")...\n"; + } + else + { + if ( BaseType::m_expecting == ImplTraits::CommonTokenType::TOKEN_EOF ) + { + str_stream <<" : Missing <EOF>\n"; + } + else + { + str_stream << " : Missing " << tokenNames[BaseType::m_expecting] <<" \n"; + } + } + break; + case NO_VIABLE_ALT_EXCEPTION: + // We could not pick any alt decision from the input given + // so god knows what happened - however when you examine your grammar, + // you should. It means that at the point where the current token occurred + // that the DFA indicates nowhere to go from here. + // + str_stream << " : cannot match to any predicted input...\n"; + break; + case MISMATCHED_SET_EXCEPTION: + { + ANTLR_UINT32 count; + ANTLR_UINT32 bit; + ANTLR_UINT32 size; + ANTLR_UINT32 numbits; + + // This means we were able to deal with one of a set of + // possible tokens at this point, but we did not see any + // member of that set. + // + str_stream << " : unexpected input :"; + + // What tokens could we have accepted at this point in the + // parse? + // + count = 0; + size = 0; + if (BaseType::m_expectingSet != NULL) { + std::unique_ptr<BitsetType> errBits(BaseType::m_expectingSet->bitsetLoad()); + numbits = errBits->numBits(); + size = errBits->size(); + } + + if (size > 0) + { + // However many tokens we could have dealt with here, it is usually + // not useful to print ALL of the set here. I arbitrarily chose 8 + // here, but you should do whatever makes sense for you of course. + // No token number 0, so look for bit 1 and on. + // + str_stream << " expected one of : "; + for (bit = 1; bit < numbits && count < 8 && count < size; bit++) + { + // TODO: This doesn;t look right - should be asking if the bit is set!! + // + if (tokenNames[bit]) + { + str_stream << ( count > 0 ? 
", " : "" ) + << tokenNames[bit]; + count++; + } + } + str_stream << "\n"; + } + else + { + str_stream << " nothing is expected here\n"; + } + } + break; + case EARLY_EXIT_EXCEPTION: + str_stream << " : missing elements...\n"; + break; + default: + str_stream << " : syntax not recognized...\n"; + break; + } +} + +template<class ImplTraits, class StreamType> +ANTLR_ExceptionBase<ImplTraits,StreamType>::~ANTLR_ExceptionBase() +{ + ANTLR_ExceptionBase<ImplTraits,StreamType>* next; + ANTLR_ExceptionBase<ImplTraits,StreamType>* ex = m_nextException; + + /* Ensure valid pointer + */ + while (ex != NULL) + { + /* Pick up anythign following now, before we free the + * current memory block. + */ + next = ex->m_nextException; + ex->m_nextException = NULL; + + /* Free the actual structure itself + */ + delete ex; + + ex = next; + } + if ( m_token) + delete m_token; +} + +} diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3filestream.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3filestream.hpp new file mode 100644 index 0000000000..140b1a5f67 --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3filestream.hpp @@ -0,0 +1,70 @@ +#ifndef _ANTLR3_FILESTREAM_HPP +#define _ANTLR3_FILESTREAM_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. 
/** Static helpers that wrap the C stdio / stat calls used to load an input
 *  file into memory. The class has no state; ImplTraits only supplies the
 *  allocator used for the file buffer.
 */
template<class ImplTraits>
class FileUtils
{
public:
	/** \brief Open an operating system file and return the descriptor
	 *  We just use the common open() and related functions here.
	 *  Later we might find better ways on systems
	 *  such as Windows and OpenVMS for instance. But the idea is to read the
	 *  whole file at once anyway, so it may be irrelevant.
	 *
	 *  The implementation forwards to fopen(); the result is NULL when the
	 *  file cannot be opened.
	 */
	static ANTLR_FDSC	AntlrFopen(const ANTLR_UINT8* filename, const char * mode);

	/** \brief Close an operating system file and free any handles
	 *  etc.
	 */
	static void		AntlrFclose	(ANTLR_FDSC fd);

	/** \brief Return the size of the named file as reported by _stat(),
	 *  converted to ANTLR_UINT32.
	 */
	static ANTLR_UINT32	AntlrFsize(const ANTLR_UINT8* filename);

	/** \brief Read the whole of \p fileName into a newly allocated buffer and
	 *  install it on \p input through set_data(), set_sizeBuf() and
	 *  set_isAllocated(true).
	 *
	 *  Throws ParseFileAbsentException if the file cannot be opened;
	 *  otherwise returns ANTLR_SUCCESS.
	 */
	template<typename InputStreamType>
	static ANTLR_UINT32	AntlrRead8Bit(InputStreamType* input, const ANTLR_UINT8* fileName);

	/** \brief Read \p count bytes from \p fdsc into \p data.
	 *
	 *  The bytes are requested from fread() as ONE item of \p count bytes, so
	 *  the value returned is the number of complete items read (0 or 1), not
	 *  a byte count.
	 */
	static ANTLR_UINT32	AntlrFread(ANTLR_FDSC fdsc, ANTLR_UINT32 count,  void* data);

};
+ */ + static void AntlrFclose (ANTLR_FDSC fd); + + static ANTLR_UINT32 AntlrFsize(const ANTLR_UINT8* filename); + template<typename InputStreamType> + static ANTLR_UINT32 AntlrRead8Bit(InputStreamType* input, const ANTLR_UINT8* fileName); + static ANTLR_UINT32 AntlrFread(ANTLR_FDSC fdsc, ANTLR_UINT32 count, void* data); + +}; + +class ParseFileAbsentException : public std::exception +{ + virtual const char* what() const noexcept + { + return " Parse File not Present"; + } +}; + +} + +#include "antlr3filestream.inl" + +#endif diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3filestream.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3filestream.inl new file mode 100644 index 0000000000..b67804adf2 --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3filestream.inl @@ -0,0 +1,74 @@ +namespace antlr3 { + +template<class ImplTraits> +ANTLR_FDSC FileUtils<ImplTraits>::AntlrFopen(const ANTLR_UINT8* filename, const char * mode) +{ + return (ANTLR_FDSC)fopen((const char *)filename, mode); +} + +template<class ImplTraits> +void FileUtils<ImplTraits>::AntlrFclose (ANTLR_FDSC fd) +{ + fclose(fd); +} + +template<class ImplTraits> +ANTLR_UINT32 FileUtils<ImplTraits>::AntlrFsize(const ANTLR_UINT8* filename) +{ + struct _stat statbuf; + + _stat((const char *)filename, &statbuf); + + return (ANTLR_UINT32)statbuf.st_size; +} + +template<class ImplTraits> +ANTLR_UINT32 FileUtils<ImplTraits>::AntlrFread(ANTLR_FDSC fdsc, ANTLR_UINT32 count, void* data) +{ + return (ANTLR_UINT32)fread(data, (size_t)count, 1, fdsc); +} + +template<class ImplTraits> + template<typename InputStreamType> +ANTLR_UINT32 FileUtils<ImplTraits>::AntlrRead8Bit(InputStreamType* input, const ANTLR_UINT8* fileName) +{ + ANTLR_FDSC infile; + ANTLR_UINT32 fSize; + + /* Open the OS file in read binary mode + */ + infile = FileUtils<ImplTraits>::AntlrFopen(fileName, "rb"); + + /* Check that it was there + */ + if (infile == NULL) + { + ParseFileAbsentException ex; + throw ex; + } + + /* It was 
there, so we can read the bytes now + */ + fSize = FileUtils<ImplTraits>::AntlrFsize(fileName); /* Size of input file */ + + /* Allocate buffer for this input set + */ + void* data = ImplTraits::AllocPolicyType::alloc(fSize); + /* Now we read the file. Characters are not converted to + * the internal ANTLR encoding until they are read from the buffer + */ + FileUtils<ImplTraits>::AntlrFread(infile, fSize, data ); + + input->set_data( (unsigned char*) data ); + input->set_sizeBuf( fSize ); + + input->set_isAllocated(true); + + /* And close the file handle + */ + FileUtils<ImplTraits>::AntlrFclose(infile); + + return ANTLR_SUCCESS; +} + +} diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3input.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3input.hpp new file mode 100644 index 0000000000..d167f5b392 --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3input.hpp @@ -0,0 +1,325 @@ +/** \file + * Defines the basic structures used to manipulate character + * streams from any input source. Any character size and encoding + * can in theory be used, so long as a set of functinos is provided that + * can return a 32 bit Integer representation of their characters amd efficiently mark and revert + * to specific offsets into their input streams. + */ +#ifndef _ANTLR_INPUT_HPP +#define _ANTLR_INPUT_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. 
/// Master context structure for an ANTLR3 C runtime based input stream.
/// \ingroup apistructures. Calling LT on this doesn't seem right. You would
/// call it only with parser / TreeParser, and their respective input streams
/// have that function. Calling it from the lexer will cause a compile time error.
///
/// The stream either borrows a caller supplied buffer (string constructor) or
/// owns a buffer read from a file (file constructor); m_isAllocated records
/// which, and the destructor frees the buffer only in the second case.
///

template<class ImplTraits>
class	InputStream :   public ImplTraits::template IntStreamType< typename ImplTraits::InputStreamType >
{
public:
	typedef typename ImplTraits::AllocPolicyType AllocPolicyType;
	typedef typename ImplTraits::LexStateType LexStateType;
	typedef typename ImplTraits::template IntStreamType< typename ImplTraits::InputStreamType > IntStreamType;
	typedef IntStreamType BaseType;
	typedef typename ImplTraits::StreamDataType UnitType;
	typedef UnitType DataType;
	typedef UnitType TokenType;
	typedef typename AllocPolicyType::template VectorType<LexStateType> MarkersType;
	typedef typename ImplTraits::StringType StringType;

private:
	/** Pointer to the start of the input string, characters may be
	 *  taken as offsets from here and in original input format encoding.
	 */
	const DataType*		m_data;

	/** Pointer to the next character to be consumed from the input data
	 *  This is cast to point at the encoding of the original file that
	 *  was read by the functions installed as pointer in this input stream
	 *  context instance at file/string/whatever load time.
	 */
	const DataType*		m_nextChar;

	/** Number of characters that can be consumed at this point in time.
	 *  Mostly this is just what is left in the pre-read buffer, but if the
	 *  input source is a stream such as a socket or something then we may
	 *  call special read code to wait for more input.
	 */
	ANTLR_UINT32	m_sizeBuf;

	/** The line number we are traversing in the input file. This gets incremented
	 *  by a newline() call in the lexer grammar actions.
	 */
	ANTLR_UINT32	m_line;

	/** Pointer into the input buffer where the current line
	 *  started.
	 */
	const DataType*		m_currentLine;

	/** The offset within the current line of the current character
	 */
	ANTLR_INT32		m_charPositionInLine;

	/** Tracks how deep mark() calls are nested
	 */
	ANTLR_UINT32	m_markDepth;

	/** List of mark() points in the input stream
	 */
	MarkersType		m_markers;

	/** File name string. Both constructors set it to the stream name.
	 *  (It is a StringType value, so unlike in the C runtime there is no
	 *  pointer for the stream to free.)
	 */
	StringType		m_fileName;

	/** File number, needs to be set manually to some file index of your devising.
	 */
	ANTLR_UINT32	m_fileNo;

	/// Character that automatically causes an internal line count
	/// increment.
	///
	ANTLR_UCHAR		m_newlineChar;

	/// Indicates the size, in 8 bit units, of a single character. Note that
	/// the C runtime does not deal with surrogates as this would be
	/// slow and complicated. If this is a UTF-8 stream then this field
	/// will be set to 0. Generally you are best working internally with 32 bit characters
	/// as this is the most efficient.
	/// genericSetupStream() initialises it to 1.
	///
	ANTLR_UINT8		m_charByteSize;

	/** Indicates if the data pointer was allocated by us, and so should be freed
	 *  when the stream dies.
	 */
	bool			m_isAllocated;

	/// Indicates the encoding scheme used in this input stream
	///
	ANTLR_UINT32	m_encoding;

	/* API */
public:
	/** Creates a stream from the contents of the file \p fileName.
	 *  Throws ParseFileAbsentException if \p fileName is NULL or the file
	 *  cannot be opened.
	 */
	InputStream(const ANTLR_UINT8* fileName, ANTLR_UINT32 encoding);

	/** Creates a stream over the caller's buffer \p data of \p size bytes.
	 *  The buffer is not copied and not freed by the stream. \p name may be
	 *  NULL, in which case the stream name is empty.
	 *  Throws ParseNullStringException if \p data is NULL.
	 */
	InputStream(const ANTLR_UINT8* data, ANTLR_UINT32 encoding, ANTLR_UINT32 size, ANTLR_UINT8* name);

	/** Frees the buffer if, and only if, the stream allocated it. */
	~InputStream();

	const DataType* get_data() const;
	bool get_isAllocated() const;
	const DataType* get_nextChar() const;
	ANTLR_UINT32 get_sizeBuf() const;
	ANTLR_UINT32 get_line() const;
	const DataType* get_currentLine() const;
	ANTLR_INT32 get_charPositionInLine() const;
	ANTLR_UINT32 get_markDepth() const;
	MarkersType& get_markers();
	const StringType& get_fileName() const;
	ANTLR_UINT32 get_fileNo() const;
	ANTLR_UCHAR get_newlineChar() const;
	ANTLR_UINT8 get_charByteSize() const;
	ANTLR_UINT32 get_encoding() const;

	void  set_data( DataType* data );
	void  set_isAllocated( bool isAllocated );
	void  set_nextChar( const DataType* nextChar );
	void  set_sizeBuf( ANTLR_UINT32 sizeBuf );
	void  set_line( ANTLR_UINT32 line );
	void  set_currentLine( const DataType* currentLine );
	void  set_charPositionInLine( ANTLR_INT32 charPositionInLine );
	void  set_markDepth( ANTLR_UINT32 markDepth );
	void  set_markers( const MarkersType& markers );
	void  set_fileName( const StringType& fileName );
	void  set_fileNo( ANTLR_UINT32 fileNo );
	void  set_newlineChar( ANTLR_UCHAR newlineChar );
	void  set_charByteSize( ANTLR_UINT8 charByteSize );
	void  set_encoding( ANTLR_UINT32 encoding );

	void inc_charPositionInLine();
	void inc_line();
	void inc_markDepth();

	IntStreamType*	get_istream();

	/** Function that resets the input stream
	 */
	void	reset();

	/** Function that reuses and resets an input stream by
	 *  supplying a new 'source'
	 */
	void    reuse(ANTLR_UINT8* inString, ANTLR_UINT32 size, ANTLR_UINT8* name);


	/** Function to return the total size of the input buffer. For streams
	 *  this may be just the total we have available so far. This means of course that
	 *  the input stream must be careful to accumulate enough input so that any backtracking
	 *  can be satisfied.
	 */
	ANTLR_UINT32	size();

	/** Function to return a substring of the input stream. String is returned in allocated
	 *  memory and is in same encoding as the input stream itself, NOT internal ANTLR_UCHAR form.
	 */
	StringType	substr(ANTLR_MARKER start, ANTLR_MARKER stop);

	/** Function to return the current line number in the input stream.
	 *  NOTE(review): this non-const overload duplicates get_line() const
	 *  declared above - confirm both are defined.
	 */
	ANTLR_UINT32	get_line();

	/** Function to return the current line buffer in the input stream
	 *  The pointer returned is directly into the input stream so you must copy
	 *  it if you wish to manipulate it without damaging the input stream. Encoding
	 *  is obviously in the same form as the input stream.
	 *  \remark
	 *    - Note that this function will be inaccurate if setLine is called as there
	 *      is no way at the moment to position the input stream at a particular line
	 *	    number offset.
	 */
	const DataType*	getLineBuf();

	/** Function to return the current offset in the current input stream line.
	 *  NOTE(review): differs from get_charPositionInLine() const above in
	 *  constness and in returning an unsigned value.
	 */
	ANTLR_UINT32	get_charPositionInLine();

	/** Function to set the current position in the current line.
	 *  NOTE(review): overloads set_charPositionInLine(ANTLR_INT32) above.
	 */
	void	set_charPositionInLine(ANTLR_UINT32 position);

	/** Function to override the default newline character that the input stream
	 *  looks for to trigger the line/offset and line buffer recording information.
	 *  \remark
	 *   - By default the character '\n' will be installed as the newline trigger character. When this
	 *     character is seen by the consume() function then the current line number is incremented and the
	 *     current line offset is reset to 0. The Pointer for the line of input we are consuming
	 *     is updated to point to the next character after this one in the input stream (which means it
	 *     may become invalid if the last newline character in the file is seen (so watch out).
	 *   - If for some reason you do not want the counters and pointers to be reset, you can set the
	 *     character to some impossible character such as '\0' or whatever.
	 *   - This is a single character only, so choose the last character in a sequence of two or more.
	 *   - This is only a simple aid to error reporting - if you have a complicated binary input structure
	 *     it may not be adequate, but you can always override every function in the input stream with your
	 *     own of course, and can even write your own complete input stream set if you like.
	 *   - It is your responsibility to set a valid character for the input stream type. There is no point
	 *     setting this to 0xFFFFFFFF if the input stream is 8 bit ASCII, as this will just be truncated and never
	 *	   trigger as the comparison will be (INT32)0xFF == (INT32)0xFFFFFFFF
	 */
	void	set_newLineChar(ANTLR_UINT32 newlineChar);

	ANTLR_MARKER index_impl();

private:
	/** \brief Use the contents of an operating system file as the input
	 *         for an input stream.
	 *
	 *  \param fileName Name of operating system file to read.
	 *
	 *  Reads the file into a buffer owned by this stream and then performs
	 *  the generic stream setup. Throws ParseFileAbsentException if
	 *  \p fileName is NULL or the file cannot be opened.
	 */
	void createFileStream(const ANTLR_UINT8* fileName);

	/** \brief Use the supplied 'string' as input to the stream
	 *
	 *  \param data Pointer to the input data
	 *
	 *  Installs \p data as the (borrowed) buffer and then performs the
	 *  generic stream setup. Throws ParseNullStringException if \p data is
	 *  NULL.
	 */
	void createStringStream(const ANTLR_UINT8* data);

	/** Common initialisation: sets the character size to 1, resets the
	 *  stream and installs '\n' as the newline character.
	 */
	void genericSetupStream();

	/// Determine endianness of the input stream and install the
	/// API required for the encoding in that format.
	///
	void setupInputStream();

};
/** \brief Structure that tracks lexer input state as part of mark()
 *  and rewind() of the lexer.
 *
 *  It holds the four position fields of an input stream: next character
 *  pointer, line number, start of the current line and the offset within
 *  that line.
 */
template<class ImplTraits>
class	LexState : public ImplTraits::AllocPolicyType
{
public:
	typedef typename ImplTraits::StreamDataType DataType;

private:
		/** Pointer to the next character to be consumed from the input data
	 *  This is cast to point at the encoding of the original file that
	 *  was read by the functions installed as pointer in this input stream
	 *  context instance at file/string/whatever load time.
	 */
	const DataType*			m_nextChar;

	/** The line number we are traversing in the input file. This gets incremented
	 *  by a newline() call in the lexer grammar actions.
	 */
	ANTLR_UINT32	m_line;

	/** Pointer into the input buffer where the current line
	 *  started.
	 */
	const DataType*			m_currentLine;

	/** The offset within the current line of the current character
	 */
	ANTLR_INT32		m_charPositionInLine;

public:
	LexState();
	const DataType* get_nextChar() const;
	ANTLR_UINT32 get_line() const;
	const DataType* get_currentLine() const;
	ANTLR_INT32 get_charPositionInLine() const;
	void  set_nextChar( const DataType* nextChar );
	void  set_line( ANTLR_UINT32 line );
	void  set_currentLine( const DataType* currentLine );
	void  set_charPositionInLine( ANTLR_INT32 charPositionInLine );
};

/** Thrown by the string based input stream when it is given a NULL buffer.
 *  Note that what() is private here (class default access); it is reachable
 *  through a std::exception reference.
 */
class ParseNullStringException : public std::exception
{
	virtual const char* what() const noexcept
	{
		return "Null String";
	}
};
Then we will work out the encoding and + // byte order and adjust the API functions that are installed for the + // default 8Bit stream accordingly. + // + this->createFileStream(fileName); + + // We have the data in memory now so we can deal with it according to + // the encoding scheme we were given by the user. + // + m_encoding = encoding; + + // Now we need to work out the endian type and install any + // API functions that differ from 8Bit + // + this->setupInputStream(); + + // Now we can set up the file name + // + BaseType::m_streamName = (const char* )fileName; + m_fileName = BaseType::m_streamName; +} + +template<class ImplTraits> +InputStream<ImplTraits>::InputStream(const ANTLR_UINT8* data, ANTLR_UINT32 encoding, ANTLR_UINT32 size, ANTLR_UINT8* name) +{ + // First order of business is to set up the stream and install the data pointer. + // Then we will work out the encoding and byte order and adjust the API functions that are installed for the + // default 8Bit stream accordingly. + // + this->createStringStream(data); + + // Size (in bytes) of the given 'string' + // + m_sizeBuf = size; + + // We have the data in memory now so we can deal with it according to + // the encoding scheme we were given by the user. + // + m_encoding = encoding; + + // Now we need to work out the endian type and install any + // API functions that differ from 8Bit + // + this->setupInputStream(); + + // Now we can set up the file name + // + BaseType::m_streamName = (name == NULL ) ? "" : (const char*)name; + m_fileName = BaseType::m_streamName; + +} + +template<class ImplTraits> +void InputStream<ImplTraits>::createStringStream(const ANTLR_UINT8* data) +{ + if (data == NULL) + { + ParseNullStringException ex; + throw ex; + } + + // Structure was allocated correctly, now we can install the pointer + // + m_data = data; + m_isAllocated = false; + + // Call the common 8 bit input stream handler + // initialization. 
+ // + this->genericSetupStream(); +} + +template<class ImplTraits> +void InputStream<ImplTraits>::createFileStream(const ANTLR_UINT8* fileName) +{ + if (fileName == NULL) + { + ParseFileAbsentException ex; + throw ex; + } + + // Structure was allocated correctly, now we can read the file. + // + FileUtils<ImplTraits>::AntlrRead8Bit(this, fileName); + + // Call the common 8 bit input stream handler + // initialization. + // + this->genericSetupStream(); +} + +template<class ImplTraits> +void InputStream<ImplTraits>::genericSetupStream() +{ + this->set_charByteSize(1); + + /* Set up the input stream brand new + */ + this->reset(); + + /* Install default line separator character (it can be replaced + * by the grammar programmer later) + */ + this->set_newLineChar((ANTLR_UCHAR)'\n'); +} + +template<class ImplTraits> +InputStream<ImplTraits>::~InputStream() +{ + // Free the input stream buffer if we allocated it + // + if (m_isAllocated && (m_data != NULL)) + AllocPolicyType::free((void*)m_data); //const_cast is required +} + +template<class ImplTraits> +ANTLR_INLINE const typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::get_data() const +{ + return m_data; +} +template<class ImplTraits> +ANTLR_INLINE bool InputStream<ImplTraits>::get_isAllocated() const +{ + return m_isAllocated; +} +template<class ImplTraits> +ANTLR_INLINE const typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::get_nextChar() const +{ + return m_nextChar; +} +template<class ImplTraits> +ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_sizeBuf() const +{ + return m_sizeBuf; +} +template<class ImplTraits> +ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_line() const +{ + return m_line; +} +template<class ImplTraits> +ANTLR_INLINE const typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::get_currentLine() const +{ + return m_currentLine; +} +template<class ImplTraits> +ANTLR_INLINE ANTLR_INT32 
InputStream<ImplTraits>::get_charPositionInLine() const +{ + return m_charPositionInLine; +} +template<class ImplTraits> +ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_markDepth() const +{ + return m_markDepth; +} +template<class ImplTraits> +ANTLR_INLINE typename InputStream<ImplTraits>::MarkersType& InputStream<ImplTraits>::get_markers() +{ + return m_markers; +} +template<class ImplTraits> +ANTLR_INLINE const typename InputStream<ImplTraits>::StringType& InputStream<ImplTraits>::get_fileName() const +{ + return m_fileName; +} +template<class ImplTraits> +ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_fileNo() const +{ + return m_fileNo; +} +template<class ImplTraits> +ANTLR_INLINE ANTLR_UCHAR InputStream<ImplTraits>::get_newlineChar() const +{ + return m_newlineChar; +} +template<class ImplTraits> +ANTLR_INLINE ANTLR_UINT8 InputStream<ImplTraits>::get_charByteSize() const +{ + return m_charByteSize; +} +template<class ImplTraits> +ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_encoding() const +{ + return m_encoding; +} +template<class ImplTraits> +ANTLR_INLINE void InputStream<ImplTraits>::set_data( DataType* data ) +{ + m_data = data; +} +template<class ImplTraits> +ANTLR_INLINE void InputStream<ImplTraits>::set_isAllocated( bool isAllocated ) +{ + m_isAllocated = isAllocated; +} +template<class ImplTraits> +ANTLR_INLINE void InputStream<ImplTraits>::set_nextChar( const DataType* nextChar ) +{ + m_nextChar = nextChar; +} +template<class ImplTraits> +ANTLR_INLINE void InputStream<ImplTraits>::set_sizeBuf( ANTLR_UINT32 sizeBuf ) +{ + m_sizeBuf = sizeBuf; +} +template<class ImplTraits> +ANTLR_INLINE void InputStream<ImplTraits>::set_line( ANTLR_UINT32 line ) +{ + m_line = line; +} +template<class ImplTraits> +ANTLR_INLINE void InputStream<ImplTraits>::set_currentLine( const DataType* currentLine ) +{ + m_currentLine = currentLine; +} +template<class ImplTraits> +ANTLR_INLINE void InputStream<ImplTraits>::set_charPositionInLine( 
ANTLR_INT32 charPositionInLine ) +{ + m_charPositionInLine = charPositionInLine; +} +template<class ImplTraits> +ANTLR_INLINE void InputStream<ImplTraits>::set_markDepth( ANTLR_UINT32 markDepth ) +{ + m_markDepth = markDepth; +} +template<class ImplTraits> +ANTLR_INLINE void InputStream<ImplTraits>::set_markers( const MarkersType& markers ) +{ + m_markers = markers; +} +template<class ImplTraits> +ANTLR_INLINE void InputStream<ImplTraits>::set_fileName( const StringType& fileName ) +{ + m_fileName = fileName; +} +template<class ImplTraits> +ANTLR_INLINE void InputStream<ImplTraits>::set_fileNo( ANTLR_UINT32 fileNo ) +{ + m_fileNo = fileNo; +} +template<class ImplTraits> +ANTLR_INLINE void InputStream<ImplTraits>::set_newlineChar( ANTLR_UCHAR newlineChar ) +{ + m_newlineChar = newlineChar; +} +template<class ImplTraits> +ANTLR_INLINE void InputStream<ImplTraits>::set_charByteSize( ANTLR_UINT8 charByteSize ) +{ + m_charByteSize = charByteSize; +} +template<class ImplTraits> +ANTLR_INLINE void InputStream<ImplTraits>::set_encoding( ANTLR_UINT32 encoding ) +{ + m_encoding = encoding; +} + +template<class ImplTraits> +ANTLR_INLINE void InputStream<ImplTraits>::inc_charPositionInLine() +{ + ++m_charPositionInLine; +} + +template<class ImplTraits> +ANTLR_INLINE void InputStream<ImplTraits>::inc_line() +{ + ++m_line; +} + +template<class ImplTraits> +ANTLR_INLINE void InputStream<ImplTraits>::inc_markDepth() +{ + ++m_markDepth; +} + +template<class ImplTraits> +ANTLR_INLINE void InputStream<ImplTraits>::reset() +{ + m_nextChar = m_data; /* Input at first character */ + m_line = 1; /* starts at line 1 */ + m_charPositionInLine = 0; + m_currentLine = m_data; + m_markDepth = 0; /* Reset markers */ + + /* Clear out up the markers table if it is there + */ + m_markers.clear(); +} + +template<class ImplTraits> +void InputStream<ImplTraits>::reuse(ANTLR_UINT8* inString, ANTLR_UINT32 size, ANTLR_UINT8* name) +{ + m_isAllocated = false; + m_data = inString; + m_sizeBuf = size; + + 
// Now we can set up the file name. As we are reusing the stream, there may already + // be a string that we can reuse for holding the filename. + // + if ( BaseType::m_streamName.empty() ) + { + BaseType::m_streamName = ((name == NULL) ? "-memory-" : (const char *)name); + m_fileName = BaseType::m_streamName; + } + else + { + BaseType::m_streamName = ((name == NULL) ? "-memory-" : (const char *)name); + } + + this->reset(); +} + +/* +template<class ImplTraits> +typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::LT(ANTLR_INT32 lt) +{ + return this->LA(lt); +} +*/ + +template<class ImplTraits> +ANTLR_UINT32 InputStream<ImplTraits>::size() +{ + return m_sizeBuf; +} + +template<class ImplTraits> +ANTLR_MARKER InputStream<ImplTraits>::index_impl() +{ + return (ANTLR_MARKER)m_nextChar; +} + + +template<class ImplTraits> +typename InputStream<ImplTraits>::StringType InputStream<ImplTraits>::substr(ANTLR_MARKER start, ANTLR_MARKER stop) +{ + std::size_t len = static_cast<std::size_t>( (stop-start)/sizeof(DataType) + 1 ); + StringType str( (const char*)start, len ); + return str; +} + +template<class ImplTraits> +ANTLR_UINT32 InputStream<ImplTraits>::get_line() +{ + return m_line; +} + +template<class ImplTraits> +const typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::getLineBuf() +{ + return m_currentLine; +} + +template<class ImplTraits> +ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_charPositionInLine() +{ + return m_charPositionInLine; +} + +template<class ImplTraits> +ANTLR_INLINE void InputStream<ImplTraits>::set_charPositionInLine(ANTLR_UINT32 position) +{ + m_charPositionInLine = position; +} + +template<class ImplTraits> +void InputStream<ImplTraits>::set_newLineChar(ANTLR_UINT32 newlineChar) +{ + m_newlineChar = newlineChar; +} + +template<class ImplTraits> +ANTLR_INLINE LexState<ImplTraits>::LexState() +{ + m_nextChar = NULL; + m_line = 0; + m_currentLine = NULL; + m_charPositionInLine = 0; +} + +template<class 
ImplTraits> +ANTLR_INLINE const typename LexState<ImplTraits>::DataType* LexState<ImplTraits>::get_nextChar() const +{ + return m_nextChar; +} + +template<class ImplTraits> +ANTLR_INLINE ANTLR_UINT32 LexState<ImplTraits>::get_line() const +{ + return m_line; +} + +template<class ImplTraits> +ANTLR_INLINE const typename LexState<ImplTraits>::DataType* LexState<ImplTraits>::get_currentLine() const +{ + return m_currentLine; +} + +template<class ImplTraits> +ANTLR_INLINE ANTLR_INT32 LexState<ImplTraits>::get_charPositionInLine() const +{ + return m_charPositionInLine; +} + +template<class ImplTraits> +ANTLR_INLINE void LexState<ImplTraits>::set_nextChar( const DataType* nextChar ) +{ + m_nextChar = nextChar; +} + +template<class ImplTraits> +ANTLR_INLINE void LexState<ImplTraits>::set_line( ANTLR_UINT32 line ) +{ + m_line = line; +} + +template<class ImplTraits> +ANTLR_INLINE void LexState<ImplTraits>::set_currentLine( const DataType* currentLine ) +{ + m_currentLine = currentLine; +} + +template<class ImplTraits> +ANTLR_INLINE void LexState<ImplTraits>::set_charPositionInLine( ANTLR_INT32 charPositionInLine ) +{ + m_charPositionInLine = charPositionInLine; +} + +template<class ImplTraits> +ANTLR_INLINE typename InputStream<ImplTraits>::IntStreamType* InputStream<ImplTraits>::get_istream() +{ + return this; +} + +template<class ImplTraits> +void InputStream<ImplTraits>::setupInputStream() +{ + bool isBigEndian; + + // Used to determine the endianness of the machine we are currently + // running on. + // + ANTLR_UINT16 bomTest = 0xFEFF; + + // What endianess is the machine we are running on? If the incoming + // encoding endianess is the same as this machine's natural byte order + // then we can use more efficient API calls. + // + if (*((ANTLR_UINT8*)(&bomTest)) == 0xFE) + { + isBigEndian = true; + } + else + { + isBigEndian = false; + } + + // What encoding did the user tell us {s}he thought it was? 
I am going + // to get sick of the questions on antlr-interest, I know I am. + // + switch (m_encoding) + { + case ENC_UTF8: + + // See if there is a BOM at the start of this UTF-8 sequence + // and just eat it if there is. Windows .TXT files have this for instance + // as it identifies UTF-8 even though it is of no consequence for byte order + // as UTF-8 does not have a byte order. + // + if ( (*(m_nextChar)) == 0xEF + && (*(m_nextChar+1)) == 0xBB + && (*(m_nextChar+2)) == 0xBF + ) + { + // The UTF8 BOM is present so skip it + // + m_nextChar += 3; + } + + // Install the UTF8 input routines + // + this->setupIntStream( isBigEndian, isBigEndian ); + this->set_charByteSize(0); + break; + + case ENC_UTF16: + + // See if there is a BOM at the start of the input. If not then + // we assume that the byte order is the natural order of this + // machine (or it is really UCS2). If there is a BOM we determine if the encoding + // is the same as the natural order of this machine. + // + if ( (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar)) == 0xFE + && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0xFF + ) + { + // BOM Present, indicates Big Endian + // + m_nextChar += 1; + + this->setupIntStream( isBigEndian, true ); + } + else if ( (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar)) == 0xFF + && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0xFE + ) + { + // BOM present, indicates Little Endian + // + m_nextChar += 1; + + this->setupIntStream( isBigEndian, false ); + } + else + { + // No BOM present, assume local computer byte order + // + this->setupIntStream(isBigEndian, isBigEndian); + } + this->set_charByteSize(2); + break; + + case ENC_UTF32: + + // See if there is a BOM at the start of the input. If not then + // we assume that the byte order is the natural order of this + // machine. If there is we determine if the encoding + // is the same as the natural order of this machine. 
+ // + if ( (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar)) == 0x00 + && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0x00 + && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+2)) == 0xFE + && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+3)) == 0xFF + ) + { + // BOM Present, indicates Big Endian + // + m_nextChar += 1; + + this->setupIntStream(isBigEndian, true); + } + else if ( (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar)) == 0xFF + && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0xFE + && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0x00 + && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0x00 + ) + { + // BOM present, indicates Little Endian + // + m_nextChar += 1; + + this->setupIntStream( isBigEndian, false ); + } + else + { + // No BOM present, assume local computer byte order + // + this->setupIntStream( isBigEndian, isBigEndian ); + } + this->set_charByteSize(4); + break; + + case ENC_UTF16BE: + + // Encoding is definately Big Endian with no BOM + // + this->setupIntStream( isBigEndian, true ); + this->set_charByteSize(2); + break; + + case ENC_UTF16LE: + + // Encoding is definately Little Endian with no BOM + // + this->setupIntStream( isBigEndian, false ); + this->set_charByteSize(2); + break; + + case ENC_UTF32BE: + + // Encoding is definately Big Endian with no BOM + // + this->setupIntStream( isBigEndian, true ); + this->set_charByteSize(4); + break; + + case ENC_UTF32LE: + + // Encoding is definately Little Endian with no BOM + // + this->setupIntStream( isBigEndian, false ); + this->set_charByteSize(4); + break; + + case ENC_EBCDIC: + + // EBCDIC is basically the same as ASCII but with an on the + // fly translation to ASCII + // + this->setupIntStream( isBigEndian, isBigEndian ); + this->set_charByteSize(1); + break; + + case ENC_8BIT: + default: + + // Standard 8bit/ASCII + // + this->setupIntStream( isBigEndian, isBigEndian ); + this->set_charByteSize(1); + break; + } +} + +} diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3interfaces.hpp 
b/contrib/libs/antlr3_cpp_runtime/include/antlr3interfaces.hpp new file mode 100644 index 0000000000..5f04b40c49 --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3interfaces.hpp @@ -0,0 +1,293 @@ +/** \file + * Declarations for all the antlr3 C runtime interfaces/classes. This + * allows the structures that define the interfaces to contain pointers to + * each other without trying to sort out the cyclic interdependencies that + * would otherwise result. + */ +#ifndef _ANTLR3_INTERFACES_HPP +#define _ANTLR3_INTERFACES_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

namespace antlr3 {

/// Encoding scheme of character streams, strings and so on.
/// Enumerators without an explicit value continue from the previous one, so
/// ENC_UTF16BE == 17, ENC_UTF16LE == 18, ENC_UTF32BE == 33 and ENC_UTF32LE == 34.
enum Encoding
{
    ENC_8BIT = 4        ///< General latin-1 or other 8 bit encoding scheme such as straight ASCII
    , ENC_UTF8 = 8      ///< UTF-8 encoding scheme
    , ENC_UTF16 = 16    ///< UTF-16 encoding scheme (which also covers UCS2 as that does not have surrogates)
    , ENC_UTF16BE
    , ENC_UTF16LE
    , ENC_UTF32 = 32    ///< UTF-32 encoding scheme (basically straight 32 bit)
    , ENC_UTF32BE
    , ENC_UTF32LE
    , ENC_EBCDIC = 64   ///< Input is 8 bit EBCDIC (which we convert to 8 bit ASCII on the fly)
};

/// Channel numbers that can be attached to a token.
enum ChannelType
{
    TOKEN_DEFAULT_CHANNEL = 0   ///< Default channel for a token
    , HIDDEN = 99               ///< Reserved channel number for a HIDDEN token - a token that is hidden from the parser
};

/// Kinds of exception; used as the non-type template argument of ANTLR_Exception
/// (declared further down in this header).
/// \ingroup ANTLR3_EXCEPTION
///
enum ExceptionType
{
    /** Indicates that the recognizer received a token
     *  in the input that was not predicted.
     */
    RECOGNITION_EXCEPTION = 0
    /** Indicates that the recognizer was expecting one token and found
     *  a different one.
     */
    , MISMATCHED_TOKEN_EXCEPTION

    /** Recognizer could not find a valid alternative from the input
     */
    , NO_VIABLE_ALT_EXCEPTION

    /* Character in a set was not found
     */
    , MISMATCHED_SET_EXCEPTION

    /* A rule predicting at least n elements found less than that,
     * such as: WS: " "+;
     */
    , EARLY_EXIT_EXCEPTION

    , FAILED_PREDICATE_EXCEPTION

    , MISMATCHED_TREE_NODE_EXCEPTION

    , REWRITE_EARLY_EXCEPTION

    , UNWANTED_TOKEN_EXCEPTION

    , MISSING_TOKEN_EXCEPTION
};

// Everything from here to the helper classes at the end is a forward declaration
// of a class template, so that the runtime headers can refer to one another
// without running into cyclic include dependencies.

/// Forward declaration of the IntStream class template.
template<class ImplTraits, class SuperType>
class IntStream;

/// Forward declaration of the RecognizerSharedState class template (see #ANTLR3_RECOGNIZER_SHARED_STATE).
/// \ingroup ANTLR3_RECOGNIZER_SHARED_STATE
///
template<class ImplTraits, class SuperType>
class RecognizerSharedState;

/// Forward declaration of the BitsetList class template (see #ANTLR3_BITSET_LIST).
/// \ingroup ANTLR3_BITSET_LIST
///
template<class AllocatorType>
class BitsetList;

/// Forward declaration of the Bitset class template (see #ANTLR3_BITSET).
/// \ingroup ANTLR3_BITSET
///
template<class AllocatorType>
class Bitset;

/// Forward declaration of the CommonToken class template (see #ANTLR3_COMMON_TOKEN).
/// \ingroup ANTLR3_COMMON_TOKEN
///
template<class ImplTraits>
class CommonToken;

/// Forward declaration of the ANTLR_Exception class template, parameterised on
/// one of the ExceptionType values.
template<class ImplTraits, ExceptionType Ex, class StreamType>
class ANTLR_Exception;

/// Forward declaration of the Topo class template (see #ANTLR3_TOPO).
/// \ingroup ANTLR3_TOPO
///
template<class AllocPolicyType>
class Topo;

/// Forward declaration of the InputStream class template (see #ANTLR3_INPUT_STREAM).
/// \ingroup ANTLR3_INPUT_STREAM
///
template<class ImplTraits>
class InputStream;

/// Forward declaration of the LexState class template (see #ANTLR3_LEX_STATE).
/// \ingroup ANTLR3_LEX_STATE
///
template<class ImplTraits>
class LexState;

/// Forward declaration of the TokenSource class template (see #ANTLR3_TOKEN_SOURCE).
/// \ingroup ANTLR3_TOKEN_SOURCE
///
template<class ImplTraits>
class TokenSource;

/// Forward declaration of the TokenStream class template (see #ANTLR3_TOKEN_STREAM).
/// \ingroup ANTLR3_TOKEN_STREAM
///
template<class ImplTraits>
class TokenStream;

/// Forward declaration of the CommonTokenStream class template (see #ANTLR3_COMMON_TOKEN_STREAM).
/// \ingroup ANTLR3_COMMON_TOKEN_STREAM
///
template<class ImplTraits>
class CommonTokenStream;

/// Forward declaration of the CyclicDFA class template (see #ANTLR3_CYCLIC_DFA).
/// \ingroup ANTLR3_CYCLIC_DFA
///
template<class ImplTraits, class ComponentType>
class CyclicDFA;

/// Forward declaration of the Lexer class template (see #ANTLR3_LEXER).
/// \ingroup ANTLR3_LEXER
///
template<class ImplTraits>
class Lexer;

/// Forward declaration of the Parser class template (see #ANTLR3_PARSER).
/// \ingroup ANTLR3_PARSER
///
template<class ImplTraits>
class Parser;

/// Forward declaration of the BaseTree class template (see #ANTLR3_BASE_TREE).
/// \ingroup ANTLR3_BASE_TREE
///
template<class ImplTraits>
class BaseTree;

/// Forward declaration of the CommonTree class template (see #ANTLR3_COMMON_TREE).
/// \ingroup ANTLR3_COMMON_TREE
///
template<class ImplTraits>
class CommonTree;

/// Forward declaration of the ParseTree class template (see #ANTLR3_PARSE_TREE).
/// \ingroup ANTLR3_PARSE_TREE
///
template<class ImplTraits>
class ParseTree;

/// Forward declaration of the TreeNodeStream class template (see #ANTLR3_TREE_NODE_STREAM).
/// \ingroup ANTLR3_TREE_NODE_STREAM
///
template<class ImplTraits>
class TreeNodeStream;

/// Forward declaration of the CommonTreeNodeStream class template (see #ANTLR3_COMMON_TREE_NODE_STREAM).
/// \ingroup ANTLR3_COMMON_TREE_NODE_STREAM
///
template<class ImplTraits>
class CommonTreeNodeStream;

/// Forward declaration of the TreeWalkState class template (see #ANTLR3_TREE_WALK_STATE).
/// \ingroup ANTLR3_TREE_WALK_STATE
///
template<class ImplTraits>
class TreeWalkState;

/// Forward declaration of the CommonTreeAdaptor class template (see #ANTLR3_COMMON_TREE_ADAPTOR).
/// \ingroup ANTLR3_COMMON_TREE_ADAPTOR
///
template<class ImplTraits>
class CommonTreeAdaptor;

/// Forward declaration of the TreeParser class template (see #ANTLR3_TREE_PARSER).
/// \ingroup ANTLR3_TREE_PARSER
///
template<class ImplTraits>
class TreeParser;

/// Forward declaration of the IntTrie class template (see #ANTLR3_INT_TRIE).
/// \ingroup ANTLR3_INT_TRIE
///
template< class DataType, class AllocPolicyType >
class IntTrie;

/// Forward declaration of the RewriteRuleElementStream class template (see #ANTLR3_REWRITE_RULE_ELEMENT_STREAM).
/// \ingroup ANTLR3_REWRITE_RULE_ELEMENT_STREAM
///
template<class ImplTraits, class SuperType>
class RewriteRuleElementStream;

/// Forward declaration of the RewriteRuleTokenStream class template.
template<class ImplTraits>
class RewriteRuleTokenStream;

/// Forward declaration of the RewriteRuleSubtreeStream class template.
template<class ImplTraits>
class RewriteRuleSubtreeStream;

/// Forward declaration of the RewriteRuleNodeStream class template.
template<class ImplTraits>
class RewriteRuleNodeStream;

/// Forward declaration of the DebugEventListener class template (see #ANTLR3_DEBUG_EVENT_LISTENER).
/// \ingroup ANTLR3_DEBUG_EVENT_LISTENER
///
template<class ImplTraits>
class DebugEventListener;

// Empty tag type carrying a class as its template argument. It has no members;
// it exists only so that a type can be passed as a function argument to select
// an overload.
template<class A>
class ClassForwarder {};

// Empty tag type carrying a compile-time boolean, the bool counterpart of ClassForwarder.
template<bool b>
class BoolForwarder {};

// Empty placeholder class.
class Empty {};

// Maps a stream type onto the component type that consumes it. The primary
// template deliberately defines nothing; only the three specialisations below
// provide ComponentType.
template<class ImplTraits, class StreamType>
class ComponentTypeFinder
{
};

// Input (character) stream -> lexer.
template<class ImplTraits>
class ComponentTypeFinder< ImplTraits, typename ImplTraits::InputStreamType>
{
public:
    typedef typename ImplTraits::LexerType ComponentType;
};

// Token stream -> parser.
template<class ImplTraits>
class ComponentTypeFinder< ImplTraits, typename ImplTraits::TokenStreamType>
{
public:
    typedef typename ImplTraits::ParserType ComponentType;
};

// Tree node stream -> tree parser.
template<class ImplTraits>
class ComponentTypeFinder< ImplTraits, typename ImplTraits::TreeNodeStreamType>
{
public:
    typedef typename ImplTraits::TreeParserType ComponentType;
};

}

#endif
// diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3intstream.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3intstream.hpp new file mode 100644 index 0000000000..01bf60a7cb --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3intstream.hpp @@ -0,0 +1,401 @@
/** \file
 * Defines the class interface for an antlr3 INTSTREAM.
+ * + * Certain functionality (such as DFAs for instance) abstract the stream of tokens + * or characters into a stream of integers. Hence this structure should be included + * in any stream that is able to provide the output as a stream of integers (which is anything, + * basically). + * + * There are no specific implementations of the methods in this interface in general. Though + * for purposes of casting and so on, it may be necessary to implement a function with + * the signature in this interface which abstracts the base implementation. In essence though + * the base stream provides a pointer to this interface, within which it installs its + * normal match() functions and so on. Interfaces such as DFA are then passed the pANTLR3_INT_STREAM + * and can treat any input as an int stream. + * + * For instance, a lexer implements a pANTLR3_BASE_RECOGNIZER, within which there is a pANTLR3_INT_STREAM. + * However, a pANTLR3_INPUT_STREAM also provides a pANTLR3_INT_STREAM, which it has constructed from + * its normal interface when it was created. This is then pointed at by the pANTLR3_BASE_RECOGNIZER + * when it is initialized with a pANTLR3_INPUT_STREAM. + * + * Similarly if a pANTLR3_BASE_RECOGNIZER is initialized with a pANTLR3_TOKEN_STREAM, then the + * pANTLR3_INT_STREAM is taken from the pANTLR3_TOKEN_STREAM. + * + * If a pANTLR3_BASE_RECOGNIZER is initialized with a pANTLR3_TREENODE_STREAM, then guess where + * the pANTLR3_INT_STREAM comes from? + * + * Note that because the context pointer points to the actual interface structure that is providing + * the ANTLR3_INT_STREAM it is defined as a (void *) in this interface. There is no direct implementation + * of an ANTLR3_INT_STREAM (unless someone did not understand what I was doing here =;?P) + */ +#ifndef _ANTLR3_INTSTREAM_HPP +#define _ANTLR3_INTSTREAM_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved.
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +namespace antlr3 { + +enum STREAM_TYPE +{ + /** Type indicator for a character stream + * \remark if a custom stream is created but it can be treated as + * a char stream, then you may OR in this value to your type indicator + */ + CHARSTREAM = 0x0001 + + /** Type indicator for a Token stream + * \remark if a custom stream is created but it can be treated as + * a token stream, then you may OR in this value to your type indicator + */ + , TOKENSTREAM = 0x0002 + + /** Type indicator for a common tree node stream + * \remark if a custom stream is created but it can be treated as + * a common tree node stream, then you may OR in this value to your type indicator + */ + , COMMONTREENODE = 0x0004 + + /** Type mask for input stream so we can switch in the above types + * \remark DO NOT USE 0x0000 as a stream type! + */ + , INPUT_MASK = 0x0007 +}; + +class RESOLVE_ENDIAN_AT_RUNTIME {}; +class BYTE_AGNOSTIC {}; +class ANTLR_LITTLE_ENDIAN {}; +class ANTLR_BIG_ENDIAN {}; + +template<class ImplTraits, class SuperType> +class IntStream : public ImplTraits::AllocPolicyType +{ +public: + typedef typename ImplTraits::StringType StringType; + +protected: + /** Potentially useful in error reporting and so on, this string is + * an identification of the input source. It may be NULL, so anything + * attempting to access it needs to check this and substitute a sensible + * default. 
+ */ + StringType m_streamName; + + /** Last marker position allocated + */ + ANTLR_MARKER m_lastMarker; + + bool m_upper_case; //if set, values should be returbed in upper case + + /// Indicates whether we should implement endian-specific logic + /// 0 - Undefined 1 - Default(machine and input are both same), 2 - Little Endian, 3 - Big Endian + ANTLR_UINT8 m_endian_spec; + +public: + IntStream(); + + // Return a string that identifies the input source + // + StringType getSourceName(); + StringType& get_streamName(); + const StringType& get_streamName() const; + ANTLR_MARKER get_lastMarker() const; + + SuperType* get_super(); + /** + * Function that installs a version of LA that always + * returns upper case. Only valid for character streams and creates a case + * insensitive lexer if the lexer tokens are described in upper case. The + * tokens will preserve case in the token text. + */ + void setUcaseLA(bool flag); + + /** Consume the next 'ANTR3_UINT32' in the stream + */ + void consume(); + + /** Get ANTLR3_UINT32 at current input pointer + i ahead where i=1 is next ANTLR3_UINT32 + */ + ANTLR_UINT32 LA( ANTLR_INT32 i); + + /** Tell the stream to start buffering if it hasn't already. Return + * current input position, index(), or some other marker so that + * when passed to rewind() you get back to the same spot. + * rewind(mark()) should not affect the input cursor. + */ + ANTLR_MARKER mark(); + + /** Return the current input symbol index 0..n where n indicates the + * last symbol has been read. + */ + ANTLR_MARKER index(); + + /** Reset the stream so that next call to index would return marker. + * The marker will usually be index() but it doesn't have to be. It's + * just a marker to indicate what state the stream was in. This is + * essentially calling release() and seek(). If there are markers + * created after this marker argument, this routine must unroll them + * like a stack. Assume the state the stream was in when this marker + * was created. 
+ */ + void rewind(ANTLR_MARKER marker); + + /** Reset the stream to the last marker position, witouh destryoing the + * last marker position. + */ + void rewindLast(); + + /** You may want to commit to a backtrack but don't want to force the + * stream to keep bookkeeping objects around for a marker that is + * no longer necessary. This will have the same behavior as + * rewind() except it releases resources without the backward seek. + */ + void release(ANTLR_MARKER mark); + + /** Set the input cursor to the position indicated by index. This is + * normally used to seek ahead in the input stream. No buffering is + * required to do this unless you know your stream will use seek to + * move backwards such as when backtracking. + * + * This is different from rewind in its multi-directional + * requirement and in that its argument is strictly an input cursor (index). + * + * For char streams, seeking forward must update the stream state such + * as line number. For seeking backwards, you will be presumably + * backtracking using the mark/rewind mechanism that restores state and + * so this method does not need to update state when seeking backwards. + * + * Currently, this method is only used for efficient backtracking, but + * in the future it may be used for incremental parsing. + */ + void seek(ANTLR_MARKER index); + + /// Debug only method to flag consumption of initial off-channel + /// tokens in the input stream + /// + void consumeInitialHiddenTokens(); + + void rewindMark(ANTLR_MARKER marker); + ANTLR_MARKER tindex(); + + /** Frees any resources that were allocated for the implementation of this + * interface. Usually this is just releasing the memory allocated + * for the structure itself, but it may of course do anything it need to + * so long as it does not stamp on anything else. 
+ */ + ~IntStream(); + +protected: + void setupIntStream(bool machineBigEndian, bool inputBigEndian); + void findout_endian_spec(bool machineBigEndian, bool inputBigEndian); + + //If the user chooses this option, then we will be resolving stuffs at run-time + ANTLR_UINT32 LA( ANTLR_INT32 i, ClassForwarder<RESOLVE_ENDIAN_AT_RUNTIME> ); + + //resolve into one of the three categories below at runtime + void consume( ClassForwarder<RESOLVE_ENDIAN_AT_RUNTIME> ); +}; + +template<class ImplTraits, class SuperType> +class EBCDIC_IntStream : public IntStream<ImplTraits, SuperType> +{ +public: + ANTLR_UINT32 LA( ANTLR_INT32 i); + +protected: + void setupIntStream(); +}; + +template<class ImplTraits, class SuperType> +class UTF8_IntStream : public IntStream<ImplTraits, SuperType> +{ +public: + ANTLR_UINT32 LA( ANTLR_INT32 i); + void consume(); + +protected: + void setupIntStream(bool machineBigEndian, bool inputBigEndian); + +private: + static const ANTLR_UINT32* TrailingBytesForUTF8(); + static const UTF32* OffsetsFromUTF8(); +}; + +template<class ImplTraits, class SuperType> +class UTF16_IntStream : public IntStream<ImplTraits, SuperType> +{ +public: + ANTLR_UINT32 LA( ANTLR_INT32 i); + void consume(); + ANTLR_MARKER index(); + void seek(ANTLR_MARKER seekPoint); + +protected: + void setupIntStream(bool machineBigEndian, bool inputBigEndian); + + /// \brief Return the input element assuming an 8 bit ascii input + /// + /// \param[in] input Input stream context pointer + /// \param[in] la 1 based offset of next input stream element + /// + /// \return Next input character in internal ANTLR3 encoding (UTF32) + /// + ANTLR_UINT32 LA( ANTLR_INT32 i, ClassForwarder<BYTE_AGNOSTIC> ); + + /// \brief Return the input element assuming a UTF16 input when the input is Little Endian and the machine is not + /// + /// \param[in] input Input stream context pointer + /// \param[in] la 1 based offset of next input stream element + /// + /// \return Next input character in internal ANTLR3 
encoding (UTF32) + /// + ANTLR_UINT32 LA( ANTLR_INT32 i, ClassForwarder<ANTLR_LITTLE_ENDIAN> ); + + /// \brief Return the input element assuming a UTF16 input when the input is Little Endian and the machine is not + /// + /// \param[in] input Input stream context pointer + /// \param[in] la 1 based offset of next input stream element + /// + /// \return Next input character in internal ANTLR3 encoding (UTF32) + /// + ANTLR_UINT32 LA( ANTLR_INT32 i, ClassForwarder<ANTLR_BIG_ENDIAN> ); + + /// \brief Consume the next character in a UTF16 input stream + /// + /// \param input Input stream context pointer + /// + void consume( ClassForwarder<BYTE_AGNOSTIC> ); + + /// \brief Consume the next character in a UTF16 input stream when the input is Little Endian and the machine is not + /// Note that the UTF16 routines do not do any substantial verification of the input stream as for performance + /// sake, we assume it is validly encoded. So if a low surrogate is found at the curent input position then we + /// just consume it. Surrogate pairs should be seen as Hi, Lo. So if we have a Lo first, then the input stream + /// is fubar but we just ignore that. + /// + /// \param input Input stream context pointer + /// + void consume( ClassForwarder<ANTLR_LITTLE_ENDIAN> ); + + /// \brief Consume the next character in a UTF16 input stream when the input is Big Endian and the machine is not + /// + /// \param input Input stream context pointer + /// + void consume( ClassForwarder<ANTLR_BIG_ENDIAN> ); +}; + + + +template<class ImplTraits, class SuperType> +class UTF32_IntStream : public IntStream<ImplTraits, SuperType> +{ +public: + ANTLR_UINT32 LA( ANTLR_INT32 i); + void consume(); + + /// \brief Calculate the current index in the output stream. 
+ /// \param[in] input Input stream context pointer + /// + ANTLR_MARKER index(); + void seek(ANTLR_MARKER seekPoint); + +protected: + void setupIntStream(bool machineBigEndian, bool inputBigEndian); + ANTLR_UINT32 LA( ANTLR_INT32 i, ClassForwarder<RESOLVE_ENDIAN_AT_RUNTIME> ); + ANTLR_UINT32 LA( ANTLR_INT32 i, ClassForwarder<BYTE_AGNOSTIC> ); + ANTLR_UINT32 LA( ANTLR_INT32 i, ClassForwarder<ANTLR_LITTLE_ENDIAN> ); + ANTLR_UINT32 LA( ANTLR_INT32 i, ClassForwarder<ANTLR_BIG_ENDIAN> ); + + void consume( ClassForwarder<RESOLVE_ENDIAN_AT_RUNTIME> ); + void consume( ClassForwarder<BYTE_AGNOSTIC> ); + void consume( ClassForwarder<ANTLR_LITTLE_ENDIAN> ); + void consume( ClassForwarder<ANTLR_BIG_ENDIAN> ); +}; + +template<class ImplTraits> +class TokenIntStream : public IntStream<ImplTraits, typename ImplTraits::TokenStreamType > +{ +public: + typedef typename ImplTraits::CommonTokenType CommonTokenType; + typedef typename ImplTraits::StringType StringType; + typedef typename ImplTraits::TokenStreamType TokenStreamType; + typedef IntStream<ImplTraits, TokenStreamType > BaseType; + +private: + /** Because the indirect call, though small in individual cases can + * mount up if there are thousands of tokens (very large input streams), callers + * of size can optionally use this cached size field. 
+ */ + ANTLR_UINT32 m_cachedSize; + +public: + TokenIntStream(); + ANTLR_UINT32 get_cachedSize() const; + void set_cachedSize( ANTLR_UINT32 cachedSize ); + + void consume(); + void consumeInitialHiddenTokens(); + ANTLR_UINT32 LA( ANTLR_INT32 i ); + ANTLR_MARKER mark(); + ANTLR_UINT32 size(); + void release(); + ANTLR_MARKER tindex(); + void rewindLast(); + void rewind(ANTLR_MARKER marker); + void seek(ANTLR_MARKER index); + StringType getSourceName(); + +}; + +template<class ImplTraits> +class TreeNodeIntStream : public IntStream<ImplTraits, typename ImplTraits::TreeNodeStreamType> +{ +public: + typedef typename ImplTraits::TreeNodeStreamType TreeNodeStreamType; + typedef IntStream<ImplTraits, TreeNodeStreamType > BaseType; + typedef typename ImplTraits::TreeType TreeType; + typedef typename ImplTraits::TreeTypePtr TreeTypePtr; + typedef typename ImplTraits::CommonTokenType CommonTokenType; + +public: + void consume(); + ANTLR_MARKER tindex(); + ANTLR_UINT32 LA(ANTLR_INT32 i); + ANTLR_MARKER mark(); + void release(ANTLR_MARKER marker); + void rewindMark(ANTLR_MARKER marker); + void rewindLast(); + void seek(ANTLR_MARKER index); + ANTLR_UINT32 size(); +}; + +} + +#include "antlr3intstream.inl" + +#endif + diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3intstream.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3intstream.inl new file mode 100644 index 0000000000..e9990786bb --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3intstream.inl @@ -0,0 +1,1661 @@ +namespace antlr3 { + +template<class ImplTraits, class SuperType> +ANTLR_INLINE IntStream<ImplTraits, SuperType>::IntStream() +{ + m_lastMarker = 0; + m_upper_case = false; +} + +template<class ImplTraits, class SuperType> +ANTLR_INLINE typename IntStream<ImplTraits, SuperType>::StringType IntStream<ImplTraits, SuperType>::getSourceName() +{ + return m_streamName; +} + +template<class ImplTraits, class SuperType> +ANTLR_INLINE typename IntStream<ImplTraits, SuperType>::StringType& 
IntStream<ImplTraits, SuperType>::get_streamName() +{ + return m_streamName; +} + +template<class ImplTraits, class SuperType> +ANTLR_INLINE const typename IntStream<ImplTraits, SuperType>::StringType& IntStream<ImplTraits, SuperType>::get_streamName() const +{ + return m_streamName; +} + +template<class ImplTraits, class SuperType> +ANTLR_INLINE ANTLR_MARKER IntStream<ImplTraits, SuperType>::get_lastMarker() const +{ + return m_lastMarker; +} + +template<class ImplTraits, class SuperType> +ANTLR_INLINE void IntStream<ImplTraits, SuperType>::setUcaseLA(bool flag) +{ + m_upper_case = flag; +} + +template<class ImplTraits, class SuperType> +ANTLR_INLINE SuperType* IntStream<ImplTraits, SuperType>::get_super() +{ + return static_cast<SuperType*>(this); +} + +template<class ImplTraits, class SuperType> +void IntStream<ImplTraits, SuperType>::consume() +{ + SuperType* input = this->get_super(); + + const ANTLR_UINT8* nextChar = input->get_nextChar(); + const ANTLR_UINT8* data = input->get_data(); + ANTLR_UINT32 sizeBuf = input->get_sizeBuf(); + + if ( nextChar < ( data + sizeBuf ) ) + { + /* Indicate one more character in this line + */ + input->inc_charPositionInLine(); + + if ((ANTLR_UCHAR)(*(nextChar)) == input->get_newlineChar() ) + { + /* Reset for start of a new line of input + */ + input->inc_line(); + input->set_charPositionInLine(0); + input->set_currentLine(nextChar + 1); + } + + /* Increment to next character position + */ + input->set_nextChar( nextChar + 1 ); + } +} + +template<class ImplTraits, class SuperType> +ANTLR_UINT32 IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 la ) +{ + SuperType* input = this->get_super(); + const ANTLR_UINT8* nextChar = input->get_nextChar(); + const ANTLR_UINT8* data = input->get_data(); + ANTLR_UINT32 sizeBuf = input->get_sizeBuf(); + + if (( nextChar + la - 1) >= (data + sizeBuf)) + { + return ANTLR_CHARSTREAM_EOF; + } + else + { + if( !m_upper_case ) + return (ANTLR_UCHAR)(*(nextChar + la - 1)); + else + return 
(ANTLR_UCHAR)toupper(*(nextChar + la - 1)); + } +} + +template<class ImplTraits, class SuperType> +ANTLR_MARKER IntStream<ImplTraits, SuperType>::mark() +{ + LexState<ImplTraits>* state; + SuperType* input = this->get_super(); + + /* New mark point + */ + input->inc_markDepth(); + + /* See if we are revisiting a mark as we can just reuse the vector + * entry if we are, otherwise, we need a new one + */ + if (input->get_markDepth() > input->get_markers().size() ) + { + input->get_markers().push_back( LexState<ImplTraits>() ); + LexState<ImplTraits>& state_r = input->get_markers().back(); + state = &state_r; + } + else + { + LexState<ImplTraits>& state_r = input->get_markers().at( input->get_markDepth() - 1 ); + state = &state_r; + + /* Assume no errors for speed, it will just blow up if the table failed + * for some reasons, hence lots of unit tests on the tables ;-) + */ + } + + /* We have created or retrieved the state, so update it with the current + * elements of the lexer state. + */ + state->set_charPositionInLine( input->get_charPositionInLine() ); + state->set_currentLine( input->get_currentLine() ); + state->set_line( input->get_line() ); + state->set_nextChar( input->get_nextChar() ); + + m_lastMarker = input->get_markDepth(); + + /* And that's it + */ + return input->get_markDepth(); +} + +template<class ImplTraits, class SuperType> +ANTLR_MARKER IntStream<ImplTraits, SuperType>::index() +{ + SuperType* input = this->get_super(); + return input->index_impl(); +} + +template<class ImplTraits, class SuperType> +void IntStream<ImplTraits, SuperType>::rewind(ANTLR_MARKER mark) +{ + SuperType* input = this->get_super(); + + /* Perform any clean up of the marks + */ + this->release(mark); + + /* Find the supplied mark state + */ + ANTLR_UINT32 idx = static_cast<ANTLR_UINT32>( mark-1 ); + typename ImplTraits::LexStateType& state = input->get_markers().at( idx ); + + /* Seek input pointer to the requested point (note we supply the void *pointer + * to whatever is 
implementing the int stream to seek). + */ + this->seek( (ANTLR_MARKER)state.get_nextChar() ); + + /* Reset to the reset of the information in the mark + */ + input->set_charPositionInLine( state.get_charPositionInLine() ); + input->set_currentLine( state.get_currentLine() ); + input->set_line( state.get_line() ); + input->set_nextChar( state.get_nextChar() ); + + /* And we are done + */ +} + +template<class ImplTraits, class SuperType> +void IntStream<ImplTraits, SuperType>::rewindLast() +{ + this->rewind(m_lastMarker); +} + +template<class ImplTraits, class SuperType> +void IntStream<ImplTraits, SuperType>::release(ANTLR_MARKER mark) +{ + SuperType* input = this->get_super(); + + /* We don't do much here in fact as we never free any higher marks in + * the hashtable as we just resuse any memory allocated for them. + */ + input->set_markDepth( (ANTLR_UINT32)(mark - 1) ); + +} + +template<class ImplTraits, class SuperType> +void IntStream<ImplTraits, SuperType>::setupIntStream(bool, bool) +{ +} + +template<class ImplTraits, class SuperType> +void IntStream<ImplTraits, SuperType>::seek(ANTLR_MARKER seekPoint) +{ + ANTLR_INT32 count; + SuperType* input = this->get_super(); + + ANTLR_MARKER nextChar = (ANTLR_MARKER) input->get_nextChar(); + /* If the requested seek point is less than the current + * input point, then we assume that we are resetting from a mark + * and do not need to scan, but can just set to there. 
+ */ + if (seekPoint <= nextChar) + { + input->set_nextChar((ANTLR_UINT8*) seekPoint); + } + else + { + count = (ANTLR_UINT32)(seekPoint - nextChar); + + while (count--) + { + this->consume(); + } + } +} + +template<class ImplTraits, class SuperType> +IntStream<ImplTraits, SuperType>::~IntStream() +{ +} + +template<class ImplTraits, class SuperType> +ANTLR_UINT32 EBCDIC_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 la) +{ + // EBCDIC to ASCII conversion table + // + // This for EBCDIC EDF04 translated to ISO-8859.1 which is the usually accepted POSIX + // translation and the character tables are published all over the interweb. + // + const ANTLR_UCHAR e2a[256] = + { + 0x00, 0x01, 0x02, 0x03, 0x85, 0x09, 0x86, 0x7f, + 0x87, 0x8d, 0x8e, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x8f, 0x0a, 0x08, 0x97, + 0x18, 0x19, 0x9c, 0x9d, 0x1c, 0x1d, 0x1e, 0x1f, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x92, 0x17, 0x1b, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x05, 0x06, 0x07, + 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, + 0x98, 0x99, 0x9a, 0x9b, 0x14, 0x15, 0x9e, 0x1a, + 0x20, 0xa0, 0xe2, 0xe4, 0xe0, 0xe1, 0xe3, 0xe5, + 0xe7, 0xf1, 0x60, 0x2e, 0x3c, 0x28, 0x2b, 0x7c, + 0x26, 0xe9, 0xea, 0xeb, 0xe8, 0xed, 0xee, 0xef, + 0xec, 0xdf, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x9f, + 0x2d, 0x2f, 0xc2, 0xc4, 0xc0, 0xc1, 0xc3, 0xc5, + 0xc7, 0xd1, 0x5e, 0x2c, 0x25, 0x5f, 0x3e, 0x3f, + 0xf8, 0xc9, 0xca, 0xcb, 0xc8, 0xcd, 0xce, 0xcf, + 0xcc, 0xa8, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22, + 0xd8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0xab, 0xbb, 0xf0, 0xfd, 0xfe, 0xb1, + 0xb0, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, + 0x71, 0x72, 0xaa, 0xba, 0xe6, 0xb8, 0xc6, 0xa4, + 0xb5, 0xaf, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, + 0x79, 0x7a, 0xa1, 0xbf, 0xd0, 0xdd, 0xde, 0xae, + 0xa2, 0xa3, 0xa5, 0xb7, 0xa9, 0xa7, 0xb6, 0xbc, + 0xbd, 0xbe, 0xac, 0x5b, 0x5c, 0x5d, 0xb4, 0xd7, + 0xf9, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0xad, 0xf4, 0xf6, 0xf2, 0xf3, 0xf5, + 0xa6, 0x4a, 0x4b, 
0x4c, 0x4d, 0x4e, 0x4f, 0x50, + 0x51, 0x52, 0xb9, 0xfb, 0xfc, 0xdb, 0xfa, 0xff, + 0xd9, 0xf7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, + 0x59, 0x5a, 0xb2, 0xd4, 0xd6, 0xd2, 0xd3, 0xd5, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0xb3, 0x7b, 0xdc, 0x7d, 0xda, 0x7e + }; + + SuperType* input = this->get_super(); + + if (( input->get_nextChar() + la - 1) >= ( input->get_data() + input->get_sizeBuf() )) + { + return ANTLR_CHARSTREAM_EOF; + } + else + { + // Translate the required character via the constant conversion table + // + return e2a[(*(input->get_nextChar() + la - 1))]; + } +} + +template<class ImplTraits, class SuperType> +void EBCDIC_IntStream<ImplTraits, SuperType>::setupIntStream() +{ + SuperType* super = this->get_super(); + super->set_charByteSize(1); +} + +template<class ImplTraits, class SuperType> +ANTLR_UINT32 UTF16_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 i) +{ + return this->LA(i, ClassForwarder< typename ImplTraits::Endianness >() ); +} + +template<class ImplTraits, class SuperType> +void UTF16_IntStream<ImplTraits, SuperType>::consume() +{ + this->consume( ClassForwarder< typename ImplTraits::Endianness >() ); +} + +template<class ImplTraits, class SuperType> +ANTLR_MARKER UTF16_IntStream<ImplTraits, SuperType>::index() +{ + SuperType* input = this->get_super(); + return (ANTLR_MARKER)(input->get_nextChar()); +} + +template<class ImplTraits, class SuperType> +void UTF16_IntStream<ImplTraits, SuperType>::seek(ANTLR_MARKER seekPoint) +{ + SuperType* input = this->get_super(); + + // If the requested seek point is less than the current + // input point, then we assume that we are resetting from a mark + // and do not need to scan, but can just set to there as rewind will + // reset line numbers and so on. 
+ // + if (seekPoint <= (ANTLR_MARKER)(input->get_nextChar())) + { + input->set_nextChar( seekPoint ); + } + else + { + // Seeking forward: call consume until nextChar reaches the asked for seek point or we hit EOF + // + while( (this->LA(1) != ANTLR_CHARSTREAM_EOF) && (seekPoint > (ANTLR_MARKER)input->get_nextChar() ) ) + { + this->consume(); + } + } +} + +template<class ImplTraits, class SuperType> +void IntStream<ImplTraits, SuperType>::findout_endian_spec(bool machineBigEndian, bool inputBigEndian) +{ + // We must install different UTF16 routines according to whether the input + // is the same endianness as the machine we are executing upon or not. If it is not + // then we must install methods that can convert the endianness on the fly as they go + // + + if(machineBigEndian == true) + { + // Machine is Big Endian, if the input is also then install the + // methods that do not access input by bytes and reverse them. + // Otherwise install endian aware methods. + // + if (inputBigEndian == true) + { + // Input is machine compatible + // + m_endian_spec = 1; + } + else + { + // Need to use methods that know that the input is little endian + // + m_endian_spec = 2; + } + } + else + { + // Machine is Little Endian, if the input is also then install the + // methods that do not access input by bytes and reverse them. + // Otherwise install endian aware methods. 
+ // + if (inputBigEndian == false) + { + // Input is machine compatible + // + m_endian_spec = 1; + } + else + { + // Need to use methods that know that the input is Big Endian + // + m_endian_spec = 3; + } + } +} + +template<class ImplTraits, class SuperType> +void UTF16_IntStream<ImplTraits, SuperType>::setupIntStream(bool machineBigEndian, bool inputBigEndian) +{ + SuperType* super = this->get_super(); + super->set_charByteSize(2); + + this->findout_endian_spec( machineBigEndian, inputBigEndian ); +} + +template<class ImplTraits, class SuperType> +ANTLR_UINT32 IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 i, ClassForwarder<RESOLVE_ENDIAN_AT_RUNTIME> ) +{ + assert( (m_endian_spec >= 1) && (m_endian_spec <= 3)); + switch(m_endian_spec) + { + case 1: + return this->LA(i, ClassForwarder<BYTE_AGNOSTIC>() ); + break; + case 2: + return this->LA(i, ClassForwarder<ANTLR_LITTLE_ENDIAN>() ); + break; + case 3: + return this->LA(i, ClassForwarder<ANTLR_BIG_ENDIAN>() ); + break; + default: + break; + } + return 0; +} + +template<class ImplTraits, class SuperType> +void IntStream<ImplTraits, SuperType>::consume( ClassForwarder<RESOLVE_ENDIAN_AT_RUNTIME> ) +{ + assert( (m_endian_spec >= 1) && (m_endian_spec <= 3)); + switch(m_endian_spec) + { + case 1: + this->consume( ClassForwarder<BYTE_AGNOSTIC>() ); + break; + case 2: + this->consume( ClassForwarder<ANTLR_LITTLE_ENDIAN>() ); + break; + case 3: + this->consume( ClassForwarder<ANTLR_BIG_ENDIAN>() ); + break; + default: + break; + } +} + +template<class ImplTraits, class SuperType> +ANTLR_UINT32 UTF16_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 la, ClassForwarder<BYTE_AGNOSTIC> ) +{ + SuperType* input; + UTF32 ch; + UTF32 ch2; + UTF16* nextChar; + + // Find the input interface (a call to get_super(), as in the other LA overloads) and where we are currently pointing to + // in the input stream + // + input = this->get_super(); + nextChar = input->get_nextChar(); + + // If a positive offset then advance forward, else retreat + // + if (la >= 0) + { + while (--la > 0 && 
(ANTLR_UINT8*)nextChar < ((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() ) + { + // Advance our copy of the input pointer + // + // Next char in natural machine byte order + // + ch = *nextChar++; + + // If we have a surrogate pair then we need to consume + // a following valid LO surrogate. + // + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) + { + // If the 16 bits following the high surrogate are in the source buffer... + // + if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() )) + { + // Next character is in natural machine byte order + // + ch2 = *nextChar; + + // If it's a valid low surrogate, consume it + // + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) + { + // We consumed one 16 bit character + // + nextChar++; + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. + // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // + } + // Note that we did not check for an invalid low surrogate here, or that fact that the + // lo surrogate was missing. We just picked out one 16 bit character unless the character + // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. 
+ // + } + } + else + { + // We need to go backwards from our input point + // + while (la++ < 0 && (ANTLR_UINT8*)nextChar > (ANTLR_UINT8*)input->get_data() ) + { + // Get the previous 16 bit character + // + ch = *--nextChar; + + // If we found a low surrogate then go back one more character if + // the hi surrogate is there + // + if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) + { + ch2 = *(nextChar-1); + if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END) + { + // Yes, there is a high surrogate to match it so decrement one more and point to that + // + nextChar--; + } + } + } + } + + // Our local copy of nextChar is now pointing to either the correct character or end of file + // + // Input buffer size is always in bytes + // + if ( (ANTLR_UINT8*)nextChar >= (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() )) + { + return ANTLR_CHARSTREAM_EOF; + } + else + { + // Pick up the next 16 character (native machine byte order) + // + ch = *nextChar++; + + // If we have a surrogate pair then we need to consume + // a following valid LO surrogate. + // + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) + { + // If the 16 bits following the high surrogate are in the source buffer... + // + if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf())) + { + // Next character is in natural machine byte order + // + ch2 = *nextChar; + + // If it's a valid low surrogate, consume it + // + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) + { + // Construct the UTF32 code point + // + ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + + (ch2 - UNI_SUR_LOW_START) + halfBase; + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. 
+ // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // + } + } + return ch; +} + +template<class ImplTraits, class SuperType> +ANTLR_UINT32 UTF16_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 la, ClassForwarder<ANTLR_LITTLE_ENDIAN> ) +{ + SuperType* input; + UTF32 ch; + UTF32 ch2; + ANTLR_UCHAR* nextChar; + + // Find the input interface and where we are currently pointing to + // in the input stream + // + input = this->get_super(); + nextChar = input->get_nextChar(); + + // If a positive offset then advance forward, else retreat + // + if (la >= 0) + { + while (--la > 0 && (ANTLR_UINT8*)nextChar < ((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() ) + { + // Advance our copy of the input pointer + // + // Next char in Little Endian byte order + // + ch = (*nextChar) + (*(nextChar+1) << 8); + nextChar += 2; + + // If we have a surrogate pair then we need to consume + // a following valid LO surrogate. + // + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) + { + // If the 16 bits following the high surrogate are in the source buffer... + // + if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() )) + { + // Next character is in little endian byte order + // + ch2 = (*nextChar) + (*(nextChar+1) << 8); + + // If it's a valid low surrogate, consume it + // + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) + { + // We consumed one 16 bit character + // + nextChar += 2; + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. + // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // + } + // Note that we did not check for an invalid low surrogate here, or that fact that the + // lo surrogate was missing. 
We just picked out one 16 bit character unless the character + // was a valid hi surrogate, in which case we consumed two 16 bit characters. + // + } + } + else + { + // We need to go backwards from our input point + // + while (la++ < 0 && (ANTLR_UINT8*)nextChar > (ANTLR_UINT8*)input->get_data() ) + { + // Get the previous 16 bit character: read the two bytes behind the cursor, low byte first + // + ch = *(nextChar - 2) + (*(nextChar - 1) << 8); + nextChar -= 2; + + // If we found a low surrogate then go back one more character if + // the hi surrogate is there (and we are not already at the start of the buffer) + // + if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END && (ANTLR_UINT8*)nextChar > (ANTLR_UINT8*)input->get_data()) + { + ch2 = *(nextChar - 2) + (*(nextChar - 1) << 8); + if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END) + { + // Yes, there is a high surrogate to match it so decrement one more and point to that + // + nextChar -=2; + } + } + } + } + + // Our local copy of nextChar is now pointing to either the correct character or end of file + // + // Input buffer size is always in bytes + // + if ( (ANTLR_UINT8*)nextChar >= (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf())) + { + return ANTLR_CHARSTREAM_EOF; + } + else + { + // Pick up the next 16 bit character (little endian byte order) + // + ch = (*nextChar) + (*(nextChar+1) << 8); + nextChar += 2; + + // If we have a surrogate pair then we need to consume + // a following valid LO surrogate. + // + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) + { + // If the 16 bits following the high surrogate are in the source buffer... 
+ // + if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf())) + { + // Next character is in little endian byte order + // + ch2 = (*nextChar) + (*(nextChar+1) << 8); + + // If it's a valid low surrogate, consume it + // + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) + { + // Construct the UTF32 code point + // + ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + + (ch2 - UNI_SUR_LOW_START) + halfBase; + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. + // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // + } + } + return ch; +} + +template<class ImplTraits, class SuperType> +ANTLR_UINT32 UTF16_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 la, ClassForwarder<ANTLR_BIG_ENDIAN> ) +{ + SuperType* input; + UTF32 ch; + UTF32 ch2; + ANTLR_UCHAR* nextChar; + + // Find the input interface and where we are currently pointing to + // in the input stream + // + input = this->get_super(); + nextChar = input->get_nextChar(); + + // If a positive offset then advance forward, else retreat + // + if (la >= 0) + { + while (--la > 0 && (ANTLR_UINT8*)nextChar < ((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() ) + { + // Advance our copy of the input pointer + // + // Next char in Big Endian byte order + // + ch = ((*nextChar) << 8) + *(nextChar+1); + nextChar += 2; + + // If we have a surrogate pair then we need to consume + // a following valid LO surrogate. + // + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) + { + // If the 16 bits following the high surrogate are in the source buffer... 
+ // + if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf())) + { + // Next character is in big endian byte order + // + ch2 = ((*nextChar) << 8) + *(nextChar+1); + + // If it's a valid low surrogate, consume it + // + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) + { + // We consumed one 16 bit character + // + nextChar += 2; + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. + // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // + } + // Note that we did not check for an invalid low surrogate here, or that fact that the + // lo surrogate was missing. We just picked out one 16 bit character unless the character + // was a valid hi surrogate, in which case we consumed two 16 bit characters. + // + } + } + else + { + // We need to go backwards from our input point + // + while (la++ < 0 && (ANTLR_UINT8*)nextChar > (ANTLR_UINT8*)input->get_data() ) + { + // Get the previous 16 bit character: read the two bytes behind the cursor, high byte first + // + ch = (*(nextChar - 2) << 8) + *(nextChar - 1); + nextChar -= 2; + + // If we found a low surrogate then go back one more character if + // the hi surrogate is there (and we are not already at the start of the buffer) + // + if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END && (ANTLR_UINT8*)nextChar > (ANTLR_UINT8*)input->get_data()) + { + ch2 = (*(nextChar - 2) << 8) + *(nextChar - 1); + if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END) + { + // Yes, there is a high surrogate to match it so decrement one more and point to that + // + nextChar -=2; + } + } + } + } + + // Our local copy of nextChar is now pointing to either the correct character or end of file + // + // Input buffer size is always in bytes + // + if ( (ANTLR_UINT8*)nextChar >= (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf())) + { + return ANTLR_CHARSTREAM_EOF; + } + else + { + // Pick up the next 16 bit character (big endian byte order) + // + ch = ((*nextChar) << 8) + *(nextChar+1); + nextChar += 2; + + // If we have a surrogate pair then we 
need to consume + // a following valid LO surrogate. + // + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) + { + // If the 16 bits following the high surrogate are in the source buffer... + // + if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf())) + { + // Next character is in big endian byte order + // + ch2 = ((*nextChar) << 8) + *(nextChar+1); + + // If it's a valid low surrogate, consume it + // + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) + { + // Construct the UTF32 code point + // + ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + + (ch2 - UNI_SUR_LOW_START) + halfBase; + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. + // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // + } + } + return ch; +} + +template<class ImplTraits, class SuperType> +void UTF16_IntStream<ImplTraits, SuperType>::consume( ClassForwarder<BYTE_AGNOSTIC> ) +{ + SuperType* input; + UTF32 ch; + UTF32 ch2; + + input = this->get_super(); + + // Buffer size is always in bytes + // + if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) ) + { + // Indicate one more character in this line + // + input->inc_charPositionInLine(); + + if ((ANTLR_UCHAR)(*(input->get_nextChar())) == input->get_newlineChar()) + { + // Reset for start of a new line of input + // + input->inc_line(); + input->set_charPositionInLine(0); + input->set_currentLine( input->get_nextChar() + 1 ); + } + + // Increment to next character position, accounting for any surrogates + // + // Next char in natural machine byte order + // + ch = *(input->get_nextChar()); + + // We consumed one 16 bit character + // + input->set_nextChar( input->get_nextChar() + 1 ); + + // If we have a surrogate pair then we need to consume + // a following valid LO surrogate. 
+ // + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { + + // If the 16 bits following the high surrogate are in the source buffer... + // + if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) ) + { + // Next character is in natural machine byte order + // + ch2 = *(input->get_nextChar()); + + // If it's a valid low surrogate, consume it + // + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) + { + // We consumed one 16 bit character + // + input->set_nextChar( input->get_nextChar() + 1 ); + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. + // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // + } + // Note that we did not check for an invalid low surrogate here, or that fact that the + // lo surrogate was missing. We just picked out one 16 bit character unless the character + // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. 
+ // + } + +} + +template<class ImplTraits, class SuperType> +void UTF16_IntStream<ImplTraits, SuperType>::consume( ClassForwarder<ANTLR_LITTLE_ENDIAN> ) +{ + SuperType* input; + UTF32 ch; + UTF32 ch2; + + input = this->get_super(); + + // Buffer size is always in bytes + // + if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) ) + { + // Indicate one more character in this line + // + input->inc_charPositionInLine(); + + if ((ANTLR_UCHAR)(*(input->get_nextChar())) == input->get_newlineChar()) + { + // Reset for start of a new line of input + // + input->inc_line(); + input->set_charPositionInLine(0); + input->set_currentLine(input->get_nextChar() + 1); + } + + // Increment to next character position, accounting for any surrogates + // + // Next char in litle endian form + // + ch = *((ANTLR_UINT8*)input->get_nextChar()) + (*((ANTLR_UINT8*)input->get_nextChar() + 1) <<8); + + // We consumed one 16 bit character + // + input->set_nextChar( input->get_nextChar() + 1); + + // If we have a surrogate pair then we need to consume + // a following valid LO surrogate. + // + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) + { + // If the 16 bits following the high surrogate are in the source buffer... + // + if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) ) + { + ch2 = *((ANTLR_UINT8*)input->get_nextChar()) + (*((ANTLR_UINT8*)input->get_nextChar() + 1) <<8); + + // If it's a valid low surrogate, consume it + // + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) + { + // We consumed one 16 bit character + // + input->set_nextChar( input->get_nextChar() + 1); + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. + // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // + } + // Note that we did not check for an invalid low surrogate here, or that fact that the + // lo surrogate was missing. 
We just picked out one 16 bit character unless the character + // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. + // + } +} + +template<class ImplTraits, class SuperType> +void UTF16_IntStream<ImplTraits, SuperType>::consume( ClassForwarder<ANTLR_BIG_ENDIAN> ) +{ + SuperType* input; + UTF32 ch; + UTF32 ch2; + + input = this->get_super(); + + // Buffer size is always in bytes + // + if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) ) + { + // Indicate one more character in this line + // + input->inc_charPositionInLine(); + + if ((ANTLR_UCHAR)(*(input->get_nextChar())) == input->get_newlineChar()) + { + // Reset for start of a new line of input + // + input->inc_line(); + input->set_charPositionInLine(0); + input->set_currentLine(input->get_nextChar() + 1); + } + + // Increment to next character position, accounting for any surrogates + // + // Next char in big endian form + // + ch = *((ANTLR_UINT8*)input->get_nextChar() + 1) + (*((ANTLR_UINT8*)input->get_nextChar() ) <<8); + + // We consumed one 16 bit character + // + input->set_nextChar( input->get_nextChar() + 1); + + // If we have a surrogate pair then we need to consume + // a following valid LO surrogate. + // + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) + { + // If the 16 bits following the high surrogate are in the source buffer... + // + if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) ) + { + // Big endian + // + ch2 = *((ANTLR_UINT8*)input->get_nextChar() + 1) + (*((ANTLR_UINT8*)input->get_nextChar() ) <<8); + + // If it's a valid low surrogate, consume it + // + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) + { + // We consumed one 16 bit character + // + input->set_nextChar( input->get_nextChar() + 1); + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. 
+ // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // + } + // Note that we did not check for an invalid low surrogate here, or that fact that the + // lo surrogate was missing. We just picked out one 16 bit character unless the character + // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. + // + } +} + +template<class ImplTraits, class SuperType> +ANTLR_UINT32 UTF32_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 i) +{ + return this->LA( i, ClassForwarder<typename ImplTraits::Endianness>() ); +} + +template<class ImplTraits, class SuperType> +ANTLR_MARKER UTF32_IntStream<ImplTraits, SuperType>::index() +{ + SuperType* input = this->get_super(); + return (ANTLR_MARKER)(input->get_nextChar()); +} + +template<class ImplTraits, class SuperType> +void UTF32_IntStream<ImplTraits, SuperType>::seek(ANTLR_MARKER seekPoint) +{ + SuperType* input; + + input = this->get_super(); + + // If the requested seek point is less than the current + // input point, then we assume that we are resetting from a mark + // and do not need to scan, but can just set to there as rewind will + // reset line numbers and so on. 
+ // + if (seekPoint <= (ANTLR_MARKER)(input->get_nextChar())) + { + input->set_nextChar( static_cast<typename ImplTraits::DataType*>(seekPoint) ); + } + else + { + // Call consume until we reach the asked for seek point or EOF + // + while( (this->LA(1) != ANTLR_CHARSTREAM_EOF) && (seekPoint < (ANTLR_MARKER)input->get_nextChar()) ) + { + this->consume(); + } + } + +} + +template<class ImplTraits, class SuperType> +void UTF32_IntStream<ImplTraits, SuperType>::setupIntStream(bool machineBigEndian, bool inputBigEndian) +{ + SuperType* super = this->get_super(); + super->set_charByteSize(4); + + this->findout_endian_spec(machineBigEndian, inputBigEndian); +} + +template<class ImplTraits, class SuperType> +ANTLR_UINT32 UTF32_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 la, ClassForwarder<BYTE_AGNOSTIC> ) +{ + SuperType* input = this->get_super(); + + if (( input->get_nextChar() + la - 1) >= (input->get_data() + input->get_sizeBuf()/4 )) + { + return ANTLR_CHARSTREAM_EOF; + } + else + { + return (ANTLR_UCHAR)(*(input->get_nextChar() + la - 1)); + } +} + +template<class ImplTraits, class SuperType> +ANTLR_UINT32 UTF32_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 la, ClassForwarder<ANTLR_LITTLE_ENDIAN> ) +{ + SuperType* input = this->get_super(); + + if (( input->get_nextChar() + la - 1) >= (input->get_data() + input->get_sizeBuf()/4 )) + { + return ANTLR_CHARSTREAM_EOF; + } + else + { + ANTLR_UCHAR c; + + c = (ANTLR_UCHAR)(*(input->get_nextChar() + la - 1)); + + // Swap Endianess to Big Endian + // + return (c>>24) | ((c<<8) & 0x00FF0000) | ((c>>8) & 0x0000FF00) | (c<<24); + } +} + +template<class ImplTraits, class SuperType> +ANTLR_UINT32 UTF32_IntStream<ImplTraits, SuperType>::LA( ANTLR_INT32 la, ClassForwarder<ANTLR_BIG_ENDIAN> ) +{ + SuperType* input = this->get_super(); + + if (( input->get_nextChar() + la - 1) >= (input->get_data() + input->get_sizeBuf()/4 )) + { + return ANTLR_CHARSTREAM_EOF; + } + else + { + ANTLR_UCHAR c; + + c = 
(ANTLR_UCHAR)(*(input->get_nextChar() + la - 1)); + + // Swap Endianess to Little Endian + // + return (c>>24) | ((c<<8) & 0x00FF0000) | ((c>>8) & 0x0000FF00) | (c<<24); + } +} + +template<class ImplTraits, class SuperType> +void UTF32_IntStream<ImplTraits, SuperType>::consume() +{ + SuperType* input = this->get_super(); + + // SizeBuf is always in bytes + // + if ( input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/4 )) + { + /* Indicate one more character in this line + */ + input->inc_charPositionInLine(); + + if ((ANTLR_UCHAR)(*(input->get_nextChar())) == input->get_newlineChar()) + { + /* Reset for start of a new line of input + */ + input->inc_line(); + input->set_charPositionInLine(0); + input->set_currentLine( input->get_nextChar() + 1 ); + } + + /* Increment to next character position + */ + input->set_nextChar( input->get_nextChar() + 1 ); + } +} + +template<class ImplTraits, class SuperType> +void UTF8_IntStream<ImplTraits, SuperType>::setupIntStream(bool, bool) +{ + SuperType* super = this->get_super(); + super->set_charByteSize(0); +} + +// ------------------------------------------------------ +// Following is from Unicode.org (see antlr3convertutf.c) +// + +/// Index into the table below with the first byte of a UTF-8 sequence to +/// get the number of trailing bytes that are supposed to follow it. +/// Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is +/// left as-is for anyone who may want to do such conversion, which was +/// allowed in earlier algorithms. 
+/// +template<class ImplTraits, class SuperType> +const ANTLR_UINT32* UTF8_IntStream<ImplTraits, SuperType>::TrailingBytesForUTF8() +{ + static const ANTLR_UINT32 trailingBytesForUTF8[256] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 + }; + + return trailingBytesForUTF8; +} + +/// Magic values subtracted from a buffer value during UTF8 conversion. +/// This table contains as many values as there might be trailing bytes +/// in a UTF-8 sequence. +/// +template<class ImplTraits, class SuperType> +const UTF32* UTF8_IntStream<ImplTraits, SuperType>::OffsetsFromUTF8() +{ + static const UTF32 offsetsFromUTF8[6] = + { 0x00000000UL, 0x00003080UL, 0x000E2080UL, + 0x03C82080UL, 0xFA082080UL, 0x82082080UL + }; + return offsetsFromUTF8; +} + +// End of Unicode.org tables +// ------------------------- + + +/** \brief Consume the next character in a UTF8 input stream + * + * \param input Input stream context pointer + */ +template<class ImplTraits, class SuperType> +void UTF8_IntStream<ImplTraits, SuperType>::consume() +{ + SuperType* input = this->get_super(); + const ANTLR_UINT32* trailingBytesForUTF8 = UTF8_IntStream::TrailingBytesForUTF8(); + const UTF32* offsetsFromUTF8 = UTF8_IntStream::OffsetsFromUTF8(); + + ANTLR_UINT32 extraBytesToRead; + ANTLR_UCHAR ch; + ANTLR_UINT8* nextChar; + + nextChar = input->get_nextChar(); + + if (nextChar < (input->get_data() + input->get_sizeBuf())) + { + // Indicate one more character in this line + // + input->inc_charPositionInLine(); + + // Are 
there more bytes needed to make up the whole thing? + // + extraBytesToRead = trailingBytesForUTF8[*nextChar]; + + if ((nextChar + extraBytesToRead) >= (input->get_data() + input->get_sizeBuf())) + { + input->set_nextChar( input->get_data() + input->get_sizeBuf() ); + return; + } + + // Cases deliberately fall through (see note A in antlrconvertutf.c) + // Legal UTF8 is only 4 bytes but 6 bytes could be used in old UTF8 so + // we allow it. + // + ch = 0; + switch (extraBytesToRead) + { + case 5: ch += *nextChar++; ch <<= 6; + case 4: ch += *nextChar++; ch <<= 6; + case 3: ch += *nextChar++; ch <<= 6; + case 2: ch += *nextChar++; ch <<= 6; + case 1: ch += *nextChar++; ch <<= 6; + case 0: ch += *nextChar++; + } + + // Magically correct the input value + // + ch -= offsetsFromUTF8[extraBytesToRead]; + if (ch == input->get_newlineChar()) + { + /* Reset for start of a new line of input + */ + input->inc_line(); + input->set_charPositionInLine(0); + input->set_currentLine(nextChar); + } + + // Update input pointer + // + input->set_nextChar(nextChar); + } +} + +/** \brief Return the input element assuming a UTF8 input + * + * \param[in] input Input stream context pointer + * \param[in] la 1 based offset of next input stream element + * + * \return Next input character in internal ANTLR3 encoding (UTF32) + */ +template<class ImplTraits, class SuperType> +ANTLR_UCHAR UTF8_IntStream<ImplTraits, SuperType>::LA(ANTLR_INT32 la) +{ + SuperType* input = this->get_super(); + const ANTLR_UINT32* trailingBytesForUTF8 = UTF8_IntStream::TrailingBytesForUTF8(); + const UTF32* offsetsFromUTF8 = UTF8_IntStream::OffsetsFromUTF8(); + ANTLR_UINT32 extraBytesToRead; + ANTLR_UCHAR ch; + ANTLR_UINT8* nextChar; + + nextChar = input->get_nextChar(); + + // Do we need to traverse forwards or backwards? + // - LA(0) is treated as LA(1) and we assume that the nextChar is + // already positioned. 
+ // - LA(n+) ; n>1 means we must traverse forward n-1 characters catering for UTF8 encoding + // - LA(-n) means we must traverse backwards n chracters + // + if (la > 1) { + + // Make sure that we have at least one character left before trying to + // loop through the buffer. + // + if (nextChar < (input->get_data() + input->get_sizeBuf())) + { + // Now traverse n-1 characters forward + // + while (--la > 0) + { + // Does the next character require trailing bytes? + // If so advance the pointer by that many bytes as well as advancing + // one position for what will be at least a single byte character. + // + nextChar += trailingBytesForUTF8[*nextChar] + 1; + + // Does that calculation take us past the byte length of the buffer? + // + if (nextChar >= (input->get_data() + input->get_sizeBuf())) + { + return ANTLR_CHARSTREAM_EOF; + } + } + } + else + { + return ANTLR_CHARSTREAM_EOF; + } + } + else + { + // LA is negative so we decrease the pointer by n character positions + // + while (nextChar > input->get_data() && la++ < 0) + { + // Traversing backwards in UTF8 means decermenting by one + // then continuing to decrement while ever a character pattern + // is flagged as being a trailing byte of an encoded code point. + // Trailing UTF8 bytes always start with 10 in binary. We assumne that + // the UTF8 is well formed and do not check boundary conditions + // + nextChar--; + while ((*nextChar & 0xC0) == 0x80) + { + nextChar--; + } + } + } + + // nextChar is now pointing at the UTF8 encoded character that we need to + // decode and return. + // + // Are there more bytes needed to make up the whole thing? 
+ // + extraBytesToRead = trailingBytesForUTF8[*nextChar]; + if (nextChar + extraBytesToRead >= (input->get_data() + input->get_sizeBuf())) + { + return ANTLR_CHARSTREAM_EOF; + } + + // Cases deliberately fall through (see note A in antlrconvertutf.c) + // + ch = 0; + switch (extraBytesToRead) + { + case 5: ch += *nextChar++; ch <<= 6; + case 4: ch += *nextChar++; ch <<= 6; + case 3: ch += *nextChar++; ch <<= 6; + case 2: ch += *nextChar++; ch <<= 6; + case 1: ch += *nextChar++; ch <<= 6; + case 0: ch += *nextChar++; + } + + // Magically correct the input value + // + ch -= offsetsFromUTF8[extraBytesToRead]; + + return ch; +} + +template<class ImplTraits> +TokenIntStream<ImplTraits>::TokenIntStream() +{ + m_cachedSize = 0; +} + +template<class ImplTraits> +ANTLR_UINT32 TokenIntStream<ImplTraits>::get_cachedSize() const +{ + return m_cachedSize; +} + +template<class ImplTraits> +void TokenIntStream<ImplTraits>::set_cachedSize( ANTLR_UINT32 cachedSize ) +{ + m_cachedSize = cachedSize; +} + +/** Move the input pointer to the next incoming token. The stream + * must become active with LT(1) available. consume() simply + * moves the input pointer so that LT(1) points at the next + * input symbol. Consume at least one token. + * + * Walk past any token not on the channel the parser is listening to. 
+ */ +template<class ImplTraits> +void TokenIntStream<ImplTraits>::consume() +{ + TokenStreamType* cts = static_cast<TokenStreamType*>(this); + + if((ANTLR_UINT32)cts->get_p() < m_cachedSize ) + { + cts->inc_p(); + cts->set_p( cts->skipOffTokenChannels(cts->get_p()) ); + } +} +template<class ImplTraits> +void TokenIntStream<ImplTraits>::consumeInitialHiddenTokens() +{ + ANTLR_MARKER first; + ANTLR_INT32 i; + TokenStreamType* ts; + + ts = this->get_super(); + first = this->index(); + + for (i=0; i<first; i++) + { + ts->get_debugger()->consumeHiddenToken(ts->get(i)); + } + + ts->set_initialStreamState(false); +} + + +template<class ImplTraits> +ANTLR_UINT32 TokenIntStream<ImplTraits>::LA( ANTLR_INT32 i ) +{ + const CommonTokenType* tok; + TokenStreamType* ts = static_cast<TokenStreamType*>(this); + + tok = ts->LT(i); + + if (tok != NULL) + { + return tok->get_type(); + } + else + { + return CommonTokenType::TOKEN_INVALID; + } + +} + +template<class ImplTraits> +ANTLR_MARKER TokenIntStream<ImplTraits>::mark() +{ + BaseType::m_lastMarker = this->index(); + return BaseType::m_lastMarker; +} + +template<class ImplTraits> +ANTLR_UINT32 TokenIntStream<ImplTraits>::size() +{ + if (this->get_cachedSize() > 0) + { + return this->get_cachedSize(); + } + TokenStreamType* cts = this->get_super(); + + this->set_cachedSize( static_cast<ANTLR_UINT32>(cts->get_tokens().size()) ); + return this->get_cachedSize(); +} + +template<class ImplTraits> +void TokenIntStream<ImplTraits>::release() +{ + return; +} + +template<class ImplTraits> +ANTLR_MARKER TokenIntStream<ImplTraits>::tindex() +{ + return this->get_super()->get_p(); +} + +template<class ImplTraits> +void TokenIntStream<ImplTraits>::rewindLast() +{ + this->rewind( this->get_lastMarker() ); +} + +template<class ImplTraits> +void TokenIntStream<ImplTraits>::rewind(ANTLR_MARKER marker) +{ + return this->seek(marker); +} + +template<class ImplTraits> +void TokenIntStream<ImplTraits>::seek(ANTLR_MARKER index) +{ + TokenStreamType* 
cts = static_cast<TokenStreamType*>(this); + + cts->set_p( static_cast<ANTLR_INT32>(index) ); +} + + +/// Return a string that represents the name assoicated with the input source +/// +/// /param[in] is The ANTLR3_INT_STREAM interface that is representing this token stream. +/// +/// /returns +/// /implements ANTLR3_INT_STREAM_struct::getSourceName() +/// +template<class ImplTraits> +typename TokenIntStream<ImplTraits>::StringType +TokenIntStream<ImplTraits>::getSourceName() +{ + // Slightly convoluted as we must trace back to the lexer's input source + // via the token source. The streamName that is here is not initialized + // because this is a token stream, not a file or string stream, which are the + // only things that have a context for a source name. + // + return this->get_super()->get_tokenSource()->get_fileName(); +} + +template<class ImplTraits> +void TreeNodeIntStream<ImplTraits>::consume() +{ + TreeNodeStreamType* ctns = this->get_super(); + if( ctns->get_p() == -1 ) + ctns->fillBufferRoot(); + ctns->inc_p(); +} +template<class ImplTraits> +ANTLR_MARKER TreeNodeIntStream<ImplTraits>::tindex() +{ + TreeNodeStreamType* ctns = this->get_super(); + return (ANTLR_MARKER)(ctns->get_p()); +} + +template<class ImplTraits> +ANTLR_UINT32 TreeNodeIntStream<ImplTraits>::LA(ANTLR_INT32 i) +{ + TreeNodeStreamType* tns = this->get_super(); + + // Ask LT for the 'token' at that position + // + TreeTypePtr t = tns->LT(i); + + if (t == NULL) + { + return CommonTokenType::TOKEN_INVALID; + } + + // Token node was there so return the type of it + // + return t->get_type(); +} + +template<class ImplTraits> +ANTLR_MARKER TreeNodeIntStream<ImplTraits>::mark() +{ + TreeNodeStreamType* ctns = this->get_super(); + + if (ctns->get_p() == -1) + { + ctns->fillBufferRoot(); + } + + // Return the current mark point + // + this->set_lastMarker( this->index() ); + + return this->get_lastMarker(); + +} + +template<class ImplTraits> +void 
TreeNodeIntStream<ImplTraits>::release(ANTLR_MARKER /*marker*/) +{ + +} + +template<class ImplTraits> +void TreeNodeIntStream<ImplTraits>::rewindMark(ANTLR_MARKER marker) +{ + this->seek(marker); +} + +template<class ImplTraits> +void TreeNodeIntStream<ImplTraits>::rewindLast() +{ + this->seek( this->get_lastMarker() ); +} + +template<class ImplTraits> +void TreeNodeIntStream<ImplTraits>::seek(ANTLR_MARKER index) +{ + TreeNodeStreamType* ctns = this->get_super(); + ctns->set_p( ANTLR_UINT32_CAST(index) ); +} + +template<class ImplTraits> +ANTLR_UINT32 TreeNodeIntStream<ImplTraits>::size() +{ + TreeNodeStreamType* ctns = this->get_super(); + + if (ctns->get_p() == -1) + { + ctns->fillBufferRoot(); + } + + return ctns->get_nodes().size(); +} + + +} diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3lexer.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3lexer.hpp new file mode 100644 index 0000000000..d23e65dea0 --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3lexer.hpp @@ -0,0 +1,247 @@ +/** \file + * Base interface for any ANTLR3 lexer. + * + * An ANLTR3 lexer builds from two sets of components: + * + * - The runtime components that provide common functionality such as + * traversing character streams, building tokens for output and so on. + * - The generated rules and struutre of the actual lexer, which call upon the + * runtime components. + * + * A lexer class contains a character input stream, a base recognizer interface + * (which it will normally implement) and a token source interface (which it also + * implements. The Tokensource interface is called by a token consumer (such as + * a parser, but in theory it can be anything that wants a set of abstract + * tokens in place of a raw character stream. + * + * So then, we set up a lexer in a sequence akin to: + * + * - Create a character stream (something which implements ANTLR3_INPUT_STREAM) + * and initialize it. 
+ * - Create a lexer interface and tell it where its input stream is. + * This will cause the creation of a base recognizer class, which it will + * override with its own implementations of some methods. The lexer creator + * can also then in turn override anything it likes. + * - The lexer token source interface is then passed to some interface that + * knows how to use it, by calling for a next token. + * - When a next token is called, let ze lexing begin. + * + */ +#ifndef _ANTLR3_LEXER_HPP +#define _ANTLR3_LEXER_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +/* Definitions + */ + +namespace antlr3 { + +static const ANTLR_UINT32 ANTLR_STRING_TERMINATOR = 0xFFFFFFFF; + +template<class ImplTraits> +class Lexer : public ImplTraits::template RecognizerType< typename ImplTraits::InputStreamType >, + public ImplTraits::TokenSourceType +{ +public: + typedef typename ImplTraits::AllocPolicyType AllocPolicyType; + typedef typename ImplTraits::InputStreamType InputStreamType; + typedef InputStreamType StreamType; + typedef typename InputStreamType::IntStreamType IntStreamType; + typedef typename ImplTraits::CommonTokenType CommonTokenType; + typedef typename ImplTraits::StreamDataType TokenType; + typedef typename ImplTraits::StringType StringType; + typedef typename ImplTraits::StringStreamType StringStreamType; + typedef typename ImplTraits::template RecognizerType< InputStreamType > RecognizerType; + typedef typename RecognizerType::RecognizerSharedStateType RecognizerSharedStateType; + typedef typename ImplTraits::template ExceptionBaseType<InputStreamType> ExceptionBaseType; + typedef typename ImplTraits::BitsetListType BitsetListType; + typedef typename ImplTraits::TokenSourceType TokenSourceType; + + typedef typename RecognizerSharedStateType::RuleMemoType RuleMemoType; + typedef typename RecognizerType::DebugEventListenerType DebuggerType; + +private: + /** A pointer to the character stream whence this lexer is receiving + * characters. 
+ * TODO: I may come back to this and implement charstream outside + * the input stream as per the java implementation. + */ + InputStreamType* m_input; + +public: + Lexer(ANTLR_UINT32 sizeHint, RecognizerSharedStateType* state); + Lexer(ANTLR_UINT32 sizeHint, InputStreamType* input, RecognizerSharedStateType* state); + + InputStreamType* get_input() const; + IntStreamType* get_istream() const; + RecognizerType* get_rec(); + const RecognizerType* get_rec() const; + TokenSourceType* get_tokSource(); + + //functions used in .stg file + const RecognizerType* get_recognizer() const; + RecognizerSharedStateType* get_lexstate() const; + void set_lexstate( RecognizerSharedStateType* lexstate ); + const TokenSourceType* get_tokSource() const; + CommonTokenType* get_ltoken() const; + void set_ltoken( const CommonTokenType* ltoken ); + bool hasFailed() const; + ANTLR_INT32 get_backtracking() const; + void inc_backtracking(); + void dec_backtracking(); + bool get_failedflag() const; + void set_failedflag( bool failed ); + InputStreamType* get_strstream() const; + ANTLR_MARKER index() const; + void seek(ANTLR_MARKER index); + const CommonTokenType* EOF_Token() const; + bool hasException() const; + ExceptionBaseType* get_exception() const; + void constructEx(); + void lrecover(); + ANTLR_MARKER mark(); + void rewind(ANTLR_MARKER marker); + void rewindLast(); + void setText( const StringType& text ); + void skip(); + RuleMemoType* getRuleMemo() const; + DebuggerType* get_debugger() const; + void setRuleMemo(RuleMemoType* rulememo); + ANTLR_UINT32 LA(ANTLR_INT32 i); + void consume(); + void memoize(ANTLR_MARKER ruleIndex, ANTLR_MARKER ruleParseStart); + bool haveParsedRule(ANTLR_MARKER ruleIndex); + + /** Pointer to a function that sets the charstream source for the lexer and + * causes it to be reset. + */ + void setCharStream(InputStreamType* input); + + /*! + * \brief + * Change to a new input stream, remembering the old one. 
+ * + * \param lexer + * Pointer to the lexer instance to switch input streams for. + * + * \param input + * New input stream to install as the current one. + * + * Switches the current character input stream to + * a new one, saving the old one, which we will revert to at the end of this + * new one. + */ + void pushCharStream(InputStreamType* input); + + /*! + * \brief + * Stops using the current input stream and reverts to any prior + * input stream on the stack. + * + * \param lexer + * Description of parameter lexer. + * + * Pointer to a function that abandons the current input stream, whether it + * is empty or not and reverts to the previous stacked input stream. + * + * \remark + * The function fails silently if there are no prior input streams. + */ + void popCharStream(); + + /** Function that emits (a copy of ) the supplied token as the next token in + * the stream. + */ + void emit(const CommonTokenType* token); + + /** Pointer to a function that constructs a new token from the lexer stored information + */ + CommonTokenType* emit(); + + /** Pointer to a function that attempts to match and consume the specified string from the input + * stream. Note that strings muse be passed as terminated arrays of ANTLR3_UCHAR. Strings are terminated + * with 0xFFFFFFFF, which is an invalid UTF32 character + */ + bool matchs(ANTLR_UCHAR* string); + + /** Pointer to a function that matches and consumes the specified character from the input stream. + * The input stream is required to provide characters via LA() as UTF32 characters. The default lexer + * implementation is source encoding agnostic and so input streams do not generally need to + * override the default implmentation. 
+ */ + bool matchc(ANTLR_UCHAR c); + + /** Pointer to a function that matches any character in the supplied range (I suppose it could be a token range too + * but this would only be useful if the tokens were in tsome guaranteed order which is + * only going to happen with a hand crafted token set). + */ + bool matchRange(ANTLR_UCHAR low, ANTLR_UCHAR high); + + /** Pointer to a function that matches the next token/char in the input stream + * regardless of what it actaully is. + */ + void matchAny(); + + /** Pointer to a function that recovers from an error found in the input stream. + * Generally, this will be a #ANTLR3_EXCEPTION_NOVIABLE_ALT but it could also + * be from a mismatched token that the (*match)() could not recover from. + */ + void recover(); + + /** Function to return the current line number in the input stream + */ + ANTLR_UINT32 getLine(); + ANTLR_MARKER getCharIndex(); + ANTLR_UINT32 getCharPositionInLine(); + + /** Function to return the text so far for the current token being generated + */ + StringType getText(); + + //Other utility functions + void fillExceptionData( ExceptionBaseType* ex ); + + /** Default lexer error handler (works for 8 bit streams only!!!) 
+ */ + void displayRecognitionError( ANTLR_UINT8** tokenNames, ExceptionBaseType* ex); + void exConstruct(); + TokenType* getMissingSymbol( IntStreamType* istream, ExceptionBaseType* e, + ANTLR_UINT32 expectedTokenType, BitsetListType* follow); + + /** Pointer to a function that knows how to free the resources of a lexer + */ + ~Lexer(); +}; + +} + +#include "antlr3lexer.inl" + +#endif diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3lexer.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3lexer.inl new file mode 100644 index 0000000000..bf6960569f --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3lexer.inl @@ -0,0 +1,592 @@ +namespace antlr3 { + +template<class ImplTraits> +Lexer<ImplTraits>::Lexer(ANTLR_UINT32 sizeHint, RecognizerSharedStateType* state) + :Lexer<ImplTraits>::RecognizerType(sizeHint, state) + ,m_input(NULL) +{ +} + +template<class ImplTraits> +Lexer<ImplTraits>::Lexer(ANTLR_UINT32 sizeHint, InputStreamType* input, RecognizerSharedStateType* state) + :Lexer<ImplTraits>::RecognizerType(sizeHint, state) +{ + this->setCharStream(input); +} + +template<class ImplTraits> +typename Lexer<ImplTraits>::InputStreamType* Lexer<ImplTraits>::get_input() const +{ + return m_input; +} + +template<class ImplTraits> +typename Lexer<ImplTraits>::IntStreamType* Lexer<ImplTraits>::get_istream() const +{ + return m_input; +} + +template<class ImplTraits> +typename Lexer<ImplTraits>::RecognizerType* Lexer<ImplTraits>::get_rec() +{ + return this; +} + +template<class ImplTraits> +typename Lexer<ImplTraits>::TokenSourceType* Lexer<ImplTraits>::get_tokSource() +{ + return this; +} + +template<class ImplTraits> +void Lexer<ImplTraits>::displayRecognitionError( ANTLR_UINT8** , ExceptionBaseType* ex) +{ + StringStreamType err_stream; + + // See if there is a 'filename' we can use + // + if( ex->getName().empty() ) + { + err_stream << "-unknown source-("; + } + else + { + err_stream << ex->get_streamName().c_str(); + err_stream << "("; + } + 
err_stream << ex->get_line() << ")"; + + err_stream << ": lexer error " << ex->getName() << '(' << ex->getType() << ')' << " :\n\t" + << ex->get_message() << " at position [" << ex->get_line() << ", " + << ex->get_charPositionInLine()+1 << "], "; + + { + ANTLR_UINT32 width; + + width = ANTLR_UINT32_CAST(( (ANTLR_UINT8*)(m_input->get_data()) + + (m_input->size() )) - (ANTLR_UINT8*)( ex->get_index() )); + + if (width >= 1) + { + if (isprint(ex->get_c() )) + { + err_stream << "near '" << (typename StringType::value_type) ex->get_c() << "' :\n"; + } + else + { + err_stream << "near char(" << std::hex << ex->get_c() << std::dec << ") :\n"; + } + err_stream << "\t"; + err_stream.width( width > 20 ? 20 : width ); + err_stream << (typename StringType::const_pointer)ex->get_index() << "\n"; + } + else + { + err_stream << "(end of input).\n\t This indicates a poorly specified lexer RULE\n\t or unterminated input element such as: \"STRING[\"]\n"; + err_stream << "\t The lexer was matching from line " + << this->get_state()->get_tokenStartLine() + << ", offset " << this->get_state()->get_tokenStartCharPositionInLine() + << ", which\n\t "; + width = ANTLR_UINT32_CAST(((ANTLR_UINT8*)(m_input->get_data() )+ + (m_input->size())) - + (ANTLR_UINT8*)(this->get_state()->get_tokenStartCharIndex() )); + + if (width >= 1) + { + err_stream << "looks like this:\n\t\t"; + err_stream.width( width > 20 ? 
20 : width ); + err_stream << (typename StringType::const_pointer)this->get_state()->get_tokenStartCharIndex() << "\n"; + } + else + { + err_stream << "is also the end of the line, so you must check your lexer rules\n"; + } + } + } + ImplTraits::displayRecognitionError( err_stream.str() ); +} + +template<class ImplTraits> +void Lexer<ImplTraits>::fillExceptionData( ExceptionBaseType* ex ) +{ + ex->set_c( m_input->LA(1) ); /* Current input character */ + ex->set_line( m_input->get_line() ); /* Line number comes from stream */ + ex->set_charPositionInLine( m_input->get_charPositionInLine() ); /* Line offset also comes from the stream */ + ex->set_index( m_input->index() ); + ex->set_streamName( m_input->get_fileName() ); + ex->set_message( "Unexpected character" ); +} + +template<class ImplTraits> +void Lexer<ImplTraits>::setCharStream(InputStreamType* input) +{ + /* Install the input interface + */ + m_input = input; + + /* Set the current token to nothing + */ + RecognizerSharedStateType* state = this->get_rec()->get_state(); + state->set_token_present( false ); + state->set_text(""); + state->set_tokenStartCharIndex(-1); + + /* Copy the name of the char stream to the token source + */ + this->get_tokSource()->set_fileName( input->get_fileName() ); +} + +template<class ImplTraits> +void Lexer<ImplTraits>::pushCharStream(InputStreamType* input) +{ + // We have a stack, so we can save the current input stream + // into it. + // + this->get_istream()->mark(); + this->get_rec()->get_state()->get_streams().push(this->get_input()); + + // And now we can install this new one + // + this->setCharStream(input); +} + +template<class ImplTraits> +void Lexer<ImplTraits>::popCharStream() +{ + InputStreamType* input; + + // If we do not have a stream stack or we are already at the + // stack bottom, then do nothing. 
+ // + typename RecognizerSharedStateType::StreamsType& streams = this->get_rec()->get_state()->get_streams(); + if ( streams.size() > 0) + { + // We just leave the current stream to its fate, we do not close + // it or anything as we do not know what the programmer intended + // for it. This method can always be overridden of course. + // So just find out what was currently saved on the stack and use + // that now, then pop it from the stack. + // + input = streams.top(); + streams.pop(); + + // Now install the stream as the current one. + // + this->setCharStream(input); + this->get_istream()->rewindLast(); + } + return; +} + +template<class ImplTraits> +void Lexer<ImplTraits>::emit(const CommonTokenType* token) +{ + this->get_rec()->get_state()->set_token(token); +} + +template<class ImplTraits> +typename Lexer<ImplTraits>::CommonTokenType* Lexer<ImplTraits>::emit() +{ + /* We could check pointers to token factories and so on, but + * we are in code that we want to run as fast as possible + * so we are not checking any errors. So make sure you have installed an input stream before + * trying to emit a new token. 
+ */ + RecognizerSharedStateType* state = this->get_rec()->get_state(); + state->set_token_present(true); + CommonTokenType* token = state->get_token(); + token->set_input( this->get_input() ); + + /* Install the supplied information, and some other bits we already know + * get added automatically, such as the input stream it is associated with + * (though it can all be overridden of course) + */ + token->set_type( state->get_type() ); + token->set_channel( state->get_channel() ); + token->set_startIndex( state->get_tokenStartCharIndex() ); + token->set_stopIndex( this->getCharIndex() - 1 ); + token->set_line( state->get_tokenStartLine() ); + token->set_charPositionInLine( state->get_tokenStartCharPositionInLine() ); + + token->set_tokText( state->get_text() ); + token->set_lineStart( this->get_input()->get_currentLine() ); + + return token; +} + +template<class ImplTraits> +Lexer<ImplTraits>::~Lexer() +{ + // This may have ben a delegate or delegator lexer, in which case the + // state may already have been freed (and set to NULL therefore) + // so we ignore the state if we don't have it. 
+ // + RecognizerSharedStateType* state = this->get_rec()->get_state(); + + if ( state != NULL) + { + state->get_streams().clear(); + } +} + +template<class ImplTraits> +bool Lexer<ImplTraits>::matchs(ANTLR_UCHAR* str ) +{ + RecognizerSharedStateType* state = this->get_rec()->get_state(); + while (*str != ANTLR_STRING_TERMINATOR) + { + if ( this->get_istream()->LA(1) != (*str)) + { + if ( state->get_backtracking() > 0) + { + state->set_failed(true); + return false; + } + + this->exConstruct(); + state->set_failed( true ); + + /* TODO: Implement exception creation more fully perhaps + */ + this->recover(); + return false; + } + + /* Matched correctly, do consume it + */ + this->get_istream()->consume(); + str++; + + } + /* Reset any failed indicator + */ + state->set_failed( false ); + return true; +} + +template<class ImplTraits> +bool Lexer<ImplTraits>::matchc(ANTLR_UCHAR c) +{ + if (this->get_istream()->LA(1) == c) + { + /* Matched correctly, do consume it + */ + this->get_istream()->consume(); + + /* Reset any failed indicator + */ + this->get_rec()->get_state()->set_failed( false ); + + return true; + } + + /* Failed to match, exception and recovery time. + */ + if(this->get_rec()->get_state()->get_backtracking() > 0) + { + this->get_rec()->get_state()->set_failed( true ); + return false; + } + + this->exConstruct(); + + /* TODO: Implement exception creation more fully perhaps + */ + this->recover(); + + return false; +} + +template<class ImplTraits> +bool Lexer<ImplTraits>::matchRange(ANTLR_UCHAR low, ANTLR_UCHAR high) +{ + ANTLR_UCHAR c; + + /* What is in the stream at the moment? + */ + c = this->get_istream()->LA(1); + if ( c >= low && c <= high) + { + /* Matched correctly, consume it + */ + this->get_istream()->consume(); + + /* Reset any failed indicator + */ + this->get_rec()->get_state()->set_failed( false ); + + return true; + } + + /* Failed to match, execption and recovery time. 
+ */ + + if (this->get_rec()->get_state()->get_backtracking() > 0) + { + this->get_rec()->get_state()->set_failed( true ); + return false; + } + + this->exConstruct(); + + /* TODO: Implement exception creation more fully + */ + this->recover(); + + return false; +} + +template<class ImplTraits> +void Lexer<ImplTraits>::matchAny() +{ + this->get_istream()->consume(); +} + +template<class ImplTraits> +void Lexer<ImplTraits>::recover() +{ + this->get_istream()->consume(); +} + +template<class ImplTraits> +ANTLR_UINT32 Lexer<ImplTraits>::getLine() +{ + return this->get_input()->get_line(); +} + +template<class ImplTraits> +ANTLR_MARKER Lexer<ImplTraits>::getCharIndex() +{ + return this->get_istream()->index(); +} + +template<class ImplTraits> +ANTLR_UINT32 Lexer<ImplTraits>::getCharPositionInLine() +{ + return this->get_input()->get_charPositionInLine(); +} + +template<class ImplTraits> +typename Lexer<ImplTraits>::StringType Lexer<ImplTraits>::getText() +{ + RecognizerSharedStateType* state = this->get_rec()->get_state(); + if ( !state->get_text().empty() ) + { + return state->get_text(); + + } + return this->get_input()->substr( state->get_tokenStartCharIndex(), + this->getCharIndex() - this->get_input()->get_charByteSize() + ); +} + +template<class ImplTraits> +void Lexer<ImplTraits>::exConstruct() +{ + new ANTLR_Exception<ImplTraits, RECOGNITION_EXCEPTION, InputStreamType>( this->get_rec(), "" ); +} + +template< class ImplTraits> +typename Lexer<ImplTraits>::TokenType* Lexer<ImplTraits>::getMissingSymbol( IntStreamType*, + ExceptionBaseType*, + ANTLR_UINT32 , BitsetListType*) +{ + return NULL; +} + +template< class ImplTraits> +ANTLR_INLINE const typename Lexer<ImplTraits>::RecognizerType* Lexer<ImplTraits>::get_rec() const +{ + return this; +} + +template< class ImplTraits> +ANTLR_INLINE const typename Lexer<ImplTraits>::RecognizerType* Lexer<ImplTraits>::get_recognizer() const +{ + return this->get_rec(); +} + +template< class ImplTraits> +ANTLR_INLINE typename 
Lexer<ImplTraits>::RecognizerSharedStateType* Lexer<ImplTraits>::get_lexstate() const +{ + return this->get_rec()->get_state(); +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer<ImplTraits>::set_lexstate( RecognizerSharedStateType* lexstate ) +{ + this->get_rec()->set_state(lexstate); +} + +template< class ImplTraits> +ANTLR_INLINE const typename Lexer<ImplTraits>::TokenSourceType* Lexer<ImplTraits>::get_tokSource() const +{ + return this; +} + +template< class ImplTraits> +ANTLR_INLINE typename Lexer<ImplTraits>::CommonTokenType* Lexer<ImplTraits>::get_ltoken() const +{ + return this->get_lexstate()->token(); +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer<ImplTraits>::set_ltoken( const CommonTokenType* ltoken ) +{ + this->get_lexstate()->set_token( ltoken ); +} + +template< class ImplTraits> +ANTLR_INLINE bool Lexer<ImplTraits>::hasFailed() const +{ + return this->get_lexstate()->get_failed(); +} + +template< class ImplTraits> +ANTLR_INLINE ANTLR_INT32 Lexer<ImplTraits>::get_backtracking() const +{ + return this->get_lexstate()->get_backtracking(); +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer<ImplTraits>::inc_backtracking() +{ + this->get_lexstate()->inc_backtracking(); +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer<ImplTraits>::dec_backtracking() +{ + this->get_lexstate()->dec_backtracking(); +} + +template< class ImplTraits> +ANTLR_INLINE bool Lexer<ImplTraits>::get_failedflag() const +{ + return this->get_lexstate()->get_failed(); +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer<ImplTraits>::set_failedflag( bool failed ) +{ + this->get_lexstate()->set_failed(failed); +} + +template< class ImplTraits> +ANTLR_INLINE typename Lexer<ImplTraits>::InputStreamType* Lexer<ImplTraits>::get_strstream() const +{ + return this->get_input(); +} + +template< class ImplTraits> +ANTLR_INLINE ANTLR_MARKER Lexer<ImplTraits>::index() const +{ + return this->get_istream()->index(); +} + +template< class ImplTraits> 
+ANTLR_INLINE void Lexer<ImplTraits>::seek(ANTLR_MARKER index) +{ + this->get_istream()->seek(index); +} + +template< class ImplTraits> +ANTLR_INLINE const typename Lexer<ImplTraits>::CommonTokenType* Lexer<ImplTraits>::EOF_Token() const +{ + const CommonTokenType& eof_token = this->get_tokSource()->get_eofToken(); + return &eof_token; +} + +template< class ImplTraits> +ANTLR_INLINE bool Lexer<ImplTraits>::hasException() const +{ + return this->get_lexstate()->get_error(); +} + +template< class ImplTraits> +ANTLR_INLINE typename Lexer<ImplTraits>::ExceptionBaseType* Lexer<ImplTraits>::get_exception() const +{ + return this->get_lexstate()->get_exception(); +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer<ImplTraits>::constructEx() +{ + this->get_rec()->exConstruct(); +} + +template< class ImplTraits> +ANTLR_INLINE ANTLR_MARKER Lexer<ImplTraits>::mark() +{ + return this->get_istream()->mark(); +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer<ImplTraits>::rewind(ANTLR_MARKER marker) +{ + this->get_istream()->rewind(marker); +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer<ImplTraits>::rewindLast() +{ + this->get_istream()->rewindLast(); +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer<ImplTraits>::memoize(ANTLR_MARKER ruleIndex, ANTLR_MARKER ruleParseStart) +{ + this->get_rec()->memoize( ruleIndex, ruleParseStart ); +} + +template< class ImplTraits> +ANTLR_INLINE bool Lexer<ImplTraits>::haveParsedRule(ANTLR_MARKER ruleIndex) +{ + return this->get_rec()->alreadyParsedRule(ruleIndex); +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer<ImplTraits>::setText( const StringType& text ) +{ + this->get_lexstate()->set_text(text); +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer<ImplTraits>::skip() +{ + CommonTokenType& skipToken = this->get_tokSource()->get_skipToken(); + this->get_lexstate()->set_token( &skipToken ); +} + +template< class ImplTraits> +ANTLR_INLINE typename Lexer<ImplTraits>::RuleMemoType* 
Lexer<ImplTraits>::getRuleMemo() const +{ + return this->get_lexstate()->get_rulememo(); +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer<ImplTraits>::setRuleMemo(RuleMemoType* rulememo) +{ + return this->get_lexstate()->set_rulememo(rulememo); +} + +template< class ImplTraits> +ANTLR_INLINE typename Lexer<ImplTraits>::DebuggerType* Lexer<ImplTraits>::get_debugger() const +{ + return this->get_rec()->get_debugger(); +} + +template< class ImplTraits> +ANTLR_INLINE ANTLR_UINT32 Lexer<ImplTraits>::LA(ANTLR_INT32 i) +{ + return this->get_istream()->LA(i); +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer<ImplTraits>::consume() +{ + return this->get_istream()->consume(); +} + +} + diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3memory.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3memory.hpp new file mode 100644 index 0000000000..7b85f67545 --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3memory.hpp @@ -0,0 +1,174 @@ +#ifndef _ANTLR3MEMORY_HPP +#define _ANTLR3MEMORY_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +namespace antlr3 { + +class DefaultAllocPolicy +{ +public: + //limitation of c++. unable to write a typedef + template <class TYPE> + class AllocatorType : public std::allocator<TYPE> + { + public: + typedef TYPE value_type; + typedef value_type* pointer; + typedef const value_type* const_pointer; + typedef value_type& reference; + typedef const value_type& const_reference; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + template<class U> struct rebind { + typedef AllocatorType<U> other; + }; + + AllocatorType() noexcept {} + AllocatorType( const AllocatorType& ) noexcept {} + template<typename U> AllocatorType(const AllocatorType<U>& ) noexcept{} + }; + + template<class TYPE> + class VectorType : public std::vector< TYPE, AllocatorType<TYPE> > + { + }; + + template<class TYPE> + class ListType : public std::deque< TYPE, AllocatorType<TYPE> > + { + }; + + template<class TYPE> + class StackType : public std::deque< TYPE, AllocatorType<TYPE> > + { + public: + void push( const TYPE& elem ) { this->push_back(elem); } + void pop() { this->pop_back(); } + TYPE& peek() { return this->back(); } + TYPE& top() { return this->back(); } + const TYPE& peek() const { return this->back(); } + const TYPE& top() const { return 
this->back(); } + }; + + + template<class TYPE> + class OrderedSetType : public std::set< TYPE, std::less<TYPE>, AllocatorType<TYPE> > + { + }; + + template<class TYPE> + class UnOrderedSetType : public std::set< TYPE, std::less<TYPE>, AllocatorType<TYPE> > + { + }; + + template<class KeyType, class ValueType> + class UnOrderedMapType : public std::map< KeyType, ValueType, std::less<KeyType>, + AllocatorType<std::pair<const KeyType, ValueType> > > + { + }; + + template<class KeyType, class ValueType> + class OrderedMapType : public std::map< KeyType, ValueType, std::less<KeyType>, + AllocatorType<std::pair<KeyType, ValueType> > > + { + }; + + template<class TYPE> + class SmartPtrType : public std::unique_ptr<TYPE, std::default_delete<TYPE> > + { + typedef typename std::unique_ptr<TYPE, std::default_delete<TYPE> > BaseType; + public: + SmartPtrType() {}; + SmartPtrType( SmartPtrType&& other ) + : BaseType(other) + {}; + SmartPtrType & operator=(SmartPtrType&& other) //= default; + { + BaseType::swap(other); + //return std::move((BaseType&)other); + return *this; + } + private: + SmartPtrType & operator=(const SmartPtrType&) /*= delete*/; + SmartPtrType(const SmartPtrType&) /*= delete*/; + }; + + ANTLR_INLINE static void* operator new (std::size_t bytes) + { + void* p = alloc(bytes); + return p; + } + ANTLR_INLINE static void* operator new (std::size_t , void* p) { return p; } + ANTLR_INLINE static void* operator new[]( std::size_t bytes) + { + void* p = alloc(bytes); + return p; + } + ANTLR_INLINE static void operator delete(void* p) + { + DefaultAllocPolicy::free(p); + } + ANTLR_INLINE static void operator delete(void* , void* ) {} //placement delete + + ANTLR_INLINE static void operator delete[](void* p) + { + DefaultAllocPolicy::free(p); + } + + ANTLR_INLINE static void* alloc( std::size_t bytes ) + { + void* p = malloc(bytes); + if( p== NULL ) + throw std::bad_alloc(); + return p; + } + + ANTLR_INLINE static void* alloc0( std::size_t bytes ) + { + void* p = 
calloc(1, bytes); + if( p== NULL ) + throw std::bad_alloc(); + return p; + } + + ANTLR_INLINE static void free( void* p ) + { + return ::free(p); + } + + ANTLR_INLINE static void* realloc(void *ptr, size_t size) + { + return ::realloc( ptr, size ); + } +}; + +} + +#endif /* _ANTLR3MEMORY_H */ diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3parser.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3parser.hpp new file mode 100644 index 0000000000..ccf8e9a323 --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3parser.hpp @@ -0,0 +1,197 @@ +/** \file + * Base implementation of an ANTLR3 parser. + * + * + */ +#ifndef _ANTLR3_PARSER_HPP +#define _ANTLR3_PARSER_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +namespace antlr3 { + +/** This is the main interface for an ANTLR3 parser. + */ +template< class ImplTraits > +class Parser : public ImplTraits::template RecognizerType< typename ImplTraits::TokenStreamType > +{ +public: + typedef typename ImplTraits::StringType StringType; + typedef typename ImplTraits::TokenStreamType TokenStreamType; + typedef typename TokenStreamType::IntStreamType IntStreamType; + typedef TokenStreamType StreamType; + + typedef typename ImplTraits::template RecognizerType< typename ImplTraits::TokenStreamType > RecognizerType; + typedef typename RecognizerType::RecognizerSharedStateType RecognizerSharedStateType; + + typedef DebugEventListener<ImplTraits> DebugEventListenerType; + typedef typename ImplTraits::CommonTokenType CommonTokenType; + typedef CommonTokenType TokenType; + typedef typename ImplTraits::BitsetListType BitsetListType; + typedef ANTLR_ExceptionBase<ImplTraits, TokenStreamType> ExceptionBaseType; + typedef Empty TokenSourceType; + + typedef typename RecognizerSharedStateType::FollowingType FollowingType; + typedef typename RecognizerSharedStateType::RuleMemoType RuleMemoType; + typedef typename ImplTraits::DebugEventListenerType DebuggerType; + +private: + /** A provider of a tokenstream interface, for the parser to consume + * tokens from. 
+ */ + TokenStreamType* m_tstream; + +public: + Parser( ANTLR_UINT32 sizeHint, RecognizerSharedStateType* state ); + Parser( ANTLR_UINT32 sizeHint, TokenStreamType* tstream, RecognizerSharedStateType* state ); + Parser( ANTLR_UINT32 sizeHint, TokenStreamType* tstream, DebugEventListenerType* dbg, + RecognizerSharedStateType* state ); + TokenStreamType* get_tstream() const; + TokenStreamType* get_input() const; + IntStreamType* get_istream() const; + RecognizerType* get_rec(); + + //same as above. Just that get_istream exists for lexer, parser, treeparser + //get_parser_istream exists only for parser, treeparser. So use it accordingly + IntStreamType* get_parser_istream() const; + + /** A pointer to a function that installs a debugger object (it also + * installs the debugging versions of the parser methods. This means that + * a non debug parser incurs no overhead because of the debugging stuff. + */ + void setDebugListener(DebugEventListenerType* dbg); + + /** A pointer to a function that installs a token stream + * for the parser. + */ + void setTokenStream(TokenStreamType*); + + /** A pointer to a function that returns the token stream for this + * parser. + */ + TokenStreamType* getTokenStream(); + + void exConstruct(); + TokenType* getMissingSymbol( IntStreamType* istream, ExceptionBaseType* e, + ANTLR_UINT32 expectedTokenType, BitsetListType* follow); + + void mismatch(ANTLR_UINT32 ttype, BitsetListType* follow); + + /** Pointer to a function that knows how to free resources of an ANTLR3 parser. 
+ */ + ~Parser(); + + void fillExceptionData( ExceptionBaseType* ex ); + void displayRecognitionError( ANTLR_UINT8** tokenNames, ExceptionBaseType* ex ); + + //convenience functions exposed in .stg + const RecognizerType* get_recognizer() const; + RecognizerSharedStateType* get_psrstate() const; + void set_psrstate(RecognizerSharedStateType* state); + bool haveParsedRule(ANTLR_MARKER ruleIndex); + void memoize(ANTLR_MARKER ruleIndex, ANTLR_MARKER ruleParseStart); + ANTLR_MARKER index() const; + bool hasException() const; + ExceptionBaseType* get_exception() const; + const CommonTokenType* matchToken( ANTLR_UINT32 ttype, BitsetListType* follow ); + void matchAnyToken(); + const FollowingType& get_follow_stack() const; + void followPush( const BitsetListType& follow ); + void followPop(); + void precover(); + void preporterror(); + ANTLR_UINT32 LA(ANTLR_INT32 i); + const CommonTokenType* LT(ANTLR_INT32 k); + void constructEx(); + void consume(); + ANTLR_MARKER mark(); + void rewind(ANTLR_MARKER marker); + void rewindLast(); + void seek(ANTLR_MARKER index); + bool get_perror_recovery() const; + void set_perror_recovery( bool val ); + bool hasFailed() const; + bool get_failedflag() const; + void set_failedflag( bool failed ); + ANTLR_INT32 get_backtracking() const; + void inc_backtracking(); + void dec_backtracking(); + CommonTokenType* recoverFromMismatchedSet(BitsetListType* follow); + bool recoverFromMismatchedElement(BitsetListType* follow); + RuleMemoType* getRuleMemo() const; + DebuggerType* get_debugger() const; + TokenStreamType* get_strstream() const; + void setRuleMemo(RuleMemoType* rulememo); + +}; + +//Generic rule return value. Unlike the general ANTLR, this gets generated for +//every rule in the target. 
Handle rule exit here +template<class ImplTraits> +class RuleReturnValue +{ +public: + typedef typename ImplTraits::BaseParserType BaseParserType; + typedef typename ImplTraits::CommonTokenType CommonTokenType; + + const CommonTokenType* start; + const CommonTokenType* stop; + + RuleReturnValue(BaseParserType* psr = NULL ); + RuleReturnValue( const RuleReturnValue& val ); + RuleReturnValue& operator=( const RuleReturnValue& val ); + void call_start_placeholder(BaseParserType*); + void call_stop_placeholder(BaseParserType*); + RuleReturnValue& get_struct(); + ~RuleReturnValue(); +}; + +//This kind makes sure that whenever tokens are condensed into a rule, +//all the tokens except the start and stop tokens are deleted +template<class ImplTraits> +class RuleReturnValue_1 : public RuleReturnValue<ImplTraits> +{ +public: + typedef RuleReturnValue<ImplTraits> BaseType; + typedef typename BaseType::BaseParserType BaseParserType; + + BaseParserType* parser; + + RuleReturnValue_1(); + RuleReturnValue_1( BaseParserType* psr); + RuleReturnValue_1( const RuleReturnValue_1& val ); + void call_start_placeholder(BaseParserType*); //its dummy here + ~RuleReturnValue_1(); +}; + +} + +#include "antlr3parser.inl" + +#endif diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3parser.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3parser.inl new file mode 100644 index 0000000000..bb1e4e6960 --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3parser.inl @@ -0,0 +1,579 @@ +namespace antlr3 { + +template< class ImplTraits > +Parser<ImplTraits>::Parser( ANTLR_UINT32 sizeHint, RecognizerSharedStateType* state ) + : RecognizerType( sizeHint, state ) +{ + m_tstream = NULL; +} + +template< class ImplTraits > +Parser<ImplTraits>::Parser( ANTLR_UINT32 sizeHint, TokenStreamType* tstream, + RecognizerSharedStateType* state ) + : RecognizerType( sizeHint, state ) +{ + this->setTokenStream( tstream ); +} + +template< class ImplTraits > +Parser<ImplTraits>::Parser( 
ANTLR_UINT32 sizeHint, TokenStreamType* tstream, + DebugEventListenerType* dbg, + RecognizerSharedStateType* state ) + : RecognizerType( sizeHint, state ) +{ + this->setTokenStream( tstream ); + this->setDebugListener( dbg ); +} + +template< class ImplTraits > +ANTLR_INLINE typename Parser<ImplTraits>::TokenStreamType* Parser<ImplTraits>::get_tstream() const +{ + return m_tstream; +} + +template< class ImplTraits > +ANTLR_INLINE typename Parser<ImplTraits>::IntStreamType* Parser<ImplTraits>::get_istream() const +{ + return m_tstream; +} + +template< class ImplTraits > +ANTLR_INLINE typename Parser<ImplTraits>::IntStreamType* Parser<ImplTraits>::get_parser_istream() const +{ + return m_tstream; +} + +template< class ImplTraits > +ANTLR_INLINE typename Parser<ImplTraits>::TokenStreamType* Parser<ImplTraits>::get_input() const +{ + return m_tstream; +} + +template< class ImplTraits > +void Parser<ImplTraits>::fillExceptionData( ExceptionBaseType* ex ) +{ + ex->set_token( new CommonTokenType(*(m_tstream->LT(1))) ); /* Current input token (clonned) - held by the exception */ + ex->set_line( ex->get_token()->get_line() ); + ex->set_charPositionInLine( ex->get_token()->get_charPositionInLine() ); + ex->set_index( this->get_istream()->index() ); + if( ex->get_token()->get_type() == CommonTokenType::TOKEN_EOF) + { + ex->set_streamName(""); + } + else + { + ex->set_streamName( ex->get_token()->get_input()->get_fileName() ); + } + ex->set_message("Unexpected token"); +} + +template< class ImplTraits > +void Parser<ImplTraits>::displayRecognitionError( ANTLR_UINT8** tokenNames, ExceptionBaseType* ex ) +{ + typename ImplTraits::StringStreamType errtext; + // See if there is a 'filename' we can use + // + if( ex->get_streamName().empty() ) + { + if(ex->get_token()->get_type() == CommonTokenType::TOKEN_EOF) + { + errtext << "-end of input-("; + } + else + { + errtext << "-unknown source-("; + } + } + else + { + errtext << ex->get_streamName() << "("; + } + + // Next comes the 
line number + // + errtext << this->get_rec()->get_state()->get_exception()->get_line() << ") "; + errtext << " : error " << this->get_rec()->get_state()->get_exception()->getType() + << " : " + << this->get_rec()->get_state()->get_exception()->get_message(); + + // Prepare the knowledge we know we have + // + const CommonTokenType* theToken = this->get_rec()->get_state()->get_exception()->get_token(); + StringType ttext = theToken->toString(); + + errtext << ", at offset , " + << this->get_rec()->get_state()->get_exception()->get_charPositionInLine(); + if (theToken != NULL) + { + if (theToken->get_type() == CommonTokenType::TOKEN_EOF) + { + errtext << ", at <EOF>"; + } + else + { + // Guard against null text in a token + // + errtext << "\n near " << ( ttext.empty() + ? "<no text for the token>" : ttext ) << "\n"; + } + } + + ex->displayRecognitionError( tokenNames, errtext ); + ImplTraits::displayRecognitionError( errtext.str() ); +} + +template< class ImplTraits > +Parser<ImplTraits>::~Parser() +{ + if (this->get_rec() != NULL) + { + // This may have ben a delegate or delegator parser, in which case the + // state may already have been freed (and set to NULL therefore) + // so we ignore the state if we don't have it. + // + RecognizerSharedStateType* state = this->get_rec()->get_state(); + if (state != NULL) + { + state->get_following().clear(); + } + } +} + +template< class ImplTraits > +void Parser<ImplTraits>::setDebugListener(DebugEventListenerType* dbg) +{ + // Set the debug listener. There are no methods to override + // because currently the only ones that notify the debugger + // are error reporting and recovery. Hence we can afford to + // check and see if the debugger interface is null or not + // there. If there is ever an occasion for a performance + // sensitive function to use the debugger interface, then + // a replacement function for debug mode should be supplied + // and installed here. 
+ // + this->get_rec()->set_debugger(dbg); + + // If there was a tokenstream installed already + // then we need to tell it about the debug interface + // + if (this->get_tstream() != NULL) + { + this->get_tstream()->setDebugListener(dbg); + } +} + +template< class ImplTraits > +ANTLR_INLINE void Parser<ImplTraits>::setTokenStream(TokenStreamType* tstream) +{ + m_tstream = tstream; + this->get_rec()->reset(); +} + +template< class ImplTraits > +ANTLR_INLINE typename Parser<ImplTraits>::TokenStreamType* Parser<ImplTraits>::getTokenStream() +{ + return m_tstream; +} + +template< class ImplTraits > +ANTLR_INLINE typename Parser<ImplTraits>::RecognizerType* Parser<ImplTraits>::get_rec() +{ + return this; +} + +template< class ImplTraits > +ANTLR_INLINE void Parser<ImplTraits>::exConstruct() +{ + new ANTLR_Exception<ImplTraits, MISMATCHED_TOKEN_EXCEPTION, StreamType>( this->get_rec(), "" ); +} + +template< class ImplTraits > +typename Parser<ImplTraits>::TokenType* Parser<ImplTraits>::getMissingSymbol( IntStreamType* istream, + ExceptionBaseType*, + ANTLR_UINT32 expectedTokenType, + BitsetListType* ) +{ + // Dereference the standard pointers + // + TokenStreamType *cts = static_cast<TokenStreamType*>(istream); + + // Work out what to use as the current symbol to make a line and offset etc + // If we are at EOF, we use the token before EOF + // + const CommonTokenType* current = cts->LT(1); + if (current->get_type() == CommonTokenType::TOKEN_EOF) + { + current = cts->LT(-1); + } + + CommonTokenType* token = new CommonTokenType; + + // Set some of the token properties based on the current token + // + token->set_line(current->get_line()); + token->set_charPositionInLine( current->get_charPositionInLine()); + token->set_channel( TOKEN_DEFAULT_CHANNEL ); + token->set_type(expectedTokenType); + token->set_lineStart( current->get_lineStart() ); + + // Create the token text that shows it has been inserted + // + if ( expectedTokenType == CommonTokenType::TOKEN_EOF ) + { + 
token->setText( "<missing EOF>" ); + } else { + typename ImplTraits::StringStreamType text; + text << "<missing " << this->get_rec()->get_state()->get_tokenName(expectedTokenType) << ">"; + token->setText( text.str().c_str() ); + } + // Finally return the pointer to our new token + // + return token; +} + +template< class ImplTraits > +void Parser<ImplTraits>::mismatch(ANTLR_UINT32 ttype, BitsetListType* follow) +{ + // Install a mismatched token exception in the exception stack + // + new ANTLR_Exception<ImplTraits, MISMATCHED_TOKEN_EXCEPTION, StreamType>(this, ""); + + //With the statement below, only the parsers are allowed to compile fine + IntStreamType* is = this->get_istream(); + + + if (this->mismatchIsUnwantedToken(is, ttype)) + { + // Now update it to indicate this is an unwanted token exception + // + new ANTLR_Exception<ImplTraits, UNWANTED_TOKEN_EXCEPTION, StreamType>(this, ""); + return; + } + + if ( this->mismatchIsMissingToken(is, follow)) + { + // Now update it to indicate this is an unwanted token exception + // + new ANTLR_Exception<ImplTraits, MISSING_TOKEN_EXCEPTION, StreamType>(this, ""); + return; + } + + // Just a mismatched token is all we can dtermine + // + new ANTLR_Exception<ImplTraits, MISMATCHED_TOKEN_EXCEPTION, StreamType>(this, ""); + + return; +} + +template< class ImplTraits> +ANTLR_INLINE const typename Parser<ImplTraits>::RecognizerType* Parser<ImplTraits>::get_recognizer() const +{ + return this; +} + +template< class ImplTraits> +ANTLR_INLINE typename Parser<ImplTraits>::RecognizerSharedStateType* Parser<ImplTraits>::get_psrstate() const +{ + return this->get_recognizer()->get_state(); +} + +template< class ImplTraits> +ANTLR_INLINE void Parser<ImplTraits>::set_psrstate(RecognizerSharedStateType* state) +{ + this->get_rec()->set_state( state ); +} + +template< class ImplTraits> +ANTLR_INLINE bool Parser<ImplTraits>::haveParsedRule(ANTLR_MARKER ruleIndex) +{ + return this->get_rec()->alreadyParsedRule(ruleIndex); +} + 
+template< class ImplTraits> +ANTLR_INLINE void Parser<ImplTraits>::memoize(ANTLR_MARKER ruleIndex, ANTLR_MARKER ruleParseStart) +{ + return this->get_rec()->memoize( ruleIndex, ruleParseStart ); +} + +template< class ImplTraits> +ANTLR_INLINE ANTLR_MARKER Parser<ImplTraits>::index() const +{ + return this->get_istream()->index(); +} + +template< class ImplTraits> +ANTLR_INLINE bool Parser<ImplTraits>::hasException() const +{ + return this->get_psrstate()->get_error(); +} + +template< class ImplTraits> +ANTLR_INLINE typename Parser<ImplTraits>::ExceptionBaseType* Parser<ImplTraits>::get_exception() const +{ + return this->get_psrstate()->get_exception(); +} + +template< class ImplTraits> +ANTLR_INLINE const typename Parser<ImplTraits>::CommonTokenType* Parser<ImplTraits>::matchToken( ANTLR_UINT32 ttype, BitsetListType* follow ) +{ + return this->get_rec()->match( ttype, follow ); +} + +template< class ImplTraits> +ANTLR_INLINE void Parser<ImplTraits>::matchAnyToken() +{ + return this->get_rec()->matchAny(); +} + +template< class ImplTraits> +ANTLR_INLINE const typename Parser<ImplTraits>::FollowingType& Parser<ImplTraits>::get_follow_stack() const +{ + return this->get_psrstate()->get_following(); +} + +template< class ImplTraits> +ANTLR_INLINE void Parser<ImplTraits>::followPush(const BitsetListType& follow) +{ +#ifndef SKIP_FOLLOW_SETS + this->get_rec()->get_state()->get_following().push(follow); +#endif +} + +template< class ImplTraits> +ANTLR_INLINE void Parser<ImplTraits>::followPop() +{ +#ifndef SKIP_FOLLOW_SETS + this->get_rec()->get_state()->get_following().pop(); +#endif +} + +template< class ImplTraits> +ANTLR_INLINE void Parser<ImplTraits>::precover() +{ + return this->get_rec()->recover(); +} + +template< class ImplTraits> +ANTLR_INLINE void Parser<ImplTraits>::preporterror() +{ + return this->get_rec()->reportError(); +} + +template< class ImplTraits> +ANTLR_INLINE ANTLR_UINT32 Parser<ImplTraits>::LA(ANTLR_INT32 i) +{ + return 
this->get_istream()->LA(i); +} + +template< class ImplTraits> +ANTLR_INLINE const typename Parser<ImplTraits>::CommonTokenType* Parser<ImplTraits>::LT(ANTLR_INT32 k) +{ + return this->get_input()->LT(k); +} + +template< class ImplTraits> +ANTLR_INLINE void Parser<ImplTraits>::constructEx() +{ + this->get_rec()->constructEx(); +} + +template< class ImplTraits> +ANTLR_INLINE void Parser<ImplTraits>::consume() +{ + this->get_istream()->consume(); +} + +template< class ImplTraits> +ANTLR_INLINE ANTLR_MARKER Parser<ImplTraits>::mark() +{ + return this->get_istream()->mark(); +} + +template< class ImplTraits> +ANTLR_INLINE void Parser<ImplTraits>::rewind(ANTLR_MARKER marker) +{ + this->get_istream()->rewind(marker); +} + +template< class ImplTraits> +ANTLR_INLINE void Parser<ImplTraits>::rewindLast() +{ + this->get_istream()->rewindLast(); +} + +template< class ImplTraits> +ANTLR_INLINE void Parser<ImplTraits>::seek(ANTLR_MARKER index) +{ + this->get_istream()->seek(index); +} + +template< class ImplTraits> +ANTLR_INLINE bool Parser<ImplTraits>::get_perror_recovery() const +{ + return this->get_psrstate()->get_errorRecovery(); +} + +template< class ImplTraits> +ANTLR_INLINE void Parser<ImplTraits>::set_perror_recovery( bool val ) +{ + this->get_psrstate()->set_errorRecovery(val); +} + +template< class ImplTraits> +ANTLR_INLINE bool Parser<ImplTraits>::hasFailed() const +{ + return this->get_psrstate()->get_failed(); +} + +template< class ImplTraits> +ANTLR_INLINE bool Parser<ImplTraits>::get_failedflag() const +{ + return this->get_psrstate()->get_failed(); +} + +template< class ImplTraits> +ANTLR_INLINE void Parser<ImplTraits>::set_failedflag( bool failed ) +{ + this->get_psrstate()->set_failed(failed); +} + +template< class ImplTraits> +ANTLR_INLINE ANTLR_INT32 Parser<ImplTraits>::get_backtracking() const +{ + return this->get_psrstate()->get_backtracking(); +} + +template< class ImplTraits> +ANTLR_INLINE void Parser<ImplTraits>::inc_backtracking() +{ + 
this->get_psrstate()->inc_backtracking(); +} + +template< class ImplTraits> +ANTLR_INLINE void Parser<ImplTraits>::dec_backtracking() +{ + this->get_psrstate()->dec_backtracking(); +} + +template< class ImplTraits> +ANTLR_INLINE typename Parser<ImplTraits>::CommonTokenType* Parser<ImplTraits>::recoverFromMismatchedSet(BitsetListType* follow) +{ + return this->get_rec()->recoverFromMismatchedSet(follow); +} + +template< class ImplTraits> +ANTLR_INLINE bool Parser<ImplTraits>::recoverFromMismatchedElement(BitsetListType* follow) +{ + return this->get_rec()->recoverFromMismatchedElement(follow); +} + +template< class ImplTraits> +ANTLR_INLINE typename Parser<ImplTraits>::RuleMemoType* Parser<ImplTraits>::getRuleMemo() const +{ + return this->get_psrstate()->get_ruleMemo(); +} + +template< class ImplTraits> +ANTLR_INLINE void Parser<ImplTraits>::setRuleMemo(RuleMemoType* rulememo) +{ + this->get_psrstate()->set_ruleMemo(rulememo); +} + +template< class ImplTraits> +ANTLR_INLINE typename Parser<ImplTraits>::DebuggerType* Parser<ImplTraits>::get_debugger() const +{ + return this->get_rec()->get_debugger(); +} + +template< class ImplTraits> +ANTLR_INLINE typename Parser<ImplTraits>::TokenStreamType* Parser<ImplTraits>::get_strstream() const +{ + return this->get_tstream(); +} + +template< class ImplTraits> +ANTLR_INLINE RuleReturnValue<ImplTraits>::RuleReturnValue(BaseParserType* /*psr*/) +{ + start = NULL; + stop = NULL; +} + +template< class ImplTraits> +ANTLR_INLINE RuleReturnValue<ImplTraits>::RuleReturnValue( const RuleReturnValue& val ) +{ + start = val.start; + stop = val.stop; +} + +template< class ImplTraits> +ANTLR_INLINE RuleReturnValue<ImplTraits>& RuleReturnValue<ImplTraits>::operator=( const RuleReturnValue& val ) +{ + start = val.start; + stop = val.stop; + return *this; +} + +template< class ImplTraits> +ANTLR_INLINE RuleReturnValue<ImplTraits>::~RuleReturnValue() +{ +} + +template< class ImplTraits> +ANTLR_INLINE void 
RuleReturnValue<ImplTraits>::call_start_placeholder(BaseParserType *parser) +{ + start = parser->LT(1); + stop = start; +} + +template< class ImplTraits> +ANTLR_INLINE void RuleReturnValue<ImplTraits>::call_stop_placeholder(BaseParserType *parser) +{ + stop = parser->LT(-1); +} + +template< class ImplTraits> +ANTLR_INLINE RuleReturnValue_1<ImplTraits>::RuleReturnValue_1() + : parser() +{ +} + +template< class ImplTraits> +RuleReturnValue_1<ImplTraits>::RuleReturnValue_1( BaseParserType* psr ) + : RuleReturnValue_1<ImplTraits>::BaseType(psr) + , parser(psr) +{ + BaseType::start = psr->LT(1); + BaseType::stop = BaseType::start; +} + +template< class ImplTraits> +RuleReturnValue_1<ImplTraits>::RuleReturnValue_1( const RuleReturnValue_1& val ) + : RuleReturnValue_1<ImplTraits>::BaseType(val) + , parser(val.parser) +{ +} + +template< class ImplTraits> +void RuleReturnValue_1<ImplTraits>::call_start_placeholder(BaseParserType*) +{ +} + +template< class ImplTraits> +RuleReturnValue_1<ImplTraits>::~RuleReturnValue_1() +{ + if( parser && parser->get_backtracking() == 0 ) + { + if( BaseType::stop == NULL ) + BaseType::stop = BaseType::parser->LT(-1); + if( BaseType::stop != NULL ) + { + ANTLR_MARKER start_token_idx = BaseType::start->get_index() + 1; + ANTLR_MARKER stop_token_idx = BaseType::stop->get_index() - 1; + if( start_token_idx > stop_token_idx ) + return; + parser->getTokenStream()->discardTokens( start_token_idx, stop_token_idx); + } + } +} + +} diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3recognizersharedstate.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3recognizersharedstate.hpp new file mode 100644 index 0000000000..ef0855ea08 --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3recognizersharedstate.hpp @@ -0,0 +1,272 @@ +/** \file + * While the C runtime does not need to model the state of + * multiple lexers and parsers in the same way as the Java runtime does + * it is no overhead to reflect that model. 
In fact the + * C runtime has always been able to share recognizer state. + * + * This 'class' therefore defines all the elements of a recognizer + * (either lexer, parser or tree parser) that are need to + * track the current recognition state. Multiple recognizers + * may then share this state, for instance when one grammar + * imports another. + */ + +#ifndef _ANTLR3_RECOGNIZER_SHARED_STATE_HPP +#define _ANTLR3_RECOGNIZER_SHARED_STATE_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +namespace antlr3 { + +/** All the data elements required to track the current state + * of any recognizer (lexer, parser, tree parser). + * May be share between multiple recognizers such that + * grammar inheritance is easily supported. + */ +template<class ImplTraits, class StreamType> +class RecognizerSharedState : public ImplTraits::AllocPolicyType +{ +public: + typedef typename ImplTraits::AllocPolicyType AllocPolicyType; + typedef typename StreamType::UnitType TokenType; + typedef typename ImplTraits::CommonTokenType CommonTokenType; + + typedef typename ComponentTypeFinder<ImplTraits, StreamType>::ComponentType ComponentType; + typedef typename ImplTraits::template RewriteStreamType< ComponentType > RewriteStreamType; + typedef typename ImplTraits::StringType StringType; + typedef typename ImplTraits::TokenSourceType TokenSourceType; + typedef typename ImplTraits::template ExceptionBaseType<StreamType> ExceptionBaseType; + typedef typename ImplTraits::BitsetType BitsetType; + typedef typename ImplTraits::BitsetListType BitsetListType; + + typedef typename ImplTraits::TreeAdaptorType TreeAdaptorType; + + typedef typename AllocPolicyType::template StackType< BitsetListType > FollowingType; + typedef typename AllocPolicyType::template StackType< typename ImplTraits::InputStreamType* > InputStreamsType; + typedef InputStreamsType StreamsType; + typedef typename AllocPolicyType::template VectorType<RewriteStreamType> RewriteStreamsType; + + typedef IntTrie<ImplTraits, ANTLR_MARKER> RuleListType; + typedef IntTrie<ImplTraits, std::shared_ptr<RuleListType>> RuleMemoType; + +private: + /** Points to the first in a possible chain of exceptions that the + * recognizer has discovered. + */ + ExceptionBaseType* m_exception; + + + /** Track the set of token types that can follow any rule invocation. + * Stack structure, to support: List<BitSet>. 
+ */ + FollowingType m_following; + + /** Track around a hint from the creator of the recognizer as to how big this + * thing is going to get, as the actress said to the bishop. This allows us + * to tune hash tables accordingly. This might not be the best place for this + * in the end but we will see. + */ + ANTLR_UINT32 m_sizeHint; + + + /** If set to true then the recognizer has an exception + * condition (this is tested by the generated code for the rules of + * the grammar). + */ + bool m_error; + + + /** This is true when we see an error and before having successfully + * matched a token. Prevents generation of more than one error message + * per error. + */ + bool m_errorRecovery; + + /** In lieu of a return value, this indicates that a rule or token + * has failed to match. Reset to false upon valid token match. + */ + bool m_failed; + + /* + Instead of allocating CommonTokenType, we do it in the stack. hence we need a null indicator + */ + bool m_token_present; + + /** The index into the input stream where the last error occurred. + * This is used to prevent infinite loops where an error is found + * but no token is consumed during recovery...another error is found, + * ad nauseam. This is a failsafe mechanism to guarantee that at least + * one token/tree node is consumed for two errors. + */ + ANTLR_MARKER m_lastErrorIndex; + + /** When the recognizer terminates, the error handling functions + * will have incremented this value if any error occurred (that was displayed). It can then be + * used by the grammar programmer without having to use static globals. + */ + ANTLR_UINT32 m_errorCount; + + /** If 0, no backtracking is going on. Safe to exec actions etc... + * If >0 then it's the level of backtracking. + */ + ANTLR_INT32 m_backtracking; + + /** ANTLR3_VECTOR of ANTLR3_LIST for rule memoizing. + * Tracks the stop token index for each rule. ruleMemo[ruleIndex] is + * the memoization table for ruleIndex. 
For key ruleStartIndex, you + * get back the stop token for associated rule or MEMO_RULE_FAILED. + * + * This is only used if rule memoization is on. + */ + RuleMemoType* m_ruleMemo; + + /** Pointer to an array of token names + * that are generally useful in error reporting. The generated parsers install + * this pointer. The table it points to is statically allocated as 8 bit ascii + * at parser compile time - grammar token names are thus restricted in character + * sets, which does not seem to terrible. + */ + ANTLR_UINT8** m_tokenNames; + + /** The goal of all lexer rules/methods is to create a token object. + * This is an instance variable as multiple rules may collaborate to + * create a single token. For example, NUM : INT | FLOAT ; + * In this case, you want the INT or FLOAT rule to set token and not + * have it reset to a NUM token in rule NUM. + */ + CommonTokenType m_token; + + /** A lexer is a source of tokens, produced by all the generated (or + * hand crafted if you like) matching rules. As such it needs to provide + * a token source interface implementation. For others, this will become a empty class + */ + TokenSourceType* m_tokSource; + + /** The channel number for the current token + */ + ANTLR_UINT32 m_channel; + + /** The token type for the current token + */ + ANTLR_UINT32 m_type; + + /** The input line (where it makes sense) on which the first character of the current + * token resides. + */ + ANTLR_INT32 m_tokenStartLine; + + /** The character position of the first character of the current token + * within the line specified by tokenStartLine + */ + ANTLR_INT32 m_tokenStartCharPositionInLine; + + /** What character index in the stream did the current token start at? + * Needed, for example, to get the text for current token. Set at + * the start of nextToken. + */ + ANTLR_MARKER m_tokenStartCharIndex; + + /** Text for the current token. 
This can be overridden by setting this + * variable directly or by using the SETTEXT() macro (preferred) in your + * lexer rules. + */ + StringType m_text; + + /** Input stream stack, which allows the C programmer to switch input streams + * easily and allow the standard nextToken() implementation to deal with it + * as this is a common requirement. + */ + InputStreamsType m_streams; + + /** Tree adaptor drives an AST trie construction. + * Is shared between multiple imported grammars. + */ + TreeAdaptorType* m_treeAdaptor; + +public: + RecognizerSharedState(); + ExceptionBaseType* get_exception() const; + FollowingType& get_following(); + ANTLR_UINT32 get_sizeHint() const; + bool get_error() const; + bool get_errorRecovery() const; + bool get_failed() const; + bool get_token_present() const; + ANTLR_MARKER get_lastErrorIndex() const; + ANTLR_UINT32 get_errorCount() const; + ANTLR_INT32 get_backtracking() const; + RuleMemoType* get_ruleMemo() const; + ANTLR_UINT8** get_tokenNames() const; + ANTLR_UINT8* get_tokenName( ANTLR_UINT32 i ) const; + CommonTokenType* get_token(); + TokenSourceType* get_tokSource() const; + ANTLR_UINT32& get_channel(); + ANTLR_UINT32 get_type() const; + ANTLR_INT32 get_tokenStartLine() const; + ANTLR_INT32 get_tokenStartCharPositionInLine() const; + ANTLR_MARKER get_tokenStartCharIndex() const; + StringType& get_text(); + InputStreamsType& get_streams(); + TreeAdaptorType* get_treeAdaptor() const; + + void set_following( const FollowingType& following ); + void set_sizeHint( ANTLR_UINT32 sizeHint ); + void set_error( bool error ); + void set_errorRecovery( bool errorRecovery ); + void set_failed( bool failed ); + void set_token_present(bool token_present); + void set_lastErrorIndex( ANTLR_MARKER lastErrorIndex ); + void set_errorCount( ANTLR_UINT32 errorCount ); + void set_backtracking( ANTLR_INT32 backtracking ); + void set_ruleMemo( RuleMemoType* ruleMemo ); + void set_tokenNames( ANTLR_UINT8** tokenNames ); + void set_tokSource( 
TokenSourceType* tokSource ); + void set_channel( ANTLR_UINT32 channel ); + void set_exception( ExceptionBaseType* exception ); + void set_type( ANTLR_UINT32 type ); + void set_token( const CommonTokenType* tok); + void set_tokenStartLine( ANTLR_INT32 tokenStartLine ); + void set_tokenStartCharPositionInLine( ANTLR_INT32 tokenStartCharPositionInLine ); + void set_tokenStartCharIndex( ANTLR_MARKER tokenStartCharIndex ); + void set_text( const StringType& text ); + void set_streams( const InputStreamsType& streams ); + void set_treeAdaptor( TreeAdaptorType* adaptor ); + + void inc_errorCount(); + void inc_backtracking(); + void dec_backtracking(); +}; + +} + +#include "antlr3recognizersharedstate.inl" + +#endif + + diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3recognizersharedstate.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3recognizersharedstate.inl new file mode 100644 index 0000000000..27732cb34f --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3recognizersharedstate.inl @@ -0,0 +1,280 @@ +namespace antlr3 { + +template<class ImplTraits, class StreamType> +RecognizerSharedState<ImplTraits, StreamType>::RecognizerSharedState() +{ + m_exception = NULL; + m_sizeHint = 0; + m_error = false; + m_errorRecovery = false; + m_failed = false; + m_token_present = false; + m_lastErrorIndex = 0; + m_errorCount = 0; + m_backtracking = false; + m_ruleMemo = NULL; + m_tokenNames = NULL; + m_tokSource = NULL; + m_channel = 0; + m_type = 0; + m_tokenStartLine = 0; + m_tokenStartCharPositionInLine = 0; + m_tokenStartCharIndex = 0; + m_treeAdaptor = NULL; +} + +template<class ImplTraits, class StreamType> +ANTLR_INLINE typename RecognizerSharedState<ImplTraits, StreamType>::FollowingType& RecognizerSharedState<ImplTraits, StreamType>::get_following() +{ + return m_following; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_UINT32 RecognizerSharedState<ImplTraits, StreamType>::get_sizeHint() const +{ + return m_sizeHint; +} 
+template<class ImplTraits, class StreamType> +ANTLR_INLINE bool RecognizerSharedState<ImplTraits, StreamType>::get_error() const +{ + return m_error; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE typename RecognizerSharedState<ImplTraits, StreamType>::ExceptionBaseType* +RecognizerSharedState<ImplTraits, StreamType>::get_exception() const +{ + return m_exception; +} + +template<class ImplTraits, class StreamType> +ANTLR_INLINE bool RecognizerSharedState<ImplTraits, StreamType>::get_errorRecovery() const +{ + return m_errorRecovery; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE bool RecognizerSharedState<ImplTraits, StreamType>::get_failed() const +{ + return m_failed; +} + +template<class ImplTraits, class StreamType> +ANTLR_INLINE bool RecognizerSharedState<ImplTraits, StreamType>::get_token_present() const +{ + return m_token_present; +} + +template<class ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_MARKER RecognizerSharedState<ImplTraits, StreamType>::get_lastErrorIndex() const +{ + return m_lastErrorIndex; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_UINT32 RecognizerSharedState<ImplTraits, StreamType>::get_errorCount() const +{ + return m_errorCount; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_INT32 RecognizerSharedState<ImplTraits, StreamType>::get_backtracking() const +{ + return m_backtracking; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE typename RecognizerSharedState<ImplTraits, StreamType>::RuleMemoType* RecognizerSharedState<ImplTraits, StreamType>::get_ruleMemo() const +{ + return m_ruleMemo; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_UINT8** RecognizerSharedState<ImplTraits, StreamType>::get_tokenNames() const +{ + return m_tokenNames; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_UINT8* RecognizerSharedState<ImplTraits, StreamType>::get_tokenName( ANTLR_UINT32 i ) const +{ + return 
m_tokenNames[i]; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE typename RecognizerSharedState<ImplTraits, StreamType>::CommonTokenType* RecognizerSharedState<ImplTraits, StreamType>::get_token() +{ + return &m_token; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE typename RecognizerSharedState<ImplTraits, StreamType>::TokenSourceType* RecognizerSharedState<ImplTraits, StreamType>::get_tokSource() const +{ + return m_tokSource; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_UINT32& RecognizerSharedState<ImplTraits, StreamType>::get_channel() +{ + return m_channel; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_UINT32 RecognizerSharedState<ImplTraits, StreamType>::get_type() const +{ + return m_type; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_INT32 RecognizerSharedState<ImplTraits, StreamType>::get_tokenStartLine() const +{ + return m_tokenStartLine; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_INT32 RecognizerSharedState<ImplTraits, StreamType>::get_tokenStartCharPositionInLine() const +{ + return m_tokenStartCharPositionInLine; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE ANTLR_MARKER RecognizerSharedState<ImplTraits, StreamType>::get_tokenStartCharIndex() const +{ + return m_tokenStartCharIndex; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE typename RecognizerSharedState<ImplTraits, StreamType>::StringType& RecognizerSharedState<ImplTraits, StreamType>::get_text() +{ + return m_text; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE typename RecognizerSharedState<ImplTraits, StreamType>::StreamsType& RecognizerSharedState<ImplTraits, StreamType>::get_streams() +{ + return m_streams; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE typename RecognizerSharedState<ImplTraits, StreamType>::TreeAdaptorType* RecognizerSharedState<ImplTraits, StreamType>::get_treeAdaptor() const +{ + 
return m_treeAdaptor; +} + +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_exception( ExceptionBaseType* exception ) +{ + m_exception = exception; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_following( const FollowingType& following ) +{ + m_following = following; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_sizeHint( ANTLR_UINT32 sizeHint ) +{ + m_sizeHint = sizeHint; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_error( bool error ) +{ + m_error = error; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_errorRecovery( bool errorRecovery ) +{ + m_errorRecovery = errorRecovery; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_failed( bool failed ) +{ + m_failed = failed; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_token_present(bool token_present) +{ + m_token_present = token_present; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_lastErrorIndex( ANTLR_MARKER lastErrorIndex ) +{ + m_lastErrorIndex = lastErrorIndex; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_errorCount( ANTLR_UINT32 errorCount ) +{ + m_errorCount = errorCount; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_backtracking( ANTLR_INT32 backtracking ) +{ + m_backtracking = backtracking; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, 
StreamType>::set_ruleMemo( RuleMemoType* ruleMemo ) +{ + m_ruleMemo = ruleMemo; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_tokenNames( ANTLR_UINT8** tokenNames ) +{ + m_tokenNames = tokenNames; +} + +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_tokSource( TokenSourceType* tokSource ) +{ + m_tokSource = tokSource; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_channel( ANTLR_UINT32 channel ) +{ + m_channel = channel; +} + +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_token(const CommonTokenType* tok) +{ + this->set_token_present( tok != NULL ); + if( tok != NULL ) + m_token = *tok; +} + +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_type( ANTLR_UINT32 type ) +{ + m_type = type; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_tokenStartLine( ANTLR_INT32 tokenStartLine ) +{ + m_tokenStartLine = tokenStartLine; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_tokenStartCharPositionInLine( ANTLR_INT32 tokenStartCharPositionInLine ) +{ + m_tokenStartCharPositionInLine = tokenStartCharPositionInLine; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_tokenStartCharIndex( ANTLR_MARKER tokenStartCharIndex ) +{ + m_tokenStartCharIndex = tokenStartCharIndex; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_text( const StringType& text ) +{ + m_text = text; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void 
RecognizerSharedState<ImplTraits, StreamType>::set_streams( const InputStreamsType& streams ) +{ + m_streams = streams; +} +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::set_treeAdaptor( TreeAdaptorType* adaptor ) +{ + m_treeAdaptor = adaptor; +} + +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::inc_errorCount() +{ + ++m_errorCount; +} + +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::inc_backtracking() +{ + ++m_backtracking; +} + +template<class ImplTraits, class StreamType> +ANTLR_INLINE void RecognizerSharedState<ImplTraits, StreamType>::dec_backtracking() +{ + --m_backtracking; +} + +} diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3rewriterulesubtreestream.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3rewriterulesubtreestream.hpp new file mode 100644 index 0000000000..db92ee0db0 --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3rewriterulesubtreestream.hpp @@ -0,0 +1,123 @@ +#ifndef ANTLR3REWRITERULESUBTREESTREAM_HPP +#define ANTLR3REWRITERULESUBTREESTREAM_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. 
+//
+// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+namespace antlr3 {
+
+/// This is an implementation of a subtree stream which is a set of trees
+/// modeled as an element stream.
+///
+/// The stream stores its entries as TreeTypePtr values in a vector and walks
+/// them with an iterator cursor.
+///
+template<class ImplTraits>
+class RewriteRuleSubtreeStream
+{
+public:
+    typedef typename ImplTraits::StringType StringType;
+    typedef typename ImplTraits::AllocPolicyType AllocPolicyType;
+    typedef typename ImplTraits::TreeAdaptorType TreeAdaptorType;
+    typedef typename ImplTraits::TreeParserType ComponentType;
+    typedef typename ComponentType::StreamType StreamType;
+    typedef typename ImplTraits::TreeType TreeType;
+    typedef typename ImplTraits::TreeTypePtr TreeTypePtr;
+    typedef TreeType TokenType;
+    typedef typename ImplTraits::template RecognizerType< StreamType > RecognizerType;
+    typedef typename AllocPolicyType::template VectorType< TreeTypePtr > ElementsType;
+    typedef typename ImplTraits::TreeType ElementType;
+
+    /// Create an empty stream.
+    RewriteRuleSubtreeStream(TreeAdaptorType* adaptor, const char* description);
+    /// Create a stream holding a duplicate of \p oneElement (skipped when it is NULL).
+    RewriteRuleSubtreeStream(TreeAdaptorType* adaptor, const char* description, TreeType* oneElement);
+    /// Create a stream holding \p oneElement itself; it is handed to add(), which
+    /// moves from it.
+    RewriteRuleSubtreeStream(TreeAdaptorType* adaptor, const char* description, TreeTypePtr& oneElement);
+    /// Create a stream from a copy of \p elements.
+    RewriteRuleSubtreeStream(TreeAdaptorType* adaptor, const char* description, const ElementsType& elements);
+    ~RewriteRuleSubtreeStream();
+
+    /// Reset the condition of this stream so that it appears we have
+    /// not consumed any of its elements.  Elements themselves are untouched.
+    /// Also marks the stream dirty.
+    ///
+    void reset();
+
+    /// Return a duplicate (via the adaptor's dupNode) of the next entry's node.
+    ///
+    TreeTypePtr nextNode();
+
+    /// Append a tree to this stream. A null \p el is ignored; otherwise \p el is
+    /// moved into the stream and the cursor is put back to the first element.
+    ///
+    void add(TreeTypePtr& el);
+    /// True while the cursor has not reached the end of the element list.
+    bool hasNext();
+    /// Return the next tree: a duplicate when the stream is dirty or a single
+    /// element is being replayed, otherwise the stored element moved out.
+    TreeTypePtr nextTree();
+    /// Low-level step: iterator to the next element, or end() if none is available.
+    typename ElementsType::iterator _next();
+    /// Identity conversion; returns \p el unchanged.
+    ElementType* toTree(ElementType* el);
+
+    /// Number of elements available in the stream
+    ///
+    ANTLR_UINT32 size();
+
+    /// Returns the description string. An empty description is first replaced
+    /// by "<unknown source>".
+    ///
+    StringType getDescription();
+
+protected:
+    /// Duplicate a whole subtree (forwards to dupTree).
+    TreeTypePtr dup(const TreeTypePtr& el );
+    TreeTypePtr dup(const TreeType* el );
+
+    /// Descend through nil nodes that have exactly one child and return the
+    /// first node that is not such a wrapper.
+    TreeTypePtr& leftestNode(TreeTypePtr& node) const;
+private:
+    /// Pointer to the tree adaptor in use for this stream
+    ///
+    TreeAdaptorType* m_adaptor;
+
+    /// Position of the next element to hand out; equals m_elements.end() once
+    /// every element has been consumed.
+    ///
+    typename ElementsType::iterator m_cursor;
+
+    /// The element or stream description; usually has name of the token or
+    /// rule reference that this list tracks.  Can include rulename too, but
+    /// the exception would track that info.
+    ///
+    StringType m_elementDescription;
+
+    /// The list of tokens or subtrees we are tracking
+    ///
+    ElementsType m_elements;
+
+    /// Duplicate a subtree through the tree adaptor.
+    TreeTypePtr dupTree(const TreeTypePtr& el );
+    TreeTypePtr dupTree(const TreeType* el );
+
+    /// Once a node / subtree has been used in a stream, it must be dup'ed
+    /// from then on.  Streams are reset after sub rules so that the streams
+    /// can be reused in future sub rules.  So, reset must set a dirty bit.
+    /// If dirty, then next() always returns a dup. 
+ /// + bool m_dirty; +}; + +} + +#include "antlr3rewriterulesubtreestream.inl" + +#endif diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3rewriterulesubtreestream.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3rewriterulesubtreestream.inl new file mode 100644 index 0000000000..b210586245 --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3rewriterulesubtreestream.inl @@ -0,0 +1,244 @@ +namespace antlr3 { + +template<class ImplTraits> +RewriteRuleSubtreeStream<ImplTraits>::RewriteRuleSubtreeStream(TreeAdaptorType* adaptor, const char* description) + : m_adaptor(adaptor) + , m_elementDescription(description) + , m_dirty(false) +{ + m_cursor = m_elements.begin(); +} + +template<class ImplTraits> +RewriteRuleSubtreeStream<ImplTraits>::RewriteRuleSubtreeStream(TreeAdaptorType* adaptor, const char* description, + TreeType* oneElement + ) + : m_adaptor(adaptor) + , m_elementDescription(description) + , m_dirty(false) +{ + if( oneElement != NULL ) + { + auto tree_clone = this->dup(oneElement); + this->add( tree_clone ); + } + m_cursor = m_elements.begin(); +} + +template<class ImplTraits> +RewriteRuleSubtreeStream<ImplTraits>::RewriteRuleSubtreeStream(TreeAdaptorType* adaptor, const char* description, + TreeTypePtr& oneElement + ) + : m_adaptor(adaptor) + , m_elementDescription(description) + , m_dirty(false) +{ + if( oneElement != NULL ) + this->add( oneElement ); + m_cursor = m_elements.begin(); +} + +template<class ImplTraits> +RewriteRuleSubtreeStream<ImplTraits>::RewriteRuleSubtreeStream(TreeAdaptorType* adaptor, const char* description, + const ElementsType& elements + ) + : m_adaptor(adaptor) + , m_elementDescription(description) + , m_dirty(false) + , m_elements(elements) +{ + m_cursor = m_elements.begin(); +} + +template<class ImplTraits> +void +RewriteRuleSubtreeStream<ImplTraits>::reset() +{ + m_cursor = m_elements.begin(); + m_dirty = true; +} + +template<class ImplTraits> +void 
+RewriteRuleSubtreeStream<ImplTraits>::add(TreeTypePtr& el)
+{
+    // Append a tree to the stream; a null element is silently ignored.
+    if ( el == NULL )
+        return;
+
+    m_elements.push_back(std::move(el));
+    // push_back may reallocate and invalidate the cursor, so rewind it.
+    m_cursor = m_elements.begin();
+}
+
+/// Low-level step shared by nextTree() and nextNode().
+/// \return iterator to the element to use, or m_elements.end() when the stream
+///         is empty or has run out of elements; callers must check for end().
+template<class ImplTraits>
+typename RewriteRuleSubtreeStream<ImplTraits>::ElementsType::iterator
+RewriteRuleSubtreeStream<ImplTraits>::_next()
+{
+    if (m_elements.empty())
+    {
+        // This means that the stream is empty
+        // Caller must cope with this (TODO throw RewriteEmptyStreamException)
+        return m_elements.end();
+    }
+
+    // NOTE(review): a dirty stream takes this branch even when the cursor still
+    // has elements ahead of it, so with more than one element it reports end()
+    // and with one element the cursor is never advanced. Kept as-is; confirm
+    // this is the intended behaviour.
+    if (m_dirty || m_cursor == m_elements.end())
+    {
+        if( m_elements.size() == 1)
+        {
+            // Special case when size is single element, it will just dup a lot
+            return m_elements.begin();
+        }
+
+        // Out of elements and the size is not 1, so we cannot assume
+        // that we just duplicate the entry n times (such as ID ent+ -> ^(ID ent)+)
+        // This means we ran out of elements earlier than was expected.
+        //
+        return m_elements.end(); // Caller must cope with this (TODO throw RewriteEmptyStreamException)
+    }
+
+    // More than just a single element so we extract it from the
+    // vector.
+    return m_cursor++;
+}
+
+/// Return the next tree of the stream.
+/// A duplicate is returned when the stream is dirty or when a single-element
+/// stream is being replayed; otherwise the stored element is moved out.
+/// \return the tree, or a null TreeTypePtr when _next() has nothing to offer.
+template<class ImplTraits>
+typename RewriteRuleSubtreeStream<ImplTraits>::TreeTypePtr
+RewriteRuleSubtreeStream<ImplTraits>::nextTree()
+{
+    // Must be evaluated before _next(), which may advance the cursor.
+    const bool mustDup = m_dirty || ( m_cursor == m_elements.end() && m_elements.size() == 1 );
+
+    typename ElementsType::iterator el = this->_next();
+    if ( el == m_elements.end() )
+    {
+        // Empty or exhausted stream: dereferencing end() would be undefined
+        // behaviour, so hand back a null tree for the caller to cope with.
+        return TreeTypePtr();
+    }
+
+    if ( mustDup )
+    {
+        // if out of elements and size is 1, dup
+        return this->dup(*el);
+    }
+
+    // Fresh element: transfer it out of the stream.
+    return std::move(*el);
+}
+
+/*
+template<class ImplTraits, class SuperType>
+typename RewriteRuleSubtreeStream<ImplTraits, SuperType>::TokenType*
+RewriteRuleSubtreeStream<ImplTraits, SuperType>::nextToken()
+{
+    return this->_next();
+}
+
+template<class ImplTraits, class SuperType>
+typename RewriteRuleSubtreeStream<ImplTraits, SuperType>::TokenType*
+RewriteRuleSubtreeStream<ImplTraits, SuperType>::next()
+{
+    ANTLR_UINT32 s;
+    s = this->size();
+    if ( (m_cursor >= s) && (s == 1) )
+    {
+        TreeTypePtr el;
+        el = this->_next();
+        return this->dup(el);
+    }
+    return this->_next();
+}
+
+*/
+
+/// Duplicate a subtree held by smart pointer.
+template<class ImplTraits>
+typename RewriteRuleSubtreeStream<ImplTraits>::TreeTypePtr
+RewriteRuleSubtreeStream<ImplTraits>::dup(const TreeTypePtr& element)
+{
+    return this->dupTree(element);
+}
+
+/// Duplicate a subtree held by raw pointer.
+template<class ImplTraits>
+typename RewriteRuleSubtreeStream<ImplTraits>::TreeTypePtr
+RewriteRuleSubtreeStream<ImplTraits>::dup(const TreeType* element)
+{
+    // The call result is already a temporary; wrapping it in std::move adds nothing.
+    return this->dupTree(element);
+}
+
+/// Ask the tree adaptor to duplicate the subtree rooted at \p element.
+template<class ImplTraits>
+typename RewriteRuleSubtreeStream<ImplTraits>::TreeTypePtr
+RewriteRuleSubtreeStream<ImplTraits>::dupTree(const TreeTypePtr& element)
+{
+    return m_adaptor->dupTree(element);
+}
+
+/// Raw-pointer overload of dupTree().
+template<class ImplTraits>
+typename RewriteRuleSubtreeStream<ImplTraits>::TreeTypePtr
+RewriteRuleSubtreeStream<ImplTraits>::dupTree(const TreeType* element)
+{
+    return m_adaptor->dupTree(element);
+}
+
+template<class ImplTraits>
+typename RewriteRuleSubtreeStream<ImplTraits>::ElementType* 
+RewriteRuleSubtreeStream<ImplTraits>::toTree( ElementType* element) +{ + return element; +} + +template<class ImplTraits> +bool RewriteRuleSubtreeStream<ImplTraits>::hasNext() +{ + return m_cursor != m_elements.end(); +} + +/// Number of elements available in the stream +/// +template<class ImplTraits> +ANTLR_UINT32 RewriteRuleSubtreeStream<ImplTraits>::size() +{ + return (ANTLR_UINT32)(m_elements.size()); +} + +template<class ImplTraits> +typename RewriteRuleSubtreeStream<ImplTraits>::StringType +RewriteRuleSubtreeStream<ImplTraits>::getDescription() +{ + if ( m_elementDescription.empty() ) + { + m_elementDescription = "<unknown source>"; + } + return m_elementDescription; +} + +template<class ImplTraits> +RewriteRuleSubtreeStream<ImplTraits>::~RewriteRuleSubtreeStream() +{ + // Before placing the stream back in the pool, we + // need to clear any vector it has. + m_elements.clear(); +} + +template<class ImplTraits> +typename RewriteRuleSubtreeStream<ImplTraits>::TreeTypePtr +RewriteRuleSubtreeStream<ImplTraits>::nextNode() +{ + //System.out.println("nextNode: elements="+elements+", singleElement="+((Tree)singleElement).toStringTree()); + //ANTLR_UINT32 n = this->size(); + if (m_dirty || (m_cursor == m_elements.end() && m_elements.size() == 1)) { + // if out of elements and size is 1, dup (at most a single node + // since this is for making root nodes). 
+ typename ElementsType::iterator el = this->_next(); + return m_adaptor->dupNode(*el); + } + + typename ElementsType::iterator el = this->_next(); + //while (m_adaptor->isNilNode(el) && m_adaptor->getChildCount(el) == 1) + // tree = m_adaptor->getChild(tree, 0); + TreeTypePtr& node = leftestNode(*el); + //System.out.println("_next="+((Tree)tree).toStringTree()); + return m_adaptor->dupNode(node); // dup just the root (want node here) +} + +template<class ImplTraits> +ANTLR_INLINE +typename RewriteRuleSubtreeStream<ImplTraits>::TreeTypePtr& +RewriteRuleSubtreeStream<ImplTraits>::leftestNode(TreeTypePtr& node) const +{ + if(m_adaptor->isNilNode(node) && m_adaptor->getChildCount(node) == 1) + return leftestNode(node->getChild(0)); + else + return node; +} + +} diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3rewriteruletokenstream.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3rewriteruletokenstream.hpp new file mode 100644 index 0000000000..c982b0a2f1 --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3rewriteruletokenstream.hpp @@ -0,0 +1,131 @@ +#ifndef ANTLR3REWRITESTREAM_HPP +#define ANTLR3REWRITESTREAM_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. 
+//
+// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+namespace antlr3 {
+
+/// This is an implementation of a token stream, which is basically an element
+/// stream that deals with tokens only.
+///
+/// Entries are stored as `const TokenType*` in a vector and walked with an
+/// iterator cursor.
+///
+template<class ImplTraits>
+class RewriteRuleTokenStream
+{
+public:
+    typedef typename ImplTraits::StringType StringType;
+    typedef typename ImplTraits::AllocPolicyType AllocPolicyType;
+    typedef typename ImplTraits::TreeAdaptorType TreeAdaptorType;
+    typedef typename ImplTraits::ParserType ComponentType;
+    typedef typename ComponentType::StreamType StreamType;
+    typedef typename ImplTraits::CommonTokenType TokenType;
+    typedef typename ImplTraits::TreeType TreeType;
+    typedef typename ImplTraits::TreeTypePtr TreeTypePtr;
+    typedef typename AllocPolicyType::template VectorType<const TokenType* > ElementsType;
+    typedef typename ImplTraits::template RecognizerType< StreamType > RecognizerType;
+    typedef typename ImplTraits::CommonTokenType ElementType;
+public:
+    /// Create an empty stream. The description argument is currently not stored.
+    RewriteRuleTokenStream(TreeAdaptorType* adaptor, const char* description);
+    /// Create a stream holding \p oneElement (skipped when it is NULL).
+    RewriteRuleTokenStream(TreeAdaptorType* adaptor, const char* description, const TokenType* oneElement);
+    /// Create a stream from a copy of \p elements.
+    RewriteRuleTokenStream(TreeAdaptorType* adaptor, const char* description, const ElementsType& elements);
+    ~RewriteRuleTokenStream();
+
+    /// Reset the condition of this stream so that it appears we have
+    /// not consumed any of its elements.  Elements themselves are untouched.
+    /// Also marks the stream dirty.
+    ///
+    void reset();
+
+    /// Fetch the next token and wrap it in a tree node created by the adaptor.
+    TreeTypePtr nextNode();
+    /// Next token of the stream, or NULL when none is available.
+    const TokenType* nextToken();
+
+    /// Append a token to this stream. A NULL \p el is ignored; the cursor is
+    /// put back to the first element afterwards.
+    ///
+    void add(const ElementType* el);
+
+    /// When constructing trees, sometimes we need to dup a token or AST
+    /// subtree.  Dup'ing a token means just creating another AST node
+    /// around it.  For trees, you must call the adaptor.dupTree().
+    ///
+    ElementType* dup( ElementType* el );
+
+    /// Ensure stream emits trees; tokens must be converted to AST nodes.
+    /// AST nodes can be passed through unmolested.
+    ///
+    TreeTypePtr toTree(const ElementType* el);
+
+    /// Pointer to the tree adaptor in use for this stream
+    ///
+    TreeAdaptorType* m_adaptor;
+    // NOTE(review): nextTree() is declared to return an ElementType by value
+    // while the stream stores pointers -- looks unused; confirm.
+    ElementType nextTree();
+    /// Low-level step: iterator to the next element, or end() if none is available.
+    typename ElementsType::iterator _next();
+
+    /// Returns true if there is a next element available
+    ///
+    bool hasNext();
+
+    /// Number of elements available in the stream
+    ///
+    ANTLR_UINT32 size();
+
+    /// Returns the description string. An empty description is first replaced
+    /// by "<unknown source>".
+    ///
+    StringType getDescription();
+
+private:
+    ElementType* dupImpl(typename ImplTraits::CommonTokenType* el);
+    ElementType* dupImpl(typename ImplTraits::TreeTypePtr el);
+
+    /// Position of the next element to hand out; equals m_elements.end() once
+    /// every element has been consumed.
+    ///
+    typename ElementsType::iterator m_cursor;
+
+    /// The element or stream description; usually has name of the token or
+    /// rule reference that this list tracks.  Can include rulename too, but
+    /// the exception would track that info.
+    ///
+    StringType m_elementDescription;
+
+    /// The list of tokens or subtrees we are tracking
+    ///
+    ElementsType m_elements;
+
+    /// Once a node / subtree has been used in a stream, it must be dup'ed
+    /// from then on. 
Streams are reset after sub rules so that the streams + /// can be reused in future sub rules. So, reset must set a dirty bit. + /// If dirty, then next() always returns a dup. + /// + bool m_dirty; +}; + +} + +#include "antlr3rewriteruletokenstream.inl" + +#endif diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3rewriteruletokenstream.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3rewriteruletokenstream.inl new file mode 100644 index 0000000000..a7dd8b54fc --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3rewriteruletokenstream.inl @@ -0,0 +1,224 @@ +namespace antlr3 { + +template<class ImplTraits> +RewriteRuleTokenStream<ImplTraits>::RewriteRuleTokenStream(TreeAdaptorType* adaptor, const char* /*description*/) + : m_adaptor(adaptor) + , m_elements() + , m_dirty(false) +{ + m_cursor = m_elements.begin(); +} + +template<class ImplTraits> +RewriteRuleTokenStream<ImplTraits>::RewriteRuleTokenStream(TreeAdaptorType* adaptor, const char* /*description*/, + const TokenType* oneElement + ) + : m_adaptor(adaptor) + , m_elements() + , m_dirty(false) +{ + if( oneElement != NULL ) + this->add( oneElement ); + m_cursor = m_elements.begin(); +} + +template<class ImplTraits> +RewriteRuleTokenStream<ImplTraits>::RewriteRuleTokenStream(TreeAdaptorType* adaptor, const char* /*description*/, + const ElementsType& elements + ) + : m_adaptor(adaptor) + , m_elements(elements) + , m_dirty(false) +{ + m_cursor = m_elements.begin(); +} + +template<class ImplTraits> +void RewriteRuleTokenStream<ImplTraits>::reset() +{ + m_cursor = m_elements.begin(); + m_dirty = true; +} + +template<class ImplTraits> +void RewriteRuleTokenStream<ImplTraits>::add(const ElementType* el) +{ + if ( el == NULL) + return; + m_elements.push_back(el); + m_cursor = m_elements.begin(); +} + +template<class ImplTraits> +typename RewriteRuleTokenStream<ImplTraits>::ElementsType::iterator +RewriteRuleTokenStream<ImplTraits>::_next() +{ + if (m_elements.empty()) + { + // This means that 
the stream is empty + // Caller must cope with this (TODO throw RewriteEmptyStreamException) + return m_elements.end(); + } + + if (m_dirty || m_cursor == m_elements.end()) + { + if( m_elements.size() == 1) + { + // Special case when size is single element, it will just dup a lot + //return this->toTree(m_singleElement); + return m_elements.begin(); + } + + // Out of elements and the size is not 1, so we cannot assume + // that we just duplicate the entry n times (such as ID ent+ -> ^(ID ent)+) + // This means we ran out of elements earlier than was expected. + // + return m_elements.end(); // Caller must cope with this (TODO throw RewriteEmptyStreamException) + } + + // More than just a single element so we extract it from the + // vector. + return m_cursor++; +} + +template<class ImplTraits> +typename RewriteRuleTokenStream<ImplTraits>::ElementType +RewriteRuleTokenStream<ImplTraits>::nextTree() +{ + ANTLR_UINT32 n = this->size(); + if ( m_dirty || ( (m_cursor >=n) && (n==1)) ) + { + // if out of elements and size is 1, dup + typename ElementsType::iterator el = this->_next(); + return this->dup(*el); + } + + // test size above then fetch + typename ElementsType::iterator el = this->_next(); + return *el; +} + +/* +template<class ImplTraits, class SuperType> +typename RewriteRuleTokenStream<ImplTraits, SuperType>::TokenType* +RewriteRuleTokenStream<ImplTraits, SuperType>::nextToken() +{ + return this->_next(); +} + +template<class ImplTraits, class SuperType> +typename RewriteRuleTokenStream<ImplTraits, SuperType>::TokenType* +RewriteRuleTokenStream<ImplTraits, SuperType>::next() +{ + ANTLR_UINT32 s; + s = this->size(); + if ( (m_cursor >= s) && (s == 1) ) + { + TreeTypePtr el; + el = this->_next(); + return this->dup(el); + } + return this->_next(); +} + +*/ + +template<class ImplTraits> +typename RewriteRuleTokenStream<ImplTraits>::ElementType* +RewriteRuleTokenStream<ImplTraits>::dup( ElementType* element) +{ + return dupImpl(element); +} + +template<class 
ImplTraits> +typename RewriteRuleTokenStream<ImplTraits>::ElementType* +RewriteRuleTokenStream<ImplTraits>::dupImpl( typename ImplTraits::CommonTokenType* /*element*/) +{ + return NULL; // TODO throw here +} + +template<class ImplTraits> +typename RewriteRuleTokenStream<ImplTraits>::ElementType* +RewriteRuleTokenStream<ImplTraits>::dupImpl( typename ImplTraits::TreeTypePtr element) +{ + return m_adaptor->dupTree(element); +} + +template<class ImplTraits> +typename RewriteRuleTokenStream<ImplTraits>::TreeTypePtr +RewriteRuleTokenStream<ImplTraits>::toTree(const ElementType* element) +{ + return m_adaptor->create(element); +} + +template<class ImplTraits> +bool +RewriteRuleTokenStream<ImplTraits>::hasNext() +{ + return m_cursor != m_elements.end(); +} + +template<class ImplTraits > +typename RewriteRuleTokenStream<ImplTraits>::TreeTypePtr +RewriteRuleTokenStream<ImplTraits>::nextNode() +{ + const TokenType *Token = this->nextToken(); + return m_adaptor->create(Token); +} + +/// Number of elements available in the stream +/// +template<class ImplTraits> +ANTLR_UINT32 RewriteRuleTokenStream<ImplTraits>::size() +{ + return (ANTLR_UINT32)(m_elements.size()); +} + +template<class ImplTraits> +typename RewriteRuleTokenStream<ImplTraits>::StringType +RewriteRuleTokenStream<ImplTraits>::getDescription() +{ + if ( m_elementDescription.empty() ) + { + m_elementDescription = "<unknown source>"; + } + return m_elementDescription; +} + +template<class ImplTraits> +RewriteRuleTokenStream<ImplTraits>::~RewriteRuleTokenStream() +{ + // Before placing the stream back in the pool, we + // need to clear any vector it has. This is so any + // free pointers that are associated with the + // entries are called. However, if this particular function is called + // then we know that the entries in the stream are definitely + // tree nodes. Hence we check to see if any of them were nilNodes as + // if they were, we can reuse them. 
+ // + // We have some elements to traverse + // + for (ANTLR_UINT32 i = 0; i < m_elements.size(); i++) + { + const ElementType *tree = m_elements.at(i); + //if ( (tree != NULL) && tree->isNilNode() ) + { + // Had to remove this for now, check is not comprehensive enough + // tree->reuse(tree); + } + } + m_elements.clear(); +} + +template<class ImplTraits> +const typename RewriteRuleTokenStream<ImplTraits>::TokenType* +RewriteRuleTokenStream<ImplTraits>::nextToken() +{ + auto retval = this->_next(); + if (retval == m_elements.end()) + return NULL; + else + return *retval; +} + +} diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3rewritestreams.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3rewritestreams.hpp new file mode 100644 index 0000000000..a8d7396643 --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3rewritestreams.hpp @@ -0,0 +1,299 @@ +#ifndef ANTLR3REWRITESTREAM_HPP +#define ANTLR3REWRITESTREAM_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+/// A generic list of elements tracked in an alternative to be used in
+/// a -> rewrite rule.
+///
+/// In the C implementation, all tree oriented streams return a pointer to
+/// the same type: pANTLR3_BASE_TREE. Anything that has subclassed from this
+/// still passes this type, within which there is a super pointer, which points
+/// to its own data and methods. Hence we do not need to implement this as
+/// the equivalent of an abstract class, but just fill in the appropriate interface
+/// as usual with this model.
+///
+/// Once you start next()ing, do not try to add more elements.  It will
+/// break the cursor tracking I believe. 
+///
+///
+/// \see #pANTLR3_REWRITE_RULE_NODE_STREAM
+/// \see #pANTLR3_REWRITE_RULE_ELEMENT_STREAM
+/// \see #pANTLR3_REWRITE_RULE_SUBTREE_STREAM
+///
+/// TODO: add mechanism to detect/puke on modification after reading from stream
+///
+// NOTE(review): this header shares its include guard with
+// antlr3rewriteruletokenstream.hpp and re-declares RewriteRuleTokenStream and
+// RewriteRuleSubtreeStream with different members than the dedicated headers.
+// It looks like a legacy variant that is not meant to be included alongside
+// them -- confirm.
+namespace antlr3 {
+
+/// Generic rewrite-rule element stream over raw `ElementType*` entries with an
+/// integer cursor.
+template<class ImplTraits, class ElementType>
+//template<class ImplTraits>
+class RewriteRuleElementStream : public ImplTraits::AllocPolicyType
+{
+public:
+    //typedef typename ElementTypePtr::element_type ElementType; unique_ptr
+    //typedef typename ImplTraits::TreeType TreeType;
+    typedef typename ImplTraits::AllocPolicyType AllocPolicyType;
+    typedef typename ImplTraits::TreeAdaptorType TreeAdaptorType;
+
+    //typedef typename ImplTraits::template RecognizerType< typename SuperType::StreamType > RecognizerType;
+    typedef typename ImplTraits::StringType StringType;
+    typedef typename AllocPolicyType::template VectorType< ElementType* > ElementsType;
+
+protected:
+    /// The list of tokens or subtrees we are tracking
+    ///
+    ElementsType m_elements;
+
+    /// The element or stream description; usually has name of the token or
+    /// rule reference that this list tracks.  Can include rulename too, but
+    /// the exception would track that info.
+    ///
+    StringType m_elementDescription;
+
+private:
+    ElementType* dupImpl(typename ImplTraits::CommonTokenType* el);
+    ElementType* dupImpl(typename ImplTraits::TreeTypePtr el);
+
+
+    /// Pointer to the tree adaptor in use for this stream
+    ///
+    TreeAdaptorType* m_adaptor;
+
+    /// Index of the next element to hand out (0..n-1); reset() puts it back to 0.
+    ///
+    ANTLR_UINT32 m_cursor;
+
+    /// Once a node / subtree has been used in a stream, it must be dup'ed
+    /// from then on.  Streams are reset after sub rules so that the streams
+    /// can be reused in future sub rules.  So, reset must set a dirty bit.
+    /// If dirty, then next() always returns a dup.
+    ///
+    bool m_dirty;
+
+public:
+    RewriteRuleElementStream(TreeAdaptorType* adaptor, const char* description);
+    // NOTE(review): takes a const pointer but forwards it to add(ElementType*),
+    // which takes a non-const pointer -- confirm this overload is never instantiated.
+    RewriteRuleElementStream(TreeAdaptorType* adaptor, const char* description, const ElementType* oneElement);
+    RewriteRuleElementStream(TreeAdaptorType* adaptor, const char* description, const ElementsType& elements);
+
+    ~RewriteRuleElementStream();
+    //    Methods
+
+    /// Reset the condition of this stream so that it appears we have
+    /// not consumed any of its elements.  Elements themselves are untouched.
+    ///
+    void reset();
+
+    /// Add a new element to this stream; a NULL element is ignored.
+    ///
+    void add(ElementType* el);
+
+    /// Return the next element in the stream.  If out of elements, throw
+    /// an exception unless size()==1.  If size is 1, then return elements[0].
+    ///
+    //TokenType* next();
+    ElementType nextTree();
+    //TokenType* nextToken();
+    ElementType* _next();
+
+    /// When constructing trees, sometimes we need to dup a token or AST
+    /// subtree.  Dup'ing a token means just creating another AST node
+    /// around it.  For trees, you must call the adaptor.dupTree().
+    ///
+    ElementType* dup( ElementType* el );
+
+    /// Ensure stream emits trees; tokens must be converted to AST nodes.
+    /// AST nodes can be passed through unmolested.
+    ///
+    ElementType* toTree(ElementType* el);
+
+    /// Returns true if there is a next element available
+    ///
+    bool hasNext();
+
+    /// Treat next element as a single node even if it's a subtree.
+    /// This is used instead of next() when the result has to be a
+    /// tree root node.  Also prevents us from duplicating recently-added
+    /// children; e.g., ^(type ID)+ adds ID to type and then 2nd iteration
+    /// must dup the type node, but ID has been added.
+    ///
+    /// Referencing to a rule result twice is ok; dup entire tree as
+    /// we can't be adding trees; e.g., expr expr.
+    ///
+    //TreeTypePtr nextNode();
+
+    /// Number of elements available in the stream
+    ///
+    ANTLR_UINT32 size();
+
+    /// Returns the description string if there is one available (check for NULL).
+    ///
+    StringType getDescription();
+
+protected:
+    /// Shared constructor helper: stores the adaptor and clears cursor and dirty flag.
+    void init(TreeAdaptorType* adaptor, const char* description);
+};
+
+/// This is an implementation of a token stream, which is basically an element
+/// stream that deals with tokens only.
+///
+template<class ImplTraits>
+//class RewriteRuleTokenStream : public ImplTraits::template RewriteRuleElementStreamType< typename ImplTraits::ParserType>
+class RewriteRuleTokenStream
+    //: public ImplTraits::template RewriteStreamType< const typename ImplTraits::CommonTokenType >
+{
+public:
+    typedef typename ImplTraits::AllocPolicyType AllocPolicyType;
+    typedef typename ImplTraits::TreeAdaptorType TreeAdaptorType;
+    typedef typename ImplTraits::ParserType ComponentType;
+    typedef typename ComponentType::StreamType StreamType;
+    typedef typename ImplTraits::CommonTokenType TokenType;
+    typedef typename ImplTraits::TreeType TreeType;
+    typedef typename ImplTraits::TreeTypePtr TreeTypePtr;
+    typedef typename AllocPolicyType::template VectorType< TokenType* > ElementsType;
+    typedef typename ImplTraits::template RecognizerType< StreamType > RecognizerType;
+    typedef typename ImplTraits::template RewriteStreamType< const typename ImplTraits::CommonTokenType > BaseType;
+
+public:
+    RewriteRuleTokenStream(TreeAdaptorType* adaptor, const char* description);
+    RewriteRuleTokenStream(TreeAdaptorType* adaptor, const char* description, const TokenType* oneElement);
+    RewriteRuleTokenStream(TreeAdaptorType* adaptor, const char* description, const ElementsType& elements);
+
+    TreeTypePtr nextNode();
+    TokenType* nextToken();
+
+    /// TODO copied from RewriteRuleElementStreamType
+    /// Add a new token to this stream
+    ///
+    typedef typename ImplTraits::CommonTokenType ElementType;
+    void add(const ElementType* el);
+    /// Pointer to the tree adaptor in use for this stream
+    ///
+    TreeAdaptorType* m_adaptor;
+    ElementType* _next();
+
+private:
+    //TreeTypePtr nextNodeToken();
+};
+
+/// This is an implementation of a subtree stream which is a set of trees
+/// modeled as an element stream.
+///
+template<class ImplTraits>
+//class RewriteRuleSubtreeStream : public ImplTraits::template RewriteStreamType< typename ImplTraits::TreeParserType>
+class RewriteRuleSubtreeStream
+    //: public ImplTraits::template RewriteStreamType< typename ImplTraits::TreeType >
+{
+public:
+    typedef typename ImplTraits::AllocPolicyType AllocPolicyType;
+    typedef typename ImplTraits::TreeAdaptorType TreeAdaptorType;
+    typedef typename ImplTraits::TreeParserType ComponentType;
+    typedef typename ComponentType::StreamType StreamType;
+    typedef typename ImplTraits::TreeType TreeType;
+    typedef typename ImplTraits::TreeTypePtr TreeTypePtr;
+    typedef TreeType TokenType;
+    typedef typename ImplTraits::template RecognizerType< StreamType > RecognizerType;
+    typedef typename AllocPolicyType::template VectorType< TokenType* > ElementsType;
+    typedef typename ImplTraits::template RewriteStreamType< typename ImplTraits::TreeType > BaseType;
+
+    RewriteRuleSubtreeStream(TreeAdaptorType* adaptor, const char* description);
+    RewriteRuleSubtreeStream(TreeAdaptorType* adaptor, const char* description, TreeTypePtr& oneElement);
+    RewriteRuleSubtreeStream(TreeAdaptorType* adaptor, const char* description, const ElementsType& elements);
+
+    TreeTypePtr nextNode(TreeTypePtr);
+
+    /// TODO copied from RewriteRuleElementStreamType
+    /// Add a new tree to this stream
+    ///
+    void add(TreeTypePtr& el);
+    bool hasNext();
+    TreeTypePtr& nextTree();
+    void reset();
+
+protected:
+    TreeTypePtr dup( TreeTypePtr el );
+
+private:
+    TreeTypePtr dupTree( TreeTypePtr el );
+};
+
+/* TODO This class is probably used in TreeParser only
+ * Notes about Java target
+ * - these classes reimplement only dup and toTree methods:
+ * base 
ElementStr
+ *        abstract dup
+ *        toTree(Object e) { return e; }
+ *    TokenStr
+ *        dup { throw }
+ *        toTree(Object e) { return e; }
+ *    SubTreeStr
+ *        dup(Object e) { return adaptor.dupTree }
+ *    NodeStr
+ *        dup { throw }
+ *        toTree(Object e) { return adaptor.dupNode }
+ * See: RewriteRuleElementStream::dup, RewriteRuleElementStream::dupImpl
+ *
+ * There should be 3 types of specializations for RewriteRuleElementStreamType (which is not defined yet)
+ * ATM: RewriteRuleElementStreamType is replaced with ImplTraits::template RewriteStreamType
+ *
+/// This is an implementation of a node stream, which is basically an element
+/// stream that deals with tree nodes only.
+///
+template<class ImplTraits>
+//class RewriteRuleNodeStream : public ImplTraits::template RewriteStreamType< typename ImplTraits::TreeParserType>
+class RewriteRuleNodeStream : public ImplTraits::template RewriteStreamType< typename ImplTraits::TreeType >
+{
+public:
+    typedef typename ImplTraits::AllocPolicyType AllocPolicyType;
+    typedef typename ImplTraits::TreeAdaptorType TreeAdaptorType;
+    typedef typename ImplTraits::TreeParserType ComponentType;
+    typedef typename ComponentType::StreamType StreamType;
+    typedef typename ImplTraits::TreeType TreeType;
+    typedef TreeType TokenType;
+    typedef typename ImplTraits::template RecognizerType< StreamType > RecognizerType;
+    typedef typename AllocPolicyType::template VectorType< TokenType* > ElementsType;
+    typedef typename ImplTraits::template RewriteRuleElementStreamType< typename ImplTraits::TreeType > BaseType;
+
+public:
+    RewriteRuleNodeStream(TreeAdaptorType* adaptor, const char* description);
+    RewriteRuleNodeStream(TreeAdaptorType* adaptor, const char* description, TokenType* oneElement);
+    RewriteRuleNodeStream(TreeAdaptorType* adaptor, const char* description, const ElementsType& elements);
+
+protected:
+    TreeTypePtr toTree(TreeTypePtr element);
+
+private:
+    TreeTypePtr toTreeNode(TreeTypePtr element);
+};
+*/
+}
+
+#include 
"antlr3rewritestreams.inl" + +#endif diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3rewritestreams.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3rewritestreams.inl new file mode 100644 index 0000000000..47568da649 --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3rewritestreams.inl @@ -0,0 +1,363 @@ +namespace antlr3 { + +template<class ImplTraits, class ElementType> +RewriteRuleElementStream<ImplTraits, ElementType>::RewriteRuleElementStream(TreeAdaptorType* adaptor, + const char* description) +{ + this->init(adaptor, description); +} + +template<class ImplTraits, class ElementType> +RewriteRuleElementStream<ImplTraits, ElementType>::RewriteRuleElementStream(TreeAdaptorType* adaptor, + const char* description, + const ElementType* oneElement) +{ + this->init(adaptor, description); + if( oneElement != NULL ) + this->add( oneElement ); +} + +template<class ImplTraits, class ElementType> +RewriteRuleElementStream<ImplTraits, ElementType>::RewriteRuleElementStream(TreeAdaptorType* adaptor, + const char* description, + const ElementsType& elements) + : m_elements(elements) +{ + this->init(adaptor, description); +} + +template<class ImplTraits, class ElementType> +void RewriteRuleElementStream<ImplTraits, ElementType>::init(TreeAdaptorType* adaptor, + const char* description) +{ + m_adaptor = adaptor; + m_cursor = 0; + m_dirty = false; +} + +template<class ImplTraits> +RewriteRuleTokenStream<ImplTraits>::RewriteRuleTokenStream(TreeAdaptorType* adaptor, + const char* description) + //: BaseType(adaptor, description) +{ +} + +template<class ImplTraits> +RewriteRuleTokenStream<ImplTraits>::RewriteRuleTokenStream(TreeAdaptorType* adaptor, + const char* description, + const TokenType* oneElement) + //: BaseType(adaptor, description, oneElement) +{ +} + +template<class ImplTraits> +RewriteRuleTokenStream<ImplTraits>::RewriteRuleTokenStream(TreeAdaptorType* adaptor, + const char* description, + const ElementsType& elements) + //: 
BaseType(adaptor, description, elements) +{ +} + +template<class ImplTraits> +RewriteRuleSubtreeStream<ImplTraits>::RewriteRuleSubtreeStream(TreeAdaptorType* adaptor, + const char* description) + //: BaseType(adaptor, description) +{ +} + +template<class ImplTraits> +RewriteRuleSubtreeStream<ImplTraits>::RewriteRuleSubtreeStream(TreeAdaptorType* adaptor, + const char* description, + TreeTypePtr& oneElement) + //: BaseType(adaptor, description, oneElement) +{ +} + +template<class ImplTraits> +RewriteRuleSubtreeStream<ImplTraits>::RewriteRuleSubtreeStream(TreeAdaptorType* adaptor, + const char* description, + const ElementsType& elements) + //: BaseType(adaptor, description, elements) +{ +} + +/* +template<class ImplTraits> +RewriteRuleNodeStream<ImplTraits>::RewriteRuleNodeStream(TreeAdaptorType* adaptor, + const char* description) + : BaseType(adaptor, description) +{ +} + +template<class ImplTraits> +RewriteRuleNodeStream<ImplTraits>::RewriteRuleNodeStream(TreeAdaptorType* adaptor, + const char* description, + TokenType* oneElement) + : BaseType(adaptor, description, oneElement) +{ +} + +template<class ImplTraits> +RewriteRuleNodeStream<ImplTraits>::RewriteRuleNodeStream(TreeAdaptorType* adaptor, + const char* description, + const ElementsType& elements) + : BaseType(adaptor, description, elements) +{ +} +*/ + +template<class ImplTraits, class ElementType> +void RewriteRuleElementStream<ImplTraits, ElementType>::reset() +{ + m_cursor = 0; + m_dirty = true; +} + +template<class ImplTraits, class ElementType> +void RewriteRuleElementStream<ImplTraits, ElementType>::add(ElementType* el) +{ + if ( el== NULL ) + return; + + m_elements.push_back(el); +} + +template<class ImplTraits, class ElementType> +ElementType* RewriteRuleElementStream<ImplTraits, ElementType>::_next() +{ + ANTLR_UINT32 n = this->size(); + + if (n == 0) + { + // This means that the stream is empty + return NULL; // Caller must cope with this (TODO throw RewriteEmptyStreamException) + } + + // 
Traversed all the available elements already? + if ( m_cursor >= n) // out of elements? + { + if (n == 1) + { + // Special case when size is single element, it will just dup a lot + //return this->toTree(m_singleElement); + return this->toTree(m_elements.at(0)); + } + + // Out of elements and the size is not 1, so we cannot assume + // that we just duplicate the entry n times (such as ID ent+ -> ^(ID ent)+) + // This means we ran out of elements earlier than was expected. + // + return NULL; // Caller must cope with this (TODO throw RewriteEmptyStreamException) + } + + // More than just a single element so we extract it from the + // vector. + ElementType* t = this->toTree(m_elements.at(m_cursor)); + m_cursor++; + return t; +} + +template<class ImplTraits, class ElementType> +ElementType +RewriteRuleElementStream<ImplTraits, ElementType>::nextTree() +{ + ANTLR_UINT32 n = this->size(); + if ( m_dirty || ( (m_cursor >=n) && (n==1)) ) + { + // if out of elements and size is 1, dup + ElementType* el = this->_next(); + return this->dup(el); + } + + // test size above then fetch + ElementType* el = this->_next(); + return el; +} + +/* +template<class ImplTraits, class SuperType> +typename RewriteRuleElementStream<ImplTraits, SuperType>::TokenType* +RewriteRuleElementStream<ImplTraits, SuperType>::nextToken() +{ + return this->_next(); +} + +template<class ImplTraits, class SuperType> +typename RewriteRuleElementStream<ImplTraits, SuperType>::TokenType* +RewriteRuleElementStream<ImplTraits, SuperType>::next() +{ + ANTLR_UINT32 s; + s = this->size(); + if ( (m_cursor >= s) && (s == 1) ) + { + TreeTypePtr el; + el = this->_next(); + return this->dup(el); + } + return this->_next(); +} + +*/ + +template<class ImplTraits, class ElementType> +ElementType* +RewriteRuleElementStream<ImplTraits, ElementType>::dup( ElementType* element) +{ + return dupImpl(element); +} + +template<class ImplTraits, class ElementType> +ElementType* +RewriteRuleElementStream<ImplTraits, 
ElementType>::dupImpl( typename ImplTraits::CommonTokenType* element) +{ + return NULL; // TODO throw here +} + +template<class ImplTraits, class ElementType> +ElementType* +RewriteRuleElementStream<ImplTraits, ElementType>::dupImpl( typename ImplTraits::TreeTypePtr element) +{ + return m_adaptor->dupTree(element); +} + +template<class ImplTraits> +typename RewriteRuleSubtreeStream<ImplTraits>::TreeTypePtr +RewriteRuleSubtreeStream<ImplTraits>::dup(TreeTypePtr element) +{ + return this->dupTree(element); +} + +template<class ImplTraits> +typename RewriteRuleSubtreeStream<ImplTraits>::TreeTypePtr +RewriteRuleSubtreeStream<ImplTraits>::dupTree(TreeTypePtr element) +{ + return BaseType::m_adaptor->dupNode(element); +} + +template<class ImplTraits, class ElementType> +ElementType* +RewriteRuleElementStream<ImplTraits, ElementType>::toTree( ElementType* element) +{ + return element; +} + +/* +template<class ImplTraits> +typename RewriteRuleNodeStream<ImplTraits>::TreeTypePtr +RewriteRuleNodeStream<ImplTraits>::toTree(TreeTypePtr element) +{ + return this->toTreeNode(element); +} + +template<class ImplTraits> +typename RewriteRuleNodeStream<ImplTraits>::TreeTypePtr +RewriteRuleNodeStream<ImplTraits>::toTreeNode(TreeTypePtr element) +{ + return BaseType::m_adaptor->dupNode(element); +} +*/ + +template<class ImplTraits, class ElementType> +bool RewriteRuleElementStream<ImplTraits, ElementType>::hasNext() +{ + if ( !m_elements.empty() && m_cursor < m_elements.size()) + { + return true; + } + else + { + return false; + } +} + +template<class ImplTraits > +typename RewriteRuleTokenStream<ImplTraits>::TreeTypePtr +RewriteRuleTokenStream<ImplTraits>::nextNode() +{ + TokenType *Token = this->nextToken(); + //return BaseType::m_adaptor->create(Token); + return m_adaptor->create(Token); +} + +/* +template<class ImplTraits> +typename RewriteRuleTokenStream<ImplTraits>::TreeTypePtr +RewriteRuleTokenStream<ImplTraits>::nextNodeToken() +{ + return 
BaseType::m_adaptor->create(this->_next()); +} +*/ + +/// Number of elements available in the stream +/// +template<class ImplTraits, class ElementType> +ANTLR_UINT32 RewriteRuleElementStream<ImplTraits, ElementType>::size() +{ + return (ANTLR_UINT32)(m_elements.size()); +} + +template<class ImplTraits, class ElementType> +typename RewriteRuleElementStream<ImplTraits, ElementType>::StringType +RewriteRuleElementStream<ImplTraits, ElementType>::getDescription() +{ + if ( m_elementDescription.empty() ) + { + m_elementDescription = "<unknown source>"; + } + return m_elementDescription; +} + +template<class ImplTraits, class ElementType> +RewriteRuleElementStream<ImplTraits, ElementType>::~RewriteRuleElementStream() +{ + // Before placing the stream back in the pool, we + // need to clear any vector it has. This is so any + // free pointers that are associated with the + // entries are called. However, if this particular function is called + // then we know that the entries in the stream are definitely + // tree nodes. Hence we check to see if any of them were nilNodes as + // if they were, we can reuse them. 
+ // + // We have some elements to traverse + // + for (ANTLR_UINT32 i = 0; i < m_elements.size(); i++) + { + ElementType *tree = m_elements.at(i); + //if ( (tree != NULL) && tree->isNilNode() ) + { + // Had to remove this for now, check is not comprehensive enough + // tree->reuse(tree); + } + } + m_elements.clear(); +} + +template<class ImplTraits> +typename RewriteRuleTokenStream<ImplTraits>::TokenType* +RewriteRuleTokenStream<ImplTraits>::nextToken() +{ + return this->_next(); +} + +template<class ImplTraits> +typename RewriteRuleSubtreeStream<ImplTraits>::TreeTypePtr +RewriteRuleSubtreeStream<ImplTraits>::nextNode(TreeTypePtr element) +{ + //System.out.println("nextNode: elements="+elements+", singleElement="+((Tree)singleElement).toStringTree()); + ANTLR_UINT32 n = this->size(); + if ( BaseType::m_dirty || (BaseType::m_cursor>=n && n==1) ) { + // if out of elements and size is 1, dup (at most a single node + // since this is for making root nodes). + TreeTypePtr el = this->_next(); + return BaseType::m_adaptor->dupNode(el); + } + // test size above then fetch + TreeType *tree = this->_next(); + while (BaseType::m_adaptor->isNil(tree) && BaseType::m_adaptor->getChildCount(tree) == 1) // m_adaptor is a pointer, so use -> ; descend through nil roots that hold exactly one child + tree = BaseType::m_adaptor->getChild(tree, 0); + //System.out.println("_next="+((Tree)tree).toStringTree()); + TreeType *el = BaseType::m_adaptor->dupNode(tree); // dup just the root (want node here) + return el; +} + +} diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3tokenstream.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3tokenstream.hpp new file mode 100644 index 0000000000..947ac097c8 --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3tokenstream.hpp @@ -0,0 +1,406 @@ +/** \file + * Defines the interface for an ANTLR3 common token stream. Custom token streams should create + * one of these and then override any functions by installing their own pointers + * to implement the various functions. 
+ */ +#ifndef _ANTLR3_TOKENSTREAM_HPP +#define _ANTLR3_TOKENSTREAM_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +/** Definition of a token source, which has a pointer to a function that + * returns the next token (using a token factory if it is going to be + * efficient) and a pointer to an ANTLR3_INPUT_STREAM. 
This is slightly + * different to the Java interface because we have no way to implement + * multiple interfaces without defining them in the interface structure + * or casting (void *), which is too convoluted. + */ +namespace antlr3 { + +//We are not making it subclass AllocPolicy, as this will always be a base class +template<class ImplTraits> +class TokenSource +{ +public: + typedef typename ImplTraits::CommonTokenType TokenType; + typedef TokenType CommonTokenType; + typedef typename ImplTraits::StringType StringType; + typedef typename ImplTraits::LexerType LexerType; + +private: + /** A special pre-allocated token, which signifies End Of Tokens. Because this must + * be set up with the current input index and so on, we embed the structure and + * return the address of it. It is marked as factoryMade, so that it is never + * attempted to be freed. + */ + TokenType m_eofToken; + + /// A special pre-allocated token, which is returned by mTokens() if the + /// lexer rule said to just skip the generated token altogether. + /// Having this single token stops us wasting memory by have the token factory + /// actually create something that we are going to SKIP(); anyway. + /// + TokenType m_skipToken; + + /** When the token source is constructed, it is populated with the file + * name from whence the tokens were produced by the lexer. This pointer is a + * copy of the one supplied by the CharStream (and may be NULL) so should + * not be manipulated other than to copy or print it. + */ + StringType m_fileName; + +public: + TokenType& get_eofToken(); + const TokenType& get_eofToken() const; + TokenType& get_skipToken(); + StringType& get_fileName(); + LexerType* get_super(); + + void set_fileName( const StringType& fileName ); + + /** + * \brief + * Default implementation of the nextToken() call for a lexer. + * + * \param toksource + * Points to the implementation of a token source. The lexer is + * addressed by the super structure pointer. 
+ * + * \returns + * The next token in the current input stream or the EOF token + * if there are no more tokens in any input stream in the stack. + * + * Write detailed description for nextToken here. + * + * \remarks + * Write remarks for nextToken here. + * + * \see nextTokenStr + */ + TokenType* nextToken(); + CommonTokenType* nextToken( BoolForwarder<true> /*isFiltered*/ ); + CommonTokenType* nextToken( BoolForwarder<false> /*isFiltered*/ ); + + /// + /// \brief + /// Returns the next available token from the current input stream. + /// + /// \param toksource + /// Points to the implementation of a token source. The lexer is + /// addressed by the super structure pointer. + /// + /// \returns + /// The next token in the current input stream or the EOF token + /// if there are no more tokens. + /// + /// \remarks + /// Write remarks for nextToken here. + /// + /// \see nextToken + /// + TokenType* nextTokenStr(); + +protected: + TokenSource(); +}; + +/** Definition of the ANTLR3 common token stream interface. + * \remark + * Much of the documentation for this interface is stolen from Ter's Java implementation. 
+ */ +template<class ImplTraits> +class TokenStream : public ImplTraits::TokenIntStreamType +{ +public: + typedef typename ImplTraits::TokenSourceType TokenSourceType; + typedef typename ImplTraits::TokenIntStreamType IntStreamType; + typedef typename ImplTraits::CommonTokenType TokenType; + typedef TokenType UnitType; + typedef typename ImplTraits::StringType StringType; + typedef typename ImplTraits::DebugEventListenerType DebugEventListenerType; + typedef typename ImplTraits::TokenStreamType TokenStreamType; + typedef typename ImplTraits::ParserType ComponentType; + +protected: + /** Pointer to the token source for this stream + */ + TokenSourceType* m_tokenSource; + + /// Debugger interface, if this is a debugging token stream + /// + DebugEventListenerType* m_debugger; + + /// Indicates the initial stream state for dbgConsume() + /// + bool m_initialStreamState; + +public: + TokenStream(TokenSourceType* source, DebugEventListenerType* debugger); + IntStreamType* get_istream(); + TokenSourceType* get_tokenSource() const; + void set_tokenSource( TokenSourceType* tokenSource ); + + /** Get Token at current input pointer + i ahead where i=1 is next Token. + * i<0 indicates tokens in the past. So -1 is previous token and -2 is + * two tokens ago. LT(0) is undefined. For i>=n, return Token.EOFToken. + * Return null for LT(0) and any index that results in an absolute address + * that is negative. + */ + const TokenType* LT(ANTLR_INT32 k); + + /** Where is this stream pulling tokens from? This is not the name, but + * a pointer into an interface that contains an ANTLR3_TOKEN_SOURCE interface. + * The Token Source interface contains a pointer to the input stream and a pointer + * to a function that returns the next token. 
+ */ + TokenSourceType* getTokenSource(); + + /** Function that installs a token source for the stream + */ + void setTokenSource(TokenSourceType* tokenSource); + + /** Return the text of all the tokens in the stream, as the old tramp in + * Leeds market used to say; "Get the lot!" + */ + StringType toString(); + + /** Return the text of all tokens from start to stop, inclusive. + * If the stream does not buffer all the tokens then it can just + * return an empty ANTLR3_STRING or NULL; Grammars should not access $ruleLabel.text in + * an action in that case. + */ + StringType toStringSS(ANTLR_MARKER start, ANTLR_MARKER stop); + + /** Because the user is not required to use a token with an index stored + * in it, we must provide a means for two token objects themselves to + * indicate the start/end location. Most often this will just delegate + * to the other toString(int,int). This is also parallel with + * the pTREENODE_STREAM->toString(Object,Object). + */ + StringType toStringTT(const TokenType* start, const TokenType* stop); + + + /** Function that sets the token stream into debugging mode + */ + void setDebugListener(DebugEventListenerType* debugger); + + TokenStream(); + +}; + +/** Common token stream is an implementation of ANTLR_TOKEN_STREAM for the default + * parsers and recognizers. You may of course build your own implementation if + * you are so inclined. 
+ */ +template<bool TOKENS_ACCESSED_FROM_OWNING_RULE, class ListType, class MapType> +class TokenStoreSelector +{ +public: + typedef ListType TokensType; +}; + +template<class ListType, class MapType> +class TokenStoreSelector<true, ListType, MapType> +{ +public: + typedef MapType TokensType; +}; + +template<class ImplTraits> +class CommonTokenStream : public TokenStream<ImplTraits> +{ +public: + typedef typename ImplTraits::AllocPolicyType AllocPolicyType; + typedef typename ImplTraits::BitsetType BitsetType; + typedef typename ImplTraits::CommonTokenType TokenType; + typedef typename ImplTraits::TokenSourceType TokenSourceType; + typedef typename ImplTraits::DebugEventListenerType DebugEventListenerType; + typedef typename AllocPolicyType::template ListType<TokenType> TokensListType; + typedef typename AllocPolicyType::template OrderedMapType<ANTLR_MARKER, TokenType> TokensMapType; + typedef typename TokenStoreSelector< ImplTraits::TOKENS_ACCESSED_FROM_OWNING_RULE, + TokensListType, TokensMapType >::TokensType TokensType; + + typedef typename AllocPolicyType::template UnOrderedMapType<ANTLR_UINT32, ANTLR_UINT32> ChannelOverridesType; + typedef typename AllocPolicyType::template OrderedSetType<ANTLR_UINT32> DiscardSetType; + typedef typename AllocPolicyType::template ListType<ANTLR_UINT32> IntListType; + typedef TokenStream<ImplTraits> BaseType; + +private: + /** Records every single token pulled from the source indexed by the token index. + * There might be more efficient ways to do this, such as referencing directly in to + * the token factory pools, but for now this is convenient and the ANTLR3_LIST is not + * a huge overhead as it only stores pointers anyway, but allows for iterations and + * so on. + */ + TokensType m_tokens; + + /** Override map of tokens. If a token type has an entry in here, then + * the pointer in the table points to an int, being the override channel number + * that should always be used for this token type. 
+ */ + ChannelOverridesType m_channelOverrides; + + /** Discarded set. If a token has an entry in this table, then it is thrown + * away (data pointer is always NULL). + */ + DiscardSetType m_discardSet; + + /* The channel number that this token stream is tuned to. For instance, whitespace + * is usually tuned to channel 99, which no token stream would normally tune to and + * so it is thrown away. + */ + ANTLR_UINT32 m_channel; + + /** The index into the tokens list of the current token (the next one that will be + * consumed). p = -1 indicates that the token list is empty. + */ + ANTLR_INT32 m_p; + + /* The total number of tokens issued till now. For streams that delete tokens, + this helps in issuing the index + */ + ANTLR_UINT32 m_nissued; + + /** If this flag is set to true, then tokens that the stream sees that are not + * in the channel that this stream is tuned to, are not tracked in the + * tokens table. When set to false, ALL tokens are added to the tracking. + */ + bool m_discardOffChannel; + +public: + CommonTokenStream(ANTLR_UINT32 hint, TokenSourceType* source = NULL, + DebugEventListenerType* debugger = NULL); + ~CommonTokenStream(); + TokensType& get_tokens(); + const TokensType& get_tokens() const; + DiscardSetType& get_discardSet(); + const DiscardSetType& get_discardSet() const; + ANTLR_INT32 get_p() const; + void set_p( ANTLR_INT32 p ); + void inc_p(); + void dec_p(); + + /** A simple filter mechanism whereby you can tell this token stream + * to force all tokens of type ttype to be on channel. For example, + * when interpreting, we cannot exec actions so we need to tell + * the stream to force all WS and NEWLINE to be a different, ignored + * channel. + */ + void setTokenTypeChannel(ANTLR_UINT32 ttype, ANTLR_UINT32 channel); + + /** Add a particular token type to the discard set. 
If a token is found to belong + * to this set, then it is skipped/thrown away + */ + void discardTokenType(ANTLR_INT32 ttype); + + //This will discard tokens of a particular rule after the rule execution completion + void discardTokens( ANTLR_MARKER start, ANTLR_MARKER stop ); + void discardTokens( ANTLR_MARKER start, ANTLR_MARKER stop, + BoolForwarder<true> tokens_accessed_from_owning_rule ); + void discardTokens( ANTLR_MARKER start, ANTLR_MARKER stop, + BoolForwarder<false> tokens_accessed_from_owning_rule ); + + void insertToken( const TokenType& tok ); + void insertToken( const TokenType& tok, BoolForwarder<true> tokens_accessed_from_owning_rule ); + void insertToken( const TokenType& tok, BoolForwarder<false> tokens_accessed_from_owning_rule ); + + /** Get a token at an absolute index i; 0..n-1. This is really only + * needed for profiling and debugging and token stream rewriting. + * If you don't want to buffer up tokens, then this method makes no + * sense for you. Naturally you can't use the rewrite stream feature. + * I believe DebugTokenStream can easily be altered to not use + * this method, removing the dependency. + */ + const TokenType* get(ANTLR_MARKER i); + const TokenType* getToken(ANTLR_MARKER i); + const TokenType* getToken( ANTLR_MARKER tok_idx, BoolForwarder<true> tokens_accessed_from_owning_rule ); + const TokenType* getToken( ANTLR_MARKER tok_idx, BoolForwarder<false> tokens_accessed_from_owning_rule ); + + /** Signal to discard off channel tokens from here on in. + */ + void discardOffChannelToks(bool discard); + + /** Function that returns a pointer to the ANTLR3_LIST of all tokens + * in the stream (this causes the buffer to fill if we have not get any yet) + */ + TokensType* getTokens(); + + /** Function that returns all the tokens between a start and a stop index. 
+ */ + void getTokenRange(ANTLR_UINT32 start, ANTLR_UINT32 stop, TokensListType& tokenRange); + + /** Function that returns all the tokens indicated by the specified bitset, within a range of tokens + */ + void getTokensSet(ANTLR_UINT32 start, ANTLR_UINT32 stop, BitsetType* types, TokensListType& tokenSet); + + /** Function that returns all the tokens indicated by being a member of the supplied List + */ + void getTokensList(ANTLR_UINT32 start, ANTLR_UINT32 stop, + const IntListType& list, TokensListType& tokenList); + + /** Function that returns all tokens of a certain type within a range. + */ + void getTokensType(ANTLR_UINT32 start, ANTLR_UINT32 stop, ANTLR_UINT32 type, TokensListType& tokens); + + /** Function that resets the token stream so that it can be reused, but + * that does not free up any resources, such as the token factory, + * the factory pool and so on. This prevents the need to keep freeing + * and reallocating the token pools if the thing you are building is + * a multi-shot daemon or something like that. It is much faster to + * just reuse all the vectors. 
+ */ + void reset(); + + const TokenType* LB(ANTLR_INT32 k); + + + void fillBufferExt(); + void fillBuffer(); + + bool hasReachedFillbufferTarget( ANTLR_UINT32 cnt, BoolForwarder<true> tokens_accessed_from_owning_rule ); + bool hasReachedFillbufferTarget( ANTLR_UINT32 cnt, BoolForwarder<false> tokens_accessed_from_owning_rule ); + + ANTLR_UINT32 skipOffTokenChannels(ANTLR_INT32 i); + ANTLR_UINT32 skipOffTokenChannelsReverse(ANTLR_INT32 x); + ANTLR_MARKER index_impl(); +}; + +class TokenAccessException : public std::exception +{ + virtual const char* what() const noexcept + { + return " Attempted access on Deleted Token"; + } +}; + +} + +#include "antlr3tokenstream.inl" + +#endif diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3tokenstream.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3tokenstream.inl new file mode 100644 index 0000000000..b2c4e3bcc8 --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3tokenstream.inl @@ -0,0 +1,941 @@ +namespace antlr3 { + +template<class ImplTraits> +TokenSource<ImplTraits>::TokenSource() + :m_eofToken( ImplTraits::CommonTokenType::TOKEN_EOF), + m_skipToken( ImplTraits::CommonTokenType::TOKEN_INVALID) +{ +} + +template<class ImplTraits> +ANTLR_INLINE typename TokenSource<ImplTraits>::CommonTokenType& TokenSource<ImplTraits>::get_eofToken() +{ + return m_eofToken; +} + +template<class ImplTraits> +ANTLR_INLINE const typename TokenSource<ImplTraits>::TokenType& TokenSource<ImplTraits>::get_eofToken() const +{ + return m_eofToken; +} + +template<class ImplTraits> +ANTLR_INLINE typename TokenSource<ImplTraits>::CommonTokenType& TokenSource<ImplTraits>::get_skipToken() +{ + return m_skipToken; +} + +template<class ImplTraits> +ANTLR_INLINE typename TokenSource<ImplTraits>::StringType& TokenSource<ImplTraits>::get_fileName() +{ + return m_fileName; +} + +template<class ImplTraits> +ANTLR_INLINE void TokenSource<ImplTraits>::set_fileName( const StringType& fileName ) +{ + m_fileName = fileName; +} + 
+template<class ImplTraits> +typename TokenSource<ImplTraits>::LexerType* TokenSource<ImplTraits>::get_super() +{ + return static_cast<LexerType*>(this); +} + +template<class ImplTraits> +typename TokenSource<ImplTraits>::TokenType* TokenSource<ImplTraits>::nextTokenStr() +{ + typedef typename LexerType::RecognizerSharedStateType RecognizerSharedStateType; + typedef typename LexerType::InputStreamType InputStreamType; + typedef typename LexerType::IntStreamType IntStreamType; + LexerType* lexer; + RecognizerSharedStateType* state; + InputStreamType* input; + IntStreamType* istream; + + lexer = this->get_super(); + state = lexer->get_rec()->get_state(); + input = lexer->get_input(); + istream = input->get_istream(); + + /// Loop until we get a non skipped token or EOF + /// + for (;;) + { + // Get rid of any previous token (token factory takes care of + // any de-allocation when this token is finally used up. + // + state->set_token_present(false); + state->set_error(false); // Start out without an exception + state->set_failed(false); + + // Now call the matching rules and see if we can generate a new token + // + for (;;) + { + // Record the start of the token in our input stream. + // + state->set_channel( TOKEN_DEFAULT_CHANNEL ); + state->set_tokenStartCharIndex( (ANTLR_MARKER)input->get_nextChar() ); + state->set_tokenStartCharPositionInLine( input->get_charPositionInLine() ); + state->set_tokenStartLine( input->get_line() ); + state->set_text(""); + + if (istream->LA(1) == ANTLR_CHARSTREAM_EOF) + { + // Reached the end of the current stream, nothing more to do if this is + // the last in the stack. 
+ // + TokenType& teof = m_eofToken; + + teof.set_startIndex(lexer->getCharIndex()); + teof.set_stopIndex(lexer->getCharIndex()); + teof.set_line(lexer->getLine()); + return &teof; + } + + state->set_token_present( false ); + state->set_error(false); // Start out without an exception + state->set_failed(false); + + // Call the generated lexer, see if it can get a new token together. + // + lexer->mTokens(); + + if (state->get_error() == true) + { + // Recognition exception, report it and try to recover. + // + state->set_failed(true); + lexer->get_rec()->reportError(); + lexer->recover(); + if (state->get_token_present()) + // Good(or invalid) token factored by custom recover procedure + // + return state->get_token(); + } + else + { + if ( !state->get_token_present() ) + { + // Emit the real token, which adds it in to the token stream basically + // + lexer->emit(); + } + else if ( *(state->get_token()) == m_skipToken ) + { + // A real token could have been generated, but "Computer say's naaaaah" and it + // it is just something we need to skip altogether. + // + continue; + } + + // Good token, not skipped, not EOF token + // + return state->get_token(); + } + } + } +} + +template<class ImplTraits> +typename TokenSource<ImplTraits>::TokenType* TokenSource<ImplTraits>::nextToken() +{ + return this->nextToken( BoolForwarder<LexerType::IsFiltered>() ); +} + +template<class ImplTraits> +typename TokenSource<ImplTraits>::CommonTokenType* TokenSource<ImplTraits>::nextToken( BoolForwarder<true> /*isFiltered*/ ) +{ + LexerType* lexer; + typename LexerType::RecognizerSharedStateType* state; + + lexer = this->get_super(); + state = lexer->get_lexstate(); + + /* Get rid of any previous token (token factory takes care of + * any deallocation when this token is finally used up. + */ + state->set_token_present( false ); + state->set_error( false ); /* Start out without an exception */ + state->set_failed(false); + + /* Record the start of the token in our input stream. 
+ */ + state->set_tokenStartCharIndex( lexer->index() ); + state->set_tokenStartCharPositionInLine( lexer->getCharPositionInLine() ); + state->set_tokenStartLine( lexer->getLine() ); + state->set_text(""); + + /* Now call the matching rules and see if we can generate a new token + */ + for (;;) + { + if (lexer->LA(1) == ANTLR_CHARSTREAM_EOF) + { + /* Reached the end of the stream, nothing more to do. + */ + CommonTokenType& teof = m_eofToken; + + teof.set_startIndex(lexer->getCharIndex()); + teof.set_stopIndex(lexer->getCharIndex()); + teof.set_line(lexer->getLine()); + return &teof; + } + + state->set_token_present(false); + state->set_error(false); /* Start out without an exception */ + + { + ANTLR_MARKER m; + + m = lexer->get_istream()->mark(); + state->set_backtracking(1); /* No exceptions */ + state->set_failed(false); + + /* Call the generated lexer, see if it can get a new token together. + */ + lexer->mTokens(); + state->set_backtracking(0); + + /* mTokens backtracks with synpred at BACKTRACKING==2 + and we set the synpredgate to allow actions at level 1. */ + + if(state->get_failed()) + { + lexer->rewind(m); + lexer->consume(); //<! advance one char and try again !> + } + else + { + lexer->emit(); /* Assemble the token and emit it to the stream */ + TokenType* tok = state->get_token(); + return tok; + } + } + } +} + +template<class ImplTraits> +typename TokenSource<ImplTraits>::CommonTokenType* TokenSource<ImplTraits>::nextToken( BoolForwarder<false> /*isFiltered*/ ) +{ + // Find the next token in the current stream + // + CommonTokenType* tok = this->nextTokenStr(); + + // If we got to the EOF token then switch to the previous + // input stream if there were any and just return the + // EOF if there are none. 
We must check the next token + // in any outstanding input stream we pop into the active + // role to see if it was sitting at EOF after PUSHing the + // stream we just consumed, otherwise we will return EOF + // on the reinstalled input stream, when in actual fact + // there might be more input streams to POP before the + // real EOF of the whole logical inptu stream. Hence we + // use a while loop here until we find somethign in the stream + // that isn't EOF or we reach the actual end of the last input + // stream on the stack. + // + while(tok->get_type() == CommonTokenType::TOKEN_EOF) + { + typename ImplTraits::LexerType* lexer; + lexer = static_cast<typename ImplTraits::LexerType*>( this->get_super() ); + + if ( lexer->get_rec()->get_state()->get_streams().size() > 0) + { + // We have another input stream in the stack so we + // need to revert to it, then resume the loop to check + // it wasn't sitting at EOF itself. + // + lexer->popCharStream(); + tok = this->nextTokenStr(); + } + else + { + // There were no more streams on the input stack + // so this EOF is the 'real' logical EOF for + // the input stream. So we just exit the loop and + // return the EOF we have found. 
+ // + break; + } + + } + + // return whatever token we have, which may be EOF + // + return tok; +} + +template<class ImplTraits> +TokenStream<ImplTraits>::TokenStream() +{ + m_tokenSource = NULL; + m_debugger = NULL; + m_initialStreamState = false; +} + +template<class ImplTraits> +typename TokenStream<ImplTraits>::IntStreamType* TokenStream<ImplTraits>::get_istream() +{ + return this; +} + +template<class ImplTraits> +TokenStream<ImplTraits>::TokenStream(TokenSourceType* source, DebugEventListenerType* debugger) +{ + m_initialStreamState = false; + m_tokenSource = source; + m_debugger = debugger; +} + +template<class ImplTraits> +CommonTokenStream<ImplTraits>::CommonTokenStream(ANTLR_UINT32 , TokenSourceType* source, + DebugEventListenerType* debugger) + : CommonTokenStream<ImplTraits>::BaseType( source, debugger ) +{ + m_p = -1; + m_channel = TOKEN_DEFAULT_CHANNEL; + m_discardOffChannel = false; + m_nissued = 0; +} + +template<class ImplTraits> +typename CommonTokenStream<ImplTraits>::TokensType& CommonTokenStream<ImplTraits>::get_tokens() +{ + return m_tokens; +} + +template<class ImplTraits> +const typename CommonTokenStream<ImplTraits>::TokensType& CommonTokenStream<ImplTraits>::get_tokens() const +{ + return m_tokens; +} + +template<class ImplTraits> +typename CommonTokenStream<ImplTraits>::DiscardSetType& CommonTokenStream<ImplTraits>::get_discardSet() +{ + return m_discardSet; +} + +template<class ImplTraits> +const typename CommonTokenStream<ImplTraits>::DiscardSetType& CommonTokenStream<ImplTraits>::get_discardSet() const +{ + return m_discardSet; +} + +template<class ImplTraits> +ANTLR_INLINE ANTLR_INT32 CommonTokenStream<ImplTraits>::get_p() const +{ + return m_p; +} + +template<class ImplTraits> +ANTLR_INLINE void CommonTokenStream<ImplTraits>::set_p( ANTLR_INT32 p ) +{ + m_p = p; +} + +template<class ImplTraits> +ANTLR_INLINE void CommonTokenStream<ImplTraits>::inc_p() +{ + ++m_p; +} + +template<class ImplTraits> +ANTLR_INLINE void 
CommonTokenStream<ImplTraits>::dec_p() +{ + --m_p; +} + +template<class ImplTraits> +ANTLR_INLINE ANTLR_MARKER CommonTokenStream<ImplTraits>::index_impl() +{ + return m_p; +} + +// Reset a token stream so it can be used again and can reuse it's +// resources. +// +template<class ImplTraits> +void CommonTokenStream<ImplTraits>::reset() +{ + // Free any resources that ar most like specifc to the + // run we just did. + // + m_discardSet.clear(); + m_channelOverrides.clear(); + + // Now, if there were any existing tokens in the stream, + // then we just reset the vector count so that it starts + // again. We must traverse the entries unfortunately as + // there may be free pointers for custom token types and + // so on. However that is just a quick NULL check on the + // vector entries. + // + m_tokens.clear(); + + // Reset to defaults + // + m_discardOffChannel = false; + m_channel = ImplTraits::CommonTokenType::TOKEN_DEFAULT_CHANNEL; + m_p = -1; +} + +template<class ImplTraits> +void TokenStream<ImplTraits>::setDebugListener(DebugEventListenerType* debugger) +{ + m_debugger = debugger; + m_initialStreamState = false; +} + +template<class ImplTraits> +const typename TokenStream<ImplTraits>::TokenType* TokenStream<ImplTraits>::LT(ANTLR_INT32 k) +{ + ANTLR_INT32 i; + ANTLR_INT32 n; + TokenStreamType* cts; + + cts = this->get_super(); + + if(k < 0) + { + return cts->LB(-k); + } + + ANTLR_INT32 req_idx = cts->get_p() + k - 1; + ANTLR_INT32 cached_size = static_cast<ANTLR_INT32>(this->get_istream()->get_cachedSize()); + + if( (cts->get_p() == -1) || + ( ( req_idx >= cached_size ) && ( (cached_size % ImplTraits::TOKEN_FILL_BUFFER_INCREMENT) == 0 ) ) + ) + { + cts->fillBuffer(); + } + + // Here we used to check for k == 0 and return 0, but this seems + // a superfluous check to me. 
LT(k=0) is therefore just undefined + // and we won't waste the clock cycles on the check + // + cached_size = static_cast<ANTLR_INT32>(this->get_istream()->get_cachedSize()); + if ( req_idx >= cached_size ) + { + TokenType& teof = cts->get_tokenSource()->get_eofToken(); + + teof.set_startIndex( this->get_istream()->index()); + teof.set_stopIndex( this->get_istream()->index()); + return &teof; + } + + i = cts->get_p(); + n = 1; + + /* Need to find k good tokens, skipping ones that are off channel + */ + while( n < k) + { + /* Skip off-channel tokens */ + i = cts->skipOffTokenChannels(i+1); /* leave p on valid token */ + n++; + } + + if( ( i >= cached_size ) && ( (cached_size % ImplTraits::TOKEN_FILL_BUFFER_INCREMENT) == 0 ) ) + { + cts->fillBuffer(); + } + if ( (ANTLR_UINT32) i >= this->get_istream()->get_cachedSize() ) + { + TokenType& teof = cts->get_tokenSource()->get_eofToken(); + + teof.set_startIndex(this->get_istream()->index()); + teof.set_stopIndex(this->get_istream()->index()); + return &teof; + } + + // Here the token must be in the input vector. Rather then incur + // function call penalty, we just return the pointer directly + // from the vector + // + return cts->getToken(i); +} + +template<class ImplTraits> +const typename CommonTokenStream<ImplTraits>::TokenType* CommonTokenStream<ImplTraits>::LB(ANTLR_INT32 k) +{ + ANTLR_INT32 i; + ANTLR_INT32 n; + + if (m_p == -1) + { + this->fillBuffer(); + } + if (k == 0) + { + return NULL; + } + if ((m_p - k) < 0) + { + return NULL; + } + + i = m_p; + n = 1; + + /* Need to find k good tokens, going backwards, skipping ones that are off channel + */ + while (n <= k) + { + /* Skip off-channel tokens + */ + + i = this->skipOffTokenChannelsReverse(i - 1); /* leave p on valid token */ + n++; + } + if (i < 0) + { + return NULL; + } + + // Here the token must be in the input vector. 
Rather then incut + // function call penalty, we jsut return the pointer directly + // from the vector + // + return this->getToken(i); +} + +template<class ImplTraits> +const typename CommonTokenStream<ImplTraits>::TokenType* CommonTokenStream<ImplTraits>::getToken(ANTLR_MARKER i) +{ + return this->get(i); +} + + +template<class ImplTraits> +const typename CommonTokenStream<ImplTraits>::TokenType* CommonTokenStream<ImplTraits>::get(ANTLR_MARKER i) +{ + return this->getToken( static_cast<ANTLR_MARKER>(i), + BoolForwarder<ImplTraits::TOKENS_ACCESSED_FROM_OWNING_RULE>() ); +} + +template<class ImplTraits> +const typename CommonTokenStream<ImplTraits>::TokenType* CommonTokenStream<ImplTraits>::getToken( ANTLR_MARKER tok_idx, + BoolForwarder<true> /*tokens_accessed_from_owning_rule*/ ) +{ + typename TokensType::iterator iter = m_tokens.find(tok_idx); + if( iter == m_tokens.end() ) + { + TokenAccessException ex; + throw ex; + } + const TokenType& tok = iter->second; + return &tok; +} + +template<class ImplTraits> +const typename CommonTokenStream<ImplTraits>::TokenType* CommonTokenStream<ImplTraits>::getToken( ANTLR_MARKER tok_idx, BoolForwarder<false> /*tokens_accessed_from_owning_rule*/ ) +{ + TokenType& tok = m_tokens.at( static_cast<ANTLR_UINT32>(tok_idx) ); + return &tok; +} + +template<class ImplTraits> +typename TokenStream<ImplTraits>::TokenSourceType* TokenStream<ImplTraits>::get_tokenSource() const +{ + return m_tokenSource; +} + +template<class ImplTraits> +void TokenStream<ImplTraits>::set_tokenSource( TokenSourceType* tokenSource ) +{ + m_tokenSource = tokenSource; +} + +template<class ImplTraits> +typename TokenStream<ImplTraits>::StringType TokenStream<ImplTraits>::toString() +{ + TokenStreamType* cts = static_cast<TokenStreamType>(this); + + if (cts->get_p() == -1) + { + cts->fillBuffer(); + } + + return this->toStringSS(0, this->get_istream()->size()); +} + +template<class ImplTraits> +typename TokenStream<ImplTraits>::StringType 
+TokenStream<ImplTraits>::toStringSS(ANTLR_MARKER start, ANTLR_MARKER stop) +{ + StringType string; + TokenSourceType* tsource; + const TokenType* tok; + TokenStreamType* cts; + + cts = this->get_super(); + + if (cts->get_p() == -1) + { + cts->fillBuffer(); + } + if (stop >= this->get_istream()->size()) + { + stop = this->get_istream()->size() - 1; + } + + /* Who is giving us these tokens? + */ + tsource = cts->get_tokenSource(); + + if (tsource != NULL && !cts->get_tokens().empty() ) + { + /* Finally, let's get a string + */ + for (ANTLR_MARKER i = start; i <= stop; i++) + { + tok = cts->get(i); + if (tok != NULL) + { + string.append( tok->getText() ); + } + } + + return string; + } + return ""; +} + +template<class ImplTraits> +typename TokenStream<ImplTraits>::StringType +TokenStream<ImplTraits>::toStringTT(const TokenType* start, const TokenType* stop) +{ + if (start != NULL && stop != NULL) + { + return this->toStringSS( start->get_tokenIndex(), + stop->get_tokenIndex()); + } + else + { + return ""; + } +} + +/** A simple filter mechanism whereby you can tell this token stream + * to force all tokens of type ttype to be on channel. For example, + * when interpreting, we cannot execute actions so we need to tell + * the stream to force all WS and NEWLINE to be a different, ignored, + * channel. + */ +template<class ImplTraits> +void CommonTokenStream<ImplTraits>::setTokenTypeChannel ( ANTLR_UINT32 ttype, ANTLR_UINT32 channel) +{ + /* We add one to the channel so we can distinguish NULL as being no entry in the + * table for a particular token type. + */ + m_channelOverrides[ttype] = (ANTLR_UINT32)channel + 1; + +} + +template<class ImplTraits> +void CommonTokenStream<ImplTraits>::discardTokenType(ANTLR_INT32 ttype) +{ + /* We add one to the channel so we can distinguish NULL as being no entry in the + * table for a particular token type. We could use bitsets for this I suppose too. 
+ */ + m_discardSet.insert(ttype); +} + +template<class ImplTraits> +void CommonTokenStream<ImplTraits>::discardOffChannelToks(bool discard) +{ + m_discardOffChannel = discard; +} + +template<class ImplTraits> +typename CommonTokenStream<ImplTraits>::TokensType* CommonTokenStream<ImplTraits>::getTokens() +{ + if (m_p == -1) + { + this->fillBuffer(); + } + + return &m_tokens; +} + +template<class ImplTraits> +void CommonTokenStream<ImplTraits>::getTokenRange(ANTLR_UINT32 start, ANTLR_UINT32 stop, + TokensListType& tokenRange) +{ + return this->getTokensSet(start, stop, NULL, tokenRange); +} + +/** Given a start and stop index, return a List of all tokens in + * the token type BitSet. Return null if no tokens were found. This + * method looks at both on and off channel tokens. + */ +template<class ImplTraits> +void +CommonTokenStream<ImplTraits>::getTokensSet(ANTLR_UINT32 start, ANTLR_UINT32 stop, BitsetType* types, + TokensListType& filteredList ) +{ + ANTLR_UINT32 i; + ANTLR_UINT32 n; + TokenType* tok; + + if ( m_p == -1) + { + this->fillBuffer(); + } + if (stop > this->get_istream()->size()) + { + stop = this->get_istream()->size(); + } + if (start > stop) + { + return; + } + + /* We have the range set, now we need to iterate through the + * installed tokens and create a new list with just the ones we want + * in it. We are just moving pointers about really. 
+ */ + for(i = start, n = 0; i<= stop; i++) + { + tok = this->get(i); + + if ( types == NULL + || (types->isMember( tok->get_type() ) == true ) + ) + { + filteredList.push_back(tok); + } + } + + return ; +} + +template<class ImplTraits> +void +CommonTokenStream<ImplTraits>::getTokensList(ANTLR_UINT32 start, ANTLR_UINT32 stop, + const IntListType& list, TokensListType& newlist) +{ + BitsetType* bitSet; + + bitSet = Bitset<ImplTraits>::BitsetFromList(list); + this->getTokensSet(start, stop, bitSet, newlist); + delete bitSet; +} + +template<class ImplTraits> +void +CommonTokenStream<ImplTraits>::getTokensType(ANTLR_UINT32 start, ANTLR_UINT32 stop, ANTLR_UINT32 type, + TokensListType& newlist ) +{ + BitsetType* bitSet; + + bitSet = BitsetType::BitsetOf(type, -1); + this->getTokensSet(start, stop, bitSet, newlist); + + delete bitSet; +} + +template<class ImplTraits> +void CommonTokenStream<ImplTraits>::fillBufferExt() +{ + this->fillBuffer(); +} + +template<class ImplTraits> +bool CommonTokenStream<ImplTraits>::hasReachedFillbufferTarget( ANTLR_UINT32 cnt, + BoolForwarder<true> ) +{ + return ( cnt >= ImplTraits::TOKEN_FILL_BUFFER_INCREMENT ); +} + +template<class ImplTraits> +bool CommonTokenStream<ImplTraits>::hasReachedFillbufferTarget( ANTLR_UINT32, + BoolForwarder<false> ) +{ + return false; +} + + +template<class ImplTraits> +void CommonTokenStream<ImplTraits>::fillBuffer() +{ + ANTLR_UINT32 index; + TokenType* tok; + bool discard; + + /* Start at index 0 of course + */ + ANTLR_UINT32 cached_p = (m_p < 0) ? 0 : m_p; + index = m_nissued; + ANTLR_UINT32 cnt = 0; + + /* Pick out the next token from the token source + * Remember we just get a pointer (reference if you like) here + * and so if we store it anywhere, we don't set any pointers to auto free it. 
+ */ + tok = this->get_tokenSource()->nextToken(); + + while ( tok->get_type() != TokenType::TOKEN_EOF ) + { + discard = false; /* Assume we are not discarding */ + + /* I employ a bit of a trick, or perhaps hack here. Rather than + * store a pointer to a structure in the override map and discard set + * we store the value + 1 cast to a void *. Hence on systems where NULL = (void *)0 + * we can distinguish "not being there" from "being channel or type 0" + */ + + if ( m_discardSet.find(tok->get_type()) != m_discardSet.end() ) + { + discard = true; + } + else if ( m_discardOffChannel == true + && tok->get_channel() != m_channel + ) + { + discard = true; + } + else if (!m_channelOverrides.empty()) + { + /* See if this type is in the override map + */ + typename ChannelOverridesType::iterator iter = m_channelOverrides.find( tok->get_type() + 1 ); + + if (iter != m_channelOverrides.end()) + { + /* Override found + */ + tok->set_channel( ANTLR_UINT32_CAST(iter->second) - 1); + } + } + + /* If not discarding it, add it to the list at the current index + */ + if (discard == false) + { + /* Add it, indicating that we will delete it and the table should not + */ + tok->set_tokenIndex(index); + ++m_p; + this->insertToken(*tok); + index++; + m_nissued++; + cnt++; + } + + if( !this->hasReachedFillbufferTarget( cnt, + BoolForwarder<ImplTraits::TOKENS_ACCESSED_FROM_OWNING_RULE>() ) ) + tok = this->get_tokenSource()->nextToken(); + else + break; + } + + /* Cache the size so we don't keep doing indirect method calls. We do this as + * early as possible so that anything after this may utilize the cached value. + */ + this->get_istream()->set_cachedSize( m_nissued ); + + /* Set the consume pointer to the first token that is on our channel, we just read + */ + m_p = cached_p; + m_p = this->skipOffTokenChannels( m_p ); + +} +/// Given a starting index, return the index of the first on-channel +/// token. 
+/// +template<class ImplTraits> +ANTLR_UINT32 CommonTokenStream<ImplTraits>::skipOffTokenChannels(ANTLR_INT32 i) +{ + ANTLR_INT32 n; + n = this->get_istream()->get_cachedSize(); + + while (i < n) + { + const TokenType* tok = this->getToken(i); + + if (tok->get_channel() != m_channel ) + { + i++; + } + else + { + return i; + } + } + return i; +} + +template<class ImplTraits> +ANTLR_UINT32 CommonTokenStream<ImplTraits>::skipOffTokenChannelsReverse(ANTLR_INT32 x) +{ + while (x >= 0) + { + const TokenType* tok = this->getToken(x); + + if( tok->get_channel() != m_channel ) + { + x--; + } + else + { + return x; + } + } + return x; +} + +template<class ImplTraits> +void CommonTokenStream<ImplTraits>::discardTokens( ANTLR_MARKER start, ANTLR_MARKER stop ) +{ + this->discardTokens( start, stop, BoolForwarder< ImplTraits::TOKENS_ACCESSED_FROM_OWNING_RULE >() ); +} + +template<class ImplTraits> +void CommonTokenStream<ImplTraits>::discardTokens( ANTLR_MARKER start, ANTLR_MARKER stop, + BoolForwarder<true> /*tokens_accessed_from_owning_rule */ ) +{ + typename TokensType::iterator iter1 = m_tokens.lower_bound(start); + typename TokensType::iterator iter2 = m_tokens.upper_bound(stop); + m_tokens.erase( iter1, iter2 ); +} + +template<class ImplTraits> +void CommonTokenStream<ImplTraits>::discardTokens( ANTLR_MARKER start, ANTLR_MARKER stop, + BoolForwarder<false> /*tokens_accessed_from_owning_rule*/ ) +{ + m_tokens.erase( m_tokens.begin() + start, m_tokens.begin() + stop ); +} + +template<class ImplTraits> +void CommonTokenStream<ImplTraits>::insertToken( const TokenType& tok ) +{ + this->insertToken( tok, BoolForwarder< ImplTraits::TOKENS_ACCESSED_FROM_OWNING_RULE >() ); +} + +template<class ImplTraits> +void CommonTokenStream<ImplTraits>::insertToken( const TokenType& tok, BoolForwarder<true> /*tokens_accessed_from_owning_rule*/ ) +{ + assert( m_tokens.find( tok.get_index() ) == m_tokens.end() ); + assert( tok.get_index() == m_nissued ); + m_tokens[ tok.get_index() ] = tok; +} 
+ +template<class ImplTraits> +void CommonTokenStream<ImplTraits>::insertToken( const TokenType& tok, BoolForwarder<false> /*tokens_accessed_from_owning_rule*/ ) +{ + m_tokens.push_back( tok ); +} + +template<class ImplTraits> +CommonTokenStream<ImplTraits>::~CommonTokenStream() +{ + m_tokens.clear(); +} + +} diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3traits.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3traits.hpp new file mode 100644 index 0000000000..c5741ce985 --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3traits.hpp @@ -0,0 +1,377 @@ +#ifndef _ANTLR3_TRAITS_HPP +#define _ANTLR3_TRAITS_HPP + +namespace antlr3 { + +/** + * Users implementing overrides should inherit from this + * + * All classes typenames reffer to Empty class + */ +template<class ImplTraits> +class CustomTraitsBase +{ +public: + typedef Empty AllocPolicyType; + typedef Empty StringType; + typedef Empty StringStreamType; + typedef Empty StreamDataType; + typedef Empty Endianness; + + //collections + typedef Empty BitsetType; + typedef Empty BitsetListType; + + typedef Empty InputStreamType; + + template<class StreamType> + class IntStreamType : public Empty + { + public: + typedef Empty BaseType; + }; + + typedef Empty LexStateType; + + typedef Empty CommonTokenType; + typedef Empty TokenUserDataType; + + typedef Empty TokenIntStreamType; + typedef Empty TokenStreamType; + + typedef Empty TreeNodeIntStreamType; + typedef Empty TreeNodeStreamType; + + + typedef Empty DebugEventListenerType; + template<class StreamType> + class RecognizerSharedStateType : public Empty + { + public: + typedef Empty BaseType; + }; + + template<class StreamType> + class RecognizerType : public Empty + { + public: + typedef Empty BaseType; + }; + + typedef Empty TreeType; + typedef Empty TreeUserDataType; + typedef Empty TreeAdaptorType; + typedef Empty TreeStoreType; + + template<class StreamType> + class ExceptionBaseType : public Empty + { + public: + typedef Empty BaseType; 
+ }; + + //this should be overridden with generated lexer + typedef Empty BaseLexerType; + + typedef Empty TokenSourceType; + typedef Empty BaseParserType;//this should be overridden with generated lexer + typedef Empty BaseTreeParserType; + + template<class ElementType> + class RewriteStreamType : public Empty + { + public: + typedef Empty BaseType; + }; + + typedef Empty RuleReturnValueType; + + //If we want to change the way tokens are stored + static const bool TOKENS_ACCESSED_FROM_OWNING_RULE = false; + static const unsigned TOKEN_FILL_BUFFER_INCREMENT = 100; //used only if the above val is true + + static void displayRecognitionError( const std::string& str ) { printf("%s", str.c_str() ); } +}; + +/** + * Traits manipulation classes + */ +template<class A, class B> +class TraitsSelector +{ +public: + typedef A selected; +}; + +template<class B> +class TraitsSelector<Empty, B> +{ +public: + typedef B selected; +}; + +template<class A, class B, class C> +class TraitsOneArgSelector +{ +public: + typedef A selected; +}; + +template<class A, class B> +class TraitsOneArgSelector<A,B,Empty> +{ +public: + typedef B selected; +}; + +template<bool v, class A, class B> +class BoolSelector +{ +public: + typedef A selected; +}; + +template<class A, class B> +class BoolSelector<false, A, B> +{ +public: + typedef B selected; +}; + +/** + * Base traits template + * + * This class contains default typenames for every trait + */ +template< template<class ImplTraits> class UserTraits > +class TraitsBase +{ +public: + typedef TraitsBase TraitsType; + + typedef typename TraitsSelector< typename UserTraits<TraitsType>::AllocPolicyType, + DefaultAllocPolicy + >::selected AllocPolicyType; + + typedef typename TraitsSelector< typename UserTraits<TraitsType>::StringType, + std::string + >::selected StringType; + + typedef typename TraitsSelector< typename UserTraits<TraitsType>::StringStreamType, + std::stringstream + >::selected StringStreamType; + + typedef typename TraitsSelector< 
typename UserTraits<TraitsType>::StreamDataType, + ANTLR_UINT8 + >::selected StreamDataType; + + typedef typename TraitsSelector< typename UserTraits<TraitsType>::Endianness, + RESOLVE_ENDIAN_AT_RUNTIME + >::selected Endianness; + + typedef typename TraitsSelector< typename UserTraits<TraitsType>::BitsetType, + Bitset<TraitsType> + >::selected BitsetType; + typedef typename TraitsSelector< typename UserTraits<TraitsType>::BitsetListType, + BitsetList<TraitsType> + >::selected BitsetListType; + + typedef typename TraitsSelector< typename UserTraits<TraitsType>::InputStreamType, + InputStream<TraitsType> + >::selected InputStreamType; + + template<class SuperType> + class IntStreamType : public TraitsOneArgSelector< typename UserTraits<TraitsType>::template IntStreamType<SuperType>, + IntStream<TraitsType, SuperType>, + typename UserTraits<TraitsType>::template IntStreamType<SuperType>::BaseType + >::selected + { }; + + typedef typename TraitsSelector< typename UserTraits<TraitsType>::LexStateType, + LexState<TraitsType> + >::selected LexStateType; + + static const bool TOKENS_ACCESSED_FROM_OWNING_RULE = UserTraits<TraitsType>::TOKENS_ACCESSED_FROM_OWNING_RULE; + static const unsigned TOKEN_FILL_BUFFER_INCREMENT = UserTraits<TraitsType>::TOKEN_FILL_BUFFER_INCREMENT; //used only if the above val is true + + static void displayRecognitionError( const StringType& str ) { UserTraits<TraitsType>::displayRecognitionError(str); } +}; + +/** + * Final traits + * + * They combine Traits and user provided traits(UserTraits) + */ +template< class LxrType, + class PsrType, + template<class ImplTraits> class UserTraits = CustomTraitsBase + //, + //class TreePsrType = antlr3::Empty + //template<class ImplTraits> class TreePsrType = TreeParser + > +class Traits : public TraitsBase<UserTraits> +{ +public: + typedef Traits TraitsType; + typedef TraitsBase<UserTraits> BaseTraitsType; + + // CommonTokenType + typedef typename TraitsSelector< typename 
UserTraits<TraitsType>::CommonTokenType, + CommonToken<TraitsType> >::selected CommonTokenType; + + // TokenUserDataType + typedef typename TraitsSelector< typename UserTraits<TraitsType>::TokenUserDataType, + Empty >::selected TokenUserDataType; + + // TokenListType + typedef typename BaseTraitsType::AllocPolicyType::template ListType<const CommonTokenType*> TokenListType; + + // TokenIntStreamType + typedef typename TraitsSelector< typename UserTraits<TraitsType>::TokenIntStreamType, + TokenIntStream<TraitsType> >::selected TokenIntStreamType; + // TokenStreamType + typedef typename TraitsSelector< typename UserTraits<TraitsType>::TokenStreamType, + CommonTokenStream<TraitsType> >::selected TokenStreamType; + + // TreeNodeIntStreamType + typedef typename TraitsSelector< typename UserTraits<TraitsType>::TreeNodeIntStreamType, + TreeNodeIntStream<TraitsType> >::selected TreeNodeIntStreamType; + + // TreeNodeStreamType + typedef typename TraitsSelector< typename UserTraits<TraitsType>::TreeNodeStreamType, + CommonTreeNodeStream<TraitsType> >::selected TreeNodeStreamType; + + // DebugEventListenerType + typedef typename TraitsSelector< typename UserTraits<TraitsType>::DebugEventListenerType, + DebugEventListener<TraitsType> >::selected DebugEventListenerType; + + // RecognizerSharedStateType + template<class StreamType> + class RecognizerSharedStateType : public TraitsOneArgSelector< typename UserTraits<TraitsType>::template RecognizerSharedStateType<StreamType>, + RecognizerSharedState<TraitsType, StreamType>, + typename UserTraits<TraitsType>::template RecognizerSharedStateType<StreamType>::BaseType + >::selected + {}; + + // RecognizerType + template<class StreamType> + class RecognizerType : public TraitsOneArgSelector< typename UserTraits<TraitsType>::template RecognizerType<StreamType>, + BaseRecognizer<TraitsType, StreamType>, + typename UserTraits<TraitsType>::template RecognizerType<StreamType>::BaseType + >::selected + { + public: + typedef typename 
TraitsOneArgSelector< typename UserTraits<TraitsType>::template RecognizerType<StreamType>, + BaseRecognizer<TraitsType, StreamType>, + typename UserTraits<TraitsType>::template RecognizerType<StreamType>::BaseType + >::selected BaseType; + typedef typename BaseType::RecognizerSharedStateType RecognizerSharedStateType; + + public: + RecognizerType(ANTLR_UINT32 sizeHint, RecognizerSharedStateType* state) + : BaseType( sizeHint, state ) + { + } + }; + + // TreeType + typedef typename TraitsSelector< typename UserTraits<TraitsType>::TreeType, + CommonTree<TraitsType> >::selected TreeType; + typedef typename TraitsSelector< typename UserTraits<TraitsType>::TreeUserDataType, + Empty >::selected TreeUserDataType; + // TreeAdaptorType + typedef typename TraitsSelector< typename UserTraits<TraitsType>::TreeAdaptorType, + CommonTreeAdaptor<TraitsType> >::selected TreeAdaptorType; + typedef typename TraitsSelector< typename UserTraits<TraitsType>::TreeStoreType, + CommonTreeStore<TraitsType> >::selected TreeStoreType; + typedef typename TreeStoreType::TreeTypePtr TreeTypePtr; + //typedef std::unique_ptr<TreeType, ResourcePoolManager<ImplTraits>> TreeTypePtr; + + // ExceptionBaseType + template<class StreamType> + class ExceptionBaseType : public TraitsOneArgSelector< typename UserTraits<TraitsType>::template ExceptionBaseType<StreamType>, + ANTLR_ExceptionBase<TraitsType, StreamType>, + typename UserTraits<TraitsType>::template ExceptionBaseType<StreamType>::BaseType + >::selected + { + public: + typedef typename TraitsOneArgSelector< typename UserTraits<TraitsType>::template ExceptionBaseType<StreamType>, + ANTLR_ExceptionBase<TraitsType, StreamType>, + typename UserTraits<TraitsType>::template ExceptionBaseType<StreamType>::BaseType + >::selected BaseType; + + protected: + ExceptionBaseType( const typename BaseTraitsType::StringType& message ) + :BaseType(message) + { + } + }; + + // this should be overridden with generated lexer + // BaseLexerType + typedef typename 
TraitsSelector< typename UserTraits<TraitsType>::BaseLexerType, + Lexer<TraitsType> >::selected BaseLexerType; + typedef LxrType LexerType; + + // TokenSourceType + typedef typename TraitsSelector< typename UserTraits<TraitsType>::TokenSourceType, + TokenSource<TraitsType> >::selected TokenSourceType; + + // this should be overridden with generated parser + // BaseParserType + typedef typename TraitsSelector< typename UserTraits<TraitsType>::BaseParserType, + Parser<TraitsType> >::selected BaseParserType; + typedef PsrType ParserType; + + // this should be overridden with generated treeparser (not implemented yet) + // BaseTreeParserType + typedef typename TraitsSelector< typename UserTraits<TraitsType>::BaseTreeParserType, + TreeParser<TraitsType> >::selected BaseTreeParserType; + //typedef TreePsrType<Traits> TreeParserType; + typedef BaseTreeParserType TreeParserType; + + // RewriteStreamType + template<class ElementType> + class RewriteStreamType : public TraitsOneArgSelector< typename UserTraits<TraitsType>::template RewriteStreamType<ElementType>, + RewriteRuleElementStream<TraitsType, ElementType>, + typename UserTraits<TraitsType>::template RewriteStreamType<ElementType>::BaseType + >::selected + { + public: + typedef typename TraitsOneArgSelector< typename UserTraits<TraitsType>::template RewriteStreamType<ElementType>, + RewriteRuleElementStream<TraitsType, ElementType>, + typename UserTraits<TraitsType>::template RewriteStreamType<ElementType>::BaseType + >::selected BaseType; + + //typedef typename SuperType::StreamType StreamType; + //typedef typename BaseType::RecognizerType Recognizer_Type; + //typedef typename BaseType::ElementType ElementType; + typedef typename BaseType::ElementsType ElementsType; + + public: + RewriteStreamType(TreeAdaptorType* adaptor = NULL, const char* description = NULL) + :BaseType(adaptor, description) + { + } + RewriteStreamType(TreeAdaptorType* adaptor, const char* description, ElementType* oneElement) + 
:BaseType(adaptor, description, oneElement) + { + } + RewriteStreamType(TreeAdaptorType* adaptor, const char* description, const ElementsType& elements) + :BaseType(adaptor, description, elements) + { + } + }; + + // RuleReturnValueType + typedef typename TraitsSelector< typename UserTraits<TraitsType>::RuleReturnValueType, + typename BoolSelector< TraitsType::TOKENS_ACCESSED_FROM_OWNING_RULE, + RuleReturnValue_1<TraitsType>, + RuleReturnValue<TraitsType> + >::selected + >::selected RuleReturnValueType; +}; + +} + +#endif //_ANTLR3_TRAITS_HPP diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3treeparser.hpp b/contrib/libs/antlr3_cpp_runtime/include/antlr3treeparser.hpp new file mode 100644 index 0000000000..c1395382b5 --- /dev/null +++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3treeparser.hpp @@ -0,0 +1,100 @@ +#ifndef ANTLR3TREEPARSER_HPP +#define ANTLR3TREEPARSER_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +/** Internal structure representing an element in a hash bucket. + * Stores the original key so that duplicate keys can be rejected + * if necessary, and contains function can be supported If the hash key + * could be unique I would have invented the perfect compression algorithm ;-) + */ +namespace antlr3 { + +template<class ImplTraits> +class TreeParser : public ImplTraits::template RecognizerType< typename ImplTraits::TreeNodeStreamType > +{ +public: + typedef typename ImplTraits::TreeNodeStreamType TreeNodeStreamType; + typedef TreeNodeStreamType StreamType; + typedef typename TreeNodeStreamType::IntStreamType IntStreamType; + typedef typename ImplTraits::TreeType TreeType; + typedef typename ImplTraits::TreeTypePtr TreeTypePtr; + typedef TreeType TokenType; + typedef typename ImplTraits::template ExceptionBaseType<TreeNodeStreamType> ExceptionBaseType; + typedef typename ImplTraits::template RecognizerType< typename ImplTraits::TreeNodeStreamType > RecognizerType; + typedef typename RecognizerType::RecognizerSharedStateType RecognizerSharedStateType; + typedef Empty TokenSourceType; + typedef typename ImplTraits::BitsetListType BitsetListType; + typedef typename ImplTraits::StringType StringType; + typedef typename ImplTraits::CommonTokenType CommonTokenType; + +private: + /** Pointer to the common tree node stream for the parser + */ + TreeNodeStreamType* m_ctnstream; + +public: + TreeParser( ANTLR_UINT32 sizeHint, TreeNodeStreamType* ctnstream, + 
RecognizerSharedStateType* state);
+    /** Return the tree node stream pointer held by this parser
+     */
+    TreeNodeStreamType* get_ctnstream() const;
+    /** Return the tree node stream as a pointer to its IntStreamType interface
+     */
+    IntStreamType* get_istream() const;
+    /** Return this parser as a pointer to its RecognizerType base
+     */
+    RecognizerType* get_rec();
+
+    // Same value as get_istream(). get_istream() exists for lexer, parser and
+    // tree parser; get_parser_istream() exists only for parser and tree parser,
+    // so use whichever matches the recognizers the calling code must support.
+    IntStreamType* get_parser_istream() const;
+
+    /** Set the input stream and reset the parser
+     */
+    void setTreeNodeStream(TreeNodeStreamType* input);
+
+    /** Return a pointer to the input stream
+     */
+    TreeNodeStreamType* getTreeNodeStream();
+
+    /** Build a replacement node for a symbol that was expected but is missing
+     *  from the input; the node is a duplicate of a nearby node of the stream.
+     */
+    TokenType* getMissingSymbol( IntStreamType* istream,
+                                 ExceptionBaseType* e,
+                                 ANTLR_UINT32 expectedTokenType,
+                                 BitsetListType* follow);
+
+    /** Destructor: clears the 'following' collection of the recognizer's shared state.
+     */
+    ~TreeParser();
+
+    void fillExceptionData( ExceptionBaseType* ex );
+    void displayRecognitionError( ANTLR_UINT8** tokenNames, ExceptionBaseType* ex );
+    void exConstruct();
+    void mismatch(ANTLR_UINT32 ttype, BitsetListType* follow);
+};
+
+}
+
+#include "antlr3treeparser.inl"
+
+#endif
diff --git a/contrib/libs/antlr3_cpp_runtime/include/antlr3treeparser.inl b/contrib/libs/antlr3_cpp_runtime/include/antlr3treeparser.inl
new file mode 100644
index 0000000000..5f5991f4eb
--- /dev/null
+++ b/contrib/libs/antlr3_cpp_runtime/include/antlr3treeparser.inl
@@ -0,0 +1,199 @@
+namespace antlr3 {
+
+/** Construct a tree parser.
+ *  sizeHint and state are passed straight to the RecognizerType base; the
+ *  stream is then installed with setTreeNodeStream(), which also resets the
+ *  recognizer and the stream.
+ */
+template< class ImplTraits >
+TreeParser<ImplTraits>::TreeParser( ANTLR_UINT32 sizeHint, TreeNodeStreamType* ctnstream,
+                                    RecognizerSharedStateType* state)
+    :RecognizerType( sizeHint, state )
+{
+    /* Install the tree node stream
+     */
+    this->setTreeNodeStream(ctnstream);
+
+}
+
+/** Clear the 'following' collection of the shared recognizer state.
+ *  The tree node stream pointer itself is not deleted here.
+ */
+template< class ImplTraits >
+TreeParser<ImplTraits>::~TreeParser()
+{
+    this->get_rec()->get_state()->get_following().clear();
+}
+
+/** Return the tree node stream pointer held by this parser.
+ */
+template< class ImplTraits >
+typename TreeParser<ImplTraits>::TreeNodeStreamType* TreeParser<ImplTraits>::get_ctnstream() const
+{
+    return m_ctnstream;
+}
+
+template< class ImplTraits >
+typename TreeParser<ImplTraits>::IntStreamType* TreeParser<ImplTraits>::get_istream() const
+{
+    // The tree node stream pointer is handed out through its IntStreamType interface.
+    return m_ctnstream;
+}
+
+/** Same result as get_istream(): the tree node stream as an IntStreamType pointer.
+ */
+template< class ImplTraits >
+typename TreeParser<ImplTraits>::IntStreamType* TreeParser<ImplTraits>::get_parser_istream() const
+{
+    return m_ctnstream;
+}
+
+/** Return this object as a pointer to its RecognizerType.
+ */
+template< class ImplTraits >
+typename TreeParser<ImplTraits>::RecognizerType* TreeParser<ImplTraits>::get_rec()
+{
+    return this;
+}
+
+/** Fill in the location details of a recognition exception.
+ *  Copies the line and character position of the exception's token into the
+ *  exception, records the current index of the tree node stream, chooses a
+ *  stream name and sets the message to "Unexpected node".
+ *
+ *  NOTE(review): the set_token() call below is commented out, so this
+ *  function relies on ex->get_token() already being populated (and non-null)
+ *  before it is called - confirm against the code that creates the exception.
+ */
+template< class ImplTraits >
+void TreeParser<ImplTraits>::fillExceptionData( ExceptionBaseType* ex )
+{
+    // NOTE(review): 'xxx' is never read; it looks like a leftover of the
+    // disabled set_token() call on the next line - confirm whether LT(1)
+    // is still needed here for its side effects.
+    auto xxx = m_ctnstream->LT(1);
+    //ex->set_token( m_ctnstream->LT(1) ); /* Current input tree node */
+    ex->set_line( ex->get_token()->get_line() );
+    ex->set_charPositionInLine( ex->get_token()->get_charPositionInLine() );
+    ex->set_index( m_ctnstream->index() );
+
+    // Choose the stream name from the token attached to the offending node:
+    //   node has no token          -> "-unknown source-"
+    //   token has no input stream  -> "" (empty name)
+    //   otherwise                  -> file name of the token's input stream
+    //
+    {
+        TreeTypePtr tnode;
+
+        tnode = ex->get_token();
+
+        if (tnode->get_token() == NULL)
+        {
+            ex->set_streamName("-unknown source-" );
+        }
+        else
+        {
+            if ( tnode->get_token()->get_input() == NULL)
+            {
+                ex->set_streamName("");
+            }
+            else
+            {
+                ex->set_streamName( tnode->get_token()->get_input()->get_fileName() );
+            }
+        }
+        ex->set_message("Unexpected node");
+    }
+}
+
+/** Format a recognition error as text and report it.
+ *  The text starts with the stream name (or a placeholder when the name is
+ *  empty) followed by the line number, the exception type and its message.
+ *  The tokenNames argument is not used.
+ *
+ *  NOTE(review): the stream name and token type are read from 'ex', while
+ *  line, type and message are read from the exception stored in the shared
+ *  recognizer state - presumably the same object; confirm.
+ */
+template< class ImplTraits >
+void TreeParser<ImplTraits>::displayRecognitionError( ANTLR_UINT8** /*tokenNames*/, ExceptionBaseType* ex )
+{
+    typename ImplTraits::StringStreamType errtext;
+    // See if there is a 'filename' we can use
+    //
+    if( ex->get_streamName().empty() )
+    {
+        // No name recorded: distinguish end of input from an unknown source.
+        if(ex->get_token()->get_type() == ImplTraits::CommonTokenType::TOKEN_EOF)
+        {
+            errtext << "-end of input-(";
+        }
+        else
+        {
+            errtext << "-unknown source-(";
+        }
+    }
+    else
+    {
+        errtext << ex->get_streamName() << "(";
+    }
+
+    // Next comes the line number
+    //
+    errtext << this->get_rec()->get_state()->get_exception()->get_line() << ") ";
+    errtext << " : error " << this->get_rec()->get_state()->get_exception()->getType()
+            << " : "
+            <<
this->get_rec()->get_state()->get_exception()->get_message();
+
+    TreeTypePtr theBaseTree = this->get_rec()->get_state()->get_exception()->get_token();
+
+    // Describe the offending node only when the exception carries one.
+    // toStringTree() is called inside the null check so that a missing node
+    // is never dereferenced.
+    if (theBaseTree != NULL)
+    {
+        StringType ttext = theBaseTree->toStringTree();
+
+        errtext << ", at offset "
+                << theBaseTree->getCharPositionInLine();
+        errtext << ", near " << ttext;
+    }
+    // Let the exception add its own details, then hand the finished text
+    // to the traits-level reporting hook.
+    ex->displayRecognitionError( errtext );
+    ImplTraits::displayRecognitionError( errtext.str() );
+}
+
+/** Install a new tree node stream.
+ *  Stores the pointer, then resets the recognizer and the stream itself.
+ *  The pointer is dereferenced, so it must not be null.
+ */
+template< class ImplTraits >
+void TreeParser<ImplTraits>::setTreeNodeStream(TreeNodeStreamType* input)
+{
+    m_ctnstream = input;
+    this->get_rec()->reset();
+    m_ctnstream->reset();
+}
+
+/** Return the tree node stream pointer held by this parser.
+ */
+template< class ImplTraits >
+typename TreeParser<ImplTraits>::TreeNodeStreamType* TreeParser<ImplTraits>::getTreeNodeStream()
+{
+    return m_ctnstream;
+}
+
+/** Create a MISMATCHED_TREE_NODE_EXCEPTION for this recognizer.
+ *
+ *  NOTE(review): the pointer returned by 'new' is not stored here;
+ *  presumably the ANTLR_Exception constructor registers the object with the
+ *  recognizer it is given, which would then own it - confirm, otherwise
+ *  this leaks.
+ */
+template< class ImplTraits >
+void TreeParser<ImplTraits>::exConstruct()
+{
+    new ANTLR_Exception<ImplTraits, MISMATCHED_TREE_NODE_EXCEPTION, TreeNodeStreamType>( this->get_rec(), "" );
+}
+
+/** Handle a mismatched node: create the mismatch exception, then run the
+ *  recognizer's recoverFromMismatchedToken() with the expected type and
+ *  follow set.
+ */
+template< class ImplTraits >
+void TreeParser<ImplTraits>::mismatch(ANTLR_UINT32 ttype, BitsetListType* follow)
+{
+    this->exConstruct();
+    this->recoverFromMismatchedToken(ttype, follow);
+}
+
+/** Build a stand-in node for a symbol that was expected but is missing.
+ *  The node is a duplicate of the current lookahead node LT(1), or of the
+ *  previous node LT(-1) when the lookahead is the EOF node; the duplicate's
+ *  token text is rewritten so that it starts with "<missing ".
+ *  The exception and follow-set arguments are not used.
+ */
+template< class ImplTraits >
+typename TreeParser<ImplTraits>::TokenType*
+TreeParser<ImplTraits>::getMissingSymbol( IntStreamType* istream, ExceptionBaseType* /*e*/,
+                                          ANTLR_UINT32 expectedTokenType, BitsetListType* /*follow*/)
+{
+    TreeNodeStreamType* tns;
+    TreeTypePtr node;
+    TreeTypePtr current;
+    CommonTokenType* token;
+    StringType text;
+    // NOTE(review): 'i' is only assigned and decremented in this function, never read.
+    ANTLR_INT32 i;
+
+    // Dereference the standard pointers
+    //
+    tns = static_cast<TreeNodeStreamType*>(istream);
+
+    // Create a new empty node, by stealing the current one, or the previous one if the current one is EOF
+    //
+    current = tns->LT(1);
+    i = -1;
+
+    if
(current == tns->get_EOF_NODE_p())
+    {
+        // The lookahead is the EOF node: duplicate the previous node instead.
+        current = tns->LT(-1);
+        i--;
+    }
+    node = current->dupNode();
+
+    // Find the newly duplicated token
+    //
+    token = node->getToken();
+
+    // Create the token text that shows it has been inserted
+    //
+    token->setText("<missing ");
+    text = token->getText();
+    text.append((const char *)this->get_rec()->get_state()->get_tokenName(expectedTokenType));
+    text.append((const char *)">");
+
+    // 'text' is a local string object, so the appends above do not reach the
+    // token by themselves: store the completed "<missing NAME>" text back.
+    // The C-string overload of setText() is the one already used above.
+    token->setText(text.c_str());
+
+    // Finally return the pointer to our new node
+    //
+    return node;
+}
+
+
+}
diff --git a/contrib/libs/antlr3_cpp_runtime/ya.make b/contrib/libs/antlr3_cpp_runtime/ya.make
new file mode 100644
index 0000000000..5c019aa396
--- /dev/null
+++ b/contrib/libs/antlr3_cpp_runtime/ya.make
@@ -0,0 +1,29 @@
+LIBRARY()
+
+# git repository: https://github.com/ibre5041/antlr3.git
+# XXX fork of: https://github.com/antlr/antlr3.git
+# directory: runtime/Cpp
+# revision: a4d1928e03b2b3f74579e54a6211cd1d695001b9
+
+VERSION(2016-03-31-a4d1928e03b2b3f74579e54a6211cd1d695001b9)
+
+LICENSE(
+    BSD-3-Clause AND
+    Unicode-Mappings
+)
+
+LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
+
+OWNER(g:yql)
+
+NO_COMPILER_WARNINGS()
+
+ADDINCL(
+    GLOBAL contrib/libs/antlr3_cpp_runtime/include
+)
+
+SRCS(
+    antlr3.cpp
+)
+
+END() |