diff options
author | asmyasnikov <asmyasnikov@ydb.tech> | 2024-06-26 17:09:51 +0300 |
---|---|---|
committer | asmyasnikov <asmyasnikov@ydb.tech> | 2024-06-26 17:27:07 +0300 |
commit | e25934f4bbe7b98daa362f04861972e8f83066ad (patch) | |
tree | b350932f398fafa6740fe43a529edf700c747270 /contrib/libs | |
parent | e6190f5d36aef50e2fec0076c384ba0874f5564c (diff) | |
download | ydb-e25934f4bbe7b98daa362f04861972e8f83066ad.tar.gz |
Added antlr4 to exported contribs into github.com/ydb-platform/ydb
4916444b182c044b7cd4c10f838a37a252ea36cf
Diffstat (limited to 'contrib/libs')
315 files changed, 31111 insertions, 0 deletions
diff --git a/contrib/libs/antlr4_cpp_runtime/.yandex_meta/devtools.copyrights.report b/contrib/libs/antlr4_cpp_runtime/.yandex_meta/devtools.copyrights.report new file mode 100644 index 0000000000..1816ca6a20 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/.yandex_meta/devtools.copyrights.report @@ -0,0 +1,406 @@ +# File format ($ symbol means the beginning of a line): +# +# $ # this message +# $ # ======================= +# $ # comments (all commentaries should starts with some number of spaces and # symbol) +# ${action} {license id} {license text hash} +# $BELONGS ./ya/make/file/relative/path/1/ya.make ./ya/make/2/ya.make +# ${all_file_action} filename +# $ # user commentaries (many lines) +# $ generated description - files with this license, license text... (some number of lines that starts with some number of spaces, do not modify) +# ${action} {license spdx} {license text hash} +# $BELONGS ./ya/make/file/relative/path/3/ya.make +# ${all_file_action} filename +# $ # user commentaries +# $ generated description +# $ ... +# +# You can modify action, all_file_action and add commentaries +# Available actions: +# keep - keep license in contrib and use in credits +# skip - skip license +# remove - remove all files with this license +# rename - save license text/links into licenses texts file, but not store SPDX into LINCENSE macro. You should store correct license id into devtools.license.spdx.txt file +# +# {all file action} records will be generated when license text contains filename that exists on filesystem (in contrib directory) +# We suppose that that files can contain some license info +# Available all file actions: +# FILE_IGNORE - ignore file (do nothing) +# FILE_INCLUDE - include all file data into licenses text file +# ======================= + +KEEP COPYRIGHT_SERVICE_LABEL 0038689435a930038c59cd7b6aa3253a +BELONGS ya.make + License text: + * Copyright (c) 2012 Terence Parr + * Copyright (c) 2012 Sam Harwell + * All rights reserved. 
+ Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + src/tree/IterativeParseTreeWalker.h [3:5] + +KEEP COPYRIGHT_SERVICE_LABEL 4ae71b56ebdfe548c9e21dc6776d3efa +BELONGS ya.make + License text: + Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + LICENSE.txt [1:1] + +KEEP COPYRIGHT_SERVICE_LABEL 605ef20fede2709d5c2d11c56a786d10 +BELONGS ya.make + License text: + /* Copyright (c) 2012-2021 The ANTLR Project. All rights reserved. + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + src/support/Casts.h [1:1] + +KEEP COPYRIGHT_SERVICE_LABEL 6216e987e1e22c7b399c9eb26217a878 +BELONGS ya.make + License text: + /* Copyright (c) 2021 The ANTLR Project. All rights reserved. + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + src/support/Unicode.h [1:1] + src/support/Utf8.cpp [1:1] + src/support/Utf8.h [1:1] + +KEEP COPYRIGHT_SERVICE_LABEL b87a6df5f745dd87f65713e3965bc8bb +BELONGS ya.make + License text: + // Copyright 2012-2022 The ANTLR Project + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + src/FlatHashMap.h [1:1] + src/FlatHashSet.h [1:1] + src/Version.h [1:1] + src/atn/ParserATNSimulatorOptions.h [1:1] + src/atn/PredictionContextCache.cpp [1:1] + src/atn/PredictionContextCache.h [1:1] + src/atn/PredictionContextMergeCache.cpp [1:1] + src/atn/PredictionContextMergeCache.h [1:1] + src/atn/PredictionContextMergeCacheOptions.h [1:1] + src/internal/Synchronization.cpp [1:1] + src/internal/Synchronization.h [1:1] + +KEEP COPYRIGHT_SERVICE_LABEL c814f02234dec37d81b4a2ae196440c5 +BELONGS ya.make + License text: + /* 
Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + src/ANTLRErrorListener.cpp [1:1] + src/ANTLRErrorListener.h [1:1] + src/ANTLRErrorStrategy.cpp [1:1] + src/ANTLRErrorStrategy.h [1:1] + src/ANTLRFileStream.cpp [1:1] + src/ANTLRFileStream.h [1:1] + src/ANTLRInputStream.cpp [1:1] + src/ANTLRInputStream.h [1:1] + src/BailErrorStrategy.cpp [1:1] + src/BailErrorStrategy.h [1:1] + src/BaseErrorListener.cpp [1:1] + src/BaseErrorListener.h [1:1] + src/BufferedTokenStream.cpp [1:1] + src/BufferedTokenStream.h [1:1] + src/CharStream.cpp [1:1] + src/CharStream.h [1:1] + src/CommonToken.cpp [1:1] + src/CommonToken.h [1:1] + src/CommonTokenFactory.cpp [1:1] + src/CommonTokenFactory.h [1:1] + src/CommonTokenStream.cpp [1:1] + src/CommonTokenStream.h [1:1] + src/ConsoleErrorListener.cpp [1:1] + src/ConsoleErrorListener.h [1:1] + src/DefaultErrorStrategy.cpp [1:1] + src/DefaultErrorStrategy.h [1:1] + src/DiagnosticErrorListener.cpp [1:1] + src/DiagnosticErrorListener.h [1:1] + src/Exceptions.cpp [1:1] + src/Exceptions.h [1:1] + src/FailedPredicateException.cpp [1:1] + src/FailedPredicateException.h [1:1] + src/InputMismatchException.cpp [1:1] + src/InputMismatchException.h [1:1] + src/IntStream.cpp [1:1] + src/IntStream.h [1:1] + src/InterpreterRuleContext.cpp [1:1] + src/InterpreterRuleContext.h [1:1] + src/Lexer.cpp [1:1] + src/Lexer.h [1:1] + src/LexerInterpreter.cpp [1:1] + src/LexerInterpreter.h [1:1] + src/LexerNoViableAltException.cpp [1:1] + src/LexerNoViableAltException.h [1:1] + src/ListTokenSource.cpp [1:1] + src/ListTokenSource.h [1:1] + src/NoViableAltException.cpp [1:1] + src/NoViableAltException.h [1:1] + src/Parser.cpp [1:1] + src/Parser.h [1:1] + src/ParserInterpreter.cpp [1:1] + src/ParserInterpreter.h [1:1] + src/ParserRuleContext.cpp [1:1] + src/ParserRuleContext.h [1:1] + src/ProxyErrorListener.cpp [1:1] + 
src/ProxyErrorListener.h [1:1] + src/RecognitionException.cpp [1:1] + src/RecognitionException.h [1:1] + src/Recognizer.cpp [1:1] + src/Recognizer.h [1:1] + src/RuleContext.cpp [1:1] + src/RuleContext.h [1:1] + src/RuleContextWithAltNum.cpp [1:1] + src/RuleContextWithAltNum.h [1:1] + src/RuntimeMetaData.cpp [1:1] + src/RuntimeMetaData.h [1:1] + src/Token.cpp [1:1] + src/Token.h [1:1] + src/TokenFactory.h [1:1] + src/TokenSource.cpp [1:1] + src/TokenSource.h [1:1] + src/TokenStream.cpp [1:1] + src/TokenStream.h [1:1] + src/TokenStreamRewriter.cpp [1:1] + src/TokenStreamRewriter.h [1:1] + src/UnbufferedCharStream.cpp [1:1] + src/UnbufferedCharStream.h [1:1] + src/UnbufferedTokenStream.cpp [1:1] + src/UnbufferedTokenStream.h [1:1] + src/Vocabulary.cpp [1:1] + src/Vocabulary.h [1:1] + src/WritableToken.cpp [1:1] + src/WritableToken.h [1:1] + src/antlr4-common.h [1:1] + src/antlr4-runtime.h [1:1] + src/atn/ATN.cpp [1:1] + src/atn/ATN.h [1:1] + src/atn/ATNConfig.cpp [1:1] + src/atn/ATNConfig.h [1:1] + src/atn/ATNConfigSet.cpp [1:1] + src/atn/ATNConfigSet.h [1:1] + src/atn/ATNDeserializationOptions.cpp [1:1] + src/atn/ATNDeserializationOptions.h [1:1] + src/atn/ATNDeserializer.cpp [1:1] + src/atn/ATNDeserializer.h [1:1] + src/atn/ATNSimulator.cpp [1:1] + src/atn/ATNSimulator.h [1:1] + src/atn/ATNState.cpp [1:1] + src/atn/ATNState.h [1:1] + src/atn/ATNStateType.h [1:1] + src/atn/ATNType.h [1:1] + src/atn/ActionTransition.cpp [1:1] + src/atn/ActionTransition.h [1:1] + src/atn/AmbiguityInfo.cpp [1:1] + src/atn/AmbiguityInfo.h [1:1] + src/atn/ArrayPredictionContext.cpp [1:1] + src/atn/ArrayPredictionContext.h [2:2] + src/atn/AtomTransition.cpp [1:1] + src/atn/AtomTransition.h [1:1] + src/atn/BasicBlockStartState.h [1:1] + src/atn/BasicState.h [1:1] + src/atn/BlockEndState.h [1:1] + src/atn/BlockStartState.h [1:1] + src/atn/ContextSensitivityInfo.cpp [1:1] + src/atn/ContextSensitivityInfo.h [1:1] + src/atn/DecisionEventInfo.cpp [1:1] + src/atn/DecisionEventInfo.h [1:1] + 
src/atn/DecisionInfo.cpp [1:1] + src/atn/DecisionInfo.h [1:1] + src/atn/DecisionState.cpp [1:1] + src/atn/DecisionState.h [1:1] + src/atn/EpsilonTransition.cpp [1:1] + src/atn/EpsilonTransition.h [1:1] + src/atn/ErrorInfo.cpp [1:1] + src/atn/ErrorInfo.h [1:1] + src/atn/LL1Analyzer.cpp [1:1] + src/atn/LL1Analyzer.h [1:1] + src/atn/LexerATNConfig.cpp [1:1] + src/atn/LexerATNConfig.h [1:1] + src/atn/LexerATNSimulator.cpp [1:1] + src/atn/LexerATNSimulator.h [1:1] + src/atn/LexerAction.h [1:1] + src/atn/LexerActionExecutor.cpp [1:1] + src/atn/LexerActionExecutor.h [1:1] + src/atn/LexerActionType.h [1:1] + src/atn/LexerChannelAction.cpp [1:1] + src/atn/LexerChannelAction.h [1:1] + src/atn/LexerCustomAction.cpp [1:1] + src/atn/LexerCustomAction.h [1:1] + src/atn/LexerIndexedCustomAction.cpp [1:1] + src/atn/LexerIndexedCustomAction.h [1:1] + src/atn/LexerModeAction.cpp [1:1] + src/atn/LexerModeAction.h [1:1] + src/atn/LexerMoreAction.cpp [1:1] + src/atn/LexerMoreAction.h [1:1] + src/atn/LexerPopModeAction.cpp [1:1] + src/atn/LexerPopModeAction.h [1:1] + src/atn/LexerPushModeAction.cpp [1:1] + src/atn/LexerPushModeAction.h [1:1] + src/atn/LexerSkipAction.cpp [1:1] + src/atn/LexerSkipAction.h [1:1] + src/atn/LexerTypeAction.cpp [1:1] + src/atn/LexerTypeAction.h [1:1] + src/atn/LookaheadEventInfo.cpp [1:1] + src/atn/LookaheadEventInfo.h [1:1] + src/atn/LoopEndState.h [1:1] + src/atn/NotSetTransition.cpp [1:1] + src/atn/NotSetTransition.h [1:1] + src/atn/OrderedATNConfigSet.cpp [1:1] + src/atn/OrderedATNConfigSet.h [1:1] + src/atn/ParseInfo.cpp [1:1] + src/atn/ParseInfo.h [1:1] + src/atn/ParserATNSimulator.cpp [1:1] + src/atn/ParserATNSimulator.h [1:1] + src/atn/PlusBlockStartState.h [1:1] + src/atn/PlusLoopbackState.h [1:1] + src/atn/PrecedencePredicateTransition.cpp [1:1] + src/atn/PrecedencePredicateTransition.h [1:1] + src/atn/PredicateEvalInfo.cpp [1:1] + src/atn/PredicateEvalInfo.h [1:1] + src/atn/PredicateTransition.cpp [1:1] + src/atn/PredicateTransition.h [1:1] + 
src/atn/PredictionContext.cpp [1:1] + src/atn/PredictionContext.h [1:1] + src/atn/PredictionContextType.h [1:1] + src/atn/PredictionMode.cpp [1:1] + src/atn/PredictionMode.h [1:1] + src/atn/ProfilingATNSimulator.cpp [1:1] + src/atn/ProfilingATNSimulator.h [1:1] + src/atn/RangeTransition.cpp [1:1] + src/atn/RangeTransition.h [1:1] + src/atn/RuleStartState.h [1:1] + src/atn/RuleStopState.h [1:1] + src/atn/RuleTransition.cpp [1:1] + src/atn/RuleTransition.h [1:1] + src/atn/SemanticContext.cpp [1:1] + src/atn/SemanticContext.h [1:1] + src/atn/SemanticContextType.h [1:1] + src/atn/SerializedATNView.h [1:1] + src/atn/SetTransition.cpp [1:1] + src/atn/SetTransition.h [1:1] + src/atn/SingletonPredictionContext.cpp [1:1] + src/atn/SingletonPredictionContext.h [1:1] + src/atn/StarBlockStartState.h [1:1] + src/atn/StarLoopEntryState.h [1:1] + src/atn/StarLoopbackState.cpp [1:1] + src/atn/StarLoopbackState.h [1:1] + src/atn/TokensStartState.h [1:1] + src/atn/Transition.cpp [1:1] + src/atn/Transition.h [1:1] + src/atn/TransitionType.h [1:1] + src/atn/WildcardTransition.cpp [1:1] + src/atn/WildcardTransition.h [1:1] + src/dfa/DFA.cpp [1:1] + src/dfa/DFA.h [1:1] + src/dfa/DFASerializer.cpp [1:1] + src/dfa/DFASerializer.h [1:1] + src/dfa/DFAState.cpp [1:1] + src/dfa/DFAState.h [1:1] + src/dfa/LexerDFASerializer.cpp [1:1] + src/dfa/LexerDFASerializer.h [1:1] + src/misc/InterpreterDataReader.cpp [1:1] + src/misc/InterpreterDataReader.h [1:1] + src/misc/Interval.cpp [1:1] + src/misc/Interval.h [1:1] + src/misc/IntervalSet.cpp [1:1] + src/misc/IntervalSet.h [1:1] + src/misc/MurmurHash.cpp [1:1] + src/misc/MurmurHash.h [1:1] + src/misc/Predicate.h [1:1] + src/support/Any.cpp [1:1] + src/support/Any.h [1:1] + src/support/Arrays.cpp [1:1] + src/support/Arrays.h [1:1] + src/support/BitSet.h [1:1] + src/support/CPPUtils.cpp [1:1] + src/support/CPPUtils.h [1:1] + src/support/Declarations.h [1:1] + src/support/StringUtils.cpp [1:1] + src/support/StringUtils.h [1:1] + 
src/tree/AbstractParseTreeVisitor.h [1:1] + src/tree/ErrorNode.h [1:1] + src/tree/ErrorNodeImpl.cpp [1:1] + src/tree/ErrorNodeImpl.h [1:1] + src/tree/IterativeParseTreeWalker.cpp [1:1] + src/tree/ParseTree.cpp [1:1] + src/tree/ParseTree.h [1:1] + src/tree/ParseTreeListener.cpp [1:1] + src/tree/ParseTreeListener.h [1:1] + src/tree/ParseTreeProperty.h [1:1] + src/tree/ParseTreeType.h [1:1] + src/tree/ParseTreeVisitor.cpp [1:1] + src/tree/ParseTreeVisitor.h [1:1] + src/tree/ParseTreeWalker.cpp [1:1] + src/tree/ParseTreeWalker.h [1:1] + src/tree/TerminalNode.h [1:1] + src/tree/TerminalNodeImpl.cpp [1:1] + src/tree/TerminalNodeImpl.h [1:1] + src/tree/Trees.cpp [1:1] + src/tree/Trees.h [1:1] + src/tree/pattern/Chunk.cpp [1:1] + src/tree/pattern/Chunk.h [1:1] + src/tree/pattern/ParseTreeMatch.cpp [1:1] + src/tree/pattern/ParseTreeMatch.h [1:1] + src/tree/pattern/ParseTreePattern.cpp [1:1] + src/tree/pattern/ParseTreePattern.h [1:1] + src/tree/pattern/ParseTreePatternMatcher.cpp [1:1] + src/tree/pattern/ParseTreePatternMatcher.h [1:1] + src/tree/pattern/RuleTagToken.cpp [1:1] + src/tree/pattern/RuleTagToken.h [1:1] + src/tree/pattern/TagChunk.cpp [1:1] + src/tree/pattern/TagChunk.h [1:1] + src/tree/pattern/TextChunk.cpp [1:1] + src/tree/pattern/TextChunk.h [1:1] + src/tree/pattern/TokenTagToken.cpp [1:1] + src/tree/pattern/TokenTagToken.h [1:1] + src/tree/xpath/XPath.cpp [1:1] + src/tree/xpath/XPath.h [1:1] + src/tree/xpath/XPathElement.cpp [1:1] + src/tree/xpath/XPathElement.h [1:1] + src/tree/xpath/XPathLexerErrorListener.cpp [1:1] + src/tree/xpath/XPathLexerErrorListener.h [1:1] + src/tree/xpath/XPathRuleAnywhereElement.cpp [1:1] + src/tree/xpath/XPathRuleAnywhereElement.h [1:1] + src/tree/xpath/XPathRuleElement.cpp [1:1] + src/tree/xpath/XPathRuleElement.h [1:1] + src/tree/xpath/XPathTokenAnywhereElement.cpp [1:1] + src/tree/xpath/XPathTokenAnywhereElement.h [1:1] + src/tree/xpath/XPathTokenElement.cpp [1:1] + src/tree/xpath/XPathTokenElement.h [1:1] + 
src/tree/xpath/XPathWildcardAnywhereElement.cpp [1:1] + src/tree/xpath/XPathWildcardAnywhereElement.h [1:1] + src/tree/xpath/XPathWildcardElement.cpp [1:1] + src/tree/xpath/XPathWildcardElement.h [1:1] + +KEEP COPYRIGHT_SERVICE_LABEL cef5fd247317ab255a0f3d28365b2492 +BELONGS ya.make + License text: + * Copyright (c) 2012 Terence Parr + * Copyright (c) 2012 Sam Harwell + * All rights reserved. + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + src/tree/IterativeParseTreeWalker.h [3:5] diff --git a/contrib/libs/antlr4_cpp_runtime/.yandex_meta/devtools.licenses.report b/contrib/libs/antlr4_cpp_runtime/.yandex_meta/devtools.licenses.report new file mode 100644 index 0000000000..3deef3d7cb --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/.yandex_meta/devtools.licenses.report @@ -0,0 +1,410 @@ +# File format ($ symbol means the beginning of a line): +# +# $ # this message +# $ # ======================= +# $ # comments (all commentaries should starts with some number of spaces and # symbol) +# ${action} {license spdx} {license text hash} +# $BELONGS ./ya/make/file/relative/path/1/ya.make ./ya/make/2/ya.make +# ${all_file_action} filename +# $ # user commentaries (many lines) +# $ generated description - files with this license, license text... (some number of lines that starts with some number of spaces, do not modify) +# ${action} {license spdx} {license text hash} +# $BELONGS ./ya/make/file/relative/path/3/ya.make +# ${all_file_action} filename +# $ # user commentaries +# $ generated description +# $ ... +# +# You can modify action, all_file_action and add commentaries +# Available actions: +# keep - keep license in contrib and use in credits +# skip - skip license +# remove - remove all files with this license +# rename - save license text/links into licenses texts file, but not store SPDX into LINCENSE macro. 
You should store correct license id into devtools.license.spdx.txt file +# +# {all file action} records will be generated when license text contains filename that exists on filesystem (in contrib directory) +# We suppose that that files can contain some license info +# Available all file actions: +# FILE_IGNORE - ignore file (do nothing) +# FILE_INCLUDE - include all file data into licenses text file +# ======================= + +KEEP BSD-3-Clause 077d9b4e5ae285fbec3b8860269fa41e +BELONGS ya.make + License text: + [![License](https://img.shields.io/badge/license-BSD-blue.svg)](https://raw.githubusercontent.com/antlr/antlr4/master/LICENSE.txt) + Scancode info: + Original SPDX id: BSD-3-Clause + Score : 95.00 + Match type : TAG + Links : http://www.opensource.org/licenses/BSD-3-Clause, https://spdx.org/licenses/BSD-3-Clause + Files with this license: + README.md [4:4] + +KEEP BSD-3-Clause 161cbda9544ea07f3f423c41526fe353 +BELONGS ya.make +FILE_INCLUDE LICENSE.txt found in files: src/ANTLRErrorListener.cpp at line 3, src/ANTLRErrorListener.h at line 3, src/ANTLRErrorStrategy.cpp at line 3, src/ANTLRErrorStrategy.h at line 3, src/ANTLRFileStream.cpp at line 3, src/ANTLRFileStream.h at line 3, src/ANTLRInputStream.cpp at line 3, src/ANTLRInputStream.h at line 3, src/BailErrorStrategy.cpp at line 3, src/BailErrorStrategy.h at line 3, src/BaseErrorListener.cpp at line 3, src/BaseErrorListener.h at line 3, src/BufferedTokenStream.cpp at line 3, src/BufferedTokenStream.h at line 3, src/CharStream.cpp at line 3, src/CharStream.h at line 3, src/CommonToken.cpp at line 3, src/CommonToken.h at line 3, src/CommonTokenFactory.cpp at line 3, src/CommonTokenFactory.h at line 3, src/CommonTokenStream.cpp at line 3, src/CommonTokenStream.h at line 3, src/ConsoleErrorListener.cpp at line 3, src/ConsoleErrorListener.h at line 3, src/DefaultErrorStrategy.cpp at line 3, src/DefaultErrorStrategy.h at line 3, src/DiagnosticErrorListener.cpp at line 3, src/DiagnosticErrorListener.h at line 
3, src/Exceptions.cpp at line 3, src/Exceptions.h at line 3, src/FailedPredicateException.cpp at line 3, src/FailedPredicateException.h at line 3, src/InputMismatchException.cpp at line 3, src/InputMismatchException.h at line 3, src/IntStream.cpp at line 3, src/IntStream.h at line 3, src/InterpreterRuleContext.cpp at line 3, src/InterpreterRuleContext.h at line 3, src/Lexer.cpp at line 3, src/Lexer.h at line 3, src/LexerInterpreter.cpp at line 3, src/LexerInterpreter.h at line 3, src/LexerNoViableAltException.cpp at line 3, src/LexerNoViableAltException.h at line 3, src/ListTokenSource.cpp at line 3, src/ListTokenSource.h at line 3, src/NoViableAltException.cpp at line 3, src/NoViableAltException.h at line 3, src/Parser.cpp at line 3, src/Parser.h at line 3, src/ParserInterpreter.cpp at line 3, src/ParserInterpreter.h at line 3, src/ParserRuleContext.cpp at line 3, src/ParserRuleContext.h at line 3, src/ProxyErrorListener.cpp at line 3, src/ProxyErrorListener.h at line 3, src/RecognitionException.cpp at line 3, src/RecognitionException.h at line 3, src/Recognizer.cpp at line 3, src/Recognizer.h at line 3, src/RuleContext.cpp at line 3, src/RuleContext.h at line 3, src/RuleContextWithAltNum.cpp at line 3, src/RuleContextWithAltNum.h at line 3, src/RuntimeMetaData.cpp at line 3, src/RuntimeMetaData.h at line 3, src/Token.cpp at line 3, src/Token.h at line 3, src/TokenFactory.h at line 3, src/TokenSource.cpp at line 3, src/TokenSource.h at line 3, src/TokenStream.cpp at line 3, src/TokenStream.h at line 3, src/TokenStreamRewriter.cpp at line 3, src/TokenStreamRewriter.h at line 3, src/UnbufferedCharStream.cpp at line 3, src/UnbufferedCharStream.h at line 3, src/UnbufferedTokenStream.cpp at line 3, src/UnbufferedTokenStream.h at line 3, src/Vocabulary.cpp at line 3, src/Vocabulary.h at line 3, src/WritableToken.cpp at line 3, src/WritableToken.h at line 3, src/antlr4-common.h at line 3, src/antlr4-runtime.h at line 3, src/atn/ATN.cpp at line 3, src/atn/ATN.h at line 3, 
src/atn/ATNConfig.cpp at line 3, src/atn/ATNConfig.h at line 3, src/atn/ATNConfigSet.cpp at line 3, src/atn/ATNConfigSet.h at line 3, src/atn/ATNDeserializationOptions.cpp at line 3, src/atn/ATNDeserializationOptions.h at line 3, src/atn/ATNDeserializer.cpp at line 3, src/atn/ATNDeserializer.h at line 3, src/atn/ATNSimulator.cpp at line 3, src/atn/ATNSimulator.h at line 3, src/atn/ATNState.cpp at line 3, src/atn/ATNState.h at line 3, src/atn/ATNStateType.h at line 3, src/atn/ATNType.h at line 3, src/atn/ActionTransition.cpp at line 3, src/atn/ActionTransition.h at line 3, src/atn/AmbiguityInfo.cpp at line 3, src/atn/AmbiguityInfo.h at line 3, src/atn/ArrayPredictionContext.cpp at line 3, src/atn/ArrayPredictionContext.h at line 4, src/atn/AtomTransition.cpp at line 3, src/atn/AtomTransition.h at line 3, src/atn/BasicBlockStartState.h at line 3, src/atn/BasicState.h at line 3, src/atn/BlockEndState.h at line 3, src/atn/BlockStartState.h at line 3, src/atn/ContextSensitivityInfo.cpp at line 3, src/atn/ContextSensitivityInfo.h at line 3, src/atn/DecisionEventInfo.cpp at line 3, src/atn/DecisionEventInfo.h at line 3, src/atn/DecisionInfo.cpp at line 3, src/atn/DecisionInfo.h at line 3, src/atn/DecisionState.cpp at line 3, src/atn/DecisionState.h at line 3, src/atn/EpsilonTransition.cpp at line 3, src/atn/EpsilonTransition.h at line 3, src/atn/ErrorInfo.cpp at line 3, src/atn/ErrorInfo.h at line 3, src/atn/LL1Analyzer.cpp at line 3, src/atn/LL1Analyzer.h at line 3, src/atn/LexerATNConfig.cpp at line 3, src/atn/LexerATNConfig.h at line 3, src/atn/LexerATNSimulator.cpp at line 3, src/atn/LexerATNSimulator.h at line 3, src/atn/LexerAction.h at line 3, src/atn/LexerActionExecutor.cpp at line 3, src/atn/LexerActionExecutor.h at line 3, src/atn/LexerActionType.h at line 3, src/atn/LexerChannelAction.cpp at line 3, src/atn/LexerChannelAction.h at line 3, src/atn/LexerCustomAction.cpp at line 3, src/atn/LexerCustomAction.h at line 3, src/atn/LexerIndexedCustomAction.cpp at line 
3, src/atn/LexerIndexedCustomAction.h at line 3, src/atn/LexerModeAction.cpp at line 3, src/atn/LexerModeAction.h at line 3, src/atn/LexerMoreAction.cpp at line 3, src/atn/LexerMoreAction.h at line 3, src/atn/LexerPopModeAction.cpp at line 3, src/atn/LexerPopModeAction.h at line 3, src/atn/LexerPushModeAction.cpp at line 3, src/atn/LexerPushModeAction.h at line 3, src/atn/LexerSkipAction.cpp at line 3, src/atn/LexerSkipAction.h at line 3, src/atn/LexerTypeAction.cpp at line 3, src/atn/LexerTypeAction.h at line 3, src/atn/LookaheadEventInfo.cpp at line 3, src/atn/LookaheadEventInfo.h at line 3, src/atn/LoopEndState.h at line 3, src/atn/NotSetTransition.cpp at line 3, src/atn/NotSetTransition.h at line 3, src/atn/OrderedATNConfigSet.cpp at line 3, src/atn/OrderedATNConfigSet.h at line 3, src/atn/ParseInfo.cpp at line 3, src/atn/ParseInfo.h at line 3, src/atn/ParserATNSimulator.cpp at line 3, src/atn/ParserATNSimulator.h at line 3, src/atn/PlusBlockStartState.h at line 3, src/atn/PlusLoopbackState.h at line 3, src/atn/PrecedencePredicateTransition.cpp at line 3, src/atn/PrecedencePredicateTransition.h at line 3, src/atn/PredicateEvalInfo.cpp at line 3, src/atn/PredicateEvalInfo.h at line 3, src/atn/PredicateTransition.cpp at line 3, src/atn/PredicateTransition.h at line 3, src/atn/PredictionContext.cpp at line 3, src/atn/PredictionContext.h at line 3, src/atn/PredictionContextType.h at line 3, src/atn/PredictionMode.cpp at line 3, src/atn/PredictionMode.h at line 3, src/atn/ProfilingATNSimulator.cpp at line 3, src/atn/ProfilingATNSimulator.h at line 3, src/atn/RangeTransition.cpp at line 3, src/atn/RangeTransition.h at line 3, src/atn/RuleStartState.h at line 3, src/atn/RuleStopState.h at line 3, src/atn/RuleTransition.cpp at line 3, src/atn/RuleTransition.h at line 3, src/atn/SemanticContext.cpp at line 3, src/atn/SemanticContext.h at line 3, src/atn/SemanticContextType.h at line 3, src/atn/SerializedATNView.h at line 3, src/atn/SetTransition.cpp at line 3, 
src/atn/SetTransition.h at line 3, src/atn/SingletonPredictionContext.cpp at line 3, src/atn/SingletonPredictionContext.h at line 3, src/atn/StarBlockStartState.h at line 3, src/atn/StarLoopEntryState.h at line 3, src/atn/StarLoopbackState.cpp at line 3, src/atn/StarLoopbackState.h at line 3, src/atn/TokensStartState.h at line 3, src/atn/Transition.cpp at line 3, src/atn/Transition.h at line 3, src/atn/TransitionType.h at line 3, src/atn/WildcardTransition.cpp at line 3, src/atn/WildcardTransition.h at line 3, src/dfa/DFA.cpp at line 3, src/dfa/DFA.h at line 3, src/dfa/DFASerializer.cpp at line 3, src/dfa/DFASerializer.h at line 3, src/dfa/DFAState.cpp at line 3, src/dfa/DFAState.h at line 3, src/dfa/LexerDFASerializer.cpp at line 3, src/dfa/LexerDFASerializer.h at line 3, src/misc/InterpreterDataReader.cpp at line 3, src/misc/InterpreterDataReader.h at line 3, src/misc/Interval.cpp at line 3, src/misc/Interval.h at line 3, src/misc/IntervalSet.cpp at line 3, src/misc/IntervalSet.h at line 3, src/misc/MurmurHash.cpp at line 3, src/misc/MurmurHash.h at line 3, src/misc/Predicate.h at line 3, src/support/Any.cpp at line 3, src/support/Any.h at line 3, src/support/Arrays.cpp at line 3, src/support/Arrays.h at line 3, src/support/BitSet.h at line 3, src/support/CPPUtils.cpp at line 3, src/support/CPPUtils.h at line 3, src/support/Casts.h at line 3, src/support/Declarations.h at line 3, src/support/StringUtils.cpp at line 3, src/support/StringUtils.h at line 3, src/support/Unicode.h at line 3, src/support/Utf8.cpp at line 3, src/support/Utf8.h at line 3, src/tree/AbstractParseTreeVisitor.h at line 3, src/tree/ErrorNode.h at line 3, src/tree/ErrorNodeImpl.cpp at line 3, src/tree/ErrorNodeImpl.h at line 3, src/tree/IterativeParseTreeWalker.cpp at line 3, src/tree/ParseTree.cpp at line 3, src/tree/ParseTree.h at line 3, src/tree/ParseTreeListener.cpp at line 3, src/tree/ParseTreeListener.h at line 3, src/tree/ParseTreeProperty.h at line 3, src/tree/ParseTreeType.h at line 
3, src/tree/ParseTreeVisitor.cpp at line 3, src/tree/ParseTreeVisitor.h at line 3, src/tree/ParseTreeWalker.cpp at line 3, src/tree/ParseTreeWalker.h at line 3, src/tree/TerminalNode.h at line 3, src/tree/TerminalNodeImpl.cpp at line 3, src/tree/TerminalNodeImpl.h at line 3, src/tree/Trees.cpp at line 3, src/tree/Trees.h at line 3, src/tree/pattern/Chunk.cpp at line 3, src/tree/pattern/Chunk.h at line 3, src/tree/pattern/ParseTreeMatch.cpp at line 3, src/tree/pattern/ParseTreeMatch.h at line 3, src/tree/pattern/ParseTreePattern.cpp at line 3, src/tree/pattern/ParseTreePattern.h at line 3, src/tree/pattern/ParseTreePatternMatcher.cpp at line 3, src/tree/pattern/ParseTreePatternMatcher.h at line 3, src/tree/pattern/RuleTagToken.cpp at line 3, src/tree/pattern/RuleTagToken.h at line 3, src/tree/pattern/TagChunk.cpp at line 3, src/tree/pattern/TagChunk.h at line 3, src/tree/pattern/TextChunk.cpp at line 3, src/tree/pattern/TextChunk.h at line 3, src/tree/pattern/TokenTagToken.cpp at line 3, src/tree/pattern/TokenTagToken.h at line 3, src/tree/xpath/XPath.cpp at line 3, src/tree/xpath/XPath.h at line 3, src/tree/xpath/XPathElement.cpp at line 3, src/tree/xpath/XPathElement.h at line 3, src/tree/xpath/XPathLexerErrorListener.cpp at line 3, src/tree/xpath/XPathLexerErrorListener.h at line 3, src/tree/xpath/XPathRuleAnywhereElement.cpp at line 3, src/tree/xpath/XPathRuleAnywhereElement.h at line 3, src/tree/xpath/XPathRuleElement.cpp at line 3, src/tree/xpath/XPathRuleElement.h at line 3, src/tree/xpath/XPathTokenAnywhereElement.cpp at line 3, src/tree/xpath/XPathTokenAnywhereElement.h at line 3, src/tree/xpath/XPathTokenElement.cpp at line 3, src/tree/xpath/XPathTokenElement.h at line 3, src/tree/xpath/XPathWildcardAnywhereElement.cpp at line 3, src/tree/xpath/XPathWildcardAnywhereElement.h at line 3, src/tree/xpath/XPathWildcardElement.cpp at line 3, src/tree/xpath/XPathWildcardElement.h at line 3 + License text: + * Use of this file is governed by the BSD 3-clause 
license that + * can be found in the LICENSE.txt file in the project root. + Scancode info: + Original SPDX id: BSD-3-Clause + Score : 65.00 + Match type : NOTICE + Links : http://www.opensource.org/licenses/BSD-3-Clause, https://spdx.org/licenses/BSD-3-Clause + Files with this license: + src/ANTLRErrorListener.cpp [2:3] + src/ANTLRErrorListener.h [2:3] + src/ANTLRErrorStrategy.cpp [2:3] + src/ANTLRErrorStrategy.h [2:3] + src/ANTLRFileStream.cpp [2:3] + src/ANTLRFileStream.h [2:3] + src/ANTLRInputStream.cpp [2:3] + src/ANTLRInputStream.h [2:3] + src/BailErrorStrategy.cpp [2:3] + src/BailErrorStrategy.h [2:3] + src/BaseErrorListener.cpp [2:3] + src/BaseErrorListener.h [2:3] + src/BufferedTokenStream.cpp [2:3] + src/BufferedTokenStream.h [2:3] + src/CharStream.cpp [2:3] + src/CharStream.h [2:3] + src/CommonToken.cpp [2:3] + src/CommonToken.h [2:3] + src/CommonTokenFactory.cpp [2:3] + src/CommonTokenFactory.h [2:3] + src/CommonTokenStream.cpp [2:3] + src/CommonTokenStream.h [2:3] + src/ConsoleErrorListener.cpp [2:3] + src/ConsoleErrorListener.h [2:3] + src/DefaultErrorStrategy.cpp [2:3] + src/DefaultErrorStrategy.h [2:3] + src/DiagnosticErrorListener.cpp [2:3] + src/DiagnosticErrorListener.h [2:3] + src/Exceptions.cpp [2:3] + src/Exceptions.h [2:3] + src/FailedPredicateException.cpp [2:3] + src/FailedPredicateException.h [2:3] + src/InputMismatchException.cpp [2:3] + src/InputMismatchException.h [2:3] + src/IntStream.cpp [2:3] + src/IntStream.h [2:3] + src/InterpreterRuleContext.cpp [2:3] + src/InterpreterRuleContext.h [2:3] + src/Lexer.cpp [2:3] + src/Lexer.h [2:3] + src/LexerInterpreter.cpp [2:3] + src/LexerInterpreter.h [2:3] + src/LexerNoViableAltException.cpp [2:3] + src/LexerNoViableAltException.h [2:3] + src/ListTokenSource.cpp [2:3] + src/ListTokenSource.h [2:3] + src/NoViableAltException.cpp [2:3] + src/NoViableAltException.h [2:3] + src/Parser.cpp [2:3] + src/Parser.h [2:3] + src/ParserInterpreter.cpp [2:3] + src/ParserInterpreter.h [2:3] + 
src/ParserRuleContext.cpp [2:3] + src/ParserRuleContext.h [2:3] + src/ProxyErrorListener.cpp [2:3] + src/ProxyErrorListener.h [2:3] + src/RecognitionException.cpp [2:3] + src/RecognitionException.h [2:3] + src/Recognizer.cpp [2:3] + src/Recognizer.h [2:3] + src/RuleContext.cpp [2:3] + src/RuleContext.h [2:3] + src/RuleContextWithAltNum.cpp [2:3] + src/RuleContextWithAltNum.h [2:3] + src/RuntimeMetaData.cpp [2:3] + src/RuntimeMetaData.h [2:3] + src/Token.cpp [2:3] + src/Token.h [2:3] + src/TokenFactory.h [2:3] + src/TokenSource.cpp [2:3] + src/TokenSource.h [2:3] + src/TokenStream.cpp [2:3] + src/TokenStream.h [2:3] + src/TokenStreamRewriter.cpp [2:3] + src/TokenStreamRewriter.h [2:3] + src/UnbufferedCharStream.cpp [2:3] + src/UnbufferedCharStream.h [2:3] + src/UnbufferedTokenStream.cpp [2:3] + src/UnbufferedTokenStream.h [2:3] + src/Vocabulary.cpp [2:3] + src/Vocabulary.h [2:3] + src/WritableToken.cpp [2:3] + src/WritableToken.h [2:3] + src/antlr4-common.h [2:3] + src/antlr4-runtime.h [2:3] + src/atn/ATN.cpp [2:3] + src/atn/ATN.h [2:3] + src/atn/ATNConfig.cpp [2:3] + src/atn/ATNConfig.h [2:3] + src/atn/ATNConfigSet.cpp [2:3] + src/atn/ATNConfigSet.h [2:3] + src/atn/ATNDeserializationOptions.cpp [2:3] + src/atn/ATNDeserializationOptions.h [2:3] + src/atn/ATNDeserializer.cpp [2:3] + src/atn/ATNDeserializer.h [2:3] + src/atn/ATNSimulator.cpp [2:3] + src/atn/ATNSimulator.h [2:3] + src/atn/ATNState.cpp [2:3] + src/atn/ATNState.h [2:3] + src/atn/ATNStateType.h [2:3] + src/atn/ATNType.h [2:3] + src/atn/ActionTransition.cpp [2:3] + src/atn/ActionTransition.h [2:3] + src/atn/AmbiguityInfo.cpp [2:3] + src/atn/AmbiguityInfo.h [2:3] + src/atn/ArrayPredictionContext.cpp [2:3] + src/atn/ArrayPredictionContext.h [3:4] + src/atn/AtomTransition.cpp [2:3] + src/atn/AtomTransition.h [2:3] + src/atn/BasicBlockStartState.h [2:3] + src/atn/BasicState.h [2:3] + src/atn/BlockEndState.h [2:3] + src/atn/BlockStartState.h [2:3] + src/atn/ContextSensitivityInfo.cpp [2:3] + 
src/atn/ContextSensitivityInfo.h [2:3] + src/atn/DecisionEventInfo.cpp [2:3] + src/atn/DecisionEventInfo.h [2:3] + src/atn/DecisionInfo.cpp [2:3] + src/atn/DecisionInfo.h [2:3] + src/atn/DecisionState.cpp [2:3] + src/atn/DecisionState.h [2:3] + src/atn/EpsilonTransition.cpp [2:3] + src/atn/EpsilonTransition.h [2:3] + src/atn/ErrorInfo.cpp [2:3] + src/atn/ErrorInfo.h [2:3] + src/atn/LL1Analyzer.cpp [2:3] + src/atn/LL1Analyzer.h [2:3] + src/atn/LexerATNConfig.cpp [2:3] + src/atn/LexerATNConfig.h [2:3] + src/atn/LexerATNSimulator.cpp [2:3] + src/atn/LexerATNSimulator.h [2:3] + src/atn/LexerAction.h [2:3] + src/atn/LexerActionExecutor.cpp [2:3] + src/atn/LexerActionExecutor.h [2:3] + src/atn/LexerActionType.h [2:3] + src/atn/LexerChannelAction.cpp [2:3] + src/atn/LexerChannelAction.h [2:3] + src/atn/LexerCustomAction.cpp [2:3] + src/atn/LexerCustomAction.h [2:3] + src/atn/LexerIndexedCustomAction.cpp [2:3] + src/atn/LexerIndexedCustomAction.h [2:3] + src/atn/LexerModeAction.cpp [2:3] + src/atn/LexerModeAction.h [2:3] + src/atn/LexerMoreAction.cpp [2:3] + src/atn/LexerMoreAction.h [2:3] + src/atn/LexerPopModeAction.cpp [2:3] + src/atn/LexerPopModeAction.h [2:3] + src/atn/LexerPushModeAction.cpp [2:3] + src/atn/LexerPushModeAction.h [2:3] + src/atn/LexerSkipAction.cpp [2:3] + src/atn/LexerSkipAction.h [2:3] + src/atn/LexerTypeAction.cpp [2:3] + src/atn/LexerTypeAction.h [2:3] + src/atn/LookaheadEventInfo.cpp [2:3] + src/atn/LookaheadEventInfo.h [2:3] + src/atn/LoopEndState.h [2:3] + src/atn/NotSetTransition.cpp [2:3] + src/atn/NotSetTransition.h [2:3] + src/atn/OrderedATNConfigSet.cpp [2:3] + src/atn/OrderedATNConfigSet.h [2:3] + src/atn/ParseInfo.cpp [2:3] + src/atn/ParseInfo.h [2:3] + src/atn/ParserATNSimulator.cpp [2:3] + src/atn/ParserATNSimulator.h [2:3] + src/atn/PlusBlockStartState.h [2:3] + src/atn/PlusLoopbackState.h [2:3] + src/atn/PrecedencePredicateTransition.cpp [2:3] + src/atn/PrecedencePredicateTransition.h [2:3] + src/atn/PredicateEvalInfo.cpp [2:3] + 
src/atn/PredicateEvalInfo.h [2:3] + src/atn/PredicateTransition.cpp [2:3] + src/atn/PredicateTransition.h [2:3] + src/atn/PredictionContext.cpp [2:3] + src/atn/PredictionContext.h [2:3] + src/atn/PredictionContextType.h [2:3] + src/atn/PredictionMode.cpp [2:3] + src/atn/PredictionMode.h [2:3] + src/atn/ProfilingATNSimulator.cpp [2:3] + src/atn/ProfilingATNSimulator.h [2:3] + src/atn/RangeTransition.cpp [2:3] + src/atn/RangeTransition.h [2:3] + src/atn/RuleStartState.h [2:3] + src/atn/RuleStopState.h [2:3] + src/atn/RuleTransition.cpp [2:3] + src/atn/RuleTransition.h [2:3] + src/atn/SemanticContext.cpp [2:3] + src/atn/SemanticContext.h [2:3] + src/atn/SemanticContextType.h [2:3] + src/atn/SerializedATNView.h [2:3] + src/atn/SetTransition.cpp [2:3] + src/atn/SetTransition.h [2:3] + src/atn/SingletonPredictionContext.cpp [2:3] + src/atn/SingletonPredictionContext.h [2:3] + src/atn/StarBlockStartState.h [2:3] + src/atn/StarLoopEntryState.h [2:3] + src/atn/StarLoopbackState.cpp [2:3] + src/atn/StarLoopbackState.h [2:3] + src/atn/TokensStartState.h [2:3] + src/atn/Transition.cpp [2:3] + src/atn/Transition.h [2:3] + src/atn/TransitionType.h [2:3] + src/atn/WildcardTransition.cpp [2:3] + src/atn/WildcardTransition.h [2:3] + src/dfa/DFA.cpp [2:3] + src/dfa/DFA.h [2:3] + src/dfa/DFASerializer.cpp [2:3] + src/dfa/DFASerializer.h [2:3] + src/dfa/DFAState.cpp [2:3] + src/dfa/DFAState.h [2:3] + src/dfa/LexerDFASerializer.cpp [2:3] + src/dfa/LexerDFASerializer.h [2:3] + src/misc/InterpreterDataReader.cpp [2:3] + src/misc/InterpreterDataReader.h [2:3] + src/misc/Interval.cpp [2:3] + src/misc/Interval.h [2:3] + src/misc/IntervalSet.cpp [2:3] + src/misc/IntervalSet.h [2:3] + src/misc/MurmurHash.cpp [2:3] + src/misc/MurmurHash.h [2:3] + src/misc/Predicate.h [2:3] + src/support/Any.cpp [2:3] + src/support/Any.h [2:3] + src/support/Arrays.cpp [2:3] + src/support/Arrays.h [2:3] + src/support/BitSet.h [2:3] + src/support/CPPUtils.cpp [2:3] + src/support/CPPUtils.h [2:3] + 
src/support/Casts.h [2:3] + src/support/Declarations.h [2:3] + src/support/StringUtils.cpp [2:3] + src/support/StringUtils.h [2:3] + src/support/Unicode.h [2:3] + src/support/Utf8.cpp [2:3] + src/support/Utf8.h [2:3] + src/tree/AbstractParseTreeVisitor.h [2:3] + src/tree/ErrorNode.h [2:3] + src/tree/ErrorNodeImpl.cpp [2:3] + src/tree/ErrorNodeImpl.h [2:3] + src/tree/IterativeParseTreeWalker.cpp [2:3] + src/tree/ParseTree.cpp [2:3] + src/tree/ParseTree.h [2:3] + src/tree/ParseTreeListener.cpp [2:3] + src/tree/ParseTreeListener.h [2:3] + src/tree/ParseTreeProperty.h [2:3] + src/tree/ParseTreeType.h [2:3] + src/tree/ParseTreeVisitor.cpp [2:3] + src/tree/ParseTreeVisitor.h [2:3] + src/tree/ParseTreeWalker.cpp [2:3] + src/tree/ParseTreeWalker.h [2:3] + src/tree/TerminalNode.h [2:3] + src/tree/TerminalNodeImpl.cpp [2:3] + src/tree/TerminalNodeImpl.h [2:3] + src/tree/Trees.cpp [2:3] + src/tree/Trees.h [2:3] + src/tree/pattern/Chunk.cpp [2:3] + src/tree/pattern/Chunk.h [2:3] + src/tree/pattern/ParseTreeMatch.cpp [2:3] + src/tree/pattern/ParseTreeMatch.h [2:3] + src/tree/pattern/ParseTreePattern.cpp [2:3] + src/tree/pattern/ParseTreePattern.h [2:3] + src/tree/pattern/ParseTreePatternMatcher.cpp [2:3] + src/tree/pattern/ParseTreePatternMatcher.h [2:3] + src/tree/pattern/RuleTagToken.cpp [2:3] + src/tree/pattern/RuleTagToken.h [2:3] + src/tree/pattern/TagChunk.cpp [2:3] + src/tree/pattern/TagChunk.h [2:3] + src/tree/pattern/TextChunk.cpp [2:3] + src/tree/pattern/TextChunk.h [2:3] + src/tree/pattern/TokenTagToken.cpp [2:3] + src/tree/pattern/TokenTagToken.h [2:3] + src/tree/xpath/XPath.cpp [2:3] + src/tree/xpath/XPath.h [2:3] + src/tree/xpath/XPathElement.cpp [2:3] + src/tree/xpath/XPathElement.h [2:3] + src/tree/xpath/XPathLexerErrorListener.cpp [2:3] + src/tree/xpath/XPathLexerErrorListener.h [2:3] + src/tree/xpath/XPathRuleAnywhereElement.cpp [2:3] + src/tree/xpath/XPathRuleAnywhereElement.h [2:3] + src/tree/xpath/XPathRuleElement.cpp [2:3] + 
src/tree/xpath/XPathRuleElement.h [2:3] + src/tree/xpath/XPathTokenAnywhereElement.cpp [2:3] + src/tree/xpath/XPathTokenAnywhereElement.h [2:3] + src/tree/xpath/XPathTokenElement.cpp [2:3] + src/tree/xpath/XPathTokenElement.h [2:3] + src/tree/xpath/XPathWildcardAnywhereElement.cpp [2:3] + src/tree/xpath/XPathWildcardAnywhereElement.h [2:3] + src/tree/xpath/XPathWildcardElement.cpp [2:3] + src/tree/xpath/XPathWildcardElement.h [2:3] + +KEEP BSD-3-Clause 214ed1e625b23c4c4e6d272eeb55b035 +BELONGS ya.make + License text: + // obligations required by the BSD 3-clause license that governs this file. + Scancode info: + Original SPDX id: BSD-3-Clause + Score : 100.00 + Match type : REFERENCE + Links : http://www.opensource.org/licenses/BSD-3-Clause, https://spdx.org/licenses/BSD-3-Clause + Files with this license: + src/support/Utf8.cpp [16:16] + +KEEP BSD-3-Clause 6d96a20c86c6ae929c48a02c32d9e9cc +BELONGS ya.make + Note: matched license text is too long. Read it in the source files. + Scancode info: + Original SPDX id: BSD-3-Clause + Score : 100.00 + Match type : TEXT + Links : http://www.opensource.org/licenses/BSD-3-Clause, https://spdx.org/licenses/BSD-3-Clause + Files with this license: + src/tree/IterativeParseTreeWalker.h [7:28] + +KEEP BSD-3-Clause ea24a35dd8063a467fd9230e248036d4 +BELONGS ya.make + License text: + * [The "BSD license"] + Scancode info: + Original SPDX id: BSD-3-Clause + Score : 99.00 + Match type : REFERENCE + Links : http://www.opensource.org/licenses/BSD-3-Clause, https://spdx.org/licenses/BSD-3-Clause + Files with this license: + src/tree/IterativeParseTreeWalker.h [2:2] + +KEEP BSD-3-Clause efbc6616f9881a16cddbd0b5b6daf42d +BELONGS ya.make + Note: matched license text is too long. Read it in the source files. 
+ Scancode info: + Original SPDX id: BSD-3-Clause + Score : 100.00 + Match type : TEXT + Links : http://www.opensource.org/licenses/BSD-3-Clause, https://spdx.org/licenses/BSD-3-Clause + Files with this license: + src/FlatHashMap.h [3:24] + src/FlatHashSet.h [3:24] + src/Version.h [3:24] + src/atn/ParserATNSimulatorOptions.h [3:24] + src/atn/PredictionContextCache.cpp [3:24] + src/atn/PredictionContextCache.h [3:24] + src/atn/PredictionContextMergeCache.cpp [3:24] + src/atn/PredictionContextMergeCache.h [3:24] + src/atn/PredictionContextMergeCacheOptions.h [3:24] + src/internal/Synchronization.cpp [3:24] + src/internal/Synchronization.h [3:24] + +KEEP BSD-3-Clause faa7e07f6be9a0fefe2c84867370314b +BELONGS ya.make + Note: matched license text is too long. Read it in the source files. + Scancode info: + Original SPDX id: BSD-3-Clause + Score : 100.00 + Match type : TEXT + Links : http://www.opensource.org/licenses/BSD-3-Clause, https://spdx.org/licenses/BSD-3-Clause + Files with this license: + LICENSE.txt [3:28] diff --git a/contrib/libs/antlr4_cpp_runtime/.yandex_meta/licenses.list.txt b/contrib/libs/antlr4_cpp_runtime/.yandex_meta/licenses.list.txt new file mode 100644 index 0000000000..d03079872c --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/.yandex_meta/licenses.list.txt @@ -0,0 +1,150 @@ +====================BSD-3-Clause==================== + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +====================BSD-3-Clause==================== + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + + +====================BSD-3-Clause==================== + * [The "BSD license"] + + +====================BSD-3-Clause==================== +// Redistribution and use in source and binary forms, with or without modification, are permitted +// provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of conditions +// and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written +// permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +====================BSD-3-Clause==================== +// obligations required by the BSD 3-clause license that governs this file. + + +====================BSD-3-Clause==================== +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither name of copyright holders nor the names of its contributors +may be used to endorse or promote products derived from this software +without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +====================BSD-3-Clause==================== +[![License](https://img.shields.io/badge/license-BSD-blue.svg)](https://raw.githubusercontent.com/antlr/antlr4/master/LICENSE.txt) + + +====================COPYRIGHT==================== + * Copyright (c) 2012 Terence Parr + * Copyright (c) 2012 Sam Harwell + * All rights reserved. + + +====================COPYRIGHT==================== +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + + +====================COPYRIGHT==================== +/* Copyright (c) 2012-2021 The ANTLR Project. All rights reserved. + + +====================COPYRIGHT==================== +/* Copyright (c) 2021 The ANTLR Project. All rights reserved. + + +====================COPYRIGHT==================== +// Copyright 2012-2022 The ANTLR Project + + +====================COPYRIGHT==================== +Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. + + +====================File: LICENSE.txt==================== +Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither name of copyright holders nor the names of its contributors +may be used to endorse or promote products derived from this software +without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/contrib/libs/antlr4_cpp_runtime/CHANGES.txt b/contrib/libs/antlr4_cpp_runtime/CHANGES.txt new file mode 100644 index 0000000000..b2eef10540 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/CHANGES.txt @@ -0,0 +1,582 @@ +**************************************************************************** +As of ANTLR 4.2.1, March 25 2014, we are no longer updating this file. Instead, +we are using the github release mechanism. For example, here is +4.2.1 release notes: + +https://github.com/antlr/antlr4/releases/tag/4.2.1 +**************************************************************************** + +ANTLR v4 Honey Badger + +January 15, 2014 + +* Unit tests for lexer actions from yesterday. +* Refactored TreeView so we can refresh tree externally w/o creating new one. + Needed for intellij plugin. 
+ +January 14, 2014 + +* Updated serialized ATN representation of lexer actions, allowing the lexer + interpreter to execute the majority of lexer commands (#408) + +January 12, 2014 + +* Support executing precedence predicates during the SLL phase of + adaptivePredict (#401). The result is a massive performance boost for grammars + containing direct left-recursion (improvements of 5% to 1000+% have been + observed, depending on the grammar and input). + +December 29, 2013 + +* Internal change: Tool.loadGrammar() -> parseGrammar(). Tool.load()->parse() + +* Added Tool.loadGrammar(fileName) that completely parses, extracts implicit lexer, + and processes into Grammar object. Does not geneate code. Use + Grammar.getImplicitLexer() to get the lexer created during processing of + combined grammar. + +* Added Grammar.load(fileName) that creates Tool object for you. loadGrammar() + lets you create your own Tool for setting error handlers etc... + + final Grammar g = Grammar.load("/tmp/MyGrammar.g4"); + +December 19, 2013 + +* Sam: + Improved documentation for tree patterns classes + Refactored parts of the tree patterns API to simplify classes and improve encapsulation + Move ATN serializer to runtime + Use ATNDeserializer methods instead of ATNSimulator methods which are now deprecated + +* parrt: fix null pointer bug with rule "a : a;" + +November 24, 2013 + +* Ter adds tree pattern matching. Preferred interface: + + ParseTree t = parser.expr(); + ParseTreePattern p = parser.compileParseTreePattern("<ID>+0", MyParser.RULE_expr); + ParseTreeMatch m = p.match(t); + String id = m.get("ID"); + + or + + String xpath = "//blockStatement/*"; + String treePattern = "int <Identifier> = <expression>;"; + ParseTreePattern p = + parser.compileParseTreePattern(treePattern, + JavaParser.RULE_localVariableDeclarationStatement); + List<ParseTreeMatch> matches = p.findAll(tree, xpath); + +November 20, 2013 + +* Sam added method stuff like expr() that calls expr(0). 
Makes it possible + to call expr rule from TestRig (grun). + +November 14, 2013 + +* Added Sam's ParserInterpreter implementation that uses ATN after + deserialization. + + LexerGrammar lg = new LexerGrammar( + "lexer grammar L;\n" + + "A : 'a' ;\n" + + "B : 'b' ;\n" + + "C : 'c' ;\n"); + Grammar g = new Grammar( + "parser grammar T;\n" + + "s : (A{;}|B)* C ;\n", + lg); + + LexerInterpreter lexEngine = lg.createLexerInterpreter(new ANTLRInputStream(input)); + CommonTokenStream tokens = new CommonTokenStream(lexEngine); + ParserInterpreter parser = g.createParserInterpreter(tokens); + ParseTree t = parser.parse(g.rules.get(startRule).index); + +November 13, 2013 + +* move getChildren() from Tree into Trees (to avoid breaking change) +* Notation: + /prog/func, -> all funcs under prog at root + /prog/*, -> all children of prog at root + /*/func, -> all func kids of any root node + prog, -> prog must be root node + /prog, -> prog must be root node + /*, -> any root + *, -> any root + //ID, -> any ID in tree + //expr/primary/ID, -> any ID child of a primary under any expr + //body//ID, -> any ID under a body + //'return', -> any 'return' literal in tree + //primary/*, -> all kids of any primary + //func/*/stat, -> all stat nodes grandkids of any func node + /prog/func/'def', -> all def literal kids of func kid of prog + //stat/';', -> all ';' under any stat node + //expr/primary/!ID, -> anything but ID under primary under any expr node + //expr/!primary, -> anything but primary under any expr node + //!*, -> nothing anywhere + /!*, -> nothing at root + +September 16, 2013 + +* Updated build.xml to support v4 grammars in v4 itself; compiles XPathLexer.g4 +* Add to XPath: + Collection<ParseTree> findAll(String xpath); + +September 11, 2013 + +* Add ! operator to XPath +* Use ANTLR v4 XPathLexer.g4 not regex +* Copy lots of find node stuff from v3 GrammarAST to Trees class in runtime. + +September 10, 2013 + +* Adding in XPath stuff. 
+ +August 31, 2013 + +* Lots of little fixes thanks to Coverity Scan + +August 7, 2013 + +* [BREAKING CHANGE] Altered left-recursion elimination to be simpler. Now, + we use the following patterns: + + * Binary expressions are expressions which contain a recursive invocation of + the rule as the first and last element of the alternative. + + * Suffix expressions contain a recursive invocation of the rule as the first + element of the alternative, but not as the last element. + + * Prefix expressions contain a recursive invocation of the rule as the last + element of the alternative, but not as the first element. + +There is no such thing as a "ternary" expression--they are just binary +expressions in disguise. + +The right associativity specifiers no longer on the individual tokens because +it's done on alternative basis anyway. The option is now on the individual +alternative; e.g., + + e : e '*' e + | e '+' e + |<assoc=right> e '?' e ':' e + |<assoc=right> e '=' e + | INT + ; + +If your language uses a right-associative ternary operator, you will need +to update your grammar to include <assoc=right> on the alternative operator. + +This also fixes #245 and fixes #268: + +https://github.com/antlr/antlr4/issues/245 +https://github.com/antlr/antlr4/issues/268 + +To smooth the transition, <assoc=right> is still allowed on token references +but it is ignored. 
+ +June 30, 2013 -- 4.1 release + +June 24, 2013 + +* Resize ANTLRInputStream.data after reading a file with fewer characters than + bytes +* Fix ATN created for non-greedy optional block with multiple alternatives +* Support Unicode escape sequences with indirection in JavaUnicodeInputStream + (fixes #287) +* Remove the ParserRuleContext.altNum field (fixes #288) +* PredictionContext no longer implements Iterable<SingletonPredictionContext> +* PredictionContext no longer implements Comparable<PredictionContext> +* Add the EPSILON_CLOSURE error and EPSILON_OPTIONAL warning +* Optimized usage of closureBusy set (fixes #282) + +June 9, 2013 + +* Add regression test for #239 (already passes) + +June 8, 2013 + +* Support list labels on a set of tokens (fixes #270) +* Fix associativity of XOR in Java LR grammar (fixes #280) + +June 1, 2013 + +* DiagnosticErrorListener includes rule names for each decision in its reports +* Document ANTLRErrorListener and DiagnosticErrorListener (fixes #265) +* Support '\uFFFF' (fixes #267) +* Optimize serialized ATN + +May 26, 2013 + +* Report errors that occur while lexing a grammar (fixes #262) +* Improved error message for unterminated string literals (fixes #243) + +May 24, 2013 + +* Significantly improve performance of JavaUnicodeInputStream.LA(1) + +May 20, 2013 + +* Generate Javadoc for generated visitor and listener interfaces and classes +* Fix unit tests + +May 18, 2013 + +* Group terminals in Java grammars so ATN can collapse sets +* Improved Java 7 support in Java grammars (numeric literals) +* Updated error listener interfaces +* Support detailed statistics in TestPerformance + +May 17, 2013 + +* Add JavaUnicodeInputStream to handle Unicode escapes in Java code +* Proper Unicode identifier handling in Java grammars +* Report file names with lexer errors in TestPerformance + +May 14, 2013 + +* Use a called rule stack to prevent stack overflow in LL1Analyzer +* Use 0-based indexing for several arrays in the tool +* Code 
simplification, assertions, documentation + +May 13, 2013 + +* Unit test updates to ensure exceptions are not hidden + +May 12, 2013 + +* Updates to TestPerformance + +May 5, 2013 + +* Updated several classes to use MurmurHash 3 hashing + +May 1, 2013 + +* Added parse tree JTree to TreeViewer (Bart Kiers) + +April 30, 2013 + +* Updated TestPerformance to support parallelization across passes + +April 24, 2013 + +* Remove unused stub class ParserATNPathFinder +* Remove ParserInterpreter.predictATN +* Remove DFA.getATNStatesAlongPath +* Encapsulate implementation methods in LexerATNSimulator and ParserATNSimulator +* Updated documentation +* Simplify creation of new DFA edges +* Fix handling of previously cached error edges +* Fix DFA created during forced-SLL parsing (PredictionMode.SLL) +* Extract methods ParserATNSimulator.getExistingTargetState and + ParserATNSimulator.computeTargetState. + +April 22, 2013 + +* Lazy initialization of ParserATNSimulator.mergeCache +* Improved hash code for DFAState +* Improved hash code with caching for ATNConfigSet +* Add new configuration parameters to TestPerformance +* Update Java LR and Java Std to support Java 7 syntax + +April 21, 2013 + +* Add new configuration parameters to TestPerformance + +April 18, 2013 + +* Must check rule transition follow states before eliminating states in + the ATN (fixes #224) +* Simplify ParserATNSimulator and improve performance by combining execDFA and + execATN and using DFA edges even after edge computation is required + +April 15, 2013 + +* Fix code in TestPerformance that clears the DFA + +April 12, 2013 + +* Improved initialization and concurrency control in DFA updates +* Fix EOF handling in edge case (fixes #218) + +April 4, 2013 + +* Improved testing of error reporting +* Fix NPE revealed by updated testing method +* Strict handling of redefined rules - prevents code generation (fixes #210) +* Updated documentation in Tool + +March 27, 2013 + +* Avoid creating empty action methods in 
lexer (fixes #202) +* Split serialized ATN when it exceeds Java's 65535 byte limit (fixes #76) +* Fix incorrect reports of label type conflicts across separated labeled outer + alternatives (fixes #195) +* Update Maven plugin site documentation + +March 26, 2013 + +* Fix bugs with the closureBusy set in ParserATNSimulator.closure +* Fix handling of empty options{} block (fixes #194) +* Add error 149 INVALID_LEXER_COMMAND (fixes #190) +* Add error 150 MISSING_LEXER_COMMAND_ARGUMENT +* Add error 151 UNWANTED_LEXER_COMMAND_ARGUMENT +* Updated documentation in the Parser and RecognitionException classes +* Refactored and extensively documented the ANTLRErrorStrategy interface and + DefaultErrorStrategy default implementation +* Track the number of syntax errors in Parser.notifyErrorListeners instead of in + the error strategy +* Move primary implementation of getExpectedTokens to ATN, fixes #191 +* Updated ATN documentation +* Use UUID instead of incremented integer for serialized ATN versioning + +March 7, 2013 + +* Added export to PNG feature to the parse tree viewer + +March 6, 2013 + +* Allow direct calls to left-recursive rules (fixes #161) +* Change error type 146 (EPSILON_TOKEN) to a warning (fixes #180) +* Specify locale for all format operations (fixes #158) +* Fix generation of invalid Unicode escape sequences in Java code (fixes #164) +* Do not require escape for $ in action when not followed by an ID start char + (fixes #176) + +February 23, 2013 + +* Refactoring Target-related classes to improve support for additional language + targets + +February 22, 2013 + +* Do not allow raw newline characters in literals +* Pair and Triple are immutable; Triple is not a Pair + +February 5, 2013 + +* Fix IntervalSet.add when multiple merges are required (fixes #153) + +January 29, 2013 + +* don't call process() if args aren't specified (Dave Parfitt) + +January 21, 2013 -- Release 4.0 + +* Updated PredictionContext Javadocs +* Updated Maven site documentation +* Minor 
tweaks in Java.stg + +January 15, 2013 + +* Tweak error messages +* (Tool) Make TokenVocabParser fields `protected final` +* Fix generated escape sequences for literals containing backslashes + +January 14, 2013 + +* Relax parser in favor of errors during semantic analysis +* Add error 145: lexer mode must contain at least one non-fragment rule +* Add error 146: non-fragment lexer rule can match the empty string + +January 11, 2013 + +* Updated error 72, 76; added 73-74 and 136-143: detailed errors about name + conflicts +* Report exact location for parameter/retval/local name conflicts +* Add error 144: multi-character literals are not allowed in lexer sets +* Error 134 now only applies to rule references in lexer sets +* Updated error messages (cleanup) +* Reduce size of _serializedATN by adding 2 to each element: new representation + avoids embedded values 0 and 0xFFFF which are common and have multi-byte + representations in Java's modified UTF-8 + +January 10, 2013 + +* Add error 135: cannot assign a value to list label: $label + (fixes antlr/antlr4#128) + +January 2, 2013 + +* Fix EOF handling (antlr/antlr4#110) +* Remove TREE_PARSER reference +* Additional validation checks in ATN deserialization +* Fix potential NPE in parser predicate evaluation +* Fix termination condition detection in full-context parsing + +January 1, 2013 + +* Updated documentation +* Minor code cleanup +* Added the `-XdbgSTWait` command line option for the Tool +* Removed method override since bug was fixed in V3 runtime + +December 31, 2012 + +* I altered Target.getTargetStringLiteralFromANTLRStringLiteral() so that + it converts \uXXXX in an ANTLR string to \\uXXXX, thus, avoiding Java's + conversion to a single character before compilation. 
+ +December 16, 2012 + +* Encapsulate some fields in ANTLRMessage +* Remove ErrorType.INVALID +* Update error/warning messages, show all v3 compatibility messages + +December 12, 2012 + +* Use arrays instead of HashSet to save memory in SemanticContext.AND/OR +* Use arrays instead of HashSet to save memory in cached DFA +* Reduce single-operand SemanticContext.and/or operations + +December 11, 2012 + +* Add -long-messages option; only show exceptions with errors when set +* "warning treated as error" is a one-off error +* Listen for issues reported by StringTemplate, report them as warnings +* Fix template issues +* GrammarASTWithOptions.getOptions never returns null +* Use EnumSet instead of HashSet +* Use new STGroup.GROUP_FILE_EXTENSION value + +December 2, 2012 + +* Remove -Xverbose-dfa option +* Create the ParseTreeVisitor interface for all visitors, rename previous base + visitor class to AbstractParseTreeVisitor + +December 1, 2012 + +* escape [\n\r\t] in lexical error messages; e.g,: + line 2:3 token recognition error at: '\t' + line 2:4 token recognition error at: '\n' + +* added error for bad sets in lexer; e.g.: + lexer set element A is invalid (either rule ref or literal with > 1 char) + some tests in TestSets appeared to allow ~('a'|B) but it was randomly working. + ('a'|B) works, though doesn't collapse to a set. + +* label+='foo' wasn't generating good code. It was generating token type as + variable name. Now, I gen "s<ttype>" for implicit labels on string literals. + +* tokens now have token and char source to draw from. + +* remove -Xsave-lexer option; log file as implicit lexer AST. 
+ +November 30, 2012 + +* Maven updates (cleanup, unification, and specify Java 6 bootstrap classpath) + +November 28, 2012 + +* Maven updates (uber-jar, manifest details) + +November 27, 2012 + +* Maven updates (prepare for deploying to Sonatype OSS) +* Use efficient bitset tests instead of long chains of operator == + +November 26, 2012 + +* Maven updates (include sources and javadocs, fix warnings) +* Don't generate action methods for lexer rules not containing an action +* Generated action and sempred methods are private +* Remove unused / problematic methods: +** (unused) TerminalNodeImpl.isErrorNode +** (unused) RuleContext.conflictsWith, RuleContext.suffix. +** (problematic) RuleContext.hashCode, RuleContext.equals. + +November 23, 2012 + +* Updated Maven build (added master POM, cleaned up module POMs) + +November 22, 2012 + +* make sure left-recur rule translation uses token stream from correct imported file. +* actions like @after in imported rules caused inf loop. +* This misidentified scope lexer/parser: @lexer::members { } @parser::members { } + +November 18, 2012 + +* fixed: undefined rule refs caused exception +* cleanup, rm dead etypes, add check for ids that cause code gen issues +* added notion of one-off error +* added check for v3 backward incompatibilities: +** tree grammars +** labels in lexer rules +** tokens {A;B;} syntax +** tokens {A='C';} syntax +** {...}?=> gate semantic predicates +** (...)=> syntactic predicates +* Detect EOF in lexer rule + +November 17, 2012 + +* .tokens files goes in output dir like parser file. +* added check: action in lexer rules must be last element of outermost alt +* properly check for grammar/filename difference +* if labels, don't allow set collapse for + a : A # X | B ; +* wasn't checking soon enough for rule redef; now it sets a dead flag in + AST so no more walking dup. 
+ error(51): T.g:7:0: rule s redefinition (ignoring); previous at line 3 + +November 11, 2012 + +* Change version to 4.0b4 (btw, forgot to push 4.0b3 in build.properties when + I made git tag 4.0b3...ooops). + +November 4, 2012 + +* Kill box in tree dialog box makes dialog dispose of itself + +October 29, 2012 + +* Sam fixes nongreedy more. +* -Werror added. +* Sam made speed improvement re preds in lexer. + +October 20, 2012 + +* Merged Sam's fix for nongreedy lexer/parser. lots of unit tests. A fix in + prediction ctx merge. https://github.com/parrt/antlr4/pull/99 + +October 14, 2012 + +* Rebuild how ANTLR detects SLL conflict and failover to full LL. LL is + a bit slower but correct now. Added ability to ask for exact ambiguity + detection. + +October 8, 2012 + +* Fixed a bug where labeling the alternatives of the start rule caused + a null pointer exception. + +October 1, 2012 -- 4.0b2 release + +September 30, 2012 + +* Fixed the unbuffered streams, which actually buffered everything + up by mistake. tweaked a few comments. + +* Added a getter to IntStream for the token factory + +* Added -depend cmd-line option. + +September 29, 2012 + +* no nongreedy or wildcard in parser. + +September 28, 2012 + +* empty "tokens {}" is ok now. + +September 22, 2012 + +* Rule exception handlers weren't passed to the generated code +* $ruleattribute.foo weren't handled properly +* Added -package option + +September 18, 2012 -- 4.0b1 release diff --git a/contrib/libs/antlr4_cpp_runtime/CONTRIBUTING.md b/contrib/libs/antlr4_cpp_runtime/CONTRIBUTING.md new file mode 100644 index 0000000000..0a2317bab3 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/CONTRIBUTING.md @@ -0,0 +1,22 @@ +# Contributing to ANTLR 4 + +1. [Fork](https://help.github.com/articles/fork-a-repo) the [antlr/antlr4 repo](https://github.com/antlr/antlr4), which will give you both key branches, `master` and `dev` +2. 
Make sure to `git checkout dev` in your fork so that you are working from the latest development branch +3. Create and work from a branch derived from `dev` such as `git checkout -b your-branch-name` +4. Install and configure [EditorConfig](http://editorconfig.org/) so your text editor or IDE uses the ANTLR 4 coding style +5. [Build ANTLR 4](doc/building-antlr.md) +6. [Run the ANTLR project unit tests](doc/antlr-project-testing.md) +7. Create a [pull request](https://help.github.com/articles/using-pull-requests/) with your changes and make sure you're comparing your `dev`-derived branch in your fork to the `dev` branch from the `antlr/antlr4` repo: + +<img src="doc/images/PR-on-dev.png" width="600"> + +**Note:** Each commit requires a "signature", which is as simple as passing `-s` (not +`-S`) to the git commit command: + +``` +git commit -s -m 'This is my commit message' +``` + +Github's pull request process enforces the sig and gives instructions on how to +fix any commits that lack the sig. See [Github DCO app](https://github.com/apps/dco) +for more info. diff --git a/contrib/libs/antlr4_cpp_runtime/LICENSE.txt b/contrib/libs/antlr4_cpp_runtime/LICENSE.txt new file mode 100644 index 0000000000..5d27694155 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/LICENSE.txt @@ -0,0 +1,28 @@ +Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither name of copyright holders nor the names of its contributors +may be used to endorse or promote products derived from this software +without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/contrib/libs/antlr4_cpp_runtime/README-cpp.md b/contrib/libs/antlr4_cpp_runtime/README-cpp.md new file mode 100644 index 0000000000..622289ba77 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/README-cpp.md @@ -0,0 +1,72 @@ +# C++ target for ANTLR 4 + +This folder contains the C++ runtime support for ANTLR. See [the canonical antlr4 repository](https://github.com/antlr/antlr4) for in depth detail about how to use ANTLR 4. 
+ +## Authors and major contributors + +ANTLR 4 is the result of substantial effort of the following people: + +* [Terence Parr](http://www.cs.usfca.edu/~parrt/), parrt@cs.usfca.edu + ANTLR project lead and supreme dictator for life + [University of San Francisco](http://www.usfca.edu/) +* [Sam Harwell](http://tunnelvisionlabs.com/) + (Tool co-author, Java and C# target) + +The C++ target has been the work of the following people: + +* Dan McLaughlin, dan.mclaughlin@gmail.com (initial port, got code to compile) +* David Sisson, dsisson@google.com (initial port, made the runtime C++ tests runnable) +* [Mike Lischke](http://www.soft-gems.net), mike@lischke-online.de (brought the initial port to a working library, made most runtime tests pass) + +## Other contributors + +* Marcin Szalowicz, mszalowicz@mailplus.pl (cmake build setup) +* Tim O'Callaghan, timo@linux.com (additional superbuild cmake pattern script) + +## Project Status + +* Building on macOS, Windows, Android and Linux +* No errors and warnings +* Library linking +* Some unit tests in the macOS project, for important base classes with almost 100% code coverage. +* All memory allocations checked +* Simple command line demo application working on all supported platforms. +* All runtime tests pass. + +### Build + Usage Notes + +The minimum C++ version to compile the ANTLR C++ runtime with is C++11. The supplied projects can build the runtime either as static or dynamic library, as both 32bit and 64bit arch. The macOS project contains a target for iOS and can also be built using cmake (instead of XCode). + +Include the antlr4-runtime.h umbrella header in your target application to get everything needed to use the library. + +If you are compiling with cmake, the minimum version required is cmake 2.8. +By default, the libraries produced by the CMake build target C++11. If you want to target a different C++ standard, you can explicitly pass the standard - e.g. `-DCMAKE_CXX_STANDARD=17`. 
+ +#### Compiling on Windows with Visual Studio using the Visual Studio projects +Simply open the VS project from the runtime folder (VS 2019+) and build it. + +#### Compiling on Windows using cmake with Visual Studio VS2019 and later +Use the "Open Folder" Feature from the File->Open->Folder menu to open the runtime/Cpp directory. +It will automatically use the CMake description to open up a Visual Studio Solution. + +#### Compiling on macOS +Either open the included XCode project and build that or use the cmake compilation as described for linux. + +#### Compiling on Android +Try running cmake -DCMAKE_ANDROID_NDK=/folder/of/android_ndkr17_and_above -DCMAKE_SYSTEM_NAME=Android -DCMAKE_ANDROID_API=14 -DCMAKE_ANDROID_ARCH_ABI=x86 -DCMAKE_ANDROID_STL_TYPE=c++_shared -DCMAKE_ANDROID_NDK_TOOLCHAIN_VERSION=clang -DCMAKE_BUILD_TYPE=Release /folder/antlr4_src_dir -G Ninja. + +#### Compiling on Linux +- cd \<antlr4-dir\>/runtime/Cpp (this is where this readme is located) +- mkdir build && mkdir run && cd build +- cmake .. -DANTLR_JAR_LOCATION=full/path/to/antlr4-4.5.4-SNAPSHOT.jar -DWITH_DEMO=True +- make +- DESTDIR=\<antlr4-dir\>/runtime/Cpp/run make install + +If you don't want to build the demo then replace the "cmake .. -DANTLR_JAR_LOCATION<...>" command in the above recipe with "cmake .." without any further parameters. +There is another cmake script available in the subfolder cmake/ for those who prefer the superbuild cmake pattern. + +#### CMake Package support +If the CMake variable 'ANTLR4_INSTALL' is set, CMake Packages will be built and installed during the install step. +They expose two packages: antlr4_runtime and antlr4_generator which can be referenced to ease up the use of the +ANTLR Generator and runtime. 
+Use and Sample can be found [here](cmake/Antlr4Package.md) diff --git a/contrib/libs/antlr4_cpp_runtime/README.md b/contrib/libs/antlr4_cpp_runtime/README.md new file mode 100644 index 0000000000..5566fa224d --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/README.md @@ -0,0 +1,84 @@ +# ANTLR v4 + +[![Java 7+](https://img.shields.io/badge/java-7+-4c7e9f.svg)](http://java.oracle.com) +[![License](https://img.shields.io/badge/license-BSD-blue.svg)](https://raw.githubusercontent.com/antlr/antlr4/master/LICENSE.txt) + + +**ANTLR** (ANother Tool for Language Recognition) is a powerful parser generator for reading, processing, executing, or translating structured text or binary files. It's widely used to build languages, tools, and frameworks. From a grammar, ANTLR generates a parser that can build parse trees and also generates a listener interface (or visitor) that makes it easy to respond to the recognition of phrases of interest. + +**Dev branch build status** + +[![MacOSX, Windows, Linux](https://github.com/antlr/antlr4/actions/workflows/hosted.yml/badge.svg)](https://github.com/antlr/antlr4/actions/workflows/hosted.yml) (github actions) + +<!-- +* [![Windows](https://github.com/antlr/antlr4/actions/workflows/windows.yml/badge.svg?branch=dev)](https://github.com/antlr/antlr4/actions/workflows/windows.yml) (github actions) + +* [![Circle CI Build Status (Linux)](https://img.shields.io/circleci/build/gh/antlr/antlr4/master?label=Linux)](https://app.circleci.com/pipelines/github/antlr/antlr4) (CircleCI) + +[![AppVeyor CI Build Status (Windows)](https://img.shields.io/appveyor/build/parrt/antlr4?label=Windows)](https://ci.appveyor.com/project/parrt/antlr4) +[![Travis-CI Build Status (Swift-Linux)](https://img.shields.io/travis/antlr/antlr4.svg?label=Linux-Swift&branch=master)](https://travis-ci.com/github/antlr/antlr4) +--> + +## Repo branch structure + +The default branch for this repo is [`master`](https://github.com/antlr/antlr4/tree/master), which is the latest 
stable release and has tags for the various releases; e.g., see release tag [4.9.3](https://github.com/antlr/antlr4/tree/4.9.3). Branch [`dev`](https://github.com/antlr/antlr4/tree/dev) is where development occurs between releases and all pull requests should be derived from that branch. The `dev` branch is merged back into `master` to cut a release and the release state is tagged (e.g., with `4.10-rc1` or `4.10`.) Visually our process looks roughly like this: + +<img src="doc/images/new-antlr-branches.png" width="500"> + +Targets such as Go that pull directly from the repository can use the default `master` branch but can also pull from the active `dev` branch: + +```bash +$ go get github.com/antlr/antlr4/runtime/Go/antlr@dev +``` + +## Authors and major contributors + +* [Terence Parr](http://www.cs.usfca.edu/~parrt/), parrt@cs.usfca.edu +ANTLR project lead and supreme dictator for life +[University of San Francisco](http://www.usfca.edu/) +* [Sam Harwell](http://tunnelvisionlabs.com/) (Tool co-author, Java and original C# target) +* [Eric Vergnaud](https://github.com/ericvergnaud) (Javascript, Python2, Python3 targets and maintenance of C# target) +* [Peter Boyer](https://github.com/pboyer) (Go target) +* [Mike Lischke](http://www.soft-gems.net/) (C++ completed target) +* Dan McLaughlin (C++ initial target) +* David Sisson (C++ initial target and test) +* [Janyou](https://github.com/janyou) (Swift target) +* [Ewan Mellor](https://github.com/ewanmellor), [Hanzhou Shi](https://github.com/hanjoes) (Swift target merging) +* [Ben Hamilton](https://github.com/bhamiltoncx) (Full Unicode support in serialized ATN and all languages' runtimes for code points > U+FFFF) +* [Marcos Passos](https://github.com/marcospassos) (PHP target) +* [Lingyu Li](https://github.com/lingyv-li) (Dart target) +* [Ivan Kochurkin](https://github.com/KvanTTT) has made major contributions to overall quality, error handling, and Target performance. 
+* [Justin King](https://github.com/jcking) has done a huge amount of work across multiple targets, but especially for C++. +* [Ken Domino](https://github.com/kaby76) has a knack for finding bugs/issues and analysis; also a major contributor on the [grammars-v4 repo](https://github.com/antlr/grammars-v4). +* [Jim Idle](https://github.com/jimidle) has contributed to previous versions of ANTLR and recently jumped back in to solve a major problem with the Go target. + + +## Useful information + +* [Release notes](https://github.com/antlr/antlr4/releases) +* [Getting started with v4](https://github.com/antlr/antlr4/blob/master/doc/getting-started.md) +* [Official site](http://www.antlr.org/) +* [Documentation](https://github.com/antlr/antlr4/blob/master/doc/index.md) +* [FAQ](https://github.com/antlr/antlr4/blob/master/doc/faq/index.md) +* [ANTLR code generation targets](https://github.com/antlr/antlr4/blob/master/doc/targets.md)<br>(Currently: Java, C#, Python2|3, JavaScript, Go, C++, Swift, Dart, PHP) +* [Java API](http://www.antlr.org/api/Java/index.html) +* [ANTLR v3](http://www.antlr3.org/) +* [v3 to v4 Migration, differences](https://github.com/antlr/antlr4/blob/master/doc/faq/general.md) + +You might also find the following pages useful, particularly if you want to mess around with the various target languages. + +* [How to build ANTLR itself](https://github.com/antlr/antlr4/blob/master/doc/building-antlr.md) +* [How we create and deploy an ANTLR release](https://github.com/antlr/antlr4/blob/master/doc/releasing-antlr.md) + +## The Definitive ANTLR 4 Reference + +Programmers run into parsing problems all the time. Whether it’s a data format like JSON, a network protocol like SMTP, a server configuration file for Apache, a PostScript/PDF file, or a simple spreadsheet macro language—ANTLR v4 and this book will demystify the process. ANTLR v4 has been rewritten from scratch to make it easier than ever to build parsers and the language applications built on top. 
This completely rewritten new edition of the bestselling Definitive ANTLR Reference shows you how to take advantage of these new features. + +You can buy the book [The Definitive ANTLR 4 Reference](http://amzn.com/1934356999) at amazon or an [electronic version at the publisher's site](https://pragprog.com/book/tpantlr2/the-definitive-antlr-4-reference). + +You will find the [Book source code](http://pragprog.com/titles/tpantlr2/source_code) useful. + +## Additional grammars +[This repository](https://github.com/antlr/grammars-v4) is a collection of grammars without actions where the +root directory name is the all-lowercase name of the language parsed +by the grammar. For example, java, cpp, csharp, c, etc... diff --git a/contrib/libs/antlr4_cpp_runtime/src/ANTLRErrorListener.cpp b/contrib/libs/antlr4_cpp_runtime/src/ANTLRErrorListener.cpp new file mode 100644 index 0000000000..6ceadb87f9 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/ANTLRErrorListener.cpp @@ -0,0 +1,10 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "ANTLRErrorListener.h" + +antlr4::ANTLRErrorListener::~ANTLRErrorListener() +{ +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/ANTLRErrorListener.h b/contrib/libs/antlr4_cpp_runtime/src/ANTLRErrorListener.h new file mode 100644 index 0000000000..6dc66237e4 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/ANTLRErrorListener.h @@ -0,0 +1,167 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "RecognitionException.h" + +namespace antlrcpp { + class BitSet; +} + +namespace antlr4 { + + /// How to emit recognition errors (an interface in Java). 
+  class ANTLR4CPP_PUBLIC ANTLRErrorListener {
+  public:
+    virtual ~ANTLRErrorListener();
+
+    /// <summary>
+    /// Upon syntax error, notify any interested parties. This is not how to
+    /// recover from errors or compute error messages. <seealso cref="ANTLRErrorStrategy"/>
+    /// specifies how to recover from syntax errors and how to compute error
+    /// messages. This listener's job is simply to emit a computed message,
+    /// though it has enough information to create its own message in many cases.
+    /// <p/>
+    /// The <seealso cref="RecognitionException"/> is non-null for all syntax errors except
+    /// when we discover mismatched token errors that we can recover from
+    /// in-line, without returning from the surrounding rule (via the single
+    /// token insertion and deletion mechanism).
+    /// </summary>
+    /// <param name="recognizer">
+    /// What parser got the error. From this
+    /// object, you can access the context as well
+    /// as the input stream. </param>
+    /// <param name="offendingSymbol">
+    /// The offending token in the input token
+    /// stream, unless recognizer is a lexer (then it's null). If
+    /// no viable alternative error, {@code e} has token at which we
+    /// started production for the decision. </param>
+    /// <param name="line">
+    /// The line number in the input where the error occurred. </param>
+    /// <param name="charPositionInLine">
+    /// The character position within that line where the error occurred. </param>
+    /// <param name="msg">
+    /// The message to emit. </param>
+    /// <param name="e">
+    /// The exception generated by the parser that led to
+    /// the reporting of an error. It is null in the case where
+    /// the parser was able to recover in line without exiting the
+    /// surrounding rule. </param>
+    virtual void syntaxError(Recognizer *recognizer, Token *offendingSymbol, size_t line,
+                             size_t charPositionInLine, const std::string &msg, std::exception_ptr e) = 0;
+
+    /**
+     * This method is called by the parser when a full-context prediction
+     * results in an ambiguity.
+     *
+     * <p>Each full-context prediction which does not result in a syntax error
+     * will call either {@link #reportContextSensitivity} or
+     * {@link #reportAmbiguity}.</p>
+     *
+     * <p>When {@code ambigAlts} is not null, it contains the set of potentially
+     * viable alternatives identified by the prediction algorithm. When
+     * {@code ambigAlts} is null, use {@link ATNConfigSet#getAlts} to obtain the
+     * represented alternatives from the {@code configs} argument.</p>
+     *
+     * <p>When {@code exact} is {@code true}, <em>all</em> of the potentially
+     * viable alternatives are truly viable, i.e. this is reporting an exact
+     * ambiguity. When {@code exact} is {@code false}, <em>at least two</em> of
+     * the potentially viable alternatives are viable for the current input, but
+     * the prediction algorithm terminated as soon as it determined that at
+     * least the <em>minimum</em> potentially viable alternative is truly
+     * viable.</p>
+     *
+     * <p>When the {@link PredictionMode#LL_EXACT_AMBIG_DETECTION} prediction
+     * mode is used, the parser is required to identify exact ambiguities so
+     * {@code exact} will always be {@code true}.</p>
+     *
+     * <p>This method is not used by lexers.</p>
+     *
+     * @param recognizer the parser instance
+     * @param dfa the DFA for the current decision
+     * @param startIndex the input index where the decision started
+     * @param stopIndex the input index where the ambiguity was identified
+     * @param exact {@code true} if the ambiguity is exactly known, otherwise
+     * {@code false}. This is always {@code true} when
+     * {@link PredictionMode#LL_EXACT_AMBIG_DETECTION} is used.
+     * @param ambigAlts the potentially ambiguous alternatives, or {@code null}
+     * to indicate that the potentially ambiguous alternatives are the complete
+     * set of represented alternatives in {@code configs}
+     * @param configs the ATN configuration set where the ambiguity was
+     * identified
+     */
+    virtual void reportAmbiguity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, bool exact,
+      const antlrcpp::BitSet &ambigAlts, atn::ATNConfigSet *configs) = 0;
+
+    /**
+     * This method is called when an SLL conflict occurs and the parser is about
+     * to use the full context information to make an LL decision.
+     *
+     * <p>If one or more configurations in {@code configs} contains a semantic
+     * predicate, the predicates are evaluated before this method is called. The
+     * subset of alternatives which are still viable after predicates are
+     * evaluated is reported in {@code conflictingAlts}.</p>
+     *
+     * <p>This method is not used by lexers.</p>
+     *
+     * @param recognizer the parser instance
+     * @param dfa the DFA for the current decision
+     * @param startIndex the input index where the decision started
+     * @param stopIndex the input index where the SLL conflict occurred
+     * @param conflictingAlts The specific conflicting alternatives. If this is
+     * {@code null}, the conflicting alternatives are all alternatives
+     * represented in {@code configs}. At the moment, conflictingAlts is non-null
+     * (for the reference implementation, but Sam's optimized version can see this
+     * as null).
+     * @param configs the ATN configuration set where the SLL conflict was
+     * detected
+     */
+    virtual void reportAttemptingFullContext(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex,
+      const antlrcpp::BitSet &conflictingAlts, atn::ATNConfigSet *configs) = 0;
+
+    /**
+     * This method is called by the parser when a full-context prediction has a
+     * unique result.
+     *
+     * <p>Each full-context prediction which does not result in a syntax error
+     * will call either {@link #reportContextSensitivity} or
+     * {@link #reportAmbiguity}.</p>
+     *
+     * <p>For prediction implementations that only evaluate full-context
+     * predictions when an SLL conflict is found (including the default
+     * {@link ParserATNSimulator} implementation), this method reports cases
+     * where SLL conflicts were resolved to unique full-context predictions,
+     * i.e. the decision was context-sensitive. This report does not necessarily
+     * indicate a problem, and it may appear even in completely unambiguous
+     * grammars.</p>
+     *
+     * <p>{@code configs} may have more than one represented alternative if the
+     * full-context prediction algorithm does not evaluate predicates before
+     * beginning the full-context prediction. In all cases, the final prediction
+     * is passed as the {@code prediction} argument.</p>
+     *
+     * <p>Note that the definition of "context sensitivity" in this method
+     * differs from the concept in {@link DecisionInfo#contextSensitivities}.
+     * This method reports all instances where an SLL conflict occurred but LL
+     * parsing produced a unique result, whether or not that unique result
+     * matches the minimum alternative in the SLL conflicting set.</p>
+     *
+     * <p>This method is not used by lexers.</p>
+     *
+     * @param recognizer the parser instance
+     * @param dfa the DFA for the current decision
+     * @param startIndex the input index where the decision started
+     * @param stopIndex the input index where the context sensitivity was
+     * finally determined
+     * @param prediction the unambiguous result of the full-context prediction
+     * @param configs the ATN configuration set where the unambiguous prediction
+     * was determined
+     */
+    virtual void reportContextSensitivity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex,
+      size_t prediction, atn::ATNConfigSet *configs) = 0;
+  };
+
+} // namespace antlr4
diff --git a/contrib/libs/antlr4_cpp_runtime/src/ANTLRErrorStrategy.cpp b/contrib/libs/antlr4_cpp_runtime/src/ANTLRErrorStrategy.cpp
new file mode 100644
index 0000000000..1655a5731d
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/ANTLRErrorStrategy.cpp
@@ -0,0 +1,10 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#include "ANTLRErrorStrategy.h"
+
+antlr4::ANTLRErrorStrategy::~ANTLRErrorStrategy()
+{
+}
diff --git a/contrib/libs/antlr4_cpp_runtime/src/ANTLRErrorStrategy.h b/contrib/libs/antlr4_cpp_runtime/src/ANTLRErrorStrategy.h
new file mode 100644
index 0000000000..a3eecd14c4
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/ANTLRErrorStrategy.h
@@ -0,0 +1,121 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "Token.h"
+
+namespace antlr4 {
+
+  /// <summary>
+  /// The interface for defining strategies to deal with syntax errors encountered
+  /// during a parse by ANTLR-generated parsers. We distinguish between three
+  /// different kinds of errors:
+  ///
+  /// <ul>
+  /// <li>The parser could not figure out which path to take in the ATN (none of
+  /// the available alternatives could possibly match)</li>
+  /// <li>The current input does not match what we were looking for</li>
+  /// <li>A predicate evaluated to false</li>
+  /// </ul>
+  ///
+  /// Implementations of this interface report syntax errors by calling
+  /// <seealso cref="Parser#notifyErrorListeners"/>.
+  /// <p/>
+  /// TODO: what to do about lexers
+  /// </summary>
+  class ANTLR4CPP_PUBLIC ANTLRErrorStrategy {
+  public:
+
+    virtual ~ANTLRErrorStrategy();
+
+    /// <summary>
+    /// Reset the error handler state for the specified {@code recognizer}. </summary>
+    /// <param name="recognizer"> the parser instance </param>
+    virtual void reset(Parser *recognizer) = 0;
+
+    /**
+     * This method is called when an unexpected symbol is encountered during an
+     * inline match operation, such as {@link Parser#match}. If the error
+     * strategy successfully recovers from the match failure, this method
+     * returns the {@link Token} instance which should be treated as the
+     * successful result of the match.
+     *
+     * <p>This method handles the consumption of any tokens - the caller should
+     * <b>not</b> call {@link Parser#consume} after a successful recovery.</p>
+     *
+     * <p>Note that the calling code will not report an error if this method
+     * returns successfully. The error strategy implementation is responsible
+     * for calling {@link Parser#notifyErrorListeners} as appropriate.</p>
+     *
+     * @param recognizer the parser instance
+     * @throws RecognitionException if the error strategy was not able to
+     * recover from the unexpected input symbol
+     */
+    virtual Token* recoverInline(Parser *recognizer) = 0;
+
+    /// <summary>
+    /// This method is called to recover from exception {@code e}. This method is
+    /// called after <seealso cref="#reportError"/> by the default exception handler
+    /// generated for a rule method.
+    /// </summary>
+    /// <seealso cref= #reportError
+    /// </seealso>
+    /// <param name="recognizer"> the parser instance </param>
+    /// <param name="e"> the recognition exception to recover from </param>
+    /// <exception cref="RecognitionException"> if the error strategy could not recover from
+    /// the recognition exception </exception>
+    virtual void recover(Parser *recognizer, std::exception_ptr e) = 0;
+
+    /// <summary>
+    /// This method provides the error handler with an opportunity to handle
+    /// syntactic or semantic errors in the input stream before they result in a
+    /// <seealso cref="RecognitionException"/>.
+    /// <p/>
+    /// The generated code currently contains calls to <seealso cref="#sync"/> after
+    /// entering the decision state of a closure block ({@code (...)*} or
+    /// {@code (...)+}).
+    /// <p/>
+    /// For an implementation based on Jim Idle's "magic sync" mechanism, see
+    /// <seealso cref="DefaultErrorStrategy#sync"/>.
+    /// </summary>
+    /// <seealso cref= DefaultErrorStrategy#sync
+    /// </seealso>
+    /// <param name="recognizer"> the parser instance </param>
+    /// <exception cref="RecognitionException"> if an error is detected by the error
+    /// strategy but cannot be automatically recovered at the current state in
+    /// the parsing process </exception>
+    virtual void sync(Parser *recognizer) = 0;
+
+    /// <summary>
+    /// Tests whether or not {@code recognizer} is in the process of recovering
+    /// from an error. In error recovery mode, <seealso cref="Parser#consume"/> adds
+    /// symbols to the parse tree by calling
+    /// {@link Parser#createErrorNode(ParserRuleContext, Token)} then
+    /// {@link ParserRuleContext#addErrorNode(ErrorNode)} instead of
+    /// {@link Parser#createTerminalNode(ParserRuleContext, Token)}.
+    /// </summary>
+    /// <param name="recognizer"> the parser instance </param>
+    /// <returns> {@code true} if the parser is currently recovering from a parse
+    /// error, otherwise {@code false} </returns>
+    virtual bool inErrorRecoveryMode(Parser *recognizer) = 0;
+
+    /// <summary>
+    /// This method is called when the parser successfully matches an input
+    /// symbol.
+    /// </summary>
+    /// <param name="recognizer"> the parser instance </param>
+    virtual void reportMatch(Parser *recognizer) = 0;
+
+    /// <summary>
+    /// Report any kind of <seealso cref="RecognitionException"/>. This method is called by
+    /// the default exception handler generated for a rule method.
+    /// </summary>
+    /// <param name="recognizer"> the parser instance </param>
+    /// <param name="e"> the recognition exception to report </param>
+    virtual void reportError(Parser *recognizer, const RecognitionException &e) = 0;
+  };
+
+} // namespace antlr4
diff --git a/contrib/libs/antlr4_cpp_runtime/src/ANTLRFileStream.cpp b/contrib/libs/antlr4_cpp_runtime/src/ANTLRFileStream.cpp
new file mode 100644
index 0000000000..674817ac0e
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/ANTLRFileStream.cpp
@@ -0,0 +1,23 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#include "ANTLRFileStream.h"
+
+using namespace antlr4;
+
+void ANTLRFileStream::loadFromFile(const std::string &fileName) {
+  _fileName = fileName;
+  if (_fileName.empty()) {
+    return; // an empty name is recorded but nothing is opened or loaded
+  }
+
+  std::ifstream stream(fileName, std::ios::binary); // the open result is not checked here
+
+  ANTLRInputStream::load(stream);
+}
+
+std::string ANTLRFileStream::getSourceName() const {
+  return _fileName;
+}
diff --git a/contrib/libs/antlr4_cpp_runtime/src/ANTLRFileStream.h b/contrib/libs/antlr4_cpp_runtime/src/ANTLRFileStream.h
new file mode 100644
index 0000000000..6c7d619a00
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/ANTLRFileStream.h
@@ -0,0 +1,30 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "ANTLRInputStream.h"
+
+namespace antlr4 {
+
+  /// This is an ANTLRInputStream that is loaded from a file all at once
+  /// when you call loadFromFile().
+  // TODO: this class needs testing.
+ class ANTLR4CPP_PUBLIC ANTLRFileStream : public ANTLRInputStream { + public: + ANTLRFileStream() = default; + ANTLRFileStream(const std::string &) = delete; + ANTLRFileStream(const char *data, size_t length) = delete; + ANTLRFileStream(std::istream &stream) = delete; + + // Assumes a file name encoded in UTF-8 and file content in the same encoding (with or w/o BOM). + virtual void loadFromFile(const std::string &fileName); + virtual std::string getSourceName() const override; + + private: + std::string _fileName; // UTF-8 encoded file name. + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/ANTLRInputStream.cpp b/contrib/libs/antlr4_cpp_runtime/src/ANTLRInputStream.cpp new file mode 100644 index 0000000000..b6470af9b7 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/ANTLRInputStream.cpp @@ -0,0 +1,180 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include <string.h> + +#include "Exceptions.h" +#include "misc/Interval.h" +#include "IntStream.h" + +#include "support/Utf8.h" +#include "support/CPPUtils.h" + +#include "ANTLRInputStream.h" + +using namespace antlr4; +using namespace antlrcpp; + +using misc::Interval; + +ANTLRInputStream::ANTLRInputStream() { + InitializeInstanceFields(); +} + +ANTLRInputStream::ANTLRInputStream(std::string_view input): ANTLRInputStream() { + load(input.data(), input.length()); +} + +ANTLRInputStream::ANTLRInputStream(const char *data, size_t length) { + load(data, length); +} + +ANTLRInputStream::ANTLRInputStream(std::istream &stream): ANTLRInputStream() { + load(stream); +} + +void ANTLRInputStream::load(const std::string &input, bool lenient) { + load(input.data(), input.size(), lenient); +} + +void ANTLRInputStream::load(const char *data, size_t length, bool lenient) { + // Remove the UTF-8 BOM if present. 
+ const char *bom = "\xef\xbb\xbf"; + if (length >= 3 && strncmp(data, bom, 3) == 0) { + data += 3; + length -= 3; + } + if (lenient) { + _data = Utf8::lenientDecode(std::string_view(data, length)); + } else { + auto maybe_utf32 = Utf8::strictDecode(std::string_view(data, length)); + if (!maybe_utf32.has_value()) { + throw IllegalArgumentException("UTF-8 string contains an illegal byte sequence"); + } + _data = std::move(maybe_utf32).value(); + } + p = 0; +} + +void ANTLRInputStream::load(std::istream &stream, bool lenient) { + if (!stream.good() || stream.eof()) // No fail, bad or EOF. + return; + + _data.clear(); + + std::string s((std::istreambuf_iterator<char>(stream)), std::istreambuf_iterator<char>()); + load(s.data(), s.length(), lenient); +} + +void ANTLRInputStream::reset() { + p = 0; +} + +void ANTLRInputStream::consume() { + if (p >= _data.size()) { + assert(LA(1) == IntStream::EOF); + throw IllegalStateException("cannot consume EOF"); + } + + if (p < _data.size()) { + p++; + } +} + +size_t ANTLRInputStream::LA(ssize_t i) { + if (i == 0) { + return 0; // undefined + } + + ssize_t position = static_cast<ssize_t>(p); + if (i < 0) { + i++; // e.g., translate LA(-1) to use offset i=0; then _data[p+0-1] + if ((position + i - 1) < 0) { + return IntStream::EOF; // invalid; no char before first char + } + } + + if ((position + i - 1) >= static_cast<ssize_t>(_data.size())) { + return IntStream::EOF; + } + + return _data[static_cast<size_t>((position + i - 1))]; +} + +size_t ANTLRInputStream::LT(ssize_t i) { + return LA(i); +} + +size_t ANTLRInputStream::index() { + return p; +} + +size_t ANTLRInputStream::size() { + return _data.size(); +} + +// Mark/release do nothing. We have entire buffer. +ssize_t ANTLRInputStream::mark() { + return -1; +} + +void ANTLRInputStream::release(ssize_t /* marker */) { +} + +void ANTLRInputStream::seek(size_t index) { + if (index <= p) { + p = index; // just jump; don't update stream state (line, ...) 
+ return; + } + // seek forward, consume until p hits index or n (whichever comes first) + index = std::min(index, _data.size()); + while (p < index) { + consume(); + } +} + +std::string ANTLRInputStream::getText(const Interval &interval) { + if (interval.a < 0 || interval.b < 0) { + return ""; + } + + size_t start = static_cast<size_t>(interval.a); + size_t stop = static_cast<size_t>(interval.b); + + + if (stop >= _data.size()) { + stop = _data.size() - 1; + } + + size_t count = stop - start + 1; + if (start >= _data.size()) { + return ""; + } + + auto maybeUtf8 = Utf8::strictEncode(std::u32string_view(_data).substr(start, count)); + if (!maybeUtf8.has_value()) { + throw IllegalArgumentException("Input stream contains invalid Unicode code points"); + } + return std::move(maybeUtf8).value(); +} + +std::string ANTLRInputStream::getSourceName() const { + if (name.empty()) { + return IntStream::UNKNOWN_SOURCE_NAME; + } + return name; +} + +std::string ANTLRInputStream::toString() const { + auto maybeUtf8 = Utf8::strictEncode(_data); + if (!maybeUtf8.has_value()) { + throw IllegalArgumentException("Input stream contains invalid Unicode code points"); + } + return std::move(maybeUtf8).value(); +} + +void ANTLRInputStream::InitializeInstanceFields() { + p = 0; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/ANTLRInputStream.h b/contrib/libs/antlr4_cpp_runtime/src/ANTLRInputStream.h new file mode 100644 index 0000000000..413eadefa4 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/ANTLRInputStream.h @@ -0,0 +1,79 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include <string_view> + +#include "CharStream.h" + +namespace antlr4 { + + // Vacuum all input from a stream and then treat it + // like a string. Can also pass in a string or char[] to use. 
+ // Input is expected to be encoded in UTF-8 and converted to UTF-32 internally. + class ANTLR4CPP_PUBLIC ANTLRInputStream : public CharStream { + protected: + /// The data being scanned. + // UTF-32 + std::u32string _data; + + /// 0..n-1 index into string of next char + size_t p; + + public: + /// What is the name or source of this char stream? + std::string name; + + ANTLRInputStream(); + + ANTLRInputStream(std::string_view input); + + ANTLRInputStream(const char *data, size_t length); + ANTLRInputStream(std::istream &stream); + + virtual void load(const std::string &input, bool lenient); + virtual void load(const char *data, size_t length, bool lenient); + virtual void load(std::istream &stream, bool lenient); + + virtual void load(const std::string &input) { load(input, false); } + virtual void load(const char *data, size_t length) { load(data, length, false); } + virtual void load(std::istream &stream) { load(stream, false); } + + /// Reset the stream so that it's in the same state it was + /// when the object was created *except* the data array is not + /// touched. + virtual void reset(); + virtual void consume() override; + virtual size_t LA(ssize_t i) override; + virtual size_t LT(ssize_t i); + + /// <summary> + /// Return the current input symbol index 0..n where n indicates the + /// last symbol has been read. The index is the index of char to + /// be returned from LA(1). + /// </summary> + virtual size_t index() override; + virtual size_t size() override; + + /// <summary> + /// mark/release do nothing; we have entire buffer </summary> + virtual ssize_t mark() override; + virtual void release(ssize_t marker) override; + + /// <summary> + /// consume() ahead until p==index; can't just set p=index as we must + /// update line and charPositionInLine. 
If we seek backwards, just set p + /// </summary> + virtual void seek(size_t index) override; + virtual std::string getText(const misc::Interval &interval) override; + virtual std::string getSourceName() const override; + virtual std::string toString() const override; + + private: + void InitializeInstanceFields(); + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/BailErrorStrategy.cpp b/contrib/libs/antlr4_cpp_runtime/src/BailErrorStrategy.cpp new file mode 100644 index 0000000000..781a13b547 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/BailErrorStrategy.cpp @@ -0,0 +1,61 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "Exceptions.h" +#include "ParserRuleContext.h" +#include "InputMismatchException.h" +#include "Parser.h" + +#include "BailErrorStrategy.h" + +using namespace antlr4; + +void BailErrorStrategy::recover(Parser *recognizer, std::exception_ptr e) { + ParserRuleContext *context = recognizer->getContext(); + do { + context->exception = e; + if (context->parent == nullptr) + break; + context = static_cast<ParserRuleContext *>(context->parent); + } while (true); + + try { + std::rethrow_exception(e); // Throw the exception to be able to catch and rethrow nested. 
+#if defined(_MSC_FULL_VER) && _MSC_FULL_VER < 190023026 + } catch (RecognitionException &inner) { + throw ParseCancellationException(inner.what()); +#else + } catch (RecognitionException & /*inner*/) { + std::throw_with_nested(ParseCancellationException()); +#endif + } +} + +Token* BailErrorStrategy::recoverInline(Parser *recognizer) { + InputMismatchException e(recognizer); + std::exception_ptr exception = std::make_exception_ptr(e); + + ParserRuleContext *context = recognizer->getContext(); + do { + context->exception = exception; + if (context->parent == nullptr) + break; + context = static_cast<ParserRuleContext *>(context->parent); + } while (true); + + try { + throw e; +#if defined(_MSC_FULL_VER) && _MSC_FULL_VER < 190023026 + } catch (InputMismatchException &inner) { + throw ParseCancellationException(inner.what()); +#else + } catch (InputMismatchException & /*inner*/) { + std::throw_with_nested(ParseCancellationException()); +#endif + } +} + +void BailErrorStrategy::sync(Parser * /*recognizer*/) { +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/BailErrorStrategy.h b/contrib/libs/antlr4_cpp_runtime/src/BailErrorStrategy.h new file mode 100644 index 0000000000..598f993022 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/BailErrorStrategy.h @@ -0,0 +1,59 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "DefaultErrorStrategy.h" + +namespace antlr4 { + + /** + * This implementation of {@link ANTLRErrorStrategy} responds to syntax errors + * by immediately canceling the parse operation with a + * {@link ParseCancellationException}. The implementation ensures that the + * {@link ParserRuleContext#exception} field is set for all parse tree nodes + * that were not completed prior to encountering the error. 
+ * + * <p> + * This error strategy is useful in the following scenarios.</p> + * + * <ul> + * <li><strong>Two-stage parsing:</strong> This error strategy allows the first + * stage of two-stage parsing to immediately terminate if an error is + * encountered, and immediately fall back to the second stage. In addition to + * avoiding wasted work by attempting to recover from errors here, the empty + * implementation of {@link BailErrorStrategy#sync} improves the performance of + * the first stage.</li> + * <li><strong>Silent validation:</strong> When syntax errors are not being + * reported or logged, and the parse result is simply ignored if errors occur, + * the {@link BailErrorStrategy} avoids wasting work on recovering from errors + * when the result will be ignored either way.</li> + * </ul> + * + * <p> + * {@code myparser.setErrorHandler(new BailErrorStrategy());}</p> + * + * @see Parser#setErrorHandler(ANTLRErrorStrategy) + */ + class ANTLR4CPP_PUBLIC BailErrorStrategy : public DefaultErrorStrategy { + /// <summary> + /// Instead of recovering from exception {@code e}, re-throw it wrapped + /// in a <seealso cref="ParseCancellationException"/> so it is not caught by the + /// rule function catches. Use <seealso cref="Exception#getCause()"/> to get the + /// original <seealso cref="RecognitionException"/>. + /// </summary> + public: + virtual void recover(Parser *recognizer, std::exception_ptr e) override; + + /// Make sure we don't attempt to recover inline; if the parser + /// successfully recovers, it won't throw an exception. + virtual Token* recoverInline(Parser *recognizer) override; + + /// <summary> + /// Make sure we don't attempt to recover from problems in subrules. 
</summary> + virtual void sync(Parser *recognizer) override; + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/BaseErrorListener.cpp b/contrib/libs/antlr4_cpp_runtime/src/BaseErrorListener.cpp new file mode 100644 index 0000000000..cdcca8bc5c --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/BaseErrorListener.cpp @@ -0,0 +1,25 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "BaseErrorListener.h" +#include "RecognitionException.h" + +using namespace antlr4; + +void BaseErrorListener::syntaxError(Recognizer * /*recognizer*/, Token * /*offendingSymbol*/, size_t /*line*/, + size_t /*charPositionInLine*/, const std::string &/*msg*/, std::exception_ptr /*e*/) { +} + +void BaseErrorListener::reportAmbiguity(Parser * /*recognizer*/, const dfa::DFA &/*dfa*/, size_t /*startIndex*/, + size_t /*stopIndex*/, bool /*exact*/, const antlrcpp::BitSet &/*ambigAlts*/, atn::ATNConfigSet * /*configs*/) { +} + +void BaseErrorListener::reportAttemptingFullContext(Parser * /*recognizer*/, const dfa::DFA &/*dfa*/, size_t /*startIndex*/, + size_t /*stopIndex*/, const antlrcpp::BitSet &/*conflictingAlts*/, atn::ATNConfigSet * /*configs*/) { +} + +void BaseErrorListener::reportContextSensitivity(Parser * /*recognizer*/, const dfa::DFA &/*dfa*/, size_t /*startIndex*/, + size_t /*stopIndex*/, size_t /*prediction*/, atn::ATNConfigSet * /*configs*/) { +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/BaseErrorListener.h b/contrib/libs/antlr4_cpp_runtime/src/BaseErrorListener.h new file mode 100644 index 0000000000..317785aa64 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/BaseErrorListener.h @@ -0,0 +1,36 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "ANTLRErrorListener.h" + +namespace antlrcpp { + class BitSet; +} + +namespace antlr4 { + + /** + * Provides an empty default implementation of {@link ANTLRErrorListener}. The + * default implementation of each method does nothing, but can be overridden as + * necessary. + */ + class ANTLR4CPP_PUBLIC BaseErrorListener : public ANTLRErrorListener { + + virtual void syntaxError(Recognizer *recognizer, Token * offendingSymbol, size_t line, size_t charPositionInLine, + const std::string &msg, std::exception_ptr e) override; + + virtual void reportAmbiguity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, bool exact, + const antlrcpp::BitSet &ambigAlts, atn::ATNConfigSet *configs) override; + + virtual void reportAttemptingFullContext(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, + const antlrcpp::BitSet &conflictingAlts, atn::ATNConfigSet *configs) override; + + virtual void reportContextSensitivity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, + size_t prediction, atn::ATNConfigSet *configs) override; + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/BufferedTokenStream.cpp b/contrib/libs/antlr4_cpp_runtime/src/BufferedTokenStream.cpp new file mode 100644 index 0000000000..4eaff2c852 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/BufferedTokenStream.cpp @@ -0,0 +1,414 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "WritableToken.h" +#include "Lexer.h" +#include "RuleContext.h" +#include "misc/Interval.h" +#include "Exceptions.h" +#include "support/CPPUtils.h" + +#include "BufferedTokenStream.h" + +using namespace antlr4; +using namespace antlrcpp; + +BufferedTokenStream::BufferedTokenStream(TokenSource *tokenSource) : _tokenSource(tokenSource){ + InitializeInstanceFields(); +} + +TokenSource* BufferedTokenStream::getTokenSource() const { + return _tokenSource; +} + +size_t BufferedTokenStream::index() { + return _p; +} + +ssize_t BufferedTokenStream::mark() { + return 0; +} + +void BufferedTokenStream::release(ssize_t /*marker*/) { + // no resources to release +} + +void BufferedTokenStream::reset() { + seek(0); +} + +void BufferedTokenStream::seek(size_t index) { + lazyInit(); + _p = adjustSeekIndex(index); +} + +size_t BufferedTokenStream::size() { + return _tokens.size(); +} + +void BufferedTokenStream::consume() { + bool skipEofCheck = false; + if (!_needSetup) { + if (_fetchedEOF) { + // the last token in tokens is EOF. skip check if p indexes any + // fetched token except the last. + skipEofCheck = _p < _tokens.size() - 1; + } else { + // no EOF token in tokens. skip check if p indexes a fetched token. + skipEofCheck = _p < _tokens.size(); + } + } else { + // not yet initialized + skipEofCheck = false; + } + + if (!skipEofCheck && LA(1) == Token::EOF) { + throw IllegalStateException("cannot consume EOF"); + } + + if (sync(_p + 1)) { + _p = adjustSeekIndex(_p + 1); + } +} + +bool BufferedTokenStream::sync(size_t i) { + if (i + 1 < _tokens.size()) + return true; + size_t n = i - _tokens.size() + 1; // how many more elements we need? 
+ + if (n > 0) { + size_t fetched = fetch(n); + return fetched >= n; + } + + return true; +} + +size_t BufferedTokenStream::fetch(size_t n) { + if (_fetchedEOF) { + return 0; + } + + size_t i = 0; + while (i < n) { + std::unique_ptr<Token> t(_tokenSource->nextToken()); + + if (is<WritableToken *>(t.get())) { + (static_cast<WritableToken *>(t.get()))->setTokenIndex(_tokens.size()); + } + + _tokens.push_back(std::move(t)); + ++i; + + if (_tokens.back()->getType() == Token::EOF) { + _fetchedEOF = true; + break; + } + } + + return i; +} + +Token* BufferedTokenStream::get(size_t i) const { + if (i >= _tokens.size()) { + throw IndexOutOfBoundsException(std::string("token index ") + + std::to_string(i) + + std::string(" out of range 0..") + + std::to_string(_tokens.size() - 1)); + } + return _tokens[i].get(); +} + +std::vector<Token *> BufferedTokenStream::get(size_t start, size_t stop) { + std::vector<Token *> subset; + + lazyInit(); + + if (_tokens.empty()) { + return subset; + } + + if (stop >= _tokens.size()) { + stop = _tokens.size() - 1; + } + for (size_t i = start; i <= stop; i++) { + Token *t = _tokens[i].get(); + if (t->getType() == Token::EOF) { + break; + } + subset.push_back(t); + } + return subset; +} + +size_t BufferedTokenStream::LA(ssize_t i) { + return LT(i)->getType(); +} + +Token* BufferedTokenStream::LB(size_t k) { + if (k > _p) { + return nullptr; + } + return _tokens[_p - k].get(); +} + +Token* BufferedTokenStream::LT(ssize_t k) { + lazyInit(); + if (k == 0) { + return nullptr; + } + if (k < 0) { + return LB(-k); + } + + size_t i = _p + k - 1; + sync(i); + if (i >= _tokens.size()) { // return EOF token + // EOF must be last token + return _tokens.back().get(); + } + + return _tokens[i].get(); +} + +ssize_t BufferedTokenStream::adjustSeekIndex(size_t i) { + return i; +} + +void BufferedTokenStream::lazyInit() { + if (_needSetup) { + setup(); + } +} + +void BufferedTokenStream::setup() { + _needSetup = false; + sync(0); + _p = adjustSeekIndex(0); +} + 
+void BufferedTokenStream::setTokenSource(TokenSource *tokenSource) { + _tokenSource = tokenSource; + _tokens.clear(); + _fetchedEOF = false; + _needSetup = true; +} + +std::vector<Token *> BufferedTokenStream::getTokens() { + std::vector<Token *> result; + for (auto &t : _tokens) + result.push_back(t.get()); + return result; +} + +std::vector<Token *> BufferedTokenStream::getTokens(size_t start, size_t stop) { + return getTokens(start, stop, std::vector<size_t>()); +} + +std::vector<Token *> BufferedTokenStream::getTokens(size_t start, size_t stop, const std::vector<size_t> &types) { + lazyInit(); + if (stop >= _tokens.size() || start >= _tokens.size()) { + throw IndexOutOfBoundsException(std::string("start ") + + std::to_string(start) + + std::string(" or stop ") + + std::to_string(stop) + + std::string(" not in 0..") + + std::to_string(_tokens.size() - 1)); + } + + std::vector<Token *> filteredTokens; + + if (start > stop) { + return filteredTokens; + } + + for (size_t i = start; i <= stop; i++) { + Token *tok = _tokens[i].get(); + + if (types.empty() || std::find(types.begin(), types.end(), tok->getType()) != types.end()) { + filteredTokens.push_back(tok); + } + } + return filteredTokens; +} + +std::vector<Token *> BufferedTokenStream::getTokens(size_t start, size_t stop, size_t ttype) { + std::vector<size_t> s; + s.push_back(ttype); + return getTokens(start, stop, s); +} + +ssize_t BufferedTokenStream::nextTokenOnChannel(size_t i, size_t channel) { + sync(i); + if (i >= size()) { + return size() - 1; + } + + Token *token = _tokens[i].get(); + while (token->getChannel() != channel) { + if (token->getType() == Token::EOF) { + return i; + } + i++; + sync(i); + token = _tokens[i].get(); + } + return i; +} + +ssize_t BufferedTokenStream::previousTokenOnChannel(size_t i, size_t channel) { + sync(i); + if (i >= size()) { + // the EOF token is on every channel + return size() - 1; + } + + while (true) { + Token *token = _tokens[i].get(); + if (token->getType() == 
Token::EOF || token->getChannel() == channel) { + return i; + } + + if (i == 0) + return -1; + i--; + } + return i; +} + +std::vector<Token *> BufferedTokenStream::getHiddenTokensToRight(size_t tokenIndex, ssize_t channel) { + lazyInit(); + if (tokenIndex >= _tokens.size()) { + throw IndexOutOfBoundsException(std::to_string(tokenIndex) + " not in 0.." + std::to_string(_tokens.size() - 1)); + } + + ssize_t nextOnChannel = nextTokenOnChannel(tokenIndex + 1, Lexer::DEFAULT_TOKEN_CHANNEL); + size_t to; + size_t from = tokenIndex + 1; + // if none onchannel to right, nextOnChannel=-1 so set to = last token + if (nextOnChannel == -1) { + to = static_cast<ssize_t>(size() - 1); + } else { + to = nextOnChannel; + } + + return filterForChannel(from, to, channel); +} + +std::vector<Token *> BufferedTokenStream::getHiddenTokensToRight(size_t tokenIndex) { + return getHiddenTokensToRight(tokenIndex, -1); +} + +std::vector<Token *> BufferedTokenStream::getHiddenTokensToLeft(size_t tokenIndex, ssize_t channel) { + lazyInit(); + if (tokenIndex >= _tokens.size()) { + throw IndexOutOfBoundsException(std::to_string(tokenIndex) + " not in 0.." + std::to_string(_tokens.size() - 1)); + } + + if (tokenIndex == 0) { + // Obviously no tokens can appear before the first token. 
+ return { }; + } + + ssize_t prevOnChannel = previousTokenOnChannel(tokenIndex - 1, Lexer::DEFAULT_TOKEN_CHANNEL); + if (prevOnChannel == static_cast<ssize_t>(tokenIndex - 1)) { + return { }; + } + // if none onchannel to left, prevOnChannel=-1 then from=0 + size_t from = static_cast<size_t>(prevOnChannel + 1); + size_t to = tokenIndex - 1; + + return filterForChannel(from, to, channel); +} + +std::vector<Token *> BufferedTokenStream::getHiddenTokensToLeft(size_t tokenIndex) { + return getHiddenTokensToLeft(tokenIndex, -1); +} + +std::vector<Token *> BufferedTokenStream::filterForChannel(size_t from, size_t to, ssize_t channel) { + std::vector<Token *> hidden; + for (size_t i = from; i <= to; i++) { + Token *t = _tokens[i].get(); + if (channel == -1) { + if (t->getChannel() != Lexer::DEFAULT_TOKEN_CHANNEL) { + hidden.push_back(t); + } + } else { + if (t->getChannel() == static_cast<size_t>(channel)) { + hidden.push_back(t); + } + } + } + + return hidden; +} + +bool BufferedTokenStream::isInitialized() const { + return !_needSetup; +} + +/** + * Get the text of all tokens in this buffer. 
+ */ +std::string BufferedTokenStream::getSourceName() const +{ + return _tokenSource->getSourceName(); +} + +std::string BufferedTokenStream::getText() { + fill(); + return getText(misc::Interval(0U, size() - 1)); +} + +std::string BufferedTokenStream::getText(const misc::Interval &interval) { + lazyInit(); + size_t start = interval.a; + size_t stop = interval.b; + if (start == INVALID_INDEX || stop == INVALID_INDEX) { + return ""; + } + sync(stop); + if (stop >= _tokens.size()) { + stop = _tokens.size() - 1; + } + + std::stringstream ss; + for (size_t i = start; i <= stop; i++) { + Token *t = _tokens[i].get(); + if (t->getType() == Token::EOF) { + break; + } + ss << t->getText(); + } + return ss.str(); +} + +std::string BufferedTokenStream::getText(RuleContext *ctx) { + return getText(ctx->getSourceInterval()); +} + +std::string BufferedTokenStream::getText(Token *start, Token *stop) { + if (start != nullptr && stop != nullptr) { + return getText(misc::Interval(start->getTokenIndex(), stop->getTokenIndex())); + } + + return ""; +} + +void BufferedTokenStream::fill() { + lazyInit(); + const size_t blockSize = 1000; + while (true) { + size_t fetched = fetch(blockSize); + if (fetched < blockSize) { + return; + } + } +} + +void BufferedTokenStream::InitializeInstanceFields() { + _needSetup = true; + _fetchedEOF = false; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/BufferedTokenStream.h b/contrib/libs/antlr4_cpp_runtime/src/BufferedTokenStream.h new file mode 100644 index 0000000000..2161471241 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/BufferedTokenStream.h @@ -0,0 +1,200 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "TokenStream.h" + +namespace antlr4 { + + /** + * This implementation of {@link TokenStream} loads tokens from a + * {@link TokenSource} on-demand, and places the tokens in a buffer to provide + * access to any previous token by index. + * + * <p> + * This token stream ignores the value of {@link Token#getChannel}. If your + * parser requires the token stream to filter tokens to only those on a particular + * channel, such as {@link Token#DEFAULT_CHANNEL} or + * {@link Token#HIDDEN_CHANNEL}, use a filtering token stream such as + * {@link CommonTokenStream}.</p> + */ + class ANTLR4CPP_PUBLIC BufferedTokenStream : public TokenStream { + public: + BufferedTokenStream(TokenSource *tokenSource); + BufferedTokenStream(const BufferedTokenStream& other) = delete; + + BufferedTokenStream& operator = (const BufferedTokenStream& other) = delete; + + virtual TokenSource* getTokenSource() const override; + virtual size_t index() override; + virtual ssize_t mark() override; + + virtual void release(ssize_t marker) override; + virtual void reset(); + virtual void seek(size_t index) override; + + virtual size_t size() override; + virtual void consume() override; + + virtual Token* get(size_t i) const override; + + /// Get all tokens from start..stop inclusively. + virtual std::vector<Token *> get(size_t start, size_t stop); + + virtual size_t LA(ssize_t i) override; + virtual Token* LT(ssize_t k) override; + + /// Reset this token stream by setting its token source. + virtual void setTokenSource(TokenSource *tokenSource); + virtual std::vector<Token *> getTokens(); + virtual std::vector<Token *> getTokens(size_t start, size_t stop); + + /// <summary> + /// Given a start and stop index, return all tokens whose type is in + /// {@code types} (every token when {@code types} is empty). Returns an empty vector if no tokens were found. This + /// method looks at both on and off channel tokens. 
+ /// </summary> + virtual std::vector<Token *> getTokens(size_t start, size_t stop, const std::vector<size_t> &types); + virtual std::vector<Token *> getTokens(size_t start, size_t stop, size_t ttype); + + /// Collect all tokens on specified channel to the right of + /// the current token up until we see a token on DEFAULT_TOKEN_CHANNEL or + /// EOF. If channel is -1, find any non default channel token. + virtual std::vector<Token *> getHiddenTokensToRight(size_t tokenIndex, ssize_t channel); + + /// <summary> + /// Collect all hidden tokens (any off-default channel) to the right of + /// the current token up until we see a token on DEFAULT_TOKEN_CHANNEL + /// or EOF. + /// </summary> + virtual std::vector<Token *> getHiddenTokensToRight(size_t tokenIndex); + + /// <summary> + /// Collect all tokens on specified channel to the left of + /// the current token up until we see a token on DEFAULT_TOKEN_CHANNEL. + /// If channel is -1, find any non default channel token. + /// </summary> + virtual std::vector<Token *> getHiddenTokensToLeft(size_t tokenIndex, ssize_t channel); + + /// <summary> + /// Collect all hidden tokens (any off-default channel) to the left of + /// the current token up until we see a token on DEFAULT_TOKEN_CHANNEL. + /// </summary> + virtual std::vector<Token *> getHiddenTokensToLeft(size_t tokenIndex); + + virtual std::string getSourceName() const override; + virtual std::string getText() override; + virtual std::string getText(const misc::Interval &interval) override; + virtual std::string getText(RuleContext *ctx) override; + virtual std::string getText(Token *start, Token *stop) override; + + /// Get all tokens from lexer until EOF. + virtual void fill(); + + protected: + /** + * The {@link TokenSource} from which tokens for this stream are fetched. + */ + TokenSource *_tokenSource; + + /** + * A collection of all tokens fetched from the token source. 
The list is + * considered a complete view of the input once {@link #fetchedEOF} is set + * to {@code true}. + */ + std::vector<std::unique_ptr<Token>> _tokens; + + /** + * The index into {@link #tokens} of the current token (next token to + * {@link #consume}). {@link #tokens}{@code [}{@link #p}{@code ]} should be + * {@link #LT LT(1)}. + * + * <p>This field is set to -1 when the stream is first constructed or when + * {@link #setTokenSource} is called, indicating that the first token has + * not yet been fetched from the token source. For additional information, + * see the documentation of {@link IntStream} for a description of + * Initializing Methods.</p> + */ + // ml: since -1 requires to make this member signed for just this single aspect we use a member _needSetup instead. + // Use bool isInitialized() to find out if this stream has started reading. + size_t _p; + + /** + * Indicates whether the {@link Token#EOF} token has been fetched from + * {@link #tokenSource} and added to {@link #tokens}. This field improves + * performance for the following cases: + * + * <ul> + * <li>{@link #consume}: The lookahead check in {@link #consume} to prevent + * consuming the EOF symbol is optimized by checking the values of + * {@link #fetchedEOF} and {@link #p} instead of calling {@link #LA}.</li> + * <li>{@link #fetch}: The check to prevent adding multiple EOF symbols into + * {@link #tokens} is trivial with this field.</li> + * </ul> + */ + bool _fetchedEOF; + + /// <summary> + /// Make sure index {@code i} in tokens has a token. + /// </summary> + /// <returns> {@code true} if a token is located at index {@code i}, otherwise + /// {@code false}. </returns> + /// <seealso cref="#get(size_t)"/> + virtual bool sync(size_t i); + + /// <summary> + /// Add {@code n} elements to buffer. + /// </summary> + /// <returns> The actual number of elements added to the buffer. 
</returns> + virtual size_t fetch(size_t n); + + virtual Token* LB(size_t k); + + /// Allows derived classes to modify the behavior of operations which change + /// the current stream position by adjusting the target token index of a seek + /// operation. The default implementation simply returns {@code i}. If an + /// exception is thrown in this method, the current stream index should not be + /// changed. + /// <p/> + /// For example, <seealso cref="CommonTokenStream"/> overrides this method to ensure that + /// the seek target is always an on-channel token. + /// + /// <param name="i"> The target token index. </param> + /// <returns> The adjusted target token index. </returns> + virtual ssize_t adjustSeekIndex(size_t i); + void lazyInit(); + virtual void setup(); + + /** + * Given a starting index, return the index of the next token on channel. + * Return {@code i} if {@code tokens[i]} is on channel. Return the index of + * the EOF token if there are no tokens on channel between {@code i} and + * EOF. + */ + virtual ssize_t nextTokenOnChannel(size_t i, size_t channel); + + /** + * Given a starting index, return the index of the previous token on + * channel. Return {@code i} if {@code tokens[i]} is on channel. Return -1 + * if there are no tokens on channel between {@code i} and 0. + * + * <p> + * If {@code i} specifies an index at or after the EOF token, the EOF token + * index is returned. 
This is due to the fact that the EOF token is treated + * as though it were on every channel.</p> + */ + virtual ssize_t previousTokenOnChannel(size_t i, size_t channel); + + virtual std::vector<Token *> filterForChannel(size_t from, size_t to, ssize_t channel); + + bool isInitialized() const; + + private: + bool _needSetup; + void InitializeInstanceFields(); + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/CharStream.cpp b/contrib/libs/antlr4_cpp_runtime/src/CharStream.cpp new file mode 100644 index 0000000000..b05874c8bf --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/CharStream.cpp @@ -0,0 +1,11 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "CharStream.h" + +using namespace antlr4; + +CharStream::~CharStream() { +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/CharStream.h b/contrib/libs/antlr4_cpp_runtime/src/CharStream.h new file mode 100644 index 0000000000..a9952dbbac --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/CharStream.h @@ -0,0 +1,37 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "IntStream.h" +#include "misc/Interval.h" + +namespace antlr4 { + + /// A source of characters for an ANTLR lexer. + class ANTLR4CPP_PUBLIC CharStream : public IntStream { + public: + virtual ~CharStream(); + + /// This method returns the text for a range of characters within this input + /// stream. This method is guaranteed to not throw an exception if the + /// specified interval lies entirely within a marked range. For more + /// information about marked ranges, see IntStream::mark. 
+ /// + /// <param name="interval"> an interval within the stream </param> + /// <returns> the text of the specified interval + /// </returns> + /// <exception cref="NullPointerException"> if {@code interval} is {@code null} </exception> + /// <exception cref="IllegalArgumentException"> if {@code interval.a < 0}, or if + /// {@code interval.b < interval.a - 1}, or if {@code interval.b} lies at or + /// past the end of the stream </exception> + /// <exception cref="UnsupportedOperationException"> if the stream does not support + /// getting the text of the specified interval </exception> + virtual std::string getText(const misc::Interval &interval) = 0; + + virtual std::string toString() const = 0; + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/CommonToken.cpp b/contrib/libs/antlr4_cpp_runtime/src/CommonToken.cpp new file mode 100644 index 0000000000..6e9f06a249 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/CommonToken.cpp @@ -0,0 +1,193 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "TokenSource.h" +#include "CharStream.h" +#include "Recognizer.h" +#include "Vocabulary.h" + +#include "misc/Interval.h" + +#include "support/CPPUtils.h" +#include "support/StringUtils.h" + +#include "CommonToken.h" + +using namespace antlr4; +using namespace antlr4::misc; + +using namespace antlrcpp; + +const std::pair<TokenSource*, CharStream*> CommonToken::EMPTY_SOURCE; + +CommonToken::CommonToken(size_t type) { + InitializeInstanceFields(); + _type = type; +} + +CommonToken::CommonToken(std::pair<TokenSource*, CharStream*> source, size_t type, size_t channel, size_t start, size_t stop) { + InitializeInstanceFields(); + _source = source; + _type = type; + _channel = channel; + _start = start; + _stop = stop; + if (_source.first != nullptr) { + _line = static_cast<int>(source.first->getLine()); + _charPositionInLine = source.first->getCharPositionInLine(); + } +} + +CommonToken::CommonToken(size_t type, const std::string &text) { + InitializeInstanceFields(); + _type = type; + _channel = DEFAULT_CHANNEL; + _text = text; + _source = EMPTY_SOURCE; +} + +CommonToken::CommonToken(Token *oldToken) { + InitializeInstanceFields(); + _type = oldToken->getType(); + _line = oldToken->getLine(); + _index = oldToken->getTokenIndex(); + _charPositionInLine = oldToken->getCharPositionInLine(); + _channel = oldToken->getChannel(); + _start = oldToken->getStartIndex(); + _stop = oldToken->getStopIndex(); + + if (is<CommonToken *>(oldToken)) { + _text = (static_cast<CommonToken *>(oldToken))->_text; + _source = (static_cast<CommonToken *>(oldToken))->_source; + } else { + _text = oldToken->getText(); + _source = { oldToken->getTokenSource(), oldToken->getInputStream() }; + } +} + +size_t CommonToken::getType() const { + return _type; +} + +void CommonToken::setLine(size_t line) { + _line = line; +} + +std::string CommonToken::getText() const { + if (!_text.empty()) { + return _text; + } + + CharStream *input = getInputStream(); + if (input == nullptr) { + return 
""; + } + size_t n = input->size(); + if (_start < n && _stop < n) { + return input->getText(misc::Interval(_start, _stop)); + } else { + return "<EOF>"; + } +} + +void CommonToken::setText(const std::string &text) { + _text = text; +} + +size_t CommonToken::getLine() const { + return _line; +} + +size_t CommonToken::getCharPositionInLine() const { + return _charPositionInLine; +} + +void CommonToken::setCharPositionInLine(size_t charPositionInLine) { + _charPositionInLine = charPositionInLine; +} + +size_t CommonToken::getChannel() const { + return _channel; +} + +void CommonToken::setChannel(size_t channel) { + _channel = channel; +} + +void CommonToken::setType(size_t type) { + _type = type; +} + +size_t CommonToken::getStartIndex() const { + return _start; +} + +void CommonToken::setStartIndex(size_t start) { + _start = start; +} + +size_t CommonToken::getStopIndex() const { + return _stop; +} + +void CommonToken::setStopIndex(size_t stop) { + _stop = stop; +} + +size_t CommonToken::getTokenIndex() const { + return _index; +} + +void CommonToken::setTokenIndex(size_t index) { + _index = index; +} + +antlr4::TokenSource *CommonToken::getTokenSource() const { + return _source.first; +} + +antlr4::CharStream *CommonToken::getInputStream() const { + return _source.second; +} + +std::string CommonToken::toString() const { + return toString(nullptr); +} + +std::string CommonToken::toString(Recognizer *r) const { + std::stringstream ss; + + std::string channelStr; + if (_channel > 0) { + channelStr = ",channel=" + std::to_string(_channel); + } + std::string txt = getText(); + if (!txt.empty()) { + txt = antlrcpp::escapeWhitespace(txt); + } else { + txt = "<no text>"; + } + + std::string typeString = std::to_string(symbolToNumeric(_type)); + if (r != nullptr) + typeString = r->getVocabulary().getDisplayName(_type); + + ss << "[@" << symbolToNumeric(getTokenIndex()) << "," << symbolToNumeric(_start) << ":" << symbolToNumeric(_stop) + << "='" << txt << "',<" << 
typeString << ">" << channelStr << "," << _line << ":" + << getCharPositionInLine() << "]"; + + return ss.str(); +} + +void CommonToken::InitializeInstanceFields() { + _type = 0; + _line = 0; + _charPositionInLine = INVALID_INDEX; + _channel = DEFAULT_CHANNEL; + _index = INVALID_INDEX; + _start = 0; + _stop = 0; + _source = EMPTY_SOURCE; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/CommonToken.h b/contrib/libs/antlr4_cpp_runtime/src/CommonToken.h new file mode 100644 index 0000000000..3fbc2ae4f5 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/CommonToken.h @@ -0,0 +1,158 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "WritableToken.h" + +namespace antlr4 { + + class ANTLR4CPP_PUBLIC CommonToken : public WritableToken { + protected: + /** + * An empty {@link Pair} which is used as the default value of + * {@link #source} for tokens that do not have a source. + */ + static const std::pair<TokenSource *, CharStream *> EMPTY_SOURCE; + + /** + * This is the backing field for {@link #getType} and {@link #setType}. + */ + size_t _type; + + /** + * This is the backing field for {@link #getLine} and {@link #setLine}. + */ + size_t _line; + + /** + * This is the backing field for {@link #getCharPositionInLine} and + * {@link #setCharPositionInLine}. + */ + size_t _charPositionInLine; // set to invalid position + + /** + * This is the backing field for {@link #getChannel} and + * {@link #setChannel}. + */ + size_t _channel; + + /** + * This is the backing field for {@link #getTokenSource} and + * {@link #getInputStream}. + * + * <p> + * These properties share a field to reduce the memory footprint of + * {@link CommonToken}. 
Tokens created by a {@link CommonTokenFactory} from + * the same source and input stream share a reference to the same + * {@link Pair} containing these values.</p> + */ + + std::pair<TokenSource *, CharStream *> _source; // ml: pure references, usually from statically allocated classes. + + /** + * This is the backing field for {@link #getText} when the token text is + * explicitly set in the constructor or via {@link #setText}. + * + * @see #getText() + */ + std::string _text; + + /** + * This is the backing field for {@link #getTokenIndex} and + * {@link #setTokenIndex}. + */ + size_t _index; + + /** + * This is the backing field for {@link #getStartIndex} and + * {@link #setStartIndex}. + */ + size_t _start; + + /** + * This is the backing field for {@link #getStopIndex} and + * {@link #setStopIndex}. + */ + size_t _stop; + + public: + /** + * Constructs a new {@link CommonToken} with the specified token type. + * + * @param type The token type. + */ + CommonToken(size_t type); + CommonToken(std::pair<TokenSource*, CharStream*> source, size_t type, size_t channel, size_t start, size_t stop); + + /** + * Constructs a new {@link CommonToken} with the specified token type and + * text. + * + * @param type The token type. + * @param text The text of the token. + */ + CommonToken(size_t type, const std::string &text); + + /** + * Constructs a new {@link CommonToken} as a copy of another {@link Token}. + * + * <p> + * If {@code oldToken} is also a {@link CommonToken} instance, the newly + * constructed token will share a reference to the {@link #text} field and + * the {@link Pair} stored in {@link #source}. Otherwise, {@link #text} will + * be assigned the result of calling {@link #getText}, and {@link #source} + * will be constructed from the result of {@link Token#getTokenSource} and + * {@link Token#getInputStream}.</p> + * + * @param oldToken The token to copy. 
+ */ + CommonToken(Token *oldToken); + + virtual size_t getType() const override; + + /** + * Explicitly set the text for this token. If {code text} is not + * {@code null}, then {@link #getText} will return this value rather than + * extracting the text from the input. + * + * @param text The explicit text of the token, or {@code null} if the text + * should be obtained from the input along with the start and stop indexes + * of the token. + */ + virtual void setText(const std::string &text) override; + virtual std::string getText() const override; + + virtual void setLine(size_t line) override; + virtual size_t getLine() const override; + + virtual size_t getCharPositionInLine() const override; + virtual void setCharPositionInLine(size_t charPositionInLine) override; + + virtual size_t getChannel() const override; + virtual void setChannel(size_t channel) override; + + virtual void setType(size_t type) override; + + virtual size_t getStartIndex() const override; + virtual void setStartIndex(size_t start); + + virtual size_t getStopIndex() const override; + virtual void setStopIndex(size_t stop); + + virtual size_t getTokenIndex() const override; + virtual void setTokenIndex(size_t index) override; + + virtual TokenSource *getTokenSource() const override; + virtual CharStream *getInputStream() const override; + + virtual std::string toString() const override; + + virtual std::string toString(Recognizer *r) const; + private: + void InitializeInstanceFields(); + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/CommonTokenFactory.cpp b/contrib/libs/antlr4_cpp_runtime/src/CommonTokenFactory.cpp new file mode 100644 index 0000000000..23d8f7003a --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/CommonTokenFactory.cpp @@ -0,0 +1,39 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/Interval.h" +#include "CommonToken.h" +#include "CharStream.h" + +#include "CommonTokenFactory.h" + +using namespace antlr4; + +const std::unique_ptr<TokenFactory<CommonToken>> CommonTokenFactory::DEFAULT(new CommonTokenFactory); + +CommonTokenFactory::CommonTokenFactory(bool copyText_) : copyText(copyText_) { +} + +CommonTokenFactory::CommonTokenFactory() : CommonTokenFactory(false) { +} + +std::unique_ptr<CommonToken> CommonTokenFactory::create(std::pair<TokenSource*, CharStream*> source, size_t type, + const std::string &text, size_t channel, size_t start, size_t stop, size_t line, size_t charPositionInLine) { + + std::unique_ptr<CommonToken> t(new CommonToken(source, type, channel, start, stop)); + t->setLine(line); + t->setCharPositionInLine(charPositionInLine); + if (text != "") { + t->setText(text); + } else if (copyText && source.second != nullptr) { + t->setText(source.second->getText(misc::Interval(start, stop))); + } + + return t; +} + +std::unique_ptr<CommonToken> CommonTokenFactory::create(size_t type, const std::string &text) { + return std::unique_ptr<CommonToken>(new CommonToken(type, text)); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/CommonTokenFactory.h b/contrib/libs/antlr4_cpp_runtime/src/CommonTokenFactory.h new file mode 100644 index 0000000000..0ae1a0353c --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/CommonTokenFactory.h @@ -0,0 +1,74 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "TokenFactory.h" + +namespace antlr4 { + + /** + * This default implementation of {@link TokenFactory} creates + * {@link CommonToken} objects. + */ + class ANTLR4CPP_PUBLIC CommonTokenFactory : public TokenFactory<CommonToken> { + public: + /** + * The default {@link CommonTokenFactory} instance. 
+ * + * <p> + * This token factory does not explicitly copy token text when constructing + * tokens.</p> + */ + static const std::unique_ptr<TokenFactory<CommonToken>> DEFAULT; + + protected: + /** + * Indicates whether {@link CommonToken#setText} should be called after + * constructing tokens to explicitly set the text. This is useful for cases + * where the input stream might not be able to provide arbitrary substrings + * of text from the input after the lexer creates a token (e.g. the + * implementation of {@link CharStream#getText} in + * {@link UnbufferedCharStream} throws an + * {@link UnsupportedOperationException}). Explicitly setting the token text + * allows {@link Token#getText} to be called at any time regardless of the + * input stream implementation. + * + * <p> + * The default value is {@code false} to avoid the performance and memory + * overhead of copying text for every token unless explicitly requested.</p> + */ + const bool copyText; + + public: + /** + * Constructs a {@link CommonTokenFactory} with the specified value for + * {@link #copyText}. + * + * <p> + * When {@code copyText} is {@code false}, the {@link #DEFAULT} instance + * should be used instead of constructing a new instance.</p> + * + * @param copyText The value for {@link #copyText}. + */ + CommonTokenFactory(bool copyText); + + /** + * Constructs a {@link CommonTokenFactory} with {@link #copyText} set to + * {@code false}. 
+ * + * <p> + * The {@link #DEFAULT} instance should be used instead of calling this + * directly.</p> + */ + CommonTokenFactory(); + + virtual std::unique_ptr<CommonToken> create(std::pair<TokenSource*, CharStream*> source, size_t type, + const std::string &text, size_t channel, size_t start, size_t stop, size_t line, size_t charPositionInLine) override; + + virtual std::unique_ptr<CommonToken> create(size_t type, const std::string &text) override; + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/CommonTokenStream.cpp b/contrib/libs/antlr4_cpp_runtime/src/CommonTokenStream.cpp new file mode 100644 index 0000000000..02a2e55af3 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/CommonTokenStream.cpp @@ -0,0 +1,78 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "Token.h" + +#include "CommonTokenStream.h" + +using namespace antlr4; + +CommonTokenStream::CommonTokenStream(TokenSource *tokenSource) : CommonTokenStream(tokenSource, Token::DEFAULT_CHANNEL) { +} + +CommonTokenStream::CommonTokenStream(TokenSource *tokenSource, size_t channel_) +: BufferedTokenStream(tokenSource), channel(channel_) { +} + +ssize_t CommonTokenStream::adjustSeekIndex(size_t i) { + return nextTokenOnChannel(i, channel); +} + +Token* CommonTokenStream::LB(size_t k) { + if (k == 0 || k > _p) { + return nullptr; + } + + ssize_t i = static_cast<ssize_t>(_p); + size_t n = 1; + // find k good tokens looking backwards + while (n <= k) { + // skip off-channel tokens + i = previousTokenOnChannel(i - 1, channel); + n++; + } + if (i < 0) { + return nullptr; + } + + return _tokens[i].get(); +} + +Token* CommonTokenStream::LT(ssize_t k) { + lazyInit(); + if (k == 0) { + return nullptr; + } + if (k < 0) { + return LB(static_cast<size_t>(-k)); + } + size_t i = _p; + ssize_t n = 1; // we know tokens[p] is a 
good one + // find k good tokens + while (n < k) { + // skip off-channel tokens, but make sure to not look past EOF + if (sync(i + 1)) { + i = nextTokenOnChannel(i + 1, channel); + } + n++; + } + + return _tokens[i].get(); +} + +int CommonTokenStream::getNumberOfOnChannelTokens() { + int n = 0; + fill(); + for (size_t i = 0; i < _tokens.size(); i++) { + Token *t = _tokens[i].get(); + if (t->getChannel() == channel) { + n++; + } + if (t->getType() == Token::EOF) { + break; + } + } + return n; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/CommonTokenStream.h b/contrib/libs/antlr4_cpp_runtime/src/CommonTokenStream.h new file mode 100644 index 0000000000..fde72c7386 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/CommonTokenStream.h @@ -0,0 +1,79 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "BufferedTokenStream.h" + +namespace antlr4 { + + /** + * This class extends {@link BufferedTokenStream} with functionality to filter + * token streams to tokens on a particular channel (tokens where + * {@link Token#getChannel} returns a particular value). + * + * <p> + * This token stream provides access to all tokens by index or when calling + * methods like {@link #getText}. The channel filtering is only used for code + * accessing tokens via the lookahead methods {@link #LA}, {@link #LT}, and + * {@link #LB}.</p> + * + * <p> + * By default, tokens are placed on the default channel + * ({@link Token#DEFAULT_CHANNEL}), but may be reassigned by using the + * {@code ->channel(HIDDEN)} lexer command, or by using an embedded action to + * call {@link Lexer#setChannel}. 
+ * </p> + * + * <p> + * Note: lexer rules which use the {@code ->skip} lexer command or call + * {@link Lexer#skip} do not produce tokens at all, so input text matched by + * such a rule will not be available as part of the token stream, regardless of + * channel.</p> + */ + class ANTLR4CPP_PUBLIC CommonTokenStream : public BufferedTokenStream { + public: + /** + * Constructs a new {@link CommonTokenStream} using the specified token + * source and the default token channel ({@link Token#DEFAULT_CHANNEL}). + * + * @param tokenSource The token source. + */ + CommonTokenStream(TokenSource *tokenSource); + + /** + * Constructs a new {@link CommonTokenStream} using the specified token + * source and filtering tokens to the specified channel. Only tokens whose + * {@link Token#getChannel} matches {@code channel} or have the + * {@link Token#getType} equal to {@link Token#EOF} will be returned by the + * token stream lookahead methods. + * + * @param tokenSource The token source. + * @param channel The channel to use for filtering tokens. + */ + CommonTokenStream(TokenSource *tokenSource, size_t channel); + + virtual Token* LT(ssize_t k) override; + + /// Count EOF just once. + virtual int getNumberOfOnChannelTokens(); + + protected: + /** + * Specifies the channel to use for filtering tokens. + * + * <p> + * The default value is {@link Token#DEFAULT_CHANNEL}, which matches the + * default channel assigned to tokens created by the lexer.</p> + */ + size_t channel; + + virtual ssize_t adjustSeekIndex(size_t i) override; + + virtual Token* LB(size_t k) override; + + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/ConsoleErrorListener.cpp b/contrib/libs/antlr4_cpp_runtime/src/ConsoleErrorListener.cpp new file mode 100644 index 0000000000..c16f949cd2 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/ConsoleErrorListener.cpp @@ -0,0 +1,15 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "ConsoleErrorListener.h" + +using namespace antlr4; + +ConsoleErrorListener ConsoleErrorListener::INSTANCE; + +void ConsoleErrorListener::syntaxError(Recognizer * /*recognizer*/, Token * /*offendingSymbol*/, + size_t line, size_t charPositionInLine, const std::string &msg, std::exception_ptr /*e*/) { + std::cerr << "line " << line << ":" << charPositionInLine << " " << msg << std::endl; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/ConsoleErrorListener.h b/contrib/libs/antlr4_cpp_runtime/src/ConsoleErrorListener.h new file mode 100644 index 0000000000..f1d1188667 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/ConsoleErrorListener.h @@ -0,0 +1,35 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "BaseErrorListener.h" + +namespace antlr4 { + + class ANTLR4CPP_PUBLIC ConsoleErrorListener : public BaseErrorListener { + public: + /** + * Provides a default instance of {@link ConsoleErrorListener}. 
+ */ + static ConsoleErrorListener INSTANCE; + + /** + * {@inheritDoc} + * + * <p> + * This implementation prints messages to {@link System#err} containing the + * values of {@code line}, {@code charPositionInLine}, and {@code msg} using + * the following format.</p> + * + * <pre> + * line <em>line</em>:<em>charPositionInLine</em> <em>msg</em> + * </pre> + */ + virtual void syntaxError(Recognizer *recognizer, Token * offendingSymbol, size_t line, size_t charPositionInLine, + const std::string &msg, std::exception_ptr e) override; + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/DefaultErrorStrategy.cpp b/contrib/libs/antlr4_cpp_runtime/src/DefaultErrorStrategy.cpp new file mode 100644 index 0000000000..e5a7327859 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/DefaultErrorStrategy.cpp @@ -0,0 +1,336 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "NoViableAltException.h" +#include "misc/IntervalSet.h" +#include "atn/ParserATNSimulator.h" +#include "InputMismatchException.h" +#include "FailedPredicateException.h" +#include "ParserRuleContext.h" +#include "atn/RuleTransition.h" +#include "atn/ATN.h" +#include "atn/ATNState.h" +#include "support/StringUtils.h" +#include "support/Casts.h" +#include "Parser.h" +#include "CommonToken.h" +#include "Vocabulary.h" + +#include "DefaultErrorStrategy.h" + +using namespace antlr4; +using namespace antlr4::atn; + +using namespace antlrcpp; + +DefaultErrorStrategy::DefaultErrorStrategy() { + InitializeInstanceFields(); +} + +DefaultErrorStrategy::~DefaultErrorStrategy() { +} + +void DefaultErrorStrategy::reset(Parser *recognizer) { + _errorSymbols.clear(); + endErrorCondition(recognizer); +} + +void DefaultErrorStrategy::beginErrorCondition(Parser * /*recognizer*/) { + errorRecoveryMode = true; +} + +bool DefaultErrorStrategy::inErrorRecoveryMode(Parser * /*recognizer*/) { + return errorRecoveryMode; +} + +void DefaultErrorStrategy::endErrorCondition(Parser * /*recognizer*/) { + errorRecoveryMode = false; + lastErrorIndex = -1; +} + +void DefaultErrorStrategy::reportMatch(Parser *recognizer) { + endErrorCondition(recognizer); +} + +void DefaultErrorStrategy::reportError(Parser *recognizer, const RecognitionException &e) { + // If we've already reported an error and have not matched a token + // yet successfully, don't report any errors. 
+ if (inErrorRecoveryMode(recognizer)) { + return; // don't report spurious errors + } + + beginErrorCondition(recognizer); + if (is<const NoViableAltException *>(&e)) { + reportNoViableAlternative(recognizer, static_cast<const NoViableAltException &>(e)); + } else if (is<const InputMismatchException *>(&e)) { + reportInputMismatch(recognizer, static_cast<const InputMismatchException &>(e)); + } else if (is<const FailedPredicateException *>(&e)) { + reportFailedPredicate(recognizer, static_cast<const FailedPredicateException &>(e)); + } else if (is<const RecognitionException *>(&e)) { + recognizer->notifyErrorListeners(e.getOffendingToken(), e.what(), std::current_exception()); + } +} + +void DefaultErrorStrategy::recover(Parser *recognizer, std::exception_ptr /*e*/) { + if (lastErrorIndex == static_cast<int>(recognizer->getInputStream()->index()) && + lastErrorStates.contains(recognizer->getState())) { + + // uh oh, another error at same token index and previously-visited + // state in ATN; must be a case where LT(1) is in the recovery + // token set so nothing got consumed. Consume a single token + // at least to prevent an infinite loop; this is a failsafe. + recognizer->consume(); + } + lastErrorIndex = static_cast<int>(recognizer->getInputStream()->index()); + lastErrorStates.add(recognizer->getState()); + misc::IntervalSet followSet = getErrorRecoverySet(recognizer); + consumeUntil(recognizer, followSet); +} + +void DefaultErrorStrategy::sync(Parser *recognizer) { + atn::ATNState *s = recognizer->getInterpreter<atn::ATNSimulator>()->atn.states[recognizer->getState()]; + + // If already recovering, don't try to sync + if (inErrorRecoveryMode(recognizer)) { + return; + } + + TokenStream *tokens = recognizer->getTokenStream(); + size_t la = tokens->LA(1); + + // try cheaper subset first; might get lucky. 
seems to shave a wee bit off + auto nextTokens = recognizer->getATN().nextTokens(s); + if (nextTokens.contains(Token::EPSILON) || nextTokens.contains(la)) { + return; + } + + switch (s->getStateType()) { + case atn::ATNStateType::BLOCK_START: + case atn::ATNStateType::STAR_BLOCK_START: + case atn::ATNStateType::PLUS_BLOCK_START: + case atn::ATNStateType::STAR_LOOP_ENTRY: + // report error and recover if possible + if (singleTokenDeletion(recognizer) != nullptr) { + return; + } + + throw InputMismatchException(recognizer); + + case atn::ATNStateType::PLUS_LOOP_BACK: + case atn::ATNStateType::STAR_LOOP_BACK: { + reportUnwantedToken(recognizer); + misc::IntervalSet expecting = recognizer->getExpectedTokens(); + misc::IntervalSet whatFollowsLoopIterationOrRule = expecting.Or(getErrorRecoverySet(recognizer)); + consumeUntil(recognizer, whatFollowsLoopIterationOrRule); + } + break; + + default: + // do nothing if we can't identify the exact kind of ATN state + break; + } +} + +void DefaultErrorStrategy::reportNoViableAlternative(Parser *recognizer, const NoViableAltException &e) { + TokenStream *tokens = recognizer->getTokenStream(); + std::string input; + if (tokens != nullptr) { + if (e.getStartToken()->getType() == Token::EOF) { + input = "<EOF>"; + } else { + input = tokens->getText(e.getStartToken(), e.getOffendingToken()); + } + } else { + input = "<unknown input>"; + } + std::string msg = "no viable alternative at input " + escapeWSAndQuote(input); + recognizer->notifyErrorListeners(e.getOffendingToken(), msg, std::make_exception_ptr(e)); +} + +void DefaultErrorStrategy::reportInputMismatch(Parser *recognizer, const InputMismatchException &e) { + std::string msg = "mismatched input " + getTokenErrorDisplay(e.getOffendingToken()) + + " expecting " + e.getExpectedTokens().toString(recognizer->getVocabulary()); + recognizer->notifyErrorListeners(e.getOffendingToken(), msg, std::make_exception_ptr(e)); +} + +void DefaultErrorStrategy::reportFailedPredicate(Parser 
*recognizer, const FailedPredicateException &e) { + const std::string& ruleName = recognizer->getRuleNames()[recognizer->getContext()->getRuleIndex()]; + std::string msg = "rule " + ruleName + " " + e.what(); + recognizer->notifyErrorListeners(e.getOffendingToken(), msg, std::make_exception_ptr(e)); +} + +void DefaultErrorStrategy::reportUnwantedToken(Parser *recognizer) { + if (inErrorRecoveryMode(recognizer)) { + return; + } + + beginErrorCondition(recognizer); + + Token *t = recognizer->getCurrentToken(); + std::string tokenName = getTokenErrorDisplay(t); + misc::IntervalSet expecting = getExpectedTokens(recognizer); + + std::string msg = "extraneous input " + tokenName + " expecting " + expecting.toString(recognizer->getVocabulary()); + recognizer->notifyErrorListeners(t, msg, nullptr); +} + +void DefaultErrorStrategy::reportMissingToken(Parser *recognizer) { + if (inErrorRecoveryMode(recognizer)) { + return; + } + + beginErrorCondition(recognizer); + + Token *t = recognizer->getCurrentToken(); + misc::IntervalSet expecting = getExpectedTokens(recognizer); + std::string expectedText = expecting.toString(recognizer->getVocabulary()); + std::string msg = "missing " + expectedText + " at " + getTokenErrorDisplay(t); + + recognizer->notifyErrorListeners(t, msg, nullptr); +} + +Token* DefaultErrorStrategy::recoverInline(Parser *recognizer) { + // Single token deletion. + Token *matchedSymbol = singleTokenDeletion(recognizer); + if (matchedSymbol) { + // We have deleted the extra token. + // Now, move past ttype token as if all were ok. + recognizer->consume(); + return matchedSymbol; + } + + // Single token insertion. + if (singleTokenInsertion(recognizer)) { + return getMissingSymbol(recognizer); + } + + // Even that didn't work; must throw the exception. 
+ throw InputMismatchException(recognizer); +} + +bool DefaultErrorStrategy::singleTokenInsertion(Parser *recognizer) { + ssize_t currentSymbolType = recognizer->getInputStream()->LA(1); + + // if current token is consistent with what could come after current + // ATN state, then we know we're missing a token; error recovery + // is free to conjure up and insert the missing token + atn::ATNState *currentState = recognizer->getInterpreter<atn::ATNSimulator>()->atn.states[recognizer->getState()]; + atn::ATNState *next = currentState->transitions[0]->target; + const atn::ATN &atn = recognizer->getInterpreter<atn::ATNSimulator>()->atn; + misc::IntervalSet expectingAtLL2 = atn.nextTokens(next, recognizer->getContext()); + if (expectingAtLL2.contains(currentSymbolType)) { + reportMissingToken(recognizer); + return true; + } + return false; +} + +Token* DefaultErrorStrategy::singleTokenDeletion(Parser *recognizer) { + size_t nextTokenType = recognizer->getInputStream()->LA(2); + misc::IntervalSet expecting = getExpectedTokens(recognizer); + if (expecting.contains(nextTokenType)) { + reportUnwantedToken(recognizer); + recognizer->consume(); // simply delete extra token + // we want to return the token we're actually matching + Token *matchedSymbol = recognizer->getCurrentToken(); + reportMatch(recognizer); // we know current token is correct + return matchedSymbol; + } + return nullptr; +} + +Token* DefaultErrorStrategy::getMissingSymbol(Parser *recognizer) { + Token *currentSymbol = recognizer->getCurrentToken(); + misc::IntervalSet expecting = getExpectedTokens(recognizer); + size_t expectedTokenType = expecting.getMinElement(); // get any element + std::string tokenText; + if (expectedTokenType == Token::EOF) { + tokenText = "<missing EOF>"; + } else { + tokenText = "<missing " + recognizer->getVocabulary().getDisplayName(expectedTokenType) + ">"; + } + Token *current = currentSymbol; + Token *lookback = recognizer->getTokenStream()->LT(-1); + if (current->getType() == 
Token::EOF && lookback != nullptr) { + current = lookback; + } + + _errorSymbols.push_back(recognizer->getTokenFactory()->create( + { current->getTokenSource(), current->getTokenSource()->getInputStream() }, + expectedTokenType, tokenText, Token::DEFAULT_CHANNEL, INVALID_INDEX, INVALID_INDEX, + current->getLine(), current->getCharPositionInLine())); + + return _errorSymbols.back().get(); +} + +misc::IntervalSet DefaultErrorStrategy::getExpectedTokens(Parser *recognizer) { + return recognizer->getExpectedTokens(); +} + +std::string DefaultErrorStrategy::getTokenErrorDisplay(Token *t) { + if (t == nullptr) { + return "<no Token>"; + } + std::string s = getSymbolText(t); + if (s == "") { + if (getSymbolType(t) == Token::EOF) { + s = "<EOF>"; + } else { + s = "<" + std::to_string(getSymbolType(t)) + ">"; + } + } + return escapeWSAndQuote(s); +} + +std::string DefaultErrorStrategy::getSymbolText(Token *symbol) { + return symbol->getText(); +} + +size_t DefaultErrorStrategy::getSymbolType(Token *symbol) { + return symbol->getType(); +} + +std::string DefaultErrorStrategy::escapeWSAndQuote(const std::string &s) const { + std::string result; + result.reserve(s.size() + 2); + result.push_back('\''); + antlrcpp::escapeWhitespace(result, s); + result.push_back('\''); + result.shrink_to_fit(); + return result; +} + +misc::IntervalSet DefaultErrorStrategy::getErrorRecoverySet(Parser *recognizer) { + const atn::ATN &atn = recognizer->getInterpreter<atn::ATNSimulator>()->atn; + RuleContext *ctx = recognizer->getContext(); + misc::IntervalSet recoverSet; + while (ctx->invokingState != ATNState::INVALID_STATE_NUMBER) { + // compute what follows who invoked us + atn::ATNState *invokingState = atn.states[ctx->invokingState]; + const atn::RuleTransition *rt = downCast<const atn::RuleTransition*>(invokingState->transitions[0].get()); + misc::IntervalSet follow = atn.nextTokens(rt->followState); + recoverSet.addAll(follow); + + if (ctx->parent == nullptr) + break; + ctx = 
static_cast<RuleContext *>(ctx->parent); + } + recoverSet.remove(Token::EPSILON); + + return recoverSet; +} + +void DefaultErrorStrategy::consumeUntil(Parser *recognizer, const misc::IntervalSet &set) { + size_t ttype = recognizer->getInputStream()->LA(1); + while (ttype != Token::EOF && !set.contains(ttype)) { + recognizer->consume(); + ttype = recognizer->getInputStream()->LA(1); + } +} + +void DefaultErrorStrategy::InitializeInstanceFields() { + errorRecoveryMode = false; + lastErrorIndex = -1; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/DefaultErrorStrategy.h b/contrib/libs/antlr4_cpp_runtime/src/DefaultErrorStrategy.h new file mode 100644 index 0000000000..7b914468cf --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/DefaultErrorStrategy.h @@ -0,0 +1,466 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "ANTLRErrorStrategy.h" +#include "misc/IntervalSet.h" + +namespace antlr4 { + + /** + * This is the default implementation of {@link ANTLRErrorStrategy} used for + * error reporting and recovery in ANTLR parsers. + */ + class ANTLR4CPP_PUBLIC DefaultErrorStrategy : public ANTLRErrorStrategy { + public: + DefaultErrorStrategy(); + DefaultErrorStrategy(DefaultErrorStrategy const& other) = delete; + virtual ~DefaultErrorStrategy(); + + DefaultErrorStrategy& operator = (DefaultErrorStrategy const& other) = delete; + + protected: + /** + * Indicates whether the error strategy is currently "recovering from an + * error". This is used to suppress reporting multiple error messages while + * attempting to recover from a detected syntax error. + * + * @see #inErrorRecoveryMode + */ + bool errorRecoveryMode; + + /** The index into the input stream where the last error occurred. 
+     *  This is used to prevent infinite loops where an error is found
+     *  but no token is consumed during recovery...another error is found,
+     *  ad nauseam. This is a failsafe mechanism to guarantee that at least
+     *  one token/tree node is consumed for two errors.
+     */
+    int lastErrorIndex;
+
+    misc::IntervalSet lastErrorStates;
+
+    /// <summary>
+    /// {@inheritDoc}
+    /// <p/>
+    /// The default implementation simply calls <seealso cref="#endErrorCondition"/> to
+    /// ensure that the handler is not in error recovery mode.
+    /// </summary>
+  public:
+    virtual void reset(Parser *recognizer) override;
+
+    /// <summary>
+    /// This method is called to enter error recovery mode when a recognition
+    /// exception is reported.
+    /// </summary>
+    /// <param name="recognizer"> the parser instance </param>
+  protected:
+    virtual void beginErrorCondition(Parser *recognizer);
+
+    /// <summary>
+    /// {@inheritDoc}
+    /// </summary>
+  public:
+    virtual bool inErrorRecoveryMode(Parser *recognizer) override;
+
+    /// <summary>
+    /// This method is called to leave error recovery mode after recovering from
+    /// a recognition exception.
+    /// </summary>
+    /// <param name="recognizer"> the parser instance </param>
+  protected:
+    virtual void endErrorCondition(Parser *recognizer);
+
+    /// <summary>
+    /// {@inheritDoc}
+    /// <p/>
+    /// The default implementation simply calls <seealso cref="#endErrorCondition"/>.
+    /// </summary>
+  public:
+    virtual void reportMatch(Parser *recognizer) override;
+
+    /// {@inheritDoc}
+    /// <p/>
+    /// The default implementation returns immediately if the handler is already
+    /// in error recovery mode. Otherwise, it calls <seealso cref="#beginErrorCondition"/>
+    /// and dispatches the reporting task based on the runtime type of {@code e}
+    /// according to the following table.
+    ///
+    /// <ul>
+    /// <li><seealso cref="NoViableAltException"/>: Dispatches the call to
+    /// <seealso cref="#reportNoViableAlternative"/></li>
+    /// <li><seealso cref="InputMismatchException"/>: Dispatches the call to
+    /// <seealso cref="#reportInputMismatch"/></li>
+    /// <li><seealso cref="FailedPredicateException"/>: Dispatches the call to
+    /// <seealso cref="#reportFailedPredicate"/></li>
+    /// <li>All other types: calls <seealso cref="Parser#notifyErrorListeners"/> to report
+    /// the exception</li>
+    /// </ul>
+    virtual void reportError(Parser *recognizer, const RecognitionException &e) override;
+
+    /// <summary>
+    /// {@inheritDoc}
+    /// <p/>
+    /// The default implementation resynchronizes the parser by consuming tokens
+    /// until we find one in the resynchronization set--loosely the set of tokens
+    /// that can follow the current rule.
+    /// </summary>
+    virtual void recover(Parser *recognizer, std::exception_ptr e) override;
+
+    /**
+     * The default implementation of {@link ANTLRErrorStrategy#sync} makes sure
+     * that the current lookahead symbol is consistent with what we're expecting
+     * at this point in the ATN. You can call this anytime but ANTLR only
+     * generates code to check before subrules/loops and each iteration.
+     *
+     * <p>Implements Jim Idle's magic sync mechanism in closures and optional
+     * subrules. E.g.,</p>
+     *
+     * <pre>
+     * a : sync ( stuff sync )* ;
+     * sync : {consume to what can follow sync} ;
+     * </pre>
+     *
+     * At the start of a sub rule upon error, {@link #sync} performs single
+     * token deletion, if possible. If it can't do that, it bails on the current
+     * rule and uses the default error recovery, which consumes until the
+     * resynchronization set of the current rule.
+     *
+     * <p>If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block
+     * with an empty alternative), then the expected set includes what follows
+     * the subrule.</p>
+     *
+     * <p>During loop iteration, it consumes until it sees a token that can start a
+     * sub rule or what follows the loop. Yes, that is pretty aggressive. We opt to
+     * stay in the loop as long as possible.</p>
+     *
+     * <p><strong>ORIGINS</strong></p>
+     *
+     * <p>Previous versions of ANTLR did a poor job of their recovery within loops.
+     * A single mismatched token or missing token would force the parser to bail
+     * out of the entire rule surrounding the loop. So, for rule</p>
+     *
+     * <pre>
+     * classDef : 'class' ID '{' member* '}'
+     * </pre>
+     *
+     * input with an extra token between members would force the parser to
+     * consume until it found the next class definition rather than the next
+     * member definition of the current class.
+     *
+     * <p>This functionality costs a little bit of effort because the parser has to
+     * compare token sets at the start of the loop and at each iteration. If for
+     * some reason speed is suffering for you, you can turn off this
+     * functionality by simply overriding this method as a blank { }.</p>
+     */
+    virtual void sync(Parser *recognizer) override;
+
+    /// <summary>
+    /// This is called by <seealso cref="#reportError"/> when the exception is a
+    /// <seealso cref="NoViableAltException"/>.
+    /// </summary>
+    /// <seealso cref="#reportError"/>
+    /// <param name="recognizer"> the parser instance </param>
+    /// <param name="e"> the recognition exception </param>
+  protected:
+    virtual void reportNoViableAlternative(Parser *recognizer, const NoViableAltException &e);
+
+    /// <summary>
+    /// This is called by <seealso cref="#reportError"/> when the exception is an
+    /// <seealso cref="InputMismatchException"/>.
+    /// </summary>
+    /// <seealso cref="#reportError"/>
+    /// <param name="recognizer"> the parser instance </param>
+    /// <param name="e"> the recognition exception </param>
+    virtual void reportInputMismatch(Parser *recognizer, const InputMismatchException &e);
+
+    /// <summary>
+    /// This is called by <seealso cref="#reportError"/> when the exception is a
+    /// <seealso cref="FailedPredicateException"/>.
+    /// </summary>
+    /// <seealso cref="#reportError"/>
+    /// <param name="recognizer"> the parser instance </param>
+    /// <param name="e"> the recognition exception </param>
+    virtual void reportFailedPredicate(Parser *recognizer, const FailedPredicateException &e);
+
+    /**
+     * This method is called to report a syntax error which requires the removal
+     * of a token from the input stream. At the time this method is called, the
+     * erroneous symbol is the current {@code LT(1)} symbol and has not yet been
+     * removed from the input stream. When this method returns,
+     * {@code recognizer} is in error recovery mode.
+     *
+     * <p>This method is called when {@link #singleTokenDeletion} identifies
+     * single-token deletion as a viable recovery strategy for a mismatched
+     * input error.</p>
+     *
+     * <p>The default implementation simply returns if the handler is already in
+     * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
+     * enter error recovery mode, followed by calling
+     * {@link Parser#notifyErrorListeners}.</p>
+     *
+     * @param recognizer the parser instance
+     */
+    virtual void reportUnwantedToken(Parser *recognizer);
+
+    /**
+     * This method is called to report a syntax error which requires the
+     * insertion of a missing token into the input stream. At the time this
+     * method is called, the missing token has not yet been inserted. When this
+     * method returns, {@code recognizer} is in error recovery mode.
+     *
+     * <p>This method is called when {@link #singleTokenInsertion} identifies
+     * single-token insertion as a viable recovery strategy for a mismatched
+     * input error.</p>
+     *
+     * <p>The default implementation simply returns if the handler is already in
+     * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
+     * enter error recovery mode, followed by calling
+     * {@link Parser#notifyErrorListeners}.</p>
+     *
+     * @param recognizer the parser instance
+     */
+    virtual void reportMissingToken(Parser *recognizer);
+
+  public:
+    /**
+     * {@inheritDoc}
+     *
+     * <p>The default implementation attempts to recover from the mismatched input
+     * by using single token insertion and deletion as described below. If the
+     * recovery attempt fails, this method throws an
+     * {@link InputMismatchException}.</p>
+     *
+     * <p><strong>EXTRA TOKEN</strong> (single token deletion)</p>
+     *
+     * <p>{@code LA(1)} is not what we are looking for. If {@code LA(2)} has the
+     * right token, however, then assume {@code LA(1)} is some extra spurious
+     * token and delete it. Then consume and return the next token (which was
+     * the {@code LA(2)} token) as the successful result of the match operation.</p>
+     *
+     * <p>This recovery strategy is implemented by {@link #singleTokenDeletion}.</p>
+     *
+     * <p><strong>MISSING TOKEN</strong> (single token insertion)</p>
+     *
+     * <p>If the current token (at {@code LA(1)}) is consistent with what could come
+     * after the expected {@code LA(1)} token, then assume the token is missing
+     * and use the parser's {@link TokenFactory} to create it on the fly. The
+     * "insertion" is performed by returning the created token as the successful
+     * result of the match operation.</p>
+     *
+     * <p>This recovery strategy is implemented by {@link #singleTokenInsertion}.</p>
+     *
+     * <p><strong>EXAMPLE</strong></p>
+     *
+     * <p>For example, input {@code i=(3;} is clearly missing the {@code ')'}. When
+     * the parser returns from the nested call to {@code expr}, it will have
+     * call chain:</p>
+     *
+     * <pre>
+     * stat → expr → atom
+     * </pre>
+     *
+     * and it will be trying to match the {@code ')'} at this point in the
+     * derivation:
+     *
+     * <pre>
+     * => ID '=' '(' INT ')' ('+' atom)* ';'
+     *                    ^
+     * </pre>
+     *
+     * The attempt to match {@code ')'} will fail when it sees {@code ';'} and
+     * calls {@link #recoverInline}. To recover, it sees that {@code LA(1)==';'}
+     * is in the set of tokens that can follow the {@code ')'} token reference
+     * in rule {@code atom}. It can assume that you forgot the {@code ')'}.
+     */
+    virtual Token* recoverInline(Parser *recognizer) override;
+
+    /// <summary>
+    /// This method implements the single-token insertion inline error recovery
+    /// strategy. It is called by <seealso cref="#recoverInline"/> if the single-token
+    /// deletion strategy fails to recover from the mismatched input. If this
+    /// method returns {@code true}, {@code recognizer} will be in error recovery
+    /// mode.
+    /// <p/>
+    /// This method determines whether or not single-token insertion is viable by
+    /// checking if the {@code LA(1)} input symbol could be successfully matched
+    /// if it were instead the {@code LA(2)} symbol. If this method returns
+    /// {@code true}, the caller is responsible for creating and inserting a
+    /// token with the correct type to produce this behavior.
+    /// </summary>
+    /// <param name="recognizer"> the parser instance </param>
+    /// <returns> {@code true} if single-token insertion is a viable recovery
+    /// strategy for the current mismatched input, otherwise {@code false} </returns>
+  protected:
+    virtual bool singleTokenInsertion(Parser *recognizer);
+
+    /// <summary>
+    /// This method implements the single-token deletion inline error recovery
+    /// strategy. It is called by <seealso cref="#recoverInline"/> to attempt to recover
+    /// from mismatched input. If this method returns null, the parser and error
+    /// handler state will not have changed. If this method returns non-null,
+    /// {@code recognizer} will <em>not</em> be in error recovery mode since the
+    /// returned token was a successful match.
+    /// <p/>
+    /// If the single-token deletion is successful, this method calls
+    /// <seealso cref="#reportUnwantedToken"/> to report the error, followed by
+    /// <seealso cref="Parser#consume"/> to actually "delete" the extraneous token. Then,
+    /// before returning, <seealso cref="#reportMatch"/> is called to signal a successful
+    /// match.
+    /// </summary>
+    /// <param name="recognizer"> the parser instance </param>
+    /// <returns> the successfully matched <seealso cref="Token"/> instance if single-token
+    /// deletion successfully recovers from the mismatched input, otherwise
+    /// {@code null} </returns>
+    virtual Token* singleTokenDeletion(Parser *recognizer);
+
+    /// <summary>
+    /// Conjure up a missing token during error recovery.
+    ///
+    /// The recognizer attempts to recover from single missing
+    /// symbols. But, actions might refer to that missing symbol.
+    /// For example, x=ID {f($x);}. The action clearly assumes
+    /// that there has been an identifier matched previously and that
+    /// $x points at that token. If that token is missing, but
+    /// the next token in the stream is what we want, we assume that
+    /// this token is missing and we keep going. Because we
+    /// have to return some token to replace the missing token,
+    /// we have to conjure one up. This method gives the user control
+    /// over the tokens returned for missing tokens. Mostly,
+    /// you will want to create something special for identifier
+    /// tokens. For literals such as '{' and ',', the default
+    /// action in the parser or tree parser works. It simply creates
+    /// a CommonToken of the appropriate type. The text will be the token.
+    /// If you change what tokens must be created by the lexer,
+    /// override this method to create the appropriate tokens.
+    ///
+    /// The default implementation stores the created token in this strategy's
+    /// private list of error symbols and returns a non-owning pointer to it.
+    /// </summary>
+    virtual Token* getMissingSymbol(Parser *recognizer);
+
+    /// <summary>
+    /// Returns the set of tokens expected at the current parser position.
+    /// The default implementation forwards to the recognizer's getExpectedTokens().
+    /// </summary>
+    virtual misc::IntervalSet getExpectedTokens(Parser *recognizer);
+
+    /// <summary>
+    /// How should a token be displayed in an error message? The default
+    /// is to display just the text, but during development you might
+    /// want to have a lot of information spit out. Override in that case
+    /// to use t.toString() (which, for CommonToken, dumps everything about
+    /// the token). This is better than forcing you to override a method in
+    /// your token objects because you don't have to go modify your lexer
+    /// so that it creates a new class.
+    ///
+    /// The default implementation returns "<no Token>" for a null token; when the
+    /// symbol text is empty it substitutes "<EOF>" or "<type>" before quoting.
+    /// </summary>
+    virtual std::string getTokenErrorDisplay(Token *t);
+
+    /// <summary> Returns the text of the given token (default: Token::getText()). </summary>
+    virtual std::string getSymbolText(Token *symbol);
+
+    /// <summary> Returns the type of the given token (default: Token::getType()). </summary>
+    virtual size_t getSymbolType(Token *symbol);
+
+    /// <summary>
+    /// Returns {@code s} with whitespace escaped (via antlrcpp::escapeWhitespace)
+    /// and wrapped in single quotes.
+    /// </summary>
+    virtual std::string escapeWSAndQuote(const std::string &s) const;
+
+    /*  Compute the error recovery set for the current rule.  During
+     *  rule invocation, the parser pushes the set of tokens that can
+     *  follow that rule reference on the stack; this amounts to
+     *  computing FIRST of what follows the rule reference in the
+     *  enclosing rule. See LinearApproximator.FIRST().
+     *  This local follow set only includes tokens
+     *  from within the rule; i.e., the FIRST computation done by
+     *  ANTLR stops at the end of a rule.
+     *
+     *  EXAMPLE
+     *
+     *  When you find a "no viable alt exception", the input is not
+     *  consistent with any of the alternatives for rule r.  The best
+     *  thing to do is to consume tokens until you see something that
+     *  can legally follow a call to r *or* any rule that called r.
+     *  You don't want the exact set of viable next tokens because the
+     *  input might just be missing a token--you might consume the
+     *  rest of the input looking for one of the missing tokens.
+     *
+     *  Consider grammar:
+     *
+     *  a : '[' b ']'
+     *    | '(' b ')'
+     *    ;
+     *  b : c '^' INT ;
+     *  c : ID
+     *    | INT
+     *    ;
+     *
+     *  At each rule invocation, the set of tokens that could follow
+     *  that rule is pushed on a stack.  Here are the various
+     *  context-sensitive follow sets:
+     *
+     *  FOLLOW(b1_in_a) = FIRST(']') = ']'
+     *  FOLLOW(b2_in_a) = FIRST(')') = ')'
+     *  FOLLOW(c_in_b) = FIRST('^') = '^'
+     *
+     *  Upon erroneous input "[]", the call chain is
+     *
+     *  a -> b -> c
+     *
+     *  and, hence, the follow context stack is:
+     *
+     *  depth  follow set     start of rule execution
+     *    0    <EOF>          a (from main())
+     *    1    ']'            b
+     *    2    '^'            c
+     *
+     *  Notice that ')' is not included, because b would have to have
+     *  been called from a different context in rule a for ')' to be
+     *  included.
+     *
+     *  For error recovery, we cannot consider FOLLOW(c)
+     *  (context-sensitive or otherwise).  We need the combined set of
+     *  all context-sensitive FOLLOW sets--the set of all tokens that
+     *  could follow any reference in the call chain.  We need to
+     *  resync to one of those tokens.  Note that FOLLOW(c)='^' and if
+     *  we resync'd to that token, we'd consume until EOF.  We need to
+     *  sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
+     *  In this case, for input "[]", LA(1) is ']' and in the set, so we would
+     *  not consume anything. After printing an error, rule c would
+     *  return normally.  Rule b would not find the required '^' though.
+     *  At this point, it gets a mismatched token error and throws an
+     *  exception (since LA(1) is not in the viable following token
+     *  set).  The rule exception handler tries to recover, but finds
+     *  the same recovery set and doesn't consume anything.  Rule b
+     *  exits normally returning to rule a.  Now it finds the ']' (and
+     *  with the successful match exits errorRecovery mode).
+     *
+     *  So, you can see that the parser walks up the call chain looking
+     *  for the token that was a member of the recovery set.
+     *
+     *  Errors are not generated in errorRecovery mode.
+     *
+     *  ANTLR's error recovery mechanism is based upon original ideas:
+     *
+     *  "Algorithms + Data Structures = Programs" by Niklaus Wirth
+     *
+     *  and
+     *
+     *  "A note on error recovery in recursive descent parsers":
+     *  http://portal.acm.org/citation.cfm?id=947902.947905
+     *
+     *  Later, Josef Grosch had some good ideas:
+     *
+     *  "Efficient and Comfortable Error Recovery in Recursive Descent
+     *  Parsers":
+     *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
+     *
+     *  Like Grosch I implement context-sensitive FOLLOW sets that are combined
+     *  at run-time upon error to avoid overhead during parsing.
+     */
+    virtual misc::IntervalSet getErrorRecoverySet(Parser *recognizer);
+
+    /// <summary>
+    /// Consume tokens until one matches the given token set or the end of the
+    /// input (EOF) is reached. </summary>
+    virtual void consumeUntil(Parser *recognizer, const misc::IntervalSet &set);
+
+  private:
+    // Owns the tokens conjured up by getMissingSymbol(); the raw pointers that
+    // method returns point into this list.
+    std::vector<std::unique_ptr<Token>> _errorSymbols; // Temporarily created token.
+    // Sets errorRecoveryMode to false and lastErrorIndex to -1.
+    void InitializeInstanceFields();
+  };
+
+} // namespace antlr4
diff --git a/contrib/libs/antlr4_cpp_runtime/src/DiagnosticErrorListener.cpp b/contrib/libs/antlr4_cpp_runtime/src/DiagnosticErrorListener.cpp
new file mode 100644
index 0000000000..ef6f64372d
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/DiagnosticErrorListener.cpp
@@ -0,0 +1,84 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */ + +#include "atn/PredictionContext.h" +#include "atn/ATNConfig.h" +#include "atn/ATNConfigSet.h" +#include "Parser.h" +#include "misc/Interval.h" +#include "dfa/DFA.h" + +#include "DiagnosticErrorListener.h" + +using namespace antlr4; + +DiagnosticErrorListener::DiagnosticErrorListener() : DiagnosticErrorListener(true) { +} + +DiagnosticErrorListener::DiagnosticErrorListener(bool exactOnly_) : exactOnly(exactOnly_) { +} + +void DiagnosticErrorListener::reportAmbiguity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, + bool exact, const antlrcpp::BitSet &ambigAlts, atn::ATNConfigSet *configs) { + if (exactOnly && !exact) { + return; + } + + std::string decision = getDecisionDescription(recognizer, dfa); + antlrcpp::BitSet conflictingAlts = getConflictingAlts(ambigAlts, configs); + std::string text = recognizer->getTokenStream()->getText(misc::Interval(startIndex, stopIndex)); + std::string message = "reportAmbiguity d=" + decision + ": ambigAlts=" + conflictingAlts.toString() + + ", input='" + text + "'"; + + recognizer->notifyErrorListeners(message); +} + +void DiagnosticErrorListener::reportAttemptingFullContext(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, + size_t stopIndex, const antlrcpp::BitSet &/*conflictingAlts*/, atn::ATNConfigSet * /*configs*/) { + std::string decision = getDecisionDescription(recognizer, dfa); + std::string text = recognizer->getTokenStream()->getText(misc::Interval(startIndex, stopIndex)); + std::string message = "reportAttemptingFullContext d=" + decision + ", input='" + text + "'"; + recognizer->notifyErrorListeners(message); +} + +void DiagnosticErrorListener::reportContextSensitivity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, + size_t stopIndex, size_t /*prediction*/, atn::ATNConfigSet * /*configs*/) { + std::string decision = getDecisionDescription(recognizer, dfa); + std::string text = recognizer->getTokenStream()->getText(misc::Interval(startIndex, stopIndex)); + 
std::string message = "reportContextSensitivity d=" + decision + ", input='" + text + "'"; + recognizer->notifyErrorListeners(message); +} + +std::string DiagnosticErrorListener::getDecisionDescription(Parser *recognizer, const dfa::DFA &dfa) { + size_t decision = dfa.decision; + size_t ruleIndex = (reinterpret_cast<atn::ATNState*>(dfa.atnStartState))->ruleIndex; + + const std::vector<std::string>& ruleNames = recognizer->getRuleNames(); + if (ruleIndex == INVALID_INDEX || ruleIndex >= ruleNames.size()) { + return std::to_string(decision); + } + + std::string ruleName = ruleNames[ruleIndex]; + if (ruleName == "" || ruleName.empty()) { + return std::to_string(decision); + } + + return std::to_string(decision) + " (" + ruleName + ")"; +} + +antlrcpp::BitSet DiagnosticErrorListener::getConflictingAlts(const antlrcpp::BitSet &reportedAlts, + atn::ATNConfigSet *configs) { + if (reportedAlts.count() > 0) { // Not exactly like the original Java code, but this listener is only used + // in the TestRig (where it never provides a good alt set), so it's probably ok so. + return reportedAlts; + } + + antlrcpp::BitSet result; + for (auto &config : configs->configs) { + result.set(config->alt); + } + + return result; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/DiagnosticErrorListener.h b/contrib/libs/antlr4_cpp_runtime/src/DiagnosticErrorListener.h new file mode 100644 index 0000000000..ed6d749429 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/DiagnosticErrorListener.h @@ -0,0 +1,80 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "BaseErrorListener.h" + +namespace antlr4 { + + /// <summary> + /// This implementation of <seealso cref="ANTLRErrorListener"/> can be used to identify + /// certain potential correctness and performance problems in grammars. 
"Reports" + /// are made by calling <seealso cref="Parser#notifyErrorListeners"/> with the appropriate + /// message. + /// + /// <ul> + /// <li><b>Ambiguities</b>: These are cases where more than one path through the + /// grammar can match the input.</li> + /// <li><b>Weak context sensitivity</b>: These are cases where full-context + /// prediction resolved an SLL conflict to a unique alternative which equaled the + /// minimum alternative of the SLL conflict.</li> + /// <li><b>Strong (forced) context sensitivity</b>: These are cases where the + /// full-context prediction resolved an SLL conflict to a unique alternative, + /// <em>and</em> the minimum alternative of the SLL conflict was found to not be + /// a truly viable alternative. Two-stage parsing cannot be used for inputs where + /// this situation occurs.</li> + /// </ul> + /// + /// @author Sam Harwell + /// </summary> + class ANTLR4CPP_PUBLIC DiagnosticErrorListener : public BaseErrorListener { + /// <summary> + /// When {@code true}, only exactly known ambiguities are reported. + /// </summary> + protected: + const bool exactOnly; + + /// <summary> + /// Initializes a new instance of <seealso cref="DiagnosticErrorListener"/> which only + /// reports exact ambiguities. + /// </summary> + public: + DiagnosticErrorListener(); + + /// <summary> + /// Initializes a new instance of <seealso cref="DiagnosticErrorListener"/>, specifying + /// whether all ambiguities or only exact ambiguities are reported. + /// </summary> + /// <param name="exactOnly"> {@code true} to report only exact ambiguities, otherwise + /// {@code false} to report all ambiguities. 
</param> + DiagnosticErrorListener(bool exactOnly); + + virtual void reportAmbiguity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, bool exact, + const antlrcpp::BitSet &ambigAlts, atn::ATNConfigSet *configs) override; + + virtual void reportAttemptingFullContext(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, + const antlrcpp::BitSet &conflictingAlts, atn::ATNConfigSet *configs) override; + + virtual void reportContextSensitivity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, + size_t prediction, atn::ATNConfigSet *configs) override; + + protected: + virtual std::string getDecisionDescription(Parser *recognizer, const dfa::DFA &dfa); + + /// <summary> + /// Computes the set of conflicting or ambiguous alternatives from a + /// configuration set, if that information was not already provided by the + /// parser. + /// </summary> + /// <param name="reportedAlts"> The set of conflicting or ambiguous alternatives, as + /// reported by the parser. </param> + /// <param name="configs"> The conflicting or ambiguous configuration set. </param> + /// <returns> Returns {@code reportedAlts} if it is not {@code null}, otherwise + /// returns the set of alternatives represented in {@code configs}. </returns> + virtual antlrcpp::BitSet getConflictingAlts(const antlrcpp::BitSet &reportedAlts, atn::ATNConfigSet *configs); + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/Exceptions.cpp b/contrib/libs/antlr4_cpp_runtime/src/Exceptions.cpp new file mode 100644 index 0000000000..24aea29b0c --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/Exceptions.cpp @@ -0,0 +1,64 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "Exceptions.h" + +using namespace antlr4; + +RuntimeException::RuntimeException(const std::string &msg) : std::exception(), _message(msg) { +} + +const char* RuntimeException::what() const noexcept { + return _message.c_str(); +} + +//------------------ IOException --------------------------------------------------------------------------------------- + +IOException::IOException(const std::string &msg) : std::exception(), _message(msg) { +} + +const char* IOException::what() const noexcept { + return _message.c_str(); +} + +//------------------ IllegalStateException ----------------------------------------------------------------------------- + +IllegalStateException::~IllegalStateException() { +} + +//------------------ IllegalArgumentException -------------------------------------------------------------------------- + +IllegalArgumentException::~IllegalArgumentException() { +} + +//------------------ NullPointerException ------------------------------------------------------------------------------ + +NullPointerException::~NullPointerException() { +} + +//------------------ IndexOutOfBoundsException ------------------------------------------------------------------------- + +IndexOutOfBoundsException::~IndexOutOfBoundsException() { +} + +//------------------ UnsupportedOperationException --------------------------------------------------------------------- + +UnsupportedOperationException::~UnsupportedOperationException() { +} + +//------------------ EmptyStackException ------------------------------------------------------------------------------- + +EmptyStackException::~EmptyStackException() { +} + +//------------------ CancellationException ----------------------------------------------------------------------------- + +CancellationException::~CancellationException() { +} + +//------------------ ParseCancellationException ------------------------------------------------------------------------ + 
+ParseCancellationException::~ParseCancellationException() { +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/Exceptions.h b/contrib/libs/antlr4_cpp_runtime/src/Exceptions.h new file mode 100644 index 0000000000..35d72b52ee --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/Exceptions.h @@ -0,0 +1,99 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { + + // An exception hierarchy modelled loosely after java.lang.* exceptions. + class ANTLR4CPP_PUBLIC RuntimeException : public std::exception { + private: + std::string _message; + public: + RuntimeException(const std::string &msg = ""); + + virtual const char* what() const noexcept override; + }; + + class ANTLR4CPP_PUBLIC IllegalStateException : public RuntimeException { + public: + IllegalStateException(const std::string &msg = "") : RuntimeException(msg) {} + IllegalStateException(IllegalStateException const&) = default; + ~IllegalStateException(); + IllegalStateException& operator=(IllegalStateException const&) = default; + }; + + class ANTLR4CPP_PUBLIC IllegalArgumentException : public RuntimeException { + public: + IllegalArgumentException(IllegalArgumentException const&) = default; + IllegalArgumentException(const std::string &msg = "") : RuntimeException(msg) {} + ~IllegalArgumentException(); + IllegalArgumentException& operator=(IllegalArgumentException const&) = default; + }; + + class ANTLR4CPP_PUBLIC NullPointerException : public RuntimeException { + public: + NullPointerException(const std::string &msg = "") : RuntimeException(msg) {} + NullPointerException(NullPointerException const&) = default; + ~NullPointerException(); + NullPointerException& operator=(NullPointerException const&) = default; + }; + + class ANTLR4CPP_PUBLIC IndexOutOfBoundsException : public RuntimeException { 
+ public: + IndexOutOfBoundsException(const std::string &msg = "") : RuntimeException(msg) {} + IndexOutOfBoundsException(IndexOutOfBoundsException const&) = default; + ~IndexOutOfBoundsException(); + IndexOutOfBoundsException& operator=(IndexOutOfBoundsException const&) = default; + }; + + class ANTLR4CPP_PUBLIC UnsupportedOperationException : public RuntimeException { + public: + UnsupportedOperationException(const std::string &msg = "") : RuntimeException(msg) {} + UnsupportedOperationException(UnsupportedOperationException const&) = default; + ~UnsupportedOperationException(); + UnsupportedOperationException& operator=(UnsupportedOperationException const&) = default; + + }; + + class ANTLR4CPP_PUBLIC EmptyStackException : public RuntimeException { + public: + EmptyStackException(const std::string &msg = "") : RuntimeException(msg) {} + EmptyStackException(EmptyStackException const&) = default; + ~EmptyStackException(); + EmptyStackException& operator=(EmptyStackException const&) = default; + }; + + // IOException is not a runtime exception (in the java hierarchy). + // Hence we have to duplicate the RuntimeException implementation. 
+ class ANTLR4CPP_PUBLIC IOException : public std::exception { + private: + std::string _message; + + public: + IOException(const std::string &msg = ""); + + virtual const char* what() const noexcept override; + }; + + class ANTLR4CPP_PUBLIC CancellationException : public IllegalStateException { + public: + CancellationException(const std::string &msg = "") : IllegalStateException(msg) {} + CancellationException(CancellationException const&) = default; + ~CancellationException(); + CancellationException& operator=(CancellationException const&) = default; + }; + + class ANTLR4CPP_PUBLIC ParseCancellationException : public CancellationException { + public: + ParseCancellationException(const std::string &msg = "") : CancellationException(msg) {} + ParseCancellationException(ParseCancellationException const&) = default; + ~ParseCancellationException(); + ParseCancellationException& operator=(ParseCancellationException const&) = default; + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/FailedPredicateException.cpp b/contrib/libs/antlr4_cpp_runtime/src/FailedPredicateException.cpp new file mode 100644 index 0000000000..ca2537b300 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/FailedPredicateException.cpp @@ -0,0 +1,52 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/ParserATNSimulator.h" +#include "Parser.h" +#include "atn/PredicateTransition.h" +#include "atn/ATN.h" +#include "atn/ATNState.h" +#include "support/Casts.h" +#include "support/CPPUtils.h" + +#include "FailedPredicateException.h" + +using namespace antlr4; +using namespace antlrcpp; + +FailedPredicateException::FailedPredicateException(Parser *recognizer) : FailedPredicateException(recognizer, "", "") { +} + +FailedPredicateException::FailedPredicateException(Parser *recognizer, const std::string &predicate): FailedPredicateException(recognizer, predicate, "") { +} + +FailedPredicateException::FailedPredicateException(Parser *recognizer, const std::string &predicate, const std::string &message) + : RecognitionException(!message.empty() ? message : "failed predicate: " + predicate + "?", recognizer, + recognizer->getInputStream(), recognizer->getContext(), recognizer->getCurrentToken()) { + + atn::ATNState *s = recognizer->getInterpreter<atn::ATNSimulator>()->atn.states[recognizer->getState()]; + const atn::Transition *transition = s->transitions[0].get(); + if (transition->getTransitionType() == atn::TransitionType::PREDICATE) { + _ruleIndex = downCast<const atn::PredicateTransition&>(*transition).getRuleIndex(); + _predicateIndex = downCast<const atn::PredicateTransition&>(*transition).getPredIndex(); + } else { + _ruleIndex = 0; + _predicateIndex = 0; + } + + _predicate = predicate; +} + +size_t FailedPredicateException::getRuleIndex() { + return _ruleIndex; +} + +size_t FailedPredicateException::getPredIndex() { + return _predicateIndex; +} + +std::string FailedPredicateException::getPredicate() { + return _predicate; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/FailedPredicateException.h b/contrib/libs/antlr4_cpp_runtime/src/FailedPredicateException.h new file mode 100644 index 0000000000..89bec0fd0b --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/FailedPredicateException.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2012-2017 The 
ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "RecognitionException.h" + +namespace antlr4 { + + /// A semantic predicate failed during validation. Validation of predicates + /// occurs when normally parsing the alternative just like matching a token. + /// Disambiguating predicate evaluation occurs when we test a predicate during + /// prediction. + class ANTLR4CPP_PUBLIC FailedPredicateException : public RecognitionException { + public: + explicit FailedPredicateException(Parser *recognizer); + FailedPredicateException(Parser *recognizer, const std::string &predicate); + FailedPredicateException(Parser *recognizer, const std::string &predicate, const std::string &message); + + virtual size_t getRuleIndex(); + virtual size_t getPredIndex(); + virtual std::string getPredicate(); + + private: + size_t _ruleIndex; + size_t _predicateIndex; + std::string _predicate; + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/FlatHashMap.h b/contrib/libs/antlr4_cpp_runtime/src/FlatHashMap.h new file mode 100644 index 0000000000..ad5ffa2432 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/FlatHashMap.h @@ -0,0 +1,57 @@ +// Copyright 2012-2022 The ANTLR Project +// +// Redistribution and use in source and binary forms, with or without modification, are permitted +// provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of conditions +// and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// 3. 
Neither the name of the copyright holder nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written +// permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#pragma once + +#include "antlr4-common.h" + +#if ANTLR4CPP_USING_ABSEIL +#error #include "absl/container/flat_hash_map.h" +#else +#include <unordered_map> +#endif + +// By default ANTLRv4 uses containers provided by the C++ standard library. In most deployments this +// is fine, however in some using custom containers may be preferred. This header allows that by +// optionally supporting some alternative implementations and allowing for easier patching of +// other alternatives.
+ +namespace antlr4 { + +#if ANTLR4CPP_USING_ABSEIL + template <typename Key, typename Value, + typename Hash = typename absl::flat_hash_map<Key, Value>::hasher, + typename Equal = typename absl::flat_hash_map<Key, Value>::key_equal, + typename Allocator = typename absl::flat_hash_map<Key, Value>::allocator_type> + using FlatHashMap = absl::flat_hash_map<Key, Value, Hash, Equal, Allocator>; +#else + template <typename Key, typename Value, + typename Hash = typename std::unordered_map<Key, Value>::hasher, + typename Equal = typename std::unordered_map<Key, Value>::key_equal, + typename Allocator = typename std::unordered_map<Key, Value>::allocator_type> + using FlatHashMap = std::unordered_map<Key, Value, Hash, Equal, Allocator>; +#endif + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/FlatHashSet.h b/contrib/libs/antlr4_cpp_runtime/src/FlatHashSet.h new file mode 100644 index 0000000000..5396c2bd5d --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/FlatHashSet.h @@ -0,0 +1,57 @@ +// Copyright 2012-2022 The ANTLR Project +// +// Redistribution and use in source and binary forms, with or without modification, are permitted +// provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of conditions +// and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written +// permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#pragma once + +#include "antlr4-common.h" + +#if ANTLR4CPP_USING_ABSEIL +#error #include "absl/container/flat_hash_set.h" +#else +#include <unordered_set> +#endif + +// By default ANTLRv4 uses containers provided by the C++ standard library. In most deployments this +// is fine, however in some using custom containers may be preferred. This header allows that by +// optionally supporting some alternative implementations and allowing for easier patching of +// other alternatives.
+ +namespace antlr4 { + +#if ANTLR4CPP_USING_ABSEIL + template <typename Key, + typename Hash = typename absl::flat_hash_set<Key>::hasher, + typename Equal = typename absl::flat_hash_set<Key>::key_equal, + typename Allocator = typename absl::flat_hash_set<Key>::allocator_type> + using FlatHashSet = absl::flat_hash_set<Key, Hash, Equal, Allocator>; +#else + template <typename Key, + typename Hash = typename std::unordered_set<Key>::hasher, + typename Equal = typename std::unordered_set<Key>::key_equal, + typename Allocator = typename std::unordered_set<Key>::allocator_type> + using FlatHashSet = std::unordered_set<Key, Hash, Equal, Allocator>; +#endif + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/InputMismatchException.cpp b/contrib/libs/antlr4_cpp_runtime/src/InputMismatchException.cpp new file mode 100644 index 0000000000..4f4947985d --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/InputMismatchException.cpp @@ -0,0 +1,18 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "Parser.h" + +#include "InputMismatchException.h" + +using namespace antlr4; + +InputMismatchException::InputMismatchException(Parser *recognizer) + : RecognitionException(recognizer, recognizer->getInputStream(), recognizer->getContext(), + recognizer->getCurrentToken()) { +} + +InputMismatchException::~InputMismatchException() { +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/InputMismatchException.h b/contrib/libs/antlr4_cpp_runtime/src/InputMismatchException.h new file mode 100644 index 0000000000..8b75420968 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/InputMismatchException.h @@ -0,0 +1,24 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "RecognitionException.h" + +namespace antlr4 { + + /// <summary> + /// This signifies any kind of mismatched input exceptions such as + /// when the current input does not match the expected token. + /// </summary> + class ANTLR4CPP_PUBLIC InputMismatchException : public RecognitionException { + public: + InputMismatchException(Parser *recognizer); + InputMismatchException(InputMismatchException const&) = default; + ~InputMismatchException(); + InputMismatchException& operator=(InputMismatchException const&) = default; + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/IntStream.cpp b/contrib/libs/antlr4_cpp_runtime/src/IntStream.cpp new file mode 100644 index 0000000000..37a90a7cd9 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/IntStream.cpp @@ -0,0 +1,12 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "IntStream.h" + +using namespace antlr4; + +const std::string IntStream::UNKNOWN_SOURCE_NAME = "<unknown>"; + +IntStream::~IntStream() = default; diff --git a/contrib/libs/antlr4_cpp_runtime/src/IntStream.h b/contrib/libs/antlr4_cpp_runtime/src/IntStream.h new file mode 100644 index 0000000000..40a0f2a9e8 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/IntStream.h @@ -0,0 +1,218 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { + + /// <summary> + /// A simple stream of symbols whose values are represented as integers. 
This + /// interface provides <em>marked ranges</em> with support for a minimum level + /// of buffering necessary to implement arbitrary lookahead during prediction. + /// For more information on marked ranges, see <seealso cref="#mark"/>. + /// <p/> + /// <strong>Initializing Methods:</strong> Some methods in this interface have + /// unspecified behavior if no call to an initializing method has occurred after + /// the stream was constructed. The following is a list of initializing methods: + /// + /// <ul> + /// <li><seealso cref="#LA"/></li> + /// <li><seealso cref="#consume"/></li> + /// <li><seealso cref="#size"/></li> + /// </ul> + /// </summary> + class ANTLR4CPP_PUBLIC IntStream { + public: + static constexpr size_t EOF = std::numeric_limits<size_t>::max(); + + /// The value returned by <seealso cref="#LA LA()"/> when the end of the stream is + /// reached. + /// No explicit EOF definition. We got EOF on all platforms. + //static const size_t _EOF = std::ios::eofbit; + + /// <summary> + /// The value returned by <seealso cref="#getSourceName"/> when the actual name of the + /// underlying source is not known. + /// </summary> + static const std::string UNKNOWN_SOURCE_NAME; + + virtual ~IntStream(); + + /// <summary> + /// Consumes the current symbol in the stream. This method has the following + /// effects: + /// + /// <ul> + /// <li><strong>Forward movement:</strong> The value of <seealso cref="#index index()"/> + /// before calling this method is less than the value of {@code index()} + /// after calling this method.</li> + /// <li><strong>Ordered lookahead:</strong> The value of {@code LA(1)} before + /// calling this method becomes the value of {@code LA(-1)} after calling + /// this method.</li> + /// </ul> + /// + /// Note that calling this method does not guarantee that {@code index()} is + /// incremented by exactly 1, as that would preclude the ability to implement + /// filtering streams (e.g. 
<seealso cref="CommonTokenStream"/> which distinguishes + /// between "on-channel" and "off-channel" tokens). + /// </summary> + /// <exception cref="IllegalStateException"> if an attempt is made to consume the + /// end of the stream (i.e. if {@code LA(1)==}<seealso cref="#EOF EOF"/> before calling + /// {@code consume}). </exception> + virtual void consume() = 0; + + /// <summary> + /// Gets the value of the symbol at offset {@code i} from the current + /// position. When {@code i==1}, this method returns the value of the current + /// symbol in the stream (which is the next symbol to be consumed). When + /// {@code i==-1}, this method returns the value of the previously read + /// symbol in the stream. It is not valid to call this method with + /// {@code i==0}, but the specific behavior is unspecified because this + /// method is frequently called from performance-critical code. + /// <p/> + /// This method is guaranteed to succeed if any of the following are true: + /// + /// <ul> + /// <li>{@code i>0}</li> + /// <li>{@code i==-1} and <seealso cref="#index index()"/> returns a value greater + /// than the value of {@code index()} after the stream was constructed + /// and {@code LA(1)} was called in that order. Specifying the current + /// {@code index()} relative to the index after the stream was created + /// allows for filtering implementations that do not return every symbol + /// from the underlying source. Specifying the call to {@code LA(1)} + /// allows for lazily initialized streams.</li> + /// <li>{@code LA(i)} refers to a symbol consumed within a marked region + /// that has not yet been released.</li> + /// </ul> + /// + /// If {@code i} represents a position at or beyond the end of the stream, + /// this method returns <seealso cref="#EOF"/>.
+ /// <p/> + /// The return value is unspecified if {@code i<0} and fewer than {@code -i} + /// calls to <seealso cref="#consume consume()"/> have occurred from the beginning of + /// the stream before calling this method. + /// </summary> + /// <exception cref="UnsupportedOperationException"> if the stream does not support + /// retrieving the value of the specified symbol </exception> + virtual size_t LA(ssize_t i) = 0; + + /// <summary> + /// A mark provides a guarantee that <seealso cref="#seek seek()"/> operations will be + /// valid over a "marked range" extending from the index where {@code mark()} + /// was called to the current <seealso cref="#index index()"/>. This allows the use of + /// streaming input sources by specifying the minimum buffering requirements + /// to support arbitrary lookahead during prediction. + /// <p/> + /// The returned mark is an opaque handle (type {@code int}) which is passed + /// to <seealso cref="#release release()"/> when the guarantees provided by the marked + /// range are no longer necessary. When calls to + /// {@code mark()}/{@code release()} are nested, the marks must be released + /// in reverse order of which they were obtained. Since marked regions are + /// used during performance-critical sections of prediction, the specific + /// behavior of invalid usage is unspecified (i.e. a mark is not released, or + /// a mark is released twice, or marks are not released in reverse order from + /// which they were created). + /// <p/> + /// The behavior of this method is unspecified if no call to an + /// <seealso cref="IntStream initializing method"/> has occurred after this stream was + /// constructed. + /// <p/> + /// This method does not change the current position in the input stream. 
+ /// <p/> + /// The following example shows the use of <seealso cref="#mark mark()"/>, + /// <seealso cref="#release release(mark)"/>, <seealso cref="#index index()"/>, and + /// <seealso cref="#seek seek(index)"/> as part of an operation to safely work within a + /// marked region, then restore the stream position to its original value and + /// release the mark. + /// <pre> + /// IntStream stream = ...; + /// int index = -1; + /// int mark = stream.mark(); + /// try { + /// index = stream.index(); + /// // perform work here... + /// } finally { + /// if (index != -1) { + /// stream.seek(index); + /// } + /// stream.release(mark); + /// } + /// </pre> + /// </summary> + /// <returns> An opaque marker which should be passed to + /// <seealso cref="#release release()"/> when the marked range is no longer required. </returns> + virtual ssize_t mark() = 0; + + /// <summary> + /// This method releases a marked range created by a call to + /// <seealso cref="#mark mark()"/>. Calls to {@code release()} must appear in the + /// reverse order of the corresponding calls to {@code mark()}. If a mark is + /// released twice, or if marks are not released in reverse order of the + /// corresponding calls to {@code mark()}, the behavior is unspecified. + /// <p/> + /// For more information and an example, see <seealso cref="#mark"/>. + /// </summary> + /// <param name="marker"> A marker returned by a call to {@code mark()}. </param> + /// <seealso cref="#mark"/> + virtual void release(ssize_t marker) = 0; + + /// <summary> + /// Return the index into the stream of the input symbol referred to by + /// {@code LA(1)}. + /// <p/> + /// The behavior of this method is unspecified if no call to an + /// <seealso cref="IntStream initializing method"/> has occurred after this stream was + /// constructed. + /// </summary> + virtual size_t index() = 0; + + /// <summary> + /// Set the input cursor to the position indicated by {@code index}.
If the + /// specified index lies past the end of the stream, the operation behaves as + /// though {@code index} was the index of the EOF symbol. After this method + /// returns without throwing an exception, at least one of the following + /// will be true. + /// + /// <ul> + /// <li><seealso cref="#index index()"/> will return the index of the first symbol + /// appearing at or after the specified {@code index}. Specifically, + /// implementations which filter their sources should automatically + /// adjust {@code index} forward the minimum amount required for the + /// operation to target a non-ignored symbol.</li> + /// <li>{@code LA(1)} returns <seealso cref="#EOF"/></li> + /// </ul> + /// + /// This operation is guaranteed to not throw an exception if {@code index} + /// lies within a marked region. For more information on marked regions, see + /// <seealso cref="#mark"/>. The behavior of this method is unspecified if no call to + /// an <seealso cref="IntStream initializing method"/> has occurred after this stream + /// was constructed. + /// </summary> + /// <param name="index"> The absolute index to seek to. + /// </param> + /// <exception cref="IllegalArgumentException"> if {@code index} is less than 0 </exception> + /// <exception cref="UnsupportedOperationException"> if the stream does not support + /// seeking to the specified index </exception> + virtual void seek(size_t index) = 0; + + /// <summary> + /// Returns the total number of symbols in the stream, including a single EOF + /// symbol. + /// </summary> + /// <exception cref="UnsupportedOperationException"> if the size of the stream is + /// unknown. </exception> + virtual size_t size() = 0; + + /// <summary> + /// Gets the name of the underlying symbol source. This method returns a + /// non-null, non-empty string. If such a name is not known, this method + /// returns <seealso cref="#UNKNOWN_SOURCE_NAME"/>.
+ /// </summary> + virtual std::string getSourceName() const = 0; + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/InterpreterRuleContext.cpp b/contrib/libs/antlr4_cpp_runtime/src/InterpreterRuleContext.cpp new file mode 100644 index 0000000000..f2812ba910 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/InterpreterRuleContext.cpp @@ -0,0 +1,19 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "InterpreterRuleContext.h" + +using namespace antlr4; + +InterpreterRuleContext::InterpreterRuleContext() : ParserRuleContext() { +} + +InterpreterRuleContext::InterpreterRuleContext(ParserRuleContext *parent, size_t invokingStateNumber, size_t ruleIndex) + : ParserRuleContext(parent, invokingStateNumber), _ruleIndex(ruleIndex) { +} + +size_t InterpreterRuleContext::getRuleIndex() const { + return _ruleIndex; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/InterpreterRuleContext.h b/contrib/libs/antlr4_cpp_runtime/src/InterpreterRuleContext.h new file mode 100644 index 0000000000..a34d06b1f1 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/InterpreterRuleContext.h @@ -0,0 +1,45 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "ParserRuleContext.h" + +namespace antlr4 { + + /** + * This class extends {@link ParserRuleContext} by allowing the value of + * {@link #getRuleIndex} to be explicitly set for the context. + * + * <p> + * {@link ParserRuleContext} does not include field storage for the rule index + * since the context classes created by the code generator override the + * {@link #getRuleIndex} method to return the correct value for that context. 
+ * Since the parser interpreter does not use the context classes generated for a + * parser, this class (with slightly more memory overhead per node) is used to + * provide equivalent functionality.</p> + */ + class ANTLR4CPP_PUBLIC InterpreterRuleContext : public ParserRuleContext { + public: + InterpreterRuleContext(); + + /** + * Constructs a new {@link InterpreterRuleContext} with the specified + * parent, invoking state, and rule index. + * + * @param parent The parent context. + * @param invokingStateNumber The invoking state number. + * @param ruleIndex The rule index for the current context. + */ + InterpreterRuleContext(ParserRuleContext *parent, size_t invokingStateNumber, size_t ruleIndex); + + virtual size_t getRuleIndex() const override; + + protected: + /** This is the backing field for {@link #getRuleIndex}. */ + const size_t _ruleIndex = INVALID_INDEX; +}; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/Lexer.cpp b/contrib/libs/antlr4_cpp_runtime/src/Lexer.cpp new file mode 100644 index 0000000000..b0385c56ba --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/Lexer.cpp @@ -0,0 +1,294 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/LexerATNSimulator.h" +#include "Exceptions.h" +#include "misc/Interval.h" +#include "CommonTokenFactory.h" +#include "LexerNoViableAltException.h" +#include "ANTLRErrorListener.h" +#include "support/CPPUtils.h" +#include "CommonToken.h" + +#include "Lexer.h" + +#define DEBUG_LEXER 0 + +using namespace antlrcpp; +using namespace antlr4; + +Lexer::Lexer() : Recognizer() { + InitializeInstanceFields(); + _input = nullptr; +} + +Lexer::Lexer(CharStream *input) : Recognizer(), _input(input) { + InitializeInstanceFields(); +} + +void Lexer::reset() { + // wack Lexer state variables + _input->seek(0); // rewind the input + + _syntaxErrors = 0; + token.reset(); + type = Token::INVALID_TYPE; + channel = Token::DEFAULT_CHANNEL; + tokenStartCharIndex = INVALID_INDEX; + tokenStartCharPositionInLine = 0; + tokenStartLine = 0; + type = 0; + _text = ""; + + hitEOF = false; + mode = Lexer::DEFAULT_MODE; + modeStack.clear(); + + getInterpreter<atn::LexerATNSimulator>()->reset(); +} + +std::unique_ptr<Token> Lexer::nextToken() { + // Mark start location in char stream so unbuffered streams are + // guaranteed at least have text of current token + ssize_t tokenStartMarker = _input->mark(); + + auto onExit = finally([this, tokenStartMarker]{ + // make sure we release marker after match or + // unbuffered char stream will keep buffering + _input->release(tokenStartMarker); + }); + + while (true) { + outerContinue: + if (hitEOF) { + emitEOF(); + return std::move(token); + } + + token.reset(); + channel = Token::DEFAULT_CHANNEL; + tokenStartCharIndex = _input->index(); + tokenStartCharPositionInLine = getInterpreter<atn::LexerATNSimulator>()->getCharPositionInLine(); + tokenStartLine = getInterpreter<atn::LexerATNSimulator>()->getLine(); + _text = ""; + do { + type = Token::INVALID_TYPE; + size_t ttype; + try { + ttype = getInterpreter<atn::LexerATNSimulator>()->match(_input, mode); + } catch (LexerNoViableAltException &e) { + notifyListeners(e); // report error + 
recover(e); + ttype = SKIP; + } + if (_input->LA(1) == EOF) { + hitEOF = true; + } + if (type == Token::INVALID_TYPE) { + type = ttype; + } + if (type == SKIP) { + goto outerContinue; + } + } while (type == MORE); + if (token == nullptr) { + emit(); + } + return std::move(token); + } +} + +void Lexer::skip() { + type = SKIP; +} + +void Lexer::more() { + type = MORE; +} + +void Lexer::setMode(size_t m) { + mode = m; +} + +void Lexer::pushMode(size_t m) { +#if DEBUG_LEXER == 1 + std::cout << "pushMode " << m << std::endl; +#endif + + modeStack.push_back(mode); + setMode(m); +} + +size_t Lexer::popMode() { + if (modeStack.empty()) { + throw EmptyStackException(); + } +#if DEBUG_LEXER == 1 + std::cout << std::string("popMode back to ") << modeStack.back() << std::endl; +#endif + + setMode(modeStack.back()); + modeStack.pop_back(); + return mode; +} + + +TokenFactory<CommonToken>* Lexer::getTokenFactory() { + return _factory; +} + +void Lexer::setInputStream(IntStream *input) { + reset(); + _input = dynamic_cast<CharStream*>(input); +} + +std::string Lexer::getSourceName() { + return _input->getSourceName(); +} + +CharStream* Lexer::getInputStream() { + return _input; +} + +void Lexer::emit(std::unique_ptr<Token> newToken) { + token = std::move(newToken); +} + +Token* Lexer::emit() { + emit(_factory->create({ this, _input }, type, _text, channel, + tokenStartCharIndex, getCharIndex() - 1, tokenStartLine, tokenStartCharPositionInLine)); + return token.get(); +} + +Token* Lexer::emitEOF() { + size_t cpos = getCharPositionInLine(); + size_t line = getLine(); + emit(_factory->create({ this, _input }, EOF, "", Token::DEFAULT_CHANNEL, _input->index(), _input->index() - 1, line, cpos)); + return token.get(); +} + +size_t Lexer::getLine() const { + return getInterpreter<atn::LexerATNSimulator>()->getLine(); +} + +size_t Lexer::getCharPositionInLine() { + return getInterpreter<atn::LexerATNSimulator>()->getCharPositionInLine(); +} + +void Lexer::setLine(size_t line) { + 
getInterpreter<atn::LexerATNSimulator>()->setLine(line); +} + +void Lexer::setCharPositionInLine(size_t charPositionInLine) { + getInterpreter<atn::LexerATNSimulator>()->setCharPositionInLine(charPositionInLine); +} + +size_t Lexer::getCharIndex() { + return _input->index(); +} + +std::string Lexer::getText() { + if (!_text.empty()) { + return _text; + } + return getInterpreter<atn::LexerATNSimulator>()->getText(_input); +} + +void Lexer::setText(const std::string &text) { + _text = text; +} + +std::unique_ptr<Token> Lexer::getToken() { + return std::move(token); +} + +void Lexer::setToken(std::unique_ptr<Token> newToken) { + token = std::move(newToken); +} + +void Lexer::setType(size_t ttype) { + type = ttype; +} + +size_t Lexer::getType() { + return type; +} + +void Lexer::setChannel(size_t newChannel) { + channel = newChannel; +} + +size_t Lexer::getChannel() { + return channel; +} + +std::vector<std::unique_ptr<Token>> Lexer::getAllTokens() { + std::vector<std::unique_ptr<Token>> tokens; + std::unique_ptr<Token> t = nextToken(); + while (t->getType() != EOF) { + tokens.push_back(std::move(t)); + t = nextToken(); + } + return tokens; +} + +void Lexer::recover(const LexerNoViableAltException &/*e*/) { + if (_input->LA(1) != EOF) { + // skip a char and try again + getInterpreter<atn::LexerATNSimulator>()->consume(_input); + } +} + +void Lexer::notifyListeners(const LexerNoViableAltException & /*e*/) { + ++_syntaxErrors; + std::string text = _input->getText(misc::Interval(tokenStartCharIndex, _input->index())); + std::string msg = std::string("token recognition error at: '") + getErrorDisplay(text) + std::string("'"); + + ProxyErrorListener &listener = getErrorListenerDispatch(); + listener.syntaxError(this, nullptr, tokenStartLine, tokenStartCharPositionInLine, msg, std::current_exception()); +} + +std::string Lexer::getErrorDisplay(const std::string &s) { + std::stringstream ss; + for (auto c : s) { + switch (c) { + case '\n': + ss << "\\n"; + break; + case '\t': 
+ ss << "\\t"; + break; + case '\r': + ss << "\\r"; + break; + default: + ss << c; + break; + } + } + return ss.str(); +} + +void Lexer::recover(RecognitionException * /*re*/) { + // TODO: Do we lose character or line position information? + _input->consume(); +} + +size_t Lexer::getNumberOfSyntaxErrors() { + return _syntaxErrors; +} + +void Lexer::InitializeInstanceFields() { + _syntaxErrors = 0; + token = nullptr; + _factory = CommonTokenFactory::DEFAULT.get(); + tokenStartCharIndex = INVALID_INDEX; + tokenStartLine = 0; + tokenStartCharPositionInLine = 0; + hitEOF = false; + channel = 0; + type = 0; + mode = Lexer::DEFAULT_MODE; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/Lexer.h b/contrib/libs/antlr4_cpp_runtime/src/Lexer.h new file mode 100644 index 0000000000..77033ad9e6 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/Lexer.h @@ -0,0 +1,196 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Recognizer.h" +#include "TokenSource.h" +#include "CharStream.h" +#include "Token.h" + +namespace antlr4 { + + /// A lexer is recognizer that draws input symbols from a character stream. + /// lexer grammars result in a subclass of this object. A Lexer object + /// uses simplified match() and error recovery mechanisms in the interest + /// of speed. 
+ class ANTLR4CPP_PUBLIC Lexer : public Recognizer, public TokenSource { + public: + static constexpr size_t DEFAULT_MODE = 0; + static constexpr size_t MORE = std::numeric_limits<size_t>::max() - 1; + static constexpr size_t SKIP = std::numeric_limits<size_t>::max() - 2; + + static constexpr size_t DEFAULT_TOKEN_CHANNEL = Token::DEFAULT_CHANNEL; + static constexpr size_t HIDDEN = Token::HIDDEN_CHANNEL; + static constexpr size_t MIN_CHAR_VALUE = 0; + static constexpr size_t MAX_CHAR_VALUE = 0x10FFFF; + + CharStream *_input; // Pure reference, usually from statically allocated instance. + + protected: + /// How to create token objects. + TokenFactory<CommonToken> *_factory; + + public: + /// The goal of all lexer rules/methods is to create a token object. + /// This is an instance variable as multiple rules may collaborate to + /// create a single token. nextToken will return this object after + /// matching lexer rule(s). If you subclass to allow multiple token + /// emissions, then set this to the last token to be matched or + /// something nonnull so that the auto token emit mechanism will not + /// emit another token. + + // Life cycle of a token is this: + // Created by emit() (via the token factory) or by action code, holding ownership of it. + // Ownership is handed over to the token stream when calling nextToken(). + std::unique_ptr<Token> token; + + /// <summary> + /// What character index in the stream did the current token start at? + /// Needed, for example, to get the text for current token. Set at + /// the start of nextToken. + /// </summary> + size_t tokenStartCharIndex; + + /// <summary> + /// The line on which the first character of the token resides </summary> + size_t tokenStartLine; + + /// The character position of first character within the line. + size_t tokenStartCharPositionInLine; + + /// Once we see EOF on char stream, next token will be EOF. + /// If you have DONE : EOF ; then you see DONE EOF. 
+ bool hitEOF; + + /// The channel number for the current token. + size_t channel; + + /// The token type for the current token. + size_t type; + + // Use the vector as a stack. + std::vector<size_t> modeStack; + size_t mode; + + Lexer(); + Lexer(CharStream *input); + virtual ~Lexer() {} + + virtual void reset(); + + /// Return a token from this source; i.e., match a token on the char stream. + virtual std::unique_ptr<Token> nextToken() override; + + /// Instruct the lexer to skip creating a token for current lexer rule + /// and look for another token. nextToken() knows to keep looking when + /// a lexer rule finishes with token set to SKIP_TOKEN. Recall that + /// if token == null at end of any token rule, it creates one for you + /// and emits it. + virtual void skip(); + virtual void more(); + virtual void setMode(size_t m); + virtual void pushMode(size_t m); + virtual size_t popMode(); + + template<typename T1> + void setTokenFactory(TokenFactory<T1> *factory) { + this->_factory = factory; + } + + virtual TokenFactory<CommonToken>* getTokenFactory() override; + + /// Set the char stream and reset the lexer + virtual void setInputStream(IntStream *input) override; + + virtual std::string getSourceName() override; + + virtual CharStream* getInputStream() override; + + /// By default does not support multiple emits per nextToken invocation + /// for efficiency reasons. Subclasses can override this method, nextToken, + /// and getToken (to push tokens into a list and pull from that list + /// rather than a single variable as this implementation does). + virtual void emit(std::unique_ptr<Token> newToken); + + /// The standard method called to automatically emit a token at the + /// outermost lexical rule. The token object should point into the + /// char buffer start..stop. If there is a text override in 'text', + /// use that to set the token's text. Override this method to emit + /// custom Token objects or provide a new factory. 
+ virtual Token* emit(); + + virtual Token* emitEOF(); + + virtual size_t getLine() const override; + + virtual size_t getCharPositionInLine() override; + + virtual void setLine(size_t line); + + virtual void setCharPositionInLine(size_t charPositionInLine); + + /// What is the index of the current character of lookahead? + virtual size_t getCharIndex(); + + /// Return the text matched so far for the current token or any + /// text override. + virtual std::string getText(); + + /// Set the complete text of this token; it wipes any previous + /// changes to the text. + virtual void setText(const std::string &text); + + /// Override if emitting multiple tokens. + virtual std::unique_ptr<Token> getToken(); + + virtual void setToken(std::unique_ptr<Token> newToken); + + virtual void setType(size_t ttype); + + virtual size_t getType(); + + virtual void setChannel(size_t newChannel); + + virtual size_t getChannel(); + + virtual const std::vector<std::string>& getChannelNames() const = 0; + + virtual const std::vector<std::string>& getModeNames() const = 0; + + /// Return a list of all Token objects in input char stream. + /// Forces load of all tokens. Does not include EOF token. + virtual std::vector<std::unique_ptr<Token>> getAllTokens(); + + virtual void recover(const LexerNoViableAltException &e); + + virtual void notifyListeners(const LexerNoViableAltException &e); + + virtual std::string getErrorDisplay(const std::string &s); + + /// Lexers can normally match any char in it's vocabulary after matching + /// a token, so do the easy thing and just kill a character and hope + /// it all works out. You can instead use the rule invocation stack + /// to do sophisticated error recovery if you are in a fragment rule. + virtual void recover(RecognitionException *re); + + /// <summary> + /// Gets the number of syntax errors reported during parsing. This value is + /// incremented each time <seealso cref="#notifyErrorListeners"/> is called. 
+ /// </summary> + /// <seealso cref= #notifyListeners </seealso> + virtual size_t getNumberOfSyntaxErrors(); + + protected: + /// You can set the text for the current token to override what is in + /// the input char buffer (via setText()). + std::string _text; + + private: + size_t _syntaxErrors; + void InitializeInstanceFields(); + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/LexerInterpreter.cpp b/contrib/libs/antlr4_cpp_runtime/src/LexerInterpreter.cpp new file mode 100644 index 0000000000..38acd09ddd --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/LexerInterpreter.cpp @@ -0,0 +1,60 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/ATNType.h" +#include "atn/LexerATNSimulator.h" +#include "dfa/DFA.h" +#include "Exceptions.h" +#include "Vocabulary.h" + +#include "LexerInterpreter.h" + +using namespace antlr4; + +LexerInterpreter::LexerInterpreter(const std::string &grammarFileName, const dfa::Vocabulary &vocabulary, + const std::vector<std::string> &ruleNames, const std::vector<std::string> &channelNames, const std::vector<std::string> &modeNames, + const atn::ATN &atn, CharStream *input) + : Lexer(input), _grammarFileName(grammarFileName), _atn(atn), _ruleNames(ruleNames), + _channelNames(channelNames), _modeNames(modeNames), + _vocabulary(vocabulary) { + + if (_atn.grammarType != atn::ATNType::LEXER) { + throw IllegalArgumentException("The ATN must be a lexer ATN."); + } + + for (size_t i = 0; i < atn.getNumberOfDecisions(); ++i) { + _decisionToDFA.push_back(dfa::DFA(_atn.getDecisionState(i), i)); + } + _interpreter = new atn::LexerATNSimulator(this, _atn, _decisionToDFA, _sharedContextCache); /* mem-check: deleted in d-tor */ +} + +LexerInterpreter::~LexerInterpreter() +{ + delete _interpreter; +} + +const atn::ATN& LexerInterpreter::getATN() const { + 
return _atn; +} + +std::string LexerInterpreter::getGrammarFileName() const { + return _grammarFileName; +} + +const std::vector<std::string>& LexerInterpreter::getRuleNames() const { + return _ruleNames; +} + +const std::vector<std::string>& LexerInterpreter::getChannelNames() const { + return _channelNames; +} + +const std::vector<std::string>& LexerInterpreter::getModeNames() const { + return _modeNames; +} + +const dfa::Vocabulary& LexerInterpreter::getVocabulary() const { + return _vocabulary; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/LexerInterpreter.h b/contrib/libs/antlr4_cpp_runtime/src/LexerInterpreter.h new file mode 100644 index 0000000000..3787c1d0d5 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/LexerInterpreter.h @@ -0,0 +1,46 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Lexer.h" +#include "atn/PredictionContext.h" +#include "atn/PredictionContextCache.h" +#include "Vocabulary.h" + +namespace antlr4 { + + class ANTLR4CPP_PUBLIC LexerInterpreter : public Lexer { + public: + LexerInterpreter(const std::string &grammarFileName, const dfa::Vocabulary &vocabulary, + const std::vector<std::string> &ruleNames, const std::vector<std::string> &channelNames, + const std::vector<std::string> &modeNames, const atn::ATN &atn, CharStream *input); + + ~LexerInterpreter(); + + virtual const atn::ATN& getATN() const override; + virtual std::string getGrammarFileName() const override; + virtual const std::vector<std::string>& getRuleNames() const override; + virtual const std::vector<std::string>& getChannelNames() const override; + virtual const std::vector<std::string>& getModeNames() const override; + + virtual const dfa::Vocabulary& getVocabulary() const override; + + protected: + const std::string _grammarFileName; + const atn::ATN &_atn; + + const 
std::vector<std::string> &_ruleNames; + const std::vector<std::string> &_channelNames; + const std::vector<std::string> &_modeNames; + std::vector<dfa::DFA> _decisionToDFA; + + atn::PredictionContextCache _sharedContextCache; + + private: + dfa::Vocabulary _vocabulary; + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/LexerNoViableAltException.cpp b/contrib/libs/antlr4_cpp_runtime/src/LexerNoViableAltException.cpp new file mode 100644 index 0000000000..3304b82b40 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/LexerNoViableAltException.cpp @@ -0,0 +1,36 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "misc/Interval.h" +#include "support/CPPUtils.h" +#include "CharStream.h" +#include "Lexer.h" + +#include "LexerNoViableAltException.h" + +using namespace antlr4; + +LexerNoViableAltException::LexerNoViableAltException(Lexer *lexer, CharStream *input, size_t startIndex, + atn::ATNConfigSet *deadEndConfigs) + : RecognitionException(lexer, input, nullptr, nullptr), _startIndex(startIndex), _deadEndConfigs(deadEndConfigs) { +} + +size_t LexerNoViableAltException::getStartIndex() { + return _startIndex; +} + +atn::ATNConfigSet* LexerNoViableAltException::getDeadEndConfigs() { + return _deadEndConfigs; +} + +std::string LexerNoViableAltException::toString() { + std::string symbol; + if (_startIndex < getInputStream()->size()) { + symbol = static_cast<CharStream *>(getInputStream())->getText(misc::Interval(_startIndex, _startIndex)); + symbol = antlrcpp::escapeWhitespace(symbol, false); + } + std::string format = "LexerNoViableAltException('" + symbol + "')"; + return format; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/LexerNoViableAltException.h b/contrib/libs/antlr4_cpp_runtime/src/LexerNoViableAltException.h new file mode 100644 index 0000000000..52eada7cfa --- 
/dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/LexerNoViableAltException.h @@ -0,0 +1,31 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "RecognitionException.h" +#include "atn/ATNConfigSet.h" + +namespace antlr4 { + + class ANTLR4CPP_PUBLIC LexerNoViableAltException : public RecognitionException { + public: + LexerNoViableAltException(Lexer *lexer, CharStream *input, size_t startIndex, + atn::ATNConfigSet *deadEndConfigs); + + virtual size_t getStartIndex(); + virtual atn::ATNConfigSet* getDeadEndConfigs(); + virtual std::string toString(); + + private: + /// Matching attempted at what input index? + const size_t _startIndex; + + /// Which configurations did we try at input.index() that couldn't match input.LA(1)? + atn::ATNConfigSet *_deadEndConfigs; + + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/ListTokenSource.cpp b/contrib/libs/antlr4_cpp_runtime/src/ListTokenSource.cpp new file mode 100644 index 0000000000..45372808e5 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/ListTokenSource.cpp @@ -0,0 +1,92 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "Token.h" +#include "CommonToken.h" +#include "CharStream.h" + +#include "ListTokenSource.h" + +using namespace antlr4; + +ListTokenSource::ListTokenSource(std::vector<std::unique_ptr<Token>> tokens_) : ListTokenSource(std::move(tokens_), "") { +} + +ListTokenSource::ListTokenSource(std::vector<std::unique_ptr<Token>> tokens_, const std::string &sourceName_) + : tokens(std::move(tokens_)), sourceName(sourceName_) { + InitializeInstanceFields(); + if (tokens.empty()) { + throw "tokens cannot be null"; + } + + // Check if there is an eof token and create one if not. + if (tokens.back()->getType() != Token::EOF) { + Token *lastToken = tokens.back().get(); + size_t start = INVALID_INDEX; + size_t previousStop = lastToken->getStopIndex(); + if (previousStop != INVALID_INDEX) { + start = previousStop + 1; + } + + // The synthetic EOF token is zero-width and sits right after the last token, so its stop index is + // start - 1, i.e. the last token's stop index (or INVALID_INDEX when that one is unknown). + // std::max(INVALID_INDEX, start - 1) cannot express this on size_t: assuming INVALID_INDEX is the + // largest size_t value, it would always yield INVALID_INDEX. + size_t stop = previousStop; + tokens.emplace_back((_factory->create({ this, getInputStream() }, Token::EOF, "EOF", + Token::DEFAULT_CHANNEL, start, stop, static_cast<int>(lastToken->getLine()), lastToken->getCharPositionInLine()))); + } +} + +size_t ListTokenSource::getCharPositionInLine() { + if (i < tokens.size()) { + return tokens[i]->getCharPositionInLine(); + } + return 0; +} + +std::unique_ptr<Token> ListTokenSource::nextToken() { + if (i < tokens.size()) { + return std::move(tokens[i++]); + } + return nullptr; +} + +size_t ListTokenSource::getLine() const { + if (i < tokens.size()) { + return tokens[i]->getLine(); + } + + return 1; +} + +CharStream *ListTokenSource::getInputStream() { + if (i < tokens.size()) { + return tokens[i]->getInputStream(); + } else if (!tokens.empty() && tokens.back() != nullptr) { + // nextToken() moves tokens out of the list, so the last slot may only be used while it is still owned here. + return tokens.back()->getInputStream(); + } + + // no input stream information is available + return nullptr; +} + +std::string ListTokenSource::getSourceName() { + if (sourceName != "") { + return sourceName; + } + + CharStream *inputStream = getInputStream(); + if (inputStream != nullptr) { + return inputStream->getSourceName(); + } + + 
return "List"; +} + +TokenFactory<CommonToken>* ListTokenSource::getTokenFactory() { + return _factory; +} + +void ListTokenSource::InitializeInstanceFields() { + i = 0; + _factory = CommonTokenFactory::DEFAULT.get(); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/ListTokenSource.h b/contrib/libs/antlr4_cpp_runtime/src/ListTokenSource.h new file mode 100644 index 0000000000..542b05cb5a --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/ListTokenSource.h @@ -0,0 +1,88 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "TokenSource.h" +#include "CommonTokenFactory.h" + +namespace antlr4 { + + /// Provides an implementation of <seealso cref="TokenSource"/> as a wrapper around a list + /// of <seealso cref="Token"/> objects. + /// + /// If the final token in the list is an <seealso cref="Token#EOF"/> token, it will be used + /// as the EOF token for every call to <seealso cref="#nextToken"/> after the end of the + /// list is reached. Otherwise, an EOF token will be created. + class ANTLR4CPP_PUBLIC ListTokenSource : public TokenSource { + protected: + // This list will be emptied token by token as we call nextToken(). + // Token streams can be used to buffer tokens for a while. + std::vector<std::unique_ptr<Token>> tokens; + + private: + /// <summary> + /// The name of the input source. If this value is {@code null}, a call to + /// <seealso cref="#getSourceName"/> should return the source name used to create the + /// the next token in <seealso cref="#tokens"/> (or the previous token if the end of + /// the input has been reached). + /// </summary> + const std::string sourceName; + + protected: + /// The index into <seealso cref="#tokens"/> of token to return by the next call to + /// <seealso cref="#nextToken"/>. 
The end of the input is indicated by this value + /// being greater than or equal to the number of items in <seealso cref="#tokens"/>. + size_t i; + + private: + /// This is the backing field for <seealso cref="#getTokenFactory"/> and + /// <seealso cref="setTokenFactory"/>. + TokenFactory<CommonToken> *_factory = CommonTokenFactory::DEFAULT.get(); + + public: + /// Constructs a new <seealso cref="ListTokenSource"/> instance from the specified + /// collection of <seealso cref="Token"/> objects. + /// + /// <param name="tokens"> The collection of <seealso cref="Token"/> objects to provide as a + /// <seealso cref="TokenSource"/>. </param> + /// <exception cref="NullPointerException"> if {@code tokens} is {@code null} </exception> + ListTokenSource(std::vector<std::unique_ptr<Token>> tokens); + ListTokenSource(const ListTokenSource& other) = delete; + + ListTokenSource& operator = (const ListTokenSource& other) = delete; + + /// <summary> + /// Constructs a new <seealso cref="ListTokenSource"/> instance from the specified + /// collection of <seealso cref="Token"/> objects and source name. + /// </summary> + /// <param name="tokens"> The collection of <seealso cref="Token"/> objects to provide as a + /// <seealso cref="TokenSource"/>. </param> + /// <param name="sourceName"> The name of the <seealso cref="TokenSource"/>. If this value is + /// {@code null}, <seealso cref="#getSourceName"/> will attempt to infer the name from + /// the next <seealso cref="Token"/> (or the previous token if the end of the input has + /// been reached). 
+ /// </param> + /// <exception cref="NullPointerException"> if {@code tokens} is {@code null} </exception> + ListTokenSource(std::vector<std::unique_ptr<Token>> tokens_, const std::string &sourceName_); + + virtual size_t getCharPositionInLine() override; + virtual std::unique_ptr<Token> nextToken() override; + virtual size_t getLine() const override; + virtual CharStream* getInputStream() override; + virtual std::string getSourceName() override; + + template<typename T1> + void setTokenFactory(TokenFactory<T1> *factory) { + this->_factory = factory; + } + + virtual TokenFactory<CommonToken>* getTokenFactory() override; + + private: + void InitializeInstanceFields(); + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/NoViableAltException.cpp b/contrib/libs/antlr4_cpp_runtime/src/NoViableAltException.cpp new file mode 100644 index 0000000000..273c208c74 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/NoViableAltException.cpp @@ -0,0 +1,46 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "Parser.h" + +#include "NoViableAltException.h" + +using namespace antlr4; + +namespace { + +// Create a normal shared pointer if the configurations are to be deleted. If not, then +// the shared pointer is created with a deleter that does nothing. 
+Ref<atn::ATNConfigSet> buildConfigsRef(atn::ATNConfigSet *configs, bool deleteConfigs) { + if (deleteConfigs) { + return Ref<atn::ATNConfigSet>(configs); + } else { + return Ref<atn::ATNConfigSet>(configs, [](atn::ATNConfigSet *){}); + } +} + +} + +NoViableAltException::NoViableAltException(Parser *recognizer) + : NoViableAltException(recognizer, recognizer->getTokenStream(), recognizer->getCurrentToken(), + recognizer->getCurrentToken(), nullptr, recognizer->getContext(), false) { +} + +NoViableAltException::NoViableAltException(Parser *recognizer, TokenStream *input,Token *startToken, + Token *offendingToken, atn::ATNConfigSet *deadEndConfigs, ParserRuleContext *ctx, bool deleteConfigs) + : RecognitionException("No viable alternative", recognizer, input, ctx, offendingToken), + _deadEndConfigs(buildConfigsRef(deadEndConfigs, deleteConfigs)), _startToken(startToken) { +} + +NoViableAltException::~NoViableAltException() { +} + +Token* NoViableAltException::getStartToken() const { + return _startToken; +} + +atn::ATNConfigSet* NoViableAltException::getDeadEndConfigs() const { + return _deadEndConfigs.get(); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/NoViableAltException.h b/contrib/libs/antlr4_cpp_runtime/src/NoViableAltException.h new file mode 100644 index 0000000000..b15039d0cb --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/NoViableAltException.h @@ -0,0 +1,42 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "RecognitionException.h" +#include "Token.h" +#include "atn/ATNConfigSet.h" + +namespace antlr4 { + + /// Indicates that the parser could not decide which of two or more paths + /// to take based upon the remaining input. It tracks the starting token + /// of the offending input and also knows where the parser was + /// in the various paths when the error. 
Reported by reportNoViableAlternative() + class ANTLR4CPP_PUBLIC NoViableAltException : public RecognitionException { + public: + NoViableAltException(Parser *recognizer); // LL(1) error + NoViableAltException(Parser *recognizer, TokenStream *input,Token *startToken, + Token *offendingToken, atn::ATNConfigSet *deadEndConfigs, ParserRuleContext *ctx, bool deleteConfigs); + ~NoViableAltException(); + + virtual Token* getStartToken() const; + virtual atn::ATNConfigSet* getDeadEndConfigs() const; + + private: + /// Which configurations did we try at input.index() that couldn't match input.LT(1)? + /// Shared pointer that conditionally deletes the configurations (based on flag + /// passed during construction) + Ref<atn::ATNConfigSet> _deadEndConfigs; + + /// The token object at the start index; the input stream might + /// not be buffering tokens so get a reference to it. (At the + /// time the error occurred, of course the stream needs to keep a + /// buffer all of the tokens but later we might not have access to those.) + Token *_startToken; + + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/Parser.cpp b/contrib/libs/antlr4_cpp_runtime/src/Parser.cpp new file mode 100644 index 0000000000..337bcba17a --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/Parser.cpp @@ -0,0 +1,670 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/ATNDeserializationOptions.h" +#include "tree/pattern/ParseTreePatternMatcher.h" +#include "dfa/DFA.h" +#include "ParserRuleContext.h" +#include "tree/TerminalNode.h" +#include "tree/ErrorNodeImpl.h" +#include "Lexer.h" +#include "atn/ParserATNSimulator.h" +#include "misc/IntervalSet.h" +#include "atn/RuleStartState.h" +#include "DefaultErrorStrategy.h" +#include "atn/ATNDeserializer.h" +#include "atn/RuleTransition.h" +#include "atn/ATN.h" +#include "Exceptions.h" +#include "ANTLRErrorListener.h" +#include "tree/pattern/ParseTreePattern.h" +#include "internal/Synchronization.h" + +#include "atn/ProfilingATNSimulator.h" +#include "atn/ParseInfo.h" + +#include "Parser.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::internal; +using namespace antlrcpp; + +namespace { + +struct BypassAltsAtnCache final { + std::shared_mutex mutex; + /// This field maps from the serialized ATN string to the deserialized <seealso cref="ATN"/> with + /// bypass alternatives. 
+ /// + /// <seealso cref= ATNDeserializationOptions#isGenerateRuleBypassTransitions() </seealso> + std::map<std::vector<int32_t>, std::unique_ptr<const atn::ATN>, std::less<>> map; +}; + +BypassAltsAtnCache* getBypassAltsAtnCache() { + static BypassAltsAtnCache* const instance = new BypassAltsAtnCache(); + return instance; +} + +} + +Parser::TraceListener::TraceListener(Parser *outerInstance_) : outerInstance(outerInstance_) { +} + +Parser::TraceListener::~TraceListener() { +} + +void Parser::TraceListener::enterEveryRule(ParserRuleContext *ctx) { + std::cout << "enter " << outerInstance->getRuleNames()[ctx->getRuleIndex()] + << ", LT(1)=" << outerInstance->_input->LT(1)->getText() << std::endl; +} + +void Parser::TraceListener::visitTerminal(tree::TerminalNode *node) { + std::cout << "consume " << node->getSymbol() << " rule " + << outerInstance->getRuleNames()[outerInstance->getContext()->getRuleIndex()] << std::endl; +} + +void Parser::TraceListener::visitErrorNode(tree::ErrorNode * /*node*/) { +} + +void Parser::TraceListener::exitEveryRule(ParserRuleContext *ctx) { + std::cout << "exit " << outerInstance->getRuleNames()[ctx->getRuleIndex()] + << ", LT(1)=" << outerInstance->_input->LT(1)->getText() << std::endl; +} + +Parser::TrimToSizeListener Parser::TrimToSizeListener::INSTANCE; + +Parser::TrimToSizeListener::~TrimToSizeListener() { +} + +void Parser::TrimToSizeListener::enterEveryRule(ParserRuleContext * /*ctx*/) { +} + +void Parser::TrimToSizeListener::visitTerminal(tree::TerminalNode * /*node*/) { +} + +void Parser::TrimToSizeListener::visitErrorNode(tree::ErrorNode * /*node*/) { +} + +void Parser::TrimToSizeListener::exitEveryRule(ParserRuleContext * ctx) { + ctx->children.shrink_to_fit(); +} + +Parser::Parser(TokenStream *input) { + InitializeInstanceFields(); + setInputStream(input); +} + +Parser::~Parser() { + _tracker.reset(); + delete _tracer; +} + +void Parser::reset() { + if (getInputStream() != nullptr) { + getInputStream()->seek(0); + } + 
_errHandler->reset(this); // Watch out, this is not shared_ptr.reset(). + + _matchedEOF = false; + _syntaxErrors = 0; + setTrace(false); + _precedenceStack.clear(); + _precedenceStack.push_back(0); + _ctx = nullptr; + _tracker.reset(); + + atn::ATNSimulator *interpreter = getInterpreter<atn::ParserATNSimulator>(); + if (interpreter != nullptr) { + interpreter->reset(); + } +} + +Token* Parser::match(size_t ttype) { + Token *t = getCurrentToken(); + if (t->getType() == ttype) { + if (ttype == EOF) { + _matchedEOF = true; + } + _errHandler->reportMatch(this); + consume(); + } else { + t = _errHandler->recoverInline(this); + if (_buildParseTrees && t->getTokenIndex() == INVALID_INDEX) { + // we must have conjured up a new token during single token insertion + // if it's not the current symbol + _ctx->addChild(createErrorNode(t)); + } + } + return t; +} + +Token* Parser::matchWildcard() { + Token *t = getCurrentToken(); + if (t->getType() > 0) { + _errHandler->reportMatch(this); + consume(); + } else { + t = _errHandler->recoverInline(this); + if (_buildParseTrees && t->getTokenIndex() == INVALID_INDEX) { + // we must have conjured up a new token during single token insertion + // if it's not the current symbol + _ctx->addChild(createErrorNode(t)); + } + } + + return t; +} + +void Parser::setBuildParseTree(bool buildParseTrees) { + this->_buildParseTrees = buildParseTrees; +} + +bool Parser::getBuildParseTree() { + return _buildParseTrees; +} + +void Parser::setTrimParseTree(bool trimParseTrees) { + if (trimParseTrees) { + if (getTrimParseTree()) { + return; + } + addParseListener(&TrimToSizeListener::INSTANCE); + } else { + removeParseListener(&TrimToSizeListener::INSTANCE); + } +} + +bool Parser::getTrimParseTree() { + // getParseListeners() returns a copy of the list, so iterators taken from separate calls belong to + // different containers and must not be compared. Search the member directly instead. + return std::find(_parseListeners.begin(), _parseListeners.end(), &TrimToSizeListener::INSTANCE) != _parseListeners.end(); +} + +std::vector<tree::ParseTreeListener *> Parser::getParseListeners() { + return _parseListeners; +} + +void 
Parser::addParseListener(tree::ParseTreeListener *listener) { + if (!listener) { + throw NullPointerException("listener"); + } + + this->_parseListeners.push_back(listener); +} + +void Parser::removeParseListener(tree::ParseTreeListener *listener) { + if (!_parseListeners.empty()) { + auto it = std::find(_parseListeners.begin(), _parseListeners.end(), listener); + if (it != _parseListeners.end()) { + _parseListeners.erase(it); + } + } +} + +void Parser::removeParseListeners() { + _parseListeners.clear(); +} + +void Parser::triggerEnterRuleEvent() { + for (auto *listener : _parseListeners) { + listener->enterEveryRule(_ctx); + _ctx->enterRule(listener); + } +} + +void Parser::triggerExitRuleEvent() { + // reverse order walk of listeners + for (auto it = _parseListeners.rbegin(); it != _parseListeners.rend(); ++it) { + _ctx->exitRule(*it); + (*it)->exitEveryRule(_ctx); + } +} + +size_t Parser::getNumberOfSyntaxErrors() { + return _syntaxErrors; +} + +TokenFactory<CommonToken>* Parser::getTokenFactory() { + return _input->getTokenSource()->getTokenFactory(); +} + +const atn::ATN& Parser::getATNWithBypassAlts() { + auto serializedAtn = getSerializedATN(); + if (serializedAtn.empty()) { + throw UnsupportedOperationException("The current parser does not support an ATN with bypass alternatives."); + } + // XXX: using the entire serialized ATN as key into the map is a big resource waste. + // How large can that thing become? 
+ auto *cache = getBypassAltsAtnCache(); + { + std::shared_lock<std::shared_mutex> lock(cache->mutex); + auto existing = cache->map.find(serializedAtn); + if (existing != cache->map.end()) { + return *existing->second; + } + } + + std::unique_lock<std::shared_mutex> lock(cache->mutex); + auto existing = cache->map.find(serializedAtn); + if (existing != cache->map.end()) { + return *existing->second; + } + atn::ATNDeserializationOptions deserializationOptions; + deserializationOptions.setGenerateRuleBypassTransitions(true); + atn::ATNDeserializer deserializer(deserializationOptions); + auto atn = deserializer.deserialize(serializedAtn); + return *cache->map.insert(std::make_pair(std::vector<int32_t>(serializedAtn.begin(), serializedAtn.end()), std::move(atn))).first->second; +} + +tree::pattern::ParseTreePattern Parser::compileParseTreePattern(const std::string &pattern, int patternRuleIndex) { + if (getTokenStream() != nullptr) { + TokenSource *tokenSource = getTokenStream()->getTokenSource(); + if (is<Lexer*>(tokenSource)) { + Lexer *lexer = dynamic_cast<Lexer *>(tokenSource); + return compileParseTreePattern(pattern, patternRuleIndex, lexer); + } + } + throw UnsupportedOperationException("Parser can't discover a lexer to use"); +} + +tree::pattern::ParseTreePattern Parser::compileParseTreePattern(const std::string &pattern, int patternRuleIndex, + Lexer *lexer) { + tree::pattern::ParseTreePatternMatcher m(lexer, this); + return m.compile(pattern, patternRuleIndex); +} + +Ref<ANTLRErrorStrategy> Parser::getErrorHandler() { + return _errHandler; +} + +void Parser::setErrorHandler(Ref<ANTLRErrorStrategy> const& handler) { + _errHandler = handler; +} + +IntStream* Parser::getInputStream() { + return getTokenStream(); +} + +void Parser::setInputStream(IntStream *input) { + setTokenStream(static_cast<TokenStream*>(input)); +} + +TokenStream* Parser::getTokenStream() { + return _input; +} + +void Parser::setTokenStream(TokenStream *input) { + _input = nullptr; // Just a 
reference we don't own. + reset(); + _input = input; +} + +Token* Parser::getCurrentToken() { + return _input->LT(1); +} + +void Parser::notifyErrorListeners(const std::string &msg) { + notifyErrorListeners(getCurrentToken(), msg, nullptr); +} + +void Parser::notifyErrorListeners(Token *offendingToken, const std::string &msg, std::exception_ptr e) { + _syntaxErrors++; + size_t line = offendingToken->getLine(); + size_t charPositionInLine = offendingToken->getCharPositionInLine(); + + ProxyErrorListener &listener = getErrorListenerDispatch(); + listener.syntaxError(this, offendingToken, line, charPositionInLine, msg, e); +} + +Token* Parser::consume() { + Token *o = getCurrentToken(); + if (o->getType() != EOF) { + getInputStream()->consume(); + } + + bool hasListener = _parseListeners.size() > 0 && !_parseListeners.empty(); + if (_buildParseTrees || hasListener) { + if (_errHandler->inErrorRecoveryMode(this)) { + tree::ErrorNode *node = createErrorNode(o); + _ctx->addChild(node); + if (_parseListeners.size() > 0) { + for (auto *listener : _parseListeners) { + listener->visitErrorNode(node); + } + } + } else { + tree::TerminalNode *node = _ctx->addChild(createTerminalNode(o)); + if (_parseListeners.size() > 0) { + for (auto *listener : _parseListeners) { + listener->visitTerminal(node); + } + } + } + } + return o; +} + +void Parser::addContextToParseTree() { + // Add current context to parent if we have a parent. 
+ if (_ctx->parent == nullptr) + return; + + downCast<ParserRuleContext*>(_ctx->parent)->addChild(_ctx); +} + +void Parser::enterRule(ParserRuleContext *localctx, size_t state, size_t /*ruleIndex*/) { + setState(state); + _ctx = localctx; + _ctx->start = _input->LT(1); + if (_buildParseTrees) { + addContextToParseTree(); + } + if (_parseListeners.size() > 0) { + triggerEnterRuleEvent(); + } +} + +void Parser::exitRule() { + if (_matchedEOF) { + // if we have matched EOF, it cannot consume past EOF so we use LT(1) here + _ctx->stop = _input->LT(1); // LT(1) will be end of file + } else { + _ctx->stop = _input->LT(-1); // stop node is what we just matched + } + + // trigger event on ctx, before it reverts to parent + if (_parseListeners.size() > 0) { + triggerExitRuleEvent(); + } + setState(_ctx->invokingState); + _ctx = downCast<ParserRuleContext*>(_ctx->parent); +} + +void Parser::enterOuterAlt(ParserRuleContext *localctx, size_t altNum) { + localctx->setAltNumber(altNum); + + // if we have new localctx, make sure we replace existing ctx + // that is previous child of parse tree + if (_buildParseTrees && _ctx != localctx) { + if (_ctx->parent != nullptr) { + ParserRuleContext *parent = downCast<ParserRuleContext*>(_ctx->parent); + parent->removeLastChild(); + parent->addChild(localctx); + } + } + _ctx = localctx; +} + +int Parser::getPrecedence() const { + if (_precedenceStack.empty()) { + return -1; + } + + return _precedenceStack.back(); +} + +void Parser::enterRecursionRule(ParserRuleContext *localctx, size_t ruleIndex) { + enterRecursionRule(localctx, getATN().ruleToStartState[ruleIndex]->stateNumber, ruleIndex, 0); +} + +void Parser::enterRecursionRule(ParserRuleContext *localctx, size_t state, size_t /*ruleIndex*/, int precedence) { + setState(state); + _precedenceStack.push_back(precedence); + _ctx = localctx; + _ctx->start = _input->LT(1); + if (!_parseListeners.empty()) { + triggerEnterRuleEvent(); // simulates rule entry for left-recursive rules + } +} + 
+void Parser::pushNewRecursionContext(ParserRuleContext *localctx, size_t state, size_t /*ruleIndex*/) { + ParserRuleContext *previous = _ctx; + previous->parent = localctx; + previous->invokingState = state; + previous->stop = _input->LT(-1); + + _ctx = localctx; + _ctx->start = previous->start; + if (_buildParseTrees) { + _ctx->addChild(previous); + } + + if (_parseListeners.size() > 0) { + triggerEnterRuleEvent(); // simulates rule entry for left-recursive rules + } +} + +void Parser::unrollRecursionContexts(ParserRuleContext *parentctx) { + _precedenceStack.pop_back(); + _ctx->stop = _input->LT(-1); + ParserRuleContext *retctx = _ctx; // save current ctx (return value) + + // unroll so ctx is as it was before call to recursive method + if (_parseListeners.size() > 0) { + while (_ctx != parentctx) { + triggerExitRuleEvent(); + _ctx = downCast<ParserRuleContext*>(_ctx->parent); + } + } else { + _ctx = parentctx; + } + + // hook into tree + retctx->parent = parentctx; + + if (_buildParseTrees && parentctx != nullptr) { + // add return ctx into invoking rule's tree + parentctx->addChild(retctx); + } +} + +ParserRuleContext* Parser::getInvokingContext(size_t ruleIndex) { + ParserRuleContext *p = _ctx; + while (p) { + if (p->getRuleIndex() == ruleIndex) { + return p; + } + if (p->parent == nullptr) + break; + p = downCast<ParserRuleContext*>(p->parent); + } + return nullptr; +} + +ParserRuleContext* Parser::getContext() { + return _ctx; +} + +void Parser::setContext(ParserRuleContext *ctx) { + _ctx = ctx; +} + +bool Parser::precpred(RuleContext * /*localctx*/, int precedence) { + return precedence >= _precedenceStack.back(); +} + +bool Parser::inContext(const std::string &/*context*/) { + // TODO: useful in parser? 
+ return false; +} + +bool Parser::isExpectedToken(size_t symbol) { + const atn::ATN &atn = getInterpreter<atn::ParserATNSimulator>()->atn; + ParserRuleContext *ctx = _ctx; + atn::ATNState *s = atn.states[getState()]; + misc::IntervalSet following = atn.nextTokens(s); + + if (following.contains(symbol)) { + return true; + } + + if (!following.contains(Token::EPSILON)) { + return false; + } + + while (ctx && ctx->invokingState != ATNState::INVALID_STATE_NUMBER && following.contains(Token::EPSILON)) { + atn::ATNState *invokingState = atn.states[ctx->invokingState]; + const atn::RuleTransition *rt = static_cast<const atn::RuleTransition*>(invokingState->transitions[0].get()); + following = atn.nextTokens(rt->followState); + if (following.contains(symbol)) { + return true; + } + + ctx = downCast<ParserRuleContext*>(ctx->parent); + } + + if (following.contains(Token::EPSILON) && symbol == EOF) { + return true; + } + + return false; +} + +bool Parser::isMatchedEOF() const { + return _matchedEOF; +} + +misc::IntervalSet Parser::getExpectedTokens() { + return getATN().getExpectedTokens(getState(), getContext()); +} + +misc::IntervalSet Parser::getExpectedTokensWithinCurrentRule() { + const atn::ATN &atn = getInterpreter<atn::ParserATNSimulator>()->atn; + atn::ATNState *s = atn.states[getState()]; + return atn.nextTokens(s); +} + +size_t Parser::getRuleIndex(const std::string &ruleName) { + const std::map<std::string, size_t> &m = getRuleIndexMap(); + auto iterator = m.find(ruleName); + if (iterator == m.end()) { + return INVALID_INDEX; + } + return iterator->second; +} + +ParserRuleContext* Parser::getRuleContext() { + return _ctx; +} + +std::vector<std::string> Parser::getRuleInvocationStack() { + return getRuleInvocationStack(_ctx); +} + +std::vector<std::string> Parser::getRuleInvocationStack(RuleContext *p) { + std::vector<std::string> const& ruleNames = getRuleNames(); + std::vector<std::string> stack; + RuleContext *run = p; + while (run != nullptr) { + // compute 
what follows who invoked us + size_t ruleIndex = run->getRuleIndex(); + if (ruleIndex == INVALID_INDEX ) { + stack.push_back("n/a"); + } else { + stack.push_back(ruleNames[ruleIndex]); + } + if (!RuleContext::is(run->parent)) { + break; + } + run = downCast<RuleContext*>(run->parent); + } + return stack; +} + +std::vector<std::string> Parser::getDFAStrings() { + atn::ParserATNSimulator *simulator = getInterpreter<atn::ParserATNSimulator>(); + if (!simulator->decisionToDFA.empty()) { + UniqueLock<Mutex> lck(_mutex); + + std::vector<std::string> s; + for (size_t d = 0; d < simulator->decisionToDFA.size(); d++) { + dfa::DFA &dfa = simulator->decisionToDFA[d]; + s.push_back(dfa.toString(getVocabulary())); + } + return s; + } + return std::vector<std::string>(); +} + +void Parser::dumpDFA() { + atn::ParserATNSimulator *simulator = getInterpreter<atn::ParserATNSimulator>(); + if (!simulator->decisionToDFA.empty()) { + UniqueLock<Mutex> lck(_mutex); + bool seenOne = false; + for (size_t d = 0; d < simulator->decisionToDFA.size(); d++) { + dfa::DFA &dfa = simulator->decisionToDFA[d]; + if (!dfa.states.empty()) { + if (seenOne) { + std::cout << std::endl; + } + std::cout << "Decision " << dfa.decision << ":" << std::endl; + std::cout << dfa.toString(getVocabulary()); + seenOne = true; + } + } + } +} + +std::string Parser::getSourceName() { + return _input->getSourceName(); +} + +atn::ParseInfo Parser::getParseInfo() const { + atn::ParserATNSimulator *simulator = getInterpreter<atn::ParserATNSimulator>(); + return atn::ParseInfo(dynamic_cast<atn::ProfilingATNSimulator*>(simulator)); +} + +void Parser::setProfile(bool profile) { + atn::ParserATNSimulator *interp = getInterpreter<atn::ParserATNSimulator>(); + atn::PredictionMode saveMode = interp != nullptr ? 
interp->getPredictionMode() : atn::PredictionMode::LL; + if (profile) { + if (!is<atn::ProfilingATNSimulator *>(interp)) { + setInterpreter(new atn::ProfilingATNSimulator(this)); /* mem-check: replacing existing interpreter which gets deleted. */ + } + } else if (is<atn::ProfilingATNSimulator *>(interp)) { + /* mem-check: replacing existing interpreter which gets deleted. */ + atn::ParserATNSimulator *sim = new atn::ParserATNSimulator(this, getATN(), interp->decisionToDFA, interp->getSharedContextCache()); + setInterpreter(sim); + } + getInterpreter<atn::ParserATNSimulator>()->setPredictionMode(saveMode); +} + +void Parser::setTrace(bool trace) { + if (!trace) { + if (_tracer) + removeParseListener(_tracer); + delete _tracer; + _tracer = nullptr; + } else { + if (_tracer) + removeParseListener(_tracer); // Just in case this is triggered multiple times. + _tracer = new TraceListener(this); + addParseListener(_tracer); + } +} + +bool Parser::isTrace() const { + return _tracer != nullptr; +} + +tree::TerminalNode *Parser::createTerminalNode(Token *t) { + return _tracker.createInstance<tree::TerminalNodeImpl>(t); +} + +tree::ErrorNode *Parser::createErrorNode(Token *t) { + return _tracker.createInstance<tree::ErrorNodeImpl>(t); +} + +void Parser::InitializeInstanceFields() { + _errHandler = std::make_shared<DefaultErrorStrategy>(); + _precedenceStack.clear(); + _precedenceStack.push_back(0); + _buildParseTrees = true; + _syntaxErrors = 0; + _matchedEOF = false; + _input = nullptr; + _tracer = nullptr; + _ctx = nullptr; +} + diff --git a/contrib/libs/antlr4_cpp_runtime/src/Parser.h b/contrib/libs/antlr4_cpp_runtime/src/Parser.h new file mode 100644 index 0000000000..f490b00c38 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/Parser.h @@ -0,0 +1,461 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
 */

#pragma once

#include "Recognizer.h"
#include "tree/ParseTreeListener.h"
#include "tree/ParseTree.h"
#include "TokenStream.h"
#include "TokenSource.h"
#include "misc/Interval.h"

namespace antlr4 {

  /// This is all the parsing support code essentially; most of it is error recovery stuff.
  class ANTLR4CPP_PUBLIC Parser : public Recognizer {
  public:

    /// Parse listener installed by <seealso cref="#setTrace"/> while tracing is enabled.
    class TraceListener : public tree::ParseTreeListener {
    public:
      TraceListener(Parser *outerInstance);
      virtual ~TraceListener();

      virtual void enterEveryRule(ParserRuleContext *ctx) override;
      virtual void visitTerminal(tree::TerminalNode *node) override;
      virtual void visitErrorNode(tree::ErrorNode *node) override;
      virtual void exitEveryRule(ParserRuleContext *ctx) override;

    private:
      /// The parser this listener was created for.
      Parser *const outerInstance;
    };

    /// Parse listener referred to by <seealso cref="#setTrimParseTree"/> and
    /// <seealso cref="#getTrimParseTree"/>; a single shared instance is exposed as INSTANCE.
    class TrimToSizeListener : public tree::ParseTreeListener {
    public:
      static TrimToSizeListener INSTANCE;

      virtual ~TrimToSizeListener();

      virtual void enterEveryRule(ParserRuleContext *ctx) override;
      virtual void visitTerminal(tree::TerminalNode *node) override;
      virtual void visitErrorNode(tree::ErrorNode *node) override;
      virtual void exitEveryRule(ParserRuleContext *ctx) override;
    };

    Parser(TokenStream *input);
    virtual ~Parser();

    /// reset the parser's state
    virtual void reset();

    /// <summary>
    /// Match current input symbol against {@code ttype}. If the symbol type
    /// matches, <seealso cref="ANTLRErrorStrategy#reportMatch"/> and <seealso cref="#consume"/> are
    /// called to complete the match process.
    ///
    /// If the symbol type does not match,
    /// <seealso cref="ANTLRErrorStrategy#recoverInline"/> is called on the current error
    /// strategy to attempt recovery. If <seealso cref="#getBuildParseTree"/> is
    /// {@code true} and the token index of the symbol returned by
    /// <seealso cref="ANTLRErrorStrategy#recoverInline"/> is -1, the symbol is added to
    /// the parse tree by calling {@link #createErrorNode(Token)} then
    /// {@link ParserRuleContext#addErrorNode(ErrorNode)}.
    /// </summary>
    /// <param name="ttype"> the token type to match </param>
    /// <returns> the matched symbol </returns>
    /// <exception cref="RecognitionException"> if the current input symbol did not match
    /// {@code ttype} and the error strategy could not recover from the
    /// mismatched symbol </exception>
    virtual Token* match(size_t ttype);

    /// <summary>
    /// Match current input symbol as a wildcard. If the symbol type matches
    /// (i.e. has a value greater than 0), <seealso cref="ANTLRErrorStrategy#reportMatch"/>
    /// and <seealso cref="#consume"/> are called to complete the match process.
    /// <p/>
    /// If the symbol type does not match,
    /// <seealso cref="ANTLRErrorStrategy#recoverInline"/> is called on the current error
    /// strategy to attempt recovery. If <seealso cref="#getBuildParseTree"/> is
    /// {@code true} and the token index of the symbol returned by
    /// <seealso cref="ANTLRErrorStrategy#recoverInline"/> is -1, the symbol is added to
    /// the parse tree by calling <seealso cref="ParserRuleContext#addErrorNode"/>.
    /// </summary>
    /// <returns> the matched symbol </returns>
    /// <exception cref="RecognitionException"> if the current input symbol did not match
    /// a wildcard and the error strategy could not recover from the mismatched
    /// symbol </exception>
    virtual Token* matchWildcard();

    /// <summary>
    /// Track the <seealso cref="ParserRuleContext"/> objects during the parse and hook
    /// them up using the <seealso cref="ParserRuleContext#children"/> list so that it
    /// forms a parse tree. The <seealso cref="ParserRuleContext"/> returned from the start
    /// rule represents the root of the parse tree.
    /// <p/>
    /// Note that if we are not building parse trees, rule contexts only point
    /// upwards. When a rule exits, it returns the context but that gets garbage
    /// collected if nobody holds a reference. It points upwards but nobody
    /// points at it.
    /// <p/>
    /// When we build parse trees, we are adding all of these contexts to
    /// <seealso cref="ParserRuleContext#children"/> list. Contexts are then not candidates
    /// for garbage collection.
    /// </summary>
    virtual void setBuildParseTree(bool buildParseTrees);

    /// <summary>
    /// Gets whether or not a complete parse tree will be constructed while
    /// parsing. This property is {@code true} for a newly constructed parser.
    /// </summary>
    /// <returns> {@code true} if a complete parse tree will be constructed while
    /// parsing, otherwise {@code false} </returns>
    virtual bool getBuildParseTree();

    /// <summary>
    /// Trim the internal lists of the parse tree during parsing to conserve memory.
    /// This property is set to {@code false} by default for a newly constructed parser.
    /// </summary>
    /// <param name="trimParseTrees"> {@code true} to trim the capacity of the <seealso cref="ParserRuleContext#children"/>
    /// list to its size after a rule is parsed. </param>
    virtual void setTrimParseTree(bool trimParseTrees);

    /// <returns> {@code true} if the <seealso cref="ParserRuleContext#children"/> list is trimmed
    /// using the default <seealso cref="Parser.TrimToSizeListener"/> during the parse process. </returns>
    virtual bool getTrimParseTree();

    /// Returns the registered parse listeners (by value, i.e. a copy of the list).
    virtual std::vector<tree::ParseTreeListener *> getParseListeners();

    /// <summary>
    /// Registers {@code listener} to receive events during the parsing process.
    /// <p/>
    /// To support output-preserving grammar transformations (including but not
    /// limited to left-recursion removal, automated left-factoring, and
    /// optimized code generation), calls to listener methods during the parse
    /// may differ substantially from calls made by
    /// <seealso cref="ParseTreeWalker#DEFAULT"/> used after the parse is complete. In
    /// particular, rule entry and exit events may occur in a different order
    /// during the parse than after the parse. In addition, calls to certain
    /// rule entry methods may be omitted.
    /// <p/>
    /// With the following specific exceptions, calls to listener events are
    /// <em>deterministic</em>, i.e. for identical input the calls to listener
    /// methods will be the same.
    ///
    /// <ul>
    /// <li>Alterations to the grammar used to generate code may change the
    /// behavior of the listener calls.</li>
    /// <li>Alterations to the command line options passed to ANTLR 4 when
    /// generating the parser may change the behavior of the listener calls.</li>
    /// <li>Changing the version of the ANTLR Tool used to generate the parser
    /// may change the behavior of the listener calls.</li>
    /// </ul>
    /// </summary>
    /// <param name="listener"> the listener to add
    /// </param>
    /// <exception cref="NullPointerException"> if {@code listener} is {@code null} </exception>
    virtual void addParseListener(tree::ParseTreeListener *listener);

    /// <summary>
    /// Remove {@code listener} from the list of parse listeners.
    /// <p/>
    /// If {@code listener} is {@code null} or has not been added as a parse
    /// listener, this method does nothing.
    /// </summary>
    /// <seealso cref= #addParseListener
    /// </seealso>
    /// <param name="listener"> the listener to remove </param>
    virtual void removeParseListener(tree::ParseTreeListener *listener);

    /// <summary>
    /// Remove all parse listeners.
    /// </summary>
    /// <seealso cref= #addParseListener </seealso>
    virtual void removeParseListeners();

    /// <summary>
    /// Notify any parse listeners of an enter rule event.
    /// </summary>
    /// <seealso cref= #addParseListener </seealso>
    virtual void triggerEnterRuleEvent();

    /// <summary>
    /// Notify any parse listeners of an exit rule event.
    /// </summary>
    /// <seealso cref= #addParseListener </seealso>
    virtual void triggerExitRuleEvent();

    /// <summary>
    /// Gets the number of syntax errors reported during parsing. This value is
    /// incremented each time <seealso cref="#notifyErrorListeners"/> is called.
    /// </summary>
    /// <seealso cref= #notifyErrorListeners </seealso>
    virtual size_t getNumberOfSyntaxErrors();

    virtual TokenFactory<CommonToken>* getTokenFactory() override;

    /// <summary>
    /// Tell our token source about a new way to create tokens. </summary>
    template<typename T1>
    void setTokenFactory(TokenFactory<T1> *factory)  {
      _input->getTokenSource()->setTokenFactory(factory);
    }

    /// The ATN with bypass alternatives is expensive to create so we create it
    /// lazily. The ATN is owned by us.
    virtual const atn::ATN& getATNWithBypassAlts();

    /// <summary>
    /// The preferred method of getting a tree pattern. For example, here's a
    /// sample use:
    ///
    /// <pre>
    /// ParseTree t = parser.expr();
    /// ParseTreePattern p = parser.compileParseTreePattern("<ID>+0", MyParser.RULE_expr);
    /// ParseTreeMatch m = p.match(t);
    /// String id = m.get("ID");
    /// </pre>
    /// </summary>
    virtual tree::pattern::ParseTreePattern compileParseTreePattern(const std::string &pattern, int patternRuleIndex);

    /// <summary>
    /// The same as <seealso cref="#compileParseTreePattern(String, int)"/> but specify a
    /// <seealso cref="Lexer"/> rather than trying to deduce it from this parser.
    /// </summary>
    virtual tree::pattern::ParseTreePattern compileParseTreePattern(const std::string &pattern, int patternRuleIndex,
                                                                    Lexer *lexer);

    virtual Ref<ANTLRErrorStrategy> getErrorHandler();
    virtual void setErrorHandler(Ref<ANTLRErrorStrategy> const& handler);

    /// The input stream of a parser is its token stream; setInputStream()
    /// forwards to <seealso cref="#setTokenStream"/>.
    virtual IntStream* getInputStream() override;
    void setInputStream(IntStream *input) override;

    virtual TokenStream* getTokenStream();

    /// Set the token stream and reset the parser.
    virtual void setTokenStream(TokenStream *input);

    /// <summary>
    /// Match needs to return the current input symbol, which gets put
    /// into the label for the associated token ref; e.g., x=ID.
    /// </summary>
    virtual Token* getCurrentToken();

    /// Reports {@code msg} against the current token, without an exception.
    void notifyErrorListeners(const std::string &msg);

    /// Increments the syntax error count and forwards the error to the error
    /// listener dispatch, using the line and column of {@code offendingToken}.
    virtual void notifyErrorListeners(Token *offendingToken, const std::string &msg, std::exception_ptr e);

    /// Consume and return the <seealso cref="#getCurrentToken current symbol"/>.
    /// <p/>
    /// E.g., given the following input with {@code A} being the current
    /// lookahead symbol, this function moves the cursor to {@code B} and returns
    /// {@code A}.
    ///
    /// <pre>
    ///  A B
    ///  ^
    /// </pre>
    ///
    /// If the parser is not in error recovery mode, the consumed symbol is added
    /// to the parse tree using <seealso cref="ParserRuleContext#addChild(TerminalNode)"/>, and
    /// <seealso cref="ParseTreeListener#visitTerminal"/> is called on any parse listeners.
    /// If the parser <em>is</em> in error recovery mode, the consumed symbol is
    /// added to the parse tree using {@link #createErrorNode(Token)} then
    /// {@link ParserRuleContext#addErrorNode(ErrorNode)} and
    /// <seealso cref="ParseTreeListener#visitErrorNode"/> is called on any parse
    /// listeners.
    virtual Token* consume();

    /// Always called by generated parsers upon entry to a rule. Access field
    /// <seealso cref="#_ctx"/> to get the current context.
    virtual void enterRule(ParserRuleContext *localctx, size_t state, size_t ruleIndex);

    /// Called on exit from a rule: records the stop token of the current
    /// context, fires the exit-rule event if listeners are registered, and
    /// restores the invoking state and the parent context.
    void exitRule();

    virtual void enterOuterAlt(ParserRuleContext *localctx, size_t altNum);

    /**
     * Get the precedence level for the top-most precedence rule.
     *
     * @return The precedence level for the top-most precedence rule, or -1 if
     * the parser context is not nested within a precedence rule.
     */
    int getPrecedence() const;

    /// @deprecated Use
    /// <seealso cref="#enterRecursionRule(ParserRuleContext, int, int, int)"/> instead.
    virtual void enterRecursionRule(ParserRuleContext *localctx, size_t ruleIndex);
    virtual void enterRecursionRule(ParserRuleContext *localctx, size_t state, size_t ruleIndex, int precedence);

    /** Like {@link #enterRule} but for recursive rules.
     *  Make the current context the child of the incoming localctx.
     */
    virtual void pushNewRecursionContext(ParserRuleContext *localctx, size_t state, size_t ruleIndex);
    virtual void unrollRecursionContexts(ParserRuleContext *parentctx);
    virtual ParserRuleContext* getInvokingContext(size_t ruleIndex);
    virtual ParserRuleContext* getContext();
    virtual void setContext(ParserRuleContext *ctx);
    virtual bool precpred(RuleContext *localctx, int precedence) override;
    virtual bool inContext(const std::string &context);

    /// <summary>
    /// Checks whether or not {@code symbol} can follow the current state in the
    /// ATN. The behavior of this method is equivalent to the following, but is
    /// implemented such that the complete context-sensitive follow set does not
    /// need to be explicitly constructed.
    ///
    /// <pre>
    /// return getExpectedTokens().contains(symbol);
    /// </pre>
    /// </summary>
    /// <param name="symbol"> the symbol type to check </param>
    /// <returns> {@code true} if {@code symbol} can follow the current state in
    /// the ATN, otherwise {@code false}. </returns>
    virtual bool isExpectedToken(size_t symbol);

    /// Returns the {@code _matchedEOF} flag.
    bool isMatchedEOF() const;

    /// <summary>
    /// Computes the set of input symbols which could follow the current parser
    /// state and context, as given by <seealso cref="#getState"/> and <seealso cref="#getContext"/>,
    /// respectively.
    /// </summary>
    /// <seealso cref= ATN#getExpectedTokens(int, RuleContext) </seealso>
    virtual misc::IntervalSet getExpectedTokens();

    virtual misc::IntervalSet getExpectedTokensWithinCurrentRule();

    /// Get a rule's index (i.e., {@code RULE_ruleName} field) or INVALID_INDEX if not found.
    virtual size_t getRuleIndex(const std::string &ruleName);

    virtual ParserRuleContext* getRuleContext();

    /// <summary>
    /// Return List<String> of the rule names in your parser instance
    /// leading up to a call to the current rule. You could override if
    /// you want more details such as the file/line info of where
    /// in the ATN a rule is invoked.
    ///
    /// This is very useful for error messages.
    /// </summary>
    virtual std::vector<std::string> getRuleInvocationStack();

    virtual std::vector<std::string> getRuleInvocationStack(RuleContext *p);

    /// <summary>
    /// For debugging and other purposes. </summary>
    virtual std::vector<std::string> getDFAStrings();

    /// <summary>
    /// For debugging and other purposes. </summary>
    virtual void dumpDFA();

    virtual std::string getSourceName();

    atn::ParseInfo getParseInfo() const;

    /**
     * @since 4.3
     */
    void setProfile(bool profile);

    /// <summary>
    /// During a parse it is sometimes useful to listen in on the rule entry and exit
    /// events as well as token matches. This is for quick and dirty debugging.
    /// </summary>
    virtual void setTrace(bool trace);

    /**
     * Gets whether a {@link TraceListener} is registered as a parse listener
     * for the parser.
     *
     * @see #setTrace(boolean)
     */
    bool isTrace() const;

    tree::ParseTreeTracker& getTreeTracker() { return _tracker; }

    /** How to create a token leaf node associated with a parent.
     *  Typically, the terminal node to create is not a function of the parent
     *  but this method must still set the parent pointer of the terminal node
     *  returned. I would prefer having {@link ParserRuleContext#addAnyChild(ParseTree)}
     *  set the parent pointer, but the parent pointer is implementation dependent
     *  and currently there is no setParent() in {@link TerminalNode} (and can't
     *  add method in Java 1.7 without breaking backward compatibility).
     *
     * @since 4.7
     */
    tree::TerminalNode *createTerminalNode(Token *t);

    /** How to create an error node, given a token, associated with a parent.
     *  Typically, the error node to create is not a function of the parent
     *  but this method must still set the parent pointer of the terminal node
     *  returned. I would prefer having {@link ParserRuleContext#addAnyChild(ParseTree)}
     *  set the parent pointer, but the parent pointer is implementation dependent
     *  and currently there is no setParent() in {@link ErrorNode} (and can't
     *  add method in Java 1.7 without breaking backward compatibility).
     *
     * @since 4.7
     */
    tree::ErrorNode *createErrorNode(Token *t);

  protected:
    /// The ParserRuleContext object for the currently executing rule.
    /// This is always non-null during the parsing process.
    // ml: this is one of the contexts tracked in _allocatedContexts.
    ParserRuleContext *_ctx;

    /// The error handling strategy for the parser. The default is DefaultErrorStrategy.
    /// See also getErrorHandler.
    Ref<ANTLRErrorStrategy> _errHandler;

    /// <summary>
    /// The input stream.
    /// </summary>
    /// <seealso cref= #getInputStream </seealso>
    /// <seealso cref= #setInputStream </seealso>
    TokenStream *_input;

    /// Stack of precedence levels: pushed by enterRecursionRule(), popped by
    /// unrollRecursionContexts(), and read by getPrecedence() and precpred().
    std::vector<int> _precedenceStack;

    /// <summary>
    /// Specifies whether or not the parser should construct a parse tree during
    /// the parsing process. The default value is {@code true}.
    /// </summary>
    /// <seealso cref= #getBuildParseTree </seealso>
    /// <seealso cref= #setBuildParseTree </seealso>
    bool _buildParseTrees;

    /// The list of <seealso cref="ParseTreeListener"/> listeners registered to receive
    /// events during the parse.
    /// <seealso cref= #addParseListener </seealso>
    std::vector<tree::ParseTreeListener *> _parseListeners;

    /// <summary>
    /// The number of syntax errors reported during parsing. This value is
    /// incremented each time <seealso cref="#notifyErrorListeners"/> is called.
    /// </summary>
    size_t _syntaxErrors;

    /** Indicates parser has matched the EOF token. See {@link #exitRule()}. */
    bool _matchedEOF;

    /// Adds the current context to its parent's children, if it has a parent.
    virtual void addContextToParseTree();

    // All rule contexts created during a parse run. This is cleared when calling reset().
    tree::ParseTreeTracker _tracker;

  private:
    /// When setTrace(true) is called, a reference to the
    /// TraceListener is stored here so it can be easily removed in a
    /// later call to setTrace(false). The listener itself is
    /// implemented as a parser listener so this field is not directly used by
    /// other parser methods.
    TraceListener *_tracer;

    void InitializeInstanceFields();
  };

} // namespace antlr4
diff --git a/contrib/libs/antlr4_cpp_runtime/src/ParserInterpreter.cpp b/contrib/libs/antlr4_cpp_runtime/src/ParserInterpreter.cpp
new file mode 100644
index 0000000000..e1c54a0eb1
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/ParserInterpreter.cpp
@@ -0,0 +1,294 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "dfa/DFA.h" +#include "atn/RuleStartState.h" +#include "InterpreterRuleContext.h" +#include "atn/ParserATNSimulator.h" +#include "ANTLRErrorStrategy.h" +#include "atn/LoopEndState.h" +#include "FailedPredicateException.h" +#include "atn/StarLoopEntryState.h" +#include "atn/AtomTransition.h" +#include "atn/RuleTransition.h" +#include "atn/PredicateTransition.h" +#include "atn/PrecedencePredicateTransition.h" +#include "atn/ActionTransition.h" +#include "atn/ATN.h" +#include "atn/RuleStopState.h" +#include "Lexer.h" +#include "Token.h" +#include "Vocabulary.h" +#include "InputMismatchException.h" +#include "CommonToken.h" +#include "tree/ErrorNode.h" + +#include "support/CPPUtils.h" +#include "support/Casts.h" + +#include "ParserInterpreter.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; + +using namespace antlrcpp; + +ParserInterpreter::ParserInterpreter(const std::string &grammarFileName, const dfa::Vocabulary &vocabulary, + const std::vector<std::string> &ruleNames, const atn::ATN &atn, TokenStream *input) + : Parser(input), _grammarFileName(grammarFileName), _atn(atn), _ruleNames(ruleNames), _vocabulary(vocabulary) { + + // init decision DFA + for (size_t i = 0; i < atn.getNumberOfDecisions(); ++i) { + atn::DecisionState *decisionState = atn.getDecisionState(i); + _decisionToDFA.push_back(dfa::DFA(decisionState, i)); + } + + // get atn simulator that knows how to do predictions + _interpreter = new atn::ParserATNSimulator(this, atn, _decisionToDFA, _sharedContextCache); /* mem-check: deleted in d-tor */ +} + +ParserInterpreter::~ParserInterpreter() { + delete _interpreter; +} + +void ParserInterpreter::reset() { + Parser::reset(); + _overrideDecisionReached = false; + _overrideDecisionRoot = nullptr; +} + +const atn::ATN& ParserInterpreter::getATN() const { + return _atn; +} + 
+const dfa::Vocabulary& ParserInterpreter::getVocabulary() const { + return _vocabulary; +} + +const std::vector<std::string>& ParserInterpreter::getRuleNames() const { + return _ruleNames; +} + +std::string ParserInterpreter::getGrammarFileName() const { + return _grammarFileName; +} + +ParserRuleContext* ParserInterpreter::parse(size_t startRuleIndex) { + atn::RuleStartState *startRuleStartState = _atn.ruleToStartState[startRuleIndex]; + + _rootContext = createInterpreterRuleContext(nullptr, atn::ATNState::INVALID_STATE_NUMBER, startRuleIndex); + + if (startRuleStartState->isLeftRecursiveRule) { + enterRecursionRule(_rootContext, startRuleStartState->stateNumber, startRuleIndex, 0); + } else { + enterRule(_rootContext, startRuleStartState->stateNumber, startRuleIndex); + } + + while (true) { + atn::ATNState *p = getATNState(); + switch (p->getStateType()) { + case atn::ATNStateType::RULE_STOP : + // pop; return from rule + if (_ctx->isEmpty()) { + if (startRuleStartState->isLeftRecursiveRule) { + ParserRuleContext *result = _ctx; + auto parentContext = _parentContextStack.top(); + _parentContextStack.pop(); + unrollRecursionContexts(parentContext.first); + return result; + } else { + exitRule(); + return _rootContext; + } + } + + visitRuleStopState(p); + break; + + default : + try { + visitState(p); + } + catch (RecognitionException &e) { + setState(_atn.ruleToStopState[p->ruleIndex]->stateNumber); + getErrorHandler()->reportError(this, e); + getContext()->exception = std::current_exception(); + recover(e); + } + + break; + } + } +} + +void ParserInterpreter::enterRecursionRule(ParserRuleContext *localctx, size_t state, size_t ruleIndex, int precedence) { + _parentContextStack.push({ _ctx, localctx->invokingState }); + Parser::enterRecursionRule(localctx, state, ruleIndex, precedence); +} + +void ParserInterpreter::addDecisionOverride(int decision, int tokenIndex, int forcedAlt) { + _overrideDecision = decision; + _overrideDecisionInputIndex = tokenIndex; + 
_overrideDecisionAlt = forcedAlt; +} + +Ref<InterpreterRuleContext> ParserInterpreter::getOverrideDecisionRoot() const { + return _overrideDecisionRoot; +} + +InterpreterRuleContext* ParserInterpreter::getRootContext() { + return _rootContext; +} + +atn::ATNState* ParserInterpreter::getATNState() { + return _atn.states[getState()]; +} + +void ParserInterpreter::visitState(atn::ATNState *p) { + size_t predictedAlt = 1; + if (DecisionState::is(p)) { + predictedAlt = visitDecisionState(downCast<DecisionState*>(p)); + } + + const atn::Transition *transition = p->transitions[predictedAlt - 1].get(); + switch (transition->getTransitionType()) { + case atn::TransitionType::EPSILON: + if (p->getStateType() == ATNStateType::STAR_LOOP_ENTRY && + (downCast<StarLoopEntryState *>(p))->isPrecedenceDecision && + !LoopEndState::is(transition->target)) { + // We are at the start of a left recursive rule's (...)* loop + // and we're not taking the exit branch of loop. + InterpreterRuleContext *localctx = createInterpreterRuleContext(_parentContextStack.top().first, + _parentContextStack.top().second, static_cast<int>(_ctx->getRuleIndex())); + pushNewRecursionContext(localctx, _atn.ruleToStartState[p->ruleIndex]->stateNumber, static_cast<int>(_ctx->getRuleIndex())); + } + break; + + case atn::TransitionType::ATOM: + match(static_cast<int>(static_cast<const atn::AtomTransition*>(transition)->_label)); + break; + + case atn::TransitionType::RANGE: + case atn::TransitionType::SET: + case atn::TransitionType::NOT_SET: + if (!transition->matches(static_cast<int>(_input->LA(1)), Token::MIN_USER_TOKEN_TYPE, Lexer::MAX_CHAR_VALUE)) { + recoverInline(); + } + matchWildcard(); + break; + + case atn::TransitionType::WILDCARD: + matchWildcard(); + break; + + case atn::TransitionType::RULE: + { + atn::RuleStartState *ruleStartState = static_cast<atn::RuleStartState*>(transition->target); + size_t ruleIndex = ruleStartState->ruleIndex; + InterpreterRuleContext *newctx = 
createInterpreterRuleContext(_ctx, p->stateNumber, ruleIndex); + if (ruleStartState->isLeftRecursiveRule) { + enterRecursionRule(newctx, ruleStartState->stateNumber, ruleIndex, static_cast<const atn::RuleTransition*>(transition)->precedence); + } else { + enterRule(newctx, transition->target->stateNumber, ruleIndex); + } + } + break; + + case atn::TransitionType::PREDICATE: + { + const atn::PredicateTransition *predicateTransition = static_cast<const atn::PredicateTransition*>(transition); + if (!sempred(_ctx, predicateTransition->getRuleIndex(), predicateTransition->getPredIndex())) { + throw FailedPredicateException(this); + } + } + break; + + case atn::TransitionType::ACTION: + { + const atn::ActionTransition *actionTransition = static_cast<const atn::ActionTransition*>(transition); + action(_ctx, actionTransition->ruleIndex, actionTransition->actionIndex); + } + break; + + case atn::TransitionType::PRECEDENCE: + { + if (!precpred(_ctx, static_cast<const atn::PrecedencePredicateTransition*>(transition)->getPrecedence())) { + throw FailedPredicateException(this, "precpred(_ctx, " + std::to_string(static_cast<const atn::PrecedencePredicateTransition*>(transition)->getPrecedence()) + ")"); + } + } + break; + + default: + throw UnsupportedOperationException("Unrecognized ATN transition type."); + } + + setState(transition->target->stateNumber); +} + +size_t ParserInterpreter::visitDecisionState(DecisionState *p) { + size_t predictedAlt = 1; + if (p->transitions.size() > 1) { + getErrorHandler()->sync(this); + int decision = p->decision; + if (decision == _overrideDecision && _input->index() == _overrideDecisionInputIndex && !_overrideDecisionReached) { + predictedAlt = _overrideDecisionAlt; + _overrideDecisionReached = true; + } else { + predictedAlt = getInterpreter<ParserATNSimulator>()->adaptivePredict(_input, decision, _ctx); + } + } + return predictedAlt; +} + +InterpreterRuleContext* ParserInterpreter::createInterpreterRuleContext(ParserRuleContext *parent, + 
size_t invokingStateNumber, size_t ruleIndex) { + return _tracker.createInstance<InterpreterRuleContext>(parent, invokingStateNumber, ruleIndex); +} + +void ParserInterpreter::visitRuleStopState(atn::ATNState *p) { + atn::RuleStartState *ruleStartState = _atn.ruleToStartState[p->ruleIndex]; + if (ruleStartState->isLeftRecursiveRule) { + std::pair<ParserRuleContext *, size_t> parentContext = _parentContextStack.top(); + _parentContextStack.pop(); + + unrollRecursionContexts(parentContext.first); + setState(parentContext.second); + } else { + exitRule(); + } + + const atn::RuleTransition *ruleTransition = static_cast<const atn::RuleTransition*>(_atn.states[getState()]->transitions[0].get()); + setState(ruleTransition->followState->stateNumber); +} + +void ParserInterpreter::recover(RecognitionException &e) { + size_t i = _input->index(); + getErrorHandler()->recover(this, std::make_exception_ptr(e)); + + if (_input->index() == i) { + // no input consumed, better add an error node + if (is<InputMismatchException *>(&e)) { + InputMismatchException &ime = static_cast<InputMismatchException&>(e); + Token *tok = e.getOffendingToken(); + size_t expectedTokenType = ime.getExpectedTokens().getMinElement(); // get any element + _errorToken = getTokenFactory()->create({ tok->getTokenSource(), tok->getTokenSource()->getInputStream() }, + expectedTokenType, tok->getText(), Token::DEFAULT_CHANNEL, INVALID_INDEX, INVALID_INDEX, // invalid start/stop + tok->getLine(), tok->getCharPositionInLine()); + _ctx->addChild(createErrorNode(_errorToken.get())); + } + else { // NoViableAlt + Token *tok = e.getOffendingToken(); + _errorToken = getTokenFactory()->create({ tok->getTokenSource(), tok->getTokenSource()->getInputStream() }, + Token::INVALID_TYPE, tok->getText(), Token::DEFAULT_CHANNEL, INVALID_INDEX, INVALID_INDEX, // invalid start/stop + tok->getLine(), tok->getCharPositionInLine()); + _ctx->addChild(createErrorNode(_errorToken.get())); + } + } +} + +Token* 
ParserInterpreter::recoverInline() { + return _errHandler->recoverInline(this); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/ParserInterpreter.h b/contrib/libs/antlr4_cpp_runtime/src/ParserInterpreter.h new file mode 100644 index 0000000000..6d4a679e5b --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/ParserInterpreter.h @@ -0,0 +1,173 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Parser.h" +#include "atn/ATN.h" +#include "support/BitSet.h" +#include "atn/PredictionContext.h" +#include "atn/PredictionContextCache.h" +#include "Vocabulary.h" + +namespace antlr4 { + + /// <summary> + /// A parser simulator that mimics what ANTLR's generated + /// parser code does. A ParserATNSimulator is used to make + /// predictions via adaptivePredict but this class moves a pointer through the + /// ATN to simulate parsing. ParserATNSimulator just + /// makes us efficient rather than having to backtrack, for example. + /// + /// This properly creates parse trees even for left recursive rules. + /// + /// We rely on the left recursive rule invocation and special predicate + /// transitions to make left recursive rules work. + /// + /// See TestParserInterpreter for examples. 
+ /// </summary> + class ANTLR4CPP_PUBLIC ParserInterpreter : public Parser { + public: + ParserInterpreter(const std::string &grammarFileName, const dfa::Vocabulary &vocabulary, + const std::vector<std::string> &ruleNames, const atn::ATN &atn, TokenStream *input); + ~ParserInterpreter(); + + virtual void reset() override; + + virtual const atn::ATN& getATN() const override; + + virtual const dfa::Vocabulary& getVocabulary() const override; + + virtual const std::vector<std::string>& getRuleNames() const override; + virtual std::string getGrammarFileName() const override; + + /// Begin parsing at startRuleIndex + virtual ParserRuleContext* parse(size_t startRuleIndex); + + virtual void enterRecursionRule(ParserRuleContext *localctx, size_t state, size_t ruleIndex, int precedence) override; + + + /** Override this parser interpreter's normal decision-making process + * at a particular decision and input token index. Instead of + * allowing the adaptive prediction mechanism to choose the + * first alternative within a block that leads to a successful parse, + * force it to take the alternative, 1..n for n alternatives. + * + * As an implementation limitation right now, you can only specify one + * override. This is sufficient to allow construction of different + * parse trees for ambiguous input. It means re-parsing the entire input + * in general because you're never sure where an ambiguous sequence would + * live in the various parse trees. For example, in one interpretation, + * an ambiguous input sequence would be matched completely in expression + * but in another it could match all the way back to the root. + * + * s : e '!'? ; + * e : ID + * | ID '!' + * ; + * + * Here, x! can be matched as (s (e ID) !) or (s (e ID !)). In the first + * case, the ambiguous sequence is fully contained only by the root. + * In the second case, the ambiguous sequence is fully contained within just + * e, as in: (e ID !).
+ * + * Rather than trying to optimize this and make + * some intelligent decisions for optimization purposes, I settled on + * just re-parsing the whole input and then using + * {link Trees#getRootOfSubtreeEnclosingRegion} to find the minimal + * subtree that contains the ambiguous sequence. I originally tried to + * record the call stack at the point the parser detected an ambiguity but + * left recursive rules create a parse tree stack that does not reflect + * the actual call stack. That impedance mismatch was enough to make + * it challenging to restart the parser at a deeply nested rule + * invocation. + * + * Only parser interpreters can override decisions so as to avoid inserting + * override checking code in the critical ALL(*) prediction execution path. + * + * @since 4.5.1 + */ + void addDecisionOverride(int decision, int tokenIndex, int forcedAlt); + + Ref<InterpreterRuleContext> getOverrideDecisionRoot() const; + + /** Return the root of the parse, which can be useful if the parser + * bails out. You still can access the top node. Note that, + * because of the way left recursive rules add children, it's possible + * that the root will not have any children if the start rule immediately + * called a left recursive rule that fails. + * + * @since 4.5.1 + */ + InterpreterRuleContext* getRootContext(); + + protected: + const std::string _grammarFileName; + const atn::ATN &_atn; + + std::vector<std::string> _ruleNames; + + std::vector<dfa::DFA> _decisionToDFA; // not shared like it is for generated parsers + atn::PredictionContextCache _sharedContextCache; + + /** This stack corresponds to the _parentctx, _parentState pair of locals + * that would exist on call stack frames with a recursive descent parser; + * in the generated function for a left-recursive rule you'd see: + * + * private EContext e(int _p) throws RecognitionException { + * ParserRuleContext _parentctx = _ctx; // Pair.a + * int _parentState = getState(); // Pair.b + * ...
+ * } + * + * Those values are used to create new recursive rule invocation contexts + * associated with left operand of an alt like "expr '*' expr". + */ + std::stack<std::pair<ParserRuleContext *, size_t>> _parentContextStack; + + /** We need a map from (decision,inputIndex)->forced alt for computing ambiguous + * parse trees. For now, we allow exactly one override. + */ + int _overrideDecision = -1; + size_t _overrideDecisionInputIndex = INVALID_INDEX; + size_t _overrideDecisionAlt = INVALID_INDEX; + bool _overrideDecisionReached = false; // latch and only override once; error might trigger infinite loop + + /** What is the current context when we override a decision? This tells + * us what the root of the parse tree is when using override + * for an ambiguity/lookahead check. + */ + Ref<InterpreterRuleContext> _overrideDecisionRoot; + InterpreterRuleContext* _rootContext; + + virtual atn::ATNState *getATNState(); + virtual void visitState(atn::ATNState *p); + + /** Method visitDecisionState() is called when the interpreter reaches + * a decision state (instance of DecisionState). It gives an opportunity + * for subclasses to track interesting things. + */ + size_t visitDecisionState(atn::DecisionState *p); + + /** Provide simple "factory" for InterpreterRuleContext's. + * @since 4.5.1 + */ + InterpreterRuleContext* createInterpreterRuleContext(ParserRuleContext *parent, size_t invokingStateNumber, size_t ruleIndex); + + virtual void visitRuleStopState(atn::ATNState *p); + + /** Rely on the error handler for this parser but, if no tokens are consumed + * to recover, add an error node. Otherwise, nothing is seen in the parse + * tree. 
+ */ + void recover(RecognitionException &e); + Token* recoverInline(); + + private: + const dfa::Vocabulary &_vocabulary; + std::unique_ptr<Token> _errorToken; + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/ParserRuleContext.cpp b/contrib/libs/antlr4_cpp_runtime/src/ParserRuleContext.cpp new file mode 100644 index 0000000000..7eb3e6577f --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/ParserRuleContext.cpp @@ -0,0 +1,138 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "tree/TerminalNode.h" +#include "tree/ErrorNode.h" +#include "misc/Interval.h" +#include "Parser.h" +#include "Token.h" + +#include "support/Casts.h" +#include "support/CPPUtils.h" + +#include "ParserRuleContext.h" + +using namespace antlr4; +using namespace antlr4::tree; + +using namespace antlrcpp; + +ParserRuleContext ParserRuleContext::EMPTY; + +ParserRuleContext::ParserRuleContext() + : start(nullptr), stop(nullptr) { +} + +ParserRuleContext::ParserRuleContext(ParserRuleContext *parent, size_t invokingStateNumber) +: RuleContext(parent, invokingStateNumber), start(nullptr), stop(nullptr) { +} + +void ParserRuleContext::copyFrom(ParserRuleContext *ctx) { + // from RuleContext + this->parent = ctx->parent; + this->invokingState = ctx->invokingState; + + this->start = ctx->start; + this->stop = ctx->stop; + + // copy any error nodes to alt label node + if (!ctx->children.empty()) { + for (auto *child : ctx->children) { + if (ErrorNode::is(child)) { + downCast<ErrorNode*>(child)->setParent(this); + children.push_back(child); + } + } + + // Remove the just reparented error nodes from the source context. 
+ ctx->children.erase(std::remove_if(ctx->children.begin(), ctx->children.end(), [this](tree::ParseTree *e) -> bool { + return std::find(children.begin(), children.end(), e) != children.end(); + }), ctx->children.end()); + } +} + +void ParserRuleContext::enterRule(tree::ParseTreeListener * /*listener*/) { +} + +void ParserRuleContext::exitRule(tree::ParseTreeListener * /*listener*/) { +} + +tree::TerminalNode* ParserRuleContext::addChild(tree::TerminalNode *t) { + t->setParent(this); + children.push_back(t); + return t; +} + +RuleContext* ParserRuleContext::addChild(RuleContext *ruleInvocation) { + children.push_back(ruleInvocation); + return ruleInvocation; +} + +void ParserRuleContext::removeLastChild() { + if (!children.empty()) { + children.pop_back(); + } +} + +tree::TerminalNode* ParserRuleContext::getToken(size_t ttype, size_t i) const { + if (i >= children.size()) { + return nullptr; + } + size_t j = 0; // what token with ttype have we found? + for (auto *child : children) { + if (TerminalNode::is(child)) { + tree::TerminalNode *typedChild = downCast<tree::TerminalNode*>(child); + Token *symbol = typedChild->getSymbol(); + if (symbol->getType() == ttype) { + if (j++ == i) { + return typedChild; + } + } + } + } + return nullptr; +} + +std::vector<tree::TerminalNode *> ParserRuleContext::getTokens(size_t ttype) const { + std::vector<tree::TerminalNode*> tokens; + for (auto *child : children) { + if (TerminalNode::is(child)) { + tree::TerminalNode *typedChild = downCast<tree::TerminalNode*>(child); + Token *symbol = typedChild->getSymbol(); + if (symbol->getType() == ttype) { + tokens.push_back(typedChild); + } + } + } + return tokens; +} + +misc::Interval ParserRuleContext::getSourceInterval() { + if (start == nullptr) { + return misc::Interval::INVALID; + } + + if (stop == nullptr || stop->getTokenIndex() < start->getTokenIndex()) { + return misc::Interval(start->getTokenIndex(), start->getTokenIndex() - 1); // empty + } + return 
misc::Interval(start->getTokenIndex(), stop->getTokenIndex()); +} + +Token* ParserRuleContext::getStart() const { + return start; +} + +Token* ParserRuleContext::getStop() const { + return stop; +} + +std::string ParserRuleContext::toInfoString(Parser *recognizer) { + std::vector<std::string> rules = recognizer->getRuleInvocationStack(this); + std::reverse(rules.begin(), rules.end()); + std::string rulesStr = antlrcpp::arrayToString(rules); /* start/stop are initialised to nullptr by the constructors and may still be unset here, so guard both before dereferencing */ + return "ParserRuleContext" + rulesStr + "{start=" + (start != nullptr ? std::to_string(start->getTokenIndex()) : std::string("null")) + ", stop=" + + (stop != nullptr ? std::to_string(stop->getTokenIndex()) : std::string("null")) + '}'; +} + diff --git a/contrib/libs/antlr4_cpp_runtime/src/ParserRuleContext.h b/contrib/libs/antlr4_cpp_runtime/src/ParserRuleContext.h new file mode 100644 index 0000000000..63a8466e59 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/ParserRuleContext.h @@ -0,0 +1,147 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "RuleContext.h" +#include "support/CPPUtils.h" + +namespace antlr4 { + + /// <summary> + /// A rule invocation record for parsing. + /// + /// Contains all of the information about the current rule not stored in the + /// RuleContext. It handles parse tree children list, any ATN state + /// tracing, and the default values available for rule invocations: + /// start, stop, rule index, current alt number. + /// + /// Subclasses made for each rule and grammar track the parameters, + /// return values, locals, and labels specific to that rule. These + /// are the objects that are returned from rules. + /// + /// Note text is not an actual field of a rule return value; it is computed + /// from start and stop using the input stream's toString() method. I + /// could add a ctor to this so that we can pass in and store the input + /// stream, but I'm not sure we want to do that.
It would seem to be undefined + /// to get the .text property anyway if the rule matches tokens from multiple + /// input streams. + /// + /// I do not use getters for fields of objects that are used simply to + /// group values such as this aggregate. The getters/setters are there to + /// satisfy the superclass interface. + /// </summary> + class ANTLR4CPP_PUBLIC ParserRuleContext : public RuleContext { + public: + static ParserRuleContext EMPTY; + + /// <summary> + /// For debugging/tracing purposes, we want to track all of the nodes in + /// the ATN traversed by the parser for a particular rule. + /// This list indicates the sequence of ATN nodes used to match + /// the elements of the children list. This list does not include + /// ATN nodes and other rules used to match rule invocations. It + /// traces the rule invocation node itself but nothing inside that + /// other rule's ATN submachine. + /// + /// There is NOT a one-to-one correspondence between the children and + /// states list. There are typically many nodes in the ATN traversed + /// for each element in the children list. For example, for a rule + /// invocation there is the invoking state and the following state. + /// + /// The parser setState() method updates field s and adds it to this list + /// if we are debugging/tracing. + /// + /// This does not trace states visited during prediction. + /// </summary> + // public List<Integer> states; + + Token *start; + Token *stop; + + /// The exception that forced this rule to return. If the rule successfully + /// completed, this is "null exception pointer". + std::exception_ptr exception; + + ParserRuleContext(); + ParserRuleContext(ParserRuleContext *parent, size_t invokingStateNumber); + + /** COPY a ctx (I'm deliberately not using copy constructor) to avoid + * confusion with creating node with parent. Does not copy children + * (except error leaves). 
+ */ + virtual void copyFrom(ParserRuleContext *ctx); + + + // Double dispatch methods for listeners + + virtual void enterRule(tree::ParseTreeListener *listener); + virtual void exitRule(tree::ParseTreeListener *listener); + + /** Add a token leaf node child and force its parent to be this node. */ + tree::TerminalNode* addChild(tree::TerminalNode *t); + RuleContext* addChild(RuleContext *ruleInvocation); + + /// Used by enterOuterAlt to toss out a RuleContext previously added as + /// we entered a rule. If we have # label, we will need to remove + /// generic ruleContext object. + void removeLastChild(); + + tree::TerminalNode* getToken(size_t ttype, std::size_t i) const; + + std::vector<tree::TerminalNode*> getTokens(size_t ttype) const; + + template<typename T> + T* getRuleContext(size_t i) const { + static_assert(std::is_base_of_v<RuleContext, T>, "T must be derived from RuleContext"); + size_t j = 0; // what element have we found with ctxType? + for (auto *child : children) { + if (RuleContext::is(child)) { + if (auto *typedChild = dynamic_cast<T*>(child); typedChild != nullptr) { + if (j++ == i) { + return typedChild; + } + } + } + } + return nullptr; + } + + template<typename T> + std::vector<T*> getRuleContexts() const { + static_assert(std::is_base_of_v<RuleContext, T>, "T must be derived from RuleContext"); + std::vector<T*> contexts; + for (auto *child : children) { + if (RuleContext::is(child)) { + if (auto *typedChild = dynamic_cast<T*>(child); typedChild != nullptr) { + contexts.push_back(typedChild); + } + } + } + return contexts; + } + + virtual misc::Interval getSourceInterval() override; + + /** + * Get the initial token in this context. + * Note that the range from start to stop is inclusive, so for rules that do not consume anything + * (for example, zero length or error productions) this token may exceed stop. + */ + Token* getStart() const; + + /** + * Get the final token in this context. 
+ * Note that the range from start to stop is inclusive, so for rules that do not consume anything + * (for example, zero length or error productions) this token may precede start. + */ + Token* getStop() const; + + /// <summary> + /// Used for rule context info debugging during parse-time, not so much for ATN debugging </summary> + virtual std::string toInfoString(Parser *recognizer); + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/ProxyErrorListener.cpp b/contrib/libs/antlr4_cpp_runtime/src/ProxyErrorListener.cpp new file mode 100644 index 0000000000..34bfd73e26 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/ProxyErrorListener.cpp @@ -0,0 +1,53 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "ProxyErrorListener.h" + +using namespace antlr4; + +void ProxyErrorListener::addErrorListener(ANTLRErrorListener *listener) { + if (listener == nullptr) { + throw "listener cannot be null."; + } + + _delegates.insert(listener); +} + +void ProxyErrorListener::removeErrorListener(ANTLRErrorListener *listener) { + _delegates.erase(listener); +} + +void ProxyErrorListener::removeErrorListeners() { + _delegates.clear(); +} + +void ProxyErrorListener::syntaxError(Recognizer *recognizer, Token *offendingSymbol, size_t line, + size_t charPositionInLine, const std::string &msg, std::exception_ptr e) { + + for (auto *listener : _delegates) { + listener->syntaxError(recognizer, offendingSymbol, line, charPositionInLine, msg, e); + } +} + +void ProxyErrorListener::reportAmbiguity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, + bool exact, const antlrcpp::BitSet &ambigAlts, atn::ATNConfigSet *configs) { + for (auto *listener : _delegates) { + listener->reportAmbiguity(recognizer, dfa, startIndex, stopIndex, exact, ambigAlts, configs); + } +} + +void 
ProxyErrorListener::reportAttemptingFullContext(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, + size_t stopIndex, const antlrcpp::BitSet &conflictingAlts, atn::ATNConfigSet *configs) { + for (auto *listener : _delegates) { + listener->reportAttemptingFullContext(recognizer, dfa, startIndex, stopIndex, conflictingAlts, configs); + } +} + +void ProxyErrorListener::reportContextSensitivity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, + size_t prediction, atn::ATNConfigSet *configs) { + for (auto *listener : _delegates) { + listener->reportContextSensitivity(recognizer, dfa, startIndex, stopIndex, prediction, configs); + } +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/ProxyErrorListener.h b/contrib/libs/antlr4_cpp_runtime/src/ProxyErrorListener.h new file mode 100644 index 0000000000..04630ce12c --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/ProxyErrorListener.h @@ -0,0 +1,38 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "ANTLRErrorListener.h" +#include "Exceptions.h" + +namespace antlr4 { + + /// This implementation of ANTLRErrorListener dispatches all calls to a + /// collection of delegate listeners. This reduces the effort required to support multiple + /// listeners. + class ANTLR4CPP_PUBLIC ProxyErrorListener : public ANTLRErrorListener { + private: + std::set<ANTLRErrorListener *> _delegates; // Not owned. 
+ + public: + void addErrorListener(ANTLRErrorListener *listener); + void removeErrorListener(ANTLRErrorListener *listener); + void removeErrorListeners(); + + void syntaxError(Recognizer *recognizer, Token *offendingSymbol, size_t line, size_t charPositionInLine, + const std::string &msg, std::exception_ptr e) override; + + virtual void reportAmbiguity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, bool exact, + const antlrcpp::BitSet &ambigAlts, atn::ATNConfigSet *configs) override; + + virtual void reportAttemptingFullContext(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, + const antlrcpp::BitSet &conflictingAlts, atn::ATNConfigSet *configs) override; + + virtual void reportContextSensitivity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, + size_t prediction, atn::ATNConfigSet *configs) override; + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/RecognitionException.cpp b/contrib/libs/antlr4_cpp_runtime/src/RecognitionException.cpp new file mode 100644 index 0000000000..5b37f9d2f0 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/RecognitionException.cpp @@ -0,0 +1,65 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/ATN.h" +#include "Recognizer.h" +#include "ParserRuleContext.h" +#include "misc/IntervalSet.h" + +#include "RecognitionException.h" + +using namespace antlr4; + +RecognitionException::RecognitionException(Recognizer *recognizer, IntStream *input, ParserRuleContext *ctx, + Token *offendingToken) + : RecognitionException("", recognizer, input, ctx, offendingToken) { +} + +RecognitionException::RecognitionException(const std::string &message, Recognizer *recognizer, IntStream *input, + ParserRuleContext *ctx, Token *offendingToken) + : RuntimeException(message), _recognizer(recognizer), _input(input), _ctx(ctx), _offendingToken(offendingToken) { + InitializeInstanceFields(); + if (recognizer != nullptr) { + _offendingState = recognizer->getState(); + } +} + +RecognitionException::~RecognitionException() { +} + +size_t RecognitionException::getOffendingState() const { + return _offendingState; +} + +void RecognitionException::setOffendingState(size_t offendingState) { + _offendingState = offendingState; +} + +misc::IntervalSet RecognitionException::getExpectedTokens() const { + if (_recognizer) { + return _recognizer->getATN().getExpectedTokens(_offendingState, _ctx); + } + return misc::IntervalSet::EMPTY_SET; +} + +RuleContext* RecognitionException::getCtx() const { + return _ctx; +} + +IntStream* RecognitionException::getInputStream() const { + return _input; +} + +Token* RecognitionException::getOffendingToken() const { + return _offendingToken; +} + +Recognizer* RecognitionException::getRecognizer() const { + return _recognizer; +} + +void RecognitionException::InitializeInstanceFields() { + _offendingState = INVALID_INDEX; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/RecognitionException.h b/contrib/libs/antlr4_cpp_runtime/src/RecognitionException.h new file mode 100644 index 0000000000..9397ab20c8 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/RecognitionException.h @@ -0,0 +1,98 @@ +/* Copyright (c) 2012-2017 The ANTLR 
Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Exceptions.h" + +namespace antlr4 { + + /// The root of the ANTLR exception hierarchy. In general, ANTLR tracks just + /// 3 kinds of errors: prediction errors, failed predicate errors, and + /// mismatched input errors. In each case, the parser knows where it is + /// in the input, where it is in the ATN, the rule invocation stack, + /// and what kind of problem occurred. + class ANTLR4CPP_PUBLIC RecognitionException : public RuntimeException { + private: + /// The Recognizer where this exception originated. + Recognizer *_recognizer; + IntStream *_input; + ParserRuleContext *_ctx; + + /// The current Token when an error occurred. Since not all streams + /// support accessing symbols by index, we have to track the Token + /// instance itself. + Token *_offendingToken; + + size_t _offendingState; + + public: + RecognitionException(Recognizer *recognizer, IntStream *input, ParserRuleContext *ctx, + Token *offendingToken = nullptr); + RecognitionException(const std::string &message, Recognizer *recognizer, IntStream *input, + ParserRuleContext *ctx, Token *offendingToken = nullptr); + RecognitionException(RecognitionException const&) = default; + ~RecognitionException(); + RecognitionException& operator=(RecognitionException const&) = default; + + /// Get the ATN state number the parser was in at the time the error + /// occurred. For NoViableAltException and + /// LexerNoViableAltException exceptions, this is the + /// DecisionState number. For others, it is the state whose outgoing + /// edge we couldn't match. + /// + /// If the state number is not known, this method returns -1. 
+ virtual size_t getOffendingState() const; + + protected: + void setOffendingState(size_t offendingState); + + /// Gets the set of input symbols which could potentially follow the + /// previously matched symbol at the time this exception was thrown. + /// + /// If the set of expected tokens is not known and could not be computed, + /// this method returns an empty set. + /// + /// @returns The set of token types that could potentially follow the current + /// state in the ATN, or an empty set if the information is not available. + public: + virtual misc::IntervalSet getExpectedTokens() const; + + /// <summary> + /// Gets the <seealso cref="RuleContext"/> at the time this exception was thrown. + /// <p/> + /// If the context is not available, this method returns {@code null}. + /// </summary> + /// <returns> The <seealso cref="RuleContext"/> at the time this exception was thrown. + /// If the context is not available, this method returns {@code null}. </returns> + virtual RuleContext* getCtx() const; + + /// <summary> + /// Gets the input stream which is the symbol source for the recognizer where + /// this exception was thrown. + /// <p/> + /// If the input stream is not available, this method returns {@code null}. + /// </summary> + /// <returns> The input stream which is the symbol source for the recognizer + /// where this exception was thrown, or {@code null} if the stream is not + /// available. </returns> + virtual IntStream* getInputStream() const; + + virtual Token* getOffendingToken() const; + + /// <summary> + /// Gets the <seealso cref="Recognizer"/> where this exception occurred. + /// <p/> + /// If the recognizer is not available, this method returns {@code null}. + /// </summary> + /// <returns> The recognizer where this exception occurred, or {@code null} if + /// the recognizer is not available. 
</returns> + virtual Recognizer* getRecognizer() const; + + private: + void InitializeInstanceFields(); + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/Recognizer.cpp b/contrib/libs/antlr4_cpp_runtime/src/Recognizer.cpp new file mode 100644 index 0000000000..c8a183324c --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/Recognizer.cpp @@ -0,0 +1,157 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "ConsoleErrorListener.h" +#include "RecognitionException.h" +#include "support/CPPUtils.h" +#include "Token.h" +#include "atn/ATN.h" +#include "atn/ATNSimulator.h" +#include "support/CPPUtils.h" +#include "support/StringUtils.h" + +#include "Vocabulary.h" + +#include "Recognizer.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::internal; + +std::map<const dfa::Vocabulary*, std::map<std::string_view, size_t>> Recognizer::_tokenTypeMapCache; +std::map<std::vector<std::string>, std::map<std::string, size_t>> Recognizer::_ruleIndexMapCache; + +Recognizer::Recognizer() { + InitializeInstanceFields(); + _proxListener.addErrorListener(&ConsoleErrorListener::INSTANCE); +} + +Recognizer::~Recognizer() { +} + +std::map<std::string_view, size_t> Recognizer::getTokenTypeMap() { + const dfa::Vocabulary& vocabulary = getVocabulary(); + + UniqueLock<Mutex> lck(_mutex); + std::map<std::string_view, size_t> result; + auto iterator = _tokenTypeMapCache.find(&vocabulary); + if (iterator != _tokenTypeMapCache.end()) { + result = iterator->second; + } else { + for (size_t i = 0; i <= getATN().maxTokenType; ++i) { + std::string_view literalName = vocabulary.getLiteralName(i); + if (!literalName.empty()) { + result[literalName] = i; + } + + std::string_view symbolicName = vocabulary.getSymbolicName(i); + if (!symbolicName.empty()) { + result[symbolicName] = i; + 
} + } + result["EOF"] = EOF; + _tokenTypeMapCache[&vocabulary] = result; + } + + return result; +} + +std::map<std::string, size_t> Recognizer::getRuleIndexMap() { + const std::vector<std::string>& ruleNames = getRuleNames(); + if (ruleNames.empty()) { + throw "The current recognizer does not provide a list of rule names."; + } + + UniqueLock<Mutex> lck(_mutex); + std::map<std::string, size_t> result; + auto iterator = _ruleIndexMapCache.find(ruleNames); + if (iterator != _ruleIndexMapCache.end()) { + result = iterator->second; + } else { + result = antlrcpp::toMap(ruleNames); + _ruleIndexMapCache[ruleNames] = result; + } + return result; +} + +size_t Recognizer::getTokenType(std::string_view tokenName) { + const std::map<std::string_view, size_t> &map = getTokenTypeMap(); + auto iterator = map.find(tokenName); + if (iterator == map.end()) + return Token::INVALID_TYPE; + + return iterator->second; +} + +void Recognizer::setInterpreter(atn::ATNSimulator *interpreter) { + // Usually the interpreter is set by the descendant (lexer or parser (simulator), but can also be exchanged + // by the profiling ATN simulator. + delete _interpreter; + _interpreter = interpreter; +} + +std::string Recognizer::getErrorHeader(RecognitionException *e) { + // We're having issues with cross header dependencies, these two classes will need to be + // rewritten to remove that. 
+ size_t line = e->getOffendingToken()->getLine(); + size_t charPositionInLine = e->getOffendingToken()->getCharPositionInLine(); + return std::string("line ") + std::to_string(line) + ":" + std::to_string(charPositionInLine); + +} + +std::string Recognizer::getTokenErrorDisplay(Token *t) { + if (t == nullptr) { + return "<no Token>"; + } + std::string s = t->getText(); + if (s == "") { + if (t->getType() == EOF) { + s = "<EOF>"; + } else { + s = std::string("<") + std::to_string(t->getType()) + std::string(">"); + } + } + + std::string result; + result.reserve(s.size() + 2); + result.push_back('\''); + antlrcpp::escapeWhitespace(result, s); + result.push_back('\''); + result.shrink_to_fit(); + return result; +} + +void Recognizer::addErrorListener(ANTLRErrorListener *listener) { + _proxListener.addErrorListener(listener); +} + +void Recognizer::removeErrorListener(ANTLRErrorListener *listener) { + _proxListener.removeErrorListener(listener); +} + +void Recognizer::removeErrorListeners() { + _proxListener.removeErrorListeners(); +} + +ProxyErrorListener& Recognizer::getErrorListenerDispatch() { + return _proxListener; +} + +bool Recognizer::sempred(RuleContext * /*localctx*/, size_t /*ruleIndex*/, size_t /*actionIndex*/) { + return true; +} + +bool Recognizer::precpred(RuleContext * /*localctx*/, int /*precedence*/) { + return true; +} + +void Recognizer::action(RuleContext * /*localctx*/, size_t /*ruleIndex*/, size_t /*actionIndex*/) { +} + +void Recognizer::InitializeInstanceFields() { + _stateNumber = ATNState::INVALID_STATE_NUMBER; + _interpreter = nullptr; +} + diff --git a/contrib/libs/antlr4_cpp_runtime/src/Recognizer.h b/contrib/libs/antlr4_cpp_runtime/src/Recognizer.h new file mode 100644 index 0000000000..0226a612e1 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/Recognizer.h @@ -0,0 +1,160 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "ProxyErrorListener.h" +#include "support/Casts.h" +#include "atn/SerializedATNView.h" +#include "internal/Synchronization.h" + +namespace antlr4 { + + class ANTLR4CPP_PUBLIC Recognizer { + public: + static constexpr size_t EOF = std::numeric_limits<size_t>::max(); + + Recognizer(); + Recognizer(Recognizer const&) = delete; + virtual ~Recognizer(); + + Recognizer& operator=(Recognizer const&) = delete; + + virtual std::vector<std::string> const& getRuleNames() const = 0; + + /** + * Get the vocabulary used by the recognizer. + * + * @return A {@link Vocabulary} instance providing information about the + * vocabulary used by the grammar. + */ + virtual dfa::Vocabulary const& getVocabulary() const = 0; + + /// <summary> + /// Get a map from token names to token types. + /// <p/> + /// Used for XPath and tree pattern compilation. + /// </summary> + virtual std::map<std::string_view, size_t> getTokenTypeMap(); + + /// <summary> + /// Get a map from rule names to rule indexes. + /// <p/> + /// Used for XPath and tree pattern compilation. + /// </summary> + virtual std::map<std::string, size_t> getRuleIndexMap(); + + virtual size_t getTokenType(std::string_view tokenName); + + /// <summary> + /// If this recognizer was generated, it will have a serialized ATN + /// representation of the grammar. + /// <p/> + /// For interpreters, we don't know their serialized ATN despite having + /// created the interpreter from it. + /// </summary> + virtual atn::SerializedATNView getSerializedATN() const { + throw "there is no serialized ATN"; + } + + /// <summary> + /// For debugging and other purposes, might want the grammar name. + /// Have ANTLR generate an implementation for this method. 
+ /// </summary> + virtual std::string getGrammarFileName() const = 0; + + /// Get the ATN interpreter (in fact one of its descendants) used by the recognizer for prediction. + /// @returns The ATN interpreter used by the recognizer for prediction. + template <class T> + T* getInterpreter() const { + return antlrcpp::downCast<T *>(_interpreter); + } + + /** + * Set the ATN interpreter used by the recognizer for prediction. + * + * @param interpreter The ATN interpreter used by the recognizer for + * prediction. + */ + void setInterpreter(atn::ATNSimulator *interpreter); + + /// What is the error header, normally line/character position information? + virtual std::string getErrorHeader(RecognitionException *e); + + /** How should a token be displayed in an error message? The default + * is to display just the text, but during development you might + * want to have a lot of information spit out. Override in that case + * to use t.toString() (which, for CommonToken, dumps everything about + * the token). This is better than forcing you to override a method in + * your token objects because you don't have to go modify your lexer + * so that it creates a new Java type. + * + * @deprecated This method is not called by the ANTLR 4 Runtime. Specific + * implementations of {@link ANTLRErrorStrategy} may provide a similar + * feature when necessary. For example, see + * {@link DefaultErrorStrategy#getTokenErrorDisplay}. + */ + virtual std::string getTokenErrorDisplay(Token *t); + + /// <exception cref="NullPointerException"> if {@code listener} is {@code null}.
</exception> + virtual void addErrorListener(ANTLRErrorListener *listener); + + virtual void removeErrorListener(ANTLRErrorListener *listener); + + virtual void removeErrorListeners(); + + virtual ProxyErrorListener& getErrorListenerDispatch(); + + // subclass needs to override these if there are sempreds or actions + // that the ATN interp needs to execute + virtual bool sempred(RuleContext *localctx, size_t ruleIndex, size_t actionIndex); + + virtual bool precpred(RuleContext *localctx, int precedence); + + virtual void action(RuleContext *localctx, size_t ruleIndex, size_t actionIndex); + + size_t getState() const { return _stateNumber; } + + // Get the ATN used by the recognizer for prediction. + virtual const atn::ATN& getATN() const = 0; + + /// <summary> + /// Indicate that the recognizer has changed internal state that is + /// consistent with the ATN state passed in. This way we always know + /// where we are in the ATN as the parser goes along. The rule + /// context objects form a stack that lets us see the stack of + /// invoking rules. Combine this and we have complete ATN + /// configuration information. + /// </summary> + void setState(size_t atnState) { _stateNumber = atnState; } + + virtual IntStream* getInputStream() = 0; + + virtual void setInputStream(IntStream *input) = 0; + + virtual TokenFactory<CommonToken>* getTokenFactory() = 0; + + template<typename T1> + void setTokenFactory(TokenFactory<T1> *input); + + protected: + atn::ATNSimulator *_interpreter; // Set and deleted in descendants (or the profiler). + + // Mutex to manage synchronized access for multithreading. + internal::Mutex _mutex; + + private: + static std::map<const dfa::Vocabulary*, std::map<std::string_view, size_t>> _tokenTypeMapCache; + static std::map<std::vector<std::string>, std::map<std::string, size_t>> _ruleIndexMapCache; + + ProxyErrorListener _proxListener; // Manages a collection of listeners. 
+ + size_t _stateNumber; + + void InitializeInstanceFields(); + + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/RuleContext.cpp b/contrib/libs/antlr4_cpp_runtime/src/RuleContext.cpp new file mode 100644 index 0000000000..6d67f9a29a --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/RuleContext.cpp @@ -0,0 +1,144 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "tree/Trees.h" +#include "misc/Interval.h" +#include "Parser.h" +#include "atn/ATN.h" +#include "atn/ATNState.h" +#include "tree/ParseTreeVisitor.h" + +#include "RuleContext.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::tree; + +RuleContext::RuleContext() : ParseTree(ParseTreeType::RULE) { + InitializeInstanceFields(); +} + +RuleContext::RuleContext(RuleContext *parent_, size_t invokingState_) : ParseTree(ParseTreeType::RULE) { + InitializeInstanceFields(); + this->parent = parent_; + this->invokingState = invokingState_; +} + +int RuleContext::depth() { + int n = 1; + RuleContext *p = this; + while (true) { + if (p->parent == nullptr) + break; + p = static_cast<RuleContext *>(p->parent); + n++; + } + return n; +} + +bool RuleContext::isEmpty() { + return invokingState == ATNState::INVALID_STATE_NUMBER; +} + +misc::Interval RuleContext::getSourceInterval() { + return misc::Interval::INVALID; +} + +std::string RuleContext::getText() { + if (children.empty()) { + return ""; + } + + std::stringstream ss; + for (size_t i = 0; i < children.size(); i++) { + ParseTree *tree = children[i]; + if (tree != nullptr) + ss << tree->getText(); + } + + return ss.str(); +} + +size_t RuleContext::getRuleIndex() const { + return INVALID_INDEX; +} + +size_t RuleContext::getAltNumber() const { + return atn::ATN::INVALID_ALT_NUMBER; +} + +void RuleContext::setAltNumber(size_t /*altNumber*/) { 
+} + +std::any RuleContext::accept(tree::ParseTreeVisitor *visitor) { + return visitor->visitChildren(this); +} + +std::string RuleContext::toStringTree(Parser *recog, bool pretty) { + return tree::Trees::toStringTree(this, recog, pretty); +} + +std::string RuleContext::toStringTree(std::vector<std::string> &ruleNames, bool pretty) { + return tree::Trees::toStringTree(this, ruleNames, pretty); +} + +std::string RuleContext::toStringTree(bool pretty) { + return toStringTree(nullptr, pretty); +} + + +std::string RuleContext::toString(const std::vector<std::string> &ruleNames) { + return toString(ruleNames, nullptr); +} + + +std::string RuleContext::toString(const std::vector<std::string> &ruleNames, RuleContext *stop) { + std::stringstream ss; + + RuleContext *currentParent = this; + ss << "["; + while (currentParent != stop) { + if (ruleNames.empty()) { + if (!currentParent->isEmpty()) { + ss << currentParent->invokingState; + } + } else { + size_t ruleIndex = currentParent->getRuleIndex(); + + std::string ruleName = (ruleIndex < ruleNames.size()) ? ruleNames[ruleIndex] : std::to_string(ruleIndex); + ss << ruleName; + } + + if (currentParent->parent == nullptr) // No parent anymore. + break; + currentParent = static_cast<RuleContext *>(currentParent->parent); + if (!ruleNames.empty() || !currentParent->isEmpty()) { + ss << " "; + } + } + + ss << "]"; + + return ss.str(); +} + +std::string RuleContext::toString() { + return toString(nullptr); +} + +std::string RuleContext::toString(Recognizer *recog) { + return toString(recog, &ParserRuleContext::EMPTY); +} + +std::string RuleContext::toString(Recognizer *recog, RuleContext *stop) { + if (recog == nullptr) + return toString(std::vector<std::string>(), stop); // Don't use an initializer {} here or we end up calling ourselves recursively.
+ return toString(recog->getRuleNames(), stop); +} + +void RuleContext::InitializeInstanceFields() { + invokingState = INVALID_INDEX; +} + diff --git a/contrib/libs/antlr4_cpp_runtime/src/RuleContext.h b/contrib/libs/antlr4_cpp_runtime/src/RuleContext.h new file mode 100644 index 0000000000..a0effa2a02 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/RuleContext.h @@ -0,0 +1,141 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "tree/ParseTree.h" + +namespace antlr4 { + + /** A rule context is a record of a single rule invocation. + * + * We form a stack of these context objects using the parent + * pointer. A parent pointer of null indicates that the current + * context is the bottom of the stack. The ParserRuleContext subclass + * has a children list so that we can turn this data structure into a + * tree. + * + * The root node always has a null pointer and invokingState of -1. + * + * Upon entry to parsing, the first invoked rule function creates a + * context object (a subclass specialized for that rule such as + * SContext) and makes it the root of a parse tree, recorded by field + * Parser._ctx. + * + * public final SContext s() throws RecognitionException { + * SContext _localctx = new SContext(_ctx, getState()); <-- create new node + * enterRule(_localctx, 0, RULE_s); <-- push it + * ... + * exitRule(); <-- pop back to _localctx + * return _localctx; + * } + * + * A subsequent rule invocation of r from the start rule s pushes a + * new context object for r whose parent points at s and whose invoking + * state is the state with r emanating as edge label. + * + * The invokingState fields from a context object to the root + * together form a stack of rule invocation states where the root + * (bottom of the stack) has a -1 sentinel value.
If we invoke start + * symbol s then call r1, which calls r2, the stack would look like + * this: + * + * SContext[-1] <- root node (bottom of the stack) + * R1Context[p] <- p in rule s called r1 + * R2Context[q] <- q in rule r1 called r2 + * + * So the top of the stack, _ctx, represents a call to the current + * rule and it holds the return address from another rule that invoked + * this rule. To invoke a rule, we must always have a current context. + * + * The parent contexts are useful for computing lookahead sets and + * getting error information. + * + * These objects are used during parsing and prediction. + * For the special case of parsers, we use the subclass + * ParserRuleContext. + * + * @see ParserRuleContext + */ + class ANTLR4CPP_PUBLIC RuleContext : public tree::ParseTree { + public: + static bool is(const tree::ParseTree &parseTree) { return parseTree.getTreeType() == tree::ParseTreeType::RULE; } + + static bool is(const tree::ParseTree *parseTree) { return parseTree != nullptr && is(*parseTree); } + + /// What state invoked the rule associated with this context? + /// The "return address" is the followState of invokingState + /// If parent is null, this should be -1 and this context object represents the start rule. + size_t invokingState; + + RuleContext(); + RuleContext(RuleContext *parent, size_t invokingState); + + virtual int depth(); + + /// A context is empty if there is no invoking state; meaning nobody called current context. + virtual bool isEmpty(); + + // satisfy the ParseTree / SyntaxTree interface + + virtual misc::Interval getSourceInterval() override; + + virtual std::string getText() override; + + virtual size_t getRuleIndex() const; + + /** For rule associated with this parse tree internal node, return + * the outer alternative number used to match the input. Default + * implementation does not compute nor store this alt num. Create + * a subclass of ParserRuleContext with backing field and set + * option contextSuperClass.
+ * to set it. + * + * @since 4.5.3 + */ + virtual size_t getAltNumber() const; + + /** Set the outer alternative number for this context node. Default + * implementation does nothing to avoid backing field overhead for + * trees that don't need it. Create + * a subclass of ParserRuleContext with backing field and set + * option contextSuperClass. + * + * @since 4.5.3 + */ + virtual void setAltNumber(size_t altNumber); + + virtual std::any accept(tree::ParseTreeVisitor *visitor) override; + + /// <summary> + /// Print out a whole tree, not just a node, in LISP format + /// (root child1 .. childN). Print just a node if this is a leaf. + /// We have to know the recognizer so we can get rule names. + /// </summary> + virtual std::string toStringTree(Parser *recog, bool pretty = false) override; + + /// <summary> + /// Print out a whole tree, not just a node, in LISP format + /// (root child1 .. childN). Print just a node if this is a leaf. + /// </summary> + virtual std::string toStringTree(std::vector<std::string> &ruleNames, bool pretty = false); + + virtual std::string toStringTree(bool pretty = false) override; + virtual std::string toString() override; + std::string toString(Recognizer *recog); + std::string toString(const std::vector<std::string> &ruleNames); + + // recog null unless ParserRuleContext, in which case we use subclass toString(...) + std::string toString(Recognizer *recog, RuleContext *stop); + + virtual std::string toString(const std::vector<std::string> &ruleNames, RuleContext *stop); + + bool operator == (const RuleContext &other) { return this == &other; } // Simple address comparison. 
+ + private: + void InitializeInstanceFields(); + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/RuleContextWithAltNum.cpp b/contrib/libs/antlr4_cpp_runtime/src/RuleContextWithAltNum.cpp new file mode 100644 index 0000000000..250859fdc0 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/RuleContextWithAltNum.cpp @@ -0,0 +1,27 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/ATN.h" + +#include "RuleContextWithAltNum.h" + +using namespace antlr4; +using namespace antlr4::atn; + +RuleContextWithAltNum::RuleContextWithAltNum() : ParserRuleContext() { + altNum = ATN::INVALID_ALT_NUMBER; +} + +RuleContextWithAltNum::RuleContextWithAltNum(ParserRuleContext *parent, int invokingStateNumber) + : ParserRuleContext(parent, invokingStateNumber) { +} + +size_t RuleContextWithAltNum::getAltNumber() const { + return altNum; +} + +void RuleContextWithAltNum::setAltNumber(size_t number) { + altNum = number; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/RuleContextWithAltNum.h b/contrib/libs/antlr4_cpp_runtime/src/RuleContextWithAltNum.h new file mode 100644 index 0000000000..995d9aa7b1 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/RuleContextWithAltNum.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "ParserRuleContext.h" + +namespace antlr4 { + + /// A handy class for use with + /// + /// options {contextSuperClass=org.antlr.v4.runtime.RuleContextWithAltNum;} + /// + /// that provides a backing field / impl for the outer alternative number + /// matched for an internal parse tree node. 
+ /// + /// I'm only putting this into the Java runtime as I'm certain I'm the only one that + /// will really ever use this. + class ANTLR4CPP_PUBLIC RuleContextWithAltNum : public ParserRuleContext { + public: + size_t altNum = 0; + + RuleContextWithAltNum(); + RuleContextWithAltNum(ParserRuleContext *parent, int invokingStateNumber); + + virtual size_t getAltNumber() const override; + virtual void setAltNumber(size_t altNum) override; + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/RuntimeMetaData.cpp b/contrib/libs/antlr4_cpp_runtime/src/RuntimeMetaData.cpp new file mode 100644 index 0000000000..cf30d68587 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/RuntimeMetaData.cpp @@ -0,0 +1,54 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "RuntimeMetaData.h" +#include "Version.h" + +using namespace antlr4; + +const std::string RuntimeMetaData::VERSION = ANTLRCPP_VERSION_STRING; + +std::string RuntimeMetaData::getRuntimeVersion() { + return VERSION; +} + +void RuntimeMetaData::checkVersion(const std::string &generatingToolVersion, const std::string &compileTimeVersion) { + std::string runtimeVersion = VERSION; + bool runtimeConflictsWithGeneratingTool = false; + bool runtimeConflictsWithCompileTimeTool = false; + + if (generatingToolVersion != "") { + runtimeConflictsWithGeneratingTool = runtimeVersion != generatingToolVersion + && getMajorMinorVersion(runtimeVersion) != getMajorMinorVersion(generatingToolVersion); + } + + runtimeConflictsWithCompileTimeTool = runtimeVersion != compileTimeVersion + && getMajorMinorVersion(runtimeVersion) != getMajorMinorVersion(compileTimeVersion); + + if (runtimeConflictsWithGeneratingTool) { + std::cerr << "ANTLR Tool version " << generatingToolVersion << " used for code generation does not match " + "the current runtime version " <<
runtimeVersion << std::endl; + } + if (runtimeConflictsWithCompileTimeTool) { + std::cerr << "ANTLR Runtime version " << compileTimeVersion << " used for parser compilation does not match " + "the current runtime version " << runtimeVersion << std::endl; + } +} + +std::string RuntimeMetaData::getMajorMinorVersion(const std::string &version) { + size_t firstDot = version.find('.'); + size_t secondDot = firstDot != std::string::npos ? version.find('.', firstDot + 1) : std::string::npos; + size_t firstDash = version.find('-'); + size_t referenceLength = version.size(); + if (secondDot != std::string::npos) { + referenceLength = std::min(referenceLength, secondDot); + } + + if (firstDash != std::string::npos) { + referenceLength = std::min(referenceLength, firstDash); + } + + return version.substr(0, referenceLength); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/RuntimeMetaData.h b/contrib/libs/antlr4_cpp_runtime/src/RuntimeMetaData.h new file mode 100644 index 0000000000..f178cfe9e8 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/RuntimeMetaData.h @@ -0,0 +1,155 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { + + /// <summary> + /// This class provides access to the current version of the ANTLR 4 runtime + /// library as compile-time and runtime constants, along with methods for + /// checking for matching version numbers and notifying listeners in the case + /// where a version mismatch is detected. + /// + /// <para> + /// The runtime version information is provided by <seealso cref="#VERSION"/> and + /// <seealso cref="#getRuntimeVersion()"/>. 
Detailed information about these values is + /// provided in the documentation for each member.</para> + /// + /// <para> + /// The runtime version check is implemented by <seealso cref="#checkVersion"/>. Detailed + /// information about incorporating this call into user code, as well as its use + /// in generated code, is provided in the documentation for the method.</para> + /// + /// <para> + /// Version strings x.y and x.y.z are considered "compatible" and no error + /// would be generated. Likewise, version strings x.y-SNAPSHOT and x.y.z are + /// considered "compatible" because the major and minor components x.y + /// are the same in each.</para> + /// + /// <para> + /// To trap any error messages issued by this code, use System.setErr() + /// in your main() startup code. + /// </para> + /// + /// @since 4.3 + /// </summary> + class ANTLR4CPP_PUBLIC RuntimeMetaData { + public: + /// A compile-time constant containing the current version of the ANTLR 4 + /// runtime library. + /// + /// <para> + /// This compile-time constant value allows generated parsers and other + /// libraries to include a literal reference to the version of the ANTLR 4 + /// runtime library the code was compiled against. At each release, we + /// change this value.</para> + /// + /// <para>Version numbers are assumed to have the form + /// + /// <em>major</em>.<em>minor</em>.<em>patch</em>.<em>revision</em>-<em>suffix</em>, + /// + /// with the individual components defined as follows.</para> + /// + /// <ul> + /// <li><em>major</em> is a required non-negative integer, and is equal to + /// {@code 4} for ANTLR 4.</li> + /// <li><em>minor</em> is a required non-negative integer.</li> + /// <li><em>patch</em> is an optional non-negative integer. When + /// <em>patch</em> is omitted, the {@code .} (dot) appearing before it is + /// also omitted.</li> + /// <li><em>revision</em> is an optional non-negative integer, and may only + /// be included when <em>patch</em> is also included. 
When <em>revision</em> + /// is omitted, the {@code .} (dot) appearing before it is also omitted.</li> + /// <li><em>suffix</em> is an optional string. When <em>suffix</em> is + /// omitted, the {@code -} (hyphen-minus) appearing before it is also + /// omitted.</li> + /// </ul> + static const std::string VERSION; + + /// <summary> + /// Gets the currently executing version of the ANTLR 4 runtime library. + /// + /// <para> + /// This method provides runtime access to the <seealso cref="#VERSION"/> field, as + /// opposed to directly referencing the field as a compile-time constant.</para> + /// </summary> + /// <returns> The currently executing version of the ANTLR 4 library </returns> + + static std::string getRuntimeVersion(); + + /// <summary> + /// This method provides the ability to detect mismatches between the version + /// of ANTLR 4 used to generate a parser, the version of the ANTLR runtime a + /// parser was compiled against, and the version of the ANTLR runtime which + /// is currently executing. + /// + /// <para> + /// The version check is designed to detect the following two specific + /// scenarios.</para> + /// + /// <ul> + /// <li>The ANTLR Tool version used for code generation does not match the + /// currently executing runtime version.</li> + /// <li>The ANTLR Runtime version referenced at the time a parser was + /// compiled does not match the currently executing runtime version.</li> + /// </ul> + /// + /// <para> + /// Starting with ANTLR 4.3, the code generator emits a call to this method + /// using two constants in each generated lexer and parser: a hard-coded + /// constant indicating the version of the tool used to generate the parser + /// and a reference to the compile-time constant <seealso cref="#VERSION"/>. 
At + /// runtime, this method is called during the initialization of the generated + /// parser to detect mismatched versions, and notify the registered listeners + /// prior to creating instances of the parser.</para> + /// + /// <para> + /// This method does not perform any detection or filtering of semantic + /// changes between tool and runtime versions. It simply checks for a + /// version match and emits an error to stderr if a difference + /// is detected.</para> + /// + /// <para> + /// Note that some breaking changes between releases could result in other + /// types of runtime exceptions, such as a <seealso cref="LinkageError"/>, prior to + /// calling this method. In these cases, the underlying version mismatch will + /// not be reported here. This method is primarily intended to + /// notify users of potential semantic changes between releases that do not + /// result in binary compatibility problems which would be detected by the + /// class loader. As with semantic changes, changes that break binary + /// compatibility between releases are mentioned in the release notes + /// accompanying the affected release.</para> + /// + /// <para> + /// <strong>Additional note for target developers:</strong> The version check + /// implemented by this class is designed to address specific compatibility + /// concerns that may arise during the execution of Java applications. Other + /// targets should consider the implementation of this method in the context + /// of that target's known execution environment, which may or may not + /// resemble the design provided for the Java target.</para> + /// </summary> + /// <param name="generatingToolVersion"> The version of the tool used to generate a parser. + /// This value may be null when called from user code that was not generated + /// by, and does not reference, the ANTLR 4 Tool itself. </param> + /// <param name="compileTimeVersion"> The version of the runtime the parser was + /// compiled against. 
This should always be passed using a direct reference + /// to <seealso cref="#VERSION"/>. </param> + static void checkVersion(const std::string &generatingToolVersion, const std::string &compileTimeVersion); + + /// <summary> + /// Gets the major and minor version numbers from a version string. For + /// details about the syntax of the input {@code version}. + /// E.g., from x.y.z return x.y. + /// </summary> + /// <param name="version"> The complete version string. </param> + /// <returns> A string of the form <em>major</em>.<em>minor</em> containing + /// only the major and minor components of the version string. </returns> + static std::string getMajorMinorVersion(const std::string &version); + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/Token.cpp b/contrib/libs/antlr4_cpp_runtime/src/Token.cpp new file mode 100644 index 0000000000..31266b42d1 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/Token.cpp @@ -0,0 +1,9 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "Token.h" + +antlr4::Token::~Token() { +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/Token.h b/contrib/libs/antlr4_cpp_runtime/src/Token.h new file mode 100644 index 0000000000..832db740b3 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/Token.h @@ -0,0 +1,92 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "IntStream.h" + +namespace antlr4 { + + /// A token has properties: text, type, line, character position in the line + /// (so we can ignore tabs), token channel, index, and source from which + /// we obtained this token. 
+ class ANTLR4CPP_PUBLIC Token { + public: + static constexpr size_t INVALID_TYPE = 0; + + /// During lookahead operations, this "token" signifies we hit rule end ATN state + /// and did not follow it despite needing to. + static constexpr size_t EPSILON = std::numeric_limits<size_t>::max() - 1; + static constexpr size_t MIN_USER_TOKEN_TYPE = 1; + static constexpr size_t EOF = IntStream::EOF; + + virtual ~Token(); + + /// All tokens go to the parser (unless skip() is called in that rule) + /// on a particular "channel". The parser tunes to a particular channel + /// so that whitespace etc... can go to the parser on a "hidden" channel. + static constexpr size_t DEFAULT_CHANNEL = 0; + + /// Anything on different channel than DEFAULT_CHANNEL is not parsed + /// by parser. + static constexpr size_t HIDDEN_CHANNEL = 1; + + /** + * This is the minimum constant value which can be assigned to a + * user-defined token channel. + * + * <p> + * The non-negative numbers less than {@link #MIN_USER_CHANNEL_VALUE} are + * assigned to the predefined channels {@link #DEFAULT_CHANNEL} and + * {@link #HIDDEN_CHANNEL}.</p> + * + * @see Token#getChannel() + */ + static constexpr size_t MIN_USER_CHANNEL_VALUE = 2; + + /// Get the text of the token. + virtual std::string getText() const = 0; + + /// Get the token type of the token + virtual size_t getType() const = 0; + + /// The line number on which the 1st character of this token was matched, line=1..n + virtual size_t getLine() const = 0; + + /// The index of the first character of this token relative to the + /// beginning of the line at which it occurs, 0..n-1 + virtual size_t getCharPositionInLine() const = 0; + + /// Return the channel this token. Each token can arrive at the parser + /// on a different channel, but the parser only "tunes" to a single channel. + /// The parser ignores everything not on DEFAULT_CHANNEL. + virtual size_t getChannel() const = 0; + + /// An index from 0..n-1 of the token object in the input stream. 
+ /// This must be valid in order to print token streams and + /// use TokenRewriteStream. + /// + /// Return INVALID_INDEX to indicate that this token was conjured up since + /// it doesn't have a valid index. + virtual size_t getTokenIndex() const = 0; + + /// The starting character index of the token + /// This method is optional; return INVALID_INDEX if not implemented. + virtual size_t getStartIndex() const = 0; + + /// The last character index of the token. + /// This method is optional; return INVALID_INDEX if not implemented. + virtual size_t getStopIndex() const = 0; + + /// Gets the <seealso cref="TokenSource"/> which created this token. + virtual TokenSource *getTokenSource() const = 0; + + /// Gets the <seealso cref="CharStream"/> from which this token was derived. + virtual CharStream *getInputStream() const = 0; + + virtual std::string toString() const = 0; + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/TokenFactory.h b/contrib/libs/antlr4_cpp_runtime/src/TokenFactory.h new file mode 100644 index 0000000000..4eef044329 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/TokenFactory.h @@ -0,0 +1,30 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { + + /// The default mechanism for creating tokens. It's used by default in Lexer and + /// the error handling strategy (to create missing tokens). Notifying the parser + /// of a new factory means that it notifies it's token source and error strategy. + template<typename Symbol> + class ANTLR4CPP_PUBLIC TokenFactory { + public: + virtual ~TokenFactory() {} + + /// This is the method used to create tokens in the lexer and in the + /// error handling strategy. 
If text!=null, than the start and stop positions + /// are wiped to -1 in the text override is set in the CommonToken. + virtual std::unique_ptr<Symbol> create(std::pair<TokenSource *, CharStream *> source, size_t type, const std::string &text, + size_t channel, size_t start, size_t stop, size_t line, size_t charPositionInLine) = 0; + + /// Generically useful + virtual std::unique_ptr<Symbol> create(size_t type, const std::string &text) = 0; + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/TokenSource.cpp b/contrib/libs/antlr4_cpp_runtime/src/TokenSource.cpp new file mode 100644 index 0000000000..6b9d7af2f7 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/TokenSource.cpp @@ -0,0 +1,9 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "TokenSource.h" + +antlr4::TokenSource::~TokenSource() { +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/TokenSource.h b/contrib/libs/antlr4_cpp_runtime/src/TokenSource.h new file mode 100644 index 0000000000..f05c27efac --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/TokenSource.h @@ -0,0 +1,85 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "TokenFactory.h" + +namespace antlr4 { + + /// <summary> + /// A source of tokens must provide a sequence of tokens via <seealso cref="#nextToken()"/> + /// and also must reveal it's source of characters; <seealso cref="CommonToken"/>'s text is + /// computed from a <seealso cref="CharStream"/>; it only store indices into the char + /// stream. + /// <p/> + /// Errors from the lexer are never passed to the parser. Either you want to keep + /// going or you do not upon token recognition error. 
If you do not want to + /// continue lexing then you do not want to continue parsing. Just throw an + /// exception not under <seealso cref="RecognitionException"/> and Java will naturally toss + /// you all the way out of the recognizers. If you want to continue lexing then + /// you should not throw an exception to the parser--it has already requested a + /// token. Keep lexing until you get a valid one. Just report errors and keep + /// going, looking for a valid token. + /// </summary> + class ANTLR4CPP_PUBLIC TokenSource { + public: + virtual ~TokenSource(); + + /// Return a <seealso cref="Token"/> object from your input stream (usually a + /// <seealso cref="CharStream"/>). Do not fail/return upon lexing error; keep chewing + /// on the characters until you get a good one; errors are not passed through + /// to the parser. + virtual std::unique_ptr<Token> nextToken() = 0; + + /// <summary> + /// Get the line number for the current position in the input stream. The + /// first line in the input is line 1. + /// </summary> + /// <returns> The line number for the current position in the input stream, or + /// 0 if the current token source does not track line numbers. </returns> + virtual size_t getLine() const = 0; + + /// <summary> + /// Get the index into the current line for the current position in the input + /// stream. The first character on a line has position 0. + /// </summary> + /// <returns> The index into the current line for the current position in the input stream, or + /// (size_t)-1 if the current token source does not track character positions. </returns> + virtual size_t getCharPositionInLine() = 0; + + /// <summary> + /// Get the <seealso cref="CharStream"/> from which this token source is currently + /// providing tokens. + /// </summary> + /// <returns> The <seealso cref="CharStream"/> associated with the current position in + /// the input, or {@code null} if no input stream is available for the token + /// source.
</returns> + virtual CharStream* getInputStream() = 0; + + /// <summary> + /// Gets the name of the underlying input source. This method returns a + /// non-null, non-empty string. If such a name is not known, this method + /// returns <seealso cref="IntStream#UNKNOWN_SOURCE_NAME"/>. + /// </summary> + virtual std::string getSourceName() = 0; + + /// <summary> + /// Set the <seealso cref="TokenFactory"/> this token source should use for creating + /// <seealso cref="Token"/> objects from the input. + /// </summary> + /// <param name="factory"> The <seealso cref="TokenFactory"/> to use for creating tokens. </param> + template<typename T1> + void setTokenFactory(TokenFactory<T1> * /*factory*/) {} + + /// <summary> + /// Gets the <seealso cref="TokenFactory"/> this token source is currently using for + /// creating <seealso cref="Token"/> objects from the input. + /// </summary> + /// <returns> The <seealso cref="TokenFactory"/> currently used by this token source. </returns> + virtual TokenFactory<CommonToken>* getTokenFactory() = 0; + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/TokenStream.cpp b/contrib/libs/antlr4_cpp_runtime/src/TokenStream.cpp new file mode 100644 index 0000000000..fbb1ab788a --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/TokenStream.cpp @@ -0,0 +1,11 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "TokenStream.h" + +using namespace antlr4; + +TokenStream::~TokenStream() { +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/TokenStream.h b/contrib/libs/antlr4_cpp_runtime/src/TokenStream.h new file mode 100644 index 0000000000..15b4f367a6 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/TokenStream.h @@ -0,0 +1,137 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "IntStream.h" + +namespace antlr4 { + + /// <summary> + /// An <seealso cref="IntStream"/> whose symbols are <seealso cref="Token"/> instances. + /// </summary> + class ANTLR4CPP_PUBLIC TokenStream : public IntStream { + /// <summary> + /// Get the <seealso cref="Token"/> instance associated with the value returned by + /// <seealso cref="#LA LA(k)"/>. This method has the same pre- and post-conditions as + /// <seealso cref="IntStream#LA"/>. In addition, when the preconditions of this method + /// are met, the return value is non-null and the value of + /// {@code LT(k).getType()==LA(k)}. + /// </summary> + /// <seealso cref= IntStream#LA </seealso> + public: + virtual ~TokenStream(); + + virtual Token* LT(ssize_t k) = 0; + + /// <summary> + /// Gets the <seealso cref="Token"/> at the specified {@code index} in the stream. When + /// the preconditions of this method are met, the return value is non-null. + /// <p/> + /// The preconditions for this method are the same as the preconditions of + /// <seealso cref="IntStream#seek"/>. If the behavior of {@code seek(index)} is + /// unspecified for the current state and given {@code index}, then the + /// behavior of this method is also unspecified. + /// <p/> + /// The symbol referred to by {@code index} differs from {@code seek()} only + /// in the case of filtering streams where {@code index} lies before the end + /// of the stream. Unlike {@code seek()}, this method does not adjust + /// {@code index} to point to a non-ignored symbol. 
+ /// </summary> + /// <exception cref="IllegalArgumentException"> if {code index} is less than 0 </exception> + /// <exception cref="UnsupportedOperationException"> if the stream does not support + /// retrieving the token at the specified index </exception> + virtual Token* get(size_t index) const = 0; + + /// Gets the underlying TokenSource which provides tokens for this stream. + virtual TokenSource* getTokenSource() const = 0; + + /// <summary> + /// Return the text of all tokens within the specified {@code interval}. This + /// method behaves like the following code (including potential exceptions + /// for violating preconditions of <seealso cref="#get"/>, but may be optimized by the + /// specific implementation. + /// + /// <pre> + /// TokenStream stream = ...; + /// String text = ""; + /// for (int i = interval.a; i <= interval.b; i++) { + /// text += stream.get(i).getText(); + /// } + /// </pre> + /// </summary> + /// <param name="interval"> The interval of tokens within this stream to get text + /// for. </param> + /// <returns> The text of all tokens within the specified interval in this + /// stream. + /// </returns> + /// <exception cref="NullPointerException"> if {@code interval} is {@code null} </exception> + virtual std::string getText(const misc::Interval &interval) = 0; + + /// <summary> + /// Return the text of all tokens in the stream. This method behaves like the + /// following code, including potential exceptions from the calls to + /// <seealso cref="IntStream#size"/> and <seealso cref="#getText(Interval)"/>, but may be + /// optimized by the specific implementation. + /// + /// <pre> + /// TokenStream stream = ...; + /// String text = stream.getText(new Interval(0, stream.size())); + /// </pre> + /// </summary> + /// <returns> The text of all tokens in the stream. </returns> + virtual std::string getText() = 0; + + /// <summary> + /// Return the text of all tokens in the source interval of the specified + /// context. 
This method behaves like the following code, including potential + /// exceptions from the call to <seealso cref="#getText(Interval)"/>, but may be + /// optimized by the specific implementation. + /// </p> + /// If {@code ctx.getSourceInterval()} does not return a valid interval of + /// tokens provided by this stream, the behavior is unspecified. + /// + /// <pre> + /// TokenStream stream = ...; + /// String text = stream.getText(ctx.getSourceInterval()); + /// </pre> + /// </summary> + /// <param name="ctx"> The context providing the source interval of tokens to get + /// text for. </param> + /// <returns> The text of all tokens within the source interval of {@code ctx}. </returns> + virtual std::string getText(RuleContext *ctx) = 0; + + /// <summary> + /// Return the text of all tokens in this stream between {@code start} and + /// {@code stop} (inclusive). + /// <p/> + /// If the specified {@code start} or {@code stop} token was not provided by + /// this stream, or if the {@code stop} occurred before the {@code start} + /// token, the behavior is unspecified. + /// <p/> + /// For streams which ensure that the <seealso cref="Token#getTokenIndex"/> method is + /// accurate for all of its provided tokens, this method behaves like the + /// following code. Other streams may implement this method in other ways + /// provided the behavior is consistent with this at a high level. + /// + /// <pre> + /// TokenStream stream = ...; + /// String text = ""; + /// for (int i = start.getTokenIndex(); i <= stop.getTokenIndex(); i++) { + /// text += stream.get(i).getText(); + /// } + /// </pre> + /// </summary> + /// <param name="start"> The first token in the interval to get text for. </param> + /// <param name="stop"> The last token in the interval to get text for (inclusive). </param> + /// <returns> The text of all tokens lying between the specified {@code start} + /// and {@code stop} tokens. 
+ /// </returns> + /// <exception cref="UnsupportedOperationException"> if this stream does not support + /// this method for the specified tokens </exception> + virtual std::string getText(Token *start, Token *stop) = 0; + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/TokenStreamRewriter.cpp b/contrib/libs/antlr4_cpp_runtime/src/TokenStreamRewriter.cpp new file mode 100644 index 0000000000..9050eb5c91 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/TokenStreamRewriter.cpp @@ -0,0 +1,425 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "Exceptions.h" +#include "misc/Interval.h" +#include "Token.h" +#include "TokenStream.h" + +#include "TokenStreamRewriter.h" + +using namespace antlr4; + +using antlr4::misc::Interval; + +TokenStreamRewriter::RewriteOperation::RewriteOperation(TokenStreamRewriter *outerInstance_, size_t index_) + : outerInstance(outerInstance_) { + + InitializeInstanceFields(); + this->index = index_; +} + +TokenStreamRewriter::RewriteOperation::RewriteOperation(TokenStreamRewriter *outerInstance_, size_t index_, + const std::string& text_) : outerInstance(outerInstance_) { + + InitializeInstanceFields(); + this->index = index_; + this->text = text_; +} + +TokenStreamRewriter::RewriteOperation::~RewriteOperation() +{ +} + +size_t TokenStreamRewriter::RewriteOperation::execute(std::string * /*buf*/) { + return index; +} + +// Renders this operation as <TokenStreamRewriter@TOKEN:"TEXT">, where TOKEN is the +// text of the token at this operation's index and TEXT is the operation's text. +std::string TokenStreamRewriter::RewriteOperation::toString() { + const std::string opName = "TokenStreamRewriter"; + // Look the token up by this operation's own index, not by a character position + // inside opName: find('$') on this name yields std::string::npos, which is not a token index. + return "<" + opName + "@" + outerInstance->tokens->get(index)->getText() + ":\"" + text + "\">"; +} + +void TokenStreamRewriter::RewriteOperation::InitializeInstanceFields() { + instructionIndex
= 0; + index = 0; +} + +TokenStreamRewriter::InsertBeforeOp::InsertBeforeOp(TokenStreamRewriter *outerInstance_, size_t index_, const std::string& text_) +: RewriteOperation(outerInstance_, index_, text_), outerInstance(outerInstance_) { +} + +size_t TokenStreamRewriter::InsertBeforeOp::execute(std::string *buf) { + buf->append(text); + if (outerInstance->tokens->get(index)->getType() != Token::EOF) { + buf->append(outerInstance->tokens->get(index)->getText()); + } + return index + 1; +} + +TokenStreamRewriter::ReplaceOp::ReplaceOp(TokenStreamRewriter *outerInstance_, size_t from, size_t to, const std::string& text) +: RewriteOperation(outerInstance_, from, text), outerInstance(outerInstance_) { + + InitializeInstanceFields(); + lastIndex = to; +} + +size_t TokenStreamRewriter::ReplaceOp::execute(std::string *buf) { + buf->append(text); + return lastIndex + 1; +} + +std::string TokenStreamRewriter::ReplaceOp::toString() { + if (text.empty()) { + return "<DeleteOp@" + outerInstance->tokens->get(index)->getText() + ".." + outerInstance->tokens->get(lastIndex)->getText() + ">"; + } + return "<ReplaceOp@" + outerInstance->tokens->get(index)->getText() + ".." 
+ outerInstance->tokens->get(lastIndex)->getText() + ":\"" + text + "\">"; +} + +void TokenStreamRewriter::ReplaceOp::InitializeInstanceFields() { + lastIndex = 0; +} + +//------------------ TokenStreamRewriter ------------------------------------------------------------------------------- + +const std::string TokenStreamRewriter::DEFAULT_PROGRAM_NAME = "default"; + +TokenStreamRewriter::TokenStreamRewriter(TokenStream *tokens_) : tokens(tokens_) { + _programs[DEFAULT_PROGRAM_NAME].reserve(PROGRAM_INIT_SIZE); +} + +TokenStreamRewriter::~TokenStreamRewriter() { + for (const auto &program : _programs) { + for (auto *operation : program.second) { + delete operation; + } + } +} + +TokenStream *TokenStreamRewriter::getTokenStream() { + return tokens; +} + +void TokenStreamRewriter::rollback(size_t instructionIndex) { + rollback(DEFAULT_PROGRAM_NAME, instructionIndex); +} + +// Truncates the program named programName to its first instructionIndex operations and frees +// the operations that are dropped. An instructionIndex at or past the end leaves the program as is. +void TokenStreamRewriter::rollback(const std::string &programName, size_t instructionIndex) { + // Edit the stored vector in place: operator[] guarantees the key exists, and insert() + // never replaces the value of an existing key, so inserting a truncated copy changes nothing. + std::vector<RewriteOperation*> &is = _programs[programName]; + while (is.size() > instructionIndex) { + delete is.back(); // the rewriter owns its operations (the destructor deletes them too) + is.pop_back(); + } +} + +void TokenStreamRewriter::deleteProgram() { + deleteProgram(DEFAULT_PROGRAM_NAME); +} + +void TokenStreamRewriter::deleteProgram(const std::string &programName) { + rollback(programName, MIN_TOKEN_INDEX); +} + +void TokenStreamRewriter::insertAfter(Token *t, const std::string& text) { + insertAfter(DEFAULT_PROGRAM_NAME, t, text); +} + +void TokenStreamRewriter::insertAfter(size_t index, const std::string& text) { + insertAfter(DEFAULT_PROGRAM_NAME, index, text); +} + +void TokenStreamRewriter::insertAfter(const std::string &programName, Token *t, const std::string& text) { + insertAfter(programName, t->getTokenIndex(), text); +} + +void TokenStreamRewriter::insertAfter(const std::string &programName, size_t index, const std::string& text) { + // to insert after, just insert before next
index (even if past end) + insertBefore(programName, index + 1, text); +} + +void TokenStreamRewriter::insertBefore(Token *t, const std::string& text) { + insertBefore(DEFAULT_PROGRAM_NAME, t, text); +} + +void TokenStreamRewriter::insertBefore(size_t index, const std::string& text) { + insertBefore(DEFAULT_PROGRAM_NAME, index, text); +} + +void TokenStreamRewriter::insertBefore(const std::string &programName, Token *t, const std::string& text) { + insertBefore(programName, t->getTokenIndex(), text); +} + +void TokenStreamRewriter::insertBefore(const std::string &programName, size_t index, const std::string& text) { + RewriteOperation *op = new InsertBeforeOp(this, index, text); /* mem-check: deleted in d-tor */ + std::vector<RewriteOperation*> &rewrites = getProgram(programName); + op->instructionIndex = rewrites.size(); + rewrites.push_back(op); +} + +void TokenStreamRewriter::replace(size_t index, const std::string& text) { + replace(DEFAULT_PROGRAM_NAME, index, index, text); +} + +void TokenStreamRewriter::replace(size_t from, size_t to, const std::string& text) { + replace(DEFAULT_PROGRAM_NAME, from, to, text); +} + +void TokenStreamRewriter::replace(Token *indexT, const std::string& text) { + replace(DEFAULT_PROGRAM_NAME, indexT, indexT, text); +} + +void TokenStreamRewriter::replace(Token *from, Token *to, const std::string& text) { + replace(DEFAULT_PROGRAM_NAME, from, to, text); +} + +void TokenStreamRewriter::replace(const std::string &programName, size_t from, size_t to, const std::string& text) { + if (from > to || to >= tokens->size()) { + throw IllegalArgumentException("replace: range invalid: " + std::to_string(from) + ".." 
+ std::to_string(to) + + "(size = " + std::to_string(tokens->size()) + ")"); + } + RewriteOperation *op = new ReplaceOp(this, from, to, text); /* mem-check: deleted in d-tor */ + std::vector<RewriteOperation*> &rewrites = getProgram(programName); + op->instructionIndex = rewrites.size(); + rewrites.push_back(op); +} + +void TokenStreamRewriter::replace(const std::string &programName, Token *from, Token *to, const std::string& text) { + replace(programName, from->getTokenIndex(), to->getTokenIndex(), text); +} + +void TokenStreamRewriter::Delete(size_t index) { + Delete(DEFAULT_PROGRAM_NAME, index, index); +} + +void TokenStreamRewriter::Delete(size_t from, size_t to) { + Delete(DEFAULT_PROGRAM_NAME, from, to); +} + +void TokenStreamRewriter::Delete(Token *indexT) { + Delete(DEFAULT_PROGRAM_NAME, indexT, indexT); +} + +void TokenStreamRewriter::Delete(Token *from, Token *to) { + Delete(DEFAULT_PROGRAM_NAME, from, to); +} + +void TokenStreamRewriter::Delete(const std::string &programName, size_t from, size_t to) { + std::string nullString; + replace(programName, from, to, nullString); +} + +void TokenStreamRewriter::Delete(const std::string &programName, Token *from, Token *to) { + std::string nullString; + replace(programName, from, to, nullString); +} + +size_t TokenStreamRewriter::getLastRewriteTokenIndex() { + return getLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME); +} + +// Returns the index stored for programName by setLastRewriteTokenIndex(), or INVALID_INDEX +// when nothing has been stored for that program. +size_t TokenStreamRewriter::getLastRewriteTokenIndex(const std::string &programName) { + auto entry = _lastRewriteTokenIndexes.find(programName); // one lookup serves both the test and the read + if (entry == _lastRewriteTokenIndexes.end()) { + return INVALID_INDEX; + } + return entry->second; +} + +// Stores i as the last rewrite token index of programName, replacing any value stored earlier. +void TokenStreamRewriter::setLastRewriteTokenIndex(const std::string &programName, size_t i) { + // operator[] assigns unconditionally; insert() would keep the value already stored for the key. + _lastRewriteTokenIndexes[programName] = i; +} + +std::vector<TokenStreamRewriter::RewriteOperation*>& TokenStreamRewriter::getProgram(const std::string &name) { + auto iterator = _programs.find(name); + if (iterator == _programs.end()) { +
return initializeProgram(name); + } + return iterator->second; +} + +std::vector<TokenStreamRewriter::RewriteOperation*>& TokenStreamRewriter::initializeProgram(const std::string &name) { + _programs[name].reserve(PROGRAM_INIT_SIZE); + return _programs[name]; +} + +std::string TokenStreamRewriter::getText() { + return getText(DEFAULT_PROGRAM_NAME, Interval(0UL, tokens->size() - 1)); +} + +std::string TokenStreamRewriter::getText(std::string programName) { + return getText(programName, Interval(0UL, tokens->size() - 1)); +} + +std::string TokenStreamRewriter::getText(const Interval &interval) { + return getText(DEFAULT_PROGRAM_NAME, interval); +} + +std::string TokenStreamRewriter::getText(const std::string &programName, const Interval &interval) { + std::vector<TokenStreamRewriter::RewriteOperation*> &rewrites = _programs[programName]; + size_t start = interval.a; + size_t stop = interval.b; + + // ensure start/end are in range + if (stop > tokens->size() - 1) { + stop = tokens->size() - 1; + } + if (start == INVALID_INDEX) { + start = 0; + } + + if (rewrites.empty() || rewrites.empty()) { + return tokens->getText(interval); // no instructions to execute + } + std::string buf; + + // First, optimize instruction stream + std::unordered_map<size_t, TokenStreamRewriter::RewriteOperation*> indexToOp = reduceToSingleOperationPerIndex(rewrites); + + // Walk buffer, executing instructions and emitting tokens + size_t i = start; + while (i <= stop && i < tokens->size()) { + RewriteOperation *op = indexToOp[i]; + indexToOp.erase(i); // remove so any left have index size-1 + Token *t = tokens->get(i); + if (op == nullptr) { + // no operation at that index, just dump token + if (t->getType() != Token::EOF) { + buf.append(t->getText()); + } + i++; // move to next token + } + else { + i = op->execute(&buf); // execute operation and skip + } + } + + // include stuff after end if it's last index in buffer + // So, if they did an insertAfter(lastValidIndex, "foo"), include + // 
foo if end==lastValidIndex. + if (stop == tokens->size() - 1) { + // Scan any remaining operations after last token + // should be included (they will be inserts). + for (auto op : indexToOp) { + if (op.second->index >= tokens->size() - 1) { + buf.append(op.second->text); + } + } + } + return buf; +} + +// Merges or drops redundant operations in rewrites and returns the survivors keyed by token index. +// A dropped operation is freed and its slot in rewrites is set to nullptr. +// Throws IllegalArgumentException when replace ranges partially overlap (two overlapping deletes are +// merged instead) or when an insert lies inside an earlier replace, and RuntimeException when two +// operations still share an index. +// rewrites[op->instructionIndex] is op itself, so every field of an operation is read before its slot is freed. +std::unordered_map<size_t, TokenStreamRewriter::RewriteOperation*> TokenStreamRewriter::reduceToSingleOperationPerIndex( + std::vector<TokenStreamRewriter::RewriteOperation*> &rewrites) { + + + // WALK REPLACES + for (size_t i = 0; i < rewrites.size(); ++i) { + TokenStreamRewriter::RewriteOperation *op = rewrites[i]; + ReplaceOp *rop = dynamic_cast<ReplaceOp *>(op); + if (rop == nullptr) + continue; + + // Wipe prior inserts within range + std::vector<InsertBeforeOp *> inserts = getKindOfOps<InsertBeforeOp>(rewrites, i); + for (auto *iop : inserts) { + if (iop->index == rop->index) { + // E.g., insert before 2, delete 2..2; update replace + // text to include insert before, kill insert + const size_t slot = iop->instructionIndex; + rop->text = iop->text + rop->text; // take the insert's text while iop is still alive + delete rewrites[slot]; + rewrites[slot] = nullptr; + } + else if (iop->index > rop->index && iop->index <= rop->lastIndex) { + // delete insert as it's a no-op. + const size_t slot = iop->instructionIndex; + delete rewrites[slot]; + rewrites[slot] = nullptr; + } + } + // Drop any prior replaces contained within + std::vector<ReplaceOp*> prevReplaces = getKindOfOps<ReplaceOp>(rewrites, i); + for (auto *prevRop : prevReplaces) { + if (prevRop->index >= rop->index && prevRop->lastIndex <= rop->lastIndex) { + // delete replace as it's a no-op.
+ const size_t contained = prevRop->instructionIndex; // read before prevRop is freed + delete rewrites[contained]; + rewrites[contained] = nullptr; + continue; + } + // throw exception unless disjoint or identical + bool disjoint = prevRop->lastIndex < rop->index || prevRop->index > rop->lastIndex; + // Delete special case of replace (text==null): + // D.i-j.u D.x-y.v | boundaries overlap combine to max(min)..max(right) + if (prevRop->text.empty() && rop->text.empty() && !disjoint) { + // widen rop first: prevRop is the object freed just below + rop->index = std::min(prevRop->index, rop->index); + rop->lastIndex = std::max(prevRop->lastIndex, rop->lastIndex); + const size_t merged = prevRop->instructionIndex; + delete rewrites[merged]; + rewrites[merged] = nullptr; // kill first delete + } + else if (!disjoint) { + throw IllegalArgumentException("replace op boundaries of " + rop->toString() + + " overlap with previous " + prevRop->toString()); + } + } + } + + // WALK INSERTS + for (size_t i = 0; i < rewrites.size(); i++) { + InsertBeforeOp *iop = dynamic_cast<InsertBeforeOp *>(rewrites[i]); + if (iop == nullptr) + continue; + + // combine current insert with prior if any at same index + + std::vector<InsertBeforeOp *> prevInserts = getKindOfOps<InsertBeforeOp>(rewrites, i); + for (auto *prevIop : prevInserts) { + if (prevIop->index == iop->index) { // combine objects + // convert to strings...we're in process of toString'ing + // whole token buffer so no lazy eval issue with any templates + iop->text = catOpText(&iop->text, &prevIop->text); + // delete redundant prior insert + const size_t prior = prevIop->instructionIndex; // read before prevIop is freed + delete rewrites[prior]; + rewrites[prior] = nullptr; + } + } + // look for replaces where iop.index is in range; error + std::vector<ReplaceOp*> prevReplaces = getKindOfOps<ReplaceOp>(rewrites, i); + bool absorbed = false; // true once iop's text has been folded into a replace + for (auto *rop : prevReplaces) { + if (iop->index == rop->index) { + rop->text = catOpText(&iop->text, &rop->text); + absorbed = true; // iop is freed after the loop; later iterations still read it + continue; + } + if (iop->index >= rop->index
&& iop->index <= rop->lastIndex) { + throw IllegalArgumentException("insert op " + iop->toString() + " within boundaries of previous " + rop->toString()); + } + } + if (absorbed) { + delete rewrites[i]; + rewrites[i] = nullptr; // delete current insert + } + } + + std::unordered_map<size_t, TokenStreamRewriter::RewriteOperation*> m; + for (TokenStreamRewriter::RewriteOperation *op : rewrites) { + if (op == nullptr) { // ignore deleted ops + continue; + } + if (m.count(op->index) > 0) { + throw RuntimeException("should only be one op per index"); + } + m[op->index] = op; + } + + return m; +} + +std::string TokenStreamRewriter::catOpText(std::string *a, std::string *b) { + std::string x = ""; + std::string y = ""; + if (a != nullptr) { + x = *a; + } + if (b != nullptr) { + y = *b; + } + return x + y; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/TokenStreamRewriter.h b/contrib/libs/antlr4_cpp_runtime/src/TokenStreamRewriter.h new file mode 100644 index 0000000000..929056a3f9 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/TokenStreamRewriter.h @@ -0,0 +1,295 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { + + /** + * Useful for rewriting out a buffered input token stream after doing some + * augmentation or other manipulations on it. + * + * <p> + * You can insert stuff, replace, and delete chunks. Note that the operations + * are done lazily--only if you convert the buffer to a {@link String} with + * {@link TokenStream#getText()}. This is very efficient because you are not + * moving data around all the time. As the buffer of tokens is converted to + * strings, the {@link #getText()} method(s) scan the input token stream and + * check to see if there is an operation at the current index. If so, the + * operation is done and then normal {@link String} rendering continues on the + * buffer.
This is like having multiple Turing machine instruction streams + * (programs) operating on a single input tape. :)</p> + * + * <p> + * This rewriter makes no modifications to the token stream. It does not ask the + * stream to fill itself up nor does it advance the input cursor. The token + * stream {@link TokenStream#index()} will return the same value before and + * after any {@link #getText()} call.</p> + * + * <p> + * The rewriter only works on tokens that you have in the buffer and ignores the + * current input cursor. If you are buffering tokens on-demand, calling + * {@link #getText()} halfway through the input will only do rewrites for those + * tokens in the first half of the file.</p> + * + * <p> + * Since the operations are done lazily at {@link #getText}-time, operations do + * not screw up the token index values. That is, an insert operation at token + * index {@code i} does not change the index values for tokens + * {@code i}+1..n-1.</p> + * + * <p> + * Because operations never actually alter the buffer, you may always get the + * original token stream back without undoing anything. Since the instructions + * are queued up, you can easily simulate transactions and roll back any changes + * if there is an error just by removing instructions. For example,</p> + * + * <pre> + * CharStream input = new ANTLRFileStream("input"); + * TLexer lex = new TLexer(input); + * CommonTokenStream tokens = new CommonTokenStream(lex); + * T parser = new T(tokens); + * TokenStreamRewriter rewriter = new TokenStreamRewriter(tokens); + * parser.startRule(); + * </pre> + * + * <p> + * Then in the rules, you can execute (assuming rewriter is visible):</p> + * + * <pre> + * Token t,u; + * ... + * rewriter.insertAfter(t, "text to put after t");} + * rewriter.insertAfter(u, "text after u");} + * System.out.println(rewriter.getText()); + * </pre> + * + * <p> + * You can also have multiple "instruction streams" and get multiple rewrites + * from a single pass over the input. 
Just name the instruction streams and use + * that name again when printing the buffer. This could be useful for generating + * a C file and also its header file--all from the same buffer:</p> + * + * <pre> + * rewriter.insertAfter("pass1", t, "text to put after t");} + * rewriter.insertAfter("pass2", u, "text after u");} + * System.out.println(rewriter.getText("pass1")); + * System.out.println(rewriter.getText("pass2")); + * </pre> + * + * <p> + * If you don't use named rewrite streams, a "default" stream is used as the + * first example shows.</p> + */ + class ANTLR4CPP_PUBLIC TokenStreamRewriter { + public: + static const std::string DEFAULT_PROGRAM_NAME; + static constexpr size_t PROGRAM_INIT_SIZE = 100; + static constexpr size_t MIN_TOKEN_INDEX = 0; + + TokenStreamRewriter(TokenStream *tokens); + virtual ~TokenStreamRewriter(); + + TokenStream *getTokenStream(); + + virtual void rollback(size_t instructionIndex); + + /// Rollback the instruction stream for a program so that + /// the indicated instruction (via instructionIndex) is no + /// longer in the stream. UNTESTED! + virtual void rollback(const std::string &programName, size_t instructionIndex); + + virtual void deleteProgram(); + + /// Reset the program so that no instructions exist. 
+ virtual void deleteProgram(const std::string &programName); + virtual void insertAfter(Token *t, const std::string& text); + virtual void insertAfter(size_t index, const std::string& text); + virtual void insertAfter(const std::string &programName, Token *t, const std::string& text); + virtual void insertAfter(const std::string &programName, size_t index, const std::string& text); + + virtual void insertBefore(Token *t, const std::string& text); + virtual void insertBefore(size_t index, const std::string& text); + virtual void insertBefore(const std::string &programName, Token *t, const std::string& text); + virtual void insertBefore(const std::string &programName, size_t index, const std::string& text); + + virtual void replace(size_t index, const std::string& text); + virtual void replace(size_t from, size_t to, const std::string& text); + virtual void replace(Token *indexT, const std::string& text); + virtual void replace(Token *from, Token *to, const std::string& text); + virtual void replace(const std::string &programName, size_t from, size_t to, const std::string& text); + virtual void replace(const std::string &programName, Token *from, Token *to, const std::string& text); + + virtual void Delete(size_t index); + virtual void Delete(size_t from, size_t to); + virtual void Delete(Token *indexT); + virtual void Delete(Token *from, Token *to); + virtual void Delete(const std::string &programName, size_t from, size_t to); + virtual void Delete(const std::string &programName, Token *from, Token *to); + + virtual size_t getLastRewriteTokenIndex(); + + /// Return the text from the original tokens altered per the + /// instructions given to this rewriter. + virtual std::string getText(); + + /** Return the text from the original tokens altered per the + * instructions given to this rewriter in programName. 
+ */ + std::string getText(std::string programName); + + /// Return the text associated with the tokens in the interval from the + /// original token stream but with the alterations given to this rewriter. + /// The interval refers to the indexes in the original token stream. + /// We do not alter the token stream in any way, so the indexes + /// and intervals are still consistent. Includes any operations done + /// to the first and last token in the interval. So, if you did an + /// insertBefore on the first token, you would get that insertion. + /// The same is true if you do an insertAfter the stop token. + virtual std::string getText(const misc::Interval &interval); + + virtual std::string getText(const std::string &programName, const misc::Interval &interval); + + protected: + class RewriteOperation { + public: + /// What index into rewrites List are we? + size_t index; + std::string text; + + /// Token buffer index. + size_t instructionIndex; + + RewriteOperation(TokenStreamRewriter *outerInstance, size_t index); + RewriteOperation(TokenStreamRewriter *outerInstance, size_t index, const std::string& text); + virtual ~RewriteOperation(); + + /// Execute the rewrite operation by possibly adding to the buffer. + /// Return the index of the next token to operate on. 
+ + virtual size_t execute(std::string *buf); + virtual std::string toString(); + + private: + TokenStreamRewriter *const outerInstance; + void InitializeInstanceFields(); + }; + + class InsertBeforeOp : public RewriteOperation { + private: + TokenStreamRewriter *const outerInstance; + + public: + InsertBeforeOp(TokenStreamRewriter *outerInstance, size_t index, const std::string& text); + + virtual size_t execute(std::string *buf) override; + }; + + class ReplaceOp : public RewriteOperation { + private: + TokenStreamRewriter *const outerInstance; + + public: + size_t lastIndex; + + ReplaceOp(TokenStreamRewriter *outerInstance, size_t from, size_t to, const std::string& text); + virtual size_t execute(std::string *buf) override; + virtual std::string toString() override; + + private: + void InitializeInstanceFields(); + }; + + /// Our source stream + TokenStream *const tokens; + + /// You may have multiple, named streams of rewrite operations. + /// I'm calling these things "programs." + /// Maps String (name) -> rewrite (List) + std::map<std::string, std::vector<RewriteOperation*>> _programs; + + /// <summary> + /// Map String (program name) -> Integer index </summary> + std::map<std::string, size_t> _lastRewriteTokenIndexes; + virtual size_t getLastRewriteTokenIndex(const std::string &programName); + virtual void setLastRewriteTokenIndex(const std::string &programName, size_t i); + virtual std::vector<RewriteOperation*>& getProgram(const std::string &name); + + /// <summary> + /// We need to combine operations and report invalid operations (like + /// overlapping replaces that are not completed nested). Inserts to + /// same index need to be combined etc... 
Here are the cases: + /// + /// I.i.u I.j.v leave alone, nonoverlapping + /// I.i.u I.i.v combine: Iivu + /// + /// R.i-j.u R.x-y.v | i-j in x-y delete first R + /// R.i-j.u R.i-j.v delete first R + /// R.i-j.u R.x-y.v | x-y in i-j ERROR + /// R.i-j.u R.x-y.v | boundaries overlap ERROR + /// + /// Delete special case of replace (text==null): + /// D.i-j.u D.x-y.v | boundaries overlap combine to max(min)..max(right) + /// + /// I.i.u R.x-y.v | i in (x+1)-y delete I (since insert before + /// we're not deleting i) + /// I.i.u R.x-y.v | i not in (x+1)-y leave alone, nonoverlapping + /// R.x-y.v I.i.u | i in x-y ERROR + /// R.x-y.v I.x.u R.x-y.uv (combine, delete I) + /// R.x-y.v I.i.u | i not in x-y leave alone, nonoverlapping + /// + /// I.i.u = insert u before op @ index i + /// R.x-y.u = replace x-y indexed tokens with u + /// + /// First we need to examine replaces. For any replace op: + /// + /// 1. wipe out any insertions before op within that range. + /// 2. Drop any replace op before that is contained completely within + /// that range. + /// 3. Throw exception upon boundary overlap with any previous replace. + /// + /// Then we can deal with inserts: + /// + /// 1. for any inserts to same index, combine even if not adjacent. + /// 2. for any prior replace with same left boundary, combine this + /// insert with replace and delete this replace. + /// 3. throw exception if index in same range as previous replace + /// + /// Don't actually delete; make op null in list. Easier to walk list. + /// Later we can throw as we add to index -> op map. + /// + /// Note that I.2 R.2-2 will wipe out I.2 even though, technically, the + /// inserted stuff would be before the replace range. But, if you + /// add tokens in front of a method body '{' and then delete the method + /// body, I think the stuff before the '{' you added should disappear too. + /// + /// Return a map from token index to operation. 
+ /// </summary> + virtual std::unordered_map<size_t, RewriteOperation*> reduceToSingleOperationPerIndex(std::vector<RewriteOperation*> &rewrites); + + virtual std::string catOpText(std::string *a, std::string *b); + + /// Get all operations before an index of a particular kind. + template <typename T> + std::vector<T *> getKindOfOps(std::vector<RewriteOperation *> rewrites, size_t before) { + std::vector<T *> ops; + for (size_t i = 0; i < before && i < rewrites.size(); i++) { + T *op = dynamic_cast<T *>(rewrites[i]); + if (op == nullptr) { // ignore deleted or non matching entries + continue; + } + ops.push_back(op); + } + return ops; + } + + private: + std::vector<RewriteOperation *>& initializeProgram(const std::string &name); + + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/UnbufferedCharStream.cpp b/contrib/libs/antlr4_cpp_runtime/src/UnbufferedCharStream.cpp new file mode 100644 index 0000000000..bbfb8848fd --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/UnbufferedCharStream.cpp @@ -0,0 +1,208 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "misc/Interval.h" +#include "Exceptions.h" +#include "support/Utf8.h" + +#include "UnbufferedCharStream.h" + +using namespace antlrcpp; +using namespace antlr4; +using namespace antlr4::misc; + +UnbufferedCharStream::UnbufferedCharStream(std::wistream &input) + : _p(0), _numMarkers(0), _lastChar(0), _lastCharBufferStart(0), _currentCharIndex(0), _input(input) { + // The vector's size is what used to be n in Java code. 
+ fill(1); // prime +} + +void UnbufferedCharStream::consume() { + if (LA(1) == EOF) { + throw IllegalStateException("cannot consume EOF"); + } + + // buf always has at least data[p==0] in this method due to ctor + _lastChar = _data[_p]; // track last char for LA(-1) + + if (_p == _data.size() - 1 && _numMarkers == 0) { + size_t capacity = _data.capacity(); + _data.clear(); + _data.reserve(capacity); + + _p = 0; + _lastCharBufferStart = _lastChar; + } else { + _p++; + } + + _currentCharIndex++; + sync(1); +} + +void UnbufferedCharStream::sync(size_t want) { + if (_p + want <= _data.size()) // Already enough data loaded? + return; + + fill(_p + want - _data.size()); +} + +size_t UnbufferedCharStream::fill(size_t n) { + for (size_t i = 0; i < n; i++) { + if (_data.size() > 0 && _data.back() == 0xFFFF) { + return i; + } + + try { + char32_t c = nextChar(); + add(c); +#if defined(_MSC_FULL_VER) && _MSC_FULL_VER < 190023026 + } catch (IOException &ioe) { + // throw_with_nested is not available before VS 2015. + throw ioe; +#else + } catch (IOException & /*ioe*/) { + std::throw_with_nested(RuntimeException()); +#endif + } + } + + return n; +} + +char32_t UnbufferedCharStream::nextChar() { + return _input.get(); +} + +void UnbufferedCharStream::add(char32_t c) { + _data += c; +} + +size_t UnbufferedCharStream::LA(ssize_t i) { + if (i == -1) { // special case + return _lastChar; + } + + // We can look back only as many chars as we have buffered. + ssize_t index = static_cast<ssize_t>(_p) + i - 1; + if (index < 0) { + throw IndexOutOfBoundsException(); + } + + if (i > 0) { + sync(static_cast<size_t>(i)); // No need to sync if we look back. 
+ } + if (static_cast<size_t>(index) >= _data.size()) { + return EOF; + } + + if (_data[static_cast<size_t>(index)] == std::char_traits<wchar_t>::eof()) { + return EOF; + } + + return _data[static_cast<size_t>(index)]; +} + +ssize_t UnbufferedCharStream::mark() { + if (_numMarkers == 0) { + _lastCharBufferStart = _lastChar; + } + + ssize_t mark = -static_cast<ssize_t>(_numMarkers) - 1; + _numMarkers++; + return mark; +} + +void UnbufferedCharStream::release(ssize_t marker) { + ssize_t expectedMark = -static_cast<ssize_t>(_numMarkers); + if (marker != expectedMark) { + throw IllegalStateException("release() called with an invalid marker."); + } + + _numMarkers--; + if (_numMarkers == 0 && _p > 0) { + _data.erase(0, _p); + _p = 0; + _lastCharBufferStart = _lastChar; + } +} + +size_t UnbufferedCharStream::index() { + return _currentCharIndex; +} + +void UnbufferedCharStream::seek(size_t index) { + if (index == _currentCharIndex) { + return; + } + + if (index > _currentCharIndex) { + sync(index - _currentCharIndex); + index = std::min(index, getBufferStartIndex() + _data.size() - 1); + } + + // index == to bufferStartIndex should set p to 0 + ssize_t i = static_cast<ssize_t>(index) - static_cast<ssize_t>(getBufferStartIndex()); + if (i < 0) { + throw IllegalArgumentException(std::string("cannot seek to negative index ") + std::to_string(index)); + } else if (i >= static_cast<ssize_t>(_data.size())) { + throw UnsupportedOperationException("Seek to index outside buffer: " + std::to_string(index) + + " not in " + std::to_string(getBufferStartIndex()) + ".." 
+ + std::to_string(getBufferStartIndex() + _data.size())); + } + + _p = static_cast<size_t>(i); + _currentCharIndex = index; + if (_p == 0) { + _lastChar = _lastCharBufferStart; + } else { + _lastChar = _data[_p - 1]; + } +} + +size_t UnbufferedCharStream::size() { + throw UnsupportedOperationException("Unbuffered stream cannot know its size"); +} + +std::string UnbufferedCharStream::getSourceName() const { + if (name.empty()) { + return UNKNOWN_SOURCE_NAME; + } + + return name; +} + +std::string UnbufferedCharStream::getText(const misc::Interval &interval) { + if (interval.a < 0 || interval.b < interval.a - 1) { + throw IllegalArgumentException("invalid interval"); + } + + size_t bufferStartIndex = getBufferStartIndex(); + if (!_data.empty() && _data.back() == 0xFFFF) { + if (interval.a + interval.length() > bufferStartIndex + _data.size()) { + throw IllegalArgumentException("the interval extends past the end of the stream"); + } + } + + if (interval.a < static_cast<ssize_t>(bufferStartIndex) || interval.b >= ssize_t(bufferStartIndex + _data.size())) { + throw UnsupportedOperationException("interval " + interval.toString() + " outside buffer: " + + std::to_string(bufferStartIndex) + ".." 
+ std::to_string(bufferStartIndex + _data.size() - 1)); + } + // convert from absolute to local index + size_t i = interval.a - bufferStartIndex; + auto maybeUtf8 = Utf8::strictEncode(std::u32string_view(_data).substr(i, interval.length())); + if (!maybeUtf8.has_value()) { + throw IllegalArgumentException("Unbuffered stream contains invalid Unicode code points"); + } + return std::move(maybeUtf8).value(); +} + +std::string UnbufferedCharStream::toString() const { + throw UnsupportedOperationException("Unbuffered stream cannot be materialized to a string"); +} + +size_t UnbufferedCharStream::getBufferStartIndex() const { + return _currentCharIndex - _p; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/UnbufferedCharStream.h b/contrib/libs/antlr4_cpp_runtime/src/UnbufferedCharStream.h new file mode 100644 index 0000000000..5b05834f85 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/UnbufferedCharStream.h @@ -0,0 +1,117 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "CharStream.h" + +namespace antlr4 { + + /// Do not buffer up the entire char stream. It does keep a small buffer + /// for efficiency and also buffers while a mark exists (set by the + /// lookahead prediction in parser). "Unbuffered" here refers to fact + /// that it doesn't buffer all data, not that's it's on demand loading of char. + class ANTLR4CPP_PUBLIC UnbufferedCharStream : public CharStream { + public: + /// The name or source of this char stream. + std::string name; + + explicit UnbufferedCharStream(std::wistream &input); + + void consume() override; + size_t LA(ssize_t i) override; + + /// <summary> + /// Return a marker that we can release later. 
+ /// <p/> + /// The specific marker value used for this class allows for some level of + /// protection against misuse where {@code seek()} is called on a mark or + /// {@code release()} is called in the wrong order. + /// </summary> + ssize_t mark() override; + + /// <summary> + /// Decrement number of markers, resetting buffer if we hit 0. </summary> + /// <param name="marker"> </param> + void release(ssize_t marker) override; + size_t index() override; + + /// <summary> + /// Seek to absolute character index, which might not be in the current + /// sliding window. Move {@code p} to {@code index-bufferStartIndex}. + /// </summary> + void seek(size_t index) override; + size_t size() override; + std::string getSourceName() const override; + std::string getText(const misc::Interval &interval) override; + + std::string toString() const override; + + protected: + /// A moving window buffer of the data being scanned. While there's a marker, + /// we keep adding to buffer. Otherwise, <seealso cref="#consume consume()"/> resets so + /// we start filling at index 0 again. + // UTF-32 encoded. + std::u32string _data; + typedef char32_t storage_type; + + /// <summary> + /// 0..n-1 index into <seealso cref="#data data"/> of next character. + /// <p/> + /// The {@code LA(1)} character is {@code data[p]}. If {@code p == n}, we are + /// out of buffered characters. + /// </summary> + size_t _p; + + /// <summary> + /// Count up with <seealso cref="#mark mark()"/> and down with + /// <seealso cref="#release release()"/>. When we {@code release()} the last mark, + /// {@code numMarkers} reaches 0 and we reset the buffer. Copy + /// {@code data[p]..data[n-1]} to {@code data[0]..data[(n-1)-p]}. + /// </summary> + size_t _numMarkers; + + /// This is the {@code LA(-1)} character for the current position. + size_t _lastChar; // UTF-32 + + /// <summary> + /// When {@code numMarkers > 0}, this is the {@code LA(-1)} character for the + /// first character in <seealso cref="#data data"/>. 
Otherwise, this is unspecified. + /// </summary> + size_t _lastCharBufferStart; // UTF-32 + + /// <summary> + /// Absolute character index. It's the index of the character about to be + /// read via {@code LA(1)}. Goes from 0 to the number of characters in the + /// entire stream, although the stream size is unknown before the end is + /// reached. + /// </summary> + size_t _currentCharIndex; + + std::wistream &_input; + + /// <summary> + /// Make sure we have 'want' elements from current position <seealso cref="#p p"/>. + /// Last valid {@code p} index is {@code data.length-1}. {@code p+need-1} is + /// the char index 'need' elements ahead. If we need 1 element, + /// {@code (p+1-1)==p} must be less than {@code data.length}. + /// </summary> + virtual void sync(size_t want); + + /// <summary> + /// Add {@code n} characters to the buffer. Returns the number of characters + /// actually added to the buffer. If the return value is less than {@code n}, + /// then EOF was reached before {@code n} characters could be added. + /// </summary> + virtual size_t fill(size_t n); + + /// Override to provide different source of characters than + /// <seealso cref="#input input"/>. + virtual char32_t nextChar(); + virtual void add(char32_t c); + size_t getBufferStartIndex() const; + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/UnbufferedTokenStream.cpp b/contrib/libs/antlr4_cpp_runtime/src/UnbufferedTokenStream.cpp new file mode 100644 index 0000000000..16ff49e332 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/UnbufferedTokenStream.cpp @@ -0,0 +1,270 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */
+
+#include "Token.h"
+#include "Exceptions.h"
+#include "assert.h"
+#include "TokenSource.h"
+#include "support/Arrays.h"
+#include "misc/Interval.h"
+#include "RuleContext.h"
+#include "WritableToken.h"
+
+#include "UnbufferedTokenStream.h"
+
+using namespace antlr4;
+
+UnbufferedTokenStream::UnbufferedTokenStream(TokenSource *tokenSource) : UnbufferedTokenStream(tokenSource, 256) {
+}
+
+UnbufferedTokenStream::UnbufferedTokenStream(TokenSource *tokenSource, int /*bufferSize*/)
+  : _tokenSource(tokenSource), _lastToken(nullptr), _lastTokenBufferStart(nullptr)
+{
+  InitializeInstanceFields();
+  fill(1); // prime the pump
+}
+
+UnbufferedTokenStream::~UnbufferedTokenStream() {
+}
+
+Token* UnbufferedTokenStream::get(size_t i) const
+{ // get absolute index
+  size_t bufferStartIndex = getBufferStartIndex();
+  if (i < bufferStartIndex || i >= bufferStartIndex + _tokens.size()) {
+    throw IndexOutOfBoundsException(std::string("get(") + std::to_string(i) + std::string(") outside buffer: ")
+      + std::to_string(bufferStartIndex) + std::string("..") + std::to_string(bufferStartIndex + _tokens.size()));
+  }
+  return _tokens[i - bufferStartIndex].get();
+}
+
+Token* UnbufferedTokenStream::LT(ssize_t i)
+{ // LT(-1) is the last consumed token; i >= 1 looks ahead and clamps to the trailing EOF token
+  if (i == -1) {
+    return _lastToken;
+  }
+
+  sync(i);
+  ssize_t index = static_cast<ssize_t>(_p) + i - 1;
+  if (index < 0) {
+    throw IndexOutOfBoundsException(std::string("LT(") + std::to_string(i) + std::string(") gives negative index"));
+  }
+
+  if (index >= static_cast<ssize_t>(_tokens.size())) {
+    assert(_tokens.size() > 0 && _tokens.back()->getType() == EOF);
+    return _tokens.back().get();
+  }
+
+  return _tokens[static_cast<size_t>(index)].get();
+}
+
+size_t UnbufferedTokenStream::LA(ssize_t i)
+{
+  return LT(i)->getType();
+}
+
+TokenSource* UnbufferedTokenStream::getTokenSource() const
+{
+  return _tokenSource;
+}
+
+std::string UnbufferedTokenStream::getText()
+{ // this overload always returns an empty string
+  return "";
+}
+
+std::string UnbufferedTokenStream::getText(RuleContext* ctx)
+{
+  return getText(ctx->getSourceInterval());
+}
+
+std::string UnbufferedTokenStream::getText(Token *start, Token *stop)
+{
+  return getText(misc::Interval(start->getTokenIndex(), stop->getTokenIndex()));
+}
+
+void UnbufferedTokenStream::consume()
+{
+  if (LA(1) == EOF) {
+    throw IllegalStateException("cannot consume EOF");
+  }
+
+  // buf always has at least tokens[p==0] in this method due to ctor
+  _lastToken = _tokens[_p].get(); // track last token for LT(-1)
+
+  // if we're at last token and no markers, opportunity to flush buffer
+  if (_p == _tokens.size() - 1 && _numMarkers == 0) {
+    _tokens.clear(); // NOTE(review): the buffer owns its tokens (unique_ptr), so this frees the token _lastToken was just set to; LT(-1) looks dangling after a flush -- confirm
+    _p = 0;
+    _lastTokenBufferStart = _lastToken;
+  } else {
+    ++_p;
+  }
+
+  ++_currentTokenIndex;
+  sync(1);
+}
+
+/// <summary>
+/// Make sure we have 'need' elements from current position <seealso cref="#p p"/>. Last valid
+/// {@code p} index is {@code tokens.length-1}. {@code p+need-1} is the tokens index 'need' elements
+/// ahead. If we need 1 element, {@code (p+1-1)==p} must be less than {@code tokens.length}.
+/// </summary>
+void UnbufferedTokenStream::sync(ssize_t want)
+{
+  ssize_t need = (static_cast<ssize_t>(_p) + want - 1) - static_cast<ssize_t>(_tokens.size()) + 1; // how many more elements we need?
+  if (need > 0) {
+    fill(static_cast<size_t>(need));
+  }
+}
+
+/// <summary>
+/// Add {@code n} elements to the buffer. Returns the number of tokens
+/// actually added to the buffer. If the return value is less than {@code n},
+/// then EOF was reached before {@code n} tokens could be added.
+/// </summary>
+size_t UnbufferedTokenStream::fill(size_t n)
+{
+  for (size_t i = 0; i < n; i++) {
+    if (_tokens.size() > 0 && _tokens.back()->getType() == EOF) {
+      return i;
+    }
+
+    add(_tokenSource->nextToken());
+  }
+
+  return n;
+}
+
+void UnbufferedTokenStream::add(std::unique_ptr<Token> t)
+{ // takes ownership of t; stamps its absolute index first when the token is writable
+  WritableToken *writable = dynamic_cast<WritableToken *>(t.get());
+  if (writable != nullptr) {
+    writable->setTokenIndex(int(getBufferStartIndex() + _tokens.size()));
+  }
+
+  _tokens.push_back(std::move(t));
+}
+
+/// <summary>
+/// Return a marker that we can release later.
+/// <p/>
+/// The specific marker value used for this class allows for some level of
+/// protection against misuse where {@code seek()} is called on a mark or
+/// {@code release()} is called in the wrong order.
+/// </summary>
+ssize_t UnbufferedTokenStream::mark()
+{
+  if (_numMarkers == 0) {
+    _lastTokenBufferStart = _lastToken;
+  }
+
+  int mark = -_numMarkers - 1; // markers are -1, -2, ... in acquisition order
+  _numMarkers++;
+  return mark;
+}
+
+void UnbufferedTokenStream::release(ssize_t marker)
+{
+  ssize_t expectedMark = -_numMarkers; // only the most recently acquired marker may be released
+  if (marker != expectedMark) {
+    throw IllegalStateException("release() called with an invalid marker.");
+  }
+
+  _numMarkers--;
+  if (_numMarkers == 0) { // can we release buffer?
+    if (_p > 0) {
+      // Copy tokens[p]..tokens[n-1] to tokens[0]..tokens[(n-1)-p], reset ptrs
+      // p is last valid token; move nothing if p==n as we have no valid char
+      _tokens.erase(_tokens.begin(), _tokens.begin() + static_cast<ssize_t>(_p));
+      _p = 0;
+    }
+
+    _lastTokenBufferStart = _lastToken;
+  }
+}
+
+size_t UnbufferedTokenStream::index()
+{
+  return _currentTokenIndex;
+}
+
+void UnbufferedTokenStream::seek(size_t index)
+{ // seek to absolute index
+  if (index == _currentTokenIndex) {
+    return;
+  }
+
+  if (index > _currentTokenIndex) {
+    sync(ssize_t(index - _currentTokenIndex));
+    index = std::min(index, getBufferStartIndex() + _tokens.size() - 1);
+  }
+
+  size_t bufferStartIndex = getBufferStartIndex();
+  if (bufferStartIndex > index) {
+    throw IllegalArgumentException(std::string("cannot seek to negative index ") + std::to_string(index));
+  }
+
+  size_t i = index - bufferStartIndex;
+  if (i >= _tokens.size()) {
+    throw UnsupportedOperationException(std::string("seek to index outside buffer: ") + std::to_string(index) +
+      " not in " + std::to_string(bufferStartIndex) + ".." + std::to_string(bufferStartIndex + _tokens.size()));
+  }
+
+  _p = i;
+  _currentTokenIndex = index;
+  if (_p == 0) {
+    _lastToken = _lastTokenBufferStart;
+  } else {
+    _lastToken = _tokens[_p - 1].get();
+  }
+}
+
+size_t UnbufferedTokenStream::size()
+{
+  throw UnsupportedOperationException("Unbuffered stream cannot know its size");
+}
+
+std::string UnbufferedTokenStream::getSourceName() const
+{
+  return _tokenSource->getSourceName();
+}
+
+std::string UnbufferedTokenStream::getText(const misc::Interval &interval)
+{ // text of the buffered tokens in [interval.a, interval.b] (absolute indexes), joined with ", "
+  size_t bufferStartIndex = getBufferStartIndex();
+  size_t bufferStopIndex = bufferStartIndex + _tokens.size() - 1;
+
+  size_t start = interval.a;
+  size_t stop = interval.b;
+  if (start < bufferStartIndex || stop > bufferStopIndex) {
+    throw UnsupportedOperationException(std::string("interval ") + interval.toString() +
+      " not in token buffer window: " + std::to_string(bufferStartIndex) + ".." + std::to_string(bufferStopIndex));
+  }
+
+  size_t a = start - bufferStartIndex; // buffer-local index of the first requested token
+  size_t b = stop - bufferStartIndex;
+
+  std::stringstream ss;
+  for (size_t i = a; i <= b; i++) {
+    Token *t = _tokens[i].get();
+    if (i > a) // separator only between tokens; testing against 0 emitted a leading ", " whenever a > 0
+      ss << ", ";
+    ss << t->getText();
+  }
+
+  return ss.str();
+}
+
+size_t UnbufferedTokenStream::getBufferStartIndex() const
+{ // absolute index of _tokens[0]
+  return _currentTokenIndex - _p;
+}
+
+void UnbufferedTokenStream::InitializeInstanceFields()
+{
+  _p = 0;
+  _numMarkers = 0;
+  _currentTokenIndex = 0;
+}
diff --git a/contrib/libs/antlr4_cpp_runtime/src/UnbufferedTokenStream.h b/contrib/libs/antlr4_cpp_runtime/src/UnbufferedTokenStream.h
new file mode 100644
index 0000000000..0c67ec8610
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/UnbufferedTokenStream.h
@@ -0,0 +1,115 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "TokenStream.h"
+
+namespace antlr4 {
+
+  class ANTLR4CPP_PUBLIC UnbufferedTokenStream : public TokenStream {
+  public:
+    UnbufferedTokenStream(TokenSource *tokenSource);
+    UnbufferedTokenStream(TokenSource *tokenSource, int bufferSize); // bufferSize is ignored by the current implementation
+    UnbufferedTokenStream(const UnbufferedTokenStream& other) = delete;
+    virtual ~UnbufferedTokenStream();
+
+    UnbufferedTokenStream& operator = (const UnbufferedTokenStream& other) = delete;
+
+    virtual Token* get(size_t i) const override;
+    virtual Token* LT(ssize_t i) override;
+    virtual size_t LA(ssize_t i) override;
+
+    virtual TokenSource* getTokenSource() const override;
+
+    virtual std::string getText(const misc::Interval &interval) override;
+    virtual std::string getText() override;
+    virtual std::string getText(RuleContext *ctx) override;
+    virtual std::string getText(Token *start, Token *stop) override;
+
+    virtual void consume() override;
+
+    /// <summary>
+    /// Return a marker that we can release later.
+    /// <p/>
+    /// The specific marker value used for this class allows for some level of
+    /// protection against misuse where {@code seek()} is called on a mark or
+    /// {@code release()} is called in the wrong order.
+    /// </summary>
+    virtual ssize_t mark() override;
+    virtual void release(ssize_t marker) override;
+    virtual size_t index() override;
+    virtual void seek(size_t index) override;
+    virtual size_t size() override;
+    virtual std::string getSourceName() const override;
+
+  protected:
+    /// The source that tokens are pulled from on demand: fill() appends the
+    /// result of nextToken() on this source to the buffer, and getSourceName()
+    /// is forwarded to it.
+    TokenSource *_tokenSource;
+
+    /// <summary>
+    /// A moving window buffer of the data being scanned. While there's a marker,
+    /// we keep adding to buffer. Otherwise, <seealso cref="#consume consume()"/> resets so
+    /// we start filling at index 0 again.
+    /// </summary>
+
+    std::vector<std::unique_ptr<Token>> _tokens;
+
+    /// <summary>
+    /// 0..n-1 index into <seealso cref="#tokens tokens"/> of next token.
+    /// <p/>
+    /// The {@code LT(1)} token is {@code tokens[p]}. If {@code p == n}, we are
+    /// out of buffered tokens.
+    /// </summary>
+    size_t _p;
+
+    /// <summary>
+    /// Count up with <seealso cref="#mark mark()"/> and down with
+    /// <seealso cref="#release release()"/>. When we {@code release()} the last mark,
+    /// {@code numMarkers} reaches 0 and we reset the buffer. Copy
+    /// {@code tokens[p]..tokens[n-1]} to {@code tokens[0]..tokens[(n-1)-p]}.
+    /// </summary>
+    int _numMarkers;
+
+    /// <summary>
+    /// This is the {@code LT(-1)} token for the current position.
+    /// </summary>
+    Token *_lastToken;
+
+    /// <summary>
+    /// When {@code numMarkers > 0}, this is the {@code LT(-1)} token for the
+    /// first token in <seealso cref="#tokens"/>. Otherwise, this is {@code null}.
+    /// </summary>
+    Token *_lastTokenBufferStart;
+
+    /// <summary>
+    /// Absolute token index. It's the index of the token about to be read via
+    /// {@code LT(1)}. Goes from 0 to the number of tokens in the entire stream,
+    /// although the stream size is unknown before the end is reached.
+    /// <p/>
+    /// This value is used to set the token indexes if the stream provides tokens
+    /// that implement <seealso cref="WritableToken"/>.
+    /// </summary>
+    size_t _currentTokenIndex;
+
+    virtual void sync(ssize_t want); // make sure 'want' tokens are buffered from position p (p + want - 1 must be < tokens.size()), calling fill() for the shortfall
+
+    /// <summary>
+    /// Add {@code n} elements to the buffer. Returns the number of tokens
+    /// actually added to the buffer. If the return value is less than {@code n},
+    /// then EOF was reached before {@code n} tokens could be added.
+    /// </summary>
+    virtual size_t fill(size_t n);
+    virtual void add(std::unique_ptr<Token> t);
+
+    size_t getBufferStartIndex() const; // absolute index of the first buffered token (_currentTokenIndex - _p)
+
+  private:
+    void InitializeInstanceFields();
+  };
+
+} // namespace antlr4
diff --git a/contrib/libs/antlr4_cpp_runtime/src/Version.h b/contrib/libs/antlr4_cpp_runtime/src/Version.h
new file mode 100644
index 0000000000..43f00ea65c
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/Version.h
@@ -0,0 +1,42 @@
+// Copyright 2012-2022 The ANTLR Project
+//
+// Redistribution and use in source and binary forms, with or without modification, are permitted
+// provided that the following conditions are met:
+//
+// 1. Redistributions of source code must retain the above copyright notice, this list of conditions
+// and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright notice, this list of
+// conditions and the following disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// 3. Neither the name of the copyright holder nor the names of its contributors may be used to
+// endorse or promote products derived from this software without specific prior written
+// permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
+// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#pragma once + +#include "antlr4-common.h" + +#define ANTLRCPP_VERSION_MAJOR 4 +#define ANTLRCPP_VERSION_MINOR 11 +#define ANTLRCPP_VERSION_PATCH 1 + +#define ANTLRCPP_MAKE_VERSION(major, minor, patch) ((major) * 100000 + (minor) * 1000 + (patch)) + +#define ANTLRCPP_VERSION \ + ANTLRCPP_MAKE_VERSION(ANTLR4CPP_VERSION_MAJOR, ANTLR4CPP_VERSION_MINOR, ANTLR4CPP_VERSION_PATCH) + +#define ANTLRCPP_VERSION_STRING \ + ANTLR4CPP_STRINGIFY(ANTLR4CPP_VERSION_MAJOR) "." \ + ANTLR4CPP_STRINGIFY(ANTLR4CPP_VERSION_MINOR) "." \ + ANTLR4CPP_STRINGIFY(ANTLR4CPP_VERSION_PATCH) diff --git a/contrib/libs/antlr4_cpp_runtime/src/Vocabulary.cpp b/contrib/libs/antlr4_cpp_runtime/src/Vocabulary.cpp new file mode 100644 index 0000000000..0f783d5d79 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/Vocabulary.cpp @@ -0,0 +1,64 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */
+
+#include "Token.h"
+
+#include "Vocabulary.h"
+
+using namespace antlr4::dfa;
+
+const Vocabulary Vocabulary::EMPTY_VOCABULARY;
+// Two-list constructor: forwards to the three-list constructor with an empty display-name list.
+Vocabulary::Vocabulary(std::vector<std::string> literalNames, std::vector<std::string> symbolicNames)
+: Vocabulary(std::move(literalNames), std::move(symbolicNames), {}) {
+}
+// Moves the three lists into the members; _maxTokenType becomes the longest list's size minus one.
+Vocabulary::Vocabulary(std::vector<std::string> literalNames,
+  std::vector<std::string> symbolicNames, std::vector<std::string> displayNames)
+  : _literalNames(std::move(literalNames)), _symbolicNames(std::move(symbolicNames)), _displayNames(std::move(displayNames)),
+    _maxTokenType(std::max(_displayNames.size(), std::max(_literalNames.size(), _symbolicNames.size())) - 1) {
+  // See note here on -1 part: https://github.com/antlr/antlr4/pull/1146
+  // NOTE(review): with three empty lists the unsigned "- 1" wraps around -- confirm this is intended.
+}
+// Returns the literal name stored for tokenType, or "" when the index is past the end of the list.
+std::string_view Vocabulary::getLiteralName(size_t tokenType) const {
+  if (tokenType < _literalNames.size()) {
+    return _literalNames[tokenType];
+  }
+
+  return "";
+}
+// Returns "EOF" for Token::EOF, else the stored symbolic name, else "" when the index is out of range.
+std::string_view Vocabulary::getSymbolicName(size_t tokenType) const {
+  if (tokenType == Token::EOF) {
+    return "EOF";
+  }
+
+  if (tokenType < _symbolicNames.size()) {
+    return _symbolicNames[tokenType];
+  }
+
+  return "";
+}
+// Picks the first non-empty of: display name, literal name, symbolic name; else the decimal token type.
+std::string Vocabulary::getDisplayName(size_t tokenType) const {
+  if (tokenType < _displayNames.size()) {
+    std::string_view displayName = _displayNames[tokenType];
+    if (!displayName.empty()) {
+      return std::string(displayName);
+    }
+  }
+
+  std::string_view literalName = getLiteralName(tokenType);
+  if (!literalName.empty()) {
+    return std::string(literalName);
+  }
+
+  std::string_view symbolicName = getSymbolicName(tokenType);
+  if (!symbolicName.empty()) {
+    return std::string(symbolicName);
+  }
+
+  return std::to_string(tokenType);
+}
diff --git a/contrib/libs/antlr4_cpp_runtime/src/Vocabulary.h b/contrib/libs/antlr4_cpp_runtime/src/Vocabulary.h
new file mode 100644
index 0000000000..af5b243880
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/Vocabulary.h
@@ -0,0 +1,177 @@
+/* Copyright (c) 
2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "antlr4-common.h"
+
+namespace antlr4 {
+namespace dfa {
+
+  /// This class provides a default implementation of the <seealso cref="Vocabulary"/>
+  /// interface.
+  class ANTLR4CPP_PUBLIC Vocabulary final {
+  public:
+    /// Gets an empty <seealso cref="Vocabulary"/> instance.
+    ///
+    /// <para>
+    /// No literal or symbol names are assigned to token types, so
+    /// <seealso cref="#getDisplayName(int)"/> returns the numeric value for all tokens
+    /// except <seealso cref="Token#EOF"/>.</para>
+    [[deprecated("Use the default constructor of Vocabulary instead.")]] static const Vocabulary EMPTY_VOCABULARY;
+    /// Creates a vocabulary with no names; the maximum token type keeps its default of 0.
+    Vocabulary() {}
+
+    Vocabulary(const Vocabulary&) = default;
+
+    /// <summary>
+    /// Constructs a new instance of <seealso cref="Vocabulary"/> from the specified
+    /// literal and symbolic token names.
+    /// </summary>
+    /// <param name="literalNames"> The literal names assigned to tokens, or an empty list
+    /// if no literal names are assigned. </param>
+    /// <param name="symbolicNames"> The symbolic names assigned to tokens, or
+    /// an empty list if no symbolic names are assigned.
+    /// </param>
+    /// <seealso cref= #getLiteralName(int) </seealso>
+    /// <seealso cref= #getSymbolicName(int) </seealso>
+    Vocabulary(std::vector<std::string> literalNames, std::vector<std::string> symbolicNames);
+
+    /// <summary>
+    /// Constructs a new instance of <seealso cref="Vocabulary"/> from the specified
+    /// literal, symbolic, and display token names.
+    /// </summary>
+    /// <param name="literalNames"> The literal names assigned to tokens, or an empty list
+    /// if no literal names are assigned. </param>
+    /// <param name="symbolicNames"> The symbolic names assigned to tokens, or
+    /// an empty list if no symbolic names are assigned. </param>
+    /// <param name="displayNames"> The display names assigned to tokens, or an empty list
+    /// to use the values in {@code literalNames} and {@code symbolicNames} as
+    /// the source of display names, as described in
+    /// <seealso cref="#getDisplayName(int)"/>.
+    /// </param>
+    /// <seealso cref= #getLiteralName(int) </seealso>
+    /// <seealso cref= #getSymbolicName(int) </seealso>
+    /// <seealso cref= #getDisplayName(int) </seealso>
+    Vocabulary(std::vector<std::string> literalNames, std::vector<std::string> symbolicNames,
+               std::vector<std::string> displayNames);
+
+    /// <summary>
+    /// Returns the highest token type value. It can be used to iterate from
+    /// zero to that number, inclusively, thus querying all stored entries. </summary>
+    /// <returns> the highest token type value </returns>
+    constexpr size_t getMaxTokenType() const { return _maxTokenType; }
+
+    /// <summary>
+    /// Gets the string literal associated with a token type. The string returned
+    /// by this method, when not empty, can be used unaltered in a parser
+    /// grammar to represent this token type.
+    ///
+    /// <para>The following table shows examples of lexer rules and the literal
+    /// names assigned to the corresponding token types.</para>
+    ///
+    /// <table>
+    ///  <tr>
+    ///   <th>Rule</th>
+    ///   <th>Literal Name</th>
+    ///   <th>Java String Literal</th>
+    ///  </tr>
+    ///  <tr>
+    ///   <td>{@code THIS : 'this';}</td>
+    ///   <td>{@code 'this'}</td>
+    ///   <td>{@code "'this'"}</td>
+    ///  </tr>
+    ///  <tr>
+    ///   <td>{@code SQUOTE : '\'';}</td>
+    ///   <td>{@code '\''}</td>
+    ///   <td>{@code "'\\''"}</td>
+    ///  </tr>
+    ///  <tr>
+    ///   <td>{@code ID : [A-Z]+;}</td>
+    ///   <td>n/a</td>
+    ///   <td>{@code null}</td>
+    ///  </tr>
+    /// </table>
+    /// </summary>
+    /// <param name="tokenType"> The token type.
+    /// </param>
+    /// <returns> The string literal associated with the specified token type, or
+    /// an empty string view if no string literal is associated with the type. </returns>
+    std::string_view getLiteralName(size_t tokenType) const;
+
+    /// <summary>
+    /// Gets the symbolic name associated with a token type. The string returned
+    /// by this method, when not empty, can be used unaltered in a parser
+    /// grammar to represent this token type.
+    ///
+    /// <para>This method supports token types defined by any of the following
+    /// methods:</para>
+    ///
+    /// <ul>
+    ///  <li>Tokens created by lexer rules.</li>
+    ///  <li>Tokens defined in a <code>tokens{}</code> block in a lexer or parser
+    ///  grammar.</li>
+    ///  <li>The implicitly defined {@code EOF} token, which has the token type
+    ///  <seealso cref="Token#EOF"/>.</li>
+    /// </ul>
+    ///
+    /// <para>The following table shows examples of lexer rules and the symbolic
+    /// names assigned to the corresponding token types.</para>
+    ///
+    /// <table>
+    ///  <tr>
+    ///   <th>Rule</th>
+    ///   <th>Symbolic Name</th>
+    ///  </tr>
+    ///  <tr>
+    ///   <td>{@code THIS : 'this';}</td>
+    ///   <td>{@code THIS}</td>
+    ///  </tr>
+    ///  <tr>
+    ///   <td>{@code SQUOTE : '\'';}</td>
+    ///   <td>{@code SQUOTE}</td>
+    ///  </tr>
+    ///  <tr>
+    ///   <td>{@code ID : [A-Z]+;}</td>
+    ///   <td>{@code ID}</td>
+    ///  </tr>
+    /// </table>
+    /// </summary>
+    /// <param name="tokenType"> The token type.
+    /// </param>
+    /// <returns> The symbolic name associated with the specified token type, or
+    /// an empty string view if no symbolic name is associated with the type. </returns>
+    std::string_view getSymbolicName(size_t tokenType) const;
+
+    /// <summary>
+    /// Gets the display name of a token type.
+    ///
+    /// <para>ANTLR provides a default implementation of this method, but
+    /// applications are free to override the behavior in any manner which makes
+    /// sense for the application. The default implementation returns the first
+    /// result from the following list which produces a non-empty
+    /// result.</para>
+    /// (A non-empty entry in the display-name list, when one exists, is returned before this list is tried.)
+    /// <ol>
+    ///  <li>The result of <seealso cref="#getLiteralName"/></li>
+    ///  <li>The result of <seealso cref="#getSymbolicName"/></li>
+    ///  <li>The result of <seealso cref="Integer#toString"/></li>
+    /// </ol>
+    /// </summary>
+    /// <param name="tokenType"> The token type.
+    /// </param>
+    /// <returns> The display name of the token type, for use in error reporting or
+    /// other user-visible messages which reference specific token types. </returns>
+    std::string getDisplayName(size_t tokenType) const;
+
+  private:
+    std::vector<std::string> const _literalNames;   // indexed by token type
+    std::vector<std::string> const _symbolicNames;  // indexed by token type
+    std::vector<std::string> const _displayNames;   // indexed by token type
+    const size_t _maxTokenType = 0;                 // value reported by getMaxTokenType()
+  };
+
+} // namespace dfa
+} // namespace antlr4
diff --git a/contrib/libs/antlr4_cpp_runtime/src/WritableToken.cpp b/contrib/libs/antlr4_cpp_runtime/src/WritableToken.cpp
new file mode 100644
index 0000000000..a30cd96f19
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/WritableToken.cpp
@@ -0,0 +1,9 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#include "WritableToken.h"
+// Out-of-line definition of the virtual destructor declared in WritableToken.h.
+antlr4::WritableToken::~WritableToken() {
+}
diff --git a/contrib/libs/antlr4_cpp_runtime/src/WritableToken.h b/contrib/libs/antlr4_cpp_runtime/src/WritableToken.h
new file mode 100644
index 0000000000..28856f25b9
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/WritableToken.h
@@ -0,0 +1,23 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "Token.h"
+
+namespace antlr4 {
+  /// A Token that additionally declares setters for its fields; every setter is pure virtual.
+  class ANTLR4CPP_PUBLIC WritableToken : public Token {
+  public:
+    virtual ~WritableToken();
+    virtual void setText(const std::string &text) = 0;
+    virtual void setType(size_t ttype) = 0;
+    virtual void setLine(size_t line) = 0;
+    virtual void setCharPositionInLine(size_t pos) = 0;
+    virtual void setChannel(size_t channel) = 0;
+    virtual void setTokenIndex(size_t index) = 0;
+  };
+
+} // namespace antlr4
diff --git a/contrib/libs/antlr4_cpp_runtime/src/antlr4-common.h b/contrib/libs/antlr4_cpp_runtime/src/antlr4-common.h
new file mode 100644
index 0000000000..d7f9a65fa1
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/antlr4-common.h
@@ -0,0 +1,101 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include <algorithm>
+#include <any>
+#include <atomic>
+#include <bitset>
+#include <cassert>
+#include <climits>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <exception>
+#include <fstream>
+#include <iostream>
+#include <iterator>
+#include <limits>
+#include <map>
+#include <memory>
+#include <set>
+#include <sstream>
+#include <stack>
+#include <string>
+#include <string_view>
+#include <typeinfo>
+#include <type_traits>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+// Defines for the Guid class and other platform dependent stuff.
+#ifdef _WIN32
+  #ifdef _MSC_VER
+    #pragma warning (disable: 4250) // Class inherits by dominance.
+    #pragma warning (disable: 4512) // assignment operator could not be generated
+
+    #if _MSC_VER < 1900
+    // Before VS 2015 code like "while (true)" will create a (useless) warning in level 4.
+      #pragma warning (disable: 4127) // conditional expression is constant
+    #endif
+  #endif
+  // ssize_t is typedef'd here for Windows builds: 64-bit under _WIN64, 32-bit otherwise.
+  #ifdef _WIN64
+    typedef __int64 ssize_t;
+  #else
+    typedef __int32 ssize_t;
+  #endif
+  // ANTLR4CPP_PUBLIC on Windows: dllexport when building the library, empty for static use, dllimport otherwise.
+  #ifdef ANTLR4CPP_EXPORTS
+    #define ANTLR4CPP_PUBLIC __declspec(dllexport)
+  #else
+    #ifdef ANTLR4CPP_STATIC
+      #define ANTLR4CPP_PUBLIC
+    #else
+      #define ANTLR4CPP_PUBLIC __declspec(dllimport)
+    #endif
+  #endif
+
+#elif defined(__APPLE__)
+  #if __GNUC__ >= 4
+    #define ANTLR4CPP_PUBLIC __attribute__ ((visibility ("default")))
+  #else
+    #define ANTLR4CPP_PUBLIC
+  #endif
+#else
+  #if __GNUC__ >= 6
+    #define ANTLR4CPP_PUBLIC __attribute__ ((visibility ("default")))
+  #else
+    #define ANTLR4CPP_PUBLIC
+  #endif
+#endif
+// ANTLR4CPP_HAVE_BUILTIN(x) evaluates to 0 on compilers that do not provide __has_builtin.
+#ifdef __has_builtin
+#define ANTLR4CPP_HAVE_BUILTIN(x) __has_builtin(x)
+#else
+#define ANTLR4CPP_HAVE_BUILTIN(x) 0
+#endif
+// Two-level stringification so that a macro argument is expanded before it is turned into a string.
+#define ANTLR4CPP_INTERNAL_STRINGIFY(x) #x
+#define ANTLR4CPP_STRINGIFY(x) ANTLR4CPP_INTERNAL_STRINGIFY(x)
+
+// We use everything from the C++ standard library by default.
+#ifndef ANTLR4CPP_USING_ABSEIL
+#define ANTLR4CPP_USING_ABSEIL 0
+#endif
+
+#include "support/Declarations.h"
+
+// We have to undefine this symbol as ANTLR will use this name for own members and even
+// generated functions. Because EOF is a global macro we cannot use e.g. a namespace scope to disambiguate.
+#ifdef EOF
+#undef EOF
+#endif
+// INVALID_INDEX is the largest size_t value; Ref<T> is shorthand for std::shared_ptr<T>.
+#define INVALID_INDEX std::numeric_limits<size_t>::max()
+template<class T> using Ref = std::shared_ptr<T>;
diff --git a/contrib/libs/antlr4_cpp_runtime/src/antlr4-runtime.h b/contrib/libs/antlr4_cpp_runtime/src/antlr4-runtime.h
new file mode 100644
index 0000000000..50b85aa4fc
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/antlr4-runtime.h
@@ -0,0 +1,168 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */ + +#pragma once + +// This is the umbrella header for all ANTLR4 C++ runtime headers. + +#include "antlr4-common.h" + +#include "ANTLRErrorListener.h" +#include "ANTLRErrorStrategy.h" +#include "ANTLRFileStream.h" +#include "ANTLRInputStream.h" +#include "BailErrorStrategy.h" +#include "BaseErrorListener.h" +#include "BufferedTokenStream.h" +#include "CharStream.h" +#include "CommonToken.h" +#include "CommonTokenFactory.h" +#include "CommonTokenStream.h" +#include "ConsoleErrorListener.h" +#include "DefaultErrorStrategy.h" +#include "DiagnosticErrorListener.h" +#include "Exceptions.h" +#include "FailedPredicateException.h" +#include "InputMismatchException.h" +#include "IntStream.h" +#include "InterpreterRuleContext.h" +#include "Lexer.h" +#include "LexerInterpreter.h" +#include "LexerNoViableAltException.h" +#include "ListTokenSource.h" +#include "NoViableAltException.h" +#include "Parser.h" +#include "ParserInterpreter.h" +#include "ParserRuleContext.h" +#include "ProxyErrorListener.h" +#include "RecognitionException.h" +#include "Recognizer.h" +#include "RuleContext.h" +#include "RuleContextWithAltNum.h" +#include "RuntimeMetaData.h" +#include "Token.h" +#include "TokenFactory.h" +#include "TokenSource.h" +#include "TokenStream.h" +#include "TokenStreamRewriter.h" +#include "UnbufferedCharStream.h" +#include "UnbufferedTokenStream.h" +#include "Version.h" +#include "Vocabulary.h" +#include "Vocabulary.h" +#include "WritableToken.h" +#include "atn/ATN.h" +#include "atn/ATNConfig.h" +#include "atn/ATNConfigSet.h" +#include "atn/ATNDeserializationOptions.h" +#include "atn/ATNDeserializer.h" +#include "atn/ATNSimulator.h" +#include "atn/ATNState.h" +#include "atn/ATNType.h" +#include "atn/ActionTransition.h" +#include "atn/AmbiguityInfo.h" +#include "atn/ArrayPredictionContext.h" +#include "atn/AtomTransition.h" +#include "atn/BasicBlockStartState.h" +#include "atn/BasicState.h" +#include "atn/BlockEndState.h" +#include "atn/BlockStartState.h" +#include 
"atn/ContextSensitivityInfo.h" +#include "atn/DecisionEventInfo.h" +#include "atn/DecisionInfo.h" +#include "atn/DecisionState.h" +#include "atn/EpsilonTransition.h" +#include "atn/ErrorInfo.h" +#include "atn/LL1Analyzer.h" +#include "atn/LexerATNConfig.h" +#include "atn/LexerATNSimulator.h" +#include "atn/LexerAction.h" +#include "atn/LexerActionExecutor.h" +#include "atn/LexerActionType.h" +#include "atn/LexerChannelAction.h" +#include "atn/LexerCustomAction.h" +#include "atn/LexerIndexedCustomAction.h" +#include "atn/LexerModeAction.h" +#include "atn/LexerMoreAction.h" +#include "atn/LexerPopModeAction.h" +#include "atn/LexerPushModeAction.h" +#include "atn/LexerSkipAction.h" +#include "atn/LexerTypeAction.h" +#include "atn/LookaheadEventInfo.h" +#include "atn/LoopEndState.h" +#include "atn/NotSetTransition.h" +#include "atn/OrderedATNConfigSet.h" +#include "atn/ParseInfo.h" +#include "atn/ParserATNSimulator.h" +#include "atn/ParserATNSimulatorOptions.h" +#include "atn/PlusBlockStartState.h" +#include "atn/PlusLoopbackState.h" +#include "atn/PrecedencePredicateTransition.h" +#include "atn/PredicateEvalInfo.h" +#include "atn/PredicateTransition.h" +#include "atn/PredictionContext.h" +#include "atn/PredictionContextCache.h" +#include "atn/PredictionContextMergeCache.h" +#include "atn/PredictionContextMergeCacheOptions.h" +#include "atn/PredictionMode.h" +#include "atn/ProfilingATNSimulator.h" +#include "atn/RangeTransition.h" +#include "atn/RuleStartState.h" +#include "atn/RuleStopState.h" +#include "atn/RuleTransition.h" +#include "atn/SemanticContext.h" +#include "atn/SerializedATNView.h" +#include "atn/SetTransition.h" +#include "atn/SingletonPredictionContext.h" +#include "atn/StarBlockStartState.h" +#include "atn/StarLoopEntryState.h" +#include "atn/StarLoopbackState.h" +#include "atn/TokensStartState.h" +#include "atn/Transition.h" +#include "atn/WildcardTransition.h" +#include "dfa/DFA.h" +#include "dfa/DFASerializer.h" +#include "dfa/DFAState.h" +#include 
"dfa/LexerDFASerializer.h" +#include "misc/InterpreterDataReader.h" +#include "misc/Interval.h" +#include "misc/IntervalSet.h" +#include "misc/MurmurHash.h" +#include "misc/Predicate.h" +#include "support/Any.h" +#include "support/Arrays.h" +#include "support/BitSet.h" +#include "support/Casts.h" +#include "support/CPPUtils.h" +#include "tree/AbstractParseTreeVisitor.h" +#include "tree/ErrorNode.h" +#include "tree/ErrorNodeImpl.h" +#include "tree/ParseTree.h" +#include "tree/ParseTreeListener.h" +#include "tree/ParseTreeProperty.h" +#include "tree/ParseTreeVisitor.h" +#include "tree/ParseTreeWalker.h" +#include "tree/TerminalNode.h" +#include "tree/TerminalNodeImpl.h" +#include "tree/Trees.h" +#include "tree/pattern/Chunk.h" +#include "tree/pattern/ParseTreeMatch.h" +#include "tree/pattern/ParseTreePattern.h" +#include "tree/pattern/ParseTreePatternMatcher.h" +#include "tree/pattern/RuleTagToken.h" +#include "tree/pattern/TagChunk.h" +#include "tree/pattern/TextChunk.h" +#include "tree/pattern/TokenTagToken.h" +#include "tree/xpath/XPath.h" +#include "tree/xpath/XPathElement.h" +#include "tree/xpath/XPathLexer.h" +#include "tree/xpath/XPathLexerErrorListener.h" +#include "tree/xpath/XPathRuleAnywhereElement.h" +#include "tree/xpath/XPathRuleElement.h" +#include "tree/xpath/XPathTokenAnywhereElement.h" +#include "tree/xpath/XPathTokenElement.h" +#include "tree/xpath/XPathWildcardAnywhereElement.h" +#include "tree/xpath/XPathWildcardElement.h" +#include "internal/Synchronization.h" diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATN.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ATN.cpp new file mode 100644 index 0000000000..339515cc9c --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATN.cpp @@ -0,0 +1,159 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/LL1Analyzer.h" +#include "Token.h" +#include "atn/RuleTransition.h" +#include "misc/IntervalSet.h" +#include "RuleContext.h" +#include "atn/DecisionState.h" +#include "Recognizer.h" +#include "atn/ATNType.h" +#include "Exceptions.h" +#include "support/CPPUtils.h" + +#include "atn/ATN.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::internal; +using namespace antlrcpp; + +ATN::ATN() : ATN(ATNType::LEXER, 0) {} + +ATN::ATN(ATNType grammarType_, size_t maxTokenType_) : grammarType(grammarType_), maxTokenType(maxTokenType_) {} + +ATN::~ATN() { + for (ATNState *state : states) { + delete state; + } +} + +misc::IntervalSet ATN::nextTokens(ATNState *s, RuleContext *ctx) const { + LL1Analyzer analyzer(*this); + return analyzer.LOOK(s, ctx); + +} + +misc::IntervalSet const& ATN::nextTokens(ATNState *s) const { + if (!s->_nextTokenUpdated) { + UniqueLock<Mutex> lock(_mutex); + if (!s->_nextTokenUpdated) { + s->_nextTokenWithinRule = nextTokens(s, nullptr); + s->_nextTokenUpdated = true; + } + } + return s->_nextTokenWithinRule; +} + +void ATN::addState(ATNState *state) { + if (state != nullptr) { + //state->atn = this; + state->stateNumber = static_cast<int>(states.size()); + } + + states.push_back(state); +} + +void ATN::removeState(ATNState *state) { + delete states.at(state->stateNumber);// just free mem, don't shift states in list + states.at(state->stateNumber) = nullptr; +} + +int ATN::defineDecisionState(DecisionState *s) { + decisionToState.push_back(s); + s->decision = static_cast<int>(decisionToState.size() - 1); + return s->decision; +} + +DecisionState *ATN::getDecisionState(size_t decision) const { + if (!decisionToState.empty()) { + return decisionToState[decision]; + } + return nullptr; +} + +size_t ATN::getNumberOfDecisions() const { + return decisionToState.size(); +} + +misc::IntervalSet ATN::getExpectedTokens(size_t stateNumber, RuleContext *context) const { + if (stateNumber == 
ATNState::INVALID_STATE_NUMBER || stateNumber >= states.size()) { + throw IllegalArgumentException("Invalid state number."); + } + + RuleContext *ctx = context; + ATNState *s = states.at(stateNumber); + misc::IntervalSet following = nextTokens(s); + if (!following.contains(Token::EPSILON)) { + return following; + } + + misc::IntervalSet expected; + expected.addAll(following); + expected.remove(Token::EPSILON); + while (ctx && ctx->invokingState != ATNState::INVALID_STATE_NUMBER && following.contains(Token::EPSILON)) { + ATNState *invokingState = states.at(ctx->invokingState); + const RuleTransition *rt = static_cast<const RuleTransition*>(invokingState->transitions[0].get()); + following = nextTokens(rt->followState); + expected.addAll(following); + expected.remove(Token::EPSILON); + + if (ctx->parent == nullptr) { + break; + } + ctx = static_cast<RuleContext *>(ctx->parent); + } + + if (following.contains(Token::EPSILON)) { + expected.add(Token::EOF); + } + + return expected; +} + +std::string ATN::toString() const { + std::stringstream ss; + std::string type; + switch (grammarType) { + case ATNType::LEXER: + type = "LEXER "; + break; + + case ATNType::PARSER: + type = "PARSER "; + break; + + default: + break; + } + ss << "(" << type << "ATN " << std::hex << this << std::dec << ") maxTokenType: " << maxTokenType << std::endl; + ss << "states (" << states.size() << ") {" << std::endl; + + size_t index = 0; + for (auto *state : states) { + if (state == nullptr) { + ss << " " << index++ << ": nul" << std::endl; + } else { + std::string text = state->toString(); + ss << " " << index++ << ": " << indent(text, " ", false) << std::endl; + } + } + + index = 0; + for (auto *state : decisionToState) { + if (state == nullptr) { + ss << " " << index++ << ": nul" << std::endl; + } else { + std::string text = state->toString(); + ss << " " << index++ << ": " << indent(text, " ", false) << std::endl; + } + } + + ss << "}"; + + return ss.str(); +} + diff --git 
a/contrib/libs/antlr4_cpp_runtime/src/atn/ATN.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ATN.h
new file mode 100644
index 0000000000..f12476358a
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATN.h
@@ -0,0 +1,133 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "RuleContext.h"
+#include "internal/Synchronization.h"
+
+// GCC generates a warning when forward-declaring ATN if ATN has already been
+// declared due to the attributes added by ANTLR4CPP_PUBLIC.
+// See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=39159
+// Add constant that can be checked so forward-declarations can be omitted.
+#define ANTLR4CPP_ATN_DECLARED
+
+namespace antlr4 {
+namespace atn {
+
+  class LexerATNSimulator;
+  class ParserATNSimulator;
+
+  class ANTLR4CPP_PUBLIC ATN {
+  public:
+    static constexpr size_t INVALID_ALT_NUMBER = 0;
+
+    /// Used for runtime deserialization of ATNs from strings.
+    ATN();
+
+    ATN(ATNType grammarType, size_t maxTokenType);
+    // Copying and moving are disabled (all four operations below are deleted).
+    ATN(const ATN&) = delete;
+
+    ATN(ATN&&) = delete;
+
+    ~ATN();
+
+    ATN& operator=(const ATN&) = delete;
+
+    ATN& operator=(ATN&&) = delete;
+    /// All states, indexed by state number. The destructor deletes each entry; removed states are nullptr.
+    std::vector<ATNState *> states;
+
+    /// Each subrule/rule is a decision point and we must track them so we
+    /// can go back later and build DFA predictors for them. This includes
+    /// all the rules, subrules, optional blocks, ()+, ()* etc...
+    std::vector<DecisionState *> decisionToState;
+
+    /// Maps from rule index to the rule's start state.
+    std::vector<RuleStartState *> ruleToStartState;
+
+    /// Maps from rule index to the rule's stop state.
+    std::vector<RuleStopState *> ruleToStopState;
+
+    /// The type of the ATN.
+    ATNType grammarType;
+
+    /// The maximum value for any symbol recognized by a transition in the ATN.
+    size_t maxTokenType;
+
+    /// <summary>
+    /// For lexer ATNs, this maps the rule index to the resulting token type.
+    /// For parser ATNs, this maps the rule index to the generated bypass token
+    /// type if the
+    /// <seealso cref="ATNDeserializationOptions#isGenerateRuleBypassTransitions"/>
+    /// deserialization option was specified; otherwise, this is {@code null}.
+    /// </summary>
+    std::vector<size_t> ruleToTokenType;
+
+    /// For lexer ATNs, this is an array of {@link LexerAction} objects which may
+    /// be referenced by action transitions in the ATN.
+    std::vector<Ref<const LexerAction>> lexerActions;
+    /// NOTE(review): presumably maps a lexer mode index to its tokens start state -- confirm against the deserializer.
+    std::vector<TokensStartState *> modeToStartState;
+
+    /// <summary>
+    /// Compute the set of valid tokens that can occur starting in state {@code s}.
+    /// If {@code ctx} is null, the set of tokens will not include what can follow
+    /// the rule surrounding {@code s}. In other words, the set will be
+    /// restricted to tokens reachable staying within {@code s}'s rule.
+    /// </summary>
+    misc::IntervalSet nextTokens(ATNState *s, RuleContext *ctx) const;
+
+    /// <summary>
+    /// Compute the set of valid tokens that can occur starting in {@code s} and
+    /// staying in same rule. <seealso cref="Token#EPSILON"/> is in set if we reach end of
+    /// rule.
+    /// </summary>
+    misc::IntervalSet const& nextTokens(ATNState *s) const;
+    /// Appends a state (nullptr allowed); a non-null state gets its index in `states` as its state number.
+    void addState(ATNState *state);
+    /// Deletes the state stored at state->stateNumber and leaves nullptr in its slot (no renumbering).
+    void removeState(ATNState *state);
+    /// Appends s to decisionToState, stores the new index in s->decision and returns it.
+    int defineDecisionState(DecisionState *s);
+    /// Returns the state for a decision index, or nullptr when no decision states are registered.
+    DecisionState *getDecisionState(size_t decision) const;
+    /// Number of entries in decisionToState.
+    size_t getNumberOfDecisions() const;
+
+    /// <summary>
+    /// Computes the set of input symbols which could follow ATN state number
+    /// {@code stateNumber} in the specified full {@code context}. This method
+    /// considers the complete parser context, but does not evaluate semantic
+    /// predicates (i.e. all predicates encountered during the calculation are
+    /// assumed true). If a path in the ATN exists from the starting state to the
+    /// <seealso cref="RuleStopState"/> of the outermost context without matching any
+    /// symbols, <seealso cref="Token#EOF"/> is added to the returned set.
+    /// <p/>
+    /// If {@code context} is {@code null}, it is treated as
+    /// <seealso cref="ParserRuleContext#EMPTY"/>.
+    /// </summary>
+    /// <param name="stateNumber"> the ATN state number </param>
+    /// <param name="context"> the full parse context </param>
+    /// <returns> The set of potentially valid input symbols which could follow the
+    /// specified state in the specified context. </returns>
+    /// <exception cref="IllegalArgumentException"> if the ATN does not contain a state with
+    /// number {@code stateNumber} </exception>
+    misc::IntervalSet getExpectedTokens(size_t stateNumber, RuleContext *context) const;
+    /// Multi-line debug dump listing the grammar type, maxTokenType, the states and the decision states.
+    std::string toString() const;
+
+  private:
+    friend class LexerATNSimulator;
+    friend class ParserATNSimulator;
+
+    mutable internal::Mutex _mutex;             // taken by nextTokens(ATNState *) while filling the per-state cache
+    mutable internal::SharedMutex _stateMutex;  // NOTE(review): not used in ATN.cpp; presumably for the friend simulators -- confirm
+    mutable internal::SharedMutex _edgeMutex;   // NOTE(review): not used in ATN.cpp; presumably for the friend simulators -- confirm
+  };
+
+} // namespace atn
+} // namespace antlr4
diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfig.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfig.cpp
new file mode 100644
index 0000000000..be4d5bfa8c
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfig.cpp
@@ -0,0 +1,106 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#include "misc/MurmurHash.h"
+#include "atn/PredictionContext.h"
+#include "SemanticContext.h"
+
+#include "atn/ATNConfig.h"
+
+using namespace antlr4::atn;
+
+namespace {
+
+/**
+ * This field stores the bit mask for implementing the
+ * {@link #isPrecedenceFilterSuppressed} property as a bit within the
+ * existing {@link #reachesIntoOuterContext} field.
+ */
+inline constexpr size_t SUPPRESS_PRECEDENCE_FILTER = 0x40000000;
+
+}
+// The first seven constructors all delegate to the full five-argument constructor defined last.
+ATNConfig::ATNConfig(ATNState *state, size_t alt, Ref<const PredictionContext> context)
+  : ATNConfig(state, alt, std::move(context), 0, SemanticContext::Empty::Instance) {}
+
+ATNConfig::ATNConfig(ATNState *state, size_t alt, Ref<const PredictionContext> context, Ref<const SemanticContext> semanticContext)
+  : ATNConfig(state, alt, std::move(context), 0, std::move(semanticContext)) {}
+// The "other"-based overloads copy every field from `other` except the ones passed explicitly.
+ATNConfig::ATNConfig(ATNConfig const& other, Ref<const SemanticContext> semanticContext)
+  : ATNConfig(other.state, other.alt, other.context, other.reachesIntoOuterContext, std::move(semanticContext)) {}
+
+ATNConfig::ATNConfig(ATNConfig const& other, ATNState *state)
+  : ATNConfig(state, other.alt, other.context, other.reachesIntoOuterContext, other.semanticContext) {}
+
+ATNConfig::ATNConfig(ATNConfig const& other, ATNState *state, Ref<const SemanticContext> semanticContext)
+  : ATNConfig(state, other.alt, other.context, other.reachesIntoOuterContext, std::move(semanticContext)) {}
+
+ATNConfig::ATNConfig(ATNConfig const& other, ATNState *state, Ref<const PredictionContext> context)
+  : ATNConfig(state, other.alt, std::move(context), other.reachesIntoOuterContext, other.semanticContext) {}
+
+ATNConfig::ATNConfig(ATNConfig const& other, ATNState *state, Ref<const PredictionContext> context, Ref<const SemanticContext> semanticContext)
+  : ATNConfig(state, other.alt, std::move(context), other.reachesIntoOuterContext, std::move(semanticContext)) {}
+// Full constructor: stores every field as given (reachesIntoOuterContext includes the flag bit).
+ATNConfig::ATNConfig(ATNState *state, size_t alt, Ref<const PredictionContext> context, size_t reachesIntoOuterContext, Ref<const SemanticContext> semanticContext)
+  : state(state), alt(alt), context(std::move(context)), reachesIntoOuterContext(reachesIntoOuterContext), semanticContext(std::move(semanticContext)) {}
+// Hash over state number, alt, context and semantic context; reachesIntoOuterContext is not mixed in.
+size_t ATNConfig::hashCode() const {
+  size_t hashCode = misc::MurmurHash::initialize(7);
+  hashCode = misc::MurmurHash::update(hashCode, state->stateNumber);
+  hashCode = misc::MurmurHash::update(hashCode, alt);
+  hashCode = misc::MurmurHash::update(hashCode, context);
+  hashCode = misc::MurmurHash::update(hashCode, semanticContext);
+  hashCode = misc::MurmurHash::finish(hashCode, 4);
+  return hashCode;
+}
+// reachesIntoOuterContext with the SUPPRESS_PRECEDENCE_FILTER bit masked out.
+size_t ATNConfig::getOuterContextDepth() const {
+  return reachesIntoOuterContext & ~SUPPRESS_PRECEDENCE_FILTER;
+}
+// True when the SUPPRESS_PRECEDENCE_FILTER bit of reachesIntoOuterContext is set.
+bool ATNConfig::isPrecedenceFilterSuppressed() const {
+  return (reachesIntoOuterContext & SUPPRESS_PRECEDENCE_FILTER) != 0;
+}
+// Sets or clears the SUPPRESS_PRECEDENCE_FILTER bit, leaving the remaining bits untouched.
+void ATNConfig::setPrecedenceFilterSuppressed(bool value) {
+  if (value) {
+    reachesIntoOuterContext |= SUPPRESS_PRECEDENCE_FILTER;
+  } else {
+    reachesIntoOuterContext &= ~SUPPRESS_PRECEDENCE_FILTER;
+  }
+}
+// Equality on state number, alt, context, semantic context and the flag bit; context and semanticContext are dereferenced without null checks.
+bool ATNConfig::operator==(const ATNConfig &other) const {
+  return state->stateNumber == other.state->stateNumber && alt == other.alt &&
+    ((context == other.context) || (*context == *other.context)) &&
+    *semanticContext == *other.semanticContext &&
+    isPrecedenceFilterSuppressed() == other.isPrecedenceFilterSuppressed();
+}
+
+std::string ATNConfig::toString() const {
+  return toString(true);
+}
+// Renders "(state[,alt][,[context]][,[semanticContext]][,up=depth])"; optional parts appear only when set.
+std::string ATNConfig::toString(bool showAlt) const {
+  std::stringstream ss;
+  ss << "(";
+
+  ss << state->toString();
+  if (showAlt) {
+    ss << "," << alt;
+  }
+  if (context) {
+    ss << ",[" << context->toString() << "]";
+  }
+  if (semanticContext != nullptr && semanticContext != SemanticContext::Empty::Instance) {
+    ss << ",[" << semanticContext->toString() << "]";
+  }
+  if (getOuterContextDepth() > 0) {
+    ss << ",up=" << getOuterContextDepth();
+  }
+  ss << ")";
+
+  return ss.str();
+}
diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfig.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfig.h
new file mode 100644
index 0000000000..1d2e7ae163
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfig.h
@@ -0,0 +1,157 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include <cassert> + +#include "antlr4-common.h" +#include "atn/SemanticContext.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// A tuple: (ATN state, predicted alt, syntactic, semantic context). + /// The syntactic context is a graph-structured stack node whose + /// path(s) to the root is the rule invocation(s) + /// chain used to arrive at the state. The semantic context is + /// the tree of semantic predicates encountered before reaching + /// an ATN state. + /// </summary> + class ANTLR4CPP_PUBLIC ATNConfig { + public: + struct Hasher + { + size_t operator()(Ref<ATNConfig> const& k) const { + return k->hashCode(); + } + + size_t operator()(ATNConfig const& k) const { + return k.hashCode(); + } + }; + + struct Comparer { + bool operator()(Ref<ATNConfig> const& lhs, Ref<ATNConfig> const& rhs) const { + return (lhs == rhs) || (*lhs == *rhs); + } + + bool operator()(ATNConfig const& lhs, ATNConfig const& rhs) const { + return (&lhs == &rhs) || (lhs == rhs); + } + }; + + using Set = std::unordered_set<Ref<ATNConfig>, Hasher, Comparer>; + + /// The ATN state associated with this configuration. + ATNState *state = nullptr; + + /// What alt (or lexer rule) is predicted by this configuration. + const size_t alt = 0; + + /// The stack of invoking states leading to the rule/states associated + /// with this config. We track only those contexts pushed during + /// execution of the ATN simulator. + /// + /// Can be shared between multiple ANTConfig instances. + Ref<const PredictionContext> context; + + /** + * We cannot execute predicates dependent upon local context unless + * we know for sure we are in the correct context. Because there is + * no way to do this efficiently, we simply cannot evaluate + * dependent predicates unless we are in the rule that initially + * invokes the ATN simulator. 
+ * + * <p> + * closure() tracks the depth of how far we dip into the outer context: + * depth > 0. Note that it may not be totally accurate depth since I + * don't ever decrement. TODO: make it a boolean then</p> + * + * <p> + * For memory efficiency, the {@link #isPrecedenceFilterSuppressed} method + * is also backed by this field. Since the field is publicly accessible, the + * highest bit which would not cause the value to become negative is used to + * store this field. This choice minimizes the risk that code which only + * compares this value to 0 would be affected by the new purpose of the + * flag. It also ensures the performance of the existing {@link ATNConfig} + * constructors as well as certain operations like + * {@link ATNConfigSet#add(ATNConfig, DoubleKeyMap)} method are + * <em>completely</em> unaffected by the change.</p> + */ + size_t reachesIntoOuterContext = 0; + + /// Can be shared between multiple ATNConfig instances. + Ref<const SemanticContext> semanticContext; + + ATNConfig(ATNState *state, size_t alt, Ref<const PredictionContext> context); + ATNConfig(ATNState *state, size_t alt, Ref<const PredictionContext> context, Ref<const SemanticContext> semanticContext); + + ATNConfig(ATNConfig const& other, Ref<const SemanticContext> semanticContext); + ATNConfig(ATNConfig const& other, ATNState *state); + ATNConfig(ATNConfig const& other, ATNState *state, Ref<const SemanticContext> semanticContext); + ATNConfig(ATNConfig const& other, ATNState *state, Ref<const PredictionContext> context); + ATNConfig(ATNConfig const& other, ATNState *state, Ref<const PredictionContext> context, Ref<const SemanticContext> semanticContext); + + ATNConfig(ATNConfig const&) = default; + + ATNConfig(ATNConfig&&) = default; + + virtual ~ATNConfig() = default; + + virtual size_t hashCode() const; + + /** + * This method gets the value of the {@link #reachesIntoOuterContext} field + * as it existed prior to the introduction of the + * {@link 
#isPrecedenceFilterSuppressed} method. + */ + size_t getOuterContextDepth() const; + bool isPrecedenceFilterSuppressed() const; + void setPrecedenceFilterSuppressed(bool value); + + /// An ATN configuration is equal to another if both have + /// the same state, they predict the same alternative, and + /// syntactic/semantic contexts are the same. + bool operator==(const ATNConfig &other) const; + bool operator!=(const ATNConfig &other) const; + + virtual std::string toString() const; + std::string toString(bool showAlt) const; + + private: + ATNConfig(ATNState *state, size_t alt, Ref<const PredictionContext> context, size_t reachesIntoOuterContext, Ref<const SemanticContext> semanticContext); + }; + +} // namespace atn +} // namespace antlr4 + + +// Hash function for ATNConfig. + +namespace std { + using antlr4::atn::ATNConfig; + + template <> struct hash<ATNConfig> + { + size_t operator() (const ATNConfig &x) const + { + return x.hashCode(); + } + }; + + template <> struct hash<std::vector<Ref<ATNConfig>>> + { + size_t operator() (const std::vector<Ref<ATNConfig>> &vector) const + { + std::size_t seed = 0; + for (const auto &config : vector) { + seed ^= config->hashCode() + 0x9e3779b9 + (seed << 6) + (seed >> 2); + } + return seed; + } + }; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfigSet.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfigSet.cpp new file mode 100644 index 0000000000..4ebdf8882b --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfigSet.cpp @@ -0,0 +1,232 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/PredictionContext.h" +#include "atn/ATNConfig.h" +#include "atn/ATNSimulator.h" +#include "Exceptions.h" +#include "atn/SemanticContext.h" +#include "support/Arrays.h" + +#include "atn/ATNConfigSet.h" + +using namespace antlr4::atn; +using namespace antlrcpp; + +namespace { + +} + +ATNConfigSet::ATNConfigSet() : ATNConfigSet(true) {} + +ATNConfigSet::ATNConfigSet(const ATNConfigSet &other) + : fullCtx(other.fullCtx), _configLookup(other._configLookup.bucket_count(), ATNConfigHasher{this}, ATNConfigComparer{this}) { + addAll(other); + uniqueAlt = other.uniqueAlt; + conflictingAlts = other.conflictingAlts; + hasSemanticContext = other.hasSemanticContext; + dipsIntoOuterContext = other.dipsIntoOuterContext; +} + +ATNConfigSet::ATNConfigSet(bool fullCtx) + : fullCtx(fullCtx), _configLookup(0, ATNConfigHasher{this}, ATNConfigComparer{this}) {} + +bool ATNConfigSet::add(const Ref<ATNConfig> &config) { + return add(config, nullptr); +} + +bool ATNConfigSet::add(const Ref<ATNConfig> &config, PredictionContextMergeCache *mergeCache) { + assert(config); + + if (_readonly) { + throw IllegalStateException("This set is readonly"); + } + if (config->semanticContext != SemanticContext::Empty::Instance) { + hasSemanticContext = true; + } + if (config->getOuterContextDepth() > 0) { + dipsIntoOuterContext = true; + } + + auto existing = _configLookup.find(config.get()); + if (existing == _configLookup.end()) { + _configLookup.insert(config.get()); + _cachedHashCode = 0; + configs.push_back(config); // track order here + + return true; + } + + // a previous (s,i,pi,_), merge with it and save result + bool rootIsWildcard = !fullCtx; + Ref<const PredictionContext> merged = PredictionContext::merge((*existing)->context, config->context, rootIsWildcard, mergeCache); + // no need to check for existing.context, config.context in cache + // since only way to create new graphs is "call rule" and here. We + // cache at both places. 
+ (*existing)->reachesIntoOuterContext = std::max((*existing)->reachesIntoOuterContext, config->reachesIntoOuterContext); + + // make sure to preserve the precedence filter suppression during the merge + if (config->isPrecedenceFilterSuppressed()) { + (*existing)->setPrecedenceFilterSuppressed(true); + } + + (*existing)->context = std::move(merged); // replace context; no need to alt mapping + + return true; +} + +bool ATNConfigSet::addAll(const ATNConfigSet &other) { + for (const auto &c : other.configs) { + add(c); + } + return false; +} + +std::vector<ATNState*> ATNConfigSet::getStates() const { + std::vector<ATNState*> states; + states.reserve(configs.size()); + for (const auto &c : configs) { + states.push_back(c->state); + } + return states; +} + +/** + * Gets the complete set of represented alternatives for the configuration + * set. + * + * @return the set of represented alternatives in this configuration set + * + * @since 4.3 + */ + +BitSet ATNConfigSet::getAlts() const { + BitSet alts; + for (const auto &config : configs) { + alts.set(config->alt); + } + return alts; +} + +std::vector<Ref<const SemanticContext>> ATNConfigSet::getPredicates() const { + std::vector<Ref<const SemanticContext>> preds; + preds.reserve(configs.size()); + for (const auto &c : configs) { + if (c->semanticContext != SemanticContext::Empty::Instance) { + preds.push_back(c->semanticContext); + } + } + return preds; +} + +const Ref<ATNConfig>& ATNConfigSet::get(size_t i) const { + return configs[i]; +} + +void ATNConfigSet::optimizeConfigs(ATNSimulator *interpreter) { + assert(interpreter); + + if (_readonly) { + throw IllegalStateException("This set is readonly"); + } + if (_configLookup.empty()) + return; + + for (const auto &config : configs) { + config->context = interpreter->getCachedContext(config->context); + } +} + +bool ATNConfigSet::equals(const ATNConfigSet &other) const { + if (&other == this) { + return true; + } + + if (configs.size() != other.configs.size()) + return 
false; + + if (fullCtx != other.fullCtx || uniqueAlt != other.uniqueAlt || + conflictingAlts != other.conflictingAlts || hasSemanticContext != other.hasSemanticContext || + dipsIntoOuterContext != other.dipsIntoOuterContext) // includes stack context + return false; + + return Arrays::equals(configs, other.configs); +} + +size_t ATNConfigSet::hashCode() const { + size_t cachedHashCode = _cachedHashCode.load(std::memory_order_relaxed); + if (!isReadonly() || cachedHashCode == 0) { + cachedHashCode = 1; + for (const auto &i : configs) { + cachedHashCode = 31 * cachedHashCode + i->hashCode(); // Same as Java's list hashCode impl. + } + _cachedHashCode.store(cachedHashCode, std::memory_order_relaxed); + } + return cachedHashCode; +} + +size_t ATNConfigSet::size() const { + return configs.size(); +} + +bool ATNConfigSet::isEmpty() const { + return configs.empty(); +} + +void ATNConfigSet::clear() { + if (_readonly) { + throw IllegalStateException("This set is readonly"); + } + configs.clear(); + _cachedHashCode = 0; + _configLookup.clear(); +} + +bool ATNConfigSet::isReadonly() const { + return _readonly; +} + +void ATNConfigSet::setReadonly(bool readonly) { + _readonly = readonly; + LookupContainer(0, ATNConfigHasher{this}, ATNConfigComparer{this}).swap(_configLookup); +} + +std::string ATNConfigSet::toString() const { + std::stringstream ss; + ss << "["; + for (size_t i = 0; i < configs.size(); i++) { + ss << configs[i]->toString(); + } + ss << "]"; + + if (hasSemanticContext) { + ss << ",hasSemanticContext = " << hasSemanticContext; + } + if (uniqueAlt != ATN::INVALID_ALT_NUMBER) { + ss << ",uniqueAlt = " << uniqueAlt; + } + + if (conflictingAlts.size() > 0) { + ss << ",conflictingAlts = "; + ss << conflictingAlts.toString(); + } + + if (dipsIntoOuterContext) { + ss << ", dipsIntoOuterContext"; + } + return ss.str(); +} + +size_t ATNConfigSet::hashCode(const ATNConfig &other) const { + size_t hashCode = 7; + hashCode = 31 * hashCode + other.state->stateNumber; + 
hashCode = 31 * hashCode + other.alt; + hashCode = 31 * hashCode + other.semanticContext->hashCode(); + return hashCode; +} + +bool ATNConfigSet::equals(const ATNConfig &lhs, const ATNConfig &rhs) const { + return lhs.state->stateNumber == rhs.state->stateNumber && lhs.alt == rhs.alt && *lhs.semanticContext == *rhs.semanticContext; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfigSet.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfigSet.h new file mode 100644 index 0000000000..d147f183a0 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfigSet.h @@ -0,0 +1,157 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include <cassert> + +#include "support/BitSet.h" +#include "atn/PredictionContext.h" +#include "atn/ATNConfig.h" +#include "FlatHashSet.h" + +namespace antlr4 { +namespace atn { + + /// Specialized set that can track info about the set, with support for combining similar configurations using a + /// graph-structured stack. + class ANTLR4CPP_PUBLIC ATNConfigSet { + public: + /// Track the elements as they are added to the set; supports get(i) + std::vector<Ref<ATNConfig>> configs; + + // TODO: these fields make me pretty uncomfortable but nice to pack up info together, saves recomputation + // TODO: can we track conflicts as they are added to save scanning configs later? + size_t uniqueAlt = 0; + + /** Currently this is only used when we detect SLL conflict; this does + * not necessarily represent the ambiguous alternatives. In fact, + * I should also point out that this seems to include predicated alternatives + * that have predicates that evaluate to false. Computed in computeTargetState(). + */ + antlrcpp::BitSet conflictingAlts; + + // Used in parser and lexer. In lexer, it indicates we hit a pred + // while computing a closure operation. 
Don't make a DFA state from this. + bool hasSemanticContext = false; + bool dipsIntoOuterContext = false; + + /// Indicates that this configuration set is part of a full context + /// LL prediction. It will be used to determine how to merge $. With SLL + /// it's a wildcard whereas it is not for LL context merge. + const bool fullCtx = true; + + ATNConfigSet(); + + ATNConfigSet(const ATNConfigSet &other); + + ATNConfigSet(ATNConfigSet&&) = delete; + + explicit ATNConfigSet(bool fullCtx); + + virtual ~ATNConfigSet() = default; + + bool add(const Ref<ATNConfig> &config); + + /// <summary> + /// Adding a new config means merging contexts with existing configs for + /// {@code (s, i, pi, _)}, where {@code s} is the + /// <seealso cref="ATNConfig#state"/>, {@code i} is the <seealso cref="ATNConfig#alt"/>, and + /// {@code pi} is the <seealso cref="ATNConfig#semanticContext"/>. We use + /// {@code (s,i,pi)} as key. + /// <p/> + /// This method updates <seealso cref="#dipsIntoOuterContext"/> and + /// <seealso cref="#hasSemanticContext"/> when necessary. + /// </summary> + bool add(const Ref<ATNConfig> &config, PredictionContextMergeCache *mergeCache); + + bool addAll(const ATNConfigSet &other); + + std::vector<ATNState*> getStates() const; + + /** + * Gets the complete set of represented alternatives for the configuration + * set. 
+ * + * @return the set of represented alternatives in this configuration set + * + * @since 4.3 + */ + antlrcpp::BitSet getAlts() const; + std::vector<Ref<const SemanticContext>> getPredicates() const; + + const Ref<ATNConfig>& get(size_t i) const; + + void optimizeConfigs(ATNSimulator *interpreter); + + size_t size() const; + bool isEmpty() const; + void clear(); + bool isReadonly() const; + void setReadonly(bool readonly); + + virtual size_t hashCode() const; + + virtual bool equals(const ATNConfigSet &other) const; + + virtual std::string toString() const; + + private: + struct ATNConfigHasher final { + const ATNConfigSet* atnConfigSet; + + size_t operator()(const ATNConfig *other) const { + assert(other != nullptr); + return atnConfigSet->hashCode(*other); + } + }; + + struct ATNConfigComparer final { + const ATNConfigSet* atnConfigSet; + + bool operator()(const ATNConfig *lhs, const ATNConfig *rhs) const { + assert(lhs != nullptr); + assert(rhs != nullptr); + return atnConfigSet->equals(*lhs, *rhs); + } + }; + + mutable std::atomic<size_t> _cachedHashCode = 0; + + /// Indicates that the set of configurations is read-only. Do not + /// allow any code to manipulate the set; DFA states will point at + /// the sets and they must not change. This does not protect the other + /// fields; in particular, conflictingAlts is set after + /// we've made this readonly. + bool _readonly = false; + + virtual size_t hashCode(const ATNConfig &atnConfig) const; + + virtual bool equals(const ATNConfig &lhs, const ATNConfig &rhs) const; + + using LookupContainer = FlatHashSet<ATNConfig*, ATNConfigHasher, ATNConfigComparer>; + + /// All configs but hashed by (s, i, _, pi) not including context. Wiped out + /// when we go readonly as this set becomes a DFA state. 
+ LookupContainer _configLookup; + }; + + inline bool operator==(const ATNConfigSet &lhs, const ATNConfigSet &rhs) { return lhs.equals(rhs); } + + inline bool operator!=(const ATNConfigSet &lhs, const ATNConfigSet &rhs) { return !operator==(lhs, rhs); } + +} // namespace atn +} // namespace antlr4 + +namespace std { + +template <> +struct hash<::antlr4::atn::ATNConfigSet> { + size_t operator()(const ::antlr4::atn::ATNConfigSet &atnConfigSet) const { + return atnConfigSet.hashCode(); + } +}; + +} // namespace std diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializationOptions.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializationOptions.cpp new file mode 100644 index 0000000000..e0a7cb2b27 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializationOptions.cpp @@ -0,0 +1,39 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/ATNDeserializationOptions.h" +#include "Exceptions.h" + +using namespace antlr4; +using namespace antlr4::atn; + +ATNDeserializationOptions::ATNDeserializationOptions(ATNDeserializationOptions *options) + : _readOnly(false), _verifyATN(options->_verifyATN), + _generateRuleBypassTransitions(options->_generateRuleBypassTransitions) {} + +const ATNDeserializationOptions& ATNDeserializationOptions::getDefaultOptions() { + static const ATNDeserializationOptions* const defaultOptions = new ATNDeserializationOptions(); + return *defaultOptions; +} + +void ATNDeserializationOptions::makeReadOnly() { + _readOnly = true; +} + +void ATNDeserializationOptions::setVerifyATN(bool verify) { + throwIfReadOnly(); + _verifyATN = verify; +} + +void ATNDeserializationOptions::setGenerateRuleBypassTransitions(bool generate) { + throwIfReadOnly(); + _generateRuleBypassTransitions = generate; +} + +void ATNDeserializationOptions::throwIfReadOnly() const { + if (isReadOnly()) { + throw IllegalStateException("ATNDeserializationOptions is read only."); + } +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializationOptions.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializationOptions.h new file mode 100644 index 0000000000..595f918649 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializationOptions.h @@ -0,0 +1,48 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + +class ANTLR4CPP_PUBLIC ATNDeserializationOptions final { +public: + ATNDeserializationOptions() + : _readOnly(false), _verifyATN(true), _generateRuleBypassTransitions(false) {} + + // TODO: Is this useful? If so we should mark it as explicit, otherwise remove it. 
+ ATNDeserializationOptions(ATNDeserializationOptions *options); + + ATNDeserializationOptions(const ATNDeserializationOptions&) = default; + + ATNDeserializationOptions& operator=(const ATNDeserializationOptions&) = default; + + static const ATNDeserializationOptions& getDefaultOptions(); + + bool isReadOnly() const { return _readOnly; } + + void makeReadOnly(); + + bool isVerifyATN() const { return _verifyATN; } + + void setVerifyATN(bool verify); + + bool isGenerateRuleBypassTransitions() const { return _generateRuleBypassTransitions; } + + void setGenerateRuleBypassTransitions(bool generate); + +private: + void throwIfReadOnly() const; + + bool _readOnly; + bool _verifyATN; + bool _generateRuleBypassTransitions; +}; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializer.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializer.cpp new file mode 100644 index 0000000000..2da3c32357 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializer.cpp @@ -0,0 +1,628 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/ATNDeserializationOptions.h" + +#include "atn/ATNType.h" +#include "atn/ATNState.h" +#include "atn/ATN.h" + +#include "atn/LoopEndState.h" +#include "atn/DecisionState.h" +#include "atn/RuleStartState.h" +#include "atn/RuleStopState.h" +#include "atn/TokensStartState.h" +#include "atn/RuleTransition.h" +#include "atn/EpsilonTransition.h" +#include "atn/PlusLoopbackState.h" +#include "atn/PlusBlockStartState.h" +#include "atn/StarLoopbackState.h" +#include "atn/BasicBlockStartState.h" +#include "atn/BasicState.h" +#include "atn/BlockEndState.h" +#include "atn/StarLoopEntryState.h" + +#include "atn/AtomTransition.h" +#include "atn/StarBlockStartState.h" +#include "atn/RangeTransition.h" +#include "atn/PredicateTransition.h" +#include "atn/PrecedencePredicateTransition.h" +#include "atn/ActionTransition.h" +#include "atn/SetTransition.h" +#include "atn/NotSetTransition.h" +#include "atn/WildcardTransition.h" +#include "atn/TransitionType.h" +#include "Token.h" + +#include "misc/IntervalSet.h" +#include "Exceptions.h" +#include "support/CPPUtils.h" +#include "support/Casts.h" + +#include "atn/LexerCustomAction.h" +#include "atn/LexerChannelAction.h" +#include "atn/LexerModeAction.h" +#include "atn/LexerMoreAction.h" +#include "atn/LexerPopModeAction.h" +#include "atn/LexerPushModeAction.h" +#include "atn/LexerSkipAction.h" +#include "atn/LexerTypeAction.h" + +#include "atn/ATNDeserializer.h" + +#include <cassert> +#include <string> +#include <vector> + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlrcpp; + +namespace { + + void checkCondition(bool condition, std::string_view message) { + if (!condition) { + throw IllegalStateException(std::string(message)); + } + } + + void checkCondition(bool condition) { + checkCondition(condition, ""); + } + + /** + * Analyze the {@link StarLoopEntryState} states in the specified ATN to set + * the {@link StarLoopEntryState#isPrecedenceDecision} field to the + * correct value. 
+ * + * @param atn The ATN. + */ + void markPrecedenceDecisions(const ATN &atn) { + for (ATNState *state : atn.states) { + if (!StarLoopEntryState::is(state)) { + continue; + } + + /* We analyze the ATN to determine if this ATN decision state is the + * decision for the closure block that determines whether a + * precedence rule should continue or complete. + */ + if (atn.ruleToStartState[state->ruleIndex]->isLeftRecursiveRule) { + ATNState *maybeLoopEndState = state->transitions[state->transitions.size() - 1]->target; + if (LoopEndState::is(maybeLoopEndState)) { + if (maybeLoopEndState->epsilonOnlyTransitions && RuleStopState::is(maybeLoopEndState->transitions[0]->target)) { + downCast<StarLoopEntryState*>(state)->isPrecedenceDecision = true; + } + } + } + } + } + + Ref<const LexerAction> lexerActionFactory(LexerActionType type, int data1, int data2) { + switch (type) { + case LexerActionType::CHANNEL: + return std::make_shared<LexerChannelAction>(data1); + + case LexerActionType::CUSTOM: + return std::make_shared<LexerCustomAction>(data1, data2); + + case LexerActionType::MODE: + return std::make_shared< LexerModeAction>(data1); + + case LexerActionType::MORE: + return LexerMoreAction::getInstance(); + + case LexerActionType::POP_MODE: + return LexerPopModeAction::getInstance(); + + case LexerActionType::PUSH_MODE: + return std::make_shared<LexerPushModeAction>(data1); + + case LexerActionType::SKIP: + return LexerSkipAction::getInstance(); + + case LexerActionType::TYPE: + return std::make_shared<LexerTypeAction>(data1); + + default: + throw IllegalArgumentException("The specified lexer action type " + std::to_string(static_cast<size_t>(type)) + + " is not valid."); + } + } + + ConstTransitionPtr edgeFactory(const ATN &atn, TransitionType type, size_t trg, size_t arg1, size_t arg2, + size_t arg3, const std::vector<misc::IntervalSet> &sets) { + ATNState *target = atn.states[trg]; + switch (type) { + case TransitionType::EPSILON: + return 
std::make_unique<EpsilonTransition>(target); + case TransitionType::RANGE: + if (arg3 != 0) { + return std::make_unique<RangeTransition>(target, Token::EOF, arg2); + } else { + return std::make_unique<RangeTransition>(target, arg1, arg2); + } + case TransitionType::RULE: + return std::make_unique<RuleTransition>(downCast<RuleStartState*>(atn.states[arg1]), arg2, (int)arg3, target); + case TransitionType::PREDICATE: + return std::make_unique<PredicateTransition>(target, arg1, arg2, arg3 != 0); + case TransitionType::PRECEDENCE: + return std::make_unique<PrecedencePredicateTransition>(target, (int)arg1); + case TransitionType::ATOM: + if (arg3 != 0) { + return std::make_unique<AtomTransition>(target, Token::EOF); + } else { + return std::make_unique<AtomTransition>(target, arg1); + } + case TransitionType::ACTION: + return std::make_unique<ActionTransition>(target, arg1, arg2, arg3 != 0); + case TransitionType::SET: + return std::make_unique<SetTransition>(target, sets[arg1]); + case TransitionType::NOT_SET: + return std::make_unique<NotSetTransition>(target, sets[arg1]); + case TransitionType::WILDCARD: + return std::make_unique<WildcardTransition>(target); + } + + throw IllegalArgumentException("The specified transition type is not valid."); + } + + /* mem check: all created instances are freed in the d-tor of the ATN. 
*/ + ATNState* stateFactory(ATNStateType type, size_t ruleIndex) { + ATNState *s; + switch (type) { + case ATNStateType::INVALID: + return nullptr; + case ATNStateType::BASIC : + s = new BasicState(); + break; + case ATNStateType::RULE_START : + s = new RuleStartState(); + break; + case ATNStateType::BLOCK_START : + s = new BasicBlockStartState(); + break; + case ATNStateType::PLUS_BLOCK_START : + s = new PlusBlockStartState(); + break; + case ATNStateType::STAR_BLOCK_START : + s = new StarBlockStartState(); + break; + case ATNStateType::TOKEN_START : + s = new TokensStartState(); + break; + case ATNStateType::RULE_STOP : + s = new RuleStopState(); + break; + case ATNStateType::BLOCK_END : + s = new BlockEndState(); + break; + case ATNStateType::STAR_LOOP_BACK : + s = new StarLoopbackState(); + break; + case ATNStateType::STAR_LOOP_ENTRY : + s = new StarLoopEntryState(); + break; + case ATNStateType::PLUS_LOOP_BACK : + s = new PlusLoopbackState(); + break; + case ATNStateType::LOOP_END : + s = new LoopEndState(); + break; + default : + std::string message = "The specified state type " + std::to_string(static_cast<size_t>(type)) + " is not valid."; + throw IllegalArgumentException(message); + } + assert(s->getStateType() == type); + s->ruleIndex = ruleIndex; + return s; + } + + ssize_t readUnicodeInt32(SerializedATNView data, int& p) { + return static_cast<ssize_t>(data[p++]); + } + + void deserializeSets( + SerializedATNView data, + int& p, + std::vector<misc::IntervalSet>& sets) { + size_t nsets = data[p++]; + sets.reserve(sets.size() + nsets); + for (size_t i = 0; i < nsets; i++) { + size_t nintervals = data[p++]; + misc::IntervalSet set; + + bool containsEof = data[p++] != 0; + if (containsEof) { + set.add(-1); + } + + for (size_t j = 0; j < nintervals; j++) { + auto a = readUnicodeInt32(data, p); + auto b = readUnicodeInt32(data, p); + set.add(a, b); + } + sets.push_back(set); + } + } + +} + +ATNDeserializer::ATNDeserializer() : 
ATNDeserializer(ATNDeserializationOptions::getDefaultOptions()) {} + +ATNDeserializer::ATNDeserializer(ATNDeserializationOptions deserializationOptions) : _deserializationOptions(std::move(deserializationOptions)) {} + +std::unique_ptr<ATN> ATNDeserializer::deserialize(SerializedATNView data) const { + int p = 0; + int version = data[p++]; + if (version != SERIALIZED_VERSION) { + std::string reason = "Could not deserialize ATN with version" + std::to_string(version) + "(expected " + std::to_string(SERIALIZED_VERSION) + ")."; + + throw UnsupportedOperationException(reason); + } + + ATNType grammarType = (ATNType)data[p++]; + size_t maxTokenType = data[p++]; + auto atn = std::make_unique<ATN>(grammarType, maxTokenType); + + // + // STATES + // + { + std::vector<std::pair<LoopEndState*, size_t>> loopBackStateNumbers; + std::vector<std::pair<BlockStartState*, size_t>> endStateNumbers; + size_t nstates = data[p++]; + atn->states.reserve(nstates); + loopBackStateNumbers.reserve(nstates); // Reserve worst case size, its short lived. + endStateNumbers.reserve(nstates); // Reserve worst case size, its short lived. 
+ for (size_t i = 0; i < nstates; i++) { + ATNStateType stype = static_cast<ATNStateType>(data[p++]); + // ignore bad type of states + if (stype == ATNStateType::INVALID) { + atn->addState(nullptr); + continue; + } + + size_t ruleIndex = data[p++]; + ATNState *s = stateFactory(stype, ruleIndex); + if (stype == ATNStateType::LOOP_END) { // special case + int loopBackStateNumber = data[p++]; + loopBackStateNumbers.push_back({ downCast<LoopEndState*>(s), loopBackStateNumber }); + } else if (BlockStartState::is(s)) { + int endStateNumber = data[p++]; + endStateNumbers.push_back({ downCast<BlockStartState*>(s), endStateNumber }); + } + atn->addState(s); + } + + // delay the assignment of loop back and end states until we know all the state instances have been initialized + for (auto &pair : loopBackStateNumbers) { + pair.first->loopBackState = atn->states[pair.second]; + } + + for (auto &pair : endStateNumbers) { + pair.first->endState = downCast<BlockEndState*>(atn->states[pair.second]); + } + } + + size_t numNonGreedyStates = data[p++]; + for (size_t i = 0; i < numNonGreedyStates; i++) { + size_t stateNumber = data[p++]; + // The serialized ATN must be specifying the right states, so that the + // cast below is correct. + downCast<DecisionState*>(atn->states[stateNumber])->nonGreedy = true; + } + + size_t numPrecedenceStates = data[p++]; + for (size_t i = 0; i < numPrecedenceStates; i++) { + size_t stateNumber = data[p++]; + downCast<RuleStartState*>(atn->states[stateNumber])->isLeftRecursiveRule = true; + } + + // + // RULES + // + size_t nrules = data[p++]; + atn->ruleToStartState.reserve(nrules); + for (size_t i = 0; i < nrules; i++) { + size_t s = data[p++]; + // Also here, the serialized atn must ensure to point to the correct class type. 
+ RuleStartState *startState = downCast<RuleStartState*>(atn->states[s]); + atn->ruleToStartState.push_back(startState); + if (atn->grammarType == ATNType::LEXER) { + size_t tokenType = data[p++]; + atn->ruleToTokenType.push_back(tokenType); + } + } + + atn->ruleToStopState.resize(nrules); + for (ATNState *state : atn->states) { + if (!RuleStopState::is(state)) { + continue; + } + + RuleStopState *stopState = downCast<RuleStopState*>(state); + atn->ruleToStopState[state->ruleIndex] = stopState; + atn->ruleToStartState[state->ruleIndex]->stopState = stopState; + } + + // + // MODES + // + size_t nmodes = data[p++]; + atn->modeToStartState.reserve(nmodes); + for (size_t i = 0; i < nmodes; i++) { + size_t s = data[p++]; + atn->modeToStartState.push_back(downCast<TokensStartState*>(atn->states[s])); + } + + // + // SETS + // + { + std::vector<misc::IntervalSet> sets; + + deserializeSets(data, p, sets); + sets.shrink_to_fit(); + + // + // EDGES + // + int nedges = data[p++]; + for (int i = 0; i < nedges; i++) { + size_t src = data[p]; + size_t trg = data[p + 1]; + TransitionType ttype = static_cast<TransitionType>(data[p + 2]); + size_t arg1 = data[p + 3]; + size_t arg2 = data[p + 4]; + size_t arg3 = data[p + 5]; + ConstTransitionPtr trans = edgeFactory(*atn, ttype, trg, arg1, arg2, arg3, sets); + ATNState *srcState = atn->states[src]; + srcState->addTransition(std::move(trans)); + p += 6; + } + } + // edges for rule stop states can be derived, so they aren't serialized + for (ATNState *state : atn->states) { + for (size_t i = 0; i < state->transitions.size(); i++) { + const Transition *t = state->transitions[i].get(); + if (!RuleTransition::is(t)) { + continue; + } + + const RuleTransition *ruleTransition = downCast<const RuleTransition*>(t); + size_t outermostPrecedenceReturn = INVALID_INDEX; + if (atn->ruleToStartState[ruleTransition->target->ruleIndex]->isLeftRecursiveRule) { + if (ruleTransition->precedence == 0) { + outermostPrecedenceReturn = 
ruleTransition->target->ruleIndex; + } + } + + ConstTransitionPtr returnTransition = std::make_unique<EpsilonTransition>(ruleTransition->followState, outermostPrecedenceReturn); + atn->ruleToStopState[ruleTransition->target->ruleIndex]->addTransition(std::move(returnTransition)); + } + } + + for (ATNState *state : atn->states) { + if (BlockStartState::is(state)) { + BlockStartState *startState = downCast<BlockStartState*>(state); + + // we need to know the end state to set its start state + if (startState->endState == nullptr) { + throw IllegalStateException(); + } + + // block end states can only be associated to a single block start state + if (startState->endState->startState != nullptr) { + throw IllegalStateException(); + } + + startState->endState->startState = downCast<BlockStartState*>(state); + } + + if (PlusLoopbackState::is(state)) { + PlusLoopbackState *loopbackState = downCast<PlusLoopbackState*>(state); + for (size_t i = 0; i < loopbackState->transitions.size(); i++) { + ATNState *target = loopbackState->transitions[i]->target; + if (PlusBlockStartState::is(target)) { + (downCast<PlusBlockStartState*>(target))->loopBackState = loopbackState; + } + } + } else if (StarLoopbackState::is(state)) { + StarLoopbackState *loopbackState = downCast<StarLoopbackState*>(state); + for (size_t i = 0; i < loopbackState->transitions.size(); i++) { + ATNState *target = loopbackState->transitions[i]->target; + if (StarLoopEntryState::is(target)) { + downCast<StarLoopEntryState*>(target)->loopBackState = loopbackState; + } + } + } + } + + // + // DECISIONS + // + size_t ndecisions = data[p++]; + atn->decisionToState.reserve(ndecisions); + for (size_t i = 0; i < ndecisions; i++) { + size_t s = data[p++]; + DecisionState *decState = downCast<DecisionState*>(atn->states[s]); + if (decState == nullptr) + throw IllegalStateException(); + + atn->decisionToState.push_back(decState); + decState->decision = static_cast<int>(i); + } + + // + // LEXER ACTIONS + // + if 
(atn->grammarType == ATNType::LEXER) { + atn->lexerActions.resize(data[p++]); + for (size_t i = 0; i < atn->lexerActions.size(); i++) { + LexerActionType actionType = static_cast<LexerActionType>(data[p++]); + int data1 = data[p++]; + int data2 = data[p++]; + atn->lexerActions[i] = lexerActionFactory(actionType, data1, data2); + } + } + + markPrecedenceDecisions(*atn); + + if (_deserializationOptions.isVerifyATN()) { + verifyATN(*atn); + } + + if (_deserializationOptions.isGenerateRuleBypassTransitions() && atn->grammarType == ATNType::PARSER) { + atn->ruleToTokenType.resize(atn->ruleToStartState.size()); + for (size_t i = 0; i < atn->ruleToStartState.size(); i++) { + atn->ruleToTokenType[i] = static_cast<int>(atn->maxTokenType + i + 1); + } + + for (std::vector<RuleStartState*>::size_type i = 0; i < atn->ruleToStartState.size(); i++) { + BasicBlockStartState *bypassStart = new BasicBlockStartState(); /* mem check: freed in ATN d-tor */ + bypassStart->ruleIndex = static_cast<int>(i); + atn->addState(bypassStart); + + BlockEndState *bypassStop = new BlockEndState(); /* mem check: freed in ATN d-tor */ + bypassStop->ruleIndex = static_cast<int>(i); + atn->addState(bypassStop); + + bypassStart->endState = bypassStop; + atn->defineDecisionState(bypassStart); + + bypassStop->startState = bypassStart; + + ATNState *endState; + const Transition *excludeTransition = nullptr; + if (atn->ruleToStartState[i]->isLeftRecursiveRule) { + // wrap from the beginning of the rule to the StarLoopEntryState + endState = nullptr; + for (ATNState *state : atn->states) { + if (state->ruleIndex != i) { + continue; + } + + if (!StarLoopEntryState::is(state)) { + continue; + } + + ATNState *maybeLoopEndState = state->transitions[state->transitions.size() - 1]->target; + if (!LoopEndState::is(maybeLoopEndState)) { + continue; + } + + if (maybeLoopEndState->epsilonOnlyTransitions && RuleStopState::is(maybeLoopEndState->transitions[0]->target)) { + endState = state; + break; + } + } + + if 
(endState == nullptr) { + throw UnsupportedOperationException("Couldn't identify final state of the precedence rule prefix section."); + + } + + excludeTransition = (static_cast<StarLoopEntryState*>(endState))->loopBackState->transitions[0].get(); + } else { + endState = atn->ruleToStopState[i]; + } + + // all non-excluded transitions that currently target end state need to target blockEnd instead + for (ATNState *state : atn->states) { + for (auto &transition : state->transitions) { + if (transition.get() == excludeTransition) { + continue; + } + + if (transition->target == endState) { + const_cast<Transition*>(transition.get())->target = bypassStop; + } + } + } + + // all transitions leaving the rule start state need to leave blockStart instead + while (atn->ruleToStartState[i]->transitions.size() > 0) { + ConstTransitionPtr transition = atn->ruleToStartState[i]->removeTransition(atn->ruleToStartState[i]->transitions.size() - 1); + bypassStart->addTransition(std::move(transition)); + } + + // link the new states + atn->ruleToStartState[i]->addTransition(std::make_unique<EpsilonTransition>(bypassStart)); + bypassStop->addTransition(std::make_unique<EpsilonTransition>(endState)); + + ATNState *matchState = new BasicState(); /* mem check: freed in ATN d-tor */ + atn->addState(matchState); + matchState->addTransition(std::make_unique<AtomTransition>(bypassStop, atn->ruleToTokenType[i])); + bypassStart->addTransition(std::make_unique<EpsilonTransition>(matchState)); + } + + if (_deserializationOptions.isVerifyATN()) { + // reverify after modification + verifyATN(*atn); + } + } + + return atn; +} + +void ATNDeserializer::verifyATN(const ATN &atn) const { + // verify assumptions + for (ATNState *state : atn.states) { + if (state == nullptr) { + continue; + } + + checkCondition(state->epsilonOnlyTransitions || state->transitions.size() <= 1); + + if (PlusBlockStartState::is(state)) { + checkCondition((downCast<PlusBlockStartState*>(state))->loopBackState != nullptr); + } 
+ + if (StarLoopEntryState::is(state)) { + StarLoopEntryState *starLoopEntryState = downCast<StarLoopEntryState*>(state); + checkCondition(starLoopEntryState->loopBackState != nullptr); + checkCondition(starLoopEntryState->transitions.size() == 2); + + if (StarBlockStartState::is(starLoopEntryState->transitions[0]->target)) { + checkCondition(downCast<LoopEndState*>(starLoopEntryState->transitions[1]->target) != nullptr); + checkCondition(!starLoopEntryState->nonGreedy); + } else if (LoopEndState::is(starLoopEntryState->transitions[0]->target)) { + checkCondition(StarBlockStartState::is(starLoopEntryState->transitions[1]->target)); + checkCondition(starLoopEntryState->nonGreedy); + } else { + throw IllegalStateException(); + } + } + + if (StarLoopbackState::is(state)) { + checkCondition(state->transitions.size() == 1); + checkCondition(StarLoopEntryState::is(state->transitions[0]->target)); + } + + if (LoopEndState::is(state)) { + checkCondition((downCast<LoopEndState*>(state))->loopBackState != nullptr); + } + + if (RuleStartState::is(state)) { + checkCondition((downCast<RuleStartState*>(state))->stopState != nullptr); + } + + if (BlockStartState::is(state)) { + checkCondition((downCast<BlockStartState*>(state))->endState != nullptr); + } + + if (BlockEndState::is(state)) { + checkCondition((downCast<BlockEndState*>(state))->startState != nullptr); + } + + if (DecisionState::is(state)) { + DecisionState *decisionState = downCast<DecisionState*>(state); + checkCondition(decisionState->transitions.size() <= 1 || decisionState->decision >= 0); + } else { + checkCondition(state->transitions.size() <= 1 || RuleStopState::is(state)); + } + } +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializer.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializer.h new file mode 100644 index 0000000000..3cd56b9cdf --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializer.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. 
All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ATNDeserializationOptions.h" +#include "atn/SerializedATNView.h" +#include "atn/LexerAction.h" +#include "atn/Transition.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC ATNDeserializer final { + public: + static constexpr size_t SERIALIZED_VERSION = 4; + + ATNDeserializer(); + + explicit ATNDeserializer(ATNDeserializationOptions deserializationOptions); + + std::unique_ptr<ATN> deserialize(SerializedATNView input) const; + void verifyATN(const ATN &atn) const; + + private: + const ATNDeserializationOptions _deserializationOptions; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNSimulator.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNSimulator.cpp new file mode 100644 index 0000000000..04e1af992e --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNSimulator.cpp @@ -0,0 +1,33 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/ATNSimulator.h" + +#include "atn/ATNConfigSet.h" +#include "atn/ATNDeserializer.h" +#include "atn/ATNType.h" +#include "dfa/DFAState.h" + +using namespace antlr4; +using namespace antlr4::dfa; +using namespace antlr4::atn; + +const Ref<DFAState> ATNSimulator::ERROR = std::make_shared<DFAState>(std::numeric_limits<int>::max()); + +ATNSimulator::ATNSimulator(const ATN &atn, PredictionContextCache &sharedContextCache) + : atn(atn), _sharedContextCache(sharedContextCache) {} + +void ATNSimulator::clearDFA() { + throw UnsupportedOperationException("This ATN simulator does not support clearing the DFA."); +} + +PredictionContextCache& ATNSimulator::getSharedContextCache() const { + return _sharedContextCache; +} + +Ref<const PredictionContext> ATNSimulator::getCachedContext(const Ref<const PredictionContext> &context) { + // This function must only be called with an active state lock, as we are going to change a shared structure. + return PredictionContext::getCachedContext(context, getSharedContextCache()); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNSimulator.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNSimulator.h new file mode 100644 index 0000000000..b14939e219 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNSimulator.h @@ -0,0 +1,71 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ATN.h" +#include "atn/PredictionContext.h" +#include "atn/PredictionContextCache.h" +#include "misc/IntervalSet.h" +#include "support/CPPUtils.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC ATNSimulator { + public: + /// Must distinguish between missing edge and edge we know leads nowhere. 
+ static const Ref<dfa::DFAState> ERROR; + const ATN &atn; + + ATNSimulator(const ATN &atn, PredictionContextCache &sharedContextCache); + + virtual ~ATNSimulator() = default; + + virtual void reset() = 0; + + /** + * Clear the DFA cache used by the current instance. Since the DFA cache may + * be shared by multiple ATN simulators, this method may affect the + * performance (but not accuracy) of other parsers which are being used + * concurrently. + * + * @throws UnsupportedOperationException if the current instance does not + * support clearing the DFA. + * + * @since 4.3 + */ + virtual void clearDFA(); + + PredictionContextCache& getSharedContextCache() const; + Ref<const PredictionContext> getCachedContext(const Ref<const PredictionContext> &context); + + protected: + /// <summary> + /// The context cache maps all PredictionContext objects that are equals() + /// to a single cached copy. This cache is shared across all contexts + /// in all ATNConfigs in all DFA states. We rebuild each ATNConfigSet + /// to use only cached nodes/graphs in addDFAState(). We don't want to + /// fill this during closure() since there are lots of contexts that + /// pop up but are not used ever again. It also greatly slows down closure(). + /// <p/> + /// This cache makes a huge difference in memory and a little bit in speed. + /// For the Java grammar on java.*, it dropped the memory requirements + /// at the end from 25M to 16M. We don't store any of the full context + /// graphs in the DFA because they are limited to local context only, + /// but apparently there's a lot of repetition there as well. We optimize + /// the config contexts before storing the config set in the DFA states + /// by literally rebuilding them with cached subgraphs only. + /// <p/> + /// I tried a cache for use during closure operations, that was + /// whacked after each adaptivePredict(). 
It cost a little bit + /// more time I think and doesn't save on the overall footprint + /// so it's not worth the complexity. + /// </summary> + PredictionContextCache &_sharedContextCache; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNState.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNState.cpp new file mode 100644 index 0000000000..29911901be --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNState.cpp @@ -0,0 +1,56 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/ATN.h" +#include "atn/Transition.h" +#include "misc/IntervalSet.h" +#include "support/CPPUtils.h" + +#include "atn/ATNState.h" + +using namespace antlr4::atn; +using namespace antlrcpp; + +size_t ATNState::hashCode() const { + return stateNumber; +} + +bool ATNState::equals(const ATNState &other) const { + return stateNumber == other.stateNumber; +} + +bool ATNState::isNonGreedyExitState() const { + return false; +} + +std::string ATNState::toString() const { + return std::to_string(stateNumber); +} + +void ATNState::addTransition(ConstTransitionPtr e) { + addTransition(transitions.size(), std::move(e)); +} + +void ATNState::addTransition(size_t index, ConstTransitionPtr e) { + for (const auto &transition : transitions) + if (transition->target->stateNumber == e->target->stateNumber) { + return; + } + + if (transitions.empty()) { + epsilonOnlyTransitions = e->isEpsilon(); + } else if (epsilonOnlyTransitions != e->isEpsilon()) { + std::cerr << "ATN state %d has both epsilon and non-epsilon transitions.\n" << stateNumber; + epsilonOnlyTransitions = false; + } + + transitions.insert(transitions.begin() + index, std::move(e)); +} + +ConstTransitionPtr ATNState::removeTransition(size_t index) { + ConstTransitionPtr result = std::move(transitions[index]); + 
transitions.erase(transitions.begin() + index); + return result; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNState.h new file mode 100644 index 0000000000..7613f40eee --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNState.h @@ -0,0 +1,139 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "misc/IntervalSet.h" +#include "atn/Transition.h" +#include "atn/ATNStateType.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// The following images show the relation of states and + /// <seealso cref="ATNState#transitions"/> for various grammar constructs. + /// + /// <ul> + /// + /// <li>Solid edges marked with an ε indicate a required + /// <seealso cref="EpsilonTransition"/>.</li> + /// + /// <li>Dashed edges indicate locations where any transition derived from + /// <seealso cref="Transition"/> might appear.</li> + /// + /// <li>Dashed nodes are place holders for either a sequence of linked + /// <seealso cref="BasicState"/> states or the inclusion of a block representing a nested + /// construct in one of the forms below.</li> + /// + /// <li>Nodes showing multiple outgoing alternatives with a {@code ...} support + /// any number of alternatives (one or more). 
Nodes without the {@code ...} only + /// support the exact number of alternatives shown in the diagram.</li> + /// + /// </ul> + /// + /// <h2>Basic Blocks</h2> + /// + /// <h3>Rule</h3> + /// + /// <embed src="images/Rule.svg" type="image/svg+xml"/> + /// + /// <h3>Block of 1 or more alternatives</h3> + /// + /// <embed src="images/Block.svg" type="image/svg+xml"/> + /// + /// <h2>Greedy Loops</h2> + /// + /// <h3>Greedy Closure: {@code (...)*}</h3> + /// + /// <embed src="images/ClosureGreedy.svg" type="image/svg+xml"/> + /// + /// <h3>Greedy Positive Closure: {@code (...)+}</h3> + /// + /// <embed src="images/PositiveClosureGreedy.svg" type="image/svg+xml"/> + /// + /// <h3>Greedy Optional: {@code (...)?}</h3> + /// + /// <embed src="images/OptionalGreedy.svg" type="image/svg+xml"/> + /// + /// <h2>Non-Greedy Loops</h2> + /// + /// <h3>Non-Greedy Closure: {@code (...)*?}</h3> + /// + /// <embed src="images/ClosureNonGreedy.svg" type="image/svg+xml"/> + /// + /// <h3>Non-Greedy Positive Closure: {@code (...)+?}</h3> + /// + /// <embed src="images/PositiveClosureNonGreedy.svg" type="image/svg+xml"/> + /// + /// <h3>Non-Greedy Optional: {@code (...)??}</h3> + /// + /// <embed src="images/OptionalNonGreedy.svg" type="image/svg+xml"/> + /// </summary> + +// GCC generates a warning here if ATN has already been declared due to the +// attributes added by ANTLR4CPP_PUBLIC. +// See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=39159 +// Only forward-declare if it hasn't already been declared. +#ifndef ANTLR4CPP_ATN_DECLARED + class ANTLR4CPP_PUBLIC ATN; +#endif + + class ANTLR4CPP_PUBLIC ATNState { + public: + static constexpr size_t INITIAL_NUM_TRANSITIONS = 4; + static constexpr size_t INVALID_STATE_NUMBER = std::numeric_limits<size_t>::max(); + + size_t stateNumber = INVALID_STATE_NUMBER; + size_t ruleIndex = 0; // at runtime, we don't have Rule objects + bool epsilonOnlyTransitions = false; + + /// Track the transitions emanating from this ATN state. 
+ std::vector<ConstTransitionPtr> transitions; + + ATNState() = delete; + + ATNState(ATNState const&) = delete; + + ATNState(ATNState&&) = delete; + + virtual ~ATNState() = default; + + ATNState& operator=(ATNState const&) = delete; + + ATNState& operator=(ATNState&&) = delete; + + void addTransition(ConstTransitionPtr e); + void addTransition(size_t index, ConstTransitionPtr e); + ConstTransitionPtr removeTransition(size_t index); + + virtual size_t hashCode() const; + virtual bool equals(const ATNState &other) const; + + virtual bool isNonGreedyExitState() const; + virtual std::string toString() const; + + ATNStateType getStateType() const { return _stateType; } + + protected: + explicit ATNState(ATNStateType stateType) : _stateType(stateType) {} + + private: + /// Used to cache lookahead during parsing, not used during construction. + + misc::IntervalSet _nextTokenWithinRule; + std::atomic<bool> _nextTokenUpdated { false }; + + const ATNStateType _stateType; + + friend class ATN; + }; + + inline bool operator==(const ATNState &lhs, const ATNState &rhs) { return lhs.equals(rhs); } + + inline bool operator!=(const ATNState &lhs, const ATNState &rhs) { return !operator==(lhs, rhs); } + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNStateType.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNStateType.cpp new file mode 100644 index 0000000000..577e2af87c --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNStateType.cpp @@ -0,0 +1,33 @@ +#include "atn/ATNStateType.h" + +std::string antlr4::atn::atnStateTypeName(ATNStateType atnStateType) { + switch (atnStateType) { + case ATNStateType::INVALID: + return "INVALID"; + case ATNStateType::BASIC: + return "BASIC"; + case ATNStateType::RULE_START: + return "RULE_START"; + case ATNStateType::BLOCK_START: + return "BLOCK_START"; + case ATNStateType::PLUS_BLOCK_START: + return "PLUS_BLOCK_START"; + case ATNStateType::STAR_BLOCK_START: + return "STAR_BLOCK_START"; + 
case ATNStateType::TOKEN_START: + return "TOKEN_START"; + case ATNStateType::RULE_STOP: + return "RULE_STOP"; + case ATNStateType::BLOCK_END: + return "BLOCK_END"; + case ATNStateType::STAR_LOOP_BACK: + return "STAR_LOOP_BACK"; + case ATNStateType::STAR_LOOP_ENTRY: + return "STAR_LOOP_ENTRY"; + case ATNStateType::PLUS_LOOP_BACK: + return "PLUS_LOOP_BACK"; + case ATNStateType::LOOP_END: + return "LOOP_END"; + } + return "UNKNOWN"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNStateType.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNStateType.h new file mode 100644 index 0000000000..e19b2cce92 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNStateType.h @@ -0,0 +1,36 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include <cstddef> +#include <string> + +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + // Constants for ATNState serialization. + enum class ATNStateType : size_t { + INVALID = 0, + BASIC = 1, + RULE_START = 2, + BLOCK_START = 3, + PLUS_BLOCK_START = 4, + STAR_BLOCK_START = 5, + TOKEN_START = 6, + RULE_STOP = 7, + BLOCK_END = 8, + STAR_LOOP_BACK = 9, + STAR_LOOP_ENTRY = 10, + PLUS_LOOP_BACK = 11, + LOOP_END = 12, + }; + + ANTLR4CPP_PUBLIC std::string atnStateTypeName(ATNStateType atnStateType); + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNType.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNType.h new file mode 100644 index 0000000000..3530ef6051 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNType.h @@ -0,0 +1,20 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + /// Represents the type of recognizer an ATN applies to. + enum class ATNType { + LEXER = 0, + PARSER = 1, + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ActionTransition.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ActionTransition.cpp new file mode 100644 index 0000000000..1886b7e169 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ActionTransition.cpp @@ -0,0 +1,29 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/ActionTransition.h" + +using namespace antlr4::atn; + +ActionTransition::ActionTransition(ATNState *target, size_t ruleIndex) + : Transition(TransitionType::ACTION, target), ruleIndex(ruleIndex), actionIndex(INVALID_INDEX), isCtxDependent(false) { +} + +ActionTransition::ActionTransition(ATNState *target, size_t ruleIndex, size_t actionIndex, bool isCtxDependent) + : Transition(TransitionType::ACTION, target), ruleIndex(ruleIndex), actionIndex(actionIndex), isCtxDependent(isCtxDependent) { +} + +bool ActionTransition::isEpsilon() const { + return true; // we are to be ignored by analysis 'cept for predicates +} + +bool ActionTransition::matches(size_t /*symbol*/, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const { + return false; +} + +std::string ActionTransition::toString() const { + return " ACTION " + Transition::toString() + " { ruleIndex: " + std::to_string(ruleIndex) + ", actionIndex: " + + std::to_string(actionIndex) + ", isCtxDependent: " + std::to_string(isCtxDependent) + " }"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ActionTransition.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ActionTransition.h new file mode 100644 index 0000000000..1700297a78 --- /dev/null +++ 
b/contrib/libs/antlr4_cpp_runtime/src/atn/ActionTransition.h @@ -0,0 +1,35 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/Transition.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC ActionTransition final : public Transition { + public: + static bool is(const Transition &transition) { return transition.getTransitionType() == TransitionType::ACTION; } + + static bool is(const Transition *transition) { return transition != nullptr && is(*transition); } + + const size_t ruleIndex; + const size_t actionIndex; + const bool isCtxDependent; // e.g., $i ref in action + + ActionTransition(ATNState *target, size_t ruleIndex); + + ActionTransition(ATNState *target, size_t ruleIndex, size_t actionIndex, bool isCtxDependent); + + virtual bool isEpsilon() const override; + + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + virtual std::string toString() const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/AmbiguityInfo.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/AmbiguityInfo.cpp new file mode 100644 index 0000000000..72ce922633 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/AmbiguityInfo.cpp @@ -0,0 +1,16 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/AmbiguityInfo.h" + +using namespace antlr4; +using namespace antlr4::atn; + +AmbiguityInfo::AmbiguityInfo(size_t decision, ATNConfigSet *configs, const antlrcpp::BitSet &ambigAlts, + TokenStream *input, size_t startIndex, size_t stopIndex, bool fullCtx) + : DecisionEventInfo(decision, configs, input, startIndex, stopIndex, fullCtx) { + + this->ambigAlts = ambigAlts; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/AmbiguityInfo.h b/contrib/libs/antlr4_cpp_runtime/src/atn/AmbiguityInfo.h new file mode 100644 index 0000000000..db594a1f48 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/AmbiguityInfo.h @@ -0,0 +1,68 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionEventInfo.h" +#include "support/BitSet.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// This class represents profiling event information for an ambiguity. + /// Ambiguities are decisions where a particular input resulted in an SLL + /// conflict, followed by LL prediction also reaching a conflict state + /// (indicating a true ambiguity in the grammar). + /// + /// <para> + /// This event may be reported during SLL prediction in cases where the + /// conflicting SLL configuration set provides sufficient information to + /// determine that the SLL conflict is truly an ambiguity. For example, if none + /// of the ATN configurations in the conflicting SLL configuration set have + /// traversed a global follow transition (i.e. 
+ /// <seealso cref="ATNConfig#reachesIntoOuterContext"/> is 0 for all configurations), then + /// the result of SLL prediction for that input is known to be equivalent to the + /// result of LL prediction for that input.</para> + /// + /// <para> + /// In some cases, the minimum represented alternative in the conflicting LL + /// configuration set is not equal to the minimum represented alternative in the + /// conflicting SLL configuration set. Grammars and inputs which result in this + /// scenario are unable to use <seealso cref="PredictionMode#SLL"/>, which in turn means + /// they cannot use the two-stage parsing strategy to improve parsing performance + /// for that input.</para> + /// </summary> + /// <seealso cref= ParserATNSimulator#reportAmbiguity </seealso> + /// <seealso cref= ANTLRErrorListener#reportAmbiguity + /// + /// @since 4.3 </seealso> + class ANTLR4CPP_PUBLIC AmbiguityInfo : public DecisionEventInfo { + public: + /// The set of alternative numbers for this decision event that lead to a valid parse. + antlrcpp::BitSet ambigAlts; + + /// <summary> + /// Constructs a new instance of the <seealso cref="AmbiguityInfo"/> class with the + /// specified detailed ambiguity information. + /// </summary> + /// <param name="decision"> The decision number </param> + /// <param name="configs"> The final configuration set identifying the ambiguous + /// alternatives for the current input </param> + /// <param name="ambigAlts"> The set of alternatives in the decision that lead to a valid parse. 
+ /// The predicted alt is the min(ambigAlts) </param> + /// <param name="input"> The input token stream </param> + /// <param name="startIndex"> The start index for the current prediction </param> + /// <param name="stopIndex"> The index at which the ambiguity was identified during + /// prediction </param> + /// <param name="fullCtx"> {@code true} if the ambiguity was identified during LL + /// prediction; otherwise, {@code false} if the ambiguity was identified + /// during SLL prediction </param> + AmbiguityInfo(size_t decision, ATNConfigSet *configs, const antlrcpp::BitSet &ambigAlts, TokenStream *input, + size_t startIndex, size_t stopIndex, bool fullCtx); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ArrayPredictionContext.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ArrayPredictionContext.cpp new file mode 100644 index 0000000000..e9478001b4 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ArrayPredictionContext.cpp @@ -0,0 +1,109 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/ArrayPredictionContext.h" + +#include <cstring> + +#include "atn/SingletonPredictionContext.h" +#include "misc/MurmurHash.h" +#include "support/Casts.h" + +using namespace antlr4::atn; +using namespace antlr4::misc; +using namespace antlrcpp; + +namespace { + + bool cachedHashCodeEqual(size_t lhs, size_t rhs) { // a hash of 0 compares equal to any hash (presumably "not cached yet" - confirm in PredictionContext) + return lhs == rhs || lhs == 0 || rhs == 0; + } + + bool predictionContextEqual(const Ref<const PredictionContext> &lhs, const Ref<const PredictionContext> &rhs) { + return (lhs == nullptr || rhs == nullptr) ? (lhs == nullptr && rhs == nullptr) : (*lhs == *rhs); // parents may be null (EMPTY merged in full-ctx mode): equal only if both are null, never dereference a null parent + } + +} + +ArrayPredictionContext::ArrayPredictionContext(const SingletonPredictionContext &predictionContext) + : ArrayPredictionContext({ predictionContext.parent }, { predictionContext.returnState }) {} + +ArrayPredictionContext::ArrayPredictionContext(std::vector<Ref<const PredictionContext>> parents, + std::vector<size_t> returnStates) + : PredictionContext(PredictionContextType::ARRAY), parents(std::move(parents)), returnStates(std::move(returnStates)) { + assert(this->parents.size() > 0); + assert(this->returnStates.size() > 0); + assert(this->parents.size() == this->returnStates.size()); +} + +bool ArrayPredictionContext::isEmpty() const { + // Since EMPTY_RETURN_STATE can only appear in the last position, we don't need to verify that size == 1. 
+ return returnStates[0] == EMPTY_RETURN_STATE; +} + +size_t ArrayPredictionContext::size() const { + return returnStates.size(); +} + +const Ref<const PredictionContext>& ArrayPredictionContext::getParent(size_t index) const { + return parents[index]; +} + +size_t ArrayPredictionContext::getReturnState(size_t index) const { + return returnStates[index]; +} + +size_t ArrayPredictionContext::hashCodeImpl() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getContextType())); + for (const auto &parent : parents) { + hash = MurmurHash::update(hash, parent); + } + for (const auto &returnState : returnStates) { + hash = MurmurHash::update(hash, returnState); + } + return MurmurHash::finish(hash, 1 + parents.size() + returnStates.size()); +} + +bool ArrayPredictionContext::equals(const PredictionContext &other) const { + if (this == std::addressof(other)) { + return true; + } + if (getContextType() != other.getContextType()) { + return false; + } + const auto &array = downCast<const ArrayPredictionContext&>(other); + return returnStates.size() == array.returnStates.size() && + parents.size() == array.parents.size() && + cachedHashCodeEqual(cachedHashCode(), array.cachedHashCode()) && + std::memcmp(returnStates.data(), array.returnStates.data(), returnStates.size() * sizeof(decltype(returnStates)::value_type)) == 0 && + std::equal(parents.begin(), parents.end(), array.parents.begin(), predictionContextEqual); +} + +std::string ArrayPredictionContext::toString() const { + if (isEmpty()) { + return "[]"; + } + + std::stringstream ss; + ss << "["; + for (size_t i = 0; i < returnStates.size(); i++) { + if (i > 0) { + ss << ", "; + } + if (returnStates[i] == EMPTY_RETURN_STATE) { + ss << "$"; + continue; + } + ss << returnStates[i]; + if (parents[i] != nullptr) { + ss << " " << parents[i]->toString(); + } else { + ss << "null"; // missing parent: print the full word "null" (was the truncated "nul") + } + } + ss << "]"; + return ss.str(); +} diff --git 
a/contrib/libs/antlr4_cpp_runtime/src/atn/ArrayPredictionContext.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ArrayPredictionContext.h new file mode 100644 index 0000000000..f43db98a01 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ArrayPredictionContext.h @@ -0,0 +1,51 @@ + +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/PredictionContext.h" + +namespace antlr4 { +namespace atn { + + class SingletonPredictionContext; + + class ANTLR4CPP_PUBLIC ArrayPredictionContext final : public PredictionContext { + public: + static bool is(const PredictionContext &predictionContext) { return predictionContext.getContextType() == PredictionContextType::ARRAY; } + + static bool is(const PredictionContext *predictionContext) { return predictionContext != nullptr && is(*predictionContext); } + + /// Parent can be empty only if full ctx mode and we make an array + /// from EMPTY and non-empty. We merge EMPTY by using null parent and + /// returnState == EMPTY_RETURN_STATE. + // Also here: we use a strong reference to our parents to avoid having them freed prematurely. + // See also SingletonPredictionContext. + std::vector<Ref<const PredictionContext>> parents; + + /// Sorted for merge, no duplicates; if present, EMPTY_RETURN_STATE is always last. 
+ std::vector<size_t> returnStates; + + explicit ArrayPredictionContext(const SingletonPredictionContext &predictionContext); + + ArrayPredictionContext(std::vector<Ref<const PredictionContext>> parents, std::vector<size_t> returnStates); + + ArrayPredictionContext(ArrayPredictionContext&&) = default; + + bool isEmpty() const override; + size_t size() const override; + const Ref<const PredictionContext>& getParent(size_t index) const override; + size_t getReturnState(size_t index) const override; + bool equals(const PredictionContext &other) const override; + std::string toString() const override; + + protected: + size_t hashCodeImpl() const override; + }; + +} // namespace atn +} // namespace antlr4 + diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/AtomTransition.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/AtomTransition.cpp new file mode 100644 index 0000000000..74153bf5cd --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/AtomTransition.cpp @@ -0,0 +1,27 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/IntervalSet.h" +#include "atn/Transition.h" + +#include "atn/AtomTransition.h" + +using namespace antlr4::misc; +using namespace antlr4::atn; + +AtomTransition::AtomTransition(ATNState *target, size_t label) : Transition(TransitionType::ATOM, target), _label(label) { +} + +IntervalSet AtomTransition::label() const { + return IntervalSet::of((int)_label); +} + +bool AtomTransition::matches(size_t symbol, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const { + return _label == symbol; +} + +std::string AtomTransition::toString() const { + return "ATOM " + Transition::toString() + " { label: " + std::to_string(_label) + " }"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/AtomTransition.h b/contrib/libs/antlr4_cpp_runtime/src/atn/AtomTransition.h new file mode 100644 index 0000000000..db62a7feab --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/AtomTransition.h @@ -0,0 +1,33 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/Transition.h" + +namespace antlr4 { +namespace atn { + + /// TODO: make all transitions sets? no, should remove set edges. + class ANTLR4CPP_PUBLIC AtomTransition final : public Transition { + public: + static bool is(const Transition &transition) { return transition.getTransitionType() == TransitionType::ATOM; } + + static bool is(const Transition *transition) { return transition != nullptr && is(*transition); } + + /// The token type or character value; or, signifies special label. 
+ /// TODO: rename this to label + const size_t _label; + + AtomTransition(ATNState *target, size_t label); + + virtual misc::IntervalSet label() const override; + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + virtual std::string toString() const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/BasicBlockStartState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/BasicBlockStartState.h new file mode 100644 index 0000000000..1c462ec0eb --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/BasicBlockStartState.h @@ -0,0 +1,24 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" +#include "atn/BlockStartState.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC BasicBlockStartState final : public BlockStartState { + public: + static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::BLOCK_START; } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + BasicBlockStartState() : BlockStartState(ATNStateType::BLOCK_START) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/BasicState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/BasicState.h new file mode 100644 index 0000000000..7f8a9ef0dd --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/BasicState.h @@ -0,0 +1,23 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "atn/ATNState.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC BasicState final : public ATNState { + public: + static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::BASIC; } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + BasicState() : ATNState(ATNStateType::BASIC) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/BlockEndState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/BlockEndState.h new file mode 100644 index 0000000000..11ef5499ba --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/BlockEndState.h @@ -0,0 +1,26 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ATNState.h" + +namespace antlr4 { +namespace atn { + + /// Terminal node of a simple {@code (a|b|c)} block. + class ANTLR4CPP_PUBLIC BlockEndState final : public ATNState { + public: + static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::BLOCK_END; } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + BlockStartState *startState = nullptr; + + BlockEndState() : ATNState(ATNStateType::BLOCK_END) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/BlockStartState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/BlockStartState.h new file mode 100644 index 0000000000..3475115894 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/BlockStartState.h @@ -0,0 +1,30 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "atn/DecisionState.h" + +namespace antlr4 { +namespace atn { + + /// The start of a regular {@code (...)} block. + class ANTLR4CPP_PUBLIC BlockStartState : public DecisionState { + public: + static bool is(const ATNState &atnState) { + const auto stateType = atnState.getStateType(); + return stateType >= ATNStateType::BLOCK_START && stateType <= ATNStateType::STAR_BLOCK_START; + } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + BlockEndState *endState = nullptr; + + protected: + using DecisionState::DecisionState; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ContextSensitivityInfo.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ContextSensitivityInfo.cpp new file mode 100644 index 0000000000..12442a9bc0 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ContextSensitivityInfo.cpp @@ -0,0 +1,14 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/ContextSensitivityInfo.h" + +using namespace antlr4; +using namespace antlr4::atn; + +ContextSensitivityInfo::ContextSensitivityInfo(size_t decision, ATNConfigSet *configs, TokenStream *input, + size_t startIndex, size_t stopIndex) + : DecisionEventInfo(decision, configs, input, startIndex, stopIndex, true) { +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ContextSensitivityInfo.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ContextSensitivityInfo.h new file mode 100644 index 0000000000..430ce3b6e8 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ContextSensitivityInfo.h @@ -0,0 +1,47 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "atn/DecisionEventInfo.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// This class represents profiling event information for a context sensitivity. + /// Context sensitivities are decisions where a particular input resulted in an + /// SLL conflict, but LL prediction produced a single unique alternative. + /// + /// <para> + /// In some cases, the unique alternative identified by LL prediction is not + /// equal to the minimum represented alternative in the conflicting SLL + /// configuration set. Grammars and inputs which result in this scenario are + /// unable to use <seealso cref="PredictionMode#SLL"/>, which in turn means they cannot use + /// the two-stage parsing strategy to improve parsing performance for that + /// input.</para> + /// </summary> + /// <seealso cref= ParserATNSimulator#reportContextSensitivity </seealso> + /// <seealso cref= ANTLRErrorListener#reportContextSensitivity + /// + /// @since 4.3 </seealso> + class ANTLR4CPP_PUBLIC ContextSensitivityInfo : public DecisionEventInfo { + public: + /// <summary> + /// Constructs a new instance of the <seealso cref="ContextSensitivityInfo"/> class + /// with the specified detailed context sensitivity information. 
+ /// </summary> + /// <param name="decision"> The decision number </param> + /// <param name="configs"> The final configuration set containing the unique + /// alternative identified by full-context prediction </param> + /// <param name="input"> The input token stream </param> + /// <param name="startIndex"> The start index for the current prediction </param> + /// <param name="stopIndex"> The index at which the context sensitivity was + /// identified during full-context prediction </param> + ContextSensitivityInfo(size_t decision, ATNConfigSet *configs, TokenStream *input, size_t startIndex, size_t stopIndex); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionEventInfo.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionEventInfo.cpp new file mode 100644 index 0000000000..bca6c778c0 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionEventInfo.cpp @@ -0,0 +1,14 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/DecisionEventInfo.h" + +using namespace antlr4; +using namespace antlr4::atn; + +DecisionEventInfo::DecisionEventInfo(size_t decision, ATNConfigSet *configs, TokenStream *input, size_t startIndex, + size_t stopIndex, bool fullCtx) + : decision(decision), configs(configs), input(input), startIndex(startIndex), stopIndex(stopIndex), fullCtx(fullCtx) { +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionEventInfo.h b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionEventInfo.h new file mode 100644 index 0000000000..af7f5f4b17 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionEventInfo.h @@ -0,0 +1,70 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// This is the base class for gathering detailed information about prediction + /// events which occur during parsing. + /// + /// Note that we could record the parser call stack at the time this event + /// occurred but in the presence of left recursive rules, the stack is kind of + /// meaningless. It's better to look at the individual configurations for their + /// individual stacks. Of course that is a <seealso cref="PredictionContext"/> object + /// not a parse tree node and so it does not have information about the extent + /// (start...stop) of the various subtrees. Examining the stack tops of all + /// configurations provide the return states for the rule invocations. + /// From there you can get the enclosing rule. + /// + /// @since 4.3 + /// </summary> + class ANTLR4CPP_PUBLIC DecisionEventInfo { + public: + /// <summary> + /// The invoked decision number which this event is related to. + /// </summary> + /// <seealso cref= ATN#decisionToState </seealso> + const size_t decision; + + /// <summary> + /// The configuration set containing additional information relevant to the + /// prediction state when the current event occurred, or {@code null} if no + /// additional information is relevant or available. + /// </summary> + const ATNConfigSet *configs; + + /// <summary> + /// The input token stream which is being parsed. + /// </summary> + const TokenStream *input; + + /// <summary> + /// The token index in the input stream at which the current prediction was + /// originally invoked. + /// </summary> + const size_t startIndex; + + /// <summary> + /// The token index in the input stream at which the current event occurred. 
+ /// </summary> + const size_t stopIndex; + + /// <summary> + /// {@code true} if the current event occurred during LL prediction; + /// otherwise, {@code false} if the input occurred during SLL prediction. + /// </summary> + const bool fullCtx; + + DecisionEventInfo(size_t decision, ATNConfigSet *configs, TokenStream *input, size_t startIndex, + size_t stopIndex, bool fullCtx); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionInfo.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionInfo.cpp new file mode 100644 index 0000000000..ee9b1aac34 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionInfo.cpp @@ -0,0 +1,25 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/ErrorInfo.h" +#include "atn/LookaheadEventInfo.h" + +#include "atn/DecisionInfo.h" + +using namespace antlr4::atn; + +DecisionInfo::DecisionInfo(size_t decision) : decision(decision) { +} + +std::string DecisionInfo::toString() const { + std::stringstream ss; + + ss << "{decision=" << decision << ", contextSensitivities=" << contextSensitivities.size() << ", errors="; + ss << errors.size() << ", ambiguities=" << ambiguities.size() << ", SLL_lookahead=" << SLL_TotalLook; + ss << ", SLL_ATNTransitions=" << SLL_ATNTransitions << ", SLL_DFATransitions=" << SLL_DFATransitions; + ss << ", LL_Fallback=" << LL_Fallback << ", LL_lookahead=" << LL_TotalLook << ", LL_ATNTransitions=" << LL_ATNTransitions << '}'; + + return ss.str(); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionInfo.h b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionInfo.h new file mode 100644 index 0000000000..2b43ad8be9 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionInfo.h @@ -0,0 +1,227 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ContextSensitivityInfo.h" +#include "atn/AmbiguityInfo.h" +#include "atn/PredicateEvalInfo.h" +#include "atn/ErrorInfo.h" + +namespace antlr4 { +namespace atn { + + class LookaheadEventInfo; + + /// <summary> + /// This class contains profiling gathered for a particular decision. + /// + /// <para> + /// Parsing performance in ANTLR 4 is heavily influenced by both static factors + /// (e.g. the form of the rules in the grammar) and dynamic factors (e.g. the + /// choice of input and the state of the DFA cache at the time profiling + /// operations are started). For best results, gather and use aggregate + /// statistics from a large sample of inputs representing the inputs expected in + /// production before using the results to make changes in the grammar.</para> + /// + /// @since 4.3 + /// </summary> + class ANTLR4CPP_PUBLIC DecisionInfo { + public: + /// <summary> + /// The decision number, which is an index into <seealso cref="ATN#decisionToState"/>. + /// </summary> + const size_t decision; + + /// <summary> + /// The total number of times <seealso cref="ParserATNSimulator#adaptivePredict"/> was + /// invoked for this decision. + /// </summary> + long long invocations = 0; + + /// <summary> + /// The total time spent in <seealso cref="ParserATNSimulator#adaptivePredict"/> for + /// this decision, in nanoseconds. + /// + /// <para> + /// The value of this field contains the sum of differential results obtained + /// by <seealso cref="System#nanoTime()"/>, and is not adjusted to compensate for JIT + /// and/or garbage collection overhead. For best accuracy, use a modern JVM + /// implementation that provides precise results from + /// <seealso cref="System#nanoTime()"/>, and perform profiling in a separate process + /// which is warmed up by parsing the input prior to profiling. 
If desired, + /// call <seealso cref="ATNSimulator#clearDFA"/> to reset the DFA cache to its initial + /// state before starting the profiling measurement pass.</para> + /// </summary> + long long timeInPrediction = 0; + + /// <summary> + /// The sum of the lookahead required for SLL prediction for this decision. + /// Note that SLL prediction is used before LL prediction for performance + /// reasons even when <seealso cref="PredictionMode#LL"/> or + /// <seealso cref="PredictionMode#LL_EXACT_AMBIG_DETECTION"/> is used. + /// </summary> + long long SLL_TotalLook = 0; + + /// <summary> + /// Gets the minimum lookahead required for any single SLL prediction to + /// complete for this decision, by reaching a unique prediction, reaching an + /// SLL conflict state, or encountering a syntax error. + /// </summary> + long long SLL_MinLook = 0; + + /// <summary> + /// Gets the maximum lookahead required for any single SLL prediction to + /// complete for this decision, by reaching a unique prediction, reaching an + /// SLL conflict state, or encountering a syntax error. + /// </summary> + long long SLL_MaxLook = 0; + + /// Gets the <seealso cref="LookaheadEventInfo"/> associated with the event where the + /// <seealso cref="#SLL_MaxLook"/> value was set. + Ref<LookaheadEventInfo> SLL_MaxLookEvent; + + /// <summary> + /// The sum of the lookahead required for LL prediction for this decision. + /// Note that LL prediction is only used when SLL prediction reaches a + /// conflict state. + /// </summary> + long long LL_TotalLook = 0; + + /// <summary> + /// Gets the minimum lookahead required for any single LL prediction to + /// complete for this decision. An LL prediction completes when the algorithm + /// reaches a unique prediction, a conflict state (for + /// <seealso cref="PredictionMode#LL"/>), an ambiguity state (for + /// <seealso cref="PredictionMode#LL_EXACT_AMBIG_DETECTION"/>), or a syntax error. 
+ /// </summary> + long long LL_MinLook = 0; + + /// <summary> + /// Gets the maximum lookahead required for any single LL prediction to + /// complete for this decision. An LL prediction completes when the algorithm + /// reaches a unique prediction, a conflict state (for + /// <seealso cref="PredictionMode#LL"/>), an ambiguity state (for + /// <seealso cref="PredictionMode#LL_EXACT_AMBIG_DETECTION"/>), or a syntax error. + /// </summary> + long long LL_MaxLook = 0; + + /// <summary> + /// Gets the <seealso cref="LookaheadEventInfo"/> associated with the event where the + /// <seealso cref="#LL_MaxLook"/> value was set. + /// </summary> + Ref<LookaheadEventInfo> LL_MaxLookEvent; + + /// <summary> + /// A collection of <seealso cref="ContextSensitivityInfo"/> instances describing the + /// context sensitivities encountered during LL prediction for this decision. + /// </summary> + /// <seealso cref= ContextSensitivityInfo </seealso> + std::vector<ContextSensitivityInfo> contextSensitivities; + + /// <summary> + /// A collection of <seealso cref="ErrorInfo"/> instances describing the parse errors + /// identified during calls to <seealso cref="ParserATNSimulator#adaptivePredict"/> for + /// this decision. + /// </summary> + /// <seealso cref= ErrorInfo </seealso> + std::vector<ErrorInfo> errors; + + /// <summary> + /// A collection of <seealso cref="AmbiguityInfo"/> instances describing the + /// ambiguities encountered during LL prediction for this decision. + /// </summary> + /// <seealso cref= AmbiguityInfo </seealso> + std::vector<AmbiguityInfo> ambiguities; + + /// <summary> + /// A collection of <seealso cref="PredicateEvalInfo"/> instances describing the + /// results of evaluating individual predicates during prediction for this + /// decision. 
+ /// </summary> + /// <seealso cref= PredicateEvalInfo </seealso> + std::vector<PredicateEvalInfo> predicateEvals; + + /// <summary> + /// The total number of ATN transitions required during SLL prediction for + /// this decision. An ATN transition is determined by the number of times the + /// DFA does not contain an edge that is required for prediction, resulting + /// in on-the-fly computation of that edge. + /// + /// <para> + /// If DFA caching of SLL transitions is employed by the implementation, ATN + /// computation may cache the computed edge for efficient lookup during + /// future parsing of this decision. Otherwise, the SLL parsing algorithm + /// will use ATN transitions exclusively.</para> + /// </summary> + /// <seealso cref= #SLL_DFATransitions </seealso> + /// <seealso cref= ParserATNSimulator#computeTargetState </seealso> + /// <seealso cref= LexerATNSimulator#computeTargetState </seealso> + long long SLL_ATNTransitions = 0; + + /// <summary> + /// The total number of DFA transitions required during SLL prediction for + /// this decision. + /// + /// <para>If the ATN simulator implementation does not use DFA caching for SLL + /// transitions, this value will be 0.</para> + /// </summary> + /// <seealso cref= ParserATNSimulator#getExistingTargetState </seealso> + /// <seealso cref= LexerATNSimulator#getExistingTargetState </seealso> + long long SLL_DFATransitions = 0; + + /// <summary> + /// Gets the total number of times SLL prediction completed in a conflict + /// state, resulting in fallback to LL prediction. + /// + /// <para>Note that this value is not related to whether or not + /// <seealso cref="PredictionMode#SLL"/> may be used successfully with a particular + /// grammar. 
If the ambiguity resolution algorithm applied to the SLL + /// conflicts for this decision produces the same result as LL prediction for + /// this decision, <seealso cref="PredictionMode#SLL"/> would produce the same overall + /// parsing result as <seealso cref="PredictionMode#LL"/>.</para> + /// </summary> + long long LL_Fallback = 0; + + /// <summary> + /// The total number of ATN transitions required during LL prediction for + /// this decision. An ATN transition is determined by the number of times the + /// DFA does not contain an edge that is required for prediction, resulting + /// in on-the-fly computation of that edge. + /// + /// <para> + /// If DFA caching of LL transitions is employed by the implementation, ATN + /// computation may cache the computed edge for efficient lookup during + /// future parsing of this decision. Otherwise, the LL parsing algorithm will + /// use ATN transitions exclusively.</para> + /// </summary> + /// <seealso cref= #LL_DFATransitions </seealso> + /// <seealso cref= ParserATNSimulator#computeTargetState </seealso> + /// <seealso cref= LexerATNSimulator#computeTargetState </seealso> + long long LL_ATNTransitions = 0; + + /// <summary> + /// The total number of DFA transitions required during LL prediction for + /// this decision. + /// + /// <para>If the ATN simulator implementation does not use DFA caching for LL + /// transitions, this value will be 0.</para> + /// </summary> + /// <seealso cref= ParserATNSimulator#getExistingTargetState </seealso> + /// <seealso cref= LexerATNSimulator#getExistingTargetState </seealso> + long long LL_DFATransitions = 0; + + /// <summary> + /// Constructs a new instance of the <seealso cref="DecisionInfo"/> class to contain + /// statistics for a particular decision. 
+ /// </summary> + /// <param name="decision"> The decision number </param> + explicit DecisionInfo(size_t decision); + + std::string toString() const; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionState.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionState.cpp new file mode 100644 index 0000000000..72adb210f5 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionState.cpp @@ -0,0 +1,12 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/DecisionState.h" + +using namespace antlr4::atn; + +std::string DecisionState::toString() const { + return "DECISION " + ATNState::toString(); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionState.h new file mode 100644 index 0000000000..b7341ac6c9 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionState.h @@ -0,0 +1,34 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "atn/ATNState.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC DecisionState : public ATNState { + public: + static bool is(const ATNState &atnState) { + const auto stateType = atnState.getStateType(); + return (stateType >= ATNStateType::BLOCK_START && stateType <= ATNStateType::TOKEN_START) || + stateType == ATNStateType::PLUS_LOOP_BACK || + stateType == ATNStateType::STAR_LOOP_ENTRY; + } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + int decision = -1; + bool nonGreedy = false; + + virtual std::string toString() const override; + + protected: + using ATNState::ATNState; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/EpsilonTransition.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/EpsilonTransition.cpp new file mode 100644 index 0000000000..503fb1630e --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/EpsilonTransition.cpp @@ -0,0 +1,31 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/EpsilonTransition.h" + +using namespace antlr4::atn; + +EpsilonTransition::EpsilonTransition(ATNState *target) : EpsilonTransition(target, INVALID_INDEX) { +} + +EpsilonTransition::EpsilonTransition(ATNState *target, size_t outermostPrecedenceReturn) + : Transition(TransitionType::EPSILON, target), _outermostPrecedenceReturn(outermostPrecedenceReturn) { +} + +size_t EpsilonTransition::outermostPrecedenceReturn() const { + return _outermostPrecedenceReturn; +} + +bool EpsilonTransition::isEpsilon() const { + return true; +} + +bool EpsilonTransition::matches(size_t /*symbol*/, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const { + return false; +} + +std::string EpsilonTransition::toString() const { + return "EPSILON " + Transition::toString() + " {}"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/EpsilonTransition.h b/contrib/libs/antlr4_cpp_runtime/src/atn/EpsilonTransition.h new file mode 100644 index 0000000000..21bc812822 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/EpsilonTransition.h @@ -0,0 +1,42 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/Transition.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC EpsilonTransition final : public Transition { + public: + static bool is(const Transition &transition) { return transition.getTransitionType() == TransitionType::EPSILON; } + + static bool is(const Transition *transition) { return transition != nullptr && is(*transition); } + + explicit EpsilonTransition(ATNState *target); + EpsilonTransition(ATNState *target, size_t outermostPrecedenceReturn); + + /** + * @return the rule index of a precedence rule for which this transition is + * returning from, where the precedence value is 0; otherwise, INVALID_INDEX. 
+ * + * @see ATNConfig#isPrecedenceFilterSuppressed() + * @see ParserATNSimulator#applyPrecedenceFilter(ATNConfigSet) + * @since 4.4.1 + */ + size_t outermostPrecedenceReturn() const; + + virtual bool isEpsilon() const override; + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + virtual std::string toString() const override; + + private: + const size_t _outermostPrecedenceReturn; // A rule index. + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ErrorInfo.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ErrorInfo.cpp new file mode 100644 index 0000000000..efe8507124 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ErrorInfo.cpp @@ -0,0 +1,15 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/ATNConfigSet.h" + +#include "atn/ErrorInfo.h" + +using namespace antlr4; +using namespace antlr4::atn; + +ErrorInfo::ErrorInfo(size_t decision, ATNConfigSet *configs, TokenStream *input, size_t startIndex, size_t stopIndex, bool fullCtx) + : DecisionEventInfo(decision, configs, input, startIndex, stopIndex, fullCtx) { +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ErrorInfo.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ErrorInfo.h new file mode 100644 index 0000000000..d34642a195 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ErrorInfo.h @@ -0,0 +1,43 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionEventInfo.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// This class represents profiling event information for a syntax error + /// identified during prediction. 
Syntax errors occur when the prediction + /// algorithm is unable to identify an alternative which would lead to a + /// successful parse. + /// </summary> + /// <seealso cref= Parser#notifyErrorListeners(Token, String, RecognitionException) </seealso> + /// <seealso cref= ANTLRErrorListener#syntaxError + /// + /// @since 4.3 </seealso> + class ANTLR4CPP_PUBLIC ErrorInfo : public DecisionEventInfo { + public: + /// <summary> + /// Constructs a new instance of the <seealso cref="ErrorInfo"/> class with the + /// specified detailed syntax error information. + /// </summary> + /// <param name="decision"> The decision number </param> + /// <param name="configs"> The final configuration set reached during prediction + /// prior to reaching the <seealso cref="ATNSimulator#ERROR"/> state </param> + /// <param name="input"> The input token stream </param> + /// <param name="startIndex"> The start index for the current prediction </param> + /// <param name="stopIndex"> The index at which the syntax error was identified </param> + /// <param name="fullCtx"> {@code true} if the syntax error was identified during LL + /// prediction; otherwise, {@code false} if the syntax error was identified + /// during SLL prediction </param> + ErrorInfo(size_t decision, ATNConfigSet *configs, TokenStream *input, size_t startIndex, size_t stopIndex, + bool fullCtx); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LL1Analyzer.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LL1Analyzer.cpp new file mode 100644 index 0000000000..1d43697584 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LL1Analyzer.cpp @@ -0,0 +1,189 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/RuleStopState.h" +#include "atn/Transition.h" +#include "atn/RuleTransition.h" +#include "atn/SingletonPredictionContext.h" +#include "atn/WildcardTransition.h" +#include "atn/NotSetTransition.h" +#include "misc/IntervalSet.h" +#include "atn/ATNConfig.h" + +#include "support/CPPUtils.h" + +#include "atn/LL1Analyzer.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlrcpp; + +namespace { + + struct ATNConfigHasher final { + size_t operator()(const ATNConfig& atn_config) const { + return atn_config.hashCode(); + } + }; + + struct ATNConfigComparer final { + bool operator()(const ATNConfig& lhs, const ATNConfig& rhs) const { + return lhs == rhs; + } + }; + + class LL1AnalyzerImpl final { + public: + LL1AnalyzerImpl(const ATN& atn, misc::IntervalSet& look, bool seeThruPreds, bool addEOF) : _atn(atn), _look(look), _seeThruPreds(seeThruPreds), _addEOF(addEOF) {} + + /// <summary> + /// Compute set of tokens that can follow {@code s} in the ATN in the + /// specified {@code ctx}. + /// <p/> + /// If {@code ctx} is {@code null} and {@code stopState} or the end of the + /// rule containing {@code s} is reached, <seealso cref="Token#EPSILON"/> is added to + /// the result set. If {@code ctx} is not {@code null} and {@code addEOF} is + /// {@code true} and {@code stopState} or the end of the outermost rule is + /// reached, <seealso cref="Token#EOF"/> is added to the result set. + /// </summary> + /// <param name="s"> the ATN state. </param> + /// <param name="stopState"> the ATN state to stop at. This can be a + /// <seealso cref="BlockEndState"/> to detect epsilon paths through a closure. </param> + /// <param name="ctx"> The outer context, or {@code null} if the outer context should + /// not be used. </param> + /// <param name="look"> The result lookahead set. </param> + /// <param name="lookBusy"> A set used for preventing epsilon closures in the ATN + /// from causing a stack overflow. 
Outside code should pass + /// {@code new HashSet<ATNConfig>} for this argument. </param> + /// <param name="calledRuleStack"> A set used for preventing left recursion in the + /// ATN from causing a stack overflow. Outside code should pass + /// {@code new BitSet()} for this argument. </param> + /// <param name="seeThruPreds"> {@code true} to treat semantic predicates as + /// implicitly {@code true} and "see through them", otherwise {@code false} + /// to treat semantic predicates as opaque and add <seealso cref="#HIT_PRED"/> to the + /// result if one is encountered. </param> + /// <param name="addEOF"> Add <seealso cref="Token#EOF"/> to the result if the end of the + /// outermost context is reached. This parameter has no effect if {@code ctx} + /// is {@code null}. </param> + void LOOK(ATNState *s, ATNState *stopState, Ref<const PredictionContext> const& ctx) { + if (!_lookBusy.insert(ATNConfig(s, 0, ctx)).second) { + return; + } + + // ml: s can never be null, hence no need to check if stopState is != null. 
+ if (s == stopState) { + if (ctx == nullptr) { + _look.add(Token::EPSILON); + return; + } else if (ctx->isEmpty() && _addEOF) { + _look.add(Token::EOF); + return; + } + } + + if (s->getStateType() == ATNStateType::RULE_STOP) { + if (ctx == nullptr) { + _look.add(Token::EPSILON); + return; + } else if (ctx->isEmpty() && _addEOF) { + _look.add(Token::EOF); + return; + } + + if (ctx != PredictionContext::EMPTY) { + bool removed = _calledRuleStack.test(s->ruleIndex); + _calledRuleStack[s->ruleIndex] = false; + // run thru all possible stack tops in ctx + for (size_t i = 0; i < ctx->size(); i++) { + ATNState *returnState = _atn.states[ctx->getReturnState(i)]; + LOOK(returnState, stopState, ctx->getParent(i)); + } + if (removed) { + _calledRuleStack.set(s->ruleIndex); + } + return; + } + } + + size_t n = s->transitions.size(); + for (size_t i = 0; i < n; i++) { + const Transition *t = s->transitions[i].get(); + const auto tType = t->getTransitionType(); + + if (tType == TransitionType::RULE) { + if (_calledRuleStack[(static_cast<const RuleTransition*>(t))->target->ruleIndex]) { + continue; + } + + Ref<const PredictionContext> newContext = SingletonPredictionContext::create(ctx, (static_cast<const RuleTransition*>(t))->followState->stateNumber); + + _calledRuleStack.set((static_cast<const RuleTransition*>(t))->target->ruleIndex); + LOOK(t->target, stopState, newContext); + _calledRuleStack[(static_cast<const RuleTransition*>(t))->target->ruleIndex] = false; + + } else if (tType == TransitionType::PREDICATE || tType == TransitionType::PRECEDENCE) { + if (_seeThruPreds) { + LOOK(t->target, stopState, ctx); + } else { + _look.add(LL1Analyzer::HIT_PRED); + } + } else if (t->isEpsilon()) { + LOOK(t->target, stopState, ctx); + } else if (tType == TransitionType::WILDCARD) { + _look.addAll(misc::IntervalSet::of(Token::MIN_USER_TOKEN_TYPE, static_cast<ssize_t>(_atn.maxTokenType))); + } else { + misc::IntervalSet set = t->label(); + if (!set.isEmpty()) { + if (tType == 
TransitionType::NOT_SET) { + set = set.complement(misc::IntervalSet::of(Token::MIN_USER_TOKEN_TYPE, static_cast<ssize_t>(_atn.maxTokenType))); + } + _look.addAll(set); + } + } + } + } + + private: + const ATN& _atn; + misc::IntervalSet& _look; + antlrcpp::BitSet _calledRuleStack; + std::unordered_set<ATNConfig, ATNConfigHasher, ATNConfigComparer> _lookBusy; + bool _seeThruPreds; + bool _addEOF; + }; + +} + +std::vector<misc::IntervalSet> LL1Analyzer::getDecisionLookahead(ATNState *s) const { + std::vector<misc::IntervalSet> look; + + if (s == nullptr) { + return look; + } + + look.resize(s->transitions.size()); // Fills all interval sets with defaults. + for (size_t alt = 0; alt < s->transitions.size(); alt++) { + LL1AnalyzerImpl impl(_atn, look[alt], false, false); + impl.LOOK(s->transitions[alt]->target, nullptr, PredictionContext::EMPTY); + // Wipe out lookahead for this alternative if we found nothing + // or we had a predicate when we !seeThruPreds + if (look[alt].size() == 0 || look[alt].contains(LL1Analyzer::HIT_PRED)) { + look[alt].clear(); + } + } + return look; +} + +misc::IntervalSet LL1Analyzer::LOOK(ATNState *s, RuleContext *ctx) const { + return LOOK(s, nullptr, ctx); +} + +misc::IntervalSet LL1Analyzer::LOOK(ATNState *s, ATNState *stopState, RuleContext *ctx) const { + Ref<const PredictionContext> lookContext = ctx != nullptr ? PredictionContext::fromRuleContext(_atn, ctx) : nullptr; + misc::IntervalSet r; + LL1AnalyzerImpl impl(_atn, r, true, true); + impl.LOOK(s, stopState, lookContext); + return r; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LL1Analyzer.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LL1Analyzer.h new file mode 100644 index 0000000000..7d47c7610f --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LL1Analyzer.h @@ -0,0 +1,76 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Token.h" +#include "atn/ATNConfig.h" +#include "atn/PredictionContext.h" +#include "support/BitSet.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC LL1Analyzer final { + public: + /// Special value added to the lookahead sets to indicate that we hit + /// a predicate during analysis if {@code seeThruPreds==false}. + static constexpr size_t HIT_PRED = Token::INVALID_TYPE; + + explicit LL1Analyzer(const atn::ATN &atn) : _atn(atn) {} + + /// <summary> + /// Calculates the SLL(1) expected lookahead set for each outgoing transition + /// of an <seealso cref="ATNState"/>. The returned array has one element for each + /// outgoing transition in {@code s}. If the closure from transition + /// <em>i</em> leads to a semantic predicate before matching a symbol, the + /// element at index <em>i</em> of the result will be {@code null}. + /// </summary> + /// <param name="s"> the ATN state </param> + /// <returns> the expected symbols for each outgoing transition of {@code s}. </returns> + std::vector<misc::IntervalSet> getDecisionLookahead(ATNState *s) const; + + /// <summary> + /// Compute set of tokens that can follow {@code s} in the ATN in the + /// specified {@code ctx}. + /// <p/> + /// If {@code ctx} is {@code null} and the end of the rule containing + /// {@code s} is reached, <seealso cref="Token#EPSILON"/> is added to the result set. + /// If {@code ctx} is not {@code null} and the end of the outermost rule is + /// reached, <seealso cref="Token#EOF"/> is added to the result set. + /// </summary> + /// <param name="s"> the ATN state </param> + /// <param name="ctx"> the complete parser context, or {@code null} if the context + /// should be ignored + /// </param> + /// <returns> The set of tokens that can follow {@code s} in the ATN in the + /// specified {@code ctx}. 
</returns> + misc::IntervalSet LOOK(ATNState *s, RuleContext *ctx) const; + + /// <summary> + /// Compute set of tokens that can follow {@code s} in the ATN in the + /// specified {@code ctx}. + /// <p/> + /// If {@code ctx} is {@code null} and the end of the rule containing + /// {@code s} is reached, <seealso cref="Token#EPSILON"/> is added to the result set. + /// If {@code ctx} is not {@code null} and the end of the outermost rule is + /// reached, <seealso cref="Token#EOF"/> is added to the result set. + /// </summary> + /// <param name="s"> the ATN state </param> + /// <param name="stopState"> the ATN state to stop at. This can be a + /// <seealso cref="BlockEndState"/> to detect epsilon paths through a closure. </param> + /// <param name="ctx"> the complete parser context, or {@code null} if the context + /// should be ignored + /// </param> + /// <returns> The set of tokens that can follow {@code s} in the ATN in the + /// specified {@code ctx}. </returns> + misc::IntervalSet LOOK(ATNState *s, ATNState *stopState, RuleContext *ctx) const; + + private: + const atn::ATN &_atn; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerATNConfig.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerATNConfig.cpp new file mode 100644 index 0000000000..e70cfac2ca --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerATNConfig.cpp @@ -0,0 +1,67 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/MurmurHash.h" +#include "atn/DecisionState.h" +#include "atn/PredictionContext.h" +#include "SemanticContext.h" +#include "atn/LexerActionExecutor.h" + +#include "support/CPPUtils.h" +#include "support/Casts.h" + +#include "atn/LexerATNConfig.h" + +using namespace antlr4::atn; +using namespace antlrcpp; + +LexerATNConfig::LexerATNConfig(ATNState *state, int alt, Ref<const PredictionContext> context) + : ATNConfig(state, alt, std::move(context)) {} + +LexerATNConfig::LexerATNConfig(ATNState *state, int alt, Ref<const PredictionContext> context, Ref<const LexerActionExecutor> lexerActionExecutor) + : ATNConfig(state, alt, std::move(context)), _lexerActionExecutor(std::move(lexerActionExecutor)) {} + +LexerATNConfig::LexerATNConfig(LexerATNConfig const& other, ATNState *state) + : ATNConfig(other, state), _lexerActionExecutor(other._lexerActionExecutor), _passedThroughNonGreedyDecision(checkNonGreedyDecision(other, state)) {} + +LexerATNConfig::LexerATNConfig(LexerATNConfig const& other, ATNState *state, Ref<const LexerActionExecutor> lexerActionExecutor) + : ATNConfig(other, state), _lexerActionExecutor(std::move(lexerActionExecutor)), _passedThroughNonGreedyDecision(checkNonGreedyDecision(other, state)) {} + +LexerATNConfig::LexerATNConfig(LexerATNConfig const& other, ATNState *state, Ref<const PredictionContext> context) + : ATNConfig(other, state, std::move(context)), _lexerActionExecutor(other._lexerActionExecutor), _passedThroughNonGreedyDecision(checkNonGreedyDecision(other, state)) {} + +size_t LexerATNConfig::hashCode() const { + size_t hashCode = misc::MurmurHash::initialize(7); + hashCode = misc::MurmurHash::update(hashCode, state->stateNumber); + hashCode = misc::MurmurHash::update(hashCode, alt); + hashCode = misc::MurmurHash::update(hashCode, context); + hashCode = misc::MurmurHash::update(hashCode, semanticContext); + hashCode = misc::MurmurHash::update(hashCode, _passedThroughNonGreedyDecision ? 
1 : 0); + hashCode = misc::MurmurHash::update(hashCode, _lexerActionExecutor); + hashCode = misc::MurmurHash::finish(hashCode, 6); + return hashCode; +} + +bool LexerATNConfig::operator==(const LexerATNConfig& other) const +{ + if (this == &other) + return true; + + if (_passedThroughNonGreedyDecision != other._passedThroughNonGreedyDecision) + return false; + + if (_lexerActionExecutor == nullptr) + return other._lexerActionExecutor == nullptr; + if (*_lexerActionExecutor != *(other._lexerActionExecutor)) { + return false; + } + + return ATNConfig::operator==(other); +} + +bool LexerATNConfig::checkNonGreedyDecision(LexerATNConfig const& source, ATNState *target) { + return source._passedThroughNonGreedyDecision || + (DecisionState::is(target) && downCast<DecisionState*>(target)->nonGreedy); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerATNConfig.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerATNConfig.h new file mode 100644 index 0000000000..7d1d6b40e2 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerATNConfig.h @@ -0,0 +1,44 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "atn/ATNConfig.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC LexerATNConfig final : public ATNConfig { + public: + LexerATNConfig(ATNState *state, int alt, Ref<const PredictionContext> context); + LexerATNConfig(ATNState *state, int alt, Ref<const PredictionContext> context, Ref<const LexerActionExecutor> lexerActionExecutor); + + LexerATNConfig(LexerATNConfig const& other, ATNState *state); + LexerATNConfig(LexerATNConfig const& other, ATNState *state, Ref<const LexerActionExecutor> lexerActionExecutor); + LexerATNConfig(LexerATNConfig const& other, ATNState *state, Ref<const PredictionContext> context); + + /** + * Gets the {@link LexerActionExecutor} capable of executing the embedded + * action(s) for the current configuration. + */ + const Ref<const LexerActionExecutor>& getLexerActionExecutor() const { return _lexerActionExecutor; } + bool hasPassedThroughNonGreedyDecision() const { return _passedThroughNonGreedyDecision; } + + virtual size_t hashCode() const override; + + bool operator==(const LexerATNConfig& other) const; + + private: + /** + * This is the backing field for {@link #getLexerActionExecutor}. + */ + const Ref<const LexerActionExecutor> _lexerActionExecutor; + const bool _passedThroughNonGreedyDecision = false; + + static bool checkNonGreedyDecision(LexerATNConfig const& source, ATNState *target); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerATNSimulator.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerATNSimulator.cpp new file mode 100644 index 0000000000..ef1b1cf2f1 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerATNSimulator.cpp @@ -0,0 +1,617 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "IntStream.h" +#include "atn/OrderedATNConfigSet.h" +#include "Token.h" +#include "LexerNoViableAltException.h" +#include "atn/RuleStopState.h" +#include "atn/RuleTransition.h" +#include "atn/SingletonPredictionContext.h" +#include "atn/PredicateTransition.h" +#include "atn/ActionTransition.h" +#include "atn/TokensStartState.h" +#include "misc/Interval.h" +#include "dfa/DFA.h" +#include "Lexer.h" +#include "internal/Synchronization.h" + +#include "dfa/DFAState.h" +#include "atn/LexerATNConfig.h" +#include "atn/LexerActionExecutor.h" + +#include "atn/LexerATNSimulator.h" + +#define DEBUG_ATN 0 +#define DEBUG_DFA 0 + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::internal; +using namespace antlrcpp; + +void LexerATNSimulator::SimState::reset() { + *this = SimState(); +} + +LexerATNSimulator::LexerATNSimulator(const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, + PredictionContextCache &sharedContextCache) + : LexerATNSimulator(nullptr, atn, decisionToDFA, sharedContextCache) { +} + +LexerATNSimulator::LexerATNSimulator(Lexer *recog, const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, + PredictionContextCache &sharedContextCache) + : ATNSimulator(atn, sharedContextCache), _recog(recog), _decisionToDFA(decisionToDFA) { + InitializeInstanceFields(); +} + +void LexerATNSimulator::copyState(LexerATNSimulator *simulator) { + _charPositionInLine = simulator->_charPositionInLine; + _line = simulator->_line; + _mode = simulator->_mode; + _startIndex = simulator->_startIndex; +} + +size_t LexerATNSimulator::match(CharStream *input, size_t mode) { + _mode = mode; + ssize_t mark = input->mark(); + + auto onExit = finally([input, mark] { + input->release(mark); + }); + + _startIndex = input->index(); + _prevAccept.reset(); + const dfa::DFA &dfa = _decisionToDFA[mode]; + dfa::DFAState* s0; + { + SharedLock<SharedMutex> stateLock(atn._stateMutex); + s0 = dfa.s0; + } + if (s0 == nullptr) { + return matchATN(input); + } else { + 
return execATN(input, s0); + } +} + +void LexerATNSimulator::reset() { + _prevAccept.reset(); + _startIndex = 0; + _line = 1; + _charPositionInLine = 0; + _mode = Lexer::DEFAULT_MODE; +} + +void LexerATNSimulator::clearDFA() { + size_t size = _decisionToDFA.size(); + _decisionToDFA.clear(); + for (size_t d = 0; d < size; ++d) { + _decisionToDFA.emplace_back(atn.getDecisionState(d), d); + } +} + +size_t LexerATNSimulator::matchATN(CharStream *input) { + ATNState *startState = atn.modeToStartState[_mode]; + + std::unique_ptr<ATNConfigSet> s0_closure = computeStartState(input, startState); + + bool suppressEdge = s0_closure->hasSemanticContext; + s0_closure->hasSemanticContext = false; + + dfa::DFAState *next = addDFAState(s0_closure.release(), suppressEdge); + + size_t predict = execATN(input, next); + + return predict; +} + +size_t LexerATNSimulator::execATN(CharStream *input, dfa::DFAState *ds0) { + if (ds0->isAcceptState) { + // allow zero-length tokens + // ml: in Java code this method uses 3 params. The first is a member var of the class anyway (_prevAccept), so why pass it here? + captureSimState(input, ds0); + } + + size_t t = input->LA(1); + dfa::DFAState *s = ds0; // s is current/from DFA state + + while (true) { // while more work + // As we move src->trg, src->trg, we keep track of the previous trg to + // avoid looking up the DFA state again, which is expensive. + // If the previous target was already part of the DFA, we might + // be able to avoid doing a reach operation upon t. If s!=null, + // it means that semantic predicates didn't prevent us from + // creating a DFA state. Once we know s!=null, we check to see if + // the DFA state has an edge already for t. If so, we can just reuse + // its configuration set; there's no point in re-computing it. + // This is kind of like doing DFA simulation within the ATN + // simulation because DFA simulation is really just a way to avoid + // computing reach/closure sets. 
Technically, once we know that + // we have a previously added DFA state, we could jump over to + // the DFA simulator. But, that would mean popping back and forth + // a lot and making things more complicated algorithmically. + // This optimization makes a lot of sense for loops within DFA. + // A character will take us back to an existing DFA state + // that already has lots of edges out of it. e.g., .* in comments. + dfa::DFAState *target = getExistingTargetState(s, t); + if (target == nullptr) { + target = computeTargetState(input, s, t); + } + + if (target == ERROR.get()) { + break; + } + + // If this is a consumable input element, make sure to consume before + // capturing the accept state so the input index, line, and char + // position accurately reflect the state of the interpreter at the + // end of the token. + if (t != Token::EOF) { + consume(input); + } + + if (target->isAcceptState) { + captureSimState(input, target); + if (t == Token::EOF) { + break; + } + } + + t = input->LA(1); + s = target; // flip; current DFA target becomes new src/from state + } + + return failOrAccept(input, s->configs.get(), t); +} + +dfa::DFAState *LexerATNSimulator::getExistingTargetState(dfa::DFAState *s, size_t t) { + dfa::DFAState* retval = nullptr; + SharedLock<SharedMutex> edgeLock(atn._edgeMutex); + if (t <= MAX_DFA_EDGE) { + auto iterator = s->edges.find(t - MIN_DFA_EDGE); +#if DEBUG_ATN == 1 + if (iterator != s->edges.end()) { + std::cout << std::string("reuse state ") << s->stateNumber << std::string(" edge to ") << iterator->second->stateNumber << std::endl; + } +#endif + + if (iterator != s->edges.end()) + retval = iterator->second; + } + return retval; +} + +dfa::DFAState *LexerATNSimulator::computeTargetState(CharStream *input, dfa::DFAState *s, size_t t) { + OrderedATNConfigSet *reach = new OrderedATNConfigSet(); /* mem-check: deleted on error or managed by new DFA state. 
*/ + + // if we don't find an existing DFA state + // Fill reach starting from closure, following t transitions + getReachableConfigSet(input, s->configs.get(), reach, t); + + if (reach->isEmpty()) { // we got nowhere on t from s + if (!reach->hasSemanticContext) { + // we got nowhere on t, don't throw out this knowledge; it'd + // cause a failover from DFA later. + addDFAEdge(s, t, ERROR.get()); + } + delete reach; + + // stop when we can't match any more char + return ERROR.get(); + } + + // Add an edge from s to target DFA found/created for reach + return addDFAEdge(s, t, reach); +} + +size_t LexerATNSimulator::failOrAccept(CharStream *input, ATNConfigSet *reach, size_t t) { + if (_prevAccept.dfaState != nullptr) { + accept(input, _prevAccept.dfaState->lexerActionExecutor, _startIndex, _prevAccept.index, _prevAccept.line, _prevAccept.charPos); + return _prevAccept.dfaState->prediction; + } else { + // if no accept and EOF is first char, return EOF + if (t == Token::EOF && input->index() == _startIndex) { + return Token::EOF; + } + + throw LexerNoViableAltException(_recog, input, _startIndex, reach); + } +} + +void LexerATNSimulator::getReachableConfigSet(CharStream *input, ATNConfigSet *closure_, ATNConfigSet *reach, size_t t) { + // this is used to skip processing for configs which have a lower priority + // than a config that already reached an accept state for the same rule + size_t skipAlt = ATN::INVALID_ALT_NUMBER; + + for (const auto &c : closure_->configs) { + bool currentAltReachedAcceptState = c->alt == skipAlt; + if (currentAltReachedAcceptState && (std::static_pointer_cast<LexerATNConfig>(c))->hasPassedThroughNonGreedyDecision()) { + continue; + } + +#if DEBUG_ATN == 1 + std::cout << "testing " << getTokenName((int)t) << " at " << c->toString(true) << std::endl; +#endif + + size_t n = c->state->transitions.size(); + for (size_t ti = 0; ti < n; ti++) { // for each transition + const Transition *trans = c->state->transitions[ti].get(); + ATNState 
*target = getReachableTarget(trans, (int)t); + if (target != nullptr) { + auto lexerActionExecutor = downCast<const LexerATNConfig&>(*c).getLexerActionExecutor(); + if (lexerActionExecutor != nullptr) { + lexerActionExecutor = lexerActionExecutor->fixOffsetBeforeMatch((int)input->index() - (int)_startIndex); + } + + bool treatEofAsEpsilon = t == Token::EOF; + Ref<LexerATNConfig> config = std::make_shared<LexerATNConfig>(downCast<const LexerATNConfig&>(*c), + target, std::move(lexerActionExecutor)); + + if (closure(input, config, reach, currentAltReachedAcceptState, true, treatEofAsEpsilon)) { + // any remaining configs for this alt have a lower priority than + // the one that just reached an accept state. + skipAlt = c->alt; + break; + } + } + } + } +} + +void LexerATNSimulator::accept(CharStream *input, const Ref<const LexerActionExecutor> &lexerActionExecutor, size_t /*startIndex*/, + size_t index, size_t line, size_t charPos) { +#if DEBUG_ATN == 1 + std::cout << "ACTION "; + std::cout << toString(lexerActionExecutor) << std::endl; +#endif + + // seek to after last char in token + input->seek(index); + _line = line; + _charPositionInLine = (int)charPos; + + if (lexerActionExecutor != nullptr && _recog != nullptr) { + lexerActionExecutor->execute(_recog, input, _startIndex); + } +} + +atn::ATNState *LexerATNSimulator::getReachableTarget(const Transition *trans, size_t t) { + if (trans->matches(t, Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE)) { + return trans->target; + } + + return nullptr; +} + +std::unique_ptr<ATNConfigSet> LexerATNSimulator::computeStartState(CharStream *input, ATNState *p) { + Ref<const PredictionContext> initialContext = PredictionContext::EMPTY; // ml: the purpose of this assignment is unclear + std::unique_ptr<ATNConfigSet> configs(new OrderedATNConfigSet()); + for (size_t i = 0; i < p->transitions.size(); i++) { + ATNState *target = p->transitions[i]->target; + Ref<LexerATNConfig> c = std::make_shared<LexerATNConfig>(target, (int)(i + 1), 
initialContext); + closure(input, c, configs.get(), false, false, false); + } + + return configs; +} + +bool LexerATNSimulator::closure(CharStream *input, const Ref<LexerATNConfig> &config, ATNConfigSet *configs, + bool currentAltReachedAcceptState, bool speculative, bool treatEofAsEpsilon) { +#if DEBUG_ATN == 1 + std::cout << "closure(" << config->toString(true) << ")" << std::endl; +#endif + + if (config->state != nullptr && config->state->getStateType() == ATNStateType::RULE_STOP) { +#if DEBUG_ATN == 1 + if (_recog != nullptr) { + std::cout << "closure at " << _recog->getRuleNames()[config->state->ruleIndex] << " rule stop " << config << std::endl; + } else { + std::cout << "closure at rule stop " << config << std::endl; + } +#endif + + if (config->context == nullptr || config->context->hasEmptyPath()) { + if (config->context == nullptr || config->context->isEmpty()) { + configs->add(config); + return true; + } else { + configs->add(std::make_shared<LexerATNConfig>(*config, config->state, PredictionContext::EMPTY)); + currentAltReachedAcceptState = true; + } + } + + if (config->context != nullptr && !config->context->isEmpty()) { + for (size_t i = 0; i < config->context->size(); i++) { + if (config->context->getReturnState(i) != PredictionContext::EMPTY_RETURN_STATE) { + Ref<const PredictionContext> newContext = config->context->getParent(i); // "pop" return state + ATNState *returnState = atn.states[config->context->getReturnState(i)]; + Ref<LexerATNConfig> c = std::make_shared<LexerATNConfig>(*config, returnState, newContext); + currentAltReachedAcceptState = closure(input, c, configs, currentAltReachedAcceptState, speculative, treatEofAsEpsilon); + } + } + } + + return currentAltReachedAcceptState; + } + + // optimization + if (!config->state->epsilonOnlyTransitions) { + if (!currentAltReachedAcceptState || !config->hasPassedThroughNonGreedyDecision()) { + configs->add(config); + } + } + + ATNState *p = config->state; + for (size_t i = 0; i < 
p->transitions.size(); i++) { + const Transition *t = p->transitions[i].get(); + Ref<LexerATNConfig> c = getEpsilonTarget(input, config, t, configs, speculative, treatEofAsEpsilon); + if (c != nullptr) { + currentAltReachedAcceptState = closure(input, c, configs, currentAltReachedAcceptState, speculative, treatEofAsEpsilon); + } + } + + return currentAltReachedAcceptState; +} + +Ref<LexerATNConfig> LexerATNSimulator::getEpsilonTarget(CharStream *input, const Ref<LexerATNConfig> &config, const Transition *t, + ATNConfigSet *configs, bool speculative, bool treatEofAsEpsilon) { + + Ref<LexerATNConfig> c = nullptr; + switch (t->getTransitionType()) { + case TransitionType::RULE: { + const RuleTransition *ruleTransition = static_cast<const RuleTransition*>(t); + Ref<const PredictionContext> newContext = SingletonPredictionContext::create(config->context, ruleTransition->followState->stateNumber); + c = std::make_shared<LexerATNConfig>(*config, t->target, newContext); + break; + } + + case TransitionType::PRECEDENCE: + throw UnsupportedOperationException("Precedence predicates are not supported in lexers."); + + case TransitionType::PREDICATE: { + /* Track traversing semantic predicates. If we traverse, + we cannot add a DFA state for this "reach" computation + because the DFA would not test the predicate again in the + future. Rather than creating collections of semantic predicates + like v3 and testing them on prediction, v4 will test them on the + fly all the time using the ATN not the DFA. This is slower but + semantically it's not used that often. One of the key elements to + this predicate mechanism is not adding DFA states that see + predicates immediately afterwards in the ATN. For example, + + a : ID {p1}? | ID {p2}? ; + + should create the start state for rule 'a' (to save start state + competition), but should not create target of ID state. The + collection of ATN states the following ID references includes + states reached by traversing predicates. 
Since this is when we + test them, we cannot cache the DFA state target of ID. + */ + const PredicateTransition *pt = static_cast<const PredicateTransition*>(t); + +#if DEBUG_ATN == 1 + std::cout << "EVAL rule " << pt->getRuleIndex() << ":" << pt->getPredIndex() << std::endl; +#endif + + configs->hasSemanticContext = true; + if (evaluatePredicate(input, pt->getRuleIndex(), pt->getPredIndex(), speculative)) { + c = std::make_shared<LexerATNConfig>(*config, t->target); + } + break; + } + + case TransitionType::ACTION: + if (config->context == nullptr|| config->context->hasEmptyPath()) { + // execute actions anywhere in the start rule for a token. + // + // TODO: if the entry rule is invoked recursively, some + // actions may be executed during the recursive call. The + // problem can appear when hasEmptyPath() is true but + // isEmpty() is false. In this case, the config needs to be + // split into two contexts - one with just the empty path + // and another with everything but the empty path. + // Unfortunately, the current algorithm does not allow + // getEpsilonTarget to return two configurations, so + // additional modifications are needed before we can support + // the split operation. 
+ auto lexerActionExecutor = LexerActionExecutor::append(config->getLexerActionExecutor(), + atn.lexerActions[static_cast<const ActionTransition *>(t)->actionIndex]); + c = std::make_shared<LexerATNConfig>(*config, t->target, std::move(lexerActionExecutor)); + break; + } + else { + // ignore actions in referenced rules + c = std::make_shared<LexerATNConfig>(*config, t->target); + break; + } + + case TransitionType::EPSILON: + c = std::make_shared<LexerATNConfig>(*config, t->target); + break; + + case TransitionType::ATOM: + case TransitionType::RANGE: + case TransitionType::SET: + if (treatEofAsEpsilon) { + if (t->matches(Token::EOF, Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE)) { + c = std::make_shared<LexerATNConfig>(*config, t->target); + break; + } + } + + break; + + default: // To silence the compiler. Other transition types are not used here. + break; + } + + return c; +} + +bool LexerATNSimulator::evaluatePredicate(CharStream *input, size_t ruleIndex, size_t predIndex, bool speculative) { + // assume true if no recognizer was provided + if (_recog == nullptr) { + return true; + } + + if (!speculative) { + return _recog->sempred(nullptr, ruleIndex, predIndex); + } + + size_t savedCharPositionInLine = _charPositionInLine; + size_t savedLine = _line; + size_t index = input->index(); + ssize_t marker = input->mark(); + + auto onExit = finally([this, input, savedCharPositionInLine, savedLine, index, marker] { + _charPositionInLine = savedCharPositionInLine; + _line = savedLine; + input->seek(index); + input->release(marker); + }); + + consume(input); + return _recog->sempred(nullptr, ruleIndex, predIndex); +} + +void LexerATNSimulator::captureSimState(CharStream *input, dfa::DFAState *dfaState) { + _prevAccept.index = input->index(); + _prevAccept.line = _line; + _prevAccept.charPos = _charPositionInLine; + _prevAccept.dfaState = dfaState; +} + +dfa::DFAState *LexerATNSimulator::addDFAEdge(dfa::DFAState *from, size_t t, ATNConfigSet *q) { + /* leading to this 
call, ATNConfigSet.hasSemanticContext is used as a + * marker indicating dynamic predicate evaluation makes this edge + * dependent on the specific input sequence, so the static edge in the + * DFA should be omitted. The target DFAState is still created since + * execATN has the ability to resynchronize with the DFA state cache + * following the predicate evaluation step. + * + * TJP notes: next time through the DFA, we see a pred again and eval. + * If that gets us to a previously created (but dangling) DFA + * state, we can continue in pure DFA mode from there. + */ + bool suppressEdge = q->hasSemanticContext; + q->hasSemanticContext = false; + + dfa::DFAState *to = addDFAState(q); + + if (suppressEdge) { + return to; + } + + addDFAEdge(from, t, to); + return to; +} + +void LexerATNSimulator::addDFAEdge(dfa::DFAState *p, size_t t, dfa::DFAState *q) { + if (/*t < MIN_DFA_EDGE ||*/ t > MAX_DFA_EDGE) { // MIN_DFA_EDGE is 0 + // Only track edges within the DFA bounds + return; + } + + UniqueLock<SharedMutex> edgeLock(atn._edgeMutex); + p->edges[t - MIN_DFA_EDGE] = q; // connect +} + +dfa::DFAState *LexerATNSimulator::addDFAState(ATNConfigSet *configs) { + return addDFAState(configs, true); +} + +dfa::DFAState *LexerATNSimulator::addDFAState(ATNConfigSet *configs, bool suppressEdge) { + /* the lexer evaluates predicates on-the-fly; by this point configs + * should not contain any configurations with unevaluated predicates. 
+ */ + assert(!configs->hasSemanticContext); + + dfa::DFAState *proposed = new dfa::DFAState(std::unique_ptr<ATNConfigSet>(configs)); /* mem-check: managed by the DFA or deleted below */ + Ref<ATNConfig> firstConfigWithRuleStopState = nullptr; + for (const auto &c : configs->configs) { + if (RuleStopState::is(c->state)) { + firstConfigWithRuleStopState = c; + break; + } + } + + if (firstConfigWithRuleStopState != nullptr) { + proposed->isAcceptState = true; + proposed->lexerActionExecutor = downCast<const LexerATNConfig&>(*firstConfigWithRuleStopState).getLexerActionExecutor(); + proposed->prediction = atn.ruleToTokenType[firstConfigWithRuleStopState->state->ruleIndex]; + } + + dfa::DFA &dfa = _decisionToDFA[_mode]; + + { + UniqueLock<SharedMutex> stateLock(atn._stateMutex); + auto [existing, inserted] = dfa.states.insert(proposed); + if (!inserted) { + delete proposed; + proposed = *existing; + } else { + // Previously we did a lookup, then set fields, then inserted. It was `dfa.states.size()`, + // since we already inserted we need to subtract one. + proposed->stateNumber = static_cast<int>(dfa.states.size() - 1); + proposed->configs->setReadonly(true); + } + if (!suppressEdge) { + dfa.s0 = proposed; + } + } + + return proposed; +} + +dfa::DFA& LexerATNSimulator::getDFA(size_t mode) { + return _decisionToDFA[mode]; +} + +std::string LexerATNSimulator::getText(CharStream *input) { + // index is first lookahead char, don't include. 
+ return input->getText(misc::Interval(_startIndex, input->index() - 1)); +} + +size_t LexerATNSimulator::getLine() const { + return _line; +} + +void LexerATNSimulator::setLine(size_t line) { + _line = line; +} + +size_t LexerATNSimulator::getCharPositionInLine() { + return _charPositionInLine; +} + +void LexerATNSimulator::setCharPositionInLine(size_t charPositionInLine) { + _charPositionInLine = charPositionInLine; +} + +void LexerATNSimulator::consume(CharStream *input) { + size_t curChar = input->LA(1); + if (curChar == '\n') { + _line++; + _charPositionInLine = 0; + } else { + _charPositionInLine++; + } + input->consume(); +} + +std::string LexerATNSimulator::getTokenName(size_t t) { + if (t == Token::EOF) { + return "EOF"; + } + return std::string("'") + static_cast<char>(t) + std::string("'"); +} + +void LexerATNSimulator::InitializeInstanceFields() { + _startIndex = 0; + _line = 1; + _charPositionInLine = 0; + _mode = antlr4::Lexer::DEFAULT_MODE; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerATNSimulator.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerATNSimulator.h new file mode 100644 index 0000000000..304430b04d --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerATNSimulator.h @@ -0,0 +1,199 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include <atomic> + +#include "atn/ATNSimulator.h" +#include "atn/LexerATNConfig.h" +#include "atn/ATNConfigSet.h" + +namespace antlr4 { +namespace atn { + + /// "dup" of ParserInterpreter + class ANTLR4CPP_PUBLIC LexerATNSimulator : public ATNSimulator { + protected: + struct ANTLR4CPP_PUBLIC SimState final { + size_t index = INVALID_INDEX; + size_t line = 0; + size_t charPos = INVALID_INDEX; + dfa::DFAState *dfaState = nullptr; + + void reset(); + }; + + public: + static constexpr size_t MIN_DFA_EDGE = 0; + static constexpr size_t MAX_DFA_EDGE = 127; // forces unicode to stay in ATN + + protected: + /// <summary> + /// When we hit an accept state in either the DFA or the ATN, we + /// have to notify the character stream to start buffering characters + /// via <seealso cref="IntStream#mark"/> and record the current state. The current sim state + /// includes the current index into the input, the current line, + /// and current character position in that line. Note that the Lexer is + /// tracking the starting line and characterization of the token. These + /// variables track the "state" of the simulator when it hits an accept state. + /// <p/> + /// We track these variables separately for the DFA and ATN simulation + /// because the DFA simulation often has to fail over to the ATN + /// simulation. If the ATN simulation fails, we need the DFA to fall + /// back to its previously accepted state, if any. If the ATN succeeds, + /// then the ATN does the accept and the DFA simulator that invoked it + /// can simply return the predicted token type. + /// </summary> + Lexer *const _recog; + + /// The current token's starting index into the character stream. + /// Shared across DFA to ATN simulation in case the ATN fails and the + /// DFA did not have a previous accept state. In this case, we use the + /// ATN-generated exception object. + size_t _startIndex; + + /// line number 1..n within the input. 
+ size_t _line; + + /// The index of the character relative to the beginning of the line 0..n-1. + size_t _charPositionInLine; + + public: + std::vector<dfa::DFA> &_decisionToDFA; + + protected: + size_t _mode; + + /// Used during DFA/ATN exec to record the most recent accept configuration info. + SimState _prevAccept; + + public: + LexerATNSimulator(const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, PredictionContextCache &sharedContextCache); + LexerATNSimulator(Lexer *recog, const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, PredictionContextCache &sharedContextCache); + virtual ~LexerATNSimulator() = default; + + virtual void copyState(LexerATNSimulator *simulator); + virtual size_t match(CharStream *input, size_t mode); + virtual void reset() override; + + virtual void clearDFA() override; + + protected: + virtual size_t matchATN(CharStream *input); + virtual size_t execATN(CharStream *input, dfa::DFAState *ds0); + + /// <summary> + /// Get an existing target state for an edge in the DFA. If the target state + /// for the edge has not yet been computed or is otherwise not available, + /// this method returns {@code null}. + /// </summary> + /// <param name="s"> The current DFA state </param> + /// <param name="t"> The next input symbol </param> + /// <returns> The existing target DFA state for the given input symbol + /// {@code t}, or {@code null} if the target state for this edge is not + /// already cached </returns> + virtual dfa::DFAState *getExistingTargetState(dfa::DFAState *s, size_t t); + + /// <summary> + /// Compute a target state for an edge in the DFA, and attempt to add the + /// computed state and corresponding edge to the DFA. + /// </summary> + /// <param name="input"> The input stream </param> + /// <param name="s"> The current DFA state </param> + /// <param name="t"> The next input symbol + /// </param> + /// <returns> The computed target DFA state for the given input symbol + /// {@code t}. 
If {@code t} does not lead to a valid DFA state, this method + /// returns <seealso cref="#ERROR"/>. </returns> + virtual dfa::DFAState *computeTargetState(CharStream *input, dfa::DFAState *s, size_t t); + + virtual size_t failOrAccept(CharStream *input, ATNConfigSet *reach, size_t t); + + /// <summary> + /// Given a starting configuration set, figure out all ATN configurations + /// we can reach upon input {@code t}. Parameter {@code reach} is a return + /// parameter. + /// </summary> + void getReachableConfigSet(CharStream *input, ATNConfigSet *closure_, // closure_ as we have a closure() already + ATNConfigSet *reach, size_t t); + + virtual void accept(CharStream *input, const Ref<const LexerActionExecutor> &lexerActionExecutor, size_t startIndex, size_t index, + size_t line, size_t charPos); + + virtual ATNState *getReachableTarget(const Transition *trans, size_t t); + + virtual std::unique_ptr<ATNConfigSet> computeStartState(CharStream *input, ATNState *p); + + /// <summary> + /// Since the alternatives within any lexer decision are ordered by + /// preference, this method stops pursuing the closure as soon as an accept + /// state is reached. After the first accept state is reached by depth-first + /// search from {@code config}, all other (potentially reachable) states for + /// this rule would have a lower priority. + /// </summary> + /// <returns> {@code true} if an accept state is reached, otherwise + /// {@code false}. </returns> + virtual bool closure(CharStream *input, const Ref<LexerATNConfig> &config, ATNConfigSet *configs, + bool currentAltReachedAcceptState, bool speculative, bool treatEofAsEpsilon); + + // side-effect: can alter configs.hasSemanticContext + virtual Ref<LexerATNConfig> getEpsilonTarget(CharStream *input, const Ref<LexerATNConfig> &config, const Transition *t, + ATNConfigSet *configs, bool speculative, bool treatEofAsEpsilon); + + /// <summary> + /// Evaluate a predicate specified in the lexer. 
+ /// <p/> + /// If {@code speculative} is {@code true}, this method was called before + /// <seealso cref="#consume"/> for the matched character. This method should call + /// <seealso cref="#consume"/> before evaluating the predicate to ensure position + /// sensitive values, including <seealso cref="Lexer#getText"/>, <seealso cref="Lexer#getLine"/>, + /// and <seealso cref="Lexer#getCharPositionInLine"/>, properly reflect the current + /// lexer state. This method should restore {@code input} and the simulator + /// to the original state before returning (i.e. undo the actions made by the + /// call to <seealso cref="#consume"/>). + /// </summary> + /// <param name="input"> The input stream. </param> + /// <param name="ruleIndex"> The rule containing the predicate. </param> + /// <param name="predIndex"> The index of the predicate within the rule. </param> + /// <param name="speculative"> {@code true} if the current index in {@code input} is + /// one character before the predicate's location. + /// </param> + /// <returns> {@code true} if the specified predicate evaluates to + /// {@code true}. </returns> + virtual bool evaluatePredicate(CharStream *input, size_t ruleIndex, size_t predIndex, bool speculative); + + virtual void captureSimState(CharStream *input, dfa::DFAState *dfaState); + virtual dfa::DFAState* addDFAEdge(dfa::DFAState *from, size_t t, ATNConfigSet *q); + virtual void addDFAEdge(dfa::DFAState *p, size_t t, dfa::DFAState *q); + + /// <summary> + /// Add a new DFA state if there isn't one with this set of + /// configurations already. This method also detects the first + /// configuration containing an ATN rule stop state. Later, when + /// traversing the DFA, we will know which rule to accept. 
+ /// </summary> + virtual dfa::DFAState *addDFAState(ATNConfigSet *configs); + + virtual dfa::DFAState *addDFAState(ATNConfigSet *configs, bool suppressEdge); + + public: + dfa::DFA& getDFA(size_t mode); + + /// Get the text matched so far for the current token. + virtual std::string getText(CharStream *input); + virtual size_t getLine() const; + virtual void setLine(size_t line); + virtual size_t getCharPositionInLine(); + virtual void setCharPositionInLine(size_t charPositionInLine); + virtual void consume(CharStream *input); + virtual std::string getTokenName(size_t t); + + private: + void InitializeInstanceFields(); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerAction.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerAction.cpp new file mode 100644 index 0000000000..a9d9a6771b --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerAction.cpp @@ -0,0 +1,15 @@ +#include "LexerAction.h" + +using namespace antlr4::atn; + +size_t LexerAction::hashCode() const { + auto hash = cachedHashCode(); + if (hash == 0) { + hash = hashCodeImpl(); + if (hash == 0) { + hash = std::numeric_limits<size_t>::max(); + } + _hashCode.store(hash, std::memory_order_relaxed); + } + return hash; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerAction.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerAction.h new file mode 100644 index 0000000000..5c30a89608 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerAction.h @@ -0,0 +1,100 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerActionType.h" +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// Represents a single action which can be executed following the successful + /// match of a lexer rule. 
Lexer actions are used for both embedded action syntax + /// and ANTLR 4's new lexer command syntax. + /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerAction { + public: + virtual ~LexerAction() = default; + + /// <summary> + /// Gets the serialization type of the lexer action. + /// </summary> + /// <returns> The serialization type of the lexer action. </returns> + /// + /// IMPORTANT: Unlike Java, this returns LexerActionType::INDEXED_CUSTOM for instances of + /// LexerIndexedCustomAction. If you need the wrapped action type, use + /// LexerIndexedCustomAction::getAction()->getActionType(). + LexerActionType getActionType() const { return _actionType; } + + /// <summary> + /// Gets whether the lexer action is position-dependent. Position-dependent + /// actions may have different semantics depending on the <seealso cref="CharStream"/> + /// index at the time the action is executed. + /// + /// <para>Many lexer commands, including {@code type}, {@code skip}, and + /// {@code more}, do not check the input index during their execution. + /// Actions like this are position-independent, and may be stored more + /// efficiently as part of the <seealso cref="LexerATNConfig#lexerActionExecutor"/>.</para> + /// </summary> + /// <returns> {@code true} if the lexer action semantics can be affected by the + /// position of the input <seealso cref="CharStream"/> at the time it is executed; + /// otherwise, {@code false}. </returns> + bool isPositionDependent() const { return _positionDependent; } + + /// <summary> + /// Execute the lexer action in the context of the specified <seealso cref="Lexer"/>. + /// + /// <para>For position-dependent actions, the input stream must already be + /// positioned correctly prior to calling this method.</para> + /// </summary> + /// <param name="lexer"> The lexer instance. 
</param> + virtual void execute(Lexer *lexer) const = 0; + + size_t hashCode() const; + + virtual bool equals(const LexerAction &other) const = 0; + + virtual std::string toString() const = 0; + + protected: + LexerAction(LexerActionType actionType, bool positionDependent) + : _actionType(actionType), _hashCode(0), _positionDependent(positionDependent) {} + + virtual size_t hashCodeImpl() const = 0; + + size_t cachedHashCode() const { return _hashCode.load(std::memory_order_relaxed); } + + private: + const LexerActionType _actionType; + mutable std::atomic<size_t> _hashCode; + const bool _positionDependent; + }; + + inline bool operator==(const LexerAction &lhs, const LexerAction &rhs) { + return lhs.equals(rhs); + } + + inline bool operator!=(const LexerAction &lhs, const LexerAction &rhs) { + return !operator==(lhs, rhs); + } + +} // namespace atn +} // namespace antlr4 + +namespace std { + + template <> + struct hash<::antlr4::atn::LexerAction> { + size_t operator()(const ::antlr4::atn::LexerAction &lexerAction) const { + return lexerAction.hashCode(); + } + }; + +} // namespace std diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerActionExecutor.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerActionExecutor.cpp new file mode 100644 index 0000000000..490351b892 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerActionExecutor.cpp @@ -0,0 +1,111 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/MurmurHash.h" +#include "atn/LexerIndexedCustomAction.h" +#include "support/CPPUtils.h" +#include "support/Arrays.h" +#include "support/Casts.h" + +#include "atn/LexerActionExecutor.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; +using namespace antlrcpp; + +namespace { + + bool cachedHashCodeEqual(size_t lhs, size_t rhs) { + return lhs == rhs || lhs == 0 || rhs == 0; + } + + bool lexerActionEqual(const Ref<const LexerAction> &lhs, const Ref<const LexerAction> &rhs) { + return *lhs == *rhs; + } + +} + +LexerActionExecutor::LexerActionExecutor(std::vector<Ref<const LexerAction>> lexerActions) + : _lexerActions(std::move(lexerActions)), _hashCode(0) {} + +Ref<const LexerActionExecutor> LexerActionExecutor::append(const Ref<const LexerActionExecutor> &lexerActionExecutor, + Ref<const LexerAction> lexerAction) { + if (lexerActionExecutor == nullptr) { + return std::make_shared<LexerActionExecutor>(std::vector<Ref<const LexerAction>>{ std::move(lexerAction) }); + } + std::vector<Ref<const LexerAction>> lexerActions; + lexerActions.reserve(lexerActionExecutor->_lexerActions.size() + 1); + lexerActions.insert(lexerActions.begin(), lexerActionExecutor->_lexerActions.begin(), lexerActionExecutor->_lexerActions.end()); + lexerActions.push_back(std::move(lexerAction)); + return std::make_shared<LexerActionExecutor>(std::move(lexerActions)); +} + +Ref<const LexerActionExecutor> LexerActionExecutor::fixOffsetBeforeMatch(int offset) const { + std::vector<Ref<const LexerAction>> updatedLexerActions; + for (size_t i = 0; i < _lexerActions.size(); i++) { + if (_lexerActions[i]->isPositionDependent() && !LexerIndexedCustomAction::is(*_lexerActions[i])) { + if (updatedLexerActions.empty()) { + updatedLexerActions = _lexerActions; // Make a copy. 
+ } + updatedLexerActions[i] = std::make_shared<LexerIndexedCustomAction>(offset, _lexerActions[i]); + } + } + if (updatedLexerActions.empty()) { + return shared_from_this(); + } + return std::make_shared<LexerActionExecutor>(std::move(updatedLexerActions)); +} + +const std::vector<Ref<const LexerAction>>& LexerActionExecutor::getLexerActions() const { + return _lexerActions; +} + +void LexerActionExecutor::execute(Lexer *lexer, CharStream *input, size_t startIndex) const { + bool requiresSeek = false; + size_t stopIndex = input->index(); + + // requiresSeek is updated inside the loop below, so the scope guard must capture it by + // reference; a by-value capture would always see the initial `false` and never restore + // the input position to stopIndex on exit. + auto onExit = finally([&requiresSeek, input, stopIndex]() { + if (requiresSeek) { + input->seek(stopIndex); + } + }); + for (const auto &lexerAction : _lexerActions) { + if (LexerIndexedCustomAction::is(*lexerAction)) { + int offset = downCast<const LexerIndexedCustomAction&>(*lexerAction).getOffset(); + input->seek(startIndex + offset); + requiresSeek = (startIndex + offset) != stopIndex; + } else if (lexerAction->isPositionDependent()) { + input->seek(stopIndex); + requiresSeek = false; + } + lexerAction->execute(lexer); + } +} + +size_t LexerActionExecutor::hashCode() const { + auto hash = _hashCode.load(std::memory_order_relaxed); + if (hash == 0) { + hash = MurmurHash::initialize(); + for (const auto &lexerAction : _lexerActions) { + hash = MurmurHash::update(hash, lexerAction); + } + hash = MurmurHash::finish(hash, _lexerActions.size()); + if (hash == 0) { + hash = std::numeric_limits<size_t>::max(); + } + _hashCode.store(hash, std::memory_order_relaxed); + } + return hash; +} + +bool LexerActionExecutor::equals(const LexerActionExecutor &other) const { + if (this == std::addressof(other)) { + return true; + } + return cachedHashCodeEqual(_hashCode.load(std::memory_order_relaxed), other._hashCode.load(std::memory_order_relaxed)) && + _lexerActions.size() == other._lexerActions.size() && + std::equal(_lexerActions.begin(), _lexerActions.end(), other._lexerActions.begin(), lexerActionEqual); +} diff --git 
a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerActionExecutor.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerActionExecutor.h new file mode 100644 index 0000000000..28bb1e28ec --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerActionExecutor.h @@ -0,0 +1,128 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "CharStream.h" +#include "atn/LexerAction.h" + +namespace antlr4 { +namespace atn { + + /// Represents an executor for a sequence of lexer actions which are traversed during + /// the matching operation of a lexer rule (token). + /// + /// <para>The executor tracks position information for position-dependent lexer actions + /// efficiently, ensuring that actions appearing only at the end of the rule do + /// not cause bloating of the <seealso cref="DFA"/> created for the lexer.</para> + class ANTLR4CPP_PUBLIC LexerActionExecutor final : public std::enable_shared_from_this<LexerActionExecutor> { + public: + /// <summary> + /// Constructs an executor for a sequence of <seealso cref="LexerAction"/> actions. </summary> + /// <param name="lexerActions"> The lexer actions to execute. </param> + explicit LexerActionExecutor(std::vector<Ref<const LexerAction>> lexerActions); + + /// <summary> + /// Creates a <seealso cref="LexerActionExecutor"/> which executes the actions for + /// the input {@code lexerActionExecutor} followed by a specified + /// {@code lexerAction}. + /// </summary> + /// <param name="lexerActionExecutor"> The executor for actions already traversed by + /// the lexer while matching a token within a particular + /// <seealso cref="LexerATNConfig"/>. If this is {@code null}, the method behaves as + /// though it were an empty executor. 
</param> + /// <param name="lexerAction"> The lexer action to execute after the actions + /// specified in {@code lexerActionExecutor}. + /// </param> + /// <returns> A <seealso cref="LexerActionExecutor"/> for executing the combined actions + /// of {@code lexerActionExecutor} and {@code lexerAction}. </returns> + static Ref<const LexerActionExecutor> append(const Ref<const LexerActionExecutor> &lexerActionExecutor, + Ref<const LexerAction> lexerAction); + + /// <summary> + /// Creates a <seealso cref="LexerActionExecutor"/> which encodes the current offset + /// for position-dependent lexer actions. + /// + /// <para>Normally, when the executor encounters lexer actions where + /// <seealso cref="LexerAction#isPositionDependent"/> returns {@code true}, it calls + /// <seealso cref="IntStream#seek"/> on the input <seealso cref="CharStream"/> to set the input + /// position to the <em>end</em> of the current token. This behavior provides + /// for efficient DFA representation of lexer actions which appear at the end + /// of a lexer rule, even when the lexer rule matches a variable number of + /// characters.</para> + /// + /// <para>Prior to traversing a match transition in the ATN, the current offset + /// from the token start index is assigned to all position-dependent lexer + /// actions which have not already been assigned a fixed offset. By storing + /// the offsets relative to the token start index, the DFA representation of + /// lexer actions which appear in the middle of tokens remains efficient due + /// to sharing among tokens of the same length, regardless of their absolute + /// position in the input stream.</para> + /// + /// <para>If the current executor already has offsets assigned to all + /// position-dependent lexer actions, the method returns {@code this}.</para> + /// </summary> + /// <param name="offset"> The current offset to assign to all position-dependent + /// lexer actions which do not already have offsets assigned. 
+ /// </param> + /// <returns> A <seealso cref="LexerActionExecutor"/> which stores input stream offsets + /// for all position-dependent lexer actions. </returns> + Ref<const LexerActionExecutor> fixOffsetBeforeMatch(int offset) const; + + /// <summary> + /// Gets the lexer actions to be executed by this executor. </summary> + /// <returns> The lexer actions to be executed by this executor. </returns> + const std::vector<Ref<const LexerAction>>& getLexerActions() const; + + /// <summary> + /// Execute the actions encapsulated by this executor within the context of a + /// particular <seealso cref="Lexer"/>. + /// + /// <para>This method calls <seealso cref="IntStream#seek"/> to set the position of the + /// {@code input} <seealso cref="CharStream"/> prior to calling + /// <seealso cref="LexerAction#execute"/> on a position-dependent action. Before the + /// method returns, the input position will be restored to the same position + /// it was in when the method was invoked.</para> + /// </summary> + /// <param name="lexer"> The lexer instance. </param> + /// <param name="input"> The input stream which is the source for the current token. + /// When this method is called, the current <seealso cref="IntStream#index"/> for + /// {@code input} should be the start of the following token, i.e. 1 + /// character past the end of the current token. </param> + /// <param name="startIndex"> The token start index. This value may be passed to + /// <seealso cref="IntStream#seek"/> to set the {@code input} position to the beginning + /// of the token. 
</param> + void execute(Lexer *lexer, CharStream *input, size_t startIndex) const; + + size_t hashCode() const; + + bool equals(const LexerActionExecutor &other) const; + + private: + const std::vector<Ref<const LexerAction>> _lexerActions; + mutable std::atomic<size_t> _hashCode; + }; + + inline bool operator==(const LexerActionExecutor &lhs, const LexerActionExecutor &rhs) { + return lhs.equals(rhs); + } + + inline bool operator!=(const LexerActionExecutor &lhs, const LexerActionExecutor &rhs) { + return !operator==(lhs, rhs); + } + +} // namespace atn +} // namespace antlr4 + +namespace std { + + template <> + struct hash<::antlr4::atn::LexerActionExecutor> { + size_t operator()(const ::antlr4::atn::LexerActionExecutor &lexerActionExecutor) const { + return lexerActionExecutor.hashCode(); + } + }; + +} // namespace std diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerActionType.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerActionType.h new file mode 100644 index 0000000000..aab4033415 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerActionType.h @@ -0,0 +1,57 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// Represents the serialization type of a <seealso cref="LexerAction"/>. + /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + enum class LexerActionType : size_t { + /// <summary> + /// The type of a <seealso cref="LexerChannelAction"/> action. + /// </summary> + CHANNEL = 0, + /// <summary> + /// The type of a <seealso cref="LexerCustomAction"/> action. + /// </summary> + CUSTOM, + /// <summary> + /// The type of a <seealso cref="LexerModeAction"/> action. + /// </summary> + MODE, + /// <summary> + /// The type of a <seealso cref="LexerMoreAction"/> action. 
+ /// </summary> + MORE, + /// <summary> + /// The type of a <seealso cref="LexerPopModeAction"/> action. + /// </summary> + POP_MODE, + /// <summary> + /// The type of a <seealso cref="LexerPushModeAction"/> action. + /// </summary> + PUSH_MODE, + /// <summary> + /// The type of a <seealso cref="LexerSkipAction"/> action. + /// </summary> + SKIP, + /// <summary> + /// The type of a <seealso cref="LexerTypeAction"/> action. + /// </summary> + TYPE, + + INDEXED_CUSTOM, + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerChannelAction.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerChannelAction.cpp new file mode 100644 index 0000000000..b6cda6cff0 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerChannelAction.cpp @@ -0,0 +1,43 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" +#include "support/Casts.h" + +#include "atn/LexerChannelAction.h" + +using namespace antlr4::atn; +using namespace antlr4::misc; +using namespace antlrcpp; + +LexerChannelAction::LexerChannelAction(int channel) + : LexerAction(LexerActionType::CHANNEL, false), _channel(channel) {} + +void LexerChannelAction::execute(Lexer *lexer) const { + lexer->setChannel(getChannel()); +} + +size_t LexerChannelAction::hashCodeImpl() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getActionType())); + hash = MurmurHash::update(hash, getChannel()); + return MurmurHash::finish(hash, 2); +} + +bool LexerChannelAction::equals(const LexerAction &other) const { + if (this == std::addressof(other)) { + return true; + } + if (getActionType() != other.getActionType()) { + return false; + } + const auto &lexerAction = downCast<const LexerChannelAction&>(other); + return getChannel() == lexerAction.getChannel(); +} + +std::string LexerChannelAction::toString() const { + return "channel(" + std::to_string(getChannel()) + ")"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerChannelAction.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerChannelAction.h new file mode 100644 index 0000000000..1a5c53efef --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerChannelAction.h @@ -0,0 +1,59 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/LexerActionType.h" + +namespace antlr4 { +namespace atn { + + using antlr4::Lexer; + + /// <summary> + /// Implements the {@code channel} lexer action by calling + /// <seealso cref="Lexer#setChannel"/> with the assigned channel. 
+ /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerChannelAction final : public LexerAction { + public: + static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::CHANNEL; } + + static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); } + + /// <summary> + /// Constructs a new {@code channel} action with the specified channel value. </summary> + /// <param name="channel"> The channel value to pass to <seealso cref="Lexer#setChannel"/>. </param> + explicit LexerChannelAction(int channel); + + /// <summary> + /// Gets the channel to use for the <seealso cref="Token"/> created by the lexer. + /// </summary> + /// <returns> The channel to use for the <seealso cref="Token"/> created by the lexer. </returns> + int getChannel() const { return _channel; } + + /// <summary> + /// {@inheritDoc} + /// + /// <para>This action is implemented by calling <seealso cref="Lexer#setChannel"/> with the + /// value provided by <seealso cref="#getChannel"/>.</para> + /// </summary> + void execute(Lexer *lexer) const override; + + bool equals(const LexerAction &other) const override; + std::string toString() const override; + + protected: + size_t hashCodeImpl() const override; + + private: + const int _channel; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerCustomAction.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerCustomAction.cpp new file mode 100644 index 0000000000..b6edd89ea1 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerCustomAction.cpp @@ -0,0 +1,45 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" +#include "support/Casts.h" + +#include "atn/LexerCustomAction.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; +using namespace antlrcpp; + +LexerCustomAction::LexerCustomAction(size_t ruleIndex, size_t actionIndex) + : LexerAction(LexerActionType::CUSTOM, true), _ruleIndex(ruleIndex), _actionIndex(actionIndex) {} + +void LexerCustomAction::execute(Lexer *lexer) const { + lexer->action(nullptr, getRuleIndex(), getActionIndex()); +} + +size_t LexerCustomAction::hashCodeImpl() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getActionType())); + hash = MurmurHash::update(hash, getRuleIndex()); + hash = MurmurHash::update(hash, getActionIndex()); + return MurmurHash::finish(hash, 3); +} + +bool LexerCustomAction::equals(const LexerAction &other) const { + if (this == std::addressof(other)) { + return true; + } + if (getActionType() != other.getActionType()) { + return false; + } + const auto &lexerAction = downCast<const LexerCustomAction&>(other); + return getRuleIndex() == lexerAction.getRuleIndex() && getActionIndex() == lexerAction.getActionIndex(); +} + +std::string LexerCustomAction::toString() const { + return "custom(" + std::to_string(getRuleIndex()) + ", " + std::to_string(getActionIndex()) + ")"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerCustomAction.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerCustomAction.h new file mode 100644 index 0000000000..7973271c62 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerCustomAction.h @@ -0,0 +1,75 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/LexerActionType.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// Executes a custom lexer action by calling <seealso cref="Recognizer#action"/> with the + /// rule and action indexes assigned to the custom action. The implementation of + /// a custom action is added to the generated code for the lexer in an override + /// of <seealso cref="Recognizer#action"/> when the grammar is compiled. + /// + /// <para>This class may represent embedded actions created with the <code>{...}</code> + /// syntax in ANTLR 4, as well as actions created for lexer commands where the + /// command argument could not be evaluated when the grammar was compiled.</para> + /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerCustomAction final : public LexerAction { + public: + static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::CUSTOM; } + + static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); } + + /// <summary> + /// Constructs a custom lexer action with the specified rule and action + /// indexes. + /// </summary> + /// <param name="ruleIndex"> The rule index to use for calls to + /// <seealso cref="Recognizer#action"/>. </param> + /// <param name="actionIndex"> The action index to use for calls to + /// <seealso cref="Recognizer#action"/>. </param> + LexerCustomAction(size_t ruleIndex, size_t actionIndex); + + /// <summary> + /// Gets the rule index to use for calls to <seealso cref="Recognizer#action"/>. + /// </summary> + /// <returns> The rule index for the custom action. </returns> + size_t getRuleIndex() const { return _ruleIndex; } + + /// <summary> + /// Gets the action index to use for calls to <seealso cref="Recognizer#action"/>. + /// </summary> + /// <returns> The action index for the custom action. 
</returns> + size_t getActionIndex() const { return _actionIndex; } + + /// <summary> + /// {@inheritDoc} + /// + /// <para>Custom actions are implemented by calling <seealso cref="Lexer#action"/> with the + /// appropriate rule and action indexes.</para> + /// </summary> + void execute(Lexer *lexer) const override; + + bool equals(const LexerAction &other) const override; + std::string toString() const override; + + protected: + size_t hashCodeImpl() const override; + + private: + const size_t _ruleIndex; + const size_t _actionIndex; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerIndexedCustomAction.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerIndexedCustomAction.cpp new file mode 100644 index 0000000000..114863702c --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerIndexedCustomAction.cpp @@ -0,0 +1,57 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" +#include "support/CPPUtils.h" +#include "support/Casts.h" + +#include "atn/LexerIndexedCustomAction.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; +using namespace antlrcpp; + +namespace { + + bool cachedHashCodeEqual(size_t lhs, size_t rhs) { + return lhs == rhs || lhs == 0 || rhs == 0; + } + +} + +LexerIndexedCustomAction::LexerIndexedCustomAction(int offset, Ref<const LexerAction> action) + : LexerAction(LexerActionType::INDEXED_CUSTOM, true), _action(std::move(action)), _offset(offset) {} + +void LexerIndexedCustomAction::execute(Lexer *lexer) const { + // assume the input stream position was properly set by the calling code + getAction()->execute(lexer); +} + +size_t LexerIndexedCustomAction::hashCodeImpl() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getActionType())); + hash = MurmurHash::update(hash, getOffset()); + hash = MurmurHash::update(hash, getAction()); + return MurmurHash::finish(hash, 3); +} + +bool LexerIndexedCustomAction::equals(const LexerAction &other) const { + if (this == std::addressof(other)) { + return true; + } + if (getActionType() != other.getActionType()) { + return false; + } + const auto &lexerAction = downCast<const LexerIndexedCustomAction&>(other); + return getOffset() == lexerAction.getOffset() && + cachedHashCodeEqual(cachedHashCode(), lexerAction.cachedHashCode()) && + *getAction() == *lexerAction.getAction(); +} + +std::string LexerIndexedCustomAction::toString() const { + return "indexedCustom(" + std::to_string(getOffset()) + ", " + getAction()->toString() + ")"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerIndexedCustomAction.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerIndexedCustomAction.h new file mode 100644 index 0000000000..5693bac62b --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerIndexedCustomAction.h @@ -0,0 +1,76 @@ 
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "RuleContext.h" +#include "atn/LexerAction.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// This implementation of <seealso cref="LexerAction"/> is used for tracking input offsets + /// for position-dependent actions within a <seealso cref="LexerActionExecutor"/>. + /// + /// <para>This action is not serialized as part of the ATN, and is only required for + /// position-dependent lexer actions which appear at a location other than the + /// end of a rule. For more information about DFA optimizations employed for + /// lexer actions, see <seealso cref="LexerActionExecutor#append"/> and + /// <seealso cref="LexerActionExecutor#fixOffsetBeforeMatch"/>.</para> + /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerIndexedCustomAction final : public LexerAction { + public: + static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::INDEXED_CUSTOM; } + + static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); } + + /// <summary> + /// Constructs a new indexed custom action by associating a character offset + /// with a <seealso cref="LexerAction"/>. + /// + /// <para>Note: This class is only required for lexer actions for which + /// <seealso cref="LexerAction#isPositionDependent"/> returns {@code true}.</para> + /// </summary> + /// <param name="offset"> The offset into the input <seealso cref="CharStream"/>, relative to + /// the token start index, at which the specified lexer action should be + /// executed. </param> + /// <param name="action"> The lexer action to execute at a particular offset in the + /// input <seealso cref="CharStream"/>. 
</param> + LexerIndexedCustomAction(int offset, Ref<const LexerAction> action); + + /// <summary> + /// Gets the location in the input <seealso cref="CharStream"/> at which the lexer + /// action should be executed. The value is interpreted as an offset relative + /// to the token start index. + /// </summary> + /// <returns> The location in the input <seealso cref="CharStream"/> at which the lexer + /// action should be executed. </returns> + int getOffset() const { return _offset; } + + /// <summary> + /// Gets the lexer action to execute. + /// </summary> + /// <returns> A <seealso cref="LexerAction"/> object which executes the lexer action. </returns> + const Ref<const LexerAction>& getAction() const { return _action; } + + void execute(Lexer *lexer) const override; + bool equals(const LexerAction &other) const override; + std::string toString() const override; + + protected: + size_t hashCodeImpl() const override; + + private: + const Ref<const LexerAction> _action; + const int _offset; + }; + +} // namespace atn +} // namespace antlr4 + diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerModeAction.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerModeAction.cpp new file mode 100644 index 0000000000..a4ca3b3d79 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerModeAction.cpp @@ -0,0 +1,43 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" +#include "support/Casts.h" + +#include "atn/LexerModeAction.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; +using namespace antlrcpp; + +LexerModeAction::LexerModeAction(int mode) : LexerAction(LexerActionType::MODE, false), _mode(mode) {} + +void LexerModeAction::execute(Lexer *lexer) const { + lexer->setMode(getMode()); +} + +size_t LexerModeAction::hashCodeImpl() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getActionType())); + hash = MurmurHash::update(hash, getMode()); + return MurmurHash::finish(hash, 2); +} + +bool LexerModeAction::equals(const LexerAction &other) const { + if (this == std::addressof(other)) { + return true; + } + if (getActionType() != other.getActionType()) { + return false; + } + const auto &lexerAction = downCast<const LexerModeAction&>(other); + return getMode() == lexerAction.getMode(); +} + +std::string LexerModeAction::toString() const { + return "mode(" + std::to_string(getMode()) + ")"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerModeAction.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerModeAction.h new file mode 100644 index 0000000000..6fa61a2e67 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerModeAction.h @@ -0,0 +1,57 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/LexerActionType.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// Implements the {@code mode} lexer action by calling <seealso cref="Lexer#setMode"/> with + /// the assigned mode. 
+ /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerModeAction final : public LexerAction { + public: + static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::MODE; } + + static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); } + + /// <summary> + /// Constructs a new {@code mode} action with the specified mode value. </summary> + /// <param name="mode"> The mode value to pass to <seealso cref="Lexer#setMode"/>. </param> + explicit LexerModeAction(int mode); + + /// <summary> + /// Get the lexer mode this action should transition the lexer to. + /// </summary> + /// <returns> The lexer mode for this {@code mode} command. </returns> + int getMode() const { return _mode; } + + /// <summary> + /// {@inheritDoc} + /// + /// <para>This action is implemented by calling <seealso cref="Lexer#setMode"/> with the + /// value provided by <seealso cref="#getMode"/>.</para> + /// </summary> + void execute(Lexer *lexer) const override; + + bool equals(const LexerAction &obj) const override; + std::string toString() const override; + + protected: + size_t hashCodeImpl() const override; + + private: + const int _mode; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerMoreAction.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerMoreAction.cpp new file mode 100644 index 0000000000..30df87b7b6 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerMoreAction.cpp @@ -0,0 +1,36 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" + +#include "atn/LexerMoreAction.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; + +const Ref<const LexerMoreAction>& LexerMoreAction::getInstance() { + static const Ref<const LexerMoreAction> instance(new LexerMoreAction()); + return instance; +} + +void LexerMoreAction::execute(Lexer *lexer) const { + lexer->more(); +} + +size_t LexerMoreAction::hashCodeImpl() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getActionType())); + return MurmurHash::finish(hash, 1); +} + +bool LexerMoreAction::equals(const LexerAction &other) const { + return this == std::addressof(other); +} + +std::string LexerMoreAction::toString() const { + return "more"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerMoreAction.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerMoreAction.h new file mode 100644 index 0000000000..fc4b8fcbfc --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerMoreAction.h @@ -0,0 +1,53 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/LexerActionType.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// Implements the {@code more} lexer action by calling <seealso cref="Lexer#more"/>. 
+ /// + /// <para>The {@code more} command does not have any parameters, so this action is + /// implemented as a singleton instance exposed by <seealso cref="#getInstance"/>.</para> + /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerMoreAction final : public LexerAction { + public: + static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::MORE; } + + static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); } + + /// <summary> + /// Provides a singleton instance of this parameterless lexer action. + /// </summary> + static const Ref<const LexerMoreAction>& getInstance(); + + /// <summary> + /// {@inheritDoc} + /// + /// <para>This action is implemented by calling <seealso cref="Lexer#more"/>.</para> + /// </summary> + void execute(Lexer *lexer) const override; + + bool equals(const LexerAction &obj) const override; + std::string toString() const override; + + protected: + size_t hashCodeImpl() const override; + + private: + /// Constructs the singleton instance of the lexer {@code more} command. + LexerMoreAction() : LexerAction(LexerActionType::MORE, false) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPopModeAction.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPopModeAction.cpp new file mode 100644 index 0000000000..5192049348 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPopModeAction.cpp @@ -0,0 +1,36 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" + +#include "atn/LexerPopModeAction.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; + +const Ref<const LexerPopModeAction>& LexerPopModeAction::getInstance() { + static const Ref<const LexerPopModeAction> instance(new LexerPopModeAction()); + return instance; +} + +void LexerPopModeAction::execute(Lexer *lexer) const { + lexer->popMode(); +} + +size_t LexerPopModeAction::hashCodeImpl() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getActionType())); + return MurmurHash::finish(hash, 1); +} + +bool LexerPopModeAction::equals(const LexerAction &other) const { + return this == std::addressof(other); +} + +std::string LexerPopModeAction::toString() const { + return "popMode"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPopModeAction.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPopModeAction.h new file mode 100644 index 0000000000..8d712cad8c --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPopModeAction.h @@ -0,0 +1,53 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/LexerActionType.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// Implements the {@code popMode} lexer action by calling <seealso cref="Lexer#popMode"/>. 
+ /// + /// <para>The {@code popMode} command does not have any parameters, so this action is + /// implemented as a singleton instance exposed by <seealso cref="#getInstance"/>.</para> + /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerPopModeAction final : public LexerAction { + public: + static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::POP_MODE; } + + static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); } + + /// <summary> + /// Provides a singleton instance of this parameterless lexer action. + /// </summary> + static const Ref<const LexerPopModeAction>& getInstance(); + + /// <summary> + /// {@inheritDoc} + /// + /// <para>This action is implemented by calling <seealso cref="Lexer#popMode"/>.</para> + /// </summary> + void execute(Lexer *lexer) const override; + + bool equals(const LexerAction &other) const override; + std::string toString() const override; + + protected: + size_t hashCodeImpl() const override; + + private: + /// Constructs the singleton instance of the lexer {@code popMode} command. + LexerPopModeAction() : LexerAction(LexerActionType::POP_MODE, false) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPushModeAction.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPushModeAction.cpp new file mode 100644 index 0000000000..3ebd21fab2 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPushModeAction.cpp @@ -0,0 +1,43 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" +#include "support/Casts.h" + +#include "atn/LexerPushModeAction.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; +using namespace antlrcpp; + +LexerPushModeAction::LexerPushModeAction(int mode) : LexerAction(LexerActionType::PUSH_MODE, false), _mode(mode) {} + +void LexerPushModeAction::execute(Lexer *lexer) const { + lexer->pushMode(getMode()); +} + +size_t LexerPushModeAction::hashCodeImpl() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getActionType())); + hash = MurmurHash::update(hash, getMode()); + return MurmurHash::finish(hash, 2); +} + +bool LexerPushModeAction::equals(const LexerAction &other) const { + if (this == std::addressof(other)) { + return true; + } + if (getActionType() != other.getActionType()) { + return false; + } + const auto &lexerAction = downCast<const LexerPushModeAction&>(other); + return getMode() == lexerAction.getMode(); +} + +std::string LexerPushModeAction::toString() const { + return "pushMode(" + std::to_string(getMode()) + ")"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPushModeAction.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPushModeAction.h new file mode 100644 index 0000000000..32b706b583 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPushModeAction.h @@ -0,0 +1,57 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/LexerActionType.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// Implements the {@code pushMode} lexer action by calling + /// <seealso cref="Lexer#pushMode"/> with the assigned mode. 
+ /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerPushModeAction final : public LexerAction { + public: + static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::PUSH_MODE; } + + static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); } + + /// <summary> + /// Constructs a new {@code pushMode} action with the specified mode value. </summary> + /// <param name="mode"> The mode value to pass to <seealso cref="Lexer#pushMode"/>. </param> + explicit LexerPushModeAction(int mode); + + /// <summary> + /// Get the lexer mode this action should transition the lexer to. + /// </summary> + /// <returns> The lexer mode for this {@code pushMode} command. </returns> + int getMode() const { return _mode; } + + /// <summary> + /// {@inheritDoc} + /// + /// <para>This action is implemented by calling <seealso cref="Lexer#pushMode"/> with the + /// value provided by <seealso cref="#getMode"/>.</para> + /// </summary> + void execute(Lexer *lexer) const override; + + bool equals(const LexerAction &obj) const override; + std::string toString() const override; + + protected: + size_t hashCodeImpl() const override; + + private: + const int _mode; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerSkipAction.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerSkipAction.cpp new file mode 100644 index 0000000000..72f9de3e1f --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerSkipAction.cpp @@ -0,0 +1,36 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" + +#include "atn/LexerSkipAction.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; + +const Ref<const LexerSkipAction>& LexerSkipAction::getInstance() { + static const Ref<const LexerSkipAction> instance(new LexerSkipAction()); + return instance; +} + +void LexerSkipAction::execute(Lexer *lexer) const { + lexer->skip(); +} + +size_t LexerSkipAction::hashCodeImpl() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getActionType())); + return MurmurHash::finish(hash, 1); +} + +bool LexerSkipAction::equals(const LexerAction &other) const { + return this == std::addressof(other); +} + +std::string LexerSkipAction::toString() const { + return "skip"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerSkipAction.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerSkipAction.h new file mode 100644 index 0000000000..afdf4702f2 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerSkipAction.h @@ -0,0 +1,51 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/LexerActionType.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// Implements the {@code skip} lexer action by calling <seealso cref="Lexer#skip"/>. 
+ /// + /// <para>The {@code skip} command does not have any parameters, so this action is + /// implemented as a singleton instance exposed by <seealso cref="#getInstance"/>.</para> + /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerSkipAction final : public LexerAction { + public: + static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::SKIP; } + + static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); } + + /// Provides a singleton instance of this parameterless lexer action. + static const Ref<const LexerSkipAction>& getInstance(); + + /// <summary> + /// {@inheritDoc} + /// + /// <para>This action is implemented by calling <seealso cref="Lexer#skip"/>.</para> + /// </summary> + void execute(Lexer *lexer) const override; + + bool equals(const LexerAction &obj) const override; + std::string toString() const override; + + protected: + size_t hashCodeImpl() const override; + + private: + /// Constructs the singleton instance of the lexer {@code skip} command. + LexerSkipAction() : LexerAction(LexerActionType::SKIP, false) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerTypeAction.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerTypeAction.cpp new file mode 100644 index 0000000000..55ccf358ba --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerTypeAction.cpp @@ -0,0 +1,43 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" +#include "support/Casts.h" + +#include "atn/LexerTypeAction.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; +using namespace antlrcpp; + +/* Stores the token type in _type and initialises the base with LexerActionType::TYPE. */ +LexerTypeAction::LexerTypeAction(int type) : LexerAction(LexerActionType::TYPE, false), _type(type) {} + +/* Calls lexer->setType(getType()); lexer is dereferenced without a null check. */ +void LexerTypeAction::execute(Lexer *lexer) const { + lexer->setType(getType()); +} + +/* Hashes the action type and the token type; the 2 passed to finish equals the number of update calls. */ +size_t LexerTypeAction::hashCodeImpl() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getActionType())); + hash = MurmurHash::update(hash, getType()); + return MurmurHash::finish(hash, 2); +} + +/* True for the same object, or for an action with the same action type and the same getType(). The downCast runs only after the action types have been found equal. */ +bool LexerTypeAction::equals(const LexerAction &other) const { + if (this == std::addressof(other)) { + return true; + } + if (getActionType() != other.getActionType()) { + return false; + } + const auto &lexerAction = downCast<const LexerTypeAction&>(other); + return getType() == lexerAction.getType(); +} + +/* Produces type(N), where N is getType() in decimal. */ +std::string LexerTypeAction::toString() const { + return "type(" + std::to_string(getType()) + ")"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerTypeAction.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerTypeAction.h new file mode 100644 index 0000000000..1cd7d71fd3 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerTypeAction.h @@ -0,0 +1,51 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerActionType.h" +#include "atn/LexerAction.h" + +namespace antlr4 { +namespace atn { + + /// Implements the {@code type} lexer action by calling <seealso cref="Lexer#setType"/> + /// with the assigned type. 
+ class ANTLR4CPP_PUBLIC LexerTypeAction final : public LexerAction { + public: + /* True when the given action reports LexerActionType::TYPE as its type. */ + static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::TYPE; } + + /* Pointer overload: false for nullptr, otherwise the result of the reference overload. */ + static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); } + + /// <summary> + /// Constructs a new {@code type} action with the specified token type value. </summary> + /// <param name="type"> The type to assign to the token using <seealso cref="Lexer#setType"/>. </param> + explicit LexerTypeAction(int type); + + /// <summary> + /// Gets the type to assign to a token created by the lexer. </summary> + /// <returns> The type to assign to a token created by the lexer. </returns> + int getType() const { return _type; } + + /// <summary> + /// {@inheritDoc} + /// + /// <para>This action is implemented by calling <seealso cref="Lexer#setType"/> with the + /// value provided by <seealso cref="#getType"/>.</para> + /// </summary> + void execute(Lexer *lexer) const override; + + /// True for the same object, or for an action with the same action type and equal getType() (see LexerTypeAction.cpp). + bool equals(const LexerAction &obj) const override; + /// Returns type(N), where N is getType() in decimal. + std::string toString() const override; + + protected: + /// Hash built from the action type and getType() (see LexerTypeAction.cpp). + size_t hashCodeImpl() const override; + + private: + /// Token type given to the constructor; const, so it never changes afterwards. + const int _type; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LookaheadEventInfo.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LookaheadEventInfo.cpp new file mode 100644 index 0000000000..aa3f9124c7 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LookaheadEventInfo.cpp @@ -0,0 +1,16 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/LookaheadEventInfo.h" + +using namespace antlr4; +using namespace antlr4::atn; + +/* Passes every argument except predictedAlt to the DecisionEventInfo base, then stores predictedAlt in the member of the same name. */ +LookaheadEventInfo::LookaheadEventInfo(size_t decision, ATNConfigSet *configs, size_t predictedAlt, + TokenStream *input, size_t startIndex, size_t stopIndex, bool fullCtx) + : DecisionEventInfo(decision, configs, input, startIndex, stopIndex, fullCtx) { + + this->predictedAlt = predictedAlt; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LookaheadEventInfo.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LookaheadEventInfo.h new file mode 100644 index 0000000000..f5fc24fde2 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LookaheadEventInfo.h @@ -0,0 +1,42 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionEventInfo.h" + +namespace antlr4 { +namespace atn { + + /// This class represents profiling event information for tracking the lookahead + /// depth required in order to make a prediction. + class ANTLR4CPP_PUBLIC LookaheadEventInfo : public DecisionEventInfo { + public: + /// The alternative chosen by adaptivePredict(), not necessarily + /// the outermost alt shown for a rule; left-recursive rules have + /// user-level alts that differ from the rewritten rule with a (...) block + /// and a (..)* loop. + size_t predictedAlt = 0; + + /// <summary> + /// Constructs a new instance of the <seealso cref="LookaheadEventInfo"/> class with + /// the specified detailed lookahead information. 
+ /// </summary> + /// <param name="decision"> The decision number </param> + /// <param name="configs"> The final configuration set containing the necessary + /// information to determine the result of a prediction, or {@code null} if + /// the final configuration set is not available </param> + /// <param name="predictedAlt"> The alternative to store in <seealso cref="#predictedAlt"/> </param> + /// <param name="input"> The input token stream </param> + /// <param name="startIndex"> The start index for the current prediction </param> + /// <param name="stopIndex"> The index at which the prediction was finally made </param> + /// <param name="fullCtx"> {@code true} if the current lookahead is part of an LL + /// prediction; otherwise, {@code false} if the current lookahead is part of + /// an SLL prediction </param> + LookaheadEventInfo(size_t decision, ATNConfigSet *configs, size_t predictedAlt, TokenStream *input, size_t startIndex, + size_t stopIndex, bool fullCtx); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LoopEndState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LoopEndState.h new file mode 100644 index 0000000000..2616b1c4b8 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LoopEndState.h @@ -0,0 +1,26 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ATNState.h" + +namespace antlr4 { +namespace atn { + + /// Marks the end of a * or + loop. 
+ class ANTLR4CPP_PUBLIC LoopEndState final : public ATNState { + public: + /* True when the given state reports ATNStateType::LOOP_END as its type. */ + static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::LOOP_END; } + + /* Pointer overload: false for nullptr, otherwise the result of the reference overload. */ + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + /* Starts as nullptr; presumably set to the loop-back state by ATN construction code outside this header - confirm. */ + ATNState *loopBackState = nullptr; + + LoopEndState() : ATNState(ATNStateType::LOOP_END) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/NotSetTransition.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/NotSetTransition.cpp new file mode 100644 index 0000000000..ba796d7188 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/NotSetTransition.cpp @@ -0,0 +1,22 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/NotSetTransition.h" +#include "atn/ATNState.h" +#include "misc/IntervalSet.h" + +using namespace antlr4; +using namespace antlr4::atn; + +/* Takes the set by value and moves it into the SetTransition base, tagged as TransitionType::NOT_SET. */ +NotSetTransition::NotSetTransition(ATNState *target, misc::IntervalSet set) : SetTransition(TransitionType::NOT_SET, target, std::move(set)) {} + +/* True when symbol lies within [minVocabSymbol, maxVocabSymbol] and SetTransition::matches returns false for it. */ +bool NotSetTransition::matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const { + return symbol >= minVocabSymbol && symbol <= maxVocabSymbol + && !SetTransition::matches(symbol, minVocabSymbol, maxVocabSymbol); +} + +/* Concatenates the NOT_SET prefix, Transition::toString() and SetTransition::toString() wrapped in braces. */ +std::string NotSetTransition::toString() const { + return "NOT_SET " + Transition::toString() + " { " + SetTransition::toString() + " }"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/NotSetTransition.h b/contrib/libs/antlr4_cpp_runtime/src/atn/NotSetTransition.h new file mode 100644 index 0000000000..ef937a60fe --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/NotSetTransition.h @@ -0,0 +1,27 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/SetTransition.h" + +namespace antlr4 { +namespace atn { + + /* SetTransition subclass tagged TransitionType::NOT_SET; its matches() accepts in-range symbols that SetTransition::matches rejects (see NotSetTransition.cpp). */ + class ANTLR4CPP_PUBLIC NotSetTransition final : public SetTransition { + public: + /* True when the given transition reports TransitionType::NOT_SET as its type. */ + static bool is(const Transition &transition) { return transition.getTransitionType() == TransitionType::NOT_SET; } + + /* Pointer overload: false for nullptr, otherwise the result of the reference overload. */ + static bool is(const Transition *transition) { return transition != nullptr && is(*transition); } + + NotSetTransition(ATNState *target, misc::IntervalSet set); + + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + virtual std::string toString() const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/OrderedATNConfigSet.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/OrderedATNConfigSet.cpp new file mode 100644 index 0000000000..48655424d8 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/OrderedATNConfigSet.cpp @@ -0,0 +1,16 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/OrderedATNConfigSet.h" + +using namespace antlr4::atn; + +/* Forwards to ATNConfig::hashCode(). */ +size_t OrderedATNConfigSet::hashCode(const ATNConfig &atnConfig) const { + return atnConfig.hashCode(); +} + +/* Forwards to operator== on ATNConfig. */ +bool OrderedATNConfigSet::equals(const ATNConfig &lhs, const ATNConfig &rhs) const { + return lhs == rhs; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/OrderedATNConfigSet.h b/contrib/libs/antlr4_cpp_runtime/src/atn/OrderedATNConfigSet.h new file mode 100644 index 0000000000..18bf6bcb21 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/OrderedATNConfigSet.h @@ -0,0 +1,25 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ATNConfigSet.h" +#include "atn/ATNConfig.h" + +namespace antlr4 { +namespace atn { + + /* ATNConfigSet whose hashCode and equals overrides forward to ATNConfig::hashCode() and to operator== on ATNConfig (see OrderedATNConfigSet.cpp). */ + class ANTLR4CPP_PUBLIC OrderedATNConfigSet final : public ATNConfigSet { + public: + OrderedATNConfigSet() = default; + + private: + /* Both overrides are private; they are presumably invoked through the ATNConfigSet base - confirm. */ + size_t hashCode(const ATNConfig &atnConfig) const override; + + bool equals(const ATNConfig &lhs, const ATNConfig &rhs) const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ParseInfo.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ParseInfo.cpp new file mode 100644 index 0000000000..95a89ac855 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ParseInfo.cpp @@ -0,0 +1,102 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/ProfilingATNSimulator.h" +#include "dfa/DFA.h" + +#include "atn/ParseInfo.h" + +using namespace antlr4::atn; + +/* Stores the simulator pointer as given; nothing in this file deletes it. */ +ParseInfo::ParseInfo(ProfilingATNSimulator *atnSimulator) : _atnSimulator(atnSimulator) { +} + +ParseInfo::~ParseInfo() { +} + +/* Returns the result of the simulator's getDecisionInfo() by value. */ +std::vector<DecisionInfo> ParseInfo::getDecisionInfo() { + return _atnSimulator->getDecisionInfo(); +} + +/* Collects every index i for which decisions[i].LL_Fallback is greater than zero. */ +std::vector<size_t> ParseInfo::getLLDecisions() { + std::vector<DecisionInfo> decisions = _atnSimulator->getDecisionInfo(); + std::vector<size_t> LL; + for (size_t i = 0; i < decisions.size(); ++i) { + long long fallBack = decisions[i].LL_Fallback; + if (fallBack > 0) { + LL.push_back(i); + } + } + return LL; +} + +/* Each getTotal... function below takes a fresh copy of the decision vector and sums one DecisionInfo field over it (two fields in getTotalATNLookaheadOps). */ +long long ParseInfo::getTotalTimeInPrediction() { + std::vector<DecisionInfo> decisions = _atnSimulator->getDecisionInfo(); + long long t = 0; + for (size_t i = 0; i < decisions.size(); ++i) { + t += decisions[i].timeInPrediction; + } + return t; +} + +long long ParseInfo::getTotalSLLLookaheadOps() { + std::vector<DecisionInfo> decisions = _atnSimulator->getDecisionInfo(); + long long k = 0; + for (size_t i = 0; i < decisions.size(); ++i) { + k += decisions[i].SLL_TotalLook; + } + return k; +} + +long long ParseInfo::getTotalLLLookaheadOps() { + std::vector<DecisionInfo> decisions = _atnSimulator->getDecisionInfo(); + long long k = 0; + for (size_t i = 0; i < decisions.size(); i++) { + k += decisions[i].LL_TotalLook; + } + return k; +} + +long long ParseInfo::getTotalSLLATNLookaheadOps() { + std::vector<DecisionInfo> decisions = _atnSimulator->getDecisionInfo(); + long long k = 0; + for (size_t i = 0; i < decisions.size(); ++i) { + k += decisions[i].SLL_ATNTransitions; + } + return k; +} + +long long ParseInfo::getTotalLLATNLookaheadOps() { + std::vector<DecisionInfo> decisions = _atnSimulator->getDecisionInfo(); + long long k = 0; + for (size_t i = 0; i < decisions.size(); ++i) { + k += decisions[i].LL_ATNTransitions; + } + return k; +} + +long long ParseInfo::getTotalATNLookaheadOps() { + 
std::vector<DecisionInfo> decisions = _atnSimulator->getDecisionInfo(); + long long k = 0; + for (size_t i = 0; i < decisions.size(); ++i) { + k += decisions[i].SLL_ATNTransitions; + k += decisions[i].LL_ATNTransitions; + } + return k; +} + +/* Adds up getDFASize(i) for every index i of the simulator's decisionToDFA vector. */ +size_t ParseInfo::getDFASize() { + size_t n = 0; + std::vector<dfa::DFA> &decisionToDFA = _atnSimulator->decisionToDFA; + for (size_t i = 0; i < decisionToDFA.size(); ++i) { + n += getDFASize(i); + } + return n; +} + +/* Returns states.size() of decisionToDFA[decision]; the index is not range-checked here. */ +size_t ParseInfo::getDFASize(size_t decision) { + dfa::DFA &decisionToDFA = _atnSimulator->decisionToDFA[decision]; + return decisionToDFA.states.size(); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ParseInfo.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ParseInfo.h new file mode 100644 index 0000000000..7ced7de433 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ParseInfo.h @@ -0,0 +1,102 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionInfo.h" + +namespace antlr4 { +namespace atn { + + class ProfilingATNSimulator; + + /// This class provides access to specific and aggregate statistics gathered + /// during profiling of a parser. + class ANTLR4CPP_PUBLIC ParseInfo { + public: + /// Stores the given simulator pointer without taking ownership (see _atnSimulator). + ParseInfo(ProfilingATNSimulator *atnSimulator); + ParseInfo(ParseInfo const&) = default; + virtual ~ParseInfo(); + + ParseInfo& operator=(ParseInfo const&) = default; + + /// <summary> + /// Gets an array of <seealso cref="DecisionInfo"/> instances containing the profiling + /// information gathered for each decision in the ATN. + /// </summary> + /// <returns> An array of <seealso cref="DecisionInfo"/> instances, indexed by decision + /// number. 
</returns> + virtual std::vector<DecisionInfo> getDecisionInfo(); + + /// <summary> + /// Gets the decision numbers for decisions that required one or more + /// full-context predictions during parsing. These are decisions for which + /// <seealso cref="DecisionInfo#LL_Fallback"/> is greater than zero. + /// </summary> + /// <returns> A list of decision numbers which required one or more + /// full-context predictions during parsing. </returns> + virtual std::vector<size_t> getLLDecisions(); + + /// <summary> + /// Gets the total time spent during prediction across all decisions made + /// during parsing. This value is the sum of + /// <seealso cref="DecisionInfo#timeInPrediction"/> for all decisions. + /// </summary> + virtual long long getTotalTimeInPrediction(); + + /// <summary> + /// Gets the total number of SLL lookahead operations across all decisions + /// made during parsing. This value is the sum of + /// <seealso cref="DecisionInfo#SLL_TotalLook"/> for all decisions. + /// </summary> + virtual long long getTotalSLLLookaheadOps(); + + /// <summary> + /// Gets the total number of LL lookahead operations across all decisions + /// made during parsing. This value is the sum of + /// <seealso cref="DecisionInfo#LL_TotalLook"/> for all decisions. + /// </summary> + virtual long long getTotalLLLookaheadOps(); + + /// <summary> + /// Gets the total number of ATN lookahead operations for SLL prediction + /// across all decisions made during parsing. This value is the sum of + /// <seealso cref="DecisionInfo#SLL_ATNTransitions"/> for all decisions. + /// </summary> + virtual long long getTotalSLLATNLookaheadOps(); + + /// <summary> + /// Gets the total number of ATN lookahead operations for LL prediction + /// across all decisions made during parsing. This value is the sum of + /// <seealso cref="DecisionInfo#LL_ATNTransitions"/> for all decisions. + /// </summary> + virtual long long getTotalLLATNLookaheadOps(); + + /// <summary> + /// Gets the total number of ATN lookahead operations for SLL and LL + /// prediction across all decisions made during parsing. 
+ /// + /// <para> + /// This value is the sum of <seealso cref="#getTotalSLLATNLookaheadOps"/> and + /// <seealso cref="#getTotalLLATNLookaheadOps"/>.</para> + /// </summary> + virtual long long getTotalATNLookaheadOps(); + + /// <summary> + /// Gets the total number of DFA states stored in the DFA cache for all + /// decisions in the ATN. + /// </summary> + virtual size_t getDFASize(); + + /// <summary> + /// Gets the total number of DFA states stored in the DFA cache for a + /// particular decision. + /// </summary> + /// <param name="decision"> Index into the simulator's decisionToDFA vector; the + /// implementation in ParseInfo.cpp does not range-check it. </param> + virtual size_t getDFASize(size_t decision); + + protected: + const ProfilingATNSimulator *_atnSimulator; // non-owning, we are created by this simulator. + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ParserATNSimulator.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ParserATNSimulator.cpp new file mode 100644 index 0000000000..ad1da03570 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ParserATNSimulator.cpp @@ -0,0 +1,1387 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "dfa/DFA.h" +#include "NoViableAltException.h" +#include "atn/DecisionState.h" +#include "ParserRuleContext.h" +#include "misc/IntervalSet.h" +#include "Parser.h" +#include "CommonTokenStream.h" +#include "atn/NotSetTransition.h" +#include "atn/AtomTransition.h" +#include "atn/RuleTransition.h" +#include "atn/PredicateTransition.h" +#include "atn/PrecedencePredicateTransition.h" +#include "atn/SingletonPredictionContext.h" +#include "atn/ActionTransition.h" +#include "atn/EpsilonTransition.h" +#include "atn/RuleStopState.h" +#include "atn/ATNConfigSet.h" +#include "atn/ATNConfig.h" +#include "internal/Synchronization.h" + +#include "atn/StarLoopEntryState.h" +#include "atn/BlockStartState.h" +#include "atn/BlockEndState.h" + +#include "misc/Interval.h" +#include "ANTLRErrorListener.h" + +#include "Vocabulary.h" +#include "support/Arrays.h" +#include "support/Casts.h" + +#include "atn/ParserATNSimulator.h" + +/* Debug switches: the #if DEBUG_... == 1 blocks further down emit std::cout tracing when the matching macro is set to 1. */ +#define DEBUG_ATN 0 +#define DEBUG_LIST_ATN_DECISIONS 0 +#define DEBUG_DFA 0 +#define RETRY_DEBUG 0 + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::internal; +using namespace antlrcpp; + +const bool ParserATNSimulator::TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT = ParserATNSimulator::getLrLoopSetting(); + +/* Convenience overload: delegates with a null parser. */ +ParserATNSimulator::ParserATNSimulator(const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, + PredictionContextCache &sharedContextCache) +: ParserATNSimulator(nullptr, atn, decisionToDFA, sharedContextCache) { +} + +/* Delegates with a default-constructed ParserATNSimulatorOptions. */ +ParserATNSimulator::ParserATNSimulator(Parser *parser, const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, + PredictionContextCache &sharedContextCache) +: ParserATNSimulator(parser, atn, decisionToDFA, sharedContextCache, ParserATNSimulatorOptions()) {} + +/* Target of the two delegating overloads: binds decisionToDFA and parser, builds mergeCache from the options, then calls InitializeInstanceFields(). */ +ParserATNSimulator::ParserATNSimulator(Parser *parser, const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, + PredictionContextCache &sharedContextCache, + const ParserATNSimulatorOptions &options) +: ATNSimulator(atn, sharedContextCache), 
decisionToDFA(decisionToDFA), parser(parser), + mergeCache(options.getPredictionContextMergeCacheOptions()) { + InitializeInstanceFields(); +} + +/* No-op in this implementation. */ +void ParserATNSimulator::reset() { +} + +/* Empties decisionToDFA and refills it with the same number of entries, each a new dfa::DFA built from atn.getDecisionState(d). The element count is narrowed to int. */ +void ParserATNSimulator::clearDFA() { + int size = (int)decisionToDFA.size(); + decisionToDFA.clear(); + for (int d = 0; d < size; ++d) { + decisionToDFA.push_back(dfa::DFA(atn.getDecisionState(d), d)); + } +} + +/* Predicts an alternative for the given decision. Records the stream index and a mark, looks up or builds the DFA start state, then runs execATN. The callback handed to finally() seeks back to the recorded index, releases the mark, resets _dfa and clears mergeCache every getClearEveryN() calls when that option is non-zero (presumably run at scope exit - confirm in finally). decisionToDFA[decision] is not range-checked. */ +size_t ParserATNSimulator::adaptivePredict(TokenStream *input, size_t decision, ParserRuleContext *outerContext) { + +#if DEBUG_ATN == 1 || DEBUG_LIST_ATN_DECISIONS == 1 + std::cout << "adaptivePredict decision " << decision << " exec LA(1)==" << getLookaheadName(input) << " line " + << input->LT(1)->getLine() << ":" << input->LT(1)->getCharPositionInLine() << std::endl; +#endif + + _input = input; + _startIndex = input->index(); + _outerContext = outerContext; + dfa::DFA &dfa = decisionToDFA[decision]; + _dfa = &dfa; + + ssize_t m = input->mark(); + size_t index = _startIndex; + + // Now we are certain to have a specific decision's DFA + // But, do we still need an initial state? + auto onExit = finally([this, input, index, m] { + if (mergeCache.getOptions().getClearEveryN() != 0) { + if (++_mergeCacheCounter == mergeCache.getOptions().getClearEveryN()) { + mergeCache.clear(); + _mergeCacheCounter = 0; + } + } + _dfa = nullptr; + input->seek(index); + input->release(m); + }); + + dfa::DFAState *s0; + { + SharedLock<SharedMutex> stateLock(atn._stateMutex); + if (dfa.isPrecedenceDfa()) { + // the start state for a precedence DFA depends on the current + // parser precedence, and is provided by a DFA method. 
+ SharedLock<SharedMutex> edgeLock(atn._edgeMutex); + s0 = dfa.getPrecedenceStartState(parser->getPrecedence()); + } else { + // the start state for a "regular" DFA is just s0 + s0 = dfa.s0; + } + } + + // No cached start state: the start closure is computed before the exclusive state lock is taken. + if (s0 == nullptr) { + auto s0_closure = computeStartState(dfa.atnStartState, &ParserRuleContext::EMPTY, false); + std::unique_ptr<dfa::DFAState> newState; + // oldState is declared before stateLock, so a replaced dfa.s0 is destroyed only after the lock has been released. + std::unique_ptr<dfa::DFAState> oldState; + UniqueLock<SharedMutex> stateLock(atn._stateMutex); + dfa::DFAState* ds0 = dfa.s0; + if (dfa.isPrecedenceDfa()) { + /* If this is a precedence DFA, we use applyPrecedenceFilter + * to convert the computed start state to a precedence start + * state. We then use DFA.setPrecedenceStartState to set the + * appropriate start state for the precedence level rather + * than simply setting DFA.s0. + */ + ds0->configs = std::move(s0_closure); // not used for prediction but useful to know start configs anyway + newState = std::make_unique<dfa::DFAState>(applyPrecedenceFilter(ds0->configs.get())); + s0 = addDFAState(dfa, newState.get()); + UniqueLock<SharedMutex> edgeLock(atn._edgeMutex); + dfa.setPrecedenceStartState(parser->getPrecedence(), s0); + } else { + newState = std::make_unique<dfa::DFAState>(std::move(s0_closure)); + s0 = addDFAState(dfa, newState.get()); + if (ds0 != s0) { + oldState.reset(ds0); + dfa.s0 = s0; + } + } + // addDFAState handed back our new object: give up unique_ptr ownership (presumably the DFA owns it now - confirm in addDFAState). + // Otherwise newState still owns the unused object and frees it at scope exit. + if (s0 == newState.get()) { + newState.release(); + } + } + + // We can start with an existing DFA. + size_t alt = execATN(dfa, s0, input, index, outerContext != nullptr ? 
outerContext : &ParserRuleContext::EMPTY); + + return alt; +} + +/* Walks the DFA from s0 one lookahead symbol at a time, computing missing target states on demand. Returns the predicted alternative, hands over to execATNWithFullContext when a state requires full context and the mode is not SLL, and throws NoViableAltException when no alternative is viable. */ +size_t ParserATNSimulator::execATN(dfa::DFA &dfa, dfa::DFAState *s0, TokenStream *input, size_t startIndex, + ParserRuleContext *outerContext) { + +#if DEBUG_ATN == 1 || DEBUG_LIST_ATN_DECISIONS == 1 + std::cout << "execATN decision " << dfa.decision << " exec LA(1)==" << getLookaheadName(input) << + " line " << input->LT(1)->getLine() << ":" << input->LT(1)->getCharPositionInLine() << std::endl; +#endif + + dfa::DFAState *previousD = s0; + +#if DEBUG_ATN == 1 + std::cout << "s0 = " << s0 << std::endl; +#endif + + size_t t = input->LA(1); + + while (true) { // while more work + dfa::DFAState *D = getExistingTargetState(previousD, t); + if (D == nullptr) { + D = computeTargetState(dfa, previousD, t); + } + + if (D == ERROR.get()) { + // if any configs in previous dipped into outer context, that + // means that input up to t actually finished entry rule + // at least for SLL decision. Full LL doesn't dip into outer + // so don't need special case. + // We will get an error no matter what so delay until after + // decision; better error message. Also, no reachable target + // ATN states in SLL implies LL will also get nowhere. + // If conflict in states that dip out, choose min since we + // will get error no matter what. 
+ /* The exception is built before the stream is rewound to startIndex; it is thrown only when no fallback alternative is found. */ + NoViableAltException e = noViableAlt(input, outerContext, previousD->configs.get(), startIndex, false); + input->seek(startIndex); + size_t alt = getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(previousD->configs.get(), outerContext); + if (alt != ATN::INVALID_ALT_NUMBER) { + return alt; + } + + throw e; + } + + if (D->requiresFullContext && _mode != PredictionMode::SLL) { + // IF PREDS, MIGHT RESOLVE TO SINGLE ALT => SLL (or syntax error) + BitSet conflictingAlts; + if (D->predicates.size() != 0) { +#if DEBUG_ATN == 1 + std::cout << "DFA state has preds in DFA sim LL failover" << std::endl; +#endif + + // Predicates are evaluated with the stream positioned at startIndex. + size_t conflictIndex = input->index(); + if (conflictIndex != startIndex) { + input->seek(startIndex); + } + + conflictingAlts = evalSemanticContext(D->predicates, outerContext, true); + if (conflictingAlts.count() == 1) { +#if DEBUG_ATN == 1 + std::cout << "Full LL avoided" << std::endl; +#endif + + return conflictingAlts.nextSetBit(0); + } + + if (conflictIndex != startIndex) { + // restore the index so reporting the fallback to full + // context occurs with the index at the correct spot + input->seek(conflictIndex); + } + } + +#if DEBUG_DFA == 1 + std::cout << "ctx sensitive state " << outerContext << " in " << D << std::endl; +#endif + + bool fullCtx = true; + std::unique_ptr<ATNConfigSet> s0_closure = computeStartState(dfa.atnStartState, outerContext, fullCtx); + reportAttemptingFullContext(dfa, conflictingAlts, D->configs.get(), startIndex, input->index()); + size_t alt = execATNWithFullContext(dfa, D, s0_closure.get(), input, startIndex, outerContext); + return alt; + } + + if (D->isAcceptState) { + if (D->predicates.empty()) { + return D->prediction; + } + + // Remember where lookahead stopped (reported via reportAmbiguity below), then rewind so predicates are evaluated at startIndex. + size_t stopIndex = input->index(); + input->seek(startIndex); + BitSet alts = evalSemanticContext(D->predicates, outerContext, true); + switch (alts.count()) { + case 0: + throw noViableAlt(input, outerContext, D->configs.get(), startIndex, false); + + case 1: + return alts.nextSetBit(0); + + 
default: + // report ambiguity after predicate evaluation to make sure the correct + // set of ambig alts is reported. + reportAmbiguity(dfa, D, startIndex, stopIndex, false, alts, D->configs.get()); + return alts.nextSetBit(0); + } + } + + previousD = D; + + // At EOF the stream is not advanced, so t stays EOF for the next iteration. + if (t != Token::EOF) { + input->consume(); + t = input->LA(1); + } + } +} + +/* Looks up the cached edge for symbol t while holding a shared lock on the edge mutex; returns nullptr when no edge exists. */ +dfa::DFAState *ParserATNSimulator::getExistingTargetState(dfa::DFAState *previousD, size_t t) { + dfa::DFAState* retval; + SharedLock<SharedMutex> edgeLock(atn._edgeMutex); + auto iterator = previousD->edges.find(t); + retval = (iterator == previousD->edges.end()) ? nullptr : iterator->second; + return retval; +} + +/* Computes the reach set for (previousD, t) without full context and records the resulting edge. A null reach set records and returns the shared ERROR state. */ +dfa::DFAState *ParserATNSimulator::computeTargetState(dfa::DFA &dfa, dfa::DFAState *previousD, size_t t) { + std::unique_ptr<ATNConfigSet> reach = computeReachSet(previousD->configs.get(), t, false); + if (reach == nullptr) { + addDFAEdge(dfa, previousD, t, ERROR.get()); + return ERROR.get(); + } + + // create new target state; we'll add to DFA after it's complete + dfa::DFAState *D = new dfa::DFAState(std::move(reach)); /* mem-check: managed by the DFA or deleted below, "reach" is no longer valid now. 
*/ + size_t predictedAlt = getUniqueAlt(D->configs.get()); + + if (predictedAlt != ATN::INVALID_ALT_NUMBER) { + // NO CONFLICT, UNIQUELY PREDICTED ALT + D->isAcceptState = true; + D->configs->uniqueAlt = predictedAlt; + D->prediction = predictedAlt; + } else if (PredictionModeClass::hasSLLConflictTerminatingPrediction(_mode, D->configs.get())) { + // MORE THAN ONE VIABLE ALTERNATIVE + D->configs->conflictingAlts = getConflictingAlts(D->configs.get()); + D->requiresFullContext = true; + // in SLL-only mode, we will stop at this state and return the minimum alt + D->isAcceptState = true; + D->prediction = D->configs->conflictingAlts.nextSetBit(0); + } + + // Accept states whose configs carry semantic context get their predicate table built here; + // when predicates remain, the prediction is cleared so callers must evaluate them. + if (D->isAcceptState && D->configs->hasSemanticContext) { + predicateDFAState(D, atn.getDecisionState(dfa.decision)); + if (D->predicates.size() != 0) { + D->prediction = ATN::INVALID_ALT_NUMBER; + } + } + + // all adds to dfa are done after we've created full D state + dfa::DFAState *state = addDFAEdge(dfa, previousD, t, D); + if (state != D) { + delete D; // If the new state exists already we don't need it and use the existing one instead. + } + return state; +} + +/* Fills dfaState->predicates with (predicate, alt) pairs when getPredsForAmbigAlts returns any; otherwise sets the prediction to the first set bit of the candidate alternatives. */ +void ParserATNSimulator::predicateDFAState(dfa::DFAState *dfaState, DecisionState *decisionState) { + // We need to test all predicates, even in DFA states that + // uniquely predict alternative. 
+ size_t nalts = decisionState->transitions.size(); + + // Update DFA so reach becomes accept state with (predicate,alt) + // pairs if preds found for conflicting alts + BitSet altsToCollectPredsFrom = getConflictingAltsOrUniqueAlt(dfaState->configs.get()); + std::vector<Ref<const SemanticContext>> altToPred = getPredsForAmbigAlts(altsToCollectPredsFrom, dfaState->configs.get(), nalts); + if (!altToPred.empty()) { + dfaState->predicates = getPredicatePredictions(altsToCollectPredsFrom, altToPred); + dfaState->prediction = ATN::INVALID_ALT_NUMBER; // make sure we use preds + } else { + // There are preds in configs but they might go away + // when OR'd together like {p}? || NONE == NONE. If neither + // alt has preds, resolve to min alt + dfaState->prediction = altsToCollectPredsFrom.nextSetBit(0); + } +} + +/* Re-runs prediction from startIndex with full context, starting from the caller-supplied set s0, which this function never deletes. The loop ends when the reach set yields a unique alternative, when the conflict resolves to a single viable alternative, or - in LL_EXACT_AMBIG_DETECTION mode - when all alt subsets conflict and are equal. */ +size_t ParserATNSimulator::execATNWithFullContext(dfa::DFA &dfa, dfa::DFAState *D, ATNConfigSet *s0, + TokenStream *input, size_t startIndex, ParserRuleContext *outerContext) { + + bool fullCtx = true; + bool foundExactAmbig = false; + + std::unique_ptr<ATNConfigSet> reach; + // previous is a raw pointer: either s0 or a set released from reach on an earlier iteration. + ATNConfigSet *previous = s0; + input->seek(startIndex); + size_t t = input->LA(1); + size_t predictedAlt; + + while (true) { + reach = computeReachSet(previous, t, fullCtx); + if (reach == nullptr) { + // if any configs in previous dipped into outer context, that + // means that input up to t actually finished entry rule + // at least for LL decision. Full LL doesn't dip into outer + // so don't need special case. + // We will get an error no matter what so delay until after + // decision; better error message. Also, no reachable target + // ATN states in SLL implies LL will also get nowhere. + // If conflict in states that dip out, choose min since we + // will get error no matter what. 
+ /* NOTE(review): previous != s0 is passed as the last argument; presumably it tells noViableAlt whether the exception takes over the set, since this path does not delete previous itself - confirm. */ + NoViableAltException e = noViableAlt(input, outerContext, previous, startIndex, previous != s0); + input->seek(startIndex); + size_t alt = getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(previous, outerContext); + if (alt != ATN::INVALID_ALT_NUMBER) { + return alt; + } + throw e; + } + if (previous != s0) // Don't delete the start set. + delete previous; + previous = nullptr; + + std::vector<BitSet> altSubSets = PredictionModeClass::getConflictingAltSubsets(reach.get()); + reach->uniqueAlt = getUniqueAlt(reach.get()); + // unique prediction? + if (reach->uniqueAlt != ATN::INVALID_ALT_NUMBER) { + predictedAlt = reach->uniqueAlt; + break; + } + if (_mode != PredictionMode::LL_EXACT_AMBIG_DETECTION) { + predictedAlt = PredictionModeClass::resolvesToJustOneViableAlt(altSubSets); + if (predictedAlt != ATN::INVALID_ALT_NUMBER) { + break; + } + } else { + // In exact ambiguity mode, we never try to terminate early. + // Just keep consuming input until we know what the conflict is + if (PredictionModeClass::allSubsetsConflict(altSubSets) && PredictionModeClass::allSubsetsEqual(altSubSets)) { + foundExactAmbig = true; + predictedAlt = PredictionModeClass::getSingleViableAlt(altSubSets); + break; + } + // else there are multiple non-conflicting subsets or + // we're not sure what the ambiguity is yet. + // So, keep going. + } + // Hand the reach set to the raw pointer previous; it is deleted near the top of the + // next iteration once the next reach set has been computed. + previous = reach.release(); + + if (t != Token::EOF) { + input->consume(); + t = input->LA(1); + } + } + + // Every break above happens after previous was reset to nullptr, so this delete acts on a null pointer. + if (previous != s0) // Don't delete the start set + delete previous; + + // If the configuration set uniquely predicts an alternative, + // without conflict, then we know that it's a full LL decision + // not SLL. + if (reach->uniqueAlt != ATN::INVALID_ALT_NUMBER) { + reportContextSensitivity(dfa, predictedAlt, reach.get(), startIndex, input->index()); + return predictedAlt; + } + + // We do not check predicates here because we have checked them + // on-the-fly when doing full context prediction. 
+ + /* + In non-exact ambiguity detection mode, we might actually be able to + detect an exact ambiguity, but I'm not going to spend the cycles + needed to check. We only emit ambiguity warnings in exact ambiguity + mode. + + For example, we might know that we have conflicting configurations. + But, that does not mean that there is no way forward without a + conflict. It's possible to have nonconflicting alt subsets as in: + + LL altSubSets=[{1, 2}, {1, 2}, {1}, {1, 2}] + + from + + [(17,1,[5 $]), (13,1,[5 10 $]), (21,1,[5 10 $]), (11,1,[$]), + (13,2,[5 10 $]), (21,2,[5 10 $]), (11,2,[$])] + + In this case, (17,1,[5 $]) indicates there is some next sequence that + would resolve this without conflict to alternative 1. Any other viable + next sequence, however, is associated with a conflict. We stop + looking for input because no amount of further lookahead will alter + the fact that we should predict alternative 1. We just can't say for + sure that there is an ambiguity without looking further. + */ + reportAmbiguity(dfa, D, startIndex, input->index(), foundExactAmbig, reach->getAlts(), reach.get()); + + return predictedAlt; +} + +std::unique_ptr<ATNConfigSet> ParserATNSimulator::computeReachSet(ATNConfigSet *closure_, size_t t, bool fullCtx) { + + std::unique_ptr<ATNConfigSet> intermediate(new ATNConfigSet(fullCtx)); + + /* Configurations already in a rule stop state indicate reaching the end + * of the decision rule (local context) or end of the start rule (full + * context). Once reached, these configurations are never updated by a + * closure operation, so they are handled separately for the performance + * advantage of having a smaller intermediate set when calling closure. + * + * For full-context reach operations, separate handling is required to + * ensure that the alternative matching the longest overall sequence is + * chosen when multiple such configurations can match the input. 
+ */ + std::vector<Ref<ATNConfig>> skippedStopStates; + + // First figure out where we can reach on input t + for (const auto &c : closure_->configs) { + if (RuleStopState::is(c->state)) { + assert(c->context->isEmpty()); + + if (fullCtx || t == Token::EOF) { + skippedStopStates.push_back(c); + } + + continue; + } + + size_t n = c->state->transitions.size(); + for (size_t ti = 0; ti < n; ti++) { // for each transition + const Transition *trans = c->state->transitions[ti].get(); + ATNState *target = getReachableTarget(trans, (int)t); + if (target != nullptr) { + intermediate->add(std::make_shared<ATNConfig>(*c, target), &mergeCache); + } + } + } + + // Now figure out where the reach operation can take us... + std::unique_ptr<ATNConfigSet> reach; + + /* This block optimizes the reach operation for intermediate sets which + * trivially indicate a termination state for the overall + * adaptivePredict operation. + * + * The conditions assume that intermediate + * contains all configurations relevant to the reach set, but this + * condition is not true when one or more configurations have been + * withheld in skippedStopStates, or when the current symbol is EOF. + */ + if (skippedStopStates.empty() && t != Token::EOF) { + if (intermediate->size() == 1) { + // Don't pursue the closure if there is just one state. + // It can only have one alternative; just add to result + // Also don't pursue the closure if there is unique alternative + // among the configurations. + reach = std::move(intermediate); + } else if (getUniqueAlt(intermediate.get()) != ATN::INVALID_ALT_NUMBER) { + // Also don't pursue the closure if there is unique alternative + // among the configurations. + reach = std::move(intermediate); + } + } + + /* If the reach set could not be trivially determined, perform a closure + * operation on the intermediate set to compute its initial value. 
+ */ + if (reach == nullptr) { + reach.reset(new ATNConfigSet(fullCtx)); + ATNConfig::Set closureBusy; + + bool treatEofAsEpsilon = t == Token::EOF; + for (const auto &c : intermediate->configs) { + closure(c, reach.get(), closureBusy, false, fullCtx, treatEofAsEpsilon); + } + } + + if (t == IntStream::EOF) { + /* After consuming EOF no additional input is possible, so we are + * only interested in configurations which reached the end of the + * decision rule (local context) or end of the start rule (full + * context). Update reach to contain only these configurations. This + * handles both explicit EOF transitions in the grammar and implicit + * EOF transitions following the end of the decision or start rule. + * + * When reach==intermediate, no closure operation was performed. In + * this case, removeAllConfigsNotInRuleStopState needs to check for + * reachable rule stop states as well as configurations already in + * a rule stop state. + * + * This is handled before the configurations in skippedStopStates, + * because any configurations potentially added from that list are + * already guaranteed to meet this condition whether or not it's + * required. + */ + ATNConfigSet *temp = removeAllConfigsNotInRuleStopState(reach.get(), *reach == *intermediate); + if (temp != reach.get()) + reach.reset(temp); // We got a new set, so use that. + } + + /* If skippedStopStates is not null, then it contains at least one + * configuration. For full-context reach operations, these + * configurations reached the end of the start rule, in which case we + * only add them back to reach if no configuration during the current + * closure operation reached such a state. This ensures adaptivePredict + * chooses an alternative matching the longest overall sequence when + * multiple alternatives are viable. 
+ */ + if (skippedStopStates.size() > 0 && (!fullCtx || !PredictionModeClass::hasConfigInRuleStopState(reach.get()))) { + assert(!skippedStopStates.empty()); + + for (const auto &c : skippedStopStates) { + reach->add(c, &mergeCache); + } + } + + if (reach->isEmpty()) { + return nullptr; + } + return reach; +} + +ATNConfigSet* ParserATNSimulator::removeAllConfigsNotInRuleStopState(ATNConfigSet *configs, + bool lookToEndOfRule) { + if (PredictionModeClass::allConfigsInRuleStopStates(configs)) { + return configs; + } + + ATNConfigSet *result = new ATNConfigSet(configs->fullCtx); /* mem-check: released by caller */ + + for (const auto &config : configs->configs) { + if (config->state != nullptr && config->state->getStateType() == ATNStateType::RULE_STOP) { + result->add(config, &mergeCache); + continue; + } + + if (lookToEndOfRule && config->state->epsilonOnlyTransitions) { + misc::IntervalSet nextTokens = atn.nextTokens(config->state); + if (nextTokens.contains(Token::EPSILON)) { + ATNState *endOfRuleState = atn.ruleToStopState[config->state->ruleIndex]; + result->add(std::make_shared<ATNConfig>(*config, endOfRuleState), &mergeCache); + } + } + } + + return result; +} + +std::unique_ptr<ATNConfigSet> ParserATNSimulator::computeStartState(ATNState *p, RuleContext *ctx, bool fullCtx) { + // always at least the implicit call to start rule + Ref<const PredictionContext> initialContext = PredictionContext::fromRuleContext(atn, ctx); + std::unique_ptr<ATNConfigSet> configs(new ATNConfigSet(fullCtx)); + + for (size_t i = 0; i < p->transitions.size(); i++) { + ATNState *target = p->transitions[i]->target; + Ref<ATNConfig> c = std::make_shared<ATNConfig>(target, (int)i + 1, initialContext); + ATNConfig::Set closureBusy; + closure(c, configs.get(), closureBusy, true, fullCtx, false); + } + + return configs; +} + +std::unique_ptr<ATNConfigSet> ParserATNSimulator::applyPrecedenceFilter(ATNConfigSet *configs) { + std::map<size_t, Ref<const PredictionContext>> statesFromAlt1; + 
std::unique_ptr<ATNConfigSet> configSet(new ATNConfigSet(configs->fullCtx)); + for (const auto &config : configs->configs) { + // handle alt 1 first + if (config->alt != 1) { + continue; + } + + Ref<const SemanticContext> updatedContext = config->semanticContext->evalPrecedence(parser, _outerContext); + if (updatedContext == nullptr) { + // the configuration was eliminated + continue; + } + + statesFromAlt1[config->state->stateNumber] = config->context; + if (updatedContext != config->semanticContext) { + configSet->add(std::make_shared<ATNConfig>(*config, updatedContext), &mergeCache); + } + else { + configSet->add(config, &mergeCache); + } + } + + for (const auto &config : configs->configs) { + if (config->alt == 1) { + // already handled + continue; + } + + if (!config->isPrecedenceFilterSuppressed()) { + /* In the future, this elimination step could be updated to also + * filter the prediction context for alternatives predicting alt>1 + * (basically a graph subtraction algorithm). + */ + auto iterator = statesFromAlt1.find(config->state->stateNumber); + if (iterator != statesFromAlt1.end() && *iterator->second == *config->context) { + // eliminated + continue; + } + } + + configSet->add(config, &mergeCache); + } + + return configSet; +} + +atn::ATNState* ParserATNSimulator::getReachableTarget(const Transition *trans, size_t ttype) { + if (trans->matches(ttype, 0, atn.maxTokenType)) { + return trans->target; + } + + return nullptr; +} + +// Note that caller must memory manage the returned value from this function +std::vector<Ref<const SemanticContext>> ParserATNSimulator::getPredsForAmbigAlts(const BitSet &ambigAlts, + ATNConfigSet *configs, size_t nalts) { + // REACH=[1|1|[]|0:0, 1|2|[]|0:1] + /* altToPred starts as an array of all null contexts. The entry at index i + * corresponds to alternative i. altToPred[i] may have one of three values: + * 1. null: no ATNConfig c is found such that c.alt==i + * 2. 
SemanticContext.NONE: At least one ATNConfig c exists such that + * c.alt==i and c.semanticContext==SemanticContext.NONE. In other words, + * alt i has at least one un-predicated config. + * 3. Non-NONE Semantic Context: There exists at least one, and for all + * ATNConfig c such that c.alt==i, c.semanticContext!=SemanticContext.NONE. + * + * From this, it is clear that NONE||anything==NONE. + */ + std::vector<Ref<const SemanticContext>> altToPred(nalts + 1); + + for (const auto &c : configs->configs) { + if (ambigAlts.test(c->alt)) { + altToPred[c->alt] = SemanticContext::Or(altToPred[c->alt], c->semanticContext); + } + } + + size_t nPredAlts = 0; + for (size_t i = 1; i <= nalts; i++) { + if (altToPred[i] == nullptr) { + altToPred[i] = SemanticContext::Empty::Instance; + } else if (altToPred[i] != SemanticContext::Empty::Instance) { + nPredAlts++; + } + } + + // nonambig alts are null in altToPred + if (nPredAlts == 0) { + altToPred.clear(); + } +#if DEBUG_ATN == 1 + std::cout << "getPredsForAmbigAlts result " << Arrays::toString(altToPred) << std::endl; +#endif + + return altToPred; +} + +std::vector<dfa::DFAState::PredPrediction> ParserATNSimulator::getPredicatePredictions(const antlrcpp::BitSet &ambigAlts, + const std::vector<Ref<const SemanticContext>> &altToPred) { + bool containsPredicate = std::find_if(altToPred.begin(), altToPred.end(), [](const Ref<const SemanticContext> &context) { + return context != SemanticContext::Empty::Instance; + }) != altToPred.end(); + std::vector<dfa::DFAState::PredPrediction> pairs; + if (containsPredicate) { + for (size_t i = 1; i < altToPred.size(); i++) { + const auto &pred = altToPred[i]; + assert(pred != nullptr); // unpredicted is indicated by SemanticContext.NONE + if (ambigAlts.test(i)) { + pairs.emplace_back(pred, static_cast<int>(i)); + } + } + } + return pairs; +} + +size_t ParserATNSimulator::getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(ATNConfigSet *configs, + ParserRuleContext *outerContext) +{ + 
std::pair<ATNConfigSet *, ATNConfigSet *> sets = splitAccordingToSemanticValidity(configs, outerContext); + std::unique_ptr<ATNConfigSet> semValidConfigs(sets.first); + std::unique_ptr<ATNConfigSet> semInvalidConfigs(sets.second); + size_t alt = getAltThatFinishedDecisionEntryRule(semValidConfigs.get()); + if (alt != ATN::INVALID_ALT_NUMBER) { // semantically/syntactically viable path exists + return alt; + } + // Is there a syntactically valid path with a failed pred? + if (!semInvalidConfigs->configs.empty()) { + alt = getAltThatFinishedDecisionEntryRule(semInvalidConfigs.get()); + if (alt != ATN::INVALID_ALT_NUMBER) { // syntactically viable path exists + return alt; + } + } + return ATN::INVALID_ALT_NUMBER; +} + +size_t ParserATNSimulator::getAltThatFinishedDecisionEntryRule(ATNConfigSet *configs) { + misc::IntervalSet alts; + for (const auto &c : configs->configs) { + if (c->getOuterContextDepth() > 0 || (c->state != nullptr && c->state->getStateType() == ATNStateType::RULE_STOP && c->context->hasEmptyPath())) { + alts.add(c->alt); + } + } + if (alts.size() == 0) { + return ATN::INVALID_ALT_NUMBER; + } + return alts.getMinElement(); +} + +std::pair<ATNConfigSet *, ATNConfigSet *> ParserATNSimulator::splitAccordingToSemanticValidity(ATNConfigSet *configs, + ParserRuleContext *outerContext) { + + // mem-check: both pointers must be freed by the caller. 
+ ATNConfigSet *succeeded(new ATNConfigSet(configs->fullCtx)); + ATNConfigSet *failed(new ATNConfigSet(configs->fullCtx)); + for (const auto &c : configs->configs) { + if (c->semanticContext != SemanticContext::Empty::Instance) { + bool predicateEvaluationResult = evalSemanticContext(c->semanticContext, outerContext, c->alt, configs->fullCtx); + if (predicateEvaluationResult) { + succeeded->add(c); + } else { + failed->add(c); + } + } else { + succeeded->add(c); + } + } + return { succeeded, failed }; +} + +BitSet ParserATNSimulator::evalSemanticContext(const std::vector<dfa::DFAState::PredPrediction> &predPredictions, + ParserRuleContext *outerContext, bool complete) { + BitSet predictions; + for (const auto &prediction : predPredictions) { + if (prediction.pred == SemanticContext::Empty::Instance) { + predictions.set(prediction.alt); + if (!complete) { + break; + } + continue; + } + + bool fullCtx = false; // in dfa + bool predicateEvaluationResult = evalSemanticContext(prediction.pred, outerContext, prediction.alt, fullCtx); +#if DEBUG_ATN == 1 || DEBUG_DFA == 1 + std::cout << "eval pred " << prediction.toString() << " = " << predicateEvaluationResult << std::endl; +#endif + + if (predicateEvaluationResult) { +#if DEBUG_ATN == 1 || DEBUG_DFA == 1 + std::cout << "PREDICT " << prediction.alt << std::endl; +#endif + + predictions.set(prediction.alt); + if (!complete) { + break; + } + } + } + + return predictions; +} + +bool ParserATNSimulator::evalSemanticContext(Ref<const SemanticContext> const& pred, ParserRuleContext *parserCallStack, + size_t /*alt*/, bool /*fullCtx*/) { + return pred->eval(parser, parserCallStack); +} + +void ParserATNSimulator::closure(Ref<ATNConfig> const& config, ATNConfigSet *configs, ATNConfig::Set &closureBusy, + bool collectPredicates, bool fullCtx, bool treatEofAsEpsilon) { + const int initialDepth = 0; + closureCheckingStopState(config, configs, closureBusy, collectPredicates, fullCtx, initialDepth, treatEofAsEpsilon); + + 
assert(!fullCtx || !configs->dipsIntoOuterContext); +} + +void ParserATNSimulator::closureCheckingStopState(Ref<ATNConfig> const& config, ATNConfigSet *configs, + ATNConfig::Set &closureBusy, bool collectPredicates, bool fullCtx, int depth, bool treatEofAsEpsilon) { + +#if DEBUG_ATN == 1 + std::cout << "closure(" << config->toString(true) << ")" << std::endl; +#endif + + if (config->state != nullptr && config->state->getStateType() == ATNStateType::RULE_STOP) { + // We hit rule end. If we have context info, use it + // run thru all possible stack tops in ctx + if (!config->context->isEmpty()) { + for (size_t i = 0; i < config->context->size(); i++) { + if (config->context->getReturnState(i) == PredictionContext::EMPTY_RETURN_STATE) { + if (fullCtx) { + configs->add(std::make_shared<ATNConfig>(*config, config->state, PredictionContext::EMPTY), &mergeCache); + continue; + } else { + // we have no context info, just chase follow links (if greedy) +#if DEBUG_ATN == 1 + std::cout << "FALLING off rule " << getRuleName(config->state->ruleIndex) << std::endl; +#endif + closure_(config, configs, closureBusy, collectPredicates, fullCtx, depth, treatEofAsEpsilon); + } + continue; + } + ATNState *returnState = atn.states[config->context->getReturnState(i)]; + Ref<const PredictionContext> newContext = config->context->getParent(i); // "pop" return state + Ref<ATNConfig> c = std::make_shared<ATNConfig>(returnState, config->alt, newContext, config->semanticContext); + // While we have context to pop back from, we may have + // gotten that context AFTER having falling off a rule. + // Make sure we track that we are now out of context. + // + // This assignment also propagates the + // isPrecedenceFilterSuppressed() value to the new + // configuration. 
+ c->reachesIntoOuterContext = config->reachesIntoOuterContext; + assert(depth > INT_MIN); + + closureCheckingStopState(c, configs, closureBusy, collectPredicates, fullCtx, depth - 1, treatEofAsEpsilon); + } + return; + } else if (fullCtx) { + // reached end of start rule + configs->add(config, &mergeCache); + return; + } else { + // else if we have no context info, just chase follow links (if greedy) + } + } + + closure_(config, configs, closureBusy, collectPredicates, fullCtx, depth, treatEofAsEpsilon); +} + +void ParserATNSimulator::closure_(Ref<ATNConfig> const& config, ATNConfigSet *configs, ATNConfig::Set &closureBusy, + bool collectPredicates, bool fullCtx, int depth, bool treatEofAsEpsilon) { + ATNState *p = config->state; + // optimization + if (!p->epsilonOnlyTransitions) { + // make sure to not return here, because EOF transitions can act as + // both epsilon transitions and non-epsilon transitions. + configs->add(config, &mergeCache); + } + + for (size_t i = 0; i < p->transitions.size(); i++) { + if (i == 0 && canDropLoopEntryEdgeInLeftRecursiveRule(config.get())) + continue; + + const Transition *t = p->transitions[i].get(); + bool continueCollecting = !(t != nullptr && t->getTransitionType() == TransitionType::ACTION) && collectPredicates; + Ref<ATNConfig> c = getEpsilonTarget(config, t, continueCollecting, depth == 0, fullCtx, treatEofAsEpsilon); + if (c != nullptr) { + int newDepth = depth; + if (config->state != nullptr && config->state->getStateType() == ATNStateType::RULE_STOP) { + assert(!fullCtx); + + // target fell off end of rule; mark resulting c as having dipped into outer context + // We can't get here if incoming config was rule stop and we had context + // track how far we dip into outer context. Might + // come in handy and we avoid evaluating context dependent + // preds if this is > 0. 
+ + if (closureBusy.count(c) > 0) { + // avoid infinite recursion for right-recursive rules + continue; + } + closureBusy.insert(c); + + if (_dfa != nullptr && _dfa->isPrecedenceDfa()) { + size_t outermostPrecedenceReturn = downCast<const EpsilonTransition *>(t)->outermostPrecedenceReturn(); + if (outermostPrecedenceReturn == _dfa->atnStartState->ruleIndex) { + c->setPrecedenceFilterSuppressed(true); + } + } + + c->reachesIntoOuterContext++; + + if (!t->isEpsilon()) { + // avoid infinite recursion for EOF* and EOF+ + if (closureBusy.count(c) == 0) { + closureBusy.insert(c); + } else { + continue; + } + } + + configs->dipsIntoOuterContext = true; // TODO: can remove? only care when we add to set per middle of this method + assert(newDepth > INT_MIN); + + newDepth--; +#if DEBUG_DFA == 1 + std::cout << "dips into outer ctx: " << c << std::endl; +#endif + + } else if (!t->isEpsilon()) { + // avoid infinite recursion for EOF* and EOF+ + if (closureBusy.count(c) == 0) { + closureBusy.insert(c); + } else { + continue; + } + } + + if (t != nullptr && t->getTransitionType() == TransitionType::RULE) { + // latch when newDepth goes negative - once we step out of the entry context we can't return + if (newDepth >= 0) { + newDepth++; + } + } + + closureCheckingStopState(c, configs, closureBusy, continueCollecting, fullCtx, newDepth, treatEofAsEpsilon); + } + } +} + +bool ParserATNSimulator::canDropLoopEntryEdgeInLeftRecursiveRule(ATNConfig *config) const { + if (TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT) + return false; + + ATNState *p = config->state; + + // First check to see if we are in StarLoopEntryState generated during + // left-recursion elimination. For efficiency, also check if + // the context has an empty stack case. If so, it would mean + // global FOLLOW so we can't perform optimization + if (p->getStateType() != ATNStateType::STAR_LOOP_ENTRY || + !((StarLoopEntryState *)p)->isPrecedenceDecision || // Are we the special loop entry/exit state? 
+ config->context->isEmpty() || // If SLL wildcard + config->context->hasEmptyPath()) + { + return false; + } + + // Require all return states to return back to the same rule + // that p is in. + size_t numCtxs = config->context->size(); + for (size_t i = 0; i < numCtxs; i++) { // for each stack context + ATNState *returnState = atn.states[config->context->getReturnState(i)]; + if (returnState->ruleIndex != p->ruleIndex) + return false; + } + + BlockStartState *decisionStartState = (BlockStartState *)p->transitions[0]->target; + size_t blockEndStateNum = decisionStartState->endState->stateNumber; + BlockEndState *blockEndState = (BlockEndState *)atn.states[blockEndStateNum]; + + // Verify that the top of each stack context leads to loop entry/exit + // state through epsilon edges and w/o leaving rule. + for (size_t i = 0; i < numCtxs; i++) { // for each stack context + size_t returnStateNumber = config->context->getReturnState(i); + ATNState *returnState = atn.states[returnStateNumber]; + // All states must have single outgoing epsilon edge. + if (returnState->transitions.size() != 1 || !returnState->transitions[0]->isEpsilon()) + { + return false; + } + + // Look for prefix op case like 'not expr', (' type ')' expr + ATNState *returnStateTarget = returnState->transitions[0]->target; + if (returnState->getStateType() == ATNStateType::BLOCK_END && returnStateTarget == p) { + continue; + } + + // Look for 'expr op expr' or case where expr's return state is block end + // of (...)* internal block; the block end points to loop back + // which points to p but we don't need to check that + if (returnState == blockEndState) { + continue; + } + + // Look for ternary expr ? expr : expr. 
The return state points at block end, + // which points at loop entry state + if (returnStateTarget == blockEndState) { + continue; + } + + // Look for complex prefix 'between expr and expr' case where 2nd expr's + // return state points at block end state of (...)* internal block + if (returnStateTarget->getStateType() == ATNStateType::BLOCK_END && + returnStateTarget->transitions.size() == 1 && + returnStateTarget->transitions[0]->isEpsilon() && + returnStateTarget->transitions[0]->target == p) + { + continue; + } + + // Anything else ain't conforming. + return false; + } + + return true; +} + +std::string ParserATNSimulator::getRuleName(size_t index) { + if (parser != nullptr) { + return parser->getRuleNames()[index]; + } + return "<rule " + std::to_string(index) + ">"; +} + +Ref<ATNConfig> ParserATNSimulator::getEpsilonTarget(Ref<ATNConfig> const& config, const Transition *t, bool collectPredicates, + bool inContext, bool fullCtx, bool treatEofAsEpsilon) { + switch (t->getTransitionType()) { + case TransitionType::RULE: + return ruleTransition(config, static_cast<const RuleTransition*>(t)); + + case TransitionType::PRECEDENCE: + return precedenceTransition(config, static_cast<const PrecedencePredicateTransition*>(t), collectPredicates, inContext, fullCtx); + + case TransitionType::PREDICATE: + return predTransition(config, static_cast<const PredicateTransition*>(t), collectPredicates, inContext, fullCtx); + + case TransitionType::ACTION: + return actionTransition(config, static_cast<const ActionTransition*>(t)); + + case TransitionType::EPSILON: + return std::make_shared<ATNConfig>(*config, t->target); + + case TransitionType::ATOM: + case TransitionType::RANGE: + case TransitionType::SET: + // EOF transitions act like epsilon transitions after the first EOF + // transition is traversed + if (treatEofAsEpsilon) { + if (t->matches(Token::EOF, 0, 1)) { + return std::make_shared<ATNConfig>(*config, t->target); + } + } + + return nullptr; + + default: + return 
nullptr; + } +} + +Ref<ATNConfig> ParserATNSimulator::actionTransition(Ref<ATNConfig> const& config, const ActionTransition *t) { +#if DEBUG_DFA == 1 + std::cout << "ACTION edge " << t->ruleIndex << ":" << t->actionIndex << std::endl; +#endif + + return std::make_shared<ATNConfig>(*config, t->target); +} + +Ref<ATNConfig> ParserATNSimulator::precedenceTransition(Ref<ATNConfig> const& config, const PrecedencePredicateTransition *pt, + bool collectPredicates, bool inContext, bool fullCtx) { +#if DEBUG_DFA == 1 + std::cout << "PRED (collectPredicates=" << collectPredicates << ") " << pt->getPrecedence() << ">=_p" << ", ctx dependent=true" << std::endl; + if (parser != nullptr) { + std::cout << "context surrounding pred is " << Arrays::listToString(parser->getRuleInvocationStack(), ", ") << std::endl; + } +#endif + + Ref<ATNConfig> c; + if (collectPredicates && inContext) { + const auto &predicate = pt->getPredicate(); + + if (fullCtx) { + // In full context mode, we can evaluate predicates on-the-fly + // during closure, which dramatically reduces the size of + // the config sets. It also obviates the need to test predicates + // later during conflict resolution. 
+ size_t currentPosition = _input->index(); + _input->seek(_startIndex); + bool predSucceeds = evalSemanticContext(predicate, _outerContext, config->alt, fullCtx); + _input->seek(currentPosition); + if (predSucceeds) { + c = std::make_shared<ATNConfig>(*config, pt->target); // no pred context + } + } else { + Ref<const SemanticContext> newSemCtx = SemanticContext::And(config->semanticContext, predicate); + c = std::make_shared<ATNConfig>(*config, pt->target, std::move(newSemCtx)); + } + } else { + c = std::make_shared<ATNConfig>(*config, pt->target); + } + +#if DEBUG_DFA == 1 + std::cout << "config from pred transition=" << c << std::endl; +#endif + + return c; +} + +Ref<ATNConfig> ParserATNSimulator::predTransition(Ref<ATNConfig> const& config, const PredicateTransition *pt, + bool collectPredicates, bool inContext, bool fullCtx) { +#if DEBUG_DFA == 1 + std::cout << "PRED (collectPredicates=" << collectPredicates << ") " << pt->getRuleIndex() << ":" << pt->getPredIndex() << ", ctx dependent=" << pt->isCtxDependent() << std::endl; + if (parser != nullptr) { + std::cout << "context surrounding pred is " << Arrays::listToString(parser->getRuleInvocationStack(), ", ") << std::endl; + } +#endif + + Ref<ATNConfig> c = nullptr; + if (collectPredicates && (!pt->isCtxDependent() || (pt->isCtxDependent() && inContext))) { + const auto &predicate = pt->getPredicate(); + if (fullCtx) { + // In full context mode, we can evaluate predicates on-the-fly + // during closure, which dramatically reduces the size of + // the config sets. It also obviates the need to test predicates + // later during conflict resolution. 
+ size_t currentPosition = _input->index(); + _input->seek(_startIndex); + bool predSucceeds = evalSemanticContext(predicate, _outerContext, config->alt, fullCtx); + _input->seek(currentPosition); + if (predSucceeds) { + c = std::make_shared<ATNConfig>(*config, pt->target); // no pred context + } + } else { + Ref<const SemanticContext> newSemCtx = SemanticContext::And(config->semanticContext, predicate); + c = std::make_shared<ATNConfig>(*config, pt->target, std::move(newSemCtx)); + } + } else { + c = std::make_shared<ATNConfig>(*config, pt->target); + } + +#if DEBUG_DFA == 1 + std::cout << "config from pred transition=" << c << std::endl; +#endif + + return c; +} + +Ref<ATNConfig> ParserATNSimulator::ruleTransition(Ref<ATNConfig> const& config, const RuleTransition *t) { +#if DEBUG_DFA == 1 + std::cout << "CALL rule " << getRuleName(t->target->ruleIndex) << ", ctx=" << config->context << std::endl; +#endif + + atn::ATNState *returnState = t->followState; + Ref<const PredictionContext> newContext = SingletonPredictionContext::create(config->context, returnState->stateNumber); + return std::make_shared<ATNConfig>(*config, t->target, newContext); +} + +BitSet ParserATNSimulator::getConflictingAlts(ATNConfigSet *configs) { + std::vector<BitSet> altsets = PredictionModeClass::getConflictingAltSubsets(configs); + return PredictionModeClass::getAlts(altsets); +} + +BitSet ParserATNSimulator::getConflictingAltsOrUniqueAlt(ATNConfigSet *configs) { + BitSet conflictingAlts; + if (configs->uniqueAlt != ATN::INVALID_ALT_NUMBER) { + conflictingAlts.set(configs->uniqueAlt); + } else { + conflictingAlts = configs->conflictingAlts; + } + return conflictingAlts; +} + +std::string ParserATNSimulator::getTokenName(size_t t) { + if (t == Token::EOF) { + return "EOF"; + } + + const dfa::Vocabulary &vocabulary = parser != nullptr ? 
parser->getVocabulary() : dfa::Vocabulary(); + std::string displayName = vocabulary.getDisplayName(t); + if (displayName == std::to_string(t)) { + return displayName; + } + + return displayName + "<" + std::to_string(t) + ">"; +} + +std::string ParserATNSimulator::getLookaheadName(TokenStream *input) { + return getTokenName(input->LA(1)); +} + +void ParserATNSimulator::dumpDeadEndConfigs(NoViableAltException &nvae) { + std::cerr << "dead end configs: "; + for (const auto &c : nvae.getDeadEndConfigs()->configs) { + std::string trans = "no edges"; + if (c->state->transitions.size() > 0) { + const Transition *t = c->state->transitions[0].get(); + if (t != nullptr && t->getTransitionType() == TransitionType::ATOM) { + const AtomTransition *at = static_cast<const AtomTransition*>(t); + trans = "Atom " + getTokenName(at->_label); + } else if (t != nullptr && t->getTransitionType() == TransitionType::SET) { + const SetTransition *st = static_cast<const SetTransition*>(t); + trans = "Set "; + trans += st->set.toString(); + } else if (t != nullptr && t->getTransitionType() == TransitionType::NOT_SET) { + const SetTransition *st = static_cast<const NotSetTransition*>(t); + trans = "~Set "; + trans += st->set.toString(); + } + } + std::cerr << c->toString(true) + ":" + trans; + } +} + +NoViableAltException ParserATNSimulator::noViableAlt(TokenStream *input, ParserRuleContext *outerContext, + ATNConfigSet *configs, size_t startIndex, bool deleteConfigs) { + return NoViableAltException(parser, input, input->get(startIndex), input->LT(1), configs, outerContext, deleteConfigs); +} + +size_t ParserATNSimulator::getUniqueAlt(ATNConfigSet *configs) { + size_t alt = ATN::INVALID_ALT_NUMBER; + for (const auto &c : configs->configs) { + if (alt == ATN::INVALID_ALT_NUMBER) { + alt = c->alt; // found first alt + } else if (c->alt != alt) { + return ATN::INVALID_ALT_NUMBER; + } + } + return alt; +} + +dfa::DFAState *ParserATNSimulator::addDFAEdge(dfa::DFA &dfa, dfa::DFAState *from, 
ssize_t t, dfa::DFAState *to) { +#if DEBUG_DFA == 1 + std::cout << "EDGE " << from << " -> " << to << " upon " << getTokenName(t) << std::endl; +#endif + + if (to == nullptr) { + return nullptr; + } + + { + UniqueLock<SharedMutex> stateLock(atn._stateMutex); + to = addDFAState(dfa, to); // used existing if possible not incoming + } + if (from == nullptr || t > (int)atn.maxTokenType) { + return to; + } + + { + UniqueLock<SharedMutex> edgeLock(atn._edgeMutex); + from->edges[t] = to; // connect + } + +#if DEBUG_DFA == 1 + std::string dfaText; + if (parser != nullptr) { + dfaText = dfa.toString(parser->getVocabulary()); + } else { + dfaText = dfa.toString(dfa::Vocabulary()); + } + std::cout << "DFA=\n" << dfaText << std::endl; +#endif + + return to; +} + +dfa::DFAState *ParserATNSimulator::addDFAState(dfa::DFA &dfa, dfa::DFAState *D) { + if (D == ERROR.get()) { + return D; + } + + // Optimizing the configs below should not alter the hash code. Thus we can just do an insert + // which will only succeed if an equivalent DFAState does not already exist. + auto [existing, inserted] = dfa.states.insert(D); + if (!inserted) { + return *existing; + } + + // Previously we did a lookup, then set fields, then inserted. It was `dfa.states.size()`, since + // we already inserted we need to subtract one. 
+ D->stateNumber = static_cast<int>(dfa.states.size() - 1); + if (!D->configs->isReadonly()) { + D->configs->optimizeConfigs(this); + D->configs->setReadonly(true); + } + +#if DEBUG_DFA == 1 + std::cout << "adding new DFA state: " << D << std::endl; +#endif + + return D; +} + +void ParserATNSimulator::reportAttemptingFullContext(dfa::DFA &dfa, const antlrcpp::BitSet &conflictingAlts, + ATNConfigSet *configs, size_t startIndex, size_t stopIndex) { +#if DEBUG_DFA == 1 || RETRY_DEBUG == 1 + misc::Interval interval = misc::Interval((int)startIndex, (int)stopIndex); + std::cout << "reportAttemptingFullContext decision=" << dfa.decision << ":" << configs << ", input=" << parser->getTokenStream()->getText(interval) << std::endl; +#endif + + if (parser != nullptr) { + parser->getErrorListenerDispatch().reportAttemptingFullContext(parser, dfa, startIndex, stopIndex, conflictingAlts, configs); + } +} + +void ParserATNSimulator::reportContextSensitivity(dfa::DFA &dfa, size_t prediction, ATNConfigSet *configs, + size_t startIndex, size_t stopIndex) { +#if DEBUG_DFA == 1 || RETRY_DEBUG == 1 + misc::Interval interval = misc::Interval(startIndex, stopIndex); + std::cout << "reportContextSensitivity decision=" << dfa.decision << ":" << configs << ", input=" << parser->getTokenStream()->getText(interval) << std::endl; +#endif + + if (parser != nullptr) { + parser->getErrorListenerDispatch().reportContextSensitivity(parser, dfa, startIndex, stopIndex, prediction, configs); + } +} + +void ParserATNSimulator::reportAmbiguity(dfa::DFA &dfa, dfa::DFAState * /*D*/, size_t startIndex, size_t stopIndex, + bool exact, const antlrcpp::BitSet &ambigAlts, ATNConfigSet *configs) { +#if DEBUG_DFA == 1 || RETRY_DEBUG == 1 + misc::Interval interval = misc::Interval((int)startIndex, (int)stopIndex); + std::cout << "reportAmbiguity " << ambigAlts << ":" << configs << ", input=" << parser->getTokenStream()->getText(interval) << std::endl; +#endif + + if (parser != nullptr) { + 
parser->getErrorListenerDispatch().reportAmbiguity(parser, dfa, startIndex, stopIndex, exact, ambigAlts, configs); + } +} + +void ParserATNSimulator::setPredictionMode(PredictionMode newMode) { + _mode = newMode; +} + +atn::PredictionMode ParserATNSimulator::getPredictionMode() { + return _mode; +} + +Parser* ParserATNSimulator::getParser() { + return parser; +} + +#ifdef _MSC_VER +#pragma warning (disable:4996) // 'getenv': This function or variable may be unsafe. Consider using _dupenv_s instead. +#endif + +bool ParserATNSimulator::getLrLoopSetting() { + char *var = std::getenv("TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT"); + if (var == nullptr) + return false; + std::string value(var); + return value == "true" || value == "1"; +} + +#ifdef _MSC_VER +#pragma warning (default:4996) +#endif + +void ParserATNSimulator::InitializeInstanceFields() { + _mode = PredictionMode::LL; + _startIndex = 0; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ParserATNSimulator.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ParserATNSimulator.h new file mode 100644 index 0000000000..28fd059dd2 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ParserATNSimulator.h @@ -0,0 +1,911 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "PredictionMode.h" +#include "dfa/DFAState.h" +#include "atn/ATNSimulator.h" +#include "atn/PredictionContext.h" +#include "atn/PredictionContextMergeCache.h" +#include "atn/ParserATNSimulatorOptions.h" +#include "SemanticContext.h" +#include "atn/ATNConfig.h" + +namespace antlr4 { +namespace atn { + + /** + * The embodiment of the adaptive LL(*), ALL(*), parsing strategy. + * + * <p> + * The basic complexity of the adaptive strategy makes it harder to understand. + * We begin with ATN simulation to build paths in a DFA. 
Subsequent prediction + * requests go through the DFA first. If they reach a state without an edge for + * the current symbol, the algorithm fails over to the ATN simulation to + * complete the DFA path for the current input (until it finds a conflict state + * or uniquely predicting state).</p> + * + * <p> + * All of that is done without using the outer context because we want to create + * a DFA that is not dependent upon the rule invocation stack when we do a + * prediction. One DFA works in all contexts. We avoid using context not + * necessarily because it's slower, although it can be, but because of the DFA + * caching problem. The closure routine only considers the rule invocation stack + * created during prediction beginning in the decision rule. For example, if + * prediction occurs without invoking another rule's ATN, there are no context + * stacks in the configurations. When lack of context leads to a conflict, we + * don't know if it's an ambiguity or a weakness in the strong LL(*) parsing + * strategy (versus full LL(*)).</p> + * + * <p> + * When SLL yields a configuration set with conflict, we rewind the input and + * retry the ATN simulation, this time using full outer context without adding + * to the DFA. Configuration context stacks will be the full invocation stacks + * from the start rule. If we get a conflict using full context, then we can + * definitively say we have a true ambiguity for that input sequence. If we + * don't get a conflict, it implies that the decision is sensitive to the outer + * context. (It is not context-sensitive in the sense of context-sensitive + * grammars.)</p> + * + * <p> + * The next time we reach this DFA state with an SLL conflict, through DFA + * simulation, we will again retry the ATN simulation using full context mode. 
+ * This is slow because we can't save the results and have to "interpret" the + * ATN each time we get that input.</p> + * + * <p> + * <strong>CACHING FULL CONTEXT PREDICTIONS</strong></p> + * + * <p> + * We could cache results from full context to predicted alternative easily and + * that saves a lot of time but doesn't work in presence of predicates. The set + * of visible predicates from the ATN start state changes depending on the + * context, because closure can fall off the end of a rule. I tried to cache + * tuples (stack context, semantic context, predicted alt) but it was slower + * than interpreting and much more complicated. Also required a huge amount of + * memory. The goal is not to create the world's fastest parser anyway. I'd like + * to keep this algorithm simple. By launching multiple threads, we can improve + * the speed of parsing across a large number of files.</p> + * + * <p> + * There is no strict ordering between the amount of input used by SLL vs LL, + * which makes it really hard to build a cache for full context. Let's say that + * we have input A B C that leads to an SLL conflict with full context X. That + * implies that using X we might only use A B but we could also use A B C D to + * resolve conflict. Input A B C D could predict alternative 1 in one position + * in the input and A B C E could predict alternative 2 in another position in + * input. The conflicting SLL configurations could still be non-unique in the + * full context prediction, which would lead us to requiring more input than the + * original A B C. To make a prediction cache work, we have to track the exact + * input used during the previous prediction. That amounts to a cache that maps + * X to a specific DFA for that context.</p> + * + * <p> + * Something should be done for left-recursive expression predictions. They are + * likely LL(1) + pred eval. 
Easier to do the whole SLL unless error and retry + * with full LL thing Sam does.</p> + * + * <p> + * <strong>AVOIDING FULL CONTEXT PREDICTION</strong></p> + * + * <p> + * We avoid doing full context retry when the outer context is empty, we did not + * dip into the outer context by falling off the end of the decision state rule, + * or when we force SLL mode.</p> + * + * <p> + * As an example of the not dip into outer context case, consider super + * constructor calls versus function calls. One grammar might look like + * this:</p> + * + * <pre> + * ctorBody + * : '{' superCall? stat* '}' + * ; + * </pre> + * + * <p> + * Or, you might see something like</p> + * + * <pre> + * stat + * : superCall ';' + * | expression ';' + * | ... + * ; + * </pre> + * + * <p> + * In both cases I believe that no closure operations will dip into the outer + * context. In the first case ctorBody in the worst case will stop at the '}'. + * In the 2nd case it should stop at the ';'. Both cases should stay within the + * entry rule and not dip into the outer context.</p> + * + * <p> + * <strong>PREDICATES</strong></p> + * + * <p> + * Predicates are always evaluated if present, in both SLL and LL. SLL and + * LL simulation deal with predicates differently. SLL collects predicates as + * it performs closure operations like ANTLR v3 did. It delays predicate + * evaluation until it reaches an accept state. This allows us to cache the SLL + * ATN simulation whereas, if we had evaluated predicates on-the-fly during + * closure, the DFA state configuration sets would be different and we couldn't + * build up a suitable DFA.</p> + * + * <p> + * When building a DFA accept state during ATN simulation, we evaluate any + * predicates and return the sole semantically valid alternative. If there is + * more than 1 alternative, we report an ambiguity. If there are 0 alternatives, + * we throw an exception. Alternatives without predicates act like they have + * true predicates.
The simple way to think about it is to strip away all + * alternatives with false predicates and choose the minimum alternative that + * remains.</p> + * + * <p> + * When we start in the DFA and reach an accept state that's predicated, we test + * those and return the minimum semantically viable alternative. If no + * alternatives are viable, we throw an exception.</p> + * + * <p> + * During full LL ATN simulation, closure always evaluates predicates + * on-the-fly. This is crucial to reducing the configuration set size during + * closure. It hits a landmine when parsing with the Java grammar, for example, + * without this on-the-fly evaluation.</p> + * + * <p> + * <strong>SHARING DFA</strong></p> + * + * <p> + * All instances of the same parser share the same decision DFAs through a + * static field. Each instance gets its own ATN simulator but they share the + * same {@link #decisionToDFA} field. They also share a + * {@link PredictionContextCache} object that makes sure that all + * {@link PredictionContext} objects are shared among the DFA states. This makes + * a big size difference.</p> + * + * <p> + * <strong>THREAD SAFETY</strong></p> + * + * <p> + * The {@link ParserATNSimulator} locks on the {@link #decisionToDFA} field when + * it adds a new DFA object to that array. {@link #addDFAEdge} + * locks on the DFA for the current decision when setting the + * {@link DFAState#edges} field. {@link #addDFAState} locks on + * the DFA for the current decision when looking up a DFA state to see if it + * already exists. We must make sure that all requests to add DFA states that + * are equivalent result in the same shared DFA object. This is because lots of + * threads will be trying to update the DFA at once. The + * {@link #addDFAState} method also locks inside the DFA lock + * but this time on the shared context cache when it rebuilds the + * configurations' {@link PredictionContext} objects using cached + * subgraphs/nodes.
No other locking occurs, even during DFA simulation. This is + * safe as long as we can guarantee that all threads referencing + * {@code s.edge[t]} get the same physical target {@link DFAState}, or + * {@code null}. Once into the DFA, the DFA simulation does not reference the + * {@link DFA#states} map. It follows the {@link DFAState#edges} field to new + * targets. The DFA simulator will either find {@link DFAState#edges} to be + * {@code null}, to be non-{@code null} and {@code dfa.edges[t]} null, or + * {@code dfa.edges[t]} to be non-null. The + * {@link #addDFAEdge} method could be racing to set the field + * but in either case the DFA simulator works; if {@code null}, and requests ATN + * simulation. It could also race trying to get {@code dfa.edges[t]}, but either + * way it will work because it's not doing a test and set operation.</p> + * + * <p> + * <strong>Starting with SLL then failing to combined SLL/LL (Two-Stage + * Parsing)</strong></p> + * + * <p> + * Sam pointed out that if SLL does not give a syntax error, then there is no + * point in doing full LL, which is slower. We only have to try LL if we get a + * syntax error. For maximum speed, Sam starts the parser set to pure SLL + * mode with the {@link BailErrorStrategy}:</p> + * + * <pre> + * parser.{@link Parser#getInterpreter() getInterpreter()}.{@link #setPredictionMode setPredictionMode}{@code (}{@link PredictionMode#SLL}{@code )}; + * parser.{@link Parser#setErrorHandler setErrorHandler}(new {@link BailErrorStrategy}()); + * </pre> + * + * <p> + * If it does not get a syntax error, then we're done. If it does get a syntax + * error, we need to retry with the combined SLL/LL strategy.</p> + * + * <p> + * The reason this works is as follows. If there are no SLL conflicts, then the + * grammar is SLL (at least for that input set). If there is an SLL conflict, + * the full LL analysis must yield a set of viable alternatives which is a + * subset of the alternatives reported by SLL. 
If the LL set is a singleton, + * then the grammar is LL but not SLL. If the LL set is the same size as the SLL + * set, the decision is SLL. If the LL set has size > 1, then that decision + * is truly ambiguous on the current input. If the LL set is smaller, then the + * SLL conflict resolution might choose an alternative that the full LL would + * rule out as a possibility based upon better context information. If that's + * the case, then the SLL parse will definitely get an error because the full LL + * analysis says it's not viable. If SLL conflict resolution chooses an + * alternative within the LL set, them both SLL and LL would choose the same + * alternative because they both choose the minimum of multiple conflicting + * alternatives.</p> + * + * <p> + * Let's say we have a set of SLL conflicting alternatives {@code {1, 2, 3}} and + * a smaller LL set called <em>s</em>. If <em>s</em> is {@code {2, 3}}, then SLL + * parsing will get an error because SLL will pursue alternative 1. If + * <em>s</em> is {@code {1, 2}} or {@code {1, 3}} then both SLL and LL will + * choose the same alternative because alternative one is the minimum of either + * set. If <em>s</em> is {@code {2}} or {@code {3}} then SLL will get a syntax + * error. If <em>s</em> is {@code {1}} then SLL will succeed.</p> + * + * <p> + * Of course, if the input is invalid, then we will get an error for sure in + * both SLL and LL parsing. Erroneous input will therefore require 2 passes over + * the input.</p> + */ + class ANTLR4CPP_PUBLIC ParserATNSimulator : public ATNSimulator { + public: + /// Testing only! 
+ ParserATNSimulator(const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, + PredictionContextCache &sharedContextCache); + + ParserATNSimulator(Parser *parser, const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, + PredictionContextCache &sharedContextCache); + + ParserATNSimulator(Parser *parser, const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, + PredictionContextCache &sharedContextCache, + const ParserATNSimulatorOptions &options); + + virtual void reset() override; + virtual void clearDFA() override; + virtual size_t adaptivePredict(TokenStream *input, size_t decision, ParserRuleContext *outerContext); + + static const bool TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT; + + std::vector<dfa::DFA> &decisionToDFA; + + /** Implements first-edge (loop entry) elimination as an optimization + * during closure operations. See antlr/antlr4#1398. + * + * The optimization is to avoid adding the loop entry config when + * the exit path can only lead back to the same + * StarLoopEntryState after popping context at the rule end state + * (traversing only epsilon edges, so we're still in closure, in + * this same rule). + * + * We need to detect any state that can reach loop entry on + * epsilon w/o exiting rule. We don't have to look at FOLLOW + * links, just ensure that all stack tops for config refer to key + * states in LR rule. + * + * To verify we are in the right situation we must first check + * closure is at a StarLoopEntryState generated during LR removal. + * Then we check that each stack top of context is a return state + * from one of these cases: + * + * 1. 'not' expr, '(' type ')' expr. The return state points at loop entry state + * 2. expr op expr. The return state is the block end of internal block of (...)* + * 3. 'between' expr 'and' expr. The return state of 2nd expr reference. + * That state points at block end of internal block of (...)*. + * 4. expr '?' expr ':' expr. The return state points at block end, + * which points at loop entry state. 
+ * + * If any is true for each stack top, then closure does not add a + * config to the current config set for edge[0], the loop entry branch. + * + * Conditions fail if any context for the current config is: + * + * a. empty (we'd fall out of expr to do a global FOLLOW which could + * even be to some weird spot in expr) or, + * b. lies outside of expr or, + * c. lies within expr but at a state not the BlockEndState + * generated during LR removal + * + * Do we need to evaluate predicates ever in closure for this case? + * + * No. Predicates, including precedence predicates, are only + * evaluated when computing a DFA start state. I.e., only before + * the lookahead (but not parser) consumes a token. + * + * There are no epsilon edges allowed in LR rule alt blocks or in + * the "primary" part (ID here). If closure is in + * StarLoopEntryState any lookahead operation will have consumed a + * token as there are no epsilon-paths that lead to + * StarLoopEntryState. We do not have to evaluate predicates + * therefore if we are in the generated StarLoopEntryState of a LR + * rule. Note that when making a prediction starting at that + * decision point, decision d=2, compute-start-state performs + * closure starting at edges[0], edges[1] emanating from + * StarLoopEntryState. That means it is not performing closure on + * StarLoopEntryState during compute-start-state. + * + * How do we know this always gives same prediction answer? + * + * Without predicates, loop entry and exit paths are ambiguous + * upon remaining input +b (in, say, a+b). Either path leads to + * valid parses. Closure can lead to consuming + immediately or by + * falling out of this call to expr back into expr and loop back + * again to StarLoopEntryState to match +b. In this special case, + * we choose the more efficient path, which is to take the bypass + * path. + * + * The lookahead language has not changed because closure chooses + * one path over the other.
Both paths lead to consuming the same + * remaining input during a lookahead operation. If the next token + * is an operator, lookahead will enter the choice block with + * operators. If it is not, lookahead will exit expr. Same as if + * closure had chosen to enter the choice block immediately. + * + * Closure is examining one config (some loopentrystate, some alt, + * context) which means it is considering exactly one alt. Closure + * always copies the same alt to any derived configs. + * + * How do we know this optimization doesn't mess up precedence in + * our parse trees? + * + * Looking through expr from left edge of stat only has to confirm + * that an input, say, a+b+c; begins with any valid interpretation + * of an expression. The precedence actually doesn't matter when + * making a decision in stat seeing through expr. It is only when + * parsing rule expr that we must use the precedence to get the + * right interpretation and, hence, parse tree. + */ + bool canDropLoopEntryEdgeInLeftRecursiveRule(ATNConfig *config) const; + virtual std::string getRuleName(size_t index); + + virtual Ref<ATNConfig> precedenceTransition(Ref<ATNConfig> const& config, const PrecedencePredicateTransition *pt, + bool collectPredicates, bool inContext, bool fullCtx); + + void setPredictionMode(PredictionMode newMode); + PredictionMode getPredictionMode(); + + Parser* getParser(); + + virtual std::string getTokenName(size_t t); + + virtual std::string getLookaheadName(TokenStream *input); + + /// <summary> + /// Used for debugging in adaptivePredict around execATN but I cut + /// it out for clarity now that alg. works well. We can leave this + /// "dead" code for a bit. + /// </summary> + virtual void dumpDeadEndConfigs(NoViableAltException &nvae); + + protected: + Parser *const parser; + + /// <summary> + /// Each prediction operation uses a cache for merge of prediction contexts. + /// Don't keep around as it wastes huge amounts of memory. 
The merge cache + /// isn't synchronized but we're ok since two threads shouldn't reuse same + /// parser/atnsim object because it can only handle one input at a time. + /// This maps graphs a and b to merged result c. (a,b)->c. We can avoid + /// the merge if we ever see a and b again. Note that (b,a)->c should + /// also be examined during cache lookup. + /// </summary> + PredictionContextMergeCache mergeCache; + size_t _mergeCacheCounter = 0; + + // LAME globals to avoid parameters!!!!! I need these down deep in predTransition + TokenStream *_input; + size_t _startIndex; + ParserRuleContext *_outerContext; + dfa::DFA *_dfa; // Reference into the decisionToDFA vector. + + /// <summary> + /// Performs ATN simulation to compute a predicted alternative based + /// upon the remaining input, but also updates the DFA cache to avoid + /// having to traverse the ATN again for the same input sequence. + /// + /// There are some key conditions we're looking for after computing a new + /// set of ATN configs (proposed DFA state): + /// if the set is empty, there is no viable alternative for current symbol + /// does the state uniquely predict an alternative? + /// does the state have a conflict that would prevent us from + /// putting it on the work list? + /// + /// We also have some key operations to do: + /// add an edge from previous DFA state to potentially new DFA state, D, + /// upon current symbol but only if adding to work list, which means in all + /// cases except no viable alternative (and possibly non-greedy decisions?) 
+ /// collecting predicates and adding semantic context to DFA accept states + /// adding rule context to context-sensitive DFA accept states + /// consuming an input symbol + /// reporting a conflict + /// reporting an ambiguity + /// reporting a context sensitivity + /// reporting insufficient predicates + /// + /// cover these cases: + /// dead end + /// single alt + /// single alt + preds + /// conflict + /// conflict + preds + /// </summary> + virtual size_t execATN(dfa::DFA &dfa, dfa::DFAState *s0, TokenStream *input, size_t startIndex, + ParserRuleContext *outerContext); + + /// <summary> + /// Get an existing target state for an edge in the DFA. If the target state + /// for the edge has not yet been computed or is otherwise not available, + /// this method returns {@code null}. + /// </summary> + /// <param name="previousD"> The current DFA state </param> + /// <param name="t"> The next input symbol </param> + /// <returns> The existing target DFA state for the given input symbol + /// {@code t}, or {@code null} if the target state for this edge is not + /// already cached </returns> + virtual dfa::DFAState* getExistingTargetState(dfa::DFAState *previousD, size_t t); + + /// <summary> + /// Compute a target state for an edge in the DFA, and attempt to add the + /// computed state and corresponding edge to the DFA. + /// </summary> + /// <param name="dfa"> The DFA </param> + /// <param name="previousD"> The current DFA state </param> + /// <param name="t"> The next input symbol + /// </param> + /// <returns> The computed target DFA state for the given input symbol + /// {@code t}. If {@code t} does not lead to a valid DFA state, this method + /// returns <seealso cref="#ERROR"/>. 
</returns> + virtual dfa::DFAState *computeTargetState(dfa::DFA &dfa, dfa::DFAState *previousD, size_t t); + + virtual void predicateDFAState(dfa::DFAState *dfaState, DecisionState *decisionState); + + // comes back with reach.uniqueAlt set to a valid alt + virtual size_t execATNWithFullContext(dfa::DFA &dfa, dfa::DFAState *D, ATNConfigSet *s0, + TokenStream *input, size_t startIndex, ParserRuleContext *outerContext); // how far we got before failing over + + virtual std::unique_ptr<ATNConfigSet> computeReachSet(ATNConfigSet *closure, size_t t, bool fullCtx); + + /// <summary> + /// Return a configuration set containing only the configurations from + /// {@code configs} which are in a <seealso cref="RuleStopState"/>. If all + /// configurations in {@code configs} are already in a rule stop state, this + /// method simply returns {@code configs}. + /// <p/> + /// When {@code lookToEndOfRule} is true, this method uses + /// <seealso cref="ATN#nextTokens"/> for each configuration in {@code configs} which is + /// not already in a rule stop state to see if a rule stop state is reachable + /// from the configuration via epsilon-only transitions. + /// </summary> + /// <param name="configs"> the configuration set to update </param> + /// <param name="lookToEndOfRule"> when true, this method checks for rule stop states + /// reachable by epsilon-only transitions from each configuration in + /// {@code configs}. + /// </param> + /// <returns> {@code configs} if all configurations in {@code configs} are in a + /// rule stop state, otherwise return a new configuration set containing only + /// the configurations from {@code configs} which are in a rule stop state </returns> + virtual ATNConfigSet* removeAllConfigsNotInRuleStopState(ATNConfigSet *configs, bool lookToEndOfRule); + + virtual std::unique_ptr<ATNConfigSet> computeStartState(ATNState *p, RuleContext *ctx, bool fullCtx); + + /* parrt internal source braindump that doesn't mess up + * external API spec. 
+ + applyPrecedenceFilter is an optimization to avoid highly + nonlinear prediction of expressions and other left recursive + rules. The precedence predicates such as {3>=prec}? are highly + context-sensitive in that they can only be properly evaluated + in the context of the proper prec argument. Without pruning, + these predicates are normal predicates evaluated when we reach + a conflict state (or unique prediction). As we cannot evaluate + these predicates out of context, the resulting conflict leads + to full LL evaluation and nonlinear prediction which shows up + very clearly with fairly large expressions. + + Example grammar: + + e : e '*' e + | e '+' e + | INT + ; + + We convert that to the following: + + e[int prec] + : INT + ( {3>=prec}? '*' e[4] + | {2>=prec}? '+' e[3] + )* + ; + + The (..)* loop has a decision for the inner block as well as + an enter or exit decision, which is what concerns us here. At + the 1st + of input 1+2+3, the loop entry sees both predicates + and the loop exit also sees both predicates by falling off the + edge of e. This is because we have no stack information with + SLL and find the follow of e, which will hit the return states + inside the loop after e[4] and e[3], which brings it back to + the enter or exit decision. In this case, we know that we + cannot evaluate those predicates because we have fallen off + the edge of the stack and will in general not know which prec + parameter is the right one to use in the predicate. + + Because we have special information, that these are precedence + predicates, we can resolve them without failing over to full + LL despite their context sensitive nature. We make an + assumption that prec[-1] <= prec[0], meaning that the current + precedence level is greater than or equal to the precedence + level of recursive invocations above us in the stack. For + example, if predicate {3>=prec}? is true of the current prec, + then one option is to enter the loop to match it now.
The + other option is to exit the loop and the left recursive rule + to match the current operator in rule invocation further up + the stack. But, we know that all of those prec are lower or + the same value and so we can decide to enter the loop instead + of matching it later. That means we can strip out the other + configuration for the exit branch. + + So imagine we have (14,1,$,{2>=prec}?) and then + (14,2,$-dipsIntoOuterContext,{2>=prec}?). The optimization + allows us to collapse these two configurations. We know that + if {2>=prec}? is true for the current prec parameter, it will + also be true for any prec from an invoking e call, indicated + by dipsIntoOuterContext. As the predicates are both true, we + have the option to evaluate them early in the decision start + state. We do this by stripping both predicates and choosing to + enter the loop as it is consistent with the notion of operator + precedence. It's also how the full LL conflict resolution + would work. + + The solution requires a different DFA start state for each + precedence level. + + The basic filter mechanism is to remove configurations of the + form (p, 2, pi) if (p, 1, pi) exists for the same p and pi. In + other words, for the same ATN state and predicate context, + remove any configuration associated with an exit branch if + there is a configuration associated with the enter branch. + + It's also the case that the filter evaluates precedence + predicates and resolves conflicts according to precedence + levels. For example, for input 1+2+3 at the first +, we see + prediction filtering + + [(11,1,[$],{3>=prec}?), (14,1,[$],{2>=prec}?), (5,2,[$],up=1), + (11,2,[$],up=1), (14,2,[$],up=1)],hasSemanticContext=true,dipsIntoOuterContext + + to + + [(11,1,[$]), (14,1,[$]), (5,2,[$],up=1)],dipsIntoOuterContext + + This filters because {3>=prec}? evals to true and collapses + (11,1,[$],{3>=prec}?) 
and (11,2,[$],up=1) since early conflict + resolution based upon rules of operator precedence fits with + our usual match first alt upon conflict. + + We noticed a problem where a recursive call resets precedence + to 0. Sam's fix: each config has flag indicating if it has + returned from an expr[0] call. then just don't filter any + config with that flag set. flag is carried along in + closure(). so to avoid adding field, set bit just under sign + bit of dipsIntoOuterContext (SUPPRESS_PRECEDENCE_FILTER). + With the change you filter "unless (p, 2, pi) was reached + after leaving the rule stop state of the LR rule containing + state p, corresponding to a rule invocation with precedence + level 0" + */ + + /** + * This method transforms the start state computed by + * {@link #computeStartState} to the special start state used by a + * precedence DFA for a particular precedence value. The transformation + * process applies the following changes to the start state's configuration + * set. + * + * <ol> + * <li>Evaluate the precedence predicates for each configuration using + * {@link SemanticContext#evalPrecedence}.</li> + * <li>When {@link ATNConfig#isPrecedenceFilterSuppressed} is {@code false}, + * remove all configurations which predict an alternative greater than 1, + * for which another configuration that predicts alternative 1 is in the + * same ATN state with the same prediction context. 
This transformation is + * valid for the following reasons: + * <ul> + * <li>The closure block cannot contain any epsilon transitions which bypass + * the body of the closure, so all states reachable via alternative 1 are + * part of the precedence alternatives of the transformed left-recursive + * rule.</li> + * <li>The "primary" portion of a left recursive rule cannot contain an + * epsilon transition, so the only way an alternative other than 1 can exist + * in a state that is also reachable via alternative 1 is by nesting calls + * to the left-recursive rule, with the outer calls not being at the + * preferred precedence level. The + * {@link ATNConfig#isPrecedenceFilterSuppressed} property marks ATN + * configurations which do not meet this condition, and therefore are not + * eligible for elimination during the filtering process.</li> + * </ul> + * </li> + * </ol> + * + * <p> + * The prediction context must be considered by this filter to address + * situations like the following. + * </p> + * <code> + * <pre> + * grammar TA; + * prog: statement* EOF; + * statement: letterA | statement letterA 'b' ; + * letterA: 'a'; + * </pre> + * </code> + * <p> + * In the above grammar, the ATN state immediately before the token + * reference {@code 'a'} in {@code letterA} is reachable from the left edge + * of both the primary and closure blocks of the left-recursive rule + * {@code statement}. The prediction context associated with each of these + * configurations distinguishes between them, and prevents the alternative + * which stepped out to {@code prog} (and then back in to {@code statement}) + * from being eliminated by the filter. + * </p> + * + * @param configs The configuration set computed by + * {@link #computeStartState} as the start state for the DFA. + * @return The transformed configuration set representing the start state + * for a precedence DFA at a particular precedence level (determined by + * calling {@link Parser#getPrecedence}).
+ */ + std::unique_ptr<ATNConfigSet> applyPrecedenceFilter(ATNConfigSet *configs); + + virtual ATNState *getReachableTarget(const Transition *trans, size_t ttype); + + virtual std::vector<Ref<const SemanticContext>> getPredsForAmbigAlts(const antlrcpp::BitSet &ambigAlts, + ATNConfigSet *configs, size_t nalts); + + std::vector<dfa::DFAState::PredPrediction> getPredicatePredictions(const antlrcpp::BitSet &ambigAlts, + const std::vector<Ref<const SemanticContext>> &altToPred); + + /** + * This method is used to improve the localization of error messages by + * choosing an alternative rather than throwing a + * {@link NoViableAltException} in particular prediction scenarios where the + * {@link #ERROR} state was reached during ATN simulation. + * + * <p> + * The default implementation of this method uses the following + * algorithm to identify an ATN configuration which successfully parsed the + * decision entry rule. Choosing such an alternative ensures that the + * {@link ParserRuleContext} returned by the calling rule will be complete + * and valid, and the syntax error will be reported later at a more + * localized location.</p> + * + * <ul> + * <li>If a syntactically valid path or paths reach the end of the decision rule and + * they are semantically valid if predicated, return the min associated alt.</li> + * <li>Else, if a semantically invalid but syntactically valid path exist + * or paths exist, return the minimum associated alt. + * </li> + * <li>Otherwise, return {@link ATN#INVALID_ALT_NUMBER}.</li> + * </ul> + * + * <p> + * In some scenarios, the algorithm described above could predict an + * alternative which will result in a {@link FailedPredicateException} in + * the parser. Specifically, this could occur if the <em>only</em> configuration + * capable of successfully parsing to the end of the decision rule is + * blocked by a semantic predicate. 
By choosing this alternative within + * {@link #adaptivePredict} instead of throwing a + * {@link NoViableAltException}, the resulting + * {@link FailedPredicateException} in the parser will identify the specific + * predicate which is preventing the parser from successfully parsing the + * decision rule, which helps developers identify and correct logic errors + * in semantic predicates. + * </p> + * + * @param configs The ATN configurations which were valid immediately before + * the {@link #ERROR} state was reached + * @param outerContext The is the \gamma_0 initial parser context from the paper + * or the parser stack at the instant before prediction commences. + * + * @return The value to return from {@link #adaptivePredict}, or + * {@link ATN#INVALID_ALT_NUMBER} if a suitable alternative was not + * identified and {@link #adaptivePredict} should report an error instead. + */ + size_t getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(ATNConfigSet *configs, + ParserRuleContext *outerContext); + + virtual size_t getAltThatFinishedDecisionEntryRule(ATNConfigSet *configs); + + /** Walk the list of configurations and split them according to + * those that have preds evaluating to true/false. If no pred, assume + * true pred and include in succeeded set. Returns Pair of sets. + * + * Create a new set so as not to alter the incoming parameter. + * + * Assumption: the input stream has been restored to the starting point + * prediction, which is where predicates need to evaluate. + */ + std::pair<ATNConfigSet *, ATNConfigSet *> splitAccordingToSemanticValidity(ATNConfigSet *configs, + ParserRuleContext *outerContext); + + /// <summary> + /// Look through a list of predicate/alt pairs, returning alts for the + /// pairs that win. A {@code NONE} predicate indicates an alt containing an + /// unpredicated config which behaves as "always true." If !complete + /// then we stop at the first predicate that evaluates to true. 
This + /// includes pairs with null predicates. + /// </summary> + antlrcpp::BitSet evalSemanticContext(const std::vector<dfa::DFAState::PredPrediction> &predPredictions, + ParserRuleContext *outerContext, bool complete); + + /** + * Evaluate a semantic context within a specific parser context. + * + * <p> + * This method might not be called for every semantic context evaluated + * during the prediction process. In particular, we currently do not + * evaluate the following but it may change in the future:</p> + * + * <ul> + * <li>Precedence predicates (represented by + * {@link SemanticContext.PrecedencePredicate}) are not currently evaluated + * through this method.</li> + * <li>Operator predicates (represented by {@link SemanticContext.AND} and + * {@link SemanticContext.OR}) are evaluated as a single semantic + * context, rather than evaluating the operands individually. + * Implementations which require evaluation results from individual + * predicates should override this method to explicitly handle evaluation of + * the operands within operator predicates.</li> + * </ul> + * + * @param pred The semantic context to evaluate + * @param parserCallStack The parser context in which to evaluate the + * semantic context + * @param alt The alternative which is guarded by {@code pred} + * @param fullCtx {@code true} if the evaluation is occurring during LL + * prediction; otherwise, {@code false} if the evaluation is occurring + * during SLL prediction + * + * @since 4.3 + */ + virtual bool evalSemanticContext(Ref<const SemanticContext> const& pred, ParserRuleContext *parserCallStack, + size_t alt, bool fullCtx); + + /* TODO: If we are doing predicates, there is no point in pursuing + closure operations if we reach a DFA state that uniquely predicts + alternative. We will not be caching that DFA state and it is a + waste to pursue the closure. 
Might have to advance when we do + ambig detection thought :( + */ + virtual void closure(Ref<ATNConfig> const& config, ATNConfigSet *configs, ATNConfig::Set &closureBusy, + bool collectPredicates, bool fullCtx, bool treatEofAsEpsilon); + + virtual void closureCheckingStopState(Ref<ATNConfig> const& config, ATNConfigSet *configs, ATNConfig::Set &closureBusy, + bool collectPredicates, bool fullCtx, int depth, bool treatEofAsEpsilon); + + /// Do the actual work of walking epsilon edges. + virtual void closure_(Ref<ATNConfig> const& config, ATNConfigSet *configs, ATNConfig::Set &closureBusy, + bool collectPredicates, bool fullCtx, int depth, bool treatEofAsEpsilon); + + virtual Ref<ATNConfig> getEpsilonTarget(Ref<ATNConfig> const& config, const Transition *t, bool collectPredicates, + bool inContext, bool fullCtx, bool treatEofAsEpsilon); + virtual Ref<ATNConfig> actionTransition(Ref<ATNConfig> const& config, const ActionTransition *t); + + virtual Ref<ATNConfig> predTransition(Ref<ATNConfig> const& config, const PredicateTransition *pt, bool collectPredicates, + bool inContext, bool fullCtx); + + virtual Ref<ATNConfig> ruleTransition(Ref<ATNConfig> const& config, const RuleTransition *t); + + /** + * Gets a {@link BitSet} containing the alternatives in {@code configs} + * which are part of one or more conflicting alternative subsets. + * + * @param configs The {@link ATNConfigSet} to analyze. + * @return The alternatives in {@code configs} which are part of one or more + * conflicting alternative subsets. If {@code configs} does not contain any + * conflicting subsets, this method returns an empty {@link BitSet}. + */ + virtual antlrcpp::BitSet getConflictingAlts(ATNConfigSet *configs); + + /// <summary> + /// Sam pointed out a problem with the previous definition, v3, of + /// ambiguous states. If we have another state associated with conflicting + /// alternatives, we should keep going. For example, the following grammar + /// + /// s : (ID | ID ID?) 
';' ; + /// + /// When the ATN simulation reaches the state before ';', it has a DFA + /// state that looks like: [12|1|[], 6|2|[], 12|2|[]]. Naturally + /// 12|1|[] and 12|2|[] conflict, but we cannot stop processing this node + /// because alternative two has another way to continue, via [6|2|[]]. + /// The key is that we have a single state that has configs only associated + /// with a single alternative, 2, and crucially the state transitions + /// among the configurations are all non-epsilon transitions. That means + /// we don't consider any conflicts that include alternative 2. So, we + /// ignore the conflict between alts 1 and 2. We ignore a set of + /// conflicting alts when there is an intersection with an alternative + /// associated with a single alt state in the state->config-list map. + /// + /// It's also the case that we might have two conflicting configurations but + /// also a 3rd nonconflicting configuration for a different alternative: + /// [1|1|[], 1|2|[], 8|3|[]]. This can come about from grammar: + /// + /// a : A | A | A B ; + /// + /// After matching input A, we reach the stop state for rule A, state 1. + /// State 8 is the state right before B. Clearly alternatives 1 and 2 + /// conflict and no amount of further lookahead will separate the two. + /// However, alternative 3 will be able to continue and so we do not + /// stop working on this state. In the previous example, we're concerned + /// with states associated with the conflicting alternatives. Here alt + /// 3 is not associated with the conflicting configs, but since we can continue + /// looking for input reasonably, I don't declare the state done. We + /// ignore a set of conflicting alts when we have an alternative + /// that we still need to pursue. 
+ /// </summary> + + virtual antlrcpp::BitSet getConflictingAltsOrUniqueAlt(ATNConfigSet *configs); + + virtual NoViableAltException noViableAlt(TokenStream *input, ParserRuleContext *outerContext, + ATNConfigSet *configs, size_t startIndex, bool deleteConfigs); + + static size_t getUniqueAlt(ATNConfigSet *configs); + + /// <summary> + /// Add an edge to the DFA, if possible. This method calls + /// <seealso cref="#addDFAState"/> to ensure the {@code to} state is present in the + /// DFA. If {@code from} is {@code null}, or if {@code t} is outside the + /// range of edges that can be represented in the DFA tables, this method + /// returns without adding the edge to the DFA. + /// <p/> + /// If {@code to} is {@code null}, this method returns {@code null}. + /// Otherwise, this method returns the <seealso cref="DFAState"/> returned by calling + /// <seealso cref="#addDFAState"/> for the {@code to} state. + /// </summary> + /// <param name="dfa"> The DFA </param> + /// <param name="from"> The source state for the edge </param> + /// <param name="t"> The input symbol </param> + /// <param name="to"> The target state for the edge + /// </param> + /// <returns> If {@code to} is {@code null}, this method returns {@code null}; + /// otherwise this method returns the result of calling <seealso cref="#addDFAState"/> + /// on {@code to} </returns> + virtual dfa::DFAState *addDFAEdge(dfa::DFA &dfa, dfa::DFAState *from, ssize_t t, dfa::DFAState *to); + + /// <summary> + /// Add state {@code D} to the DFA if it is not already present, and return + /// the actual instance stored in the DFA. If a state equivalent to {@code D} + /// is already in the DFA, the existing state is returned. Otherwise this + /// method returns {@code D} after adding it to the DFA. + /// <p/> + /// If {@code D} is <seealso cref="#ERROR"/>, this method returns <seealso cref="#ERROR"/> and + /// does not change the DFA. 
+ /// </summary> + /// <param name="dfa"> The dfa </param> + /// <param name="D"> The DFA state to add </param> + /// <returns> The state stored in the DFA. This will be either the existing + /// state if {@code D} is already in the DFA, or {@code D} itself if the + /// state was not already present. </returns> + virtual dfa::DFAState *addDFAState(dfa::DFA &dfa, dfa::DFAState *D); + + virtual void reportAttemptingFullContext(dfa::DFA &dfa, const antlrcpp::BitSet &conflictingAlts, + ATNConfigSet *configs, size_t startIndex, size_t stopIndex); + + virtual void reportContextSensitivity(dfa::DFA &dfa, size_t prediction, ATNConfigSet *configs, + size_t startIndex, size_t stopIndex); + + /// If context sensitive parsing, we know it's ambiguity not conflict. + virtual void reportAmbiguity(dfa::DFA &dfa, + dfa::DFAState *D, // the DFA state from execATN() that had SLL conflicts + size_t startIndex, size_t stopIndex, + bool exact, + const antlrcpp::BitSet &ambigAlts, + ATNConfigSet *configs); // configs that LL not SLL considered conflicting + + private: + // SLL, LL, or LL + exact ambig detection? + PredictionMode _mode; + + static bool getLrLoopSetting(); + void InitializeInstanceFields(); + }; + +} // namespace atn +} // namespace antlr4 + diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ParserATNSimulatorOptions.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ParserATNSimulatorOptions.h new file mode 100644 index 0000000000..ea31226d25 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ParserATNSimulatorOptions.h @@ -0,0 +1,50 @@ +// Copyright 2012-2022 The ANTLR Project +// +// Redistribution and use in source and binary forms, with or without modification, are permitted +// provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of conditions +// and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written +// permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#pragma once + +#include "atn/PredictionContextMergeCacheOptions.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC ParserATNSimulatorOptions final { + public: + ParserATNSimulatorOptions& setPredictionContextMergeCacheOptions( + PredictionContextMergeCacheOptions predictionContextMergeCacheOptions) { + _predictionContextMergeCacheOptions = std::move(predictionContextMergeCacheOptions); + return *this; + } + + const PredictionContextMergeCacheOptions& getPredictionContextMergeCacheOptions() const { + return _predictionContextMergeCacheOptions; + } + + private: + PredictionContextMergeCacheOptions _predictionContextMergeCacheOptions; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PlusBlockStartState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/PlusBlockStartState.h new file mode 100644 index 0000000000..b6103dc4d0 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PlusBlockStartState.h @@ -0,0 +1,29 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/BlockStartState.h" + +namespace antlr4 { +namespace atn { + + /// Start of {@code (A|B|...)+} loop. Technically a decision state, but + /// we don't use for code generation; somebody might need it, so I'm defining + /// it for completeness. In reality, the <seealso cref="PlusLoopbackState"/> node is the + /// real decision-making note for {@code A+}. 
+ class ANTLR4CPP_PUBLIC PlusBlockStartState final : public BlockStartState { + public: + static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::PLUS_BLOCK_START; } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + PlusLoopbackState *loopBackState = nullptr; + + PlusBlockStartState() : BlockStartState(ATNStateType::PLUS_BLOCK_START) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PlusLoopbackState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/PlusLoopbackState.h new file mode 100644 index 0000000000..07f25aa0c9 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PlusLoopbackState.h @@ -0,0 +1,25 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionState.h" + +namespace antlr4 { +namespace atn { + + /// Decision state for {@code A+} and {@code (A|B)+}. It has two transitions: + /// one to the loop back to start of the block and one to exit. + class ANTLR4CPP_PUBLIC PlusLoopbackState final : public DecisionState { + public: + static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::PLUS_LOOP_BACK; } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + PlusLoopbackState() : DecisionState(ATNStateType::PLUS_LOOP_BACK) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PrecedencePredicateTransition.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/PrecedencePredicateTransition.cpp new file mode 100644 index 0000000000..b8685e9516 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PrecedencePredicateTransition.cpp @@ -0,0 +1,23 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/PrecedencePredicateTransition.h" + +using namespace antlr4::atn; + +PrecedencePredicateTransition::PrecedencePredicateTransition(ATNState *target, int precedence) + : Transition(TransitionType::PRECEDENCE, target), _predicate(std::make_shared<SemanticContext::PrecedencePredicate>(precedence)) {} + +bool PrecedencePredicateTransition::isEpsilon() const { + return true; +} + +bool PrecedencePredicateTransition::matches(size_t /*symbol*/, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const { + return false; +} + +std::string PrecedencePredicateTransition::toString() const { + return "PRECEDENCE " + Transition::toString() + " { precedence: " + std::to_string(getPrecedence()) + " }"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PrecedencePredicateTransition.h b/contrib/libs/antlr4_cpp_runtime/src/atn/PrecedencePredicateTransition.h new file mode 100644 index 0000000000..3db79a9b73 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PrecedencePredicateTransition.h @@ -0,0 +1,35 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "atn/Transition.h" +#include "atn/SemanticContext.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC PrecedencePredicateTransition final : public Transition { + public: + static bool is(const Transition &transition) { return transition.getTransitionType() == TransitionType::PRECEDENCE; } + + static bool is(const Transition *transition) { return transition != nullptr && is(*transition); } + + PrecedencePredicateTransition(ATNState *target, int precedence); + + int getPrecedence() const { return _predicate->precedence; } + + bool isEpsilon() const override; + bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + std::string toString() const override; + + const Ref<const SemanticContext::PrecedencePredicate>& getPredicate() const { return _predicate; } + + private: + const std::shared_ptr<const SemanticContext::PrecedencePredicate> _predicate; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateEvalInfo.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateEvalInfo.cpp new file mode 100644 index 0000000000..73ee2a2b97 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateEvalInfo.cpp @@ -0,0 +1,17 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "SemanticContext.h" + +#include "atn/PredicateEvalInfo.h" + +using namespace antlr4; +using namespace antlr4::atn; + +PredicateEvalInfo::PredicateEvalInfo(size_t decision, TokenStream *input, size_t startIndex, size_t stopIndex, + Ref<const SemanticContext> semctx, bool evalResult, size_t predictedAlt, bool fullCtx) + : DecisionEventInfo(decision, nullptr, input, startIndex, stopIndex, fullCtx), + semctx(std::move(semctx)), predictedAlt(predictedAlt), evalResult(evalResult) { +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateEvalInfo.h b/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateEvalInfo.h new file mode 100644 index 0000000000..f343f541cb --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateEvalInfo.h @@ -0,0 +1,62 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionEventInfo.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// This class represents profiling event information for semantic predicate + /// evaluations which occur during prediction. + /// </summary> + /// <seealso cref= ParserATNSimulator#evalSemanticContext + /// + /// @since 4.3 </seealso> + class ANTLR4CPP_PUBLIC PredicateEvalInfo : public DecisionEventInfo { + public: + /// The semantic context which was evaluated. + const Ref<const SemanticContext> semctx; + + /// <summary> + /// The alternative number for the decision which is guarded by the semantic + /// context <seealso cref="#semctx"/>. Note that other ATN + /// configurations may predict the same alternative which are guarded by + /// other semantic contexts and/or <seealso cref="SemanticContext#NONE"/>. + /// </summary> + const size_t predictedAlt; + + /// The result of evaluating the semantic context <seealso cref="#semctx"/>. 
+ const bool evalResult; + + /// <summary> + /// Constructs a new instance of the <seealso cref="PredicateEvalInfo"/> class with the + /// specified detailed predicate evaluation information. + /// </summary> + /// <param name="decision"> The decision number </param> + /// <param name="input"> The input token stream </param> + /// <param name="startIndex"> The start index for the current prediction </param> + /// <param name="stopIndex"> The index at which the predicate evaluation was + /// triggered. Note that the input stream may be reset to other positions for + /// the actual evaluation of individual predicates. </param> + /// <param name="semctx"> The semantic context which was evaluated </param> + /// <param name="evalResult"> The results of evaluating the semantic context </param> + /// <param name="predictedAlt"> The alternative number for the decision which is + /// guarded by the semantic context {@code semctx}. See <seealso cref="#predictedAlt"/> + /// for more information. </param> + /// <param name="fullCtx"> {@code true} if the semantic context was + /// evaluated during LL prediction; otherwise, {@code false} if the semantic + /// context was evaluated during SLL prediction + /// </param> + /// <seealso cref= ParserATNSimulator#evalSemanticContext(SemanticContext, ParserRuleContext, int, boolean) </seealso> + /// <seealso cref= SemanticContext#eval(Recognizer, RuleContext) </seealso> + PredicateEvalInfo(size_t decision, TokenStream *input, size_t startIndex, size_t stopIndex, + Ref<const SemanticContext> semctx, bool evalResult, size_t predictedAlt, bool fullCtx); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateTransition.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateTransition.cpp new file mode 100644 index 0000000000..d76dbd203a --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateTransition.cpp @@ -0,0 +1,24 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. 
All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/PredicateTransition.h" + +using namespace antlr4::atn; + +PredicateTransition::PredicateTransition(ATNState *target, size_t ruleIndex, size_t predIndex, bool isCtxDependent) + : Transition(TransitionType::PREDICATE, target), _predicate(std::make_shared<SemanticContext::Predicate>(ruleIndex, predIndex, isCtxDependent)) {} + +bool PredicateTransition::isEpsilon() const { + return true; +} + +bool PredicateTransition::matches(size_t /*symbol*/, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const { + return false; +} + +std::string PredicateTransition::toString() const { + return "PREDICATE " + Transition::toString() + " { ruleIndex: " + std::to_string(getRuleIndex()) + + ", predIndex: " + std::to_string(getPredIndex()) + ", isCtxDependent: " + std::to_string(isCtxDependent()) + " }"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateTransition.h b/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateTransition.h new file mode 100644 index 0000000000..e889b1c198 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateTransition.h @@ -0,0 +1,50 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/Transition.h" +#include "atn/SemanticContext.h" + +namespace antlr4 { +namespace atn { + + /// TODO: this is old comment: + /// A tree of semantic predicates from the grammar AST if label==SEMPRED. + /// In the ATN, labels will always be exactly one predicate, but the DFA + /// may have to combine a bunch of them as it collects predicates from + /// multiple ATN configurations into a single DFA state. 
+ class ANTLR4CPP_PUBLIC PredicateTransition final : public Transition { + public: + static bool is(const Transition &transition) { return transition.getTransitionType() == TransitionType::PREDICATE; } + + static bool is(const Transition *transition) { return transition != nullptr && is(*transition); } + + PredicateTransition(ATNState *target, size_t ruleIndex, size_t predIndex, bool isCtxDependent); + + size_t getRuleIndex() const { + return _predicate->ruleIndex; + } + + size_t getPredIndex() const { + return _predicate->predIndex; + } + + bool isCtxDependent() const { + return _predicate->isCtxDependent; + } + + bool isEpsilon() const override; + bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + std::string toString() const override; + + const Ref<const SemanticContext::Predicate>& getPredicate() const { return _predicate; } + + private: + const std::shared_ptr<const SemanticContext::Predicate> _predicate; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContext.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContext.cpp new file mode 100644 index 0000000000..704408f04d --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContext.cpp @@ -0,0 +1,579 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/SingletonPredictionContext.h" +#include "misc/MurmurHash.h" +#include "atn/ArrayPredictionContext.h" +#include "atn/PredictionContextCache.h" +#include "atn/PredictionContextMergeCache.h" +#include "RuleContext.h" +#include "ParserRuleContext.h" +#include "atn/RuleTransition.h" +#include "support/Arrays.h" +#include "support/CPPUtils.h" +#include "support/Casts.h" + +#include "atn/PredictionContext.h" + +using namespace antlr4; +using namespace antlr4::misc; +using namespace antlr4::atn; +using namespace antlrcpp; + +namespace { + + void combineCommonParents(std::vector<Ref<const PredictionContext>> &parents) { + std::unordered_set<Ref<const PredictionContext>> uniqueParents; + uniqueParents.reserve(parents.size()); + for (const auto &parent : parents) { + uniqueParents.insert(parent); + } + for (auto &parent : parents) { + parent = *uniqueParents.find(parent); + } + } + + Ref<const PredictionContext> getCachedContextImpl(const Ref<const PredictionContext> &context, + PredictionContextCache &contextCache, + std::unordered_map<Ref<const PredictionContext>, + Ref<const PredictionContext>> &visited) { + if (context->isEmpty()) { + return context; + } + + { + auto iterator = visited.find(context); + if (iterator != visited.end()) { + return iterator->second; // Not necessarly the same as context. 
+ } + } + + auto cached = contextCache.get(context); + if (cached) { + visited[context] = cached; + return cached; + } + + bool changed = false; + + std::vector<Ref<const PredictionContext>> parents(context->size()); + for (size_t i = 0; i < parents.size(); i++) { + auto parent = getCachedContextImpl(context->getParent(i), contextCache, visited); + if (changed || parent != context->getParent(i)) { + if (!changed) { + parents.clear(); + for (size_t j = 0; j < context->size(); j++) { + parents.push_back(context->getParent(j)); + } + + changed = true; + } + + parents[i] = std::move(parent); + } + } + + if (!changed) { + visited[context] = context; + contextCache.put(context); + return context; + } + + Ref<const PredictionContext> updated; + if (parents.empty()) { + updated = PredictionContext::EMPTY; + } else if (parents.size() == 1) { + updated = SingletonPredictionContext::create(std::move(parents[0]), context->getReturnState(0)); + contextCache.put(updated); + } else { + updated = std::make_shared<ArrayPredictionContext>(std::move(parents), downCast<const ArrayPredictionContext*>(context.get())->returnStates); + contextCache.put(updated); + } + + visited[updated] = updated; + visited[context] = updated; + + return updated; + } + + void getAllContextNodesImpl(const Ref<const PredictionContext> &context, + std::vector<Ref<const PredictionContext>> &nodes, + std::unordered_set<const PredictionContext*> &visited) { + + if (visited.find(context.get()) != visited.end()) { + return; // Already done. 
+ } + + visited.insert(context.get()); + nodes.push_back(context); + + for (size_t i = 0; i < context->size(); i++) { + getAllContextNodesImpl(context->getParent(i), nodes, visited); + } + } + + size_t insertOrAssignNodeId(std::unordered_map<const PredictionContext*, size_t> &nodeIds, size_t &nodeId, const PredictionContext *node) { + auto existing = nodeIds.find(node); + if (existing != nodeIds.end()) { + return existing->second; + } + return nodeIds.insert({node, nodeId++}).first->second; + } + +} + +const Ref<const PredictionContext> PredictionContext::EMPTY = std::make_shared<SingletonPredictionContext>(nullptr, PredictionContext::EMPTY_RETURN_STATE); + +//----------------- PredictionContext ---------------------------------------------------------------------------------- + +PredictionContext::PredictionContext(PredictionContextType contextType) : _contextType(contextType), _hashCode(0) {} + +PredictionContext::PredictionContext(PredictionContext&& other) : _contextType(other._contextType), _hashCode(other._hashCode.exchange(0, std::memory_order_relaxed)) {} + +Ref<const PredictionContext> PredictionContext::fromRuleContext(const ATN &atn, RuleContext *outerContext) { + if (outerContext == nullptr) { + return PredictionContext::EMPTY; + } + + // if we are in RuleContext of start rule, s, then PredictionContext + // is EMPTY. Nobody called us. (if we are empty, return empty) + if (outerContext->parent == nullptr || outerContext == &ParserRuleContext::EMPTY) { + return PredictionContext::EMPTY; + } + + // If we have a parent, convert it to a PredictionContext graph + auto parent = PredictionContext::fromRuleContext(atn, RuleContext::is(outerContext->parent) ? 
downCast<RuleContext*>(outerContext->parent) : nullptr); + const auto *transition = downCast<const RuleTransition*>(atn.states[outerContext->invokingState]->transitions[0].get()); + return SingletonPredictionContext::create(std::move(parent), transition->followState->stateNumber); +} + +bool PredictionContext::hasEmptyPath() const { + // since EMPTY_RETURN_STATE can only appear in the last position, we check last one + return getReturnState(size() - 1) == EMPTY_RETURN_STATE; +} + +size_t PredictionContext::hashCode() const { + auto hash = cachedHashCode(); + if (hash == 0) { + hash = hashCodeImpl(); + if (hash == 0) { + hash = std::numeric_limits<size_t>::max(); + } + _hashCode.store(hash, std::memory_order_relaxed); + } + return hash; +} + +Ref<const PredictionContext> PredictionContext::merge(Ref<const PredictionContext> a, Ref<const PredictionContext> b, + bool rootIsWildcard, PredictionContextMergeCache *mergeCache) { + assert(a && b); + + // share same graph if both same + if (a == b || *a == *b) { + return a; + } + + const auto aType = a->getContextType(); + const auto bType = b->getContextType(); + + if (aType == PredictionContextType::SINGLETON && bType == PredictionContextType::SINGLETON) { + return mergeSingletons(std::static_pointer_cast<const SingletonPredictionContext>(std::move(a)), + std::static_pointer_cast<const SingletonPredictionContext>(std::move(b)), rootIsWildcard, mergeCache); + } + + // At least one of a or b is array. + // If one is $ and rootIsWildcard, return $ as * wildcard. 
+ if (rootIsWildcard) { + if (a == PredictionContext::EMPTY) { + return a; + } + if (b == PredictionContext::EMPTY) { + return b; + } + } + + // convert singleton so both are arrays to normalize + Ref<const ArrayPredictionContext> left; + if (aType == PredictionContextType::SINGLETON) { + left = std::make_shared<ArrayPredictionContext>(downCast<const SingletonPredictionContext&>(*a)); + } else { + left = std::static_pointer_cast<const ArrayPredictionContext>(std::move(a)); + } + Ref<const ArrayPredictionContext> right; + if (bType == PredictionContextType::SINGLETON) { + right = std::make_shared<ArrayPredictionContext>(downCast<const SingletonPredictionContext&>(*b)); + } else { + right = std::static_pointer_cast<const ArrayPredictionContext>(std::move(b)); + } + return mergeArrays(std::move(left), std::move(right), rootIsWildcard, mergeCache); +} + +Ref<const PredictionContext> PredictionContext::mergeSingletons(Ref<const SingletonPredictionContext> a, Ref<const SingletonPredictionContext> b, + bool rootIsWildcard, PredictionContextMergeCache *mergeCache) { + + if (mergeCache) { + auto existing = mergeCache->get(a, b); + if (existing) { + return existing; + } + existing = mergeCache->get(b, a); + if (existing) { + return existing; + } + } + + auto rootMerge = mergeRoot(a, b, rootIsWildcard); + if (rootMerge) { + if (mergeCache) { + return mergeCache->put(a, b, std::move(rootMerge)); + } + return rootMerge; + } + + const auto& parentA = a->parent; + const auto& parentB = b->parent; + if (a->returnState == b->returnState) { // a == b + auto parent = merge(parentA, parentB, rootIsWildcard, mergeCache); + + // If parent is same as existing a or b parent or reduced to a parent, return it. + if (parent == parentA) { // ax + bx = ax, if a=b + return a; + } + if (parent == parentB) { // ax + bx = bx, if a=b + return b; + } + + // else: ax + ay = a'[x,y] + // merge parents x and y, giving array node with x,y then remainders + // of those graphs. 
dup a, a' points at merged array + // new joined parent so create new singleton pointing to it, a' + auto c = SingletonPredictionContext::create(std::move(parent), a->returnState); + if (mergeCache) { + return mergeCache->put(a, b, std::move(c)); + } + return c; + } + // a != b payloads differ + // see if we can collapse parents due to $+x parents if local ctx + Ref<const PredictionContext> singleParent; + if (a == b || (*parentA == *parentB)) { // ax + bx = [a,b]x + singleParent = parentA; + } + if (singleParent) { // parents are same, sort payloads and use same parent + std::vector<size_t> payloads = { a->returnState, b->returnState }; + if (a->returnState > b->returnState) { + payloads[0] = b->returnState; + payloads[1] = a->returnState; + } + std::vector<Ref<const PredictionContext>> parents = { singleParent, singleParent }; + auto c = std::make_shared<ArrayPredictionContext>(std::move(parents), std::move(payloads)); + if (mergeCache) { + return mergeCache->put(a, b, std::move(c)); + } + return c; + } + + // parents differ and can't merge them. Just pack together + // into array; can't merge. 
+ // ax + by = [ax,by] + if (a->returnState > b->returnState) { // sort by payload + std::vector<size_t> payloads = { b->returnState, a->returnState }; + std::vector<Ref<const PredictionContext>> parents = { b->parent, a->parent }; + auto c = std::make_shared<ArrayPredictionContext>(std::move(parents), std::move(payloads)); + if (mergeCache) { + return mergeCache->put(a, b, std::move(c)); + } + return c; + } + std::vector<size_t> payloads = {a->returnState, b->returnState}; + std::vector<Ref<const PredictionContext>> parents = { a->parent, b->parent }; + auto c = std::make_shared<ArrayPredictionContext>(std::move(parents), std::move(payloads)); + if (mergeCache) { + return mergeCache->put(a, b, std::move(c)); + } + return c; +} + +Ref<const PredictionContext> PredictionContext::mergeRoot(Ref<const SingletonPredictionContext> a, Ref<const SingletonPredictionContext> b, + bool rootIsWildcard) { + if (rootIsWildcard) { + if (a == EMPTY) { // * + b = * + return EMPTY; + } + if (b == EMPTY) { // a + * = * + return EMPTY; + } + } else { + if (a == EMPTY && b == EMPTY) { // $ + $ = $ + return EMPTY; + } + if (a == EMPTY) { // $ + x = [$,x] + std::vector<size_t> payloads = { b->returnState, EMPTY_RETURN_STATE }; + std::vector<Ref<const PredictionContext>> parents = { b->parent, nullptr }; + return std::make_shared<ArrayPredictionContext>(std::move(parents), std::move(payloads)); + } + if (b == EMPTY) { // x + $ = [$,x] ($ is always first if present) + std::vector<size_t> payloads = { a->returnState, EMPTY_RETURN_STATE }; + std::vector<Ref<const PredictionContext>> parents = { a->parent, nullptr }; + return std::make_shared<ArrayPredictionContext>(std::move(parents), std::move(payloads)); + } + } + return nullptr; +} + +Ref<const PredictionContext> PredictionContext::mergeArrays(Ref<const ArrayPredictionContext> a, Ref<const ArrayPredictionContext> b, + bool rootIsWildcard, PredictionContextMergeCache *mergeCache) { + + if (mergeCache) { + auto existing = mergeCache->get(a, 
b); + if (existing) { + return existing; + } + existing = mergeCache->get(b, a); + if (existing) { + return existing; + } + } + + // merge sorted payloads a + b => M + size_t i = 0; // walks a + size_t j = 0; // walks b + size_t k = 0; // walks target M array + + std::vector<size_t> mergedReturnStates(a->returnStates.size() + b->returnStates.size()); + std::vector<Ref<const PredictionContext>> mergedParents(a->returnStates.size() + b->returnStates.size()); + + // walk and merge to yield mergedParents, mergedReturnStates + while (i < a->returnStates.size() && j < b->returnStates.size()) { + const auto& parentA = a->parents[i]; + const auto& parentB = b->parents[j]; + if (a->returnStates[i] == b->returnStates[j]) { + // same payload (stack tops are equal), must yield merged singleton + size_t payload = a->returnStates[i]; + // $+$ = $ + bool both$ = payload == EMPTY_RETURN_STATE && !parentA && !parentB; + bool ax_ax = (parentA && parentB) && *parentA == *parentB; // ax+ax -> ax + if (both$ || ax_ax) { + mergedParents[k] = parentA; // choose left + mergedReturnStates[k] = payload; + } else { // ax+ay -> a'[x,y] + mergedParents[k] = merge(parentA, parentB, rootIsWildcard, mergeCache); + mergedReturnStates[k] = payload; + } + i++; // hop over left one as usual + j++; // but also skip one in right side since we merge + } else if (a->returnStates[i] < b->returnStates[j]) { // copy a[i] to M + mergedParents[k] = parentA; + mergedReturnStates[k] = a->returnStates[i]; + i++; + } else { // b > a, copy b[j] to M + mergedParents[k] = parentB; + mergedReturnStates[k] = b->returnStates[j]; + j++; + } + k++; + } + + // copy over any payloads remaining in either array + if (i < a->returnStates.size()) { + for (auto p = i; p < a->returnStates.size(); p++) { + mergedParents[k] = a->parents[p]; + mergedReturnStates[k] = a->returnStates[p]; + k++; + } + } else { + for (auto p = j; p < b->returnStates.size(); p++) { + mergedParents[k] = b->parents[p]; + mergedReturnStates[k] = 
b->returnStates[p]; + k++; + } + } + + // trim merged if we combined a few that had same stack tops + if (k < mergedParents.size()) { // write index < last position; trim + if (k == 1) { // for just one merged element, return singleton top + auto c = SingletonPredictionContext::create(std::move(mergedParents[0]), mergedReturnStates[0]); + if (mergeCache) { + return mergeCache->put(a, b, std::move(c)); + } + return c; + } + mergedParents.resize(k); + mergedReturnStates.resize(k); + } + + ArrayPredictionContext m(std::move(mergedParents), std::move(mergedReturnStates)); + + // if we created same array as a or b, return that instead + // TODO: track whether this is possible above during merge sort for speed + if (m == *a) { + if (mergeCache) { + return mergeCache->put(a, b, a); + } + return a; + } + if (m == *b) { + if (mergeCache) { + return mergeCache->put(a, b, b); + } + return b; + } + + combineCommonParents(m.parents); + auto c = std::make_shared<ArrayPredictionContext>(std::move(m)); + if (mergeCache) { + return mergeCache->put(a, b, std::move(c)); + } + return c; +} + +std::string PredictionContext::toDOTString(const Ref<const PredictionContext> &context) { + if (context == nullptr) { + return ""; + } + + std::stringstream ss; + ss << "digraph G {\n" << "rankdir=LR;\n"; + + std::vector<Ref<const PredictionContext>> nodes = getAllContextNodes(context); + std::unordered_map<const PredictionContext*, size_t> nodeIds; + size_t nodeId = 0; + + for (const auto ¤t : nodes) { + if (current->getContextType() == PredictionContextType::SINGLETON) { + std::string s = std::to_string(insertOrAssignNodeId(nodeIds, nodeId, current.get())); + ss << " s" << s; + std::string returnState = std::to_string(current->getReturnState(0)); + if (current == PredictionContext::EMPTY) { + returnState = "$"; + } + ss << " [label=\"" << returnState << "\"];\n"; + continue; + } + Ref<const ArrayPredictionContext> arr = std::static_pointer_cast<const ArrayPredictionContext>(current); + ss << " 
s" << insertOrAssignNodeId(nodeIds, nodeId, arr.get()) << " [shape=box, label=\"" << "["; + bool first = true; + for (auto inv : arr->returnStates) { + if (!first) { + ss << ", "; + } + if (inv == EMPTY_RETURN_STATE) { + ss << "$"; + } else { + ss << inv; + } + first = false; + } + ss << "]"; + ss << "\"];\n"; + } + + for (const auto ¤t : nodes) { + if (current == EMPTY) { + continue; + } + for (size_t i = 0; i < current->size(); i++) { + if (!current->getParent(i)) { + continue; + } + ss << " s" << insertOrAssignNodeId(nodeIds, nodeId, current.get()) << "->" << "s" << insertOrAssignNodeId(nodeIds, nodeId, current->getParent(i).get()); + if (current->size() > 1) { + ss << " [label=\"parent[" << i << "]\"];\n"; + } else { + ss << ";\n"; + } + } + } + + ss << "}\n"; + return ss.str(); +} + +// The "visited" map is just a temporary structure to control the retrieval process (which is recursive). +Ref<const PredictionContext> PredictionContext::getCachedContext(const Ref<const PredictionContext> &context, + PredictionContextCache &contextCache) { + std::unordered_map<Ref<const PredictionContext>, Ref<const PredictionContext>> visited; + return getCachedContextImpl(context, contextCache, visited); +} + +std::vector<Ref<const PredictionContext>> PredictionContext::getAllContextNodes(const Ref<const PredictionContext> &context) { + std::vector<Ref<const PredictionContext>> nodes; + std::unordered_set<const PredictionContext*> visited; + getAllContextNodesImpl(context, nodes, visited); + return nodes; +} + +std::vector<std::string> PredictionContext::toStrings(Recognizer *recognizer, int currentState) const { + return toStrings(recognizer, EMPTY, currentState); +} + +std::vector<std::string> PredictionContext::toStrings(Recognizer *recognizer, const Ref<const PredictionContext> &stop, int currentState) const { + + std::vector<std::string> result; + + for (size_t perm = 0; ; perm++) { + size_t offset = 0; + bool last = true; + const PredictionContext *p = this; + size_t 
stateNumber = currentState; + + std::stringstream ss; + ss << "["; + bool outerContinue = false; + while (!p->isEmpty() && p != stop.get()) { + size_t index = 0; + if (p->size() > 0) { + size_t bits = 1; + while ((1ULL << bits) < p->size()) { + bits++; + } + + size_t mask = (1 << bits) - 1; + index = (perm >> offset) & mask; + last &= index >= p->size() - 1; + if (index >= p->size()) { + outerContinue = true; + break; + } + offset += bits; + } + + if (recognizer != nullptr) { + if (ss.tellp() > 1) { + // first char is '[', if more than that this isn't the first rule + ss << ' '; + } + + const ATN &atn = recognizer->getATN(); + ATNState *s = atn.states[stateNumber]; + std::string ruleName = recognizer->getRuleNames()[s->ruleIndex]; + ss << ruleName; + } else if (p->getReturnState(index) != EMPTY_RETURN_STATE) { + if (!p->isEmpty()) { + if (ss.tellp() > 1) { + // first char is '[', if more than that this isn't the first rule + ss << ' '; + } + + ss << p->getReturnState(index); + } + } + stateNumber = p->getReturnState(index); + p = p->getParent(index).get(); + } + + if (outerContinue) + continue; + + ss << "]"; + result.push_back(ss.str()); + + if (last) { + break; + } + } + + return result; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContext.h b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContext.h new file mode 100644 index 0000000000..967355af17 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContext.h @@ -0,0 +1,225 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include <atomic> + +#include "Recognizer.h" +#include "atn/ATN.h" +#include "atn/ATNState.h" +#include "atn/PredictionContextType.h" + +namespace antlr4 { + + class RuleContext; + +namespace atn { + + class ATN; + class ArrayPredictionContext; + class SingletonPredictionContext; + class PredictionContextCache; + class PredictionContextMergeCache; + + class ANTLR4CPP_PUBLIC PredictionContext { + public: + /// Represents $ in local context prediction, which means wildcard. + /// *+x = *. + static const Ref<const PredictionContext> EMPTY; + + /// Represents $ in an array in full context mode, when $ + /// doesn't mean wildcard: $ + x = [$,x]. Here, + /// $ = EMPTY_RETURN_STATE. + // ml: originally Integer.MAX_VALUE, which would be -1 for us, but this is already used in places where + // -1 is converted to unsigned, so we use a different value here. Any value does the job provided it doesn't + // conflict with real return states. + static constexpr size_t EMPTY_RETURN_STATE = std::numeric_limits<size_t>::max() - 9; + + // dispatch + static Ref<const PredictionContext> merge(Ref<const PredictionContext> a, + Ref<const PredictionContext> b, + bool rootIsWildcard, + PredictionContextMergeCache *mergeCache); + + /// <summary> + /// Merge two <seealso cref="SingletonPredictionContext"/> instances. + /// + /// <p/> + /// + /// Stack tops equal, parents merge is same; return left graph.<br/> + /// <embed src="images/SingletonMerge_SameRootSamePar.svg" type="image/svg+xml"/> + /// + /// <p/> + /// + /// Same stack top, parents differ; merge parents giving array node, then + /// remainders of those graphs. A new root node is created to point to the + /// merged parents.<br/> + /// <embed src="images/SingletonMerge_SameRootDiffPar.svg" type="image/svg+xml"/> + /// + /// <p/> + /// + /// Different stack tops pointing to same parent. 
Make array node for the + /// root where both element in the root point to the same (original) + /// parent.<br/> + /// <embed src="images/SingletonMerge_DiffRootSamePar.svg" type="image/svg+xml"/> + /// + /// <p/> + /// + /// Different stack tops pointing to different parents. Make array node for + /// the root where each element points to the corresponding original + /// parent.<br/> + /// <embed src="images/SingletonMerge_DiffRootDiffPar.svg" type="image/svg+xml"/> + /// </summary> + /// <param name="a"> the first <seealso cref="SingletonPredictionContext"/> </param> + /// <param name="b"> the second <seealso cref="SingletonPredictionContext"/> </param> + /// <param name="rootIsWildcard"> {@code true} if this is a local-context merge, + /// otherwise false to indicate a full-context merge </param> + /// <param name="mergeCache"> </param> + static Ref<const PredictionContext> mergeSingletons(Ref<const SingletonPredictionContext> a, + Ref<const SingletonPredictionContext> b, + bool rootIsWildcard, + PredictionContextMergeCache *mergeCache); + + /** + * Handle case where at least one of {@code a} or {@code b} is + * {@link #EMPTY}. In the following diagrams, the symbol {@code $} is used + * to represent {@link #EMPTY}. 
+ * + * <h2>Local-Context Merges</h2> + * + * <p>These local-context merge operations are used when {@code rootIsWildcard} + * is true.</p> + * + * <p>{@link #EMPTY} is superset of any graph; return {@link #EMPTY}.<br> + * <embed src="images/LocalMerge_EmptyRoot.svg" type="image/svg+xml"/></p> + * + * <p>{@link #EMPTY} and anything is {@code #EMPTY}, so merged parent is + * {@code #EMPTY}; return left graph.<br> + * <embed src="images/LocalMerge_EmptyParent.svg" type="image/svg+xml"/></p> + * + * <p>Special case of last merge if local context.<br> + * <embed src="images/LocalMerge_DiffRoots.svg" type="image/svg+xml"/></p> + * + * <h2>Full-Context Merges</h2> + * + * <p>These full-context merge operations are used when {@code rootIsWildcard} + * is false.</p> + * + * <p><embed src="images/FullMerge_EmptyRoots.svg" type="image/svg+xml"/></p> + * + * <p>Must keep all contexts; {@link #EMPTY} in array is a special value (and + * null parent).<br> + * <embed src="images/FullMerge_EmptyRoot.svg" type="image/svg+xml"/></p> + * + * <p><embed src="images/FullMerge_SameRoot.svg" type="image/svg+xml"/></p> + * + * @param a the first {@link SingletonPredictionContext} + * @param b the second {@link SingletonPredictionContext} + * @param rootIsWildcard {@code true} if this is a local-context merge, + * otherwise false to indicate a full-context merge + */ + static Ref<const PredictionContext> mergeRoot(Ref<const SingletonPredictionContext> a, + Ref<const SingletonPredictionContext> b, + bool rootIsWildcard); + + /** + * Merge two {@link ArrayPredictionContext} instances. 
+ * + * <p>Different tops, different parents.<br> + * <embed src="images/ArrayMerge_DiffTopDiffPar.svg" type="image/svg+xml"/></p> + * + * <p>Shared top, same parents.<br> + * <embed src="images/ArrayMerge_ShareTopSamePar.svg" type="image/svg+xml"/></p> + * + * <p>Shared top, different parents.<br> + * <embed src="images/ArrayMerge_ShareTopDiffPar.svg" type="image/svg+xml"/></p> + * + * <p>Shared top, all shared parents.<br> + * <embed src="images/ArrayMerge_ShareTopSharePar.svg" type="image/svg+xml"/></p> + * + * <p>Equal tops, merge parents and reduce top to + * {@link SingletonPredictionContext}.<br> + * <embed src="images/ArrayMerge_EqualTop.svg" type="image/svg+xml"/></p> + */ + static Ref<const PredictionContext> mergeArrays(Ref<const ArrayPredictionContext> a, + Ref<const ArrayPredictionContext> b, + bool rootIsWildcard, + PredictionContextMergeCache *mergeCache); + + static std::string toDOTString(const Ref<const PredictionContext> &context); + + static Ref<const PredictionContext> getCachedContext(const Ref<const PredictionContext> &context, + PredictionContextCache &contextCache); + + static std::vector<Ref<const PredictionContext>> getAllContextNodes(const Ref<const PredictionContext> &context); + + /// Convert a RuleContext tree to a PredictionContext graph. + /// Return EMPTY if outerContext is empty. + static Ref<const PredictionContext> fromRuleContext(const ATN &atn, RuleContext *outerContext); + + PredictionContext(const PredictionContext&) = delete; + + virtual ~PredictionContext() = default; + + PredictionContext& operator=(const PredictionContext&) = delete; + PredictionContext& operator=(PredictionContext&&) = delete; + + PredictionContextType getContextType() const { return _contextType; } + + virtual size_t size() const = 0; + virtual const Ref<const PredictionContext>& getParent(size_t index) const = 0; + virtual size_t getReturnState(size_t index) const = 0; + + /// This means only the EMPTY (wildcard? not sure) context is in set. 
+ virtual bool isEmpty() const = 0; + bool hasEmptyPath() const; + + size_t hashCode() const; + + virtual bool equals(const PredictionContext &other) const = 0; + + virtual std::string toString() const = 0; + + std::vector<std::string> toStrings(Recognizer *recognizer, int currentState) const; + std::vector<std::string> toStrings(Recognizer *recognizer, + const Ref<const PredictionContext> &stop, + int currentState) const; + + protected: + explicit PredictionContext(PredictionContextType contextType); + + PredictionContext(PredictionContext&& other); + + virtual size_t hashCodeImpl() const = 0; + + size_t cachedHashCode() const { return _hashCode.load(std::memory_order_relaxed); } + + private: + const PredictionContextType _contextType; + mutable std::atomic<size_t> _hashCode; + }; + + inline bool operator==(const PredictionContext &lhs, const PredictionContext &rhs) { + return lhs.equals(rhs); + } + + inline bool operator!=(const PredictionContext &lhs, const PredictionContext &rhs) { + return !operator==(lhs, rhs); + } + +} // namespace atn +} // namespace antlr4 + +namespace std { + + template <> + struct hash<::antlr4::atn::PredictionContext> { + size_t operator()(const ::antlr4::atn::PredictionContext &predictionContext) const { + return predictionContext.hashCode(); + } + }; + +} // namespace std diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextCache.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextCache.cpp new file mode 100644 index 0000000000..031a35cbf7 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextCache.cpp @@ -0,0 +1,56 @@ +// Copyright 2012-2022 The ANTLR Project +// +// Redistribution and use in source and binary forms, with or without modification, are permitted +// provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of conditions +// and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written +// permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include "atn/PredictionContextCache.h" + +using namespace antlr4::atn; + +void PredictionContextCache::put(const Ref<const PredictionContext> &value) { + assert(value); + + _data.insert(value); +} + +Ref<const PredictionContext> PredictionContextCache::get( + const Ref<const PredictionContext> &value) const { + assert(value); + + auto iterator = _data.find(value); + if (iterator == _data.end()) { + return nullptr; + } + return *iterator; +} + +size_t PredictionContextCache::PredictionContextHasher::operator()( + const Ref<const PredictionContext> &predictionContext) const { + return predictionContext->hashCode(); +} + +bool PredictionContextCache::PredictionContextComparer::operator()( + const Ref<const PredictionContext> &lhs, + const Ref<const PredictionContext> &rhs) const { + return *lhs == *rhs; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextCache.h b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextCache.h new file mode 100644 index 0000000000..78c8210d97 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextCache.h @@ -0,0 +1,63 @@ +// Copyright 2012-2022 The ANTLR Project +// +// Redistribution and use in source and binary forms, with or without modification, are permitted +// provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of conditions +// and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written +// permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#pragma once + +#include "atn/PredictionContext.h" +#include "FlatHashSet.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC PredictionContextCache final { + public: + PredictionContextCache() = default; + + PredictionContextCache(const PredictionContextCache&) = delete; + PredictionContextCache(PredictionContextCache&&) = delete; + + PredictionContextCache& operator=(const PredictionContextCache&) = delete; + PredictionContextCache& operator=(PredictionContextCache&&) = delete; + + void put(const Ref<const PredictionContext> &value); + + Ref<const PredictionContext> get(const Ref<const PredictionContext> &value) const; + + private: + struct ANTLR4CPP_PUBLIC PredictionContextHasher final { + size_t operator()(const Ref<const PredictionContext> &predictionContext) const; + }; + + struct ANTLR4CPP_PUBLIC PredictionContextComparer final { + bool operator()(const Ref<const PredictionContext> &lhs, + const Ref<const PredictionContext> &rhs) const; + }; + + FlatHashSet<Ref<const PredictionContext>, + PredictionContextHasher, PredictionContextComparer> _data; + }; + +} // namespace atn +} // namespace antlr4 diff --git 
a/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextMergeCache.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextMergeCache.cpp new file mode 100644 index 0000000000..7160b59998 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextMergeCache.cpp @@ -0,0 +1,167 @@ +// Copyright 2012-2022 The ANTLR Project +// +// Redistribution and use in source and binary forms, with or without modification, are permitted +// provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of conditions +// and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written +// permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include "atn/PredictionContextMergeCache.h" + +#include "misc/MurmurHash.h" + +using namespace antlr4::atn; +using namespace antlr4::misc; + +PredictionContextMergeCache::PredictionContextMergeCache( + const PredictionContextMergeCacheOptions &options) : _options(options) {} + +Ref<const PredictionContext> PredictionContextMergeCache::put( + const Ref<const PredictionContext> &key1, + const Ref<const PredictionContext> &key2, + Ref<const PredictionContext> value) { + assert(key1); + assert(key2); + + if (getOptions().getMaxSize() == 0) { + // Cache is effectively disabled. + return value; + } + + auto [existing, inserted] = _entries.try_emplace(std::make_pair(key1.get(), key2.get())); + if (inserted) { + try { + existing->second.reset(new Entry()); + } catch (...) { + _entries.erase(existing); + throw; + } + existing->second->key = std::make_pair(key1, key2); + existing->second->value = std::move(value); + pushToFront(existing->second.get()); + } else { + if (existing->second->value != value) { + existing->second->value = std::move(value); + } + moveToFront(existing->second.get()); + } + compact(existing->second.get()); + return existing->second->value; +} + +Ref<const PredictionContext> PredictionContextMergeCache::get( + const Ref<const PredictionContext> &key1, + const Ref<const PredictionContext> &key2) const { + assert(key1); + assert(key2); + + if (getOptions().getMaxSize() == 0) { + // Cache is effectively disabled. 
+ return nullptr; + } + + auto iterator = _entries.find(std::make_pair(key1.get(), key2.get())); + if (iterator == _entries.end()) { + return nullptr; + } + moveToFront(iterator->second.get()); + return iterator->second->value; +} + +void PredictionContextMergeCache::clear() { + Container().swap(_entries); + _head = _tail = nullptr; + _size = 0; +} + +void PredictionContextMergeCache::moveToFront(Entry *entry) const { + if (entry->prev == nullptr) { + assert(entry == _head); + return; + } + entry->prev->next = entry->next; + if (entry->next != nullptr) { + entry->next->prev = entry->prev; + } else { + assert(entry == _tail); + _tail = entry->prev; + } + entry->prev = nullptr; + entry->next = _head; + _head->prev = entry; + _head = entry; + assert(entry->prev == nullptr); +} + +void PredictionContextMergeCache::pushToFront(Entry *entry) { + ++_size; + entry->prev = nullptr; + entry->next = _head; + if (_head != nullptr) { + _head->prev = entry; + _head = entry; + } else { + assert(entry->next == nullptr); + _head = entry; + _tail = entry; + } + assert(entry->prev == nullptr); +} + +void PredictionContextMergeCache::remove(Entry *entry) { + if (entry->prev != nullptr) { + entry->prev->next = entry->next; + } else { + assert(entry == _head); + _head = entry->next; + } + if (entry->next != nullptr) { + entry->next->prev = entry->prev; + } else { + assert(entry == _tail); + _tail = entry->prev; + } + --_size; + _entries.erase(std::make_pair(entry->key.first.get(), entry->key.second.get())); +} + +void PredictionContextMergeCache::compact(const Entry *preserve) { + Entry *entry = _tail; + while (entry != nullptr && _size > getOptions().getMaxSize()) { + Entry *next = entry->prev; + if (entry != preserve) { + remove(entry); + } + entry = next; + } +} + +size_t PredictionContextMergeCache::PredictionContextHasher::operator()( + const PredictionContextPair &value) const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, value.first->hashCode()); + 
hash = MurmurHash::update(hash, value.second->hashCode()); + return MurmurHash::finish(hash, 2); +} + +bool PredictionContextMergeCache::PredictionContextComparer::operator()( + const PredictionContextPair &lhs, const PredictionContextPair &rhs) const { + return *lhs.first == *rhs.first && *lhs.second == *rhs.second; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextMergeCache.h b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextMergeCache.h new file mode 100644 index 0000000000..efaeaef578 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextMergeCache.h @@ -0,0 +1,101 @@ +// Copyright 2012-2022 The ANTLR Project +// +// Redistribution and use in source and binary forms, with or without modification, are permitted +// provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of conditions +// and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written +// permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#pragma once + +#include <utility> + +#include "atn/PredictionContext.h" +#include "atn/PredictionContextMergeCacheOptions.h" +#include "FlatHashMap.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC PredictionContextMergeCache final { + public: + PredictionContextMergeCache() + : PredictionContextMergeCache(PredictionContextMergeCacheOptions()) {} + + explicit PredictionContextMergeCache(const PredictionContextMergeCacheOptions &options); + + PredictionContextMergeCache(const PredictionContextMergeCache&) = delete; + PredictionContextMergeCache(PredictionContextMergeCache&&) = delete; + + PredictionContextMergeCache& operator=(const PredictionContextMergeCache&) = delete; + PredictionContextMergeCache& operator=(PredictionContextMergeCache&&) = delete; + + Ref<const PredictionContext> put(const Ref<const PredictionContext> &key1, + const Ref<const PredictionContext> &key2, + Ref<const PredictionContext> value); + + Ref<const PredictionContext> get(const Ref<const PredictionContext> &key1, + const Ref<const PredictionContext> &key2) const; + + const PredictionContextMergeCacheOptions& getOptions() const { return _options; } + + void clear(); + + private: + using PredictionContextPair = std::pair<const PredictionContext*, const PredictionContext*>; + + struct ANTLR4CPP_PUBLIC PredictionContextHasher final { + size_t operator()(const PredictionContextPair &value) const; + }; + + struct ANTLR4CPP_PUBLIC 
PredictionContextComparer final { + bool operator()(const PredictionContextPair &lhs, const PredictionContextPair &rhs) const; + }; + + struct ANTLR4CPP_PUBLIC Entry final { + std::pair<Ref<const PredictionContext>, Ref<const PredictionContext>> key; + Ref<const PredictionContext> value; + Entry *prev = nullptr; + Entry *next = nullptr; + }; + + void moveToFront(Entry *entry) const; + + void pushToFront(Entry *entry); + + void remove(Entry *entry); + + void compact(const Entry *preserve); + + using Container = FlatHashMap<PredictionContextPair, std::unique_ptr<Entry>, + PredictionContextHasher, PredictionContextComparer>; + + const PredictionContextMergeCacheOptions _options; + + Container _entries; + + mutable Entry *_head = nullptr; + mutable Entry *_tail = nullptr; + + size_t _size = 0; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextMergeCacheOptions.h b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextMergeCacheOptions.h new file mode 100644 index 0000000000..7331cc17e0 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextMergeCacheOptions.h @@ -0,0 +1,71 @@ +// Copyright 2012-2022 The ANTLR Project +// +// Redistribution and use in source and binary forms, with or without modification, are permitted +// provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of conditions +// and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written +// permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#pragma once + +#include <cstddef> +#include <cstdint> +#include <limits> + +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC PredictionContextMergeCacheOptions final { + public: + PredictionContextMergeCacheOptions() = default; + + size_t getMaxSize() const { return _maxSize; } + + bool hasMaxSize() const { return getMaxSize() != std::numeric_limits<size_t>::max(); } + + PredictionContextMergeCacheOptions& setMaxSize(size_t maxSize) { + _maxSize = maxSize; + return *this; + } + + size_t getClearEveryN() const { + return _clearEveryN; + } + + bool hasClearEveryN() const { return getClearEveryN() != 0; } + + PredictionContextMergeCacheOptions& setClearEveryN(uint64_t clearEveryN) { + _clearEveryN = clearEveryN; + return *this; + } + + PredictionContextMergeCacheOptions& neverClear() { + return setClearEveryN(0); + } + + private: + size_t _maxSize = std::numeric_limits<size_t>::max(); + uint64_t _clearEveryN = 1; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextType.h b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextType.h new file mode 100644 index 
0000000000..c8c4473e13 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextType.h @@ -0,0 +1,21 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include <cstddef> + +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + enum class PredictionContextType : size_t { + SINGLETON = 1, + ARRAY = 2, + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionMode.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionMode.cpp new file mode 100644 index 0000000000..9db0b8bdb9 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionMode.cpp @@ -0,0 +1,202 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/RuleStopState.h" +#include "atn/ATNConfigSet.h" +#include "atn/ATNConfig.h" +#include "misc/MurmurHash.h" +#include "SemanticContext.h" + +#include "PredictionMode.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlrcpp; + +struct AltAndContextConfigHasher +{ + /** + * The hash code is only a function of the {@link ATNState#stateNumber} + * and {@link ATNConfig#context}. 
+ */ + size_t operator () (ATNConfig *o) const { + size_t hashCode = misc::MurmurHash::initialize(7); + hashCode = misc::MurmurHash::update(hashCode, o->state->stateNumber); + hashCode = misc::MurmurHash::update(hashCode, o->context); + return misc::MurmurHash::finish(hashCode, 2); + } +}; + +struct AltAndContextConfigComparer { + bool operator()(ATNConfig *a, ATNConfig *b) const + { + if (a == b) { + return true; + } + return a->state->stateNumber == b->state->stateNumber && *a->context == *b->context; + } +}; + +bool PredictionModeClass::hasSLLConflictTerminatingPrediction(PredictionMode mode, ATNConfigSet *configs) { + /* Configs in rule stop states indicate reaching the end of the decision + * rule (local context) or end of start rule (full context). If all + * configs meet this condition, then none of the configurations is able + * to match additional input so we terminate prediction. + */ + if (allConfigsInRuleStopStates(configs)) { + return true; + } + + bool heuristic; + + // Pure SLL mode parsing or SLL+LL if: + // Don't bother with combining configs from different semantic + // contexts if we can fail over to full LL; costs more time + // since we'll often fail over anyway. 
+ if (mode == PredictionMode::SLL || !configs->hasSemanticContext) { + std::vector<antlrcpp::BitSet> altsets = getConflictingAltSubsets(configs); + heuristic = hasConflictingAltSet(altsets) && !hasStateAssociatedWithOneAlt(configs); + } else { + // dup configs, tossing out semantic predicates + ATNConfigSet dup(true); + for (auto &config : configs->configs) { + Ref<ATNConfig> c = std::make_shared<ATNConfig>(*config, SemanticContext::Empty::Instance); + dup.add(c); + } + std::vector<antlrcpp::BitSet> altsets = getConflictingAltSubsets(&dup); + heuristic = hasConflictingAltSet(altsets) && !hasStateAssociatedWithOneAlt(&dup); + } + + return heuristic; +} + +bool PredictionModeClass::hasConfigInRuleStopState(ATNConfigSet *configs) { + for (const auto &config : configs->configs) { + if (RuleStopState::is(config->state)) { + return true; + } + } + + return false; +} + +bool PredictionModeClass::allConfigsInRuleStopStates(ATNConfigSet *configs) { + for (const auto &config : configs->configs) { + if (!RuleStopState::is(config->state)) { + return false; + } + } + + return true; +} + +size_t PredictionModeClass::resolvesToJustOneViableAlt(const std::vector<antlrcpp::BitSet>& altsets) { + return getSingleViableAlt(altsets); +} + +bool PredictionModeClass::allSubsetsConflict(const std::vector<antlrcpp::BitSet>& altsets) { + return !hasNonConflictingAltSet(altsets); +} + +bool PredictionModeClass::hasNonConflictingAltSet(const std::vector<antlrcpp::BitSet>& altsets) { + for (antlrcpp::BitSet alts : altsets) { + if (alts.count() == 1) { + return true; + } + } + return false; +} + +bool PredictionModeClass::hasConflictingAltSet(const std::vector<antlrcpp::BitSet>& altsets) { + for (antlrcpp::BitSet alts : altsets) { + if (alts.count() > 1) { + return true; + } + } + return false; +} + +bool PredictionModeClass::allSubsetsEqual(const std::vector<antlrcpp::BitSet>& altsets) { + if (altsets.empty()) { + return true; + } + + const antlrcpp::BitSet& first = *altsets.begin(); + for 
(const antlrcpp::BitSet& alts : altsets) { + if (alts != first) { + return false; + } + } + return true; +} + +size_t PredictionModeClass::getUniqueAlt(const std::vector<antlrcpp::BitSet>& altsets) { + antlrcpp::BitSet all = getAlts(altsets); + if (all.count() == 1) { + return all.nextSetBit(0); + } + return ATN::INVALID_ALT_NUMBER; +} + +antlrcpp::BitSet PredictionModeClass::getAlts(const std::vector<antlrcpp::BitSet>& altsets) { + antlrcpp::BitSet all; + for (const auto &alts : altsets) { + all |= alts; + } + + return all; +} + +antlrcpp::BitSet PredictionModeClass::getAlts(ATNConfigSet *configs) { + antlrcpp::BitSet alts; + for (const auto &config : configs->configs) { + alts.set(config->alt); + } + return alts; +} + +std::vector<antlrcpp::BitSet> PredictionModeClass::getConflictingAltSubsets(ATNConfigSet *configs) { + std::unordered_map<ATNConfig*, antlrcpp::BitSet, AltAndContextConfigHasher, AltAndContextConfigComparer> configToAlts; + for (auto &config : configs->configs) { + configToAlts[config.get()].set(config->alt); + } + std::vector<antlrcpp::BitSet> values; + values.reserve(configToAlts.size()); + for (const auto &pair : configToAlts) { + values.push_back(pair.second); + } + return values; +} + +std::unordered_map<ATNState*, antlrcpp::BitSet> PredictionModeClass::getStateToAltMap(ATNConfigSet *configs) { + std::unordered_map<ATNState*, antlrcpp::BitSet> m; + for (const auto &c : configs->configs) { + m[c->state].set(c->alt); + } + return m; +} + +bool PredictionModeClass::hasStateAssociatedWithOneAlt(ATNConfigSet *configs) { + auto x = getStateToAltMap(configs); + for (const auto &pair : x){ + if (pair.second.count() == 1) return true; + } + return false; +} + +size_t PredictionModeClass::getSingleViableAlt(const std::vector<antlrcpp::BitSet>& altsets) { + antlrcpp::BitSet viableAlts; + for (const auto &alts : altsets) { + size_t minAlt = alts.nextSetBit(0); + + viableAlts.set(minAlt); + if (viableAlts.count() > 1) // more than 1 viable alt + { + return 
ATN::INVALID_ALT_NUMBER; + } + } + + return viableAlts.nextSetBit(0); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionMode.h b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionMode.h new file mode 100644 index 0000000000..4868ea2ff2 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionMode.h @@ -0,0 +1,436 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "support/BitSet.h" + +namespace antlr4 { +namespace atn { + + /** + * This enumeration defines the prediction modes available in ANTLR 4 along with + * utility methods for analyzing configuration sets for conflicts and/or + * ambiguities. + */ + enum class PredictionMode { + /** + * The SLL(*) prediction mode. This prediction mode ignores the current + * parser context when making predictions. This is the fastest prediction + * mode, and provides correct results for many grammars. This prediction + * mode is more powerful than the prediction mode provided by ANTLR 3, but + * may result in syntax errors for grammar and input combinations which are + * not SLL. + * + * <p> + * When using this prediction mode, the parser will either return a correct + * parse tree (i.e. the same parse tree that would be returned with the + * {@link #LL} prediction mode), or it will report a syntax error. If a + * syntax error is encountered when using the {@link #SLL} prediction mode, + * it may be due to either an actual syntax error in the input or indicate + * that the particular combination of grammar and input requires the more + * powerful {@link #LL} prediction abilities to complete successfully.</p> + * + * <p> + * This prediction mode does not provide any guarantees for prediction + * behavior for syntactically-incorrect inputs.</p> + */ + SLL, + + /** + * The LL(*) prediction mode. 
This prediction mode allows the current parser + * context to be used for resolving SLL conflicts that occur during + * prediction. This is the fastest prediction mode that guarantees correct + * parse results for all combinations of grammars with syntactically correct + * inputs. + * + * <p> + * When using this prediction mode, the parser will make correct decisions + * for all syntactically-correct grammar and input combinations. However, in + * cases where the grammar is truly ambiguous this prediction mode might not + * report a precise answer for <em>exactly which</em> alternatives are + * ambiguous.</p> + * + * <p> + * This prediction mode does not provide any guarantees for prediction + * behavior for syntactically-incorrect inputs.</p> + */ + LL, + + /** + * The LL(*) prediction mode with exact ambiguity detection. In addition to + * the correctness guarantees provided by the {@link #LL} prediction mode, + * this prediction mode instructs the prediction algorithm to determine the + * complete and exact set of ambiguous alternatives for every ambiguous + * decision encountered while parsing. + * + * <p> + * This prediction mode may be used for diagnosing ambiguities during + * grammar development. Due to the performance overhead of calculating sets + * of ambiguous alternatives, this prediction mode should be avoided when + * the exact results are not necessary.</p> + * + * <p> + * This prediction mode does not provide any guarantees for prediction + * behavior for syntactically-incorrect inputs.</p> + */ + LL_EXACT_AMBIG_DETECTION + }; + + class ANTLR4CPP_PUBLIC PredictionModeClass { + public: + /** + * Computes the SLL prediction termination condition. 
+ * + * <p> + * This method computes the SLL prediction termination condition for both of + * the following cases.</p> + * + * <ul> + * <li>The usual SLL+LL fallback upon SLL conflict</li> + * <li>Pure SLL without LL fallback</li> + * </ul> + * + * <p><strong>COMBINED SLL+LL PARSING</strong></p> + * + * <p>When LL-fallback is enabled upon SLL conflict, correct predictions are + * ensured regardless of how the termination condition is computed by this + * method. Due to the substantially higher cost of LL prediction, the + * prediction should only fall back to LL when the additional lookahead + * cannot lead to a unique SLL prediction.</p> + * + * <p>Assuming combined SLL+LL parsing, an SLL configuration set with only + * conflicting subsets should fall back to full LL, even if the + * configuration sets don't resolve to the same alternative (e.g. + * {@code {1,2}} and {@code {3,4}}). If there is at least one non-conflicting + * configuration, SLL could continue with the hopes that more lookahead will + * resolve via one of those non-conflicting configurations.</p> + * + * <p>Here's the prediction termination rule then: SLL (for SLL+LL parsing) + * stops when it sees only conflicting configuration subsets. In contrast, + * full LL keeps going when there is uncertainty.</p> + * + * <p><strong>HEURISTIC</strong></p> + * + * <p>As a heuristic, we stop prediction when we see any conflicting subset + * unless we see a state that only has one alternative associated with it. + * The single-alt-state thing lets prediction continue upon rules like + * (otherwise, it would admit defeat too soon):</p> + * + * <p>{@code [12|1|[], 6|2|[], 12|2|[]]. s : (ID | ID ID?) ';' ;}</p> + * + * <p>When the ATN simulation reaches the state before {@code ';'}, it has a + * DFA state that looks like: {@code [12|1|[], 6|2|[], 12|2|[]]}. 
Naturally + * {@code 12|1|[]} and {@code 12|2|[]} conflict, but we cannot stop + * processing this node because alternative two has another way to continue, + * via {@code [6|2|[]]}.</p> + * + * <p>It also lets us continue for this rule:</p> + * + * <p>{@code [1|1|[], 1|2|[], 8|3|[]] a : A | A | A B ;}</p> + * + * <p>After matching input A, we reach the stop state for rule A, state 1. + * State 8 is the state right before B. Clearly alternatives 1 and 2 + * conflict and no amount of further lookahead will separate the two. + * However, alternative 3 will be able to continue and so we do not stop + * working on this state. In the previous example, we're concerned with + * states associated with the conflicting alternatives. Here alt 3 is not + * associated with the conflicting configs, but since we can continue + * looking for input reasonably, don't declare the state done.</p> + * + * <p><strong>PURE SLL PARSING</strong></p> + * + * <p>To handle pure SLL parsing, all we have to do is make sure that we + * combine stack contexts for configurations that differ only by semantic + * predicate. From there, we can do the usual SLL termination heuristic.</p> + * + * <p><strong>PREDICATES IN SLL+LL PARSING</strong></p> + * + * <p>SLL decisions don't evaluate predicates until after they reach DFA stop + * states because they need to create the DFA cache that works in all + * semantic situations. In contrast, full LL evaluates predicates collected + * during start state computation so it can ignore predicates thereafter. 
+ * This means that SLL termination detection can totally ignore semantic + * predicates.</p> + * + * <p>Implementation-wise, {@link ATNConfigSet} combines stack contexts but not + * semantic predicate contexts so we might see two configurations like the + * following.</p> + * + * <p>{@code (s, 1, x, {}), (s, 1, x', {p})}</p> + * + * <p>Before testing these configurations against others, we have to merge + * {@code x} and {@code x'} (without modifying the existing configurations). + * For example, we test {@code (x+x')==x''} when looking for conflicts in + * the following configurations.</p> + * + * <p>{@code (s, 1, x, {}), (s, 1, x', {p}), (s, 2, x'', {})}</p> + * + * <p>If the configuration set has predicates (as indicated by + * {@link ATNConfigSet#hasSemanticContext}), this algorithm makes a copy of + * the configurations to strip out all of the predicates so that a standard + * {@link ATNConfigSet} will merge everything ignoring predicates.</p> + */ + static bool hasSLLConflictTerminatingPrediction(PredictionMode mode, ATNConfigSet *configs); + + /// <summary> + /// Checks if any configuration in {@code configs} is in a + /// <seealso cref="RuleStopState"/>. Configurations meeting this condition have + /// reached + /// the end of the decision rule (local context) or end of start rule (full + /// context). + /// </summary> + /// <param name="configs"> the configuration set to test </param> + /// <returns> {@code true} if any configuration in {@code configs} is in a + /// <seealso cref="RuleStopState"/>, otherwise {@code false} </returns> + static bool hasConfigInRuleStopState(ATNConfigSet *configs); + + /// <summary> + /// Checks if all configurations in {@code configs} are in a + /// <seealso cref="RuleStopState"/>. Configurations meeting this condition have + /// reached + /// the end of the decision rule (local context) or end of start rule (full + /// context). 
+ /// </summary> + /// <param name="configs"> the configuration set to test </param> + /// <returns> {@code true} if all configurations in {@code configs} are in a + /// <seealso cref="RuleStopState"/>, otherwise {@code false} </returns> + static bool allConfigsInRuleStopStates(ATNConfigSet *configs); + + /** + * Full LL prediction termination. + * + * <p>Can we stop looking ahead during ATN simulation or is there some + * uncertainty as to which alternative we will ultimately pick, after + * consuming more input? Even if there are partial conflicts, we might know + * that everything is going to resolve to the same minimum alternative. That + * means we can stop since no more lookahead will change that fact. On the + * other hand, there might be multiple conflicts that resolve to different + * minimums. That means we need more look ahead to decide which of those + * alternatives we should predict.</p> + * + * <p>The basic idea is to split the set of configurations {@code C}, into + * conflicting subsets {@code (s, _, ctx, _)} and singleton subsets with + * non-conflicting configurations. Two configurations conflict if they have + * identical {@link ATNConfig#state} and {@link ATNConfig#context} values + * but different {@link ATNConfig#alt} value, e.g. {@code (s, i, ctx, _)} + * and {@code (s, j, ctx, _)} for {@code i!=j}.</p> + * + * <p>Reduce these configuration subsets to the set of possible alternatives. 
+ * You can compute the alternative subsets in one pass as follows:</p> + * + * <p>{@code A_s,ctx = {i | (s, i, ctx, _)}} for each configuration in + * {@code C} holding {@code s} and {@code ctx} fixed.</p> + * + * <p>Or in pseudo-code, for each configuration {@code c} in {@code C}:</p> + * + * <pre> + * map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses s and x, not + * alt and not pred + * </pre> + * + * <p>The values in {@code map} are the set of {@code A_s,ctx} sets.</p> + * + * <p>If {@code |A_s,ctx|=1} then there is no conflict associated with + * {@code s} and {@code ctx}.</p> + * + * <p>Reduce the subsets to singletons by choosing a minimum of each subset. If + * the union of these alternative subsets is a singleton, then no amount of + * more lookahead will help us. We will always pick that alternative. If, + * however, there is more than one alternative, then we are uncertain which + * alternative to predict and must continue looking for resolution. We may + * or may not discover an ambiguity in the future, even if there are no + * conflicting subsets this round.</p> + * + * <p>The biggest sin is to terminate early because it means we've made a + * decision but were uncertain as to the eventual outcome. We haven't used + * enough lookahead. On the other hand, announcing a conflict too late is no + * big deal; you will still have the conflict. It's just inefficient. It + * might even look until the end of file.</p> + * + * <p>No special consideration for semantic predicates is required because + * predicates are evaluated on-the-fly for full LL prediction, ensuring that + * no configuration contains a semantic context during the termination + * check.</p> + * + * <p><strong>CONFLICTING CONFIGS</strong></p> + * + * <p>Two configurations {@code (s, i, x)} and {@code (s, j, x')}, conflict + * when {@code i!=j} but {@code x=x'}. 
Because we merge all + * {@code (s, i, _)} configurations together, that means that there are at + * most {@code n} configurations associated with state {@code s} for + * {@code n} possible alternatives in the decision. The merged stacks + * complicate the comparison of configuration contexts {@code x} and + * {@code x'}. Sam checks to see if one is a subset of the other by calling + * merge and checking to see if the merged result is either {@code x} or + * {@code x'}. If the {@code x} associated with lowest alternative {@code i} + * is the superset, then {@code i} is the only possible prediction since the + * others resolve to {@code min(i)} as well. However, if {@code x} is + * associated with {@code j>i} then at least one stack configuration for + * {@code j} is not in conflict with alternative {@code i}. The algorithm + * should keep going, looking for more lookahead due to the uncertainty.</p> + * + * <p>For simplicity, I'm doing a equality check between {@code x} and + * {@code x'} that lets the algorithm continue to consume lookahead longer + * than necessary. The reason I like the equality is of course the + * simplicity but also because that is the test you need to detect the + * alternatives that are actually in conflict.</p> + * + * <p><strong>CONTINUE/STOP RULE</strong></p> + * + * <p>Continue if union of resolved alternative sets from non-conflicting and + * conflicting alternative subsets has more than one alternative. We are + * uncertain about which alternative to predict.</p> + * + * <p>The complete set of alternatives, {@code [i for (_,i,_)]}, tells us which + * alternatives are still in the running for the amount of input we've + * consumed at this point. 
The conflicting sets let us strip away + * configurations that won't lead to more states because we resolve + * conflicts to the configuration with a minimum alternate for the + * conflicting set.</p> + * + * <p><strong>CASES</strong></p> + * + * <ul> + * + * <li>no conflicts and more than 1 alternative in set => continue</li> + * + * <li> {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s, 3, z)}, + * {@code (s', 1, y)}, {@code (s', 2, y)} yields non-conflicting set + * {@code {3}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} = + * {@code {1,3}} => continue + * </li> + * + * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)}, + * {@code (s', 2, y)}, {@code (s'', 1, z)} yields non-conflicting set + * {@code {1}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} = + * {@code {1}} => stop and predict 1</li> + * + * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)}, + * {@code (s', 2, y)} yields conflicting, reduced sets {@code {1}} U + * {@code {1}} = {@code {1}} => stop and predict 1, can announce + * ambiguity {@code {1,2}}</li> + * + * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 2, y)}, + * {@code (s', 3, y)} yields conflicting, reduced sets {@code {1}} U + * {@code {2}} = {@code {1,2}} => continue</li> + * + * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 3, y)}, + * {@code (s', 4, y)} yields conflicting, reduced sets {@code {1}} U + * {@code {3}} = {@code {1,3}} => continue</li> + * + * </ul> + * + * <p><strong>EXACT AMBIGUITY DETECTION</strong></p> + * + * <p>If all states report the same conflicting set of alternatives, then we + * know we have the exact ambiguity set.</p> + * + * <p><code>|A_<em>i</em>|>1</code> and + * <code>A_<em>i</em> = A_<em>j</em></code> for all <em>i</em>, <em>j</em>.</p> + * + * <p>In other words, we continue examining lookahead until all {@code A_i} + * have more than one alternative and all {@code A_i} are the same. 
If + * {@code A={{1,2}, {1,3}}}, then regular LL prediction would terminate + * because the resolved set is {@code {1}}. To determine what the real + * ambiguity is, we have to know whether the ambiguity is between one and + * two or one and three so we keep going. We can only stop prediction when + * we need exact ambiguity detection when the sets look like + * {@code A={{1,2}}} or {@code {{1,2},{1,2}}}, etc...</p> + */ + static size_t resolvesToJustOneViableAlt(const std::vector<antlrcpp::BitSet> &altsets); + + /// <summary> + /// Determines if every alternative subset in {@code altsets} contains more + /// than one alternative. + /// </summary> + /// <param name="altsets"> a collection of alternative subsets </param> + /// <returns> {@code true} if every <seealso cref="BitSet"/> in {@code altsets} + /// has + /// <seealso cref="BitSet#cardinality cardinality"/> > 1, otherwise {@code + /// false} </returns> + static bool allSubsetsConflict(const std::vector<antlrcpp::BitSet> &altsets); + + /// <summary> + /// Determines if any single alternative subset in {@code altsets} contains + /// exactly one alternative. + /// </summary> + /// <param name="altsets"> a collection of alternative subsets </param> + /// <returns> {@code true} if {@code altsets} contains a <seealso + /// cref="BitSet"/> with + /// <seealso cref="BitSet#cardinality cardinality"/> 1, otherwise {@code false} + /// </returns> + static bool hasNonConflictingAltSet(const std::vector<antlrcpp::BitSet> &altsets); + + /// <summary> + /// Determines if any single alternative subset in {@code altsets} contains + /// more than one alternative. 
+ /// </summary> + /// <param name="altsets"> a collection of alternative subsets </param> + /// <returns> {@code true} if {@code altsets} contains a <seealso + /// cref="BitSet"/> with + /// <seealso cref="BitSet#cardinality cardinality"/> > 1, otherwise {@code + /// false} </returns> + static bool hasConflictingAltSet(const std::vector<antlrcpp::BitSet> &altsets); + + /// <summary> + /// Determines if every alternative subset in {@code altsets} is equivalent. + /// </summary> + /// <param name="altsets"> a collection of alternative subsets </param> + /// <returns> {@code true} if every member of {@code altsets} is equal to the + /// others, otherwise {@code false} </returns> + static bool allSubsetsEqual(const std::vector<antlrcpp::BitSet> &altsets); + + /// <summary> + /// Returns the unique alternative predicted by all alternative subsets in + /// {@code altsets}. If no such alternative exists, this method returns + /// <seealso cref="ATN#INVALID_ALT_NUMBER"/>. + /// </summary> + /// <param name="altsets"> a collection of alternative subsets </param> + static size_t getUniqueAlt(const std::vector<antlrcpp::BitSet> &altsets); + + /// <summary> + /// Gets the complete set of represented alternatives for a collection of + /// alternative subsets. This method returns the union of each <seealso + /// cref="BitSet"/> + /// in {@code altsets}. + /// </summary> + /// <param name="altsets"> a collection of alternative subsets </param> + /// <returns> the set of represented alternatives in {@code altsets} </returns> + static antlrcpp::BitSet getAlts(const std::vector<antlrcpp::BitSet> &altsets); + + /** Get union of all alts from configs. @since 4.5.1 */ + static antlrcpp::BitSet getAlts(ATNConfigSet *configs); + + /// <summary> + /// This function gets the conflicting alt subsets from a configuration set. 
+ /// For each configuration {@code c} in {@code configs}: + /// + /// <pre> + /// map[c] U= c.<seealso cref="ATNConfig#alt alt"/> # map hash/equals uses s and + /// x, not + /// alt and not pred + /// </pre> + /// </summary> + static std::vector<antlrcpp::BitSet> getConflictingAltSubsets(ATNConfigSet *configs); + + /// <summary> + /// Get a map from state to alt subset from a configuration set. For each + /// configuration {@code c} in {@code configs}: + /// + /// <pre> + /// map[c.<seealso cref="ATNConfig#state state"/>] U= c.<seealso + /// cref="ATNConfig#alt alt"/> + /// </pre> + /// </summary> + static std::unordered_map<ATNState*, antlrcpp::BitSet> getStateToAltMap(ATNConfigSet *configs); + + static bool hasStateAssociatedWithOneAlt(ATNConfigSet *configs); + + static size_t getSingleViableAlt(const std::vector<antlrcpp::BitSet> &altsets); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ProfilingATNSimulator.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ProfilingATNSimulator.cpp new file mode 100644 index 0000000000..9fd86d67d4 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ProfilingATNSimulator.cpp @@ -0,0 +1,179 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/PredicateEvalInfo.h" +#include "atn/LookaheadEventInfo.h" +#include "Parser.h" +#include "atn/ATNConfigSet.h" +#include "support/CPPUtils.h" + +#include "atn/ProfilingATNSimulator.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::dfa; +using namespace antlrcpp; + +using namespace std::chrono; + +ProfilingATNSimulator::ProfilingATNSimulator(Parser *parser) + : ParserATNSimulator(parser, parser->getInterpreter<ParserATNSimulator>()->atn, + parser->getInterpreter<ParserATNSimulator>()->decisionToDFA, + parser->getInterpreter<ParserATNSimulator>()->getSharedContextCache()) { + for (size_t i = 0; i < atn.decisionToState.size(); i++) { + _decisions.push_back(DecisionInfo(i)); + } +} + +size_t ProfilingATNSimulator::adaptivePredict(TokenStream *input, size_t decision, ParserRuleContext *outerContext) { + auto onExit = finally([this](){ + _currentDecision = 0; // Originally -1, but that makes no sense (index into a vector and init value is also 0). + }); + + _sllStopIndex = -1; + _llStopIndex = -1; + _currentDecision = decision; + high_resolution_clock::time_point start = high_resolution_clock::now(); + size_t alt = ParserATNSimulator::adaptivePredict(input, decision, outerContext); + high_resolution_clock::time_point stop = high_resolution_clock::now(); + _decisions[decision].timeInPrediction += duration_cast<nanoseconds>(stop - start).count(); + _decisions[decision].invocations++; + + long long SLL_k = _sllStopIndex - _startIndex + 1; + _decisions[decision].SLL_TotalLook += SLL_k; + _decisions[decision].SLL_MinLook = _decisions[decision].SLL_MinLook == 0 ? 
SLL_k : std::min(_decisions[decision].SLL_MinLook, SLL_k); + if (SLL_k > _decisions[decision].SLL_MaxLook) { + _decisions[decision].SLL_MaxLook = SLL_k; + _decisions[decision].SLL_MaxLookEvent = std::make_shared<LookaheadEventInfo>(decision, nullptr, alt, input, _startIndex, _sllStopIndex, false); + } + + if (_llStopIndex >= 0) { + long long LL_k = _llStopIndex - _startIndex + 1; + _decisions[decision].LL_TotalLook += LL_k; + _decisions[decision].LL_MinLook = _decisions[decision].LL_MinLook == 0 ? LL_k : std::min(_decisions[decision].LL_MinLook, LL_k); + if (LL_k > _decisions[decision].LL_MaxLook) { + _decisions[decision].LL_MaxLook = LL_k; + _decisions[decision].LL_MaxLookEvent = std::make_shared<LookaheadEventInfo>(decision, nullptr, alt, input, _startIndex, _llStopIndex, true); + } + } + + return alt; +} + +DFAState* ProfilingATNSimulator::getExistingTargetState(DFAState *previousD, size_t t) { + // this method is called after each time the input position advances + // during SLL prediction + _sllStopIndex = (int)_input->index(); + + DFAState *existingTargetState = ParserATNSimulator::getExistingTargetState(previousD, t); + if (existingTargetState != nullptr) { + _decisions[_currentDecision].SLL_DFATransitions++; // count only if we transition over a DFA state + if (existingTargetState == ERROR.get()) { + _decisions[_currentDecision].errors.push_back( + ErrorInfo(_currentDecision, previousD->configs.get(), _input, _startIndex, _sllStopIndex, false) + ); + } + } + + _currentState = existingTargetState; + return existingTargetState; +} + +DFAState* ProfilingATNSimulator::computeTargetState(DFA &dfa, DFAState *previousD, size_t t) { + DFAState *state = ParserATNSimulator::computeTargetState(dfa, previousD, t); + _currentState = state; + return state; +} + +std::unique_ptr<ATNConfigSet> ProfilingATNSimulator::computeReachSet(ATNConfigSet *closure, size_t t, bool fullCtx) { + if (fullCtx) { + // this method is called after each time the input position advances + // 
during full context prediction + _llStopIndex = (int)_input->index(); + } + + std::unique_ptr<ATNConfigSet> reachConfigs = ParserATNSimulator::computeReachSet(closure, t, fullCtx); + if (fullCtx) { + _decisions[_currentDecision].LL_ATNTransitions++; // count computation even if error + if (reachConfigs != nullptr) { + } else { // no reach on current lookahead symbol. ERROR. + // TODO: does not handle delayed errors per getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule() + _decisions[_currentDecision].errors.push_back(ErrorInfo(_currentDecision, closure, _input, _startIndex, _llStopIndex, true)); + } + } else { + ++_decisions[_currentDecision].SLL_ATNTransitions; + if (reachConfigs != nullptr) { + } else { // no reach on current lookahead symbol. ERROR. + _decisions[_currentDecision].errors.push_back(ErrorInfo(_currentDecision, closure, _input, _startIndex, _sllStopIndex, false)); + } + } + return reachConfigs; +} + +bool ProfilingATNSimulator::evalSemanticContext(Ref<const SemanticContext> const& pred, ParserRuleContext *parserCallStack, + size_t alt, bool fullCtx) { + bool result = ParserATNSimulator::evalSemanticContext(pred, parserCallStack, alt, fullCtx); + if (!(std::dynamic_pointer_cast<const SemanticContext::PrecedencePredicate>(pred) != nullptr)) { + bool fullContext = _llStopIndex >= 0; + int stopIndex = fullContext ? 
_llStopIndex : _sllStopIndex; + _decisions[_currentDecision].predicateEvals.push_back( + PredicateEvalInfo(_currentDecision, _input, _startIndex, stopIndex, pred, result, alt, fullCtx)); + } + + return result; +} + +void ProfilingATNSimulator::reportAttemptingFullContext(DFA &dfa, const BitSet &conflictingAlts, ATNConfigSet *configs, + size_t startIndex, size_t stopIndex) { + if (conflictingAlts.count() > 0) { + conflictingAltResolvedBySLL = conflictingAlts.nextSetBit(0); + } else { + conflictingAltResolvedBySLL = configs->getAlts().nextSetBit(0); + } + _decisions[_currentDecision].LL_Fallback++; + ParserATNSimulator::reportAttemptingFullContext(dfa, conflictingAlts, configs, startIndex, stopIndex); +} + +void ProfilingATNSimulator::reportContextSensitivity(DFA &dfa, size_t prediction, ATNConfigSet *configs, + size_t startIndex, size_t stopIndex) { + if (prediction != conflictingAltResolvedBySLL) { + _decisions[_currentDecision].contextSensitivities.push_back( + ContextSensitivityInfo(_currentDecision, configs, _input, startIndex, stopIndex) + ); + } + ParserATNSimulator::reportContextSensitivity(dfa, prediction, configs, startIndex, stopIndex); +} + +void ProfilingATNSimulator::reportAmbiguity(DFA &dfa, DFAState *D, size_t startIndex, size_t stopIndex, bool exact, + const BitSet &ambigAlts, ATNConfigSet *configs) { + size_t prediction; + if (ambigAlts.count() > 0) { + prediction = ambigAlts.nextSetBit(0); + } else { + prediction = configs->getAlts().nextSetBit(0); + } + if (configs->fullCtx && prediction != conflictingAltResolvedBySLL) { + // Even though this is an ambiguity we are reporting, we can + // still detect some context sensitivities. Both SLL and LL + // are showing a conflict, hence an ambiguity, but if they resolve + // to different minimum alternatives we have also identified a + // context sensitivity. 
+ _decisions[_currentDecision].contextSensitivities.push_back( + ContextSensitivityInfo(_currentDecision, configs, _input, startIndex, stopIndex) + ); + } + _decisions[_currentDecision].ambiguities.push_back( + AmbiguityInfo(_currentDecision, configs, ambigAlts, _input, startIndex, stopIndex, configs->fullCtx) + ); + ParserATNSimulator::reportAmbiguity(dfa, D, startIndex, stopIndex, exact, ambigAlts, configs); +} + +std::vector<DecisionInfo> ProfilingATNSimulator::getDecisionInfo() const { + return _decisions; +} + +DFAState* ProfilingATNSimulator::getCurrentState() const { + return _currentState; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ProfilingATNSimulator.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ProfilingATNSimulator.h new file mode 100644 index 0000000000..551efb8556 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ProfilingATNSimulator.h @@ -0,0 +1,60 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ParserATNSimulator.h" +#include "atn/DecisionInfo.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC ProfilingATNSimulator : public ParserATNSimulator { + public: + explicit ProfilingATNSimulator(Parser *parser); + + virtual size_t adaptivePredict(TokenStream *input, size_t decision, ParserRuleContext *outerContext) override; + + virtual std::vector<DecisionInfo> getDecisionInfo() const; + virtual dfa::DFAState* getCurrentState() const; + + protected: + std::vector<DecisionInfo> _decisions; + + int _sllStopIndex = 0; + int _llStopIndex = 0; + + size_t _currentDecision = 0; + dfa::DFAState *_currentState; + + /// <summary> + /// At the point of LL failover, we record how SLL would resolve the conflict so that + /// we can determine whether or not a decision / input pair is context-sensitive. 
+ /// If LL gives a different result than SLL's predicted alternative, we have a + /// context sensitivity for sure. The converse is not necessarily true, however. + /// It's possible that after conflict resolution chooses minimum alternatives, + /// SLL could get the same answer as LL. Regardless of whether or not the result indicates + /// an ambiguity, it is not treated as a context sensitivity because LL prediction + /// was not required in order to produce a correct prediction for this decision and input sequence. + /// It may in fact still be a context sensitivity but we don't know by looking at the + /// minimum alternatives for the current input. + /// </summary> + size_t conflictingAltResolvedBySLL = 0; + + virtual dfa::DFAState* getExistingTargetState(dfa::DFAState *previousD, size_t t) override; + virtual dfa::DFAState* computeTargetState(dfa::DFA &dfa, dfa::DFAState *previousD, size_t t) override; + virtual std::unique_ptr<ATNConfigSet> computeReachSet(ATNConfigSet *closure, size_t t, bool fullCtx) override; + virtual bool evalSemanticContext(Ref<const SemanticContext> const& pred, ParserRuleContext *parserCallStack, + size_t alt, bool fullCtx) override; + virtual void reportAttemptingFullContext(dfa::DFA &dfa, const antlrcpp::BitSet &conflictingAlts, ATNConfigSet *configs, + size_t startIndex, size_t stopIndex) override; + virtual void reportContextSensitivity(dfa::DFA &dfa, size_t prediction, ATNConfigSet *configs, + size_t startIndex, size_t stopIndex) override; + virtual void reportAmbiguity(dfa::DFA &dfa, dfa::DFAState *D, size_t startIndex, size_t stopIndex, bool exact, + const antlrcpp::BitSet &ambigAlts, ATNConfigSet *configs) override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/RangeTransition.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/RangeTransition.cpp new file mode 100644 index 0000000000..342e550de9 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/RangeTransition.cpp 
@@ -0,0 +1,26 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "misc/IntervalSet.h" + +#include "atn/RangeTransition.h" + +using namespace antlr4; +using namespace antlr4::atn; + +RangeTransition::RangeTransition(ATNState *target, size_t from, size_t to) : Transition(TransitionType::RANGE, target), from(from), to(to) { +} + +misc::IntervalSet RangeTransition::label() const { + return misc::IntervalSet::of((int)from, (int)to); +} + +bool RangeTransition::matches(size_t symbol, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const { + return symbol >= from && symbol <= to; +} + +std::string RangeTransition::toString() const { + return "RANGE " + Transition::toString() + " { from: " + std::to_string(from) + ", to: " + std::to_string(to) + " }"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/RangeTransition.h b/contrib/libs/antlr4_cpp_runtime/src/atn/RangeTransition.h new file mode 100644 index 0000000000..b75c60e247 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/RangeTransition.h @@ -0,0 +1,31 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "atn/Transition.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC RangeTransition final : public Transition { + public: + static bool is(const Transition &transition) { return transition.getTransitionType() == TransitionType::RANGE; } + + static bool is(const Transition *transition) { return transition != nullptr && is(*transition); } + + const size_t from; + const size_t to; + + RangeTransition(ATNState *target, size_t from, size_t to); + + virtual misc::IntervalSet label() const override; + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + virtual std::string toString() const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/RuleStartState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/RuleStartState.h new file mode 100644 index 0000000000..549491514b --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/RuleStartState.h @@ -0,0 +1,26 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "atn/ATNState.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC RuleStartState final : public ATNState { + public: + static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::RULE_START; } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + RuleStopState *stopState = nullptr; + bool isLeftRecursiveRule = false; + + RuleStartState() : ATNState(ATNStateType::RULE_START) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/RuleStopState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/RuleStopState.h new file mode 100644 index 0000000000..7792a1265c --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/RuleStopState.h @@ -0,0 +1,27 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ATNState.h" + +namespace antlr4 { +namespace atn { + + /// The last node in the ATN for a rule, unless that rule is the start symbol. + /// In that case, there is one transition to EOF. Later, we might encode + /// references to all calls to this rule to compute FOLLOW sets for + /// error handling. 
+ class ANTLR4CPP_PUBLIC RuleStopState final : public ATNState { + public: + static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::RULE_STOP; } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + RuleStopState() : ATNState(ATNStateType::RULE_STOP) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/RuleTransition.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/RuleTransition.cpp new file mode 100644 index 0000000000..ba50dd03dd --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/RuleTransition.cpp @@ -0,0 +1,33 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/RuleStartState.h" +#include "atn/RuleTransition.h" + +using namespace antlr4::atn; + +RuleTransition::RuleTransition(RuleStartState *ruleStart, size_t ruleIndex, ATNState *followState) + : RuleTransition(ruleStart, ruleIndex, 0, followState) { +} + +RuleTransition::RuleTransition(RuleStartState *ruleStart, size_t ruleIndex, int precedence, ATNState *followState) + : Transition(TransitionType::RULE, ruleStart), ruleIndex(ruleIndex), precedence(precedence) { + this->followState = followState; +} + +bool RuleTransition::isEpsilon() const { + return true; +} + +bool RuleTransition::matches(size_t /*symbol*/, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const { + return false; +} + +std::string RuleTransition::toString() const { + std::stringstream ss; + ss << "RULE " << Transition::toString() << " { ruleIndex: " << ruleIndex << ", precedence: " << precedence << + ", followState: " << std::hex << followState << " }"; + return ss.str(); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/RuleTransition.h b/contrib/libs/antlr4_cpp_runtime/src/atn/RuleTransition.h new file mode 100644 index 
0000000000..396ef700f2 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/RuleTransition.h @@ -0,0 +1,42 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/Transition.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC RuleTransition final : public Transition { + public: + static bool is(const Transition &transition) { return transition.getTransitionType() == TransitionType::RULE; } + + static bool is(const Transition *transition) { return transition != nullptr && is(*transition); } + + /// Ptr to the rule definition object for this rule ref. + const size_t ruleIndex; // no Rule object at runtime + + const int precedence; + + /// What node to begin computations following ref to rule. + ATNState *followState; + + /// @deprecated Use + /// <seealso cref="#RuleTransition(RuleStartState, size_t, int, ATNState)"/> instead. + RuleTransition(RuleStartState *ruleStart, size_t ruleIndex, ATNState *followState); + + RuleTransition(RuleStartState *ruleStart, size_t ruleIndex, int precedence, ATNState *followState); + RuleTransition(RuleTransition const&) = delete; + RuleTransition& operator=(RuleTransition const&) = delete; + + virtual bool isEpsilon() const override; + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + virtual std::string toString() const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/SemanticContext.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/SemanticContext.cpp new file mode 100644 index 0000000000..7d7fe068df --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/SemanticContext.cpp @@ -0,0 +1,418 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include <functional> +#include <unordered_set> + +#include "misc/MurmurHash.h" +#include "support/Casts.h" +#include "support/CPPUtils.h" +#include "support/Arrays.h" + +#include "SemanticContext.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlrcpp; + +namespace { + + struct SemanticContextHasher final { + size_t operator()(const SemanticContext *semanticContext) const { + return semanticContext->hashCode(); + } + }; + + struct SemanticContextComparer final { + bool operator()(const SemanticContext *lhs, const SemanticContext *rhs) const { + return *lhs == *rhs; + } + }; + + template <typename Comparer> + void insertSemanticContext(const Ref<const SemanticContext> &semanticContext, + std::unordered_set<const SemanticContext*, SemanticContextHasher, SemanticContextComparer> &operandSet, + std::vector<Ref<const SemanticContext>> &operandList, + Ref<const SemanticContext::PrecedencePredicate> &precedencePredicate, + Comparer comparer) { + if (semanticContext != nullptr) { + if (semanticContext->getContextType() == SemanticContextType::PRECEDENCE) { + if (precedencePredicate == nullptr || comparer(downCast<const SemanticContext::PrecedencePredicate*>(semanticContext.get())->precedence, precedencePredicate->precedence)) { + precedencePredicate = std::static_pointer_cast<const SemanticContext::PrecedencePredicate>(semanticContext); + } + } else { + auto [existing, inserted] = operandSet.insert(semanticContext.get()); + if (inserted) { + operandList.push_back(semanticContext); + } + } + } + } + + template <typename Comparer> + void insertSemanticContext(Ref<const SemanticContext> &&semanticContext, + std::unordered_set<const SemanticContext*, SemanticContextHasher, SemanticContextComparer> &operandSet, + std::vector<Ref<const SemanticContext>> &operandList, + Ref<const 
SemanticContext::PrecedencePredicate> &precedencePredicate, + Comparer comparer) { + if (semanticContext != nullptr) { + if (semanticContext->getContextType() == SemanticContextType::PRECEDENCE) { + if (precedencePredicate == nullptr || comparer(downCast<const SemanticContext::PrecedencePredicate*>(semanticContext.get())->precedence, precedencePredicate->precedence)) { + precedencePredicate = std::static_pointer_cast<const SemanticContext::PrecedencePredicate>(std::move(semanticContext)); + } + } else { + auto [existing, inserted] = operandSet.insert(semanticContext.get()); + if (inserted) { + operandList.push_back(std::move(semanticContext)); + } + } + } + } + + size_t predictOperandCapacity(const Ref<const SemanticContext> &x) { + switch (x->getContextType()) { + case SemanticContextType::AND: + return downCast<const SemanticContext::AND&>(*x).getOperands().size(); + case SemanticContextType::OR: + return downCast<const SemanticContext::OR&>(*x).getOperands().size(); + default: + return 1; + } + } + + size_t predictOperandCapacity(const Ref<const SemanticContext> &a, const Ref<const SemanticContext> &b) { + return predictOperandCapacity(a) + predictOperandCapacity(b); + } + +} + +//------------------ Predicate ----------------------------------------------------------------------------------------- + +SemanticContext::Predicate::Predicate(size_t ruleIndex, size_t predIndex, bool isCtxDependent) + : SemanticContext(SemanticContextType::PREDICATE), ruleIndex(ruleIndex), predIndex(predIndex), isCtxDependent(isCtxDependent) {} + +bool SemanticContext::Predicate::eval(Recognizer *parser, RuleContext *parserCallStack) const { + RuleContext *localctx = nullptr; + if (isCtxDependent) { + localctx = parserCallStack; + } + return parser->sempred(localctx, ruleIndex, predIndex); +} + +size_t SemanticContext::Predicate::hashCode() const { + size_t hashCode = misc::MurmurHash::initialize(); + hashCode = misc::MurmurHash::update(hashCode, 
static_cast<size_t>(getContextType())); + hashCode = misc::MurmurHash::update(hashCode, ruleIndex); + hashCode = misc::MurmurHash::update(hashCode, predIndex); + hashCode = misc::MurmurHash::update(hashCode, isCtxDependent ? 1 : 0); + hashCode = misc::MurmurHash::finish(hashCode, 4); + return hashCode; +} + +bool SemanticContext::Predicate::equals(const SemanticContext &other) const { + if (this == &other) { + return true; + } + if (getContextType() != other.getContextType()) { + return false; + } + const Predicate &p = downCast<const Predicate&>(other); + return ruleIndex == p.ruleIndex && predIndex == p.predIndex && isCtxDependent == p.isCtxDependent; +} + +std::string SemanticContext::Predicate::toString() const { + return std::string("{") + std::to_string(ruleIndex) + std::string(":") + std::to_string(predIndex) + std::string("}?"); +} + +//------------------ PrecedencePredicate ------------------------------------------------------------------------------- + +SemanticContext::PrecedencePredicate::PrecedencePredicate(int precedence) : SemanticContext(SemanticContextType::PRECEDENCE), precedence(precedence) {} + +bool SemanticContext::PrecedencePredicate::eval(Recognizer *parser, RuleContext *parserCallStack) const { + return parser->precpred(parserCallStack, precedence); +} + +Ref<const SemanticContext> SemanticContext::PrecedencePredicate::evalPrecedence(Recognizer *parser, + RuleContext *parserCallStack) const { + if (parser->precpred(parserCallStack, precedence)) { + return SemanticContext::Empty::Instance; + } + return nullptr; +} + +size_t SemanticContext::PrecedencePredicate::hashCode() const { + size_t hashCode = misc::MurmurHash::initialize(); + hashCode = misc::MurmurHash::update(hashCode, static_cast<size_t>(getContextType())); + hashCode = misc::MurmurHash::update(hashCode, static_cast<size_t>(precedence)); + return misc::MurmurHash::finish(hashCode, 2); +} + +bool SemanticContext::PrecedencePredicate::equals(const SemanticContext &other) const { + 
if (this == &other) { + return true; + } + if (getContextType() != other.getContextType()) { + return false; + } + const PrecedencePredicate &predicate = downCast<const PrecedencePredicate&>(other); + return precedence == predicate.precedence; +} + +std::string SemanticContext::PrecedencePredicate::toString() const { + return "{" + std::to_string(precedence) + ">=prec}?"; +} + +//------------------ AND ----------------------------------------------------------------------------------------------- + +SemanticContext::AND::AND(Ref<const SemanticContext> a, Ref<const SemanticContext> b) : Operator(SemanticContextType::AND) { + std::unordered_set<const SemanticContext*, SemanticContextHasher, SemanticContextComparer> operands; + Ref<const SemanticContext::PrecedencePredicate> precedencePredicate; + + _opnds.reserve(predictOperandCapacity(a, b) + 1); + + if (a->getContextType() == SemanticContextType::AND) { + for (const auto &operand : downCast<const AND*>(a.get())->getOperands()) { + insertSemanticContext(operand, operands, _opnds, precedencePredicate, std::less<int>{}); + } + } else { + insertSemanticContext(std::move(a), operands, _opnds, precedencePredicate, std::less<int>{}); + } + + if (b->getContextType() == SemanticContextType::AND) { + for (const auto &operand : downCast<const AND*>(b.get())->getOperands()) { + insertSemanticContext(operand, operands, _opnds, precedencePredicate, std::less<int>{}); + } + } else { + insertSemanticContext(std::move(b), operands, _opnds, precedencePredicate, std::less<int>{}); + } + + if (precedencePredicate != nullptr) { + // interested in the transition with the lowest precedence + auto [existing, inserted] = operands.insert(precedencePredicate.get()); + if (inserted) { + _opnds.push_back(std::move(precedencePredicate)); + } + } +} + +const std::vector<Ref<const SemanticContext>>& SemanticContext::AND::getOperands() const { + return _opnds; +} + +bool SemanticContext::AND::equals(const SemanticContext &other) const { + if (this 
== &other) { + return true; + } + if (getContextType() != other.getContextType()) { + return false; + } + const AND &context = downCast<const AND&>(other); + return Arrays::equals(getOperands(), context.getOperands()); +} + +size_t SemanticContext::AND::hashCode() const { + size_t hash = misc::MurmurHash::initialize(); + hash = misc::MurmurHash::update(hash, static_cast<size_t>(getContextType())); + return misc::MurmurHash::hashCode(getOperands(), hash); +} + +bool SemanticContext::AND::eval(Recognizer *parser, RuleContext *parserCallStack) const { + for (const auto &opnd : getOperands()) { + if (!opnd->eval(parser, parserCallStack)) { + return false; + } + } + return true; +} + +Ref<const SemanticContext> SemanticContext::AND::evalPrecedence(Recognizer *parser, RuleContext *parserCallStack) const { + bool differs = false; + std::vector<Ref<const SemanticContext>> operands; + for (const auto &context : getOperands()) { + auto evaluated = context->evalPrecedence(parser, parserCallStack); + differs |= (evaluated != context); + if (evaluated == nullptr) { + // The AND context is false if any element is false. + return nullptr; + } + if (evaluated != Empty::Instance) { + // Reduce the result by skipping true elements. + operands.push_back(std::move(evaluated)); + } + } + + if (!differs) { + return shared_from_this(); + } + + if (operands.empty()) { + // All elements were true, so the AND context is true. 
+    return Empty::Instance;
+  }
+
+  Ref<const SemanticContext> result = std::move(operands[0]);
+  for (size_t i = 1; i < operands.size(); ++i) {
+    result = SemanticContext::And(std::move(result), std::move(operands[i]));
+  }
+
+  return result;
+}
+
+// Concatenates the text of every operand, each one followed by " && ".
+// NOTE(review): the separator is appended after the last operand as well, so any non-empty result
+// ends in " && ". Confirm that no consumer relies on this before tidying it up.
+std::string SemanticContext::AND::toString() const {
+  std::string tmp;
+  for (const auto &var : getOperands()) {
+    tmp += var->toString() + " && ";
+  }
+  return tmp;
+}
+
+//------------------ OR ------------------------------------------------------------------------------------------------
+
+// Builds the disjunction of `a` and `b`.
+// When an argument is itself an OR, its operands are inserted one by one instead of the OR node,
+// so the operand list stays flat. `operands` is the set handed to insertSemanticContext together
+// with `_opnds`; that helper is defined outside this excerpt and presumably uses the set to skip
+// duplicates and std::greater to keep a single precedence predicate - confirm there.
+// Whatever is left in `precedencePredicate` afterwards is appended last, unless it is already in the set.
+SemanticContext::OR::OR(Ref<const SemanticContext> a, Ref<const SemanticContext> b) : Operator(SemanticContextType::OR) {
+  std::unordered_set<const SemanticContext*, SemanticContextHasher, SemanticContextComparer> operands;
+  Ref<const SemanticContext::PrecedencePredicate> precedencePredicate;
+
+  _opnds.reserve(predictOperandCapacity(a, b) + 1);
+
+  if (a->getContextType() == SemanticContextType::OR) {
+    for (const auto &operand : downCast<const OR*>(a.get())->getOperands()) {
+      insertSemanticContext(operand, operands, _opnds, precedencePredicate, std::greater<int>{});
+    }
+  } else {
+    insertSemanticContext(std::move(a), operands, _opnds, precedencePredicate, std::greater<int>{});
+  }
+
+  if (b->getContextType() == SemanticContextType::OR) {
+    for (const auto &operand : downCast<const OR*>(b.get())->getOperands()) {
+      insertSemanticContext(operand, operands, _opnds, precedencePredicate, std::greater<int>{});
+    }
+  } else {
+    insertSemanticContext(std::move(b), operands, _opnds, precedencePredicate, std::greater<int>{});
+  }
+
+  if (precedencePredicate != nullptr) {
+    // interested in the transition with the highest precedence
+    auto [existing, inserted] = operands.insert(precedencePredicate.get());
+    if (inserted) {
+      _opnds.push_back(std::move(precedencePredicate));
+    }
+  }
+}
+
+// Returns the operand list assembled by the constructor.
+const std::vector<Ref<const SemanticContext>>& SemanticContext::OR::getOperands() const {
+  return _opnds;
+}
+
+// Two OR contexts are equal when they are the same object, or when `other` is also an OR and
+// Arrays::equals reports their operand lists as equal.
+bool SemanticContext::OR::equals(const SemanticContext &other) const {
+  if (this == &other) {
+    return true;
+  }
+  if (getContextType() != other.getContextType()) {
+    return false;
+  }
+  // Safe: the context type was checked just above.
+  const OR &context = downCast<const OR&>(other);
+  return Arrays::equals(getOperands(), context.getOperands());
+}
+
+// Hash seeded with the context type and then folded over the operand list.
+size_t SemanticContext::OR::hashCode() const {
+  size_t hash = misc::MurmurHash::initialize();
+  hash = misc::MurmurHash::update(hash, static_cast<size_t>(getContextType()));
+  return misc::MurmurHash::hashCode(getOperands(), hash);
+}
+
+// Returns true as soon as one operand evaluates to true; false when none does
+// (which includes an empty operand list).
+bool SemanticContext::OR::eval(Recognizer *parser, RuleContext *parserCallStack) const {
+  for (const auto &opnd : getOperands()) {
+    if (opnd->eval(parser, parserCallStack)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Evaluates the precedence predicates of every operand and simplifies the disjunction:
+//  - Empty::Instance as soon as one operand reduces to Empty::Instance,
+//  - this very node when no operand changed,
+//  - nullptr when every operand reduced to nullptr,
+//  - otherwise the surviving operands recombined with SemanticContext::Or.
+Ref<const SemanticContext> SemanticContext::OR::evalPrecedence(Recognizer *parser, RuleContext *parserCallStack) const {
+  bool differs = false;
+  std::vector<Ref<const SemanticContext>> operands;
+  for (const auto &context : getOperands()) {
+    auto evaluated = context->evalPrecedence(parser, parserCallStack);
+    // Compares the two references themselves, not the contexts they point to.
+    differs |= (evaluated != context);
+    if (evaluated == Empty::Instance) {
+      // The OR context is true if any element is true.
+      return Empty::Instance;
+    }
+    if (evaluated != nullptr) {
+      // Reduce the result by skipping false elements.
+      operands.push_back(std::move(evaluated));
+    }
+  }
+
+  if (!differs) {
+    return shared_from_this();
+  }
+
+  if (operands.empty()) {
+    // All elements were false, so the OR context is false.
+    return nullptr;
+  }
+
+  Ref<const SemanticContext> result = std::move(operands[0]);
+  for (size_t i = 1; i < operands.size(); ++i) {
+    result = SemanticContext::Or(std::move(result), std::move(operands[i]));
+  }
+
+  return result;
+}
+
+// Concatenates the text of every operand, each one followed by " || ".
+// NOTE(review): as in AND::toString, the separator also follows the last operand.
+std::string SemanticContext::OR::toString() const {
+  std::string tmp;
+  for(const auto &var : getOperands()) {
+    tmp += var->toString() + " || ";
+  }
+  return tmp;
+}
+
+//------------------ SemanticContext -----------------------------------------------------------------------------------
+
+// The shared "always true" context. It is a Predicate built with INVALID_INDEX for both indices;
+// And() and Or() below recognise it by comparing references against this object.
+const Ref<const SemanticContext> SemanticContext::Empty::Instance = std::make_shared<Predicate>(INVALID_INDEX, INVALID_INDEX, false);
+
+// Default implementation: nothing to simplify, so the context itself is returned.
+Ref<const SemanticContext> SemanticContext::evalPrecedence(Recognizer * /*parser*/, RuleContext * /*parserCallStack*/) const {
+  return shared_from_this();
+}
+
+// Conjunction of `a` and `b`.
+// A null or Empty::Instance argument is neutral: the other argument is returned unchanged.
+// Otherwise an AND node is built; if it ends up with a single operand, that operand is returned.
+Ref<const SemanticContext> SemanticContext::And(Ref<const SemanticContext> a, Ref<const SemanticContext> b) {
+  if (!a || a == Empty::Instance) {
+    return b;
+  }
+
+  if (!b || b == Empty::Instance) {
+    return a;
+  }
+
+  Ref<AND> result = std::make_shared<AND>(std::move(a), std::move(b));
+  if (result->getOperands().size() == 1) {
+    return result->getOperands()[0];
+  }
+
+  return result;
+}
+
+// Disjunction of `a` and `b`.
+// A null argument is neutral: the other argument is returned unchanged.
+// If either argument is Empty::Instance the result is Empty::Instance.
+// Otherwise an OR node is built; if it ends up with a single operand, that operand is returned.
+Ref<const SemanticContext> SemanticContext::Or(Ref<const SemanticContext> a, Ref<const SemanticContext> b) {
+  if (!a) {
+    return b;
+  }
+  if (!b) {
+    return a;
+  }
+
+  if (a == Empty::Instance || b == Empty::Instance) {
+    return Empty::Instance;
+  }
+
+  Ref<OR> result = std::make_shared<OR>(std::move(a), std::move(b));
+  if (result->getOperands().size() == 1) {
+    return result->getOperands()[0];
+  }
+
+  return result;
+}
diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/SemanticContext.h b/contrib/libs/antlr4_cpp_runtime/src/atn/SemanticContext.h
new file mode 100644
index 0000000000..8116fc0b56
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/atn/SemanticContext.h
@@ -0,0 +1,237 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "Recognizer.h"
+#include "support/CPPUtils.h"
+#include "atn/SemanticContextType.h"
+
+namespace antlr4 {
+namespace atn {
+
+  /// A tree structure used to record the semantic context in which
+  /// an ATN configuration is valid. It's either a single predicate,
+  /// a conjunction "p1 && p2", or a sum of products "p1||p2".
+  ///
+  /// The AND, OR, and Predicate subclasses of SemanticContext are
+  /// scoped within this outer class.
+  class ANTLR4CPP_PUBLIC SemanticContext : public std::enable_shared_from_this<SemanticContext> {
+  public:
+    virtual ~SemanticContext() = default;
+
+    /// The type tag given to the constructor; the static is() helpers of the subclasses test it.
+    SemanticContextType getContextType() const { return _contextType; }
+
+    /// <summary>
+    /// For context independent predicates, we evaluate them without a local
+    /// context (i.e., null context). That way, we can evaluate them without
+    /// having to create proper rule-specific context during prediction (as
+    /// opposed to the parser, which creates them naturally). In a practical
+    /// sense, this avoids a cast exception from RuleContext to myruleContext.
+    /// <p/>
+    /// For context dependent predicates, we must pass in a local context so that
+    /// references such as $arg evaluate properly as _localctx.arg. We only
+    /// capture context dependent predicates in the context in which we begin
+    /// prediction, so we passed in the outer context here in case of context
+    /// dependent predicate evaluation.
+    /// </summary>
+    virtual bool eval(Recognizer *parser, RuleContext *parserCallStack) const = 0;
+
+    /**
+     * Evaluate the precedence predicates for the context and reduce the result.
+     *
+     * @param parser The parser instance.
+     * @param parserCallStack The rule context handed on to each nested evaluation.
+     * @return The simplified semantic context after precedence predicates are
+     * evaluated, which will be one of the following values.
+     * <ul>
+     * <li>Empty::Instance: if the predicate simplifies to {@code true} after
+     * precedence predicates are evaluated.</li>
+     * <li>{@code nullptr}: if the predicate simplifies to {@code false} after
+     * precedence predicates are evaluated.</li>
+     * <li>{@code this} (via shared_from_this()): if the semantic context is not
+     * changed as a result of precedence predicate evaluation.</li>
+     * <li>A non-null SemanticContext: the new simplified
+     * semantic context after precedence predicates are evaluated.</li>
+     * </ul>
+     */
+    virtual Ref<const SemanticContext> evalPrecedence(Recognizer *parser, RuleContext *parserCallStack) const;
+
+    virtual size_t hashCode() const = 0;
+
+    virtual bool equals(const SemanticContext &other) const = 0;
+
+    virtual std::string toString() const = 0;
+
+    /// Conjunction of two contexts; either argument may be null.
+    static Ref<const SemanticContext> And(Ref<const SemanticContext> a, Ref<const SemanticContext> b);
+
+    /// Disjunction of two contexts; either argument may be null.
+    /// See also: ParserATNSimulator::getPredsForAmbigAlts.
+    static Ref<const SemanticContext> Or(Ref<const SemanticContext> a, Ref<const SemanticContext> b);
+
+    class Empty;
+    class Predicate;
+    class PrecedencePredicate;
+    class Operator;
+    class AND;
+    class OR;
+
+  protected:
+    explicit SemanticContext(SemanticContextType contextType) : _contextType(contextType) {}
+
+  private:
+    const SemanticContextType _contextType;
+  };
+
+  /// Value comparison: delegates to the virtual equals().
+  inline bool operator==(const SemanticContext &lhs, const SemanticContext &rhs) {
+    return lhs.equals(rhs);
+  }
+
+  inline bool operator!=(const SemanticContext &lhs, const SemanticContext &rhs) {
+    return !operator==(lhs, rhs);
+  }
+
+  /// Holder for the shared "always true" context. The class overrides none of the pure virtual
+  /// members of SemanticContext, so it only serves as a scope for Instance.
+  class ANTLR4CPP_PUBLIC SemanticContext::Empty : public SemanticContext{
+  public:
+    /**
+     * The default SemanticContext, which is semantically equivalent to
+     * a predicate of the form {@code {true}?}.
+     */
+    static const Ref<const SemanticContext> Instance;
+  };
+
+  /// A single predicate, identified by the rule it belongs to and its index.
+  class ANTLR4CPP_PUBLIC SemanticContext::Predicate final : public SemanticContext {
+  public:
+    static bool is(const SemanticContext &semanticContext) { return semanticContext.getContextType() == SemanticContextType::PREDICATE; }
+
+    /// Null-tolerant overload: a null pointer is never a Predicate.
+    static bool is(const SemanticContext *semanticContext) { return semanticContext != nullptr && is(*semanticContext); }
+
+    const size_t ruleIndex;
+    const size_t predIndex;
+    const bool isCtxDependent; // e.g., $i ref in pred
+
+    Predicate(size_t ruleIndex, size_t predIndex, bool isCtxDependent);
+
+    bool eval(Recognizer *parser, RuleContext *parserCallStack) const override;
+    size_t hashCode() const override;
+    bool equals(const SemanticContext &other) const override;
+    std::string toString() const override;
+  };
+
+  /// A precedence predicate; the only leaf context here that overrides evalPrecedence().
+  class ANTLR4CPP_PUBLIC SemanticContext::PrecedencePredicate final : public SemanticContext {
+  public:
+    static bool is(const SemanticContext &semanticContext) { return semanticContext.getContextType() == SemanticContextType::PRECEDENCE; }
+
+    /// Null-tolerant overload: a null pointer is never a PrecedencePredicate.
+    static bool is(const SemanticContext *semanticContext) { return semanticContext != nullptr && is(*semanticContext); }
+
+    const int precedence;
+
+    explicit PrecedencePredicate(int precedence);
+
+    bool eval(Recognizer *parser, RuleContext *parserCallStack) const override;
+    Ref<const SemanticContext> evalPrecedence(Recognizer *parser, RuleContext *parserCallStack) const override;
+    size_t hashCode() const override;
+    bool equals(const SemanticContext &other) const override;
+    std::string toString() const override;
+  };
+
+  /**
+   * This is the base class for semantic context "operators", which operate on
+   * a collection of semantic context "operands".
+   *
+   * @since 4.3
+   */
+  class ANTLR4CPP_PUBLIC SemanticContext::Operator : public SemanticContext {
+  public:
+    /// True for both operator kinds, AND and OR.
+    static bool is(const SemanticContext &semanticContext) {
+      const auto contextType = semanticContext.getContextType();
+      return contextType == SemanticContextType::AND || contextType == SemanticContextType::OR;
+    }
+
+    /// Null-tolerant overload: a null pointer is never an Operator.
+    static bool is(const SemanticContext *semanticContext) { return semanticContext != nullptr && is(*semanticContext); }
+
+    /**
+     * Gets the operands for the semantic context operator.
+     *
+     * @return a collection of SemanticContext operands for the
+     * operator.
+     *
+     * @since 4.3
+     */
+    virtual const std::vector<Ref<const SemanticContext>>& getOperands() const = 0;
+
+  protected:
+    // Re-exports the protected SemanticContext(SemanticContextType) constructor to AND and OR.
+    using SemanticContext::SemanticContext;
+  };
+
+  /**
+   * A semantic context which is true whenever none of the contained contexts
+   * is false.
+   */
+  class ANTLR4CPP_PUBLIC SemanticContext::AND final : public SemanticContext::Operator {
+  public:
+    static bool is(const SemanticContext &semanticContext) { return semanticContext.getContextType() == SemanticContextType::AND; }
+
+    /// Null-tolerant overload: a null pointer is never an AND.
+    static bool is(const SemanticContext *semanticContext) { return semanticContext != nullptr && is(*semanticContext); }
+
+    AND(Ref<const SemanticContext> a, Ref<const SemanticContext> b) ;
+
+    const std::vector<Ref<const SemanticContext>>& getOperands() const override;
+
+    /**
+     * The evaluation of predicates by this context is short-circuiting, but
+     * unordered.
+     */
+    bool eval(Recognizer *parser, RuleContext *parserCallStack) const override;
+    Ref<const SemanticContext> evalPrecedence(Recognizer *parser, RuleContext *parserCallStack) const override;
+    size_t hashCode() const override;
+    bool equals(const SemanticContext &other) const override;
+    std::string toString() const override;
+
+  private:
+    std::vector<Ref<const SemanticContext>> _opnds;
+  };
+
+  /**
+   * A semantic context which is true whenever at least one of the contained
+   * contexts is true.
+   */
+  class ANTLR4CPP_PUBLIC SemanticContext::OR final : public SemanticContext::Operator {
+  public:
+    static bool is(const SemanticContext &semanticContext) { return semanticContext.getContextType() == SemanticContextType::OR; }
+
+    /// Null-tolerant overload: a null pointer is never an OR.
+    static bool is(const SemanticContext *semanticContext) { return semanticContext != nullptr && is(*semanticContext); }
+
+    /// Both arguments are dereferenced by the constructor, so neither may be null;
+    /// SemanticContext::Or() filters null arguments before constructing an OR.
+    OR(Ref<const SemanticContext> a, Ref<const SemanticContext> b);
+
+    const std::vector<Ref<const SemanticContext>>& getOperands() const override;
+
+    /**
+     * The evaluation of predicates by this context is short-circuiting, but
+     * unordered.
+     */
+    bool eval(Recognizer *parser, RuleContext *parserCallStack) const override;
+    Ref<const SemanticContext> evalPrecedence(Recognizer *parser, RuleContext *parserCallStack) const override;
+    size_t hashCode() const override;
+    bool equals(const SemanticContext &other) const override;
+    std::string toString() const override;
+
+  private:
+    std::vector<Ref<const SemanticContext>> _opnds;
+  };
+
+} // namespace atn
+} // namespace antlr4
+
+namespace std {
+
+  // Lets SemanticContext be used as a key in hashed containers; forwards to the virtual hashCode().
+  template <>
+  struct hash<::antlr4::atn::SemanticContext> {
+    size_t operator()(const ::antlr4::atn::SemanticContext &semanticContext) const {
+      return semanticContext.hashCode();
+    }
+  };
+
+} // namespace std
diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/SemanticContextType.h b/contrib/libs/antlr4_cpp_runtime/src/atn/SemanticContextType.h
new file mode 100644
index 0000000000..bca6e421d2
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/atn/SemanticContextType.h
@@ -0,0 +1,23 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */ + +#pragma once + +#include <cstddef> + +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + enum class SemanticContextType : size_t { + PREDICATE = 1, + PRECEDENCE = 2, + AND = 3, + OR = 4, + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/SerializedATNView.h b/contrib/libs/antlr4_cpp_runtime/src/atn/SerializedATNView.h new file mode 100644 index 0000000000..a723589bc3 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/SerializedATNView.h @@ -0,0 +1,101 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <iterator> +#include <vector> + +#include "antlr4-common.h" +#include "misc/MurmurHash.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC SerializedATNView final { + public: + using value_type = int32_t; + using size_type = size_t; + using difference_type = ptrdiff_t; + using reference = int32_t&; + using const_reference = const int32_t&; + using pointer = int32_t*; + using const_pointer = const int32_t*; + using iterator = const_pointer; + using const_iterator = const_pointer; + using reverse_iterator = std::reverse_iterator<iterator>; + using const_reverse_iterator = std::reverse_iterator<const_iterator>; + + SerializedATNView() = default; + + SerializedATNView(const_pointer data, size_type size) : _data(data), _size(size) {} + + SerializedATNView(const std::vector<int32_t> &serializedATN) : _data(serializedATN.data()), _size(serializedATN.size()) {} + + SerializedATNView(const SerializedATNView&) = default; + + SerializedATNView& operator=(const SerializedATNView&) = default; + + const_iterator begin() const { return data(); } + + const_iterator cbegin() const { return data(); } + + const_iterator end() const { return data() 
+ size(); } + + const_iterator cend() const { return data() + size(); } + + const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); } + + const_reverse_iterator crbegin() const { return const_reverse_iterator(cend()); } + + const_reverse_iterator rend() const { return const_reverse_iterator(begin()); } + + const_reverse_iterator crend() const { return const_reverse_iterator(cbegin()); } + + bool empty() const { return size() == 0; } + + const_pointer data() const { return _data; } + + size_type size() const { return _size; } + + size_type size_bytes() const { return size() * sizeof(value_type); } + + const_reference operator[](size_type index) const { return _data[index]; } + + private: + const_pointer _data = nullptr; + size_type _size = 0; + }; + + inline bool operator==(const SerializedATNView &lhs, const SerializedATNView &rhs) { + return (lhs.data() == rhs.data() && lhs.size() == rhs.size()) || + (lhs.size() == rhs.size() && std::memcmp(lhs.data(), rhs.data(), lhs.size_bytes()) == 0); + } + + inline bool operator!=(const SerializedATNView &lhs, const SerializedATNView &rhs) { + return !operator==(lhs, rhs); + } + + inline bool operator<(const SerializedATNView &lhs, const SerializedATNView &rhs) { + int diff = std::memcmp(lhs.data(), rhs.data(), std::min(lhs.size_bytes(), rhs.size_bytes())); + return diff < 0 || (diff == 0 && lhs.size() < rhs.size()); + } + +} // namespace atn +} // namespace antlr4 + +namespace std { + + template <> + struct hash<::antlr4::atn::SerializedATNView> { + size_t operator()(const ::antlr4::atn::SerializedATNView &serializedATNView) const { + return ::antlr4::misc::MurmurHash::hashCode(serializedATNView.data(), serializedATNView.size()); + } + }; + +} // namespace std diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/SetTransition.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/SetTransition.cpp new file mode 100644 index 0000000000..95ec514edb --- /dev/null +++ 
b/contrib/libs/antlr4_cpp_runtime/src/atn/SetTransition.cpp @@ -0,0 +1,28 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "Token.h" +#include "misc/IntervalSet.h" + +#include "atn/SetTransition.h" + +using namespace antlr4; +using namespace antlr4::atn; + +SetTransition::SetTransition(TransitionType transitionType, ATNState *target, misc::IntervalSet aSet) + : Transition(transitionType, target), set(aSet.isEmpty() ? misc::IntervalSet::of(Token::INVALID_TYPE) : std::move(aSet)) { +} + +misc::IntervalSet SetTransition::label() const { + return set; +} + +bool SetTransition::matches(size_t symbol, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const { + return set.contains(symbol); +} + +std::string SetTransition::toString() const { + return "SET " + Transition::toString() + " { set: " + set.toString() + "}"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/SetTransition.h b/contrib/libs/antlr4_cpp_runtime/src/atn/SetTransition.h new file mode 100644 index 0000000000..3a3343ec25 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/SetTransition.h @@ -0,0 +1,38 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/Transition.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// A transition containing a set of values. 
</summary> + class ANTLR4CPP_PUBLIC SetTransition : public Transition { + public: + static bool is(const Transition &transition) { + const auto transitionType = transition.getTransitionType(); + return transitionType == TransitionType::SET || transitionType == TransitionType::NOT_SET; + } + + static bool is(const Transition *transition) { return transition != nullptr && is(*transition); } + + const misc::IntervalSet set; + + SetTransition(ATNState *target, misc::IntervalSet set) : SetTransition(TransitionType::SET, target, std::move(set)) {} + + virtual misc::IntervalSet label() const override; + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + virtual std::string toString() const override; + + protected: + SetTransition(TransitionType transitionType, ATNState *target, misc::IntervalSet set); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/SingletonPredictionContext.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/SingletonPredictionContext.cpp new file mode 100644 index 0000000000..66a91936e9 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/SingletonPredictionContext.cpp @@ -0,0 +1,86 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */
+
+#include "atn/SingletonPredictionContext.h"
+
+#include "support/Casts.h"
+#include "misc/MurmurHash.h"
+
+using namespace antlr4::atn;
+using namespace antlrcpp;
+
+namespace {
+
+  // Hash codes are compatible when they are equal or when either one is 0.
+  // NOTE(review): 0 presumably means "hash not cached yet" - confirm in PredictionContext.
+  bool cachedHashCodeEqual(size_t lhs, size_t rhs) {
+    return lhs == rhs || lhs == 0 || rhs == 0;
+  }
+
+}
+
+// Stores the parent reference and the return state; the return state must not be
+// ATNState::INVALID_STATE_NUMBER (checked by assert only).
+SingletonPredictionContext::SingletonPredictionContext(Ref<const PredictionContext> parent, size_t returnState)
+    : PredictionContext(PredictionContextType::SINGLETON), parent(std::move(parent)), returnState(returnState) {
+  assert(returnState != ATNState::INVALID_STATE_NUMBER);
+}
+
+// Factory: returns the shared EMPTY context for (null parent, EMPTY_RETURN_STATE),
+// otherwise a freshly allocated context.
+Ref<const SingletonPredictionContext> SingletonPredictionContext::create(Ref<const PredictionContext> parent, size_t returnState) {
+  if (returnState == EMPTY_RETURN_STATE && parent == nullptr) {
+    // someone can pass in the bits of an array ctx that mean $
+    return std::dynamic_pointer_cast<const SingletonPredictionContext>(EMPTY);
+  }
+  return std::make_shared<SingletonPredictionContext>(std::move(parent), returnState);
+}
+
+bool SingletonPredictionContext::isEmpty() const {
+  return parent == nullptr && returnState == EMPTY_RETURN_STATE;
+}
+
+// A singleton context always holds exactly one (parent, returnState) pair.
+size_t SingletonPredictionContext::size() const {
+  return 1;
+}
+
+// `index` must be 0 (checked by assert only).
+const Ref<const PredictionContext>& SingletonPredictionContext::getParent(size_t index) const {
+  assert(index == 0);
+  // Keeps `index` "used" when assert compiles to nothing.
+  static_cast<void>(index);
+  return parent;
+}
+
+// `index` must be 0 (checked by assert only).
+size_t SingletonPredictionContext::getReturnState(size_t index) const {
+  assert(index == 0);
+  // Keeps `index` "used" when assert compiles to nothing.
+  static_cast<void>(index);
+  return returnState;
+}
+
+// Hash over the context type, the parent and the return state.
+size_t SingletonPredictionContext::hashCodeImpl() const {
+  size_t hash = misc::MurmurHash::initialize();
+  hash = misc::MurmurHash::update(hash, static_cast<size_t>(getContextType()));
+  hash = misc::MurmurHash::update(hash, parent);
+  hash = misc::MurmurHash::update(hash, returnState);
+  return misc::MurmurHash::finish(hash, 3);
+}
+
+// Equal when `other` is the same object, or is also a singleton with the same return state,
+// a compatible cached hash code, and a parent that is either the same object or compares equal.
+bool SingletonPredictionContext::equals(const PredictionContext &other) const {
+  if (this == std::addressof(other)) {
+    return true;
+  }
+  if (getContextType() != other.getContextType()) {
+    return false;
+  }
+  // Safe: the context type was checked just above.
+  const auto &singleton = downCast<const SingletonPredictionContext&>(other);
+  return returnState == singleton.returnState &&
+         cachedHashCodeEqual(cachedHashCode(), singleton.cachedHashCode()) &&
+         (parent == singleton.parent || (parent != nullptr && singleton.parent != nullptr && *parent == *singleton.parent));
+}
+
+// Renders "<returnState> <parent text>". When the parent is null or renders as an empty string,
+// the result is "$" for EMPTY_RETURN_STATE and the bare return state otherwise.
+std::string SingletonPredictionContext::toString() const {
+  //std::string up = !parent.expired() ? parent.lock()->toString() : "";
+  std::string up = parent != nullptr ? parent->toString() : "";
+  if (up.length() == 0) {
+    if (returnState == EMPTY_RETURN_STATE) {
+      return "$";
+    }
+    return std::to_string(returnState);
+  }
+  return std::to_string(returnState) + " " + up;
+}
diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/SingletonPredictionContext.h b/contrib/libs/antlr4_cpp_runtime/src/atn/SingletonPredictionContext.h
new file mode 100644
index 0000000000..1784c4f045
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/atn/SingletonPredictionContext.h
@@ -0,0 +1,43 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "atn/PredictionContext.h"
+
+namespace antlr4 {
+namespace atn {
+
+  /// A prediction context with exactly one parent and one return state.
+  class ANTLR4CPP_PUBLIC SingletonPredictionContext final : public PredictionContext {
+  public:
+    static bool is(const PredictionContext &predictionContext) { return predictionContext.getContextType() == PredictionContextType::SINGLETON; }
+
+    /// Null-tolerant overload: a null pointer is never a SingletonPredictionContext.
+    static bool is(const PredictionContext *predictionContext) { return predictionContext != nullptr && is(*predictionContext); }
+
+    /// Preferred way to obtain an instance; may return the shared EMPTY context.
+    static Ref<const SingletonPredictionContext> create(Ref<const PredictionContext> parent, size_t returnState);
+
+    // Usually a parent is linked via a weak ptr. Not so here, as we have a kind of reverse reference chain.
+    // There are no child contexts stored here, and often the parent context is left dangling when its
+    // owning ATNState is released. In order to avoid having this context released as well (leaving all other contexts
+    // which got this one as parent with a null reference) we use a shared_ptr here instead, to keep those
+    // otherwise orphaned parent contexts alive.
+    const Ref<const PredictionContext> parent;
+    const size_t returnState;
+
+    SingletonPredictionContext(Ref<const PredictionContext> parent, size_t returnState);
+
+    bool isEmpty() const override;
+    size_t size() const override;
+    const Ref<const PredictionContext>& getParent(size_t index) const override;
+    size_t getReturnState(size_t index) const override;
+    bool equals(const PredictionContext &other) const override;
+    std::string toString() const override;
+
+  protected:
+    size_t hashCodeImpl() const override;
+  };
+
+} // namespace atn
+} // namespace antlr4
diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/StarBlockStartState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/StarBlockStartState.h
new file mode 100644
index 0000000000..17fd43fde8
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/atn/StarBlockStartState.h
@@ -0,0 +1,24 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "atn/BlockStartState.h"
+
+namespace antlr4 {
+namespace atn {
+
+  /// The block that begins a closure loop.
+ class ANTLR4CPP_PUBLIC StarBlockStartState final : public BlockStartState { + public: + static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::STAR_BLOCK_START; } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + StarBlockStartState() : BlockStartState(ATNStateType::STAR_BLOCK_START) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/StarLoopEntryState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/StarLoopEntryState.h new file mode 100644 index 0000000000..a62eb812b1 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/StarLoopEntryState.h @@ -0,0 +1,37 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionState.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC StarLoopEntryState final : public DecisionState { + public: + static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::STAR_LOOP_ENTRY; } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + /** + * Indicates whether this state can benefit from a precedence DFA during SLL + * decision making. 
+ * + * <p>This is a computed property that is calculated during ATN deserialization + * and stored for use in {@link ParserATNSimulator} and + * {@link ParserInterpreter}.</p> + * + * @see DFA#isPrecedenceDfa() + */ + bool isPrecedenceDecision = false; + + StarLoopbackState *loopBackState = nullptr; + + StarLoopEntryState() : DecisionState(ATNStateType::STAR_LOOP_ENTRY) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/StarLoopbackState.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/StarLoopbackState.cpp new file mode 100644 index 0000000000..6dddbc0d4e --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/StarLoopbackState.cpp @@ -0,0 +1,19 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/StarLoopEntryState.h" +#include "atn/Transition.h" +#include "support/Casts.h" + +#include "atn/StarLoopbackState.h" + +using namespace antlr4::atn; + +StarLoopEntryState *StarLoopbackState::getLoopEntryState() const { + if (transitions[0]->target != nullptr && transitions[0]->target->getStateType() == ATNStateType::STAR_LOOP_ENTRY) { + return antlrcpp::downCast<StarLoopEntryState*>(transitions[0]->target); + } + return nullptr; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/StarLoopbackState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/StarLoopbackState.h new file mode 100644 index 0000000000..04ef9db095 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/StarLoopbackState.h @@ -0,0 +1,25 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "atn/ATNState.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC StarLoopbackState final : public ATNState { + public: + static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::STAR_LOOP_BACK; } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + StarLoopbackState() : ATNState(ATNStateType::STAR_LOOP_BACK) {} + + StarLoopEntryState *getLoopEntryState() const; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/TokensStartState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/TokensStartState.h new file mode 100644 index 0000000000..8e41636283 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/TokensStartState.h @@ -0,0 +1,24 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionState.h" + +namespace antlr4 { +namespace atn { + + /// The Tokens rule start state linking to each lexer rule start state. + class ANTLR4CPP_PUBLIC TokensStartState final : public DecisionState { + public: + static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::TOKEN_START; } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + TokensStartState() : DecisionState(ATNStateType::TOKEN_START) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/Transition.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/Transition.cpp new file mode 100644 index 0000000000..b918cddfcf --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/Transition.cpp @@ -0,0 +1,36 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#include "Exceptions.h"
+#include "support/Arrays.h"
+
+#include "atn/Transition.h"
+
+using namespace antlr4;
+using namespace antlr4::atn;
+using namespace antlrcpp;
+
+// Rejects a null target with NullPointerException, so `target` is non-null after construction.
+Transition::Transition(TransitionType transitionType, ATNState *target) : _transitionType(transitionType) {
+  if (target == nullptr) {
+    throw NullPointerException("target cannot be null.");
+  }
+
+  this->target = target;
+}
+
+// Default: a transition consumes input.
+bool Transition::isEpsilon() const {
+  return false;
+}
+
+// Default: no label.
+misc::IntervalSet Transition::label() const {
+  return misc::IntervalSet::EMPTY_SET;
+}
+
+// Debug text containing the addresses of this transition and of its target.
+std::string Transition::toString() const {
+  std::stringstream ss;
+  ss << "(Transition " << std::hex << this << ", target: " << std::hex << target << ')';
+
+  return ss.str();
+}
diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/Transition.h b/contrib/libs/antlr4_cpp_runtime/src/atn/Transition.h
new file mode 100644
index 0000000000..4c88d698ae
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/atn/Transition.h
@@ -0,0 +1,65 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "misc/IntervalSet.h"
+#include "atn/TransitionType.h"
+
+namespace antlr4 {
+namespace atn {
+
+  /// <summary>
+  /// An ATN transition between any two ATN states. Subclasses define
+  /// atom, set, epsilon, action, predicate, rule transitions.
+  /// <p/>
+  /// This is a one way link. It emanates from a state (usually via a list of
+  /// transitions) and has a target state.
+  /// <p/>
+  /// Since we never have to change the ATN transitions once we construct it,
+  /// we can fix these transitions as specific classes. The DFA transitions
+  /// on the other hand need to update the labels as it adds transitions to
+  /// the states. We'll use the term Edge for the DFA to distinguish them from
+  /// ATN transitions.
+  /// </summary>
+  class ANTLR4CPP_PUBLIC Transition {
+  public:
+    /// The target of this transition.
+    // ml: this is a reference into the ATN.
+    ATNState *target;
+
+    virtual ~Transition() = default;
+
+    /// The type tag given to the constructor; the static is() helpers of the subclasses test it.
+    TransitionType getTransitionType() const { return _transitionType; }
+
+    /**
+     * Determines if the transition is an "epsilon" transition.
+     *
+     * <p>The default implementation returns {@code false}.</p>
+     *
+     * @return {@code true} if traversing this transition in the ATN does not
+     * consume an input symbol; otherwise, {@code false} if traversing this
+     * transition consumes (matches) an input symbol.
+     */
+    virtual bool isEpsilon() const;
+
+    /// The symbols this transition is labelled with; the default implementation returns the empty set.
+    virtual misc::IntervalSet label() const;
+
+    /// Whether `symbol` takes this transition, given the bounds of the vocabulary.
+    virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const = 0;
+
+    virtual std::string toString() const;
+
+    // Transitions are not copyable.
+    Transition(Transition const&) = delete;
+    Transition& operator=(Transition const&) = delete;
+
+  protected:
+    /// @throws NullPointerException when `target` is null.
+    Transition(TransitionType transitionType, ATNState *target);
+
+  private:
+    const TransitionType _transitionType;
+  };
+
+  using ConstTransitionPtr = std::unique_ptr<const Transition>;
+
+} // namespace atn
+} // namespace antlr4
diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/TransitionType.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/TransitionType.cpp
new file mode 100644
index 0000000000..78769b2ada
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/atn/TransitionType.cpp
@@ -0,0 +1,27 @@
+#include "atn/TransitionType.h"
+
+// Maps a transition type to the spelling of its enumerator.
+// The switch has no default on purpose-looking grounds: a value outside the enumeration falls
+// through to "UNKNOWN" (NOTE(review): presumably so compilers can warn about unhandled enumerators).
+std::string antlr4::atn::transitionTypeName(TransitionType transitionType) {
+  switch (transitionType) {
+    case TransitionType::EPSILON:
+      return "EPSILON";
+    case TransitionType::RANGE:
+      return "RANGE";
+    case TransitionType::RULE:
+      return "RULE";
+    case TransitionType::PREDICATE:
+      return "PREDICATE";
+    case TransitionType::ATOM:
+      return "ATOM";
+    case TransitionType::ACTION:
+      return "ACTION";
+    case TransitionType::SET:
+      return "SET";
+    case TransitionType::NOT_SET:
+      return "NOT_SET";
+    case TransitionType::WILDCARD:
+      return "WILDCARD";
+    case TransitionType::PRECEDENCE:
+      return "PRECEDENCE";
+  }
+  return "UNKNOWN";
+}
diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/TransitionType.h b/contrib/libs/antlr4_cpp_runtime/src/atn/TransitionType.h
new file mode 100644
index 0000000000..d5d5f3bd97
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/atn/TransitionType.h
@@ -0,0 +1,33 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include <cstddef>
+#include <string>
+
+#include "antlr4-common.h"
+
+namespace antlr4 {
+namespace atn {
+
+  // Constants for transition serialization.
+  enum class TransitionType : size_t {
+    EPSILON = 1,
+    RANGE = 2,
+    RULE = 3,
+    PREDICATE = 4, // e.g., {isType(input.LT(1))}?
+    ATOM = 5,
+    ACTION = 6,
+    SET = 7, // ~(A|B) or ~atom, wildcard, which convert to next 2
+    NOT_SET = 8,
+    WILDCARD = 9,
+    PRECEDENCE = 10,
+  };
+
+  /// Returns the enumerator's name, or "UNKNOWN" for a value outside the enumeration.
+  ANTLR4CPP_PUBLIC std::string transitionTypeName(TransitionType transitionType);
+
+} // namespace atn
+} // namespace antlr4
diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/WildcardTransition.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/WildcardTransition.cpp
new file mode 100644
index 0000000000..03ec00d399
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/atn/WildcardTransition.cpp
@@ -0,0 +1,21 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */ + +#include "atn/ATNState.h" + +#include "atn/WildcardTransition.h" + +using namespace antlr4::atn; + +WildcardTransition::WildcardTransition(ATNState *target) : Transition(TransitionType::WILDCARD, target) { +} + +bool WildcardTransition::matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const { + return symbol >= minVocabSymbol && symbol <= maxVocabSymbol; +} + +std::string WildcardTransition::toString() const { + return "WILDCARD " + Transition::toString() + " {}"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/WildcardTransition.h b/contrib/libs/antlr4_cpp_runtime/src/atn/WildcardTransition.h new file mode 100644 index 0000000000..d8d663f1fd --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/WildcardTransition.h @@ -0,0 +1,27 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/Transition.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC WildcardTransition final : public Transition { + public: + static bool is(const Transition &transition) { return transition.getTransitionType() == TransitionType::WILDCARD; } + + static bool is(const Transition *transition) { return transition != nullptr && is(*transition); } + + explicit WildcardTransition(ATNState *target); + + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + virtual std::string toString() const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/dfa/DFA.cpp b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFA.cpp new file mode 100644 index 0000000000..4cc0ab7cc1 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFA.cpp @@ -0,0 +1,115 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "dfa/DFASerializer.h" +#include "dfa/LexerDFASerializer.h" +#include "support/CPPUtils.h" +#include "atn/StarLoopEntryState.h" +#include "atn/ATNConfigSet.h" +#include "support/Casts.h" + +#include "dfa/DFA.h" + +using namespace antlr4; +using namespace antlr4::dfa; +using namespace antlrcpp; + +DFA::DFA(atn::DecisionState *atnStartState) : DFA(atnStartState, 0) { +} + +DFA::DFA(atn::DecisionState *atnStartState, size_t decision) + : atnStartState(atnStartState), s0(nullptr), decision(decision) { + + _precedenceDfa = false; + if (atn::StarLoopEntryState::is(atnStartState)) { + if (downCast<atn::StarLoopEntryState*>(atnStartState)->isPrecedenceDecision) { + _precedenceDfa = true; + s0 = new DFAState(std::unique_ptr<atn::ATNConfigSet>(new atn::ATNConfigSet())); + s0->isAcceptState = false; + s0->requiresFullContext = false; + } + } +} + +DFA::DFA(DFA &&other) : atnStartState(other.atnStartState), s0(other.s0), decision(other.decision) { + // Source states are implicitly cleared by the move. + states = std::move(other.states); + + other.atnStartState = nullptr; + other.decision = 0; + other.s0 = nullptr; + _precedenceDfa = other._precedenceDfa; + other._precedenceDfa = false; +} + +DFA::~DFA() { + bool s0InList = (s0 == nullptr); + for (auto *state : states) { + if (state == s0) + s0InList = true; + delete state; + } + + if (!s0InList) { + delete s0; + } +} + +bool DFA::isPrecedenceDfa() const { + return _precedenceDfa; +} + +DFAState* DFA::getPrecedenceStartState(int precedence) const { + assert(_precedenceDfa); // Only precedence DFAs may contain a precedence start state. 
+ + auto iterator = s0->edges.find(precedence); + if (iterator == s0->edges.end()) + return nullptr; + + return iterator->second; +} + +void DFA::setPrecedenceStartState(int precedence, DFAState *startState) { + if (!isPrecedenceDfa()) { + throw IllegalStateException("Only precedence DFAs may contain a precedence start state."); + } + + if (precedence < 0) { + return; + } + + s0->edges[precedence] = startState; +} + +std::vector<DFAState *> DFA::getStates() const { + std::vector<DFAState *> result; + for (auto *state : states) + result.push_back(state); + + std::sort(result.begin(), result.end(), [](DFAState *o1, DFAState *o2) -> bool { + return o1->stateNumber < o2->stateNumber; + }); + + return result; +} + +std::string DFA::toString(const Vocabulary &vocabulary) const { + if (s0 == nullptr) { + return ""; + } + + DFASerializer serializer(this, vocabulary); + return serializer.toString(); +} + +std::string DFA::toLexerString() const { + if (s0 == nullptr) { + return ""; + } + LexerDFASerializer serializer(this); + + return serializer.toString(); +} + diff --git a/contrib/libs/antlr4_cpp_runtime/src/dfa/DFA.h b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFA.h new file mode 100644 index 0000000000..360eda8ba7 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFA.h @@ -0,0 +1,96 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "dfa/DFAState.h" + +namespace antlr4 { +namespace dfa { + + class ANTLR4CPP_PUBLIC DFA final { + private: + struct DFAStateHasher final { + size_t operator()(const DFAState *dfaState) const { + return dfaState->hashCode(); + } + }; + + struct DFAStateComparer final { + bool operator()(const DFAState *lhs, const DFAState *rhs) const { + return lhs == rhs || *lhs == *rhs; + } + }; + + public: + /// A set of all DFA states. 
Use a map so we can get old state back. + /// Set only allows you to see if it's there. + + /// From which ATN state did we create this DFA? + atn::DecisionState *atnStartState; + std::unordered_set<DFAState*, DFAStateHasher, DFAStateComparer> states; // States are owned by this class. + DFAState *s0; + size_t decision; + + explicit DFA(atn::DecisionState *atnStartState); + DFA(atn::DecisionState *atnStartState, size_t decision); + DFA(const DFA &other) = delete; + DFA(DFA &&other); + ~DFA(); + + /** + * Gets whether this DFA is a precedence DFA. Precedence DFAs use a special + * start state {@link #s0} which is not stored in {@link #states}. The + * {@link DFAState#edges} array for this start state contains outgoing edges + * supplying individual start states corresponding to specific precedence + * values. + * + * @return {@code true} if this is a precedence DFA; otherwise, + * {@code false}. + * @see Parser#getPrecedence() + */ + bool isPrecedenceDfa() const; + + /** + * Get the start state for a specific precedence value. + * + * @param precedence The current precedence. + * @return The start state corresponding to the specified precedence, or + * {@code null} if no start state exists for the specified precedence. + * + * @throws IllegalStateException if this is not a precedence DFA. + * @see #isPrecedenceDfa() + */ + DFAState* getPrecedenceStartState(int precedence) const; + + /** + * Set the start state for a specific precedence value. + * + * @param precedence The current precedence. + * @param startState The start state corresponding to the specified + * precedence. + * + * @throws IllegalStateException if this is not a precedence DFA. + * @see #isPrecedenceDfa() + */ + void setPrecedenceStartState(int precedence, DFAState *startState); + + /// Return a list of all states in this DFA, ordered by state number. 
+ std::vector<DFAState *> getStates() const; + + std::string toString(const Vocabulary &vocabulary) const; + + std::string toLexerString() const; + + private: + /** + * {@code true} if this DFA is for a precedence decision; otherwise, + * {@code false}. This is the backing field for {@link #isPrecedenceDfa}. + */ + bool _precedenceDfa; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/dfa/DFASerializer.cpp b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFASerializer.cpp new file mode 100644 index 0000000000..64d01769de --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFASerializer.cpp @@ -0,0 +1,60 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "dfa/DFA.h" +#include "Vocabulary.h" + +#include "dfa/DFASerializer.h" + +using namespace antlr4::dfa; + +DFASerializer::DFASerializer(const DFA *dfa, const Vocabulary &vocabulary) : _dfa(dfa), _vocabulary(vocabulary) { +} + +std::string DFASerializer::toString() const { + if (_dfa->s0 == nullptr) { + return ""; + } + + std::stringstream ss; + std::vector<DFAState *> states = _dfa->getStates(); + for (auto *s : states) { + for (size_t i = 0; i < s->edges.size(); i++) { + DFAState *t = s->edges[i]; + if (t != nullptr && t->stateNumber != INT32_MAX) { + ss << getStateString(s); + std::string label = getEdgeLabel(i); + ss << "-" << label << "->" << getStateString(t) << "\n"; + } + } + } + + return ss.str(); +} + +std::string DFASerializer::getEdgeLabel(size_t i) const { + return _vocabulary.getDisplayName(i); // ml: no longer needed -1 as we use a map for edges, without offset. +} + +std::string DFASerializer::getStateString(DFAState *s) const { + size_t n = s->stateNumber; + + const std::string baseStateStr = std::string(s->isAcceptState ? ":" : "") + "s" + std::to_string(n) + + (s->requiresFullContext ? 
"^" : ""); + + if (s->isAcceptState) { + if (!s->predicates.empty()) { + std::string buf; + for (size_t i = 0; i < s->predicates.size(); i++) { + buf.append(s->predicates[i].toString()); + } + return baseStateStr + "=>" + buf; + } else { + return baseStateStr + "=>" + std::to_string(s->prediction); + } + } else { + return baseStateStr; + } +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/dfa/DFASerializer.h b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFASerializer.h new file mode 100644 index 0000000000..b541714078 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFASerializer.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Vocabulary.h" + +namespace antlr4 { +namespace dfa { + + /// A DFA walker that knows how to dump them to serialized strings. + class ANTLR4CPP_PUBLIC DFASerializer { + public: + DFASerializer(const DFA *dfa, const Vocabulary &vocabulary); + + virtual ~DFASerializer() = default; + + std::string toString() const; + + protected: + virtual std::string getEdgeLabel(size_t i) const; + std::string getStateString(DFAState *s) const; + + private: + const DFA *_dfa; + const Vocabulary &_vocabulary; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/dfa/DFAState.cpp b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFAState.cpp new file mode 100644 index 0000000000..e591b204c7 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFAState.cpp @@ -0,0 +1,59 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/ATNConfigSet.h" +#include "atn/SemanticContext.h" +#include "atn/ATNConfig.h" +#include "misc/MurmurHash.h" + +#include "dfa/DFAState.h" + +using namespace antlr4::dfa; +using namespace antlr4::atn; + +std::string DFAState::PredPrediction::toString() const { + return std::string("(") + pred->toString() + ", " + std::to_string(alt) + ")"; +} + +std::set<size_t> DFAState::getAltSet() const { + std::set<size_t> alts; + if (configs != nullptr) { + for (size_t i = 0; i < configs->size(); i++) { + alts.insert(configs->get(i)->alt); + } + } + return alts; +} + +size_t DFAState::hashCode() const { + return configs != nullptr ? configs->hashCode() : 0; +} + +bool DFAState::equals(const DFAState &other) const { + if (this == std::addressof(other)) { + return true; + } + return configs == other.configs || + (configs != nullptr && other.configs != nullptr && *configs == *other.configs); +} + +std::string DFAState::toString() const { + std::stringstream ss; + ss << stateNumber; + if (configs) { + ss << ":" << configs->toString(); + } + if (isAcceptState) { + ss << " => "; + if (!predicates.empty()) { + for (size_t i = 0; i < predicates.size(); i++) { + ss << predicates[i].toString(); + } + } else { + ss << prediction; + } + } + return ss.str(); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/dfa/DFAState.h b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFAState.h new file mode 100644 index 0000000000..f555cc45cf --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFAState.h @@ -0,0 +1,154 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +#include "atn/ATNConfigSet.h" +#include "FlatHashMap.h" + +namespace antlr4 { +namespace dfa { + + /// <summary> + /// A DFA state represents a set of possible ATN configurations. + /// As Aho, Sethi, Ullman p. 
117 says "The DFA uses its state + /// to keep track of all possible states the ATN can be in after + /// reading each input symbol. That is to say, after reading + /// input a1a2..an, the DFA is in a state that represents the + /// subset T of the states of the ATN that are reachable from the + /// ATN's start state along some path labeled a1a2..an." + /// In conventional NFA->DFA conversion, therefore, the subset T + /// would be a bitset representing the set of states the + /// ATN could be in. We need to track the alt predicted by each + /// state as well, however. More importantly, we need to maintain + /// a stack of states, tracking the closure operations as they + /// jump from rule to rule, emulating rule invocations (method calls). + /// I have to add a stack to simulate the proper lookahead sequences for + /// the underlying LL grammar from which the ATN was derived. + /// <p/> + /// I use a set of ATNConfig objects not simple states. An ATNConfig + /// is both a state (ala normal conversion) and a RuleContext describing + /// the chain of rules (if any) followed to arrive at that state. + /// <p/> + /// A DFA state may have multiple references to a particular state, + /// but with different ATN contexts (with same or different alts) + /// meaning that state was reached via a different set of rule invocations. 
+ /// </summary> + class ANTLR4CPP_PUBLIC DFAState final { + public: + struct ANTLR4CPP_PUBLIC PredPrediction final { + public: + Ref<const atn::SemanticContext> pred; // never null; at least SemanticContext.NONE + int alt; + + PredPrediction() = delete; + + PredPrediction(const PredPrediction&) = default; + PredPrediction(PredPrediction&&) = default; + + PredPrediction(Ref<const atn::SemanticContext> pred, int alt) : pred(std::move(pred)), alt(alt) {} + + PredPrediction& operator=(const PredPrediction&) = default; + PredPrediction& operator=(PredPrediction&&) = default; + + std::string toString() const; + }; + + std::unique_ptr<atn::ATNConfigSet> configs; + + /// {@code edges[symbol]} points to target of symbol. Shift up by 1 so (-1) + /// <seealso cref="Token#EOF"/> maps to {@code edges[0]}. + // ml: this is a sparse list, so we use a map instead of a vector. + // Watch out: we no longer have the -1 offset, as it isn't needed anymore. + FlatHashMap<size_t, DFAState*> edges; + + /// if accept state, what ttype do we match or alt do we predict? + /// This is set to <seealso cref="ATN#INVALID_ALT_NUMBER"/> when <seealso cref="#predicates"/>{@code !=null} or + /// <seealso cref="#requiresFullContext"/>. + size_t prediction = 0; + + Ref<const atn::LexerActionExecutor> lexerActionExecutor; + + /// <summary> + /// During SLL parsing, this is a list of predicates associated with the + /// ATN configurations of the DFA state. When we have predicates, + /// <seealso cref="#requiresFullContext"/> is {@code false} since full context prediction evaluates predicates + /// on-the-fly. If this is not null, then <seealso cref="#prediction"/> is + /// <seealso cref="ATN#INVALID_ALT_NUMBER"/>. + /// <p/> + /// We only use these for non-<seealso cref="#requiresFullContext"/> but conflicting states. That + /// means we know from the context (it's $ or we don't dip into outer + /// context) that it's an ambiguity not a conflict. 
+ /// <p/> + /// This list is computed by <seealso cref="ParserATNSimulator#predicateDFAState"/>. + /// </summary> + std::vector<PredPrediction> predicates; + + int stateNumber = -1; + + bool isAcceptState = false; + + /// <summary> + /// Indicates that this state was created during SLL prediction that + /// discovered a conflict between the configurations in the state. Future + /// <seealso cref="ParserATNSimulator#execATN"/> invocations immediately jumped doing + /// full context prediction if this field is true. + /// </summary> + bool requiresFullContext = false; + + /// Map a predicate to a predicted alternative. + DFAState() = default; + + explicit DFAState(int stateNumber) : stateNumber(stateNumber) {} + + explicit DFAState(std::unique_ptr<atn::ATNConfigSet> configs) : configs(std::move(configs)) {} + + /// <summary> + /// Get the set of all alts mentioned by all ATN configurations in this + /// DFA state. + /// </summary> + std::set<size_t> getAltSet() const; + + size_t hashCode() const; + + /// Two DFAState instances are equal if their ATN configuration sets + /// are the same. This method is used to see if a state already exists. + /// + /// Because the number of alternatives and number of ATN configurations are + /// finite, there is a finite number of DFA states that can be processed. + /// This is necessary to show that the algorithm terminates. + /// + /// Cannot test the DFA state numbers here because in + /// ParserATNSimulator#addDFAState we need to know if any other state + /// exists that has this exact set of ATN configurations. The + /// stateNumber is irrelevant. 
+ bool equals(const DFAState &other) const; + + std::string toString() const; + }; + + inline bool operator==(const DFAState &lhs, const DFAState &rhs) { + return lhs.equals(rhs); + } + + inline bool operator!=(const DFAState &lhs, const DFAState &rhs) { + return !operator==(lhs, rhs); + } + +} // namespace dfa +} // namespace antlr4 + +namespace std { + + template <> + struct hash<::antlr4::dfa::DFAState> { + size_t operator()(const ::antlr4::dfa::DFAState &dfaState) const { + return dfaState.hashCode(); + } + }; + +} // namespace std diff --git a/contrib/libs/antlr4_cpp_runtime/src/dfa/LexerDFASerializer.cpp b/contrib/libs/antlr4_cpp_runtime/src/dfa/LexerDFASerializer.cpp new file mode 100644 index 0000000000..20ed734743 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/dfa/LexerDFASerializer.cpp @@ -0,0 +1,17 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "Vocabulary.h" + +#include "dfa/LexerDFASerializer.h" + +using namespace antlr4::dfa; + +LexerDFASerializer::LexerDFASerializer(const DFA *dfa) : DFASerializer(dfa, Vocabulary()) { +} + +std::string LexerDFASerializer::getEdgeLabel(size_t i) const { + return std::string("'") + static_cast<char>(i) + "'"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/dfa/LexerDFASerializer.h b/contrib/libs/antlr4_cpp_runtime/src/dfa/LexerDFASerializer.h new file mode 100644 index 0000000000..eed7f4f0c5 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/dfa/LexerDFASerializer.h @@ -0,0 +1,22 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "dfa/DFASerializer.h" + +namespace antlr4 { +namespace dfa { + + class ANTLR4CPP_PUBLIC LexerDFASerializer final : public DFASerializer { + public: + explicit LexerDFASerializer(const DFA *dfa); + + protected: + std::string getEdgeLabel(size_t i) const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/internal/Synchronization.cpp b/contrib/libs/antlr4_cpp_runtime/src/internal/Synchronization.cpp new file mode 100644 index 0000000000..dd30ef971b --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/internal/Synchronization.cpp @@ -0,0 +1,100 @@ +// Copyright 2012-2022 The ANTLR Project +// +// Redistribution and use in source and binary forms, with or without modification, are permitted +// provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of conditions +// and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written +// permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "internal/Synchronization.h" + +using namespace antlr4::internal; + +void Mutex::lock() { +#if ANTLR4CPP_USING_ABSEIL + _impl.Lock(); +#else + _impl.lock(); +#endif +} + +bool Mutex::try_lock() { +#if ANTLR4CPP_USING_ABSEIL + return _impl.TryLock(); +#else + return _impl.try_lock(); +#endif +} + +void Mutex::unlock() { +#if ANTLR4CPP_USING_ABSEIL + _impl.Unlock(); +#else + _impl.unlock(); +#endif +} + +void SharedMutex::lock() { +#if ANTLR4CPP_USING_ABSEIL + _impl.WriterLock(); +#else + _impl.lock(); +#endif +} + +bool SharedMutex::try_lock() { +#if ANTLR4CPP_USING_ABSEIL + return _impl.WriterTryLock(); +#else + return _impl.try_lock(); +#endif +} + +void SharedMutex::unlock() { +#if ANTLR4CPP_USING_ABSEIL + _impl.WriterUnlock(); +#else + _impl.unlock(); +#endif +} + +void SharedMutex::lock_shared() { +#if ANTLR4CPP_USING_ABSEIL + _impl.ReaderLock(); +#else + _impl.lock_shared(); +#endif +} + +bool SharedMutex::try_lock_shared() { +#if ANTLR4CPP_USING_ABSEIL + return _impl.ReaderTryLock(); +#else + return _impl.try_lock_shared(); +#endif +} + +void SharedMutex::unlock_shared() { +#if ANTLR4CPP_USING_ABSEIL + _impl.ReaderUnlock(); +#else + _impl.unlock_shared(); +#endif +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/internal/Synchronization.h b/contrib/libs/antlr4_cpp_runtime/src/internal/Synchronization.h new file mode 100644 index 0000000000..0f1ff9587d --- /dev/null +++ 
b/contrib/libs/antlr4_cpp_runtime/src/internal/Synchronization.h @@ -0,0 +1,154 @@ +// Copyright 2012-2022 The ANTLR Project +// +// Redistribution and use in source and binary forms, with or without modification, are permitted +// provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of conditions +// and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written +// permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#pragma once + +#include "antlr4-common.h" + +#include <mutex> +#include <shared_mutex> +#include <utility> + +#if ANTLR4CPP_USING_ABSEIL +#error #include "absl/base/call_once.h" +#error #include "absl/base/thread_annotations.h" +#error #include "absl/synchronization/mutex.h" +#define ANTLR4CPP_NO_THREAD_SAFTEY_ANALYSIS ABSL_NO_THREAD_SAFETY_ANALYSIS +#else +#define ANTLR4CPP_NO_THREAD_SAFTEY_ANALYSIS +#endif + +// By default ANTLRv4 uses synchronization primitives provided by the C++ standard library. In most +// deployments this is fine, however in some using custom synchronization primitives may be +// preferred. This header allows that by optionally supporting some alternative implementations and +// allowing for more easier patching of other alternatives. + +namespace antlr4::internal { + + // Must be compatible with C++ standard library Mutex requirement. + class ANTLR4CPP_PUBLIC Mutex final { + public: + Mutex() = default; + + // No copying or moving, we are as strict as possible to support other implementations. + Mutex(const Mutex&) = delete; + Mutex(Mutex&&) = delete; + + // No copying or moving, we are as strict as possible to support other implementations. + Mutex& operator=(const Mutex&) = delete; + Mutex& operator=(Mutex&&) = delete; + + void lock() ANTLR4CPP_NO_THREAD_SAFTEY_ANALYSIS; + + bool try_lock() ANTLR4CPP_NO_THREAD_SAFTEY_ANALYSIS; + + void unlock() ANTLR4CPP_NO_THREAD_SAFTEY_ANALYSIS; + + private: +#if ANTLR4CPP_USING_ABSEIL + absl::Mutex _impl; +#else + std::mutex _impl; +#endif + }; + + template <typename Mutex> + using UniqueLock = std::unique_lock<Mutex>; + + // Must be compatible with C++ standard library SharedMutex requirement. + class ANTLR4CPP_PUBLIC SharedMutex final { + public: + SharedMutex() = default; + + // No copying or moving, we are as strict as possible to support other implementations. 
+ SharedMutex(const SharedMutex&) = delete; + SharedMutex(SharedMutex&&) = delete; + + // No copying or moving, we are as strict as possible to support other implementations. + SharedMutex& operator=(const SharedMutex&) = delete; + SharedMutex& operator=(SharedMutex&&) = delete; + + void lock() ANTLR4CPP_NO_THREAD_SAFTEY_ANALYSIS; + + bool try_lock() ANTLR4CPP_NO_THREAD_SAFTEY_ANALYSIS; + + void unlock() ANTLR4CPP_NO_THREAD_SAFTEY_ANALYSIS; + + void lock_shared() ANTLR4CPP_NO_THREAD_SAFTEY_ANALYSIS; + + bool try_lock_shared() ANTLR4CPP_NO_THREAD_SAFTEY_ANALYSIS; + + void unlock_shared() ANTLR4CPP_NO_THREAD_SAFTEY_ANALYSIS; + + private: +#if ANTLR4CPP_USING_ABSEIL + absl::Mutex _impl; +#else + std::shared_mutex _impl; +#endif + }; + + template <typename Mutex> + using SharedLock = std::shared_lock<Mutex>; + + class OnceFlag; + + template <typename Callable, typename... Args> + void call_once(OnceFlag &onceFlag, Callable &&callable, Args&&... args); + + // Must be compatible with std::once_flag. + class ANTLR4CPP_PUBLIC OnceFlag final { + public: + constexpr OnceFlag() = default; + + // No copying or moving, we are as strict as possible to support other implementations. + OnceFlag(const OnceFlag&) = delete; + OnceFlag(OnceFlag&&) = delete; + + // No copying or moving, we are as strict as possible to support other implementations. + OnceFlag& operator=(const OnceFlag&) = delete; + OnceFlag& operator=(OnceFlag&&) = delete; + + private: + template <typename Callable, typename... Args> + friend void call_once(OnceFlag &onceFlag, Callable &&callable, Args&&... args); + +#if ANTLR4CPP_USING_ABSEIL + absl::once_flag _impl; +#else + std::once_flag _impl; +#endif + }; + + template <typename Callable, typename... Args> + void call_once(OnceFlag &onceFlag, Callable &&callable, Args&&... 
args) { +#if ANTLR4CPP_USING_ABSEIL + absl::call_once(onceFlag._impl, std::forward<Callable>(callable), std::forward<Args>(args)...); +#else + std::call_once(onceFlag._impl, std::forward<Callable>(callable), std::forward<Args>(args)...); +#endif + } + +} // namespace antlr4::internal diff --git a/contrib/libs/antlr4_cpp_runtime/src/misc/InterpreterDataReader.cpp b/contrib/libs/antlr4_cpp_runtime/src/misc/InterpreterDataReader.cpp new file mode 100644 index 0000000000..1a236eccfb --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/misc/InterpreterDataReader.cpp @@ -0,0 +1,124 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/ATN.h" +#include "atn/ATNDeserializer.h" +#include "Vocabulary.h" + +#include "misc/InterpreterDataReader.h" + +using namespace antlr4::dfa; +using namespace antlr4::atn; +using namespace antlr4::misc; + +InterpreterData::InterpreterData(std::vector<std::string> const& literalNames, std::vector<std::string> const& symbolicNames) +: vocabulary(literalNames, symbolicNames) { +} + +InterpreterData InterpreterDataReader::parseFile(std::string const& fileName) { + // The structure of the data file is very simple. Everything is line based with empty lines + // separating the different parts. For lexers the layout is: + // token literal names: + // ... + // + // token symbolic names: + // ... + // + // rule names: + // ... + // + // channel names: + // ... + // + // mode names: + // ... + // + // atn: + // <a single line with comma separated int values> enclosed in a pair of squared brackets. + // + // Data for a parser does not contain channel and mode names. 
+ + std::ifstream input(fileName); + if (!input.good()) + return {}; + + std::vector<std::string> literalNames; + std::vector<std::string> symbolicNames; + + std::string line; + + std::getline(input, line, '\n'); + assert(line == "token literal names:"); + while (true) { + std::getline(input, line, '\n'); + if (line.empty()) + break; + + literalNames.push_back(line == "null" ? "" : line); + }; + + std::getline(input, line, '\n'); + assert(line == "token symbolic names:"); + while (true) { + std::getline(input, line, '\n'); + if (line.empty()) + break; + + symbolicNames.push_back(line == "null" ? "" : line); + }; + InterpreterData result(literalNames, symbolicNames); + + std::getline(input, line, '\n'); + assert(line == "rule names:"); + while (true) { + std::getline(input, line, '\n'); + if (line.empty()) + break; + + result.ruleNames.push_back(line); + }; + + std::getline(input, line, '\n'); + if (line == "channel names:") { + while (true) { + std::getline(input, line, '\n'); + if (line.empty()) + break; + + result.channels.push_back(line); + }; + + std::getline(input, line, '\n'); + assert(line == "mode names:"); + while (true) { + std::getline(input, line, '\n'); + if (line.empty()) + break; + + result.modes.push_back(line); + }; + } + + std::vector<int32_t> serializedATN; + + std::getline(input, line, '\n'); + assert(line == "atn:"); + std::getline(input, line, '\n'); + std::stringstream tokenizer(line); + std::string value; + while (tokenizer.good()) { + std::getline(tokenizer, value, ','); + unsigned long number; + if (value[0] == '[') + number = std::strtoul(&value[1], nullptr, 10); + else + number = std::strtoul(value.c_str(), nullptr, 10); + serializedATN.push_back(static_cast<int32_t>(number)); + } + + ATNDeserializer deserializer; + result.atn = deserializer.deserialize(serializedATN); + return result; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/misc/InterpreterDataReader.h b/contrib/libs/antlr4_cpp_runtime/src/misc/InterpreterDataReader.h new 
file mode 100644 index 0000000000..4b83dd129d --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/misc/InterpreterDataReader.h @@ -0,0 +1,33 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" +#include "atn/ATN.h" +#include "Vocabulary.h" + +namespace antlr4 { +namespace misc { + + struct InterpreterData { + std::unique_ptr<atn::ATN> atn; + dfa::Vocabulary vocabulary; + std::vector<std::string> ruleNames; + std::vector<std::string> channels; // Only valid for lexer grammars. + std::vector<std::string> modes; // ditto + + InterpreterData() {}; // For invalid content. + InterpreterData(std::vector<std::string> const& literalNames, std::vector<std::string> const& symbolicNames); + }; + + // A class to read plain text interpreter data produced by ANTLR. + class ANTLR4CPP_PUBLIC InterpreterDataReader { + public: + static InterpreterData parseFile(std::string const& fileName); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/misc/Interval.cpp b/contrib/libs/antlr4_cpp_runtime/src/misc/Interval.cpp new file mode 100644 index 0000000000..f0d0bfb491 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/misc/Interval.cpp @@ -0,0 +1,61 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/Interval.h" + +using namespace antlr4::misc; + +const Interval Interval::INVALID; + +size_t Interval::hashCode() const { + size_t hash = 23; + hash = hash * 31 + static_cast<size_t>(a); + hash = hash * 31 + static_cast<size_t>(b); + return hash; +} + +bool Interval::startsBeforeDisjoint(const Interval &other) const { + return a < other.a && b < other.a; +} + +bool Interval::startsBeforeNonDisjoint(const Interval &other) const { + return a <= other.a && b >= other.a; +} + +bool Interval::startsAfter(const Interval &other) const { + return a > other.a; +} + +bool Interval::startsAfterDisjoint(const Interval &other) const { + return a > other.b; +} + +bool Interval::startsAfterNonDisjoint(const Interval &other) const { + return a > other.a && a <= other.b; // b >= other.b implied +} + +bool Interval::disjoint(const Interval &other) const { + return startsBeforeDisjoint(other) || startsAfterDisjoint(other); +} + +bool Interval::adjacent(const Interval &other) const { + return a == other.b + 1 || b == other.a - 1; +} + +bool Interval::properlyContains(const Interval &other) const { + return other.a >= a && other.b <= b; +} + +Interval Interval::Union(const Interval &other) const { + return Interval(std::min(a, other.a), std::max(b, other.b)); +} + +Interval Interval::intersection(const Interval &other) const { + return Interval(std::max(a, other.a), std::min(b, other.b)); +} + +std::string Interval::toString() const { + return std::to_string(a) + ".." + std::to_string(b); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/misc/Interval.h b/contrib/libs/antlr4_cpp_runtime/src/misc/Interval.h new file mode 100644 index 0000000000..32abf629a8 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/misc/Interval.h @@ -0,0 +1,84 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace misc { + + // Helpers to convert certain unsigned symbols (e.g. Token::EOF) to their original numeric value (e.g. -1) + // and vice versa. This is needed mostly for intervals to keep their original order and for toString() + // methods to print the original numeric value (e.g. for tests). + constexpr size_t numericToSymbol(ssize_t v) { return static_cast<size_t>(v); } + constexpr ssize_t symbolToNumeric(size_t v) { return static_cast<ssize_t>(v); } + + /// An immutable inclusive interval a..b + class ANTLR4CPP_PUBLIC Interval final { + public: + static const Interval INVALID; + + // Must stay signed to guarantee the correct sort order. + ssize_t a; + ssize_t b; + + constexpr Interval() : Interval(static_cast<ssize_t>(-1), static_cast<ssize_t>(-2)) {} + + constexpr explicit Interval(size_t a_, size_t b_) : Interval(symbolToNumeric(a_), symbolToNumeric(b_)) {} + + constexpr Interval(ssize_t a_, ssize_t b_) : a(a_), b(b_) {} + + /// return number of elements between a and b inclusively. x..x is length 1. + /// if b < a, then length is 0. 9..10 has length 2. + constexpr size_t length() const { return b >= a ? static_cast<size_t>(b - a + 1) : 0; } + + constexpr bool operator==(const Interval &other) const { return a == other.a && b == other.b; } + + size_t hashCode() const; + + /// <summary> + /// Does this start completely before other? Disjoint </summary> + bool startsBeforeDisjoint(const Interval &other) const; + + /// <summary> + /// Does this start at or before other? Nondisjoint </summary> + bool startsBeforeNonDisjoint(const Interval &other) const; + + /// <summary> + /// Does this.a start after other.b? May or may not be disjoint </summary> + bool startsAfter(const Interval &other) const; + + /// <summary> + /// Does this start completely after other? Disjoint </summary> + bool startsAfterDisjoint(const Interval &other) const; + + /// <summary> + /// Does this start after other? 
NonDisjoint </summary> + bool startsAfterNonDisjoint(const Interval &other) const; + + /// <summary> + /// Are both ranges disjoint? I.e., no overlap? </summary> + bool disjoint(const Interval &other) const; + + /// <summary> + /// Are two intervals adjacent such as 0..41 and 42..42? </summary> + bool adjacent(const Interval &other) const; + + bool properlyContains(const Interval &other) const; + + /// <summary> + /// Return the interval computed from combining this and other </summary> + Interval Union(const Interval &other) const; + + /// <summary> + /// Return the interval in common between this and o </summary> + Interval intersection(const Interval &other) const; + + std::string toString() const; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/misc/IntervalSet.cpp b/contrib/libs/antlr4_cpp_runtime/src/misc/IntervalSet.cpp new file mode 100644 index 0000000000..d230bf45f6 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/misc/IntervalSet.cpp @@ -0,0 +1,501 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" +#include "Exceptions.h" +#include "Vocabulary.h" + +#include "misc/IntervalSet.h" + +using namespace antlr4; +using namespace antlr4::misc; + +IntervalSet const IntervalSet::COMPLETE_CHAR_SET = + IntervalSet::of(Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE); + +IntervalSet const IntervalSet::EMPTY_SET; + +IntervalSet::IntervalSet() : _intervals() { +} + +IntervalSet::IntervalSet(const IntervalSet &set) : IntervalSet() { + _intervals = set._intervals; +} + +IntervalSet::IntervalSet(IntervalSet&& set) : IntervalSet(std::move(set._intervals)) { +} + +IntervalSet::IntervalSet(std::vector<Interval>&& intervals) : _intervals(std::move(intervals)) { +} + +IntervalSet& IntervalSet::operator=(const IntervalSet& other) { + _intervals = other._intervals; + return *this; +} + +IntervalSet& IntervalSet::operator=(IntervalSet&& other) { + _intervals = move(other._intervals); + return *this; +} + +IntervalSet IntervalSet::of(ssize_t a) { + return IntervalSet({ Interval(a, a) }); +} + +IntervalSet IntervalSet::of(ssize_t a, ssize_t b) { + return IntervalSet({ Interval(a, b) }); +} + +void IntervalSet::clear() { + _intervals.clear(); +} + +void IntervalSet::add(ssize_t el) { + add(el, el); +} + +void IntervalSet::add(ssize_t a, ssize_t b) { + add(Interval(a, b)); +} + +void IntervalSet::add(const Interval &addition) { + if (addition.b < addition.a) { + return; + } + + // find position in list + for (auto iterator = _intervals.begin(); iterator != _intervals.end(); ++iterator) { + Interval r = *iterator; + if (addition == r) { + return; + } + + if (addition.adjacent(r) || !addition.disjoint(r)) { + // next to each other, make a single larger interval + Interval bigger = addition.Union(r); + *iterator = bigger; + + // make sure we didn't just create an interval that + // should be merged with next interval in list + while (iterator + 1 != _intervals.end()) { + Interval next = *++iterator; + if (!bigger.adjacent(next) && 
bigger.disjoint(next)) { + break; + } + + // if we bump up against or overlap next, merge + iterator = _intervals.erase(iterator);// remove this one + --iterator; // move backwards to what we just set + *iterator = bigger.Union(next); // set to 3 merged ones + // ml: no need to advance iterator, we do that in the next round anyway. ++iterator; // first call to next after previous duplicates the result + } + return; + } + + if (addition.startsBeforeDisjoint(r)) { + // insert before r + //--iterator; + _intervals.insert(iterator, addition); + return; + } + + // if disjoint and after r, a future iteration will handle it + } + + // ok, must be after last interval (and disjoint from last interval) + // just add it + _intervals.push_back(addition); +} + +IntervalSet IntervalSet::Or(const std::vector<IntervalSet> &sets) { + IntervalSet result; + for (const auto &s : sets) { + result.addAll(s); + } + return result; +} + +IntervalSet& IntervalSet::addAll(const IntervalSet &set) { + // walk set and add each interval + for (auto const& interval : set._intervals) { + add(interval); + } + return *this; +} + +IntervalSet IntervalSet::complement(ssize_t minElement, ssize_t maxElement) const { + return complement(IntervalSet::of(minElement, maxElement)); +} + +IntervalSet IntervalSet::complement(const IntervalSet &vocabulary) const { + return vocabulary.subtract(*this); +} + +IntervalSet IntervalSet::subtract(const IntervalSet &other) const { + return subtract(*this, other); +} + +IntervalSet IntervalSet::subtract(const IntervalSet &left, const IntervalSet &right) { + if (left.isEmpty()) { + return IntervalSet(); + } + + if (right.isEmpty()) { + // right set has no elements; just return the copy of the current set + return left; + } + + IntervalSet result(left); + size_t resultI = 0; + size_t rightI = 0; + while (resultI < result._intervals.size() && rightI < right._intervals.size()) { + Interval &resultInterval = result._intervals[resultI]; + const Interval &rightInterval = 
right._intervals[rightI]; + + // operation: (resultInterval - rightInterval) and update indexes + + if (rightInterval.b < resultInterval.a) { + rightI++; + continue; + } + + if (rightInterval.a > resultInterval.b) { + resultI++; + continue; + } + + Interval beforeCurrent; + Interval afterCurrent; + if (rightInterval.a > resultInterval.a) { + beforeCurrent = Interval(resultInterval.a, rightInterval.a - 1); + } + + if (rightInterval.b < resultInterval.b) { + afterCurrent = Interval(rightInterval.b + 1, resultInterval.b); + } + + if (beforeCurrent.a > -1) { // -1 is the default value + if (afterCurrent.a > -1) { + // split the current interval into two + result._intervals[resultI] = beforeCurrent; + result._intervals.insert(result._intervals.begin() + resultI + 1, afterCurrent); + resultI++; + rightI++; + } else { + // replace the current interval + result._intervals[resultI] = beforeCurrent; + resultI++; + } + } else { + if (afterCurrent.a > -1) { + // replace the current interval + result._intervals[resultI] = afterCurrent; + rightI++; + } else { + // remove the current interval (thus no need to increment resultI) + result._intervals.erase(result._intervals.begin() + resultI); + } + } + } + + // If rightI reached right.intervals.size(), no more intervals to subtract from result. + // If resultI reached result.intervals.size(), we would be subtracting from an empty set. + // Either way, we are done. 
+ return result; +} + +IntervalSet IntervalSet::Or(const IntervalSet &a) const { + IntervalSet result; + result.addAll(*this); + result.addAll(a); + return result; +} + +IntervalSet IntervalSet::And(const IntervalSet &other) const { + IntervalSet intersection; + size_t i = 0; + size_t j = 0; + + // iterate down both interval lists looking for nondisjoint intervals + while (i < _intervals.size() && j < other._intervals.size()) { + Interval mine = _intervals[i]; + Interval theirs = other._intervals[j]; + + if (mine.startsBeforeDisjoint(theirs)) { + // move this iterator looking for interval that might overlap + i++; + } else if (theirs.startsBeforeDisjoint(mine)) { + // move other iterator looking for interval that might overlap + j++; + } else if (mine.properlyContains(theirs)) { + // overlap, add intersection, get next theirs + intersection.add(mine.intersection(theirs)); + j++; + } else if (theirs.properlyContains(mine)) { + // overlap, add intersection, get next mine + intersection.add(mine.intersection(theirs)); + i++; + } else if (!mine.disjoint(theirs)) { + // overlap, add intersection + intersection.add(mine.intersection(theirs)); + + // Move the iterator of lower range [a..b], but not + // the upper range as it may contain elements that will collide + // with the next iterator. So, if mine=[0..115] and + // theirs=[115..200], then intersection is 115 and move mine + // but not theirs as theirs may collide with the next range + // in thisIter. 
+ // move both iterators to next ranges + if (mine.startsAfterNonDisjoint(theirs)) { + j++; + } else if (theirs.startsAfterNonDisjoint(mine)) { + i++; + } + } + } + + return intersection; +} + + +bool IntervalSet::contains(ssize_t el) const { + if (_intervals.empty() || el < _intervals.front().a || el > _intervals.back().b) { + return false; + } + + return std::binary_search(_intervals.begin(), _intervals.end(), Interval(el, el), [](const Interval &lhs, const Interval &rhs) { + return lhs.b < rhs.a; + }); +} + +bool IntervalSet::isEmpty() const { + return _intervals.empty(); +} + +ssize_t IntervalSet::getSingleElement() const { + if (_intervals.size() == 1) { + if (_intervals[0].a == _intervals[0].b) { + return _intervals[0].a; + } + } + + return Token::INVALID_TYPE; // XXX: this value is 0, but 0 is a valid interval range, how can that work? +} + +ssize_t IntervalSet::getMaxElement() const { + if (_intervals.empty()) { + return Token::INVALID_TYPE; + } + + return _intervals.back().b; +} + +ssize_t IntervalSet::getMinElement() const { + if (_intervals.empty()) { + return Token::INVALID_TYPE; + } + + return _intervals.front().a; +} + +std::vector<Interval> const& IntervalSet::getIntervals() const { + return _intervals; +} + +size_t IntervalSet::hashCode() const { + size_t hash = MurmurHash::initialize(); + for (const auto &interval : _intervals) { + hash = MurmurHash::update(hash, interval.a); + hash = MurmurHash::update(hash, interval.b); + } + + return MurmurHash::finish(hash, _intervals.size() * 2); +} + +bool IntervalSet::operator == (const IntervalSet &other) const { + if (_intervals.empty() && other._intervals.empty()) + return true; + + if (_intervals.size() != other._intervals.size()) + return false; + + return std::equal(_intervals.begin(), _intervals.end(), other._intervals.begin()); +} + +std::string IntervalSet::toString() const { + return toString(false); +} + +std::string IntervalSet::toString(bool elemAreChar) const { + if (_intervals.empty()) { + 
return "{}"; + } + + std::stringstream ss; + size_t effectiveSize = size(); + if (effectiveSize > 1) { + ss << "{"; + } + + bool firstEntry = true; + for (const auto &interval : _intervals) { + if (!firstEntry) + ss << ", "; + firstEntry = false; + + ssize_t a = interval.a; + ssize_t b = interval.b; + if (a == b) { + if (a == -1) { + ss << "<EOF>"; + } else if (elemAreChar) { + ss << "'" << static_cast<char>(a) << "'"; + } else { + ss << a; + } + } else { + if (elemAreChar) { + ss << "'" << static_cast<char>(a) << "'..'" << static_cast<char>(b) << "'"; + } else { + ss << a << ".." << b; + } + } + } + if (effectiveSize > 1) { + ss << "}"; + } + + return ss.str(); +} + +std::string IntervalSet::toString(const dfa::Vocabulary &vocabulary) const { + if (_intervals.empty()) { + return "{}"; + } + + std::stringstream ss; + size_t effectiveSize = size(); + if (effectiveSize > 1) { + ss << "{"; + } + + bool firstEntry = true; + for (const auto &interval : _intervals) { + if (!firstEntry) + ss << ", "; + firstEntry = false; + + ssize_t a = interval.a; + ssize_t b = interval.b; + if (a == b) { + ss << elementName(vocabulary, a); + } else { + for (ssize_t i = a; i <= b; i++) { + if (i > a) { + ss << ", "; + } + ss << elementName(vocabulary, i); + } + } + } + if (effectiveSize > 1) { + ss << "}"; + } + + return ss.str(); +} + +std::string IntervalSet::elementName(const dfa::Vocabulary &vocabulary, ssize_t a) const { + if (a == -1) { + return "<EOF>"; + } else if (a == -2) { + return "<EPSILON>"; + } else { + return vocabulary.getDisplayName(a); + } +} + +size_t IntervalSet::size() const { + size_t result = 0; + for (const auto &interval : _intervals) { + result += size_t(interval.b - interval.a + 1); + } + return result; +} + +std::vector<ssize_t> IntervalSet::toList() const { + std::vector<ssize_t> result; + for (const auto &interval : _intervals) { + ssize_t a = interval.a; + ssize_t b = interval.b; + for (ssize_t v = a; v <= b; v++) { + result.push_back(v); + } + } + return 
result; +} + +std::set<ssize_t> IntervalSet::toSet() const { + std::set<ssize_t> result; + for (const auto &interval : _intervals) { + ssize_t a = interval.a; + ssize_t b = interval.b; + for (ssize_t v = a; v <= b; v++) { + result.insert(v); + } + } + return result; +} + +ssize_t IntervalSet::get(size_t i) const { + size_t index = 0; + for (const auto &interval : _intervals) { + ssize_t a = interval.a; + ssize_t b = interval.b; + for (ssize_t v = a; v <= b; v++) { + if (index == i) { + return v; + } + index++; + } + } + return -1; +} + +void IntervalSet::remove(ssize_t el) { + for (size_t i = 0; i < _intervals.size(); ++i) { + Interval &interval = _intervals[i]; + ssize_t a = interval.a; + ssize_t b = interval.b; + if (el < a) { + break; // list is sorted and el is before this interval; not here + } + + // if whole interval x..x, rm + if (el == a && el == b) { + _intervals.erase(_intervals.begin() + (long)i); + break; + } + // if on left edge x..b, adjust left + if (el == a) { + interval.a++; + break; + } + // if on right edge a..x, adjust right + if (el == b) { + interval.b--; + break; + } + // if in middle a..x..b, split interval + if (el > a && el < b) { // found in this interval + ssize_t oldb = interval.b; + interval.b = el - 1; // [a..x-1] + add(el + 1, oldb); // add [x+1..b] + + break; // ml: not in the Java code but I believe we also should stop searching here, as we found x. + } + } +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/misc/IntervalSet.h b/contrib/libs/antlr4_cpp_runtime/src/misc/IntervalSet.h new file mode 100644 index 0000000000..49565dc691 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/misc/IntervalSet.h @@ -0,0 +1,188 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "misc/Interval.h" +#include "Exceptions.h" + +namespace antlr4 { +namespace misc { + + /** + * This class implements the {@link IntSet} backed by a sorted array of + * non-overlapping intervals. It is particularly efficient for representing + * large collections of numbers, where the majority of elements appear as part + * of a sequential range of numbers that are all part of the set. For example, + * the set { 1, 2, 3, 4, 7, 8 } may be represented as { [1, 4], [7, 8] }. + * + * <p> + * This class is able to represent sets containing any combination of values in + * the range {@link Integer#MIN_VALUE} to {@link Integer#MAX_VALUE} + * (inclusive).</p> + */ + class ANTLR4CPP_PUBLIC IntervalSet final { + public: + static IntervalSet const COMPLETE_CHAR_SET; + static IntervalSet const EMPTY_SET; + + private: + /// The list of sorted, disjoint intervals. + std::vector<Interval> _intervals; + + explicit IntervalSet(std::vector<Interval>&& intervals); + + public: + IntervalSet(); + IntervalSet(IntervalSet const& set); + IntervalSet(IntervalSet&& set); + + template<typename T1, typename... T_NEXT> + IntervalSet(int, T1 t1, T_NEXT&&... next) : IntervalSet() { + // The first int argument is an ignored count for compatibility + // with the previous varargs based interface. + addItems(t1, std::forward<T_NEXT>(next)...); + } + + IntervalSet& operator=(IntervalSet const& set); + IntervalSet& operator=(IntervalSet&& set); + + /// Create a set with a single element, el. + static IntervalSet of(ssize_t a); + + /// Create a set with all ints within range [a..b] (inclusive) + static IntervalSet of(ssize_t a, ssize_t b); + + void clear(); + + /// Add a single element to the set. An isolated element is stored + /// as a range el..el. + void add(ssize_t el); + + /// Add interval; i.e., add all integers from a to b to set. + /// If b<a, do nothing. + /// Keep list in sorted order (by left range value). + /// If overlap, combine ranges. 
For example, + /// If this is {1..5, 10..20}, adding 6..7 yields + /// {1..5, 6..7, 10..20}. Adding 4..8 yields {1..8, 10..20}. + void add(ssize_t a, ssize_t b); + + /// combine all sets in the array returned the or'd value + static IntervalSet Or(const std::vector<IntervalSet> &sets); + + // Copy on write so we can cache a..a intervals and sets of that. + void add(const Interval &addition); + IntervalSet& addAll(const IntervalSet &set); + + template<typename T1, typename... T_NEXT> + void addItems(T1 t1, T_NEXT&&... next) { + add(t1); + addItems(std::forward<T_NEXT>(next)...); + } + + IntervalSet complement(ssize_t minElement, ssize_t maxElement) const; + + /// Given the set of possible values (rather than, say UNICODE or MAXINT), + /// return a new set containing all elements in vocabulary, but not in + /// this. The computation is (vocabulary - this). + /// + /// 'this' is assumed to be either a subset or equal to vocabulary. + IntervalSet complement(const IntervalSet &vocabulary) const; + + /// Compute this-other via this&~other. + /// Return a new set containing all elements in this but not in other. + /// other is assumed to be a subset of this; + /// anything that is in other but not in this will be ignored. + IntervalSet subtract(const IntervalSet &other) const; + + /** + * Compute the set difference between two interval sets. The specific + * operation is {@code left - right}. If either of the input sets is + * {@code null}, it is treated as though it was an empty set. + */ + static IntervalSet subtract(const IntervalSet &left, const IntervalSet &right); + + IntervalSet Or(const IntervalSet &a) const; + + /// Return a new set with the intersection of this set with other. Because + /// the intervals are sorted, we can use an iterator for each list and + /// just walk them together. This is roughly O(min(n,m)) for interval + /// list lengths n and m. + IntervalSet And(const IntervalSet &other) const; + + /// Is el in any range of this set? 
+ bool contains(ssize_t el) const; + + /// return true if this set has no members + bool isEmpty() const; + + /// If this set is a single integer, return it otherwise Token.INVALID_TYPE. + ssize_t getSingleElement() const; + + /** + * Returns the maximum value contained in the set. + * + * @return the maximum value contained in the set. If the set is empty, this + * method returns {@link Token#INVALID_TYPE}. + */ + ssize_t getMaxElement() const; + + /** + * Returns the minimum value contained in the set. + * + * @return the minimum value contained in the set. If the set is empty, this + * method returns {@link Token#INVALID_TYPE}. + */ + ssize_t getMinElement() const; + + /// <summary> + /// Return a list of Interval objects. </summary> + std::vector<Interval> const& getIntervals() const; + + size_t hashCode() const; + + /// Are two IntervalSets equal? Because all intervals are sorted + /// and disjoint, equals is a simple linear walk over both lists + /// to make sure they are the same. + bool operator == (const IntervalSet &other) const; + std::string toString() const; + std::string toString(bool elemAreChar) const; + + std::string toString(const dfa::Vocabulary &vocabulary) const; + + protected: + std::string elementName(const dfa::Vocabulary &vocabulary, ssize_t a) const; + + public: + size_t size() const; + std::vector<ssize_t> toList() const; + std::set<ssize_t> toSet() const; + + /// Get the ith element of ordered set. Used only by RandomPhrase so + /// don't bother to implement if you're not doing that for a new + /// ANTLR code gen target. + ssize_t get(size_t i) const; + void remove(ssize_t el); + + private: + void addItems() { /* No-op */ } + }; + +} // namespace atn +} // namespace antlr4 + +// Hash function for IntervalSet. 
+ +namespace std { + using antlr4::misc::IntervalSet; + + template <> struct hash<IntervalSet> + { + size_t operator() (const IntervalSet &x) const + { + return x.hashCode(); + } + }; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/misc/MurmurHash.cpp b/contrib/libs/antlr4_cpp_runtime/src/misc/MurmurHash.cpp new file mode 100644 index 0000000000..09072c9f7e --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/misc/MurmurHash.cpp @@ -0,0 +1,120 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include <cstddef> +#include <cstdint> +#include <cstring> + +#include "misc/MurmurHash.h" + +using namespace antlr4::misc; + +// A variation of the MurmurHash3 implementation (https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp) +// Here we unrolled the loop used there into individual calls to update(), as we usually hash object fields +// instead of entire buffers. 
+ +// Platform-specific functions and macros + +// Microsoft Visual Studio + +#if defined(_MSC_VER) + +#include <stdlib.h> + +#define ROTL32(x,y) _rotl(x,y) +#define ROTL64(x,y) _rotl64(x,y) + +#elif ANTLR4CPP_HAVE_BUILTIN(__builtin_rotateleft32) && ANTLR4CPP_HAVE_BUILTIN(__builtin_rotateleft64) + +#define ROTL32(x, y) __builtin_rotateleft32(x, y) +#define ROTL64(x, y) __builtin_rotateleft64(x, y) + +#else // defined(_MSC_VER) + +// Other compilers + +namespace { + +constexpr uint32_t ROTL32(uint32_t x, int r) { + return (x << r) | (x >> (32 - r)); +} +constexpr uint64_t ROTL64(uint64_t x, int r) { + return (x << r) | (x >> (64 - r)); +} + +} + +#endif // !defined(_MSC_VER) + +#if SIZE_MAX == UINT64_MAX + +size_t MurmurHash::update(size_t hash, size_t value) { + size_t k1 = value; + k1 *= UINT64_C(0x87c37b91114253d5); + k1 = ROTL64(k1, 31); + k1 *= UINT64_C(0x4cf5ad432745937f); + + hash ^= k1; + hash = ROTL64(hash, 27); + hash = hash * 5 + UINT64_C(0x52dce729); + + return hash; +} + +size_t MurmurHash::finish(size_t hash, size_t entryCount) { + hash ^= entryCount * 8; + hash ^= hash >> 33; + hash *= UINT64_C(0xff51afd7ed558ccd); + hash ^= hash >> 33; + hash *= UINT64_C(0xc4ceb9fe1a85ec53); + hash ^= hash >> 33; + return hash; +} + +#elif SIZE_MAX == UINT32_MAX + +size_t MurmurHash::update(size_t hash, size_t value) { + size_t k1 = value; + k1 *= UINT32_C(0xCC9E2D51); + k1 = ROTL32(k1, 15); + k1 *= UINT32_C(0x1B873593); + + hash ^= k1; + hash = ROTL32(hash, 13); + hash = hash * 5 + UINT32_C(0xE6546B64); + + return hash; +} + +size_t MurmurHash::finish(size_t hash, size_t entryCount) { + hash ^= entryCount * 4; + hash ^= hash >> 16; + hash *= UINT32_C(0x85EBCA6B); + hash ^= hash >> 13; + hash *= UINT32_C(0xC2B2AE35); + hash ^= hash >> 16; + return hash; +} + +#else +#error "Expected sizeof(size_t) to be 4 or 8." 
#endif

// Hashes an arbitrary byte buffer by feeding it to update(hash, value) one
// size_t-sized word at a time. memcpy is used so `data` needs no particular
// alignment. A trailing partial word is zero-padded before being mixed in.
size_t MurmurHash::update(size_t hash, const void *data, size_t size) {
  size_t value;
  const uint8_t *bytes = static_cast<const uint8_t*>(data);
  while (size >= sizeof(size_t)) {
    std::memcpy(&value, bytes, sizeof(size_t));
    hash = update(hash, value);
    bytes += sizeof(size_t);
    size -= sizeof(size_t);
  }
  if (size != 0) {
    value = 0;
    std::memcpy(&value, bytes, size);
    hash = update(hash, value);
  }
  return hash;
}
diff --git a/contrib/libs/antlr4_cpp_runtime/src/misc/MurmurHash.h b/contrib/libs/antlr4_cpp_runtime/src/misc/MurmurHash.h new file mode 100644 index 0000000000..cde7ac7906 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/misc/MurmurHash.h @@ -0,0 +1,102 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

#pragma once

#include <cstdint>
#include <type_traits>

#include "antlr4-common.h"

namespace antlr4 {
namespace misc {

  /// Static-only helper for incrementally computing hash codes:
  /// initialize() -> update() (any number of times) -> finish().
  class ANTLR4CPP_PUBLIC MurmurHash final {
  private:
    static constexpr size_t DEFAULT_SEED = 0;

  public:
    /// Initialize the hash using the default seed value.
    /// Returns the intermediate hash value.
    static size_t initialize() { return initialize(DEFAULT_SEED); }

    /// Initialize the hash using the specified seed.
    /// The seed itself is the initial intermediate hash value.
    static size_t initialize(size_t seed) { return seed; }

    /// Update the intermediate hash value for the next input {@code value}.
    /// <param name="hash"> the intermediate hash value </param>
    /// <param name="value"> the value to add to the current hash </param>
    /// Returns the updated intermediate hash value.
    static size_t update(size_t hash, size_t value);

    /**
     * Update the intermediate hash value for the next input {@code value}.
     * A null reference contributes the value 0; otherwise value->hashCode() is used.
     *
     * @param hash the intermediate hash value
     * @param value the value to add to the current hash
     * @return the updated intermediate hash value
     */
    template <class T>
    static size_t update(size_t hash, Ref<T> const& value) {
      return update(hash, value != nullptr ? value->hashCode() : 0);
    }

    /// Raw-pointer overload of the above: a null pointer contributes 0,
    /// otherwise value->hashCode() is mixed in.
    template <class T>
    static size_t update(size_t hash, T *value) {
      return update(hash, value != nullptr ? value->hashCode() : 0);
    }

    /// Update the intermediate hash value with {@code size} bytes starting at {@code data}.
    static size_t update(size_t hash, const void *data, size_t size);

    /// Typed-array overload: {@code size} is an element count and is converted
    /// to a byte count before delegating to the byte-buffer overload.
    template <typename T>
    static size_t update(size_t hash, const T *data, size_t size) {
      return update(hash, static_cast<const void*>(data), size * sizeof(std::remove_reference_t<T>));
    }

    /// <summary>
    /// Apply the final computation steps to the intermediate value {@code hash}
    /// to form the final result of the MurmurHash 3 hash function.
    /// </summary>
    /// <param name="hash"> the intermediate hash value </param>
    /// <param name="entryCount"> the number of calls to update() before calling finish() </param>
    /// <returns> the final hash result </returns>
    static size_t finish(size_t hash, size_t entryCount);

    /// Utility function to compute the hash code of a vector of references
    /// using the MurmurHash3 algorithm.
    ///
    /// <typeparam name="T"> the type the vector's Ref elements point to </typeparam>
    /// <param name="data"> the vector of references to hash </param>
    /// <param name="seed"> the seed for the MurmurHash algorithm </param>
    /// <returns> the hash code of the data </returns>
    template<typename T>
    static size_t hashCode(const std::vector<Ref<T>> &data, size_t seed = DEFAULT_SEED) {
      size_t hash = initialize(seed);
      for (auto &entry : data) {
        hash = update(hash, entry);
      }
      return finish(hash, data.size());
    }

    /// Hashes {@code size} bytes starting at {@code data}.
    /// Note that the byte count {@code size} (not the number of words mixed in)
    /// is what gets passed to finish() as the entry count.
    static size_t hashCode(const void *data, size_t size, size_t seed = DEFAULT_SEED) {
      size_t hash = initialize(seed);
      hash = update(hash, data, size);
      return finish(hash, size);
    }

    /// Typed-array overload: {@code size} is an element count and is converted
    /// to a byte count before delegating to the byte-buffer overload.
    template <typename T>
    static size_t hashCode(const T *data, size_t size, size_t seed = DEFAULT_SEED) {
      return hashCode(static_cast<const void*>(data), size * sizeof(std::remove_reference_t<T>), seed);
    }

  private:
    // Not instantiable or copyable: the class only groups static functions.
    MurmurHash() = delete;

    MurmurHash(const MurmurHash&) = delete;

    MurmurHash& operator=(const MurmurHash&) = delete;
  };

} // namespace misc
} // namespace antlr4
diff --git a/contrib/libs/antlr4_cpp_runtime/src/misc/Predicate.cpp b/contrib/libs/antlr4_cpp_runtime/src/misc/Predicate.cpp new file mode 100644 index 0000000000..c35f1921c4 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/misc/Predicate.cpp @@ -0,0 +1,4 @@
#include "misc/Predicate.h"

// Out-of-line definition of the virtual destructor declared in Predicate.h.
antlr4::misc::Predicate::~Predicate() {
}
diff --git a/contrib/libs/antlr4_cpp_runtime/src/misc/Predicate.h b/contrib/libs/antlr4_cpp_runtime/src/misc/Predicate.h new file mode 100644 index 0000000000..1032d53fed --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/misc/Predicate.h @@ -0,0 +1,21 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

#pragma once

#include "antlr4-common.h"

namespace antlr4 {
namespace misc {

  /// Abstract interface with a single test over a parse tree node.
  class ANTLR4CPP_PUBLIC Predicate {
  public:
    virtual ~Predicate();

    /// Returns whether the given parse tree node satisfies this predicate.
    virtual bool test(tree::ParseTree *t) = 0;
  };

} // namespace misc
} // namespace antlr4
diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/Any.cpp b/contrib/libs/antlr4_cpp_runtime/src/support/Any.cpp new file mode 100644 index 0000000000..a1ed50d456 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/support/Any.cpp @@ -0,0 +1,8 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

#include "Any.h"

// No definitions live here: Any.h only declares a type alias.
using namespace antlrcpp;
diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/Any.h b/contrib/libs/antlr4_cpp_runtime/src/support/Any.h new file mode 100644 index 0000000000..fa5df58946 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/support/Any.h @@ -0,0 +1,16 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

// antlrcpp::Any is an alias for the standard library's std::any.

#pragma once

#include "antlr4-common.h"

namespace antlrcpp {

  using Any = std::any;

} // namespace antlrcpp
diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/Arrays.cpp b/contrib/libs/antlr4_cpp_runtime/src/support/Arrays.cpp new file mode 100644 index 0000000000..b3c4f94f2f --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/support/Arrays.cpp @@ -0,0 +1,43 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
+ */ + +#include "tree/ParseTree.h" +#include "Exceptions.h" + +#include "support/Arrays.h" + +using namespace antlrcpp; + +std::string Arrays::listToString(const std::vector<std::string> &list, const std::string &separator) +{ + std::stringstream ss; + bool firstEntry = true; + + ss << '['; + for (const auto &entry : list) { + ss << entry; + if (firstEntry) { + ss << separator; + firstEntry = false; + } + } + + ss << ']'; + return ss.str(); +} + +template <> +std::string Arrays::toString(const std::vector<antlr4::tree::ParseTree*> &source) { + std::string result = "["; + bool firstEntry = true; + for (auto *value : source) { + result += value->toStringTree(); + if (firstEntry) { + result += ", "; + firstEntry = false; + } + } + return result + "]"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/Arrays.h b/contrib/libs/antlr4_cpp_runtime/src/support/Arrays.h new file mode 100644 index 0000000000..04b852d986 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/support/Arrays.h @@ -0,0 +1,149 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlrcpp { + + class ANTLR4CPP_PUBLIC Arrays { + public: + + static std::string listToString(const std::vector<std::string> &list, const std::string &separator); + + template <typename T> + static bool equals(const std::vector<T> &a, const std::vector<T> &b) { + if (a.size() != b.size()) + return false; + + for (size_t i = 0; i < a.size(); ++i) + if (!(a[i] == b[i])) + return false; + + return true; + } + + template <typename T> + static bool equals(const std::vector<T *> &a, const std::vector<T *> &b) { + if (a.size() != b.size()) + return false; + + for (size_t i = 0; i < a.size(); ++i) { + if (!a[i] && !b[i]) + continue; + if (!a[i] || !b[i]) + return false; + if (a[i] == b[i]) + continue; + + if (!(*a[i] == *b[i])) + return false; + } + + return true; + } + + template <typename T> + static bool equals(const std::vector<Ref<T>> &a, const std::vector<Ref<T>> &b) { + if (a.size() != b.size()) + return false; + + for (size_t i = 0; i < a.size(); ++i) { + if (!a[i] && !b[i]) + continue; + if (!a[i] || !b[i]) + return false; + if (a[i] == b[i]) + continue; + + if (!(*a[i] == *b[i])) + return false; + } + + return true; + } + + template <typename T> + static bool equals(const std::vector<std::unique_ptr<T>> &a, const std::vector<std::unique_ptr<T>> &b) { + if (a.size() != b.size()) + return false; + + for (size_t i = 0; i < a.size(); ++i) { + if (!a[i] && !b[i]) + continue; + if (!a[i] || !b[i]) + return false; + if (a[i] == b[i]) + continue; + + if (!(*a[i] == *b[i])) + return false; + } + + return true; + } + + template <typename T> + static std::string toString(const std::vector<T> &source) { + std::string result = "["; + bool firstEntry = true; + for (auto &value : source) { + result += value.toString(); + if (firstEntry) { + result += ", "; + firstEntry = false; + } + } + return result + "]"; + } + + template <typename T> + static std::string toString(const std::vector<Ref<T>> &source) { + 
std::string result = "["; + bool firstEntry = true; + for (auto &value : source) { + result += value->toString(); + if (firstEntry) { + result += ", "; + firstEntry = false; + } + } + return result + "]"; + } + + template <typename T> + static std::string toString(const std::vector<std::unique_ptr<T>> &source) { + std::string result = "["; + bool firstEntry = true; + for (auto &value : source) { + result += value->toString(); + if (firstEntry) { + result += ", "; + firstEntry = false; + } + } + return result + "]"; + } + + template <typename T> + static std::string toString(const std::vector<T *> &source) { + std::string result = "["; + bool firstEntry = true; + for (auto value : source) { + result += value->toString(); + if (firstEntry) { + result += ", "; + firstEntry = false; + } + } + return result + "]"; + } + + }; + + template <> + std::string Arrays::toString(const std::vector<antlr4::tree::ParseTree *> &source); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/BitSet.h b/contrib/libs/antlr4_cpp_runtime/src/support/BitSet.h new file mode 100644 index 0000000000..bb30364be0 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/support/BitSet.h @@ -0,0 +1,76 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlrcpp { + + class ANTLR4CPP_PUBLIC BitSet : public std::bitset<2048> { + public: + size_t nextSetBit(size_t pos) const { + for (size_t i = pos; i < size(); i++){ + if (test(i)) { + return i; + } + } + + return INVALID_INDEX; + } + + // Prints a list of every index for which the bitset contains a bit in true. 
+ friend std::wostream& operator << (std::wostream& os, const BitSet& obj) + { + os << "{"; + size_t total = obj.count(); + for (size_t i = 0; i < obj.size(); i++){ + if (obj.test(i)){ + os << i; + --total; + if (total > 1){ + os << ", "; + } + } + } + + os << "}"; + return os; + } + + static std::string subStringRepresentation(const std::vector<BitSet>::iterator &begin, + const std::vector<BitSet>::iterator &end) { + std::string result; + std::vector<BitSet>::iterator vectorIterator; + + for (vectorIterator = begin; vectorIterator != end; vectorIterator++) { + result += vectorIterator->toString(); + } + // Grab the end + result += end->toString(); + + return result; + } + + std::string toString() const { + std::stringstream stream; + stream << "{"; + bool valueAdded = false; + for (size_t i = 0; i < size(); ++i){ + if (test(i)){ + if (valueAdded) { + stream << ", "; + } + stream << i; + valueAdded = true; + } + } + + stream << "}"; + return stream.str(); + } + + }; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/CPPUtils.cpp b/contrib/libs/antlr4_cpp_runtime/src/support/CPPUtils.cpp new file mode 100644 index 0000000000..95321b3dc1 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/support/CPPUtils.cpp @@ -0,0 +1,207 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "support/CPPUtils.h" + +namespace antlrcpp { + + std::string join(const std::vector<std::string> &strings, const std::string &separator) { + std::string str; + bool firstItem = true; + for (const std::string &s : strings) { + if (!firstItem) { + str.append(separator); + } + firstItem = false; + str.append(s); + } + return str; + } + + std::map<std::string, size_t> toMap(const std::vector<std::string> &keys) { + std::map<std::string, size_t> result; + for (size_t i = 0; i < keys.size(); ++i) { + result.insert({ keys[i], i }); + } + return result; + } + + std::string escapeWhitespace(std::string str, bool escapeSpaces) { + std::string result; + for (auto c : str) { + switch (c) { + case '\n': + result += "\\n"; + break; + + case '\r': + result += "\\r"; + break; + + case '\t': + result += "\\t"; + break; + + case ' ': + if (escapeSpaces) { + result += "\u00B7"; + break; + } + result += c; + break; + + default: + result += c; + break; + } + } + + return result; + } + + std::string toHexString(const int t) { + std::stringstream stream; + stream << std::uppercase << std::hex << t; + return stream.str(); + } + + std::string arrayToString(const std::vector<std::string> &data) { + std::string answer; + size_t toReserve = 0; + for (const auto &sub : data) { + toReserve += sub.size(); + } + answer.reserve(toReserve); + for (const auto &sub: data) { + answer.append(sub); + } + return answer; + } + + std::string replaceString(const std::string &s, const std::string &from, const std::string &to) { + std::string::size_type p; + std::string ss, res; + + ss = s; + p = ss.find(from); + while (p != std::string::npos) { + if (p > 0) + res.append(ss.substr(0, p)).append(to); + else + res.append(to); + ss = ss.substr(p + from.size()); + p = ss.find(from); + } + res.append(ss); + + return res; + } + + std::vector<std::string> split(const std::string &s, const std::string &sep, int count) { + std::vector<std::string> parts; + std::string ss = s; + + 
std::string::size_type p; + + if (s.empty()) + return parts; + + if (count == 0) + count= -1; + + p = ss.find(sep); + while (!ss.empty() && p != std::string::npos && (count < 0 || count > 0)) { + parts.push_back(ss.substr(0, p)); + ss = ss.substr(p+sep.size()); + + --count; + p = ss.find(sep); + } + parts.push_back(ss); + + return parts; + } + + //-------------------------------------------------------------------------------------------------- + + // Debugging helper. Adds indentation to all lines in the given string. + std::string indent(const std::string &s, const std::string &indentation, bool includingFirst) { + std::vector<std::string> parts = split(s, "\n", -1); + for (size_t i = 0; i < parts.size(); ++i) { + if (i == 0 && !includingFirst) + continue; + parts[i].insert(0, indentation); + } + + return join(parts, "\n"); + } + + //-------------------------------------------------------------------------------------------------- + + // Recursively get the error from a, possibly nested, exception. +#if defined(_MSC_FULL_VER) && _MSC_FULL_VER < 190023026 + // No nested exceptions before VS 2015. + template <typename T> + std::exception_ptr get_nested(const T &/*e*/) { + try { + return nullptr; + } + catch (const std::bad_cast &) { + return nullptr; + } + } +#else + template <typename T> + std::exception_ptr get_nested(const T &e) { + try { + auto nested = dynamic_cast<const std::nested_exception&>(e); + return nested.nested_ptr(); + } + catch (const std::bad_cast &) { + return nullptr; + } + } +#endif + + std::string what(std::exception_ptr eptr) { + if (!eptr) { + throw std::bad_exception(); + } + + std::string result; + std::size_t nestCount = 0; + + next: { + try { + std::exception_ptr yeptr; + std::swap(eptr, yeptr); + std::rethrow_exception(yeptr); + } + catch (const std::exception &e) { + result += e.what(); + eptr = get_nested(e); + } + catch (const std::string &e) { + result += e; + } + catch (const char *e) { + result += e; + } + catch (...) 
{ + result += "cannot be determined"; + } + + if (eptr) { + result += " ("; + ++nestCount; + goto next; + } + } + + result += std::string(nestCount, ')'); + return result; + } + +} // namespace antlrcpp diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/CPPUtils.h b/contrib/libs/antlr4_cpp_runtime/src/support/CPPUtils.h new file mode 100644 index 0000000000..2eb1a36037 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/support/CPPUtils.h @@ -0,0 +1,65 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlrcpp { + + ANTLR4CPP_PUBLIC std::string join(const std::vector<std::string> &strings, const std::string &separator); + ANTLR4CPP_PUBLIC std::map<std::string, size_t> toMap(const std::vector<std::string> &keys); + ANTLR4CPP_PUBLIC std::string escapeWhitespace(std::string str, bool escapeSpaces); + ANTLR4CPP_PUBLIC std::string toHexString(const int t); + ANTLR4CPP_PUBLIC std::string arrayToString(const std::vector<std::string> &data); + ANTLR4CPP_PUBLIC std::string replaceString(const std::string &s, const std::string &from, const std::string &to); + ANTLR4CPP_PUBLIC std::vector<std::string> split(const std::string &s, const std::string &sep, int count); + ANTLR4CPP_PUBLIC std::string indent(const std::string &s, const std::string &indentation, bool includingFirst = true); + + // Using RAII + a lambda to implement a "finally" replacement. + template <typename OnEnd> + struct FinalAction { + FinalAction(OnEnd f) : _cleanUp { std::move(f) } {} + FinalAction(FinalAction &&other) : + _cleanUp(std::move(other._cleanUp)), _enabled(other._enabled) { + other._enabled = false; // Don't trigger the lambda after ownership has moved. 
+ } + ~FinalAction() { if (_enabled) _cleanUp(); } + + void disable() { _enabled = false; } + private: + OnEnd _cleanUp; + bool _enabled {true}; + }; + + template <typename OnEnd> + FinalAction<OnEnd> finally(OnEnd f) { + return FinalAction<OnEnd>(std::move(f)); + } + + // Convenience functions to avoid lengthy dynamic_cast() != nullptr checks in many places. + template <typename T1, typename T2> + inline bool is(T2 *obj) { // For pointer types. + return dynamic_cast<typename std::add_const<T1>::type>(obj) != nullptr; + } + + template <typename T1, typename T2> + inline bool is(Ref<T2> const& obj) { // For shared pointers. + return dynamic_cast<T1 *>(obj.get()) != nullptr; + } + + template <typename T> + std::string toString(const T &o) { + std::stringstream ss; + // typeid gives the mangled class name, but that's all what's possible + // in a portable way. + ss << typeid(o).name() << "@" << std::hex << reinterpret_cast<uintptr_t>(&o); + return ss.str(); + } + + // Get the error text from an exception pointer or the current exception. + ANTLR4CPP_PUBLIC std::string what(std::exception_ptr eptr = std::current_exception()); + +} // namespace antlrcpp diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/Casts.h b/contrib/libs/antlr4_cpp_runtime/src/support/Casts.h new file mode 100644 index 0000000000..2ded955dcd --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/support/Casts.h @@ -0,0 +1,34 @@ +/* Copyright (c) 2012-2021 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
 */

#pragma once

#include <cassert>
#include <memory>
#include <type_traits>

namespace antlrcpp {

  // Pointer down-cast from a base class to a derived class.
  // Compile time: To must be a pointer type whose pointee derives from From.
  // Run time: performs a plain static_cast. When assertions are enabled and
  // the preprocessor guard below indicates RTTI is usable, it additionally
  // asserts that a dynamic_cast would have succeeded (a null `from` is allowed).
  template <typename To, typename From>
  To downCast(From* from) {
    static_assert(std::is_pointer_v<To>, "Target type not a pointer.");
    static_assert(std::is_base_of_v<From, std::remove_pointer_t<To>>, "Target type not derived from source type.");
  #if !defined(__GNUC__) || defined(__GXX_RTTI)
    assert(from == nullptr || dynamic_cast<To>(from) != nullptr);
  #endif
    return static_cast<To>(from);
  }

  // Reference flavour of the above: To must be an lvalue reference to a type
  // derived from From. The optional run-time check goes through the pointer
  // form of dynamic_cast so that a mismatch trips the assert instead of
  // throwing std::bad_cast.
  template <typename To, typename From>
  To downCast(From& from) {
    static_assert(std::is_lvalue_reference_v<To>, "Target type not a lvalue reference.");
    static_assert(std::is_base_of_v<From, std::remove_reference_t<To>>, "Target type not derived from source type.");
  #if !defined(__GNUC__) || defined(__GXX_RTTI)
    assert(dynamic_cast<std::add_pointer_t<std::remove_reference_t<To>>>(std::addressof(from)) != nullptr);
  #endif
    return static_cast<To>(from);
  }

}
diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/Declarations.h b/contrib/libs/antlr4_cpp_runtime/src/support/Declarations.h new file mode 100644 index 0000000000..8e960676cf --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/support/Declarations.h @@ -0,0 +1,161 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
+ */ + +#pragma once + +namespace antlr4 { + class ANTLRErrorListener; + class ANTLRErrorStrategy; + class ANTLRFileStream; + class ANTLRInputStream; + class BailErrorStrategy; + class BaseErrorListener; + class BufferedTokenStream; + class CharStream; + class CommonToken; + class CommonTokenFactory; + class CommonTokenStream; + class ConsoleErrorListener; + class DefaultErrorStrategy; + class DiagnosticErrorListener; + class EmptyStackException; + class FailedPredicateException; + class IllegalArgumentException; + class IllegalStateException; + class InputMismatchException; + class IntStream; + class InterpreterRuleContext; + class Lexer; + class LexerInterpreter; + class LexerNoViableAltException; + class ListTokenSource; + class NoSuchElementException; + class NoViableAltException; + class NullPointerException; + class ParseCancellationException; + class Parser; + class ParserInterpreter; + class ParserRuleContext; + class ProxyErrorListener; + class RecognitionException; + class Recognizer; + class RuleContext; + class Token; + template<typename Symbol> class TokenFactory; + class TokenSource; + class TokenStream; + class TokenStreamRewriter; + class UnbufferedCharStream; + class UnbufferedTokenStream; + class WritableToken; + + namespace misc { + class InterpreterDataReader; + class Interval; + class IntervalSet; + class MurmurHash; + class Utils; + class Predicate; + } + namespace atn { + class ATN; + class ATNConfig; + class ATNConfigSet; + class ATNDeserializationOptions; + class ATNDeserializer; + class ATNSerializer; + class ATNSimulator; + class ATNState; + enum class ATNType; + class ActionTransition; + class ArrayPredictionContext; + class AtomTransition; + class BasicBlockStartState; + class BasicState; + class BlockEndState; + class BlockStartState; + class DecisionState; + class EpsilonTransition; + class LL1Analyzer; + class LexerAction; + class LexerActionExecutor; + class LexerATNConfig; + class LexerATNSimulator; + class LexerMoreAction; + class 
LexerPopModeAction; + class LexerSkipAction; + class LookaheadEventInfo; + class LoopEndState; + class NotSetTransition; + class OrderedATNConfigSet; + class ParseInfo; + class ParserATNSimulator; + class PlusBlockStartState; + class PlusLoopbackState; + class PrecedencePredicateTransition; + class PredicateTransition; + class PredictionContext; + enum class PredictionMode; + class PredictionModeClass; + class RangeTransition; + class RuleStartState; + class RuleStopState; + class RuleTransition; + class SemanticContext; + class SetTransition; + class SingletonPredictionContext; + class StarBlockStartState; + class StarLoopEntryState; + class StarLoopbackState; + class TokensStartState; + class Transition; + class WildcardTransition; + } + namespace dfa { + class DFA; + class DFASerializer; + class DFAState; + class LexerDFASerializer; + class Vocabulary; + } + namespace tree { + class AbstractParseTreeVisitor; + class ErrorNode; + class ErrorNodeImpl; + class ParseTree; + class ParseTreeListener; + template<typename T> class ParseTreeProperty; + class ParseTreeVisitor; + class ParseTreeWalker; + class SyntaxTree; + class TerminalNode; + class TerminalNodeImpl; + class Tree; + class Trees; + + namespace pattern { + class Chunk; + class ParseTreeMatch; + class ParseTreePattern; + class ParseTreePatternMatcher; + class RuleTagToken; + class TagChunk; + class TextChunk; + class TokenTagToken; + } + + namespace xpath { + class XPath; + class XPathElement; + class XPathLexerErrorListener; + class XPathRuleAnywhereElement; + class XPathRuleElement; + class XPathTokenAnywhereElement; + class XPathTokenElement; + class XPathWildcardAnywhereElement; + class XPathWildcardElement; + } + } +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/StringUtils.cpp b/contrib/libs/antlr4_cpp_runtime/src/support/StringUtils.cpp new file mode 100644 index 0000000000..9ee274c8de --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/support/StringUtils.cpp @@ -0,0 +1,38 @@ +/* 
Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "support/StringUtils.h" + +namespace antlrcpp { + + std::string escapeWhitespace(std::string_view in) { + std::string out; + escapeWhitespace(out, in); + out.shrink_to_fit(); + return out; + } + + std::string& escapeWhitespace(std::string& out, std::string_view in) { + out.reserve(in.size()); // Best case, no escaping. + for (const auto &c : in) { + switch (c) { + case '\t': + out.append("\\t"); + break; + case '\r': + out.append("\\r"); + break; + case '\n': + out.append("\\n"); + break; + default: + out.push_back(c); + break; + } + } + return out; + } + +} // namespace antrlcpp diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/StringUtils.h b/contrib/libs/antlr4_cpp_runtime/src/support/StringUtils.h new file mode 100644 index 0000000000..aee0d46d6e --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/support/StringUtils.h @@ -0,0 +1,16 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlrcpp { + + ANTLR4CPP_PUBLIC std::string escapeWhitespace(std::string_view in); + + ANTLR4CPP_PUBLIC std::string& escapeWhitespace(std::string& out, std::string_view in); + +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/Unicode.h b/contrib/libs/antlr4_cpp_runtime/src/support/Unicode.h new file mode 100644 index 0000000000..f0f84375ad --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/support/Unicode.h @@ -0,0 +1,28 @@ +/* Copyright (c) 2021 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
 */

#pragma once

#include "antlr4-common.h"

namespace antlrcpp {

  // Static-only holder for Unicode constants and checks; it cannot be
  // instantiated, copied or moved.
  class ANTLR4CPP_PUBLIC Unicode final {
  public:
    // U+FFFD, used by the UTF-8 helpers in place of invalid input.
    static constexpr char32_t REPLACEMENT_CHARACTER = 0xfffd;

    // True when codePoint is at most 0x10FFFF and lies outside the
    // 0xD800..0xDFFF (surrogate) range.
    static constexpr bool isValid(char32_t codePoint) {
      return codePoint < 0xd800 || (codePoint > 0xdfff && codePoint <= 0x10ffff);
    }

  private:
    Unicode() = delete;
    Unicode(const Unicode&) = delete;
    Unicode(Unicode&&) = delete;
    Unicode& operator=(const Unicode&) = delete;
    Unicode& operator=(Unicode&&) = delete;
  };

}
diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/Utf8.cpp b/contrib/libs/antlr4_cpp_runtime/src/support/Utf8.cpp new file mode 100644 index 0000000000..294e9f1b21 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/support/Utf8.cpp @@ -0,0 +1,242 @@
/* Copyright (c) 2021 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

#include <cassert>
#include <cstdint>

#include "support/Utf8.h"
#include "support/Unicode.h"

// The below implementation is based off of https://github.com/google/cel-cpp/internal/utf8.cc,
// which is itself based off of https://go.googlesource.com/go/+/refs/heads/master/src/unicode/utf8/utf8.go.
// If for some reason you feel the need to copy this implementation, please retain a comment
// referencing the two source files and giving credit, as well as maintaining any and all
// obligations required by the BSD 3-clause license that governs this file.
+ +namespace antlrcpp { + +namespace { + +#undef SELF + constexpr uint8_t SELF = 0x80; + +#undef LOW + constexpr uint8_t LOW = 0x80; +#undef HIGH + constexpr uint8_t HIGH = 0xbf; + +#undef MASKX + constexpr uint8_t MASKX = 0x3f; +#undef MASK2 + constexpr uint8_t MASK2 = 0x1f; +#undef MASK3 + constexpr uint8_t MASK3 = 0xf; +#undef MASK4 + constexpr uint8_t MASK4 = 0x7; + +#undef TX + constexpr uint8_t TX = 0x80; +#undef T2 + constexpr uint8_t T2 = 0xc0; +#undef T3 + constexpr uint8_t T3 = 0xe0; +#undef T4 + constexpr uint8_t T4 = 0xf0; + +#undef XX + constexpr uint8_t XX = 0xf1; +#undef AS + constexpr uint8_t AS = 0xf0; +#undef S1 + constexpr uint8_t S1 = 0x02; +#undef S2 + constexpr uint8_t S2 = 0x13; +#undef S3 + constexpr uint8_t S3 = 0x03; +#undef S4 + constexpr uint8_t S4 = 0x23; +#undef S5 + constexpr uint8_t S5 = 0x34; +#undef S6 + constexpr uint8_t S6 = 0x04; +#undef S7 + constexpr uint8_t S7 = 0x44; + + // NOLINTBEGIN + // clang-format off +#undef LEADING + constexpr uint8_t LEADING[256] = { + // 1 2 3 4 5 6 7 8 9 A B C D E F + AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x00-0x0F + AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x10-0x1F + AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x20-0x2F + AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x30-0x3F + AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x40-0x4F + AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x50-0x5F + AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x60-0x6F + AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x70-0x7F + // 1 2 3 4 5 6 7 8 9 A B C D E F + XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0x80-0x8F + XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0x90-0x9F + XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0xA0-0xAF + XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, 
XX, XX, XX, XX, XX, // 0xB0-0xBF + XX, XX, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, // 0xC0-0xCF + S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, // 0xD0-0xDF + S2, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S4, S3, S3, // 0xE0-0xEF + S5, S6, S6, S6, S7, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0xF0-0xFF + }; + // clang-format on + // NOLINTEND + +#undef ACCEPT + constexpr std::pair<uint8_t, uint8_t> ACCEPT[16] = { + {LOW, HIGH}, {0xa0, HIGH}, {LOW, 0x9f}, {0x90, HIGH}, + {LOW, 0x8f}, {0x0, 0x0}, {0x0, 0x0}, {0x0, 0x0}, + {0x0, 0x0}, {0x0, 0x0}, {0x0, 0x0}, {0x0, 0x0}, + {0x0, 0x0}, {0x0, 0x0}, {0x0, 0x0}, {0x0, 0x0}, + }; + +} // namespace + + std::pair<char32_t, size_t> Utf8::decode(std::string_view input) { + assert(!input.empty()); + const auto b = static_cast<uint8_t>(input.front()); + input.remove_prefix(1); + if (b < SELF) { + return {static_cast<char32_t>(b), 1}; + } + const auto leading = LEADING[b]; + if (leading == XX) { + return {Unicode::REPLACEMENT_CHARACTER, 1}; + } + auto size = static_cast<size_t>(leading & 7) - 1; + if (size > input.size()) { + return {Unicode::REPLACEMENT_CHARACTER, 1}; + } + const auto& accept = ACCEPT[leading >> 4]; + const auto b1 = static_cast<uint8_t>(input.front()); + input.remove_prefix(1); + if (b1 < accept.first || b1 > accept.second) { + return {Unicode::REPLACEMENT_CHARACTER, 1}; + } + if (size <= 1) { + return {(static_cast<char32_t>(b & MASK2) << 6) | + static_cast<char32_t>(b1 & MASKX), + 2}; + } + const auto b2 = static_cast<uint8_t>(input.front()); + input.remove_prefix(1); + if (b2 < LOW || b2 > HIGH) { + return {Unicode::REPLACEMENT_CHARACTER, 1}; + } + if (size <= 2) { + return {(static_cast<char32_t>(b & MASK3) << 12) | + (static_cast<char32_t>(b1 & MASKX) << 6) | + static_cast<char32_t>(b2 & MASKX), + 3}; + } + const auto b3 = static_cast<uint8_t>(input.front()); + input.remove_prefix(1); + if (b3 < LOW || b3 > HIGH) { + return {Unicode::REPLACEMENT_CHARACTER, 1}; + 
} + return {(static_cast<char32_t>(b & MASK4) << 18) | + (static_cast<char32_t>(b1 & MASKX) << 12) | + (static_cast<char32_t>(b2 & MASKX) << 6) | + static_cast<char32_t>(b3 & MASKX), + 4}; + } + + std::optional<std::u32string> Utf8::strictDecode(std::string_view input) { + std::u32string output; + char32_t codePoint; + size_t codeUnits; + output.reserve(input.size()); // Worst case is each byte is a single Unicode code point. + for (size_t index = 0; index < input.size(); index += codeUnits) { + std::tie(codePoint, codeUnits) = Utf8::decode(input.substr(index)); + if (codePoint == Unicode::REPLACEMENT_CHARACTER && codeUnits == 1) { + // Condition is only met when an illegal byte sequence is encountered. See Utf8::decode. + return std::nullopt; + } + output.push_back(codePoint); + } + output.shrink_to_fit(); + return output; + } + + std::u32string Utf8::lenientDecode(std::string_view input) { + std::u32string output; + char32_t codePoint; + size_t codeUnits; + output.reserve(input.size()); // Worst case is each byte is a single Unicode code point. 
+ for (size_t index = 0; index < input.size(); index += codeUnits) { + std::tie(codePoint, codeUnits) = Utf8::decode(input.substr(index)); + output.push_back(codePoint); + } + output.shrink_to_fit(); + return output; + } + + std::string& Utf8::encode(std::string* buffer, char32_t codePoint) { + assert(buffer != nullptr); + if (!Unicode::isValid(codePoint)) { + codePoint = Unicode::REPLACEMENT_CHARACTER; + } + if (codePoint <= 0x7f) { + buffer->push_back(static_cast<char>(static_cast<uint8_t>(codePoint))); + } else if (codePoint <= 0x7ff) { + buffer->push_back( + static_cast<char>(T2 | static_cast<uint8_t>(codePoint >> 6))); + buffer->push_back( + static_cast<char>(TX | (static_cast<uint8_t>(codePoint) & MASKX))); + } else if (codePoint <= 0xffff) { + buffer->push_back( + static_cast<char>(T3 | static_cast<uint8_t>(codePoint >> 12))); + buffer->push_back(static_cast<char>( + TX | (static_cast<uint8_t>(codePoint >> 6) & MASKX))); + buffer->push_back( + static_cast<char>(TX | (static_cast<uint8_t>(codePoint) & MASKX))); + } else { + buffer->push_back( + static_cast<char>(T4 | static_cast<uint8_t>(codePoint >> 18))); + buffer->push_back(static_cast<char>( + TX | (static_cast<uint8_t>(codePoint >> 12) & MASKX))); + buffer->push_back(static_cast<char>( + TX | (static_cast<uint8_t>(codePoint >> 6) & MASKX))); + buffer->push_back( + static_cast<char>(TX | (static_cast<uint8_t>(codePoint) & MASKX))); + } + return *buffer; + } + + std::optional<std::string> Utf8::strictEncode(std::u32string_view input) { + std::string output; + output.reserve(input.size() * 4); // Worst case is each Unicode code point encodes to 4 bytes. 
  // Stateless UTF-8 <-> UTF-32 conversion helpers. The class only groups static
  // functions; construction, copying and assignment are all deleted.
  class ANTLR4CPP_PUBLIC Utf8 final {
  public:
    // Decodes the next code point, returning the decoded code point and the number
    // of code units (a.k.a. bytes) consumed. `input` must not be empty. In the event
    // that an invalid code unit sequence is encountered, the replacement character,
    // U+FFFD, is returned with a code unit count of 1. As U+FFFD requires 3 code
    // units when encoded, this can be used to differentiate valid input from
    // malformed input.
    static std::pair<char32_t, size_t> decode(std::string_view input);

    // Decodes the given UTF-8 encoded input into a string of code points.
    // Returns std::nullopt as soon as an illegal byte sequence is encountered.
    static std::optional<std::u32string> strictDecode(std::string_view input);

    // Decodes the given UTF-8 encoded input into a string of code points. Unlike strictDecode(),
    // each byte in an illegal byte sequence is replaced with the Unicode replacement character,
    // U+FFFD.
    static std::u32string lenientDecode(std::string_view input);

    // Encodes the given code point and appends it to the buffer, which must not
    // be null; the buffer is returned by reference. If the code point
    // is an unpaired surrogate or outside of the valid Unicode range it is replaced
    // with the replacement character, U+FFFD.
    static std::string& encode(std::string *buffer, char32_t codePoint);

    // Encodes the given Unicode code point string as UTF-8.
    // Returns std::nullopt as soon as an invalid code point is encountered.
    static std::optional<std::string> strictEncode(std::u32string_view input);

    // Encodes the given Unicode code point string as UTF-8. Unlike strictEncode(),
    // each invalid Unicode code point is replaced with the Unicode replacement character, U+FFFD.
    static std::string lenientEncode(std::u32string_view input);

  private:
    // Not instantiable, copyable or movable.
    Utf8() = delete;
    Utf8(const Utf8&) = delete;
    Utf8(Utf8&&) = delete;
    Utf8& operator=(const Utf8&) = delete;
    Utf8& operator=(Utf8&&) = delete;
  };
+ */ + +#pragma once + +#include "tree/ParseTree.h" +#include "tree/ParseTreeVisitor.h" + +namespace antlr4 { +namespace tree { + + class ANTLR4CPP_PUBLIC AbstractParseTreeVisitor : public ParseTreeVisitor { + public: + /// The default implementation calls <seealso cref="ParseTree#accept"/> on the + /// specified tree. + virtual std::any visit(ParseTree *tree) override { + return tree->accept(this); + } + + /** + * <p>The default implementation initializes the aggregate result to + * {@link #defaultResult defaultResult()}. Before visiting each child, it + * calls {@link #shouldVisitNextChild shouldVisitNextChild}; if the result + * is {@code false} no more children are visited and the current aggregate + * result is returned. After visiting a child, the aggregate result is + * updated by calling {@link #aggregateResult aggregateResult} with the + * previous aggregate result and the result of visiting the child.</p> + * + * <p>The default implementation is not safe for use in visitors that modify + * the tree structure. Visitors that modify the tree should override this + * method to behave properly in respect to the specific algorithm in use.</p> + */ + virtual std::any visitChildren(ParseTree *node) override { + std::any result = defaultResult(); + size_t n = node->children.size(); + for (size_t i = 0; i < n; i++) { + if (!shouldVisitNextChild(node, result)) { + break; + } + + std::any childResult = node->children[i]->accept(this); + result = aggregateResult(std::move(result), std::move(childResult)); + } + + return result; + } + + /// The default implementation returns the result of + /// <seealso cref="#defaultResult defaultResult"/>. + virtual std::any visitTerminal(TerminalNode * /*node*/) override { + return defaultResult(); + } + + /// The default implementation returns the result of + /// <seealso cref="#defaultResult defaultResult"/>. 
    /// <summary>
    /// Aggregates the results of visiting multiple children of a node. After
    /// either all children are visited or <seealso cref="#shouldVisitNextChild"/> returns
    /// {@code false}, the aggregate value is returned as the result of
    /// <seealso cref="#visitChildren"/>.
    /// <p/>
    /// The default implementation ignores the previous aggregate and returns
    /// {@code nextResult}, meaning <seealso cref="#visitChildren"/> will return the
    /// result of the last child visited (or the initial value if the node has
    /// no children).
    /// </summary>
    /// <param name="aggregate"> The previous aggregate value. In the default
    /// implementation, the aggregate value is initialized to
    /// <seealso cref="#defaultResult"/>, which is passed as the {@code aggregate} argument
    /// to this method after the first child node is visited. </param>
    /// <param name="nextResult"> The result of the immediately preceding call to visit
    /// a child node.
    /// </param>
    /// <returns> The updated aggregate result. </returns>
    virtual std::any aggregateResult(std::any /*aggregate*/, std::any nextResult) {
      return nextResult;
    }
    /// <summary>
    /// Decides whether <seealso cref="#visitChildren"/> continues with the next child.
    /// visitChildren calls this before visiting each child, including the first;
    /// on the first call {@code currentResult} is the initial value (in the
    /// default implementation, the value returned by
    /// <seealso cref="#defaultResult"/>). It is not called again once the last child
    /// has been visited.
    /// <p/>
    /// The default implementation always returns {@code true}, so
    /// {@code visitChildren} returns only after all children are visited.
    /// Override it to "short circuit" when the result of a single child can
    /// already determine the result of the whole visit.
    /// </summary>
    /// <param name="node"> The <seealso cref="ParseTree"/> whose children are currently being
    /// visited. </param>
    /// <param name="currentResult"> The current aggregate result of the children visited
    /// to the current point.
    /// </param>
    /// <returns> {@code true} to continue visiting children. Otherwise return
    /// {@code false} to stop visiting children and immediately return the
    /// current aggregate result from <seealso cref="#visitChildren"/>. </returns>
    virtual bool shouldVisitNextChild(ParseTree * /*node*/, const std::any &/*currentResult*/) {
      return true;
    }
+ */ + +#pragma once + +#include "tree/TerminalNode.h" + +namespace antlr4 { +namespace tree { + + class ANTLR4CPP_PUBLIC ErrorNode : public TerminalNode { + public: + static bool is(const tree::ParseTree &parseTree) { return parseTree.getTreeType() == tree::ParseTreeType::ERROR; } + + static bool is(const tree::ParseTree *parseTree) { return parseTree != nullptr && is(*parseTree); } + + protected: + using TerminalNode::TerminalNode; + }; + +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/ErrorNodeImpl.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/ErrorNodeImpl.cpp new file mode 100644 index 0000000000..142791dd96 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/ErrorNodeImpl.cpp @@ -0,0 +1,54 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "misc/Interval.h" +#include "Token.h" +#include "RuleContext.h" +#include "tree/ParseTreeVisitor.h" + +#include "tree/ErrorNodeImpl.h" + +using namespace antlr4; +using namespace antlr4::tree; + +Token* ErrorNodeImpl::getSymbol() const { + return symbol; +} + +void ErrorNodeImpl::setParent(RuleContext *parent_) { + this->parent = parent_; +} + +misc::Interval ErrorNodeImpl::getSourceInterval() { + if (symbol == nullptr) { + return misc::Interval::INVALID; + } + + size_t tokenIndex = symbol->getTokenIndex(); + return misc::Interval(tokenIndex, tokenIndex); +} + +std::any ErrorNodeImpl::accept(ParseTreeVisitor *visitor) { + return visitor->visitErrorNode(this); +} + +std::string ErrorNodeImpl::getText() { + return symbol->getText(); +} + +std::string ErrorNodeImpl::toStringTree(Parser * /*parser*/, bool /*pretty*/) { + return toString(); +} + +std::string ErrorNodeImpl::toString() { + if (symbol->getType() == Token::EOF) { + return "<EOF>"; + } + return symbol->getText(); +} + +std::string 
ErrorNodeImpl::toStringTree(bool /*pretty*/) { + return toString(); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/ErrorNodeImpl.h b/contrib/libs/antlr4_cpp_runtime/src/tree/ErrorNodeImpl.h new file mode 100644 index 0000000000..8bafb62552 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/ErrorNodeImpl.h @@ -0,0 +1,43 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "tree/ErrorNode.h" +#include "tree/TerminalNodeImpl.h" +#include "misc/Interval.h" + +#include "support/Any.h" + +namespace antlr4 { +namespace tree { + + /// <summary> + /// Represents a token that was consumed during resynchronization + /// rather than during a valid match operation. For example, + /// we will create this kind of a node during single token insertion + /// and deletion as well as during "consume until error recovery set" + /// upon no viable alternative exceptions. 
+ /// </summary> + class ANTLR4CPP_PUBLIC ErrorNodeImpl : public ErrorNode { + public: + Token *symbol; + + explicit ErrorNodeImpl(Token *symbol) : ErrorNode(ParseTreeType::ERROR), symbol(symbol) {} + + virtual Token* getSymbol() const override; + virtual void setParent(RuleContext *parent) override; + virtual misc::Interval getSourceInterval() override; + + virtual std::any accept(ParseTreeVisitor *visitor) override; + + virtual std::string getText() override; + virtual std::string toStringTree(Parser *parser, bool pretty = false) override; + virtual std::string toString() override; + virtual std::string toStringTree(bool pretty = false) override; + }; + +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/IterativeParseTreeWalker.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/IterativeParseTreeWalker.cpp new file mode 100644 index 0000000000..83e6339518 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/IterativeParseTreeWalker.cpp @@ -0,0 +1,66 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
// Walks the tree rooted at `t` without recursion, firing the same listener
// events as a depth-first walk: a pre-order event when a node is reached and,
// for every node that TerminalNode::is() does not match, an exit event once
// its subtree is done. A null `t` results in no events at all.
void IterativeParseTreeWalker::walk(ParseTreeListener *listener, ParseTree *t) const {
  // Each entry is an ancestor of the current node paired with the index that
  // ancestor had among its own siblings, so both can be restored on the way up.
  std::vector<std::pair<ParseTree*, size_t>> stack;
  ParseTree *currentNode = t;
  // Position of currentNode within its parent's children.
  size_t currentIndex = 0;

  while (currentNode != nullptr) {
    // pre-order visit
    if (ErrorNode::is(*currentNode)) {
      listener->visitErrorNode(downCast<ErrorNode*>(currentNode));
    } else if (TerminalNode::is(*currentNode)) {
      listener->visitTerminal(downCast<TerminalNode*>(currentNode));
    } else {
      enterRule(listener, currentNode);
    }

    // Move down to first child, if it exists.
    if (!currentNode->children.empty()) {
      stack.push_back(std::make_pair(currentNode, currentIndex));
      currentIndex = 0;
      currentNode = currentNode->children[0];
      continue;
    }

    // No child nodes, so walk tree.
    // Each pass of this loop finishes one node and then either steps to its
    // next sibling (leaving the loop) or climbs to its parent (next pass).
    do {
      // post-order visit
      // NOTE(review): error nodes get no exit event here, which presumably
      // relies on TerminalNode::is() also matching them - confirm in TerminalNode.h.
      if (!TerminalNode::is(*currentNode)) {
        exitRule(listener, currentNode);
      }

      // No parent, so no siblings.
      if (stack.empty()) {
        currentNode = nullptr;
        currentIndex = 0;
        break;
      }

      // Move to next sibling if possible.
      // currentIndex is advanced even when no sibling exists; it is then
      // overwritten by the std::tie below.
      if (stack.back().first->children.size() > ++currentIndex) {
        currentNode = stack.back().first->children[currentIndex];
        break;
      }

      // No next sibling, so move up.
      std::tie(currentNode, currentIndex) = stack.back();
      stack.pop_back();
    } while (currentNode != nullptr);
  }
}
+ */ + +#pragma once + +#include "antlr4-common.h" + +#include "tree/ParseTreeWalker.h" + +namespace antlr4 { +namespace tree { + + class ParseTreeListener; + + /** + * An iterative (read: non-recursive) pre-order and post-order tree walker that + * doesn't use the thread stack but heap-based stacks. Makes it possible to + * process deeply nested parse trees. + */ + class ANTLR4CPP_PUBLIC IterativeParseTreeWalker : public ParseTreeWalker { + public: + virtual void walk(ParseTreeListener *listener, ParseTree *t) const override; + }; + +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTree.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTree.cpp new file mode 100644 index 0000000000..8756398d88 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTree.cpp @@ -0,0 +1,12 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "tree/ParseTree.h" + +using namespace antlr4::tree; + +bool ParseTree::operator == (const ParseTree &other) const { + return &other == this; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTree.h b/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTree.h new file mode 100644 index 0000000000..cf8027b8fd --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTree.h @@ -0,0 +1,111 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "support/Any.h" +#include "tree/ParseTreeType.h" + +namespace antlr4 { +namespace tree { + + /// An interface to access the tree of <seealso cref="RuleContext"/> objects created + /// during a parse that makes the data structure look like a simple parse tree. 
+ /// This node represents both internal nodes, rule invocations, + /// and leaf nodes, token matches. + /// + /// The payload is either a <seealso cref="Token"/> or a <seealso cref="RuleContext"/> object. + // ml: This class unites 4 Java classes: RuleNode, ParseTree, SyntaxTree and Tree. + class ANTLR4CPP_PUBLIC ParseTree { + public: + ParseTree(ParseTree const&) = delete; + + virtual ~ParseTree() = default; + + ParseTree& operator=(ParseTree const&) = delete; + + /// The parent of this node. If the return value is null, then this + /// node is the root of the tree. + ParseTree *parent = nullptr; + + /// If we are debugging or building a parse tree for a visitor, + /// we need to track all of the tokens and rule invocations associated + /// with this rule's context. This is empty for parsing w/o tree constr. + /// operation because we don't the need to track the details about + /// how we parse this rule. + // ml: memory is not managed here, but by the owning class. This is just for the structure. + std::vector<ParseTree *> children; + + /// Print out a whole tree, not just a node, in LISP format + /// {@code (root child1 .. childN)}. Print just a node if this is a leaf. + virtual std::string toStringTree(bool pretty = false) = 0; + virtual std::string toString() = 0; + + /// Specialize toStringTree so that it can print out more information + /// based upon the parser. + virtual std::string toStringTree(Parser *parser, bool pretty = false) = 0; + + virtual bool operator == (const ParseTree &other) const; + + /// The <seealso cref="ParseTreeVisitor"/> needs a double dispatch method. + // ml: This has been changed to use Any instead of a template parameter, to avoid the need of a virtual template function. + virtual std::any accept(ParseTreeVisitor *visitor) = 0; + + /// Return the combined text of all leaf nodes. Does not get any + /// off-channel tokens (if any) so won't return whitespace and + /// comments if they are sent to parser on hidden channel. 
+ virtual std::string getText() = 0; + + /** + * Return an {@link Interval} indicating the index in the + * {@link TokenStream} of the first and last token associated with this + * subtree. If this node is a leaf, then the interval represents a single + * token and has interval i..i for token index i. + * + * <p>An interval of i..i-1 indicates an empty interval at position + * i in the input stream, where 0 <= i <= the size of the input + * token stream. Currently, the code base can only have i=0..n-1 but + * in concept one could have an empty interval after EOF. </p> + * + * <p>If source interval is unknown, this returns {@link Interval#INVALID}.</p> + * + * <p>As a weird special case, the source interval for rules matched after + * EOF is unspecified.</p> + */ + virtual misc::Interval getSourceInterval() = 0; + + ParseTreeType getTreeType() const { return _treeType; } + + protected: + explicit ParseTree(ParseTreeType treeType) : _treeType(treeType) {} + + private: + const ParseTreeType _treeType; + }; + + // A class to help managing ParseTree instances without the need of a shared_ptr. + class ANTLR4CPP_PUBLIC ParseTreeTracker { + public: + template<typename T, typename ... Args> + T* createInstance(Args&& ... args) { + static_assert(std::is_base_of<ParseTree, T>::value, "Argument must be a parse tree type"); + T* result = new T(args...); + _allocated.push_back(result); + return result; + } + + void reset() { + for (auto * entry : _allocated) + delete entry; + _allocated.clear(); + } + + private: + std::vector<ParseTree *> _allocated; + }; + + +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeListener.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeListener.cpp new file mode 100644 index 0000000000..ce12297586 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeListener.cpp @@ -0,0 +1,9 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "ParseTreeListener.h" + +antlr4::tree::ParseTreeListener::~ParseTreeListener() { +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeListener.h b/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeListener.h new file mode 100644 index 0000000000..60c7d8861a --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeListener.h @@ -0,0 +1,39 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace tree { + + /** This interface describes the minimal core of methods triggered + * by {@link ParseTreeWalker}. E.g., + * + * ParseTreeWalker walker = new ParseTreeWalker(); + * walker.walk(myParseTreeListener, myParseTree); <-- triggers events in your listener + * + * If you want to trigger events in multiple listeners during a single + * tree walk, you can use the ParseTreeDispatcher object available at + * + * https://github.com/antlr/antlr4/issues/841 + */ + class ANTLR4CPP_PUBLIC ParseTreeListener { + public: + virtual ~ParseTreeListener(); + + virtual void visitTerminal(TerminalNode *node) = 0; + virtual void visitErrorNode(ErrorNode *node) = 0; + virtual void enterEveryRule(ParserRuleContext *ctx) = 0; + virtual void exitEveryRule(ParserRuleContext *ctx) = 0; + + bool operator == (const ParseTreeListener &other) { + return this == &other; + } + }; + +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeProperty.h b/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeProperty.h new file mode 100644 index 0000000000..efd5e73bf8 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeProperty.h @@ -0,0 +1,50 @@ +/* 
Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace tree { + + /// <summary> + /// Associate a property with a parse tree node. Useful with parse tree listeners + /// that need to associate values with particular tree nodes, kind of like + /// specifying a return value for the listener event method that visited a + /// particular node. Example: + /// + /// <pre> + /// ParseTreeProperty<Integer> values = new ParseTreeProperty<Integer>(); + /// values.put(tree, 36); + /// int x = values.get(tree); + /// values.removeFrom(tree); + /// </pre> + /// + /// You would make one decl (values here) in the listener and use lots of times + /// in your event methods. + /// </summary> + template<typename V> + class ANTLR4CPP_PUBLIC ParseTreeProperty { + public: + virtual ~ParseTreeProperty() {} + virtual V get(ParseTree *node) { + return _annotations[node]; + } + virtual void put(ParseTree *node, V value) { + _annotations[node] = value; + } + virtual V removeFrom(ParseTree *node) { + auto value = _annotations[node]; + _annotations.erase(node); + return value; + } + + protected: + std::map<ParseTree*, V> _annotations; + }; + +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeType.h b/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeType.h new file mode 100644 index 0000000000..17e0512b00 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeType.h @@ -0,0 +1,22 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
  // Tag that identifies the kind of a parse tree node. A ParseTree receives it
  // in its constructor and exposes it through getTreeType(), which the static
  // is() helpers compare against.
  enum class ParseTreeType : size_t {
    // Presumably plain terminal (token) nodes - confirm in TerminalNode.h.
    TERMINAL = 1,
    // Error nodes: set by ErrorNodeImpl's constructor, tested by ErrorNode::is().
    ERROR = 2,
    // Presumably rule-context nodes - confirm in RuleContext.
    RULE = 3,
  };
  // Abstract visitor over parse trees. Every operation returns its result as a
  // std::any, so an empty std::any can stand for "no result".
  class ANTLR4CPP_PUBLIC ParseTreeVisitor {
  public:
    virtual ~ParseTreeVisitor();

    /// <summary>
    /// Visit a parse tree, and return a user-defined result of the operation.
    /// </summary>
    /// <param name="tree"> The <seealso cref="ParseTree"/> to visit. </param>
    /// <returns> The result of visiting the parse tree. </returns>
    virtual std::any visit(ParseTree *tree) = 0;

    /// <summary>
    /// Visit the children of a node, and return a user-defined result of the
    /// operation.
    /// </summary>
    /// <param name="node"> The <seealso cref="ParseTree"/> whose children should be visited. </param>
    /// <returns> The result of visiting the children of the node. </returns>
    virtual std::any visitChildren(ParseTree *node) = 0;

    /// <summary>
    /// Visit a terminal node, and return a user-defined result of the operation.
    /// </summary>
    /// <param name="node"> The <seealso cref="TerminalNode"/> to visit. </param>
    /// <returns> The result of visiting the node. </returns>
    virtual std::any visitTerminal(TerminalNode *node) = 0;

    /// <summary>
    /// Visit an error node, and return a user-defined result of the operation.
    /// </summary>
    /// <param name="node"> The <seealso cref="ErrorNode"/> to visit. </param>
    /// <returns> The result of visiting the node. </returns>
    virtual std::any visitErrorNode(ErrorNode *node) = 0;

  };
+ */ + +#include "tree/ErrorNode.h" +#include "ParserRuleContext.h" +#include "tree/ParseTreeListener.h" +#include "support/CPPUtils.h" +#include "support/Casts.h" + +#include "tree/IterativeParseTreeWalker.h" +#include "tree/ParseTreeWalker.h" + +using namespace antlr4::tree; +using namespace antlrcpp; + +static IterativeParseTreeWalker defaultWalker; +ParseTreeWalker &ParseTreeWalker::DEFAULT = defaultWalker; + +void ParseTreeWalker::walk(ParseTreeListener *listener, ParseTree *t) const { + if (ErrorNode::is(*t)) { + listener->visitErrorNode(downCast<ErrorNode*>(t)); + return; + } + if (TerminalNode::is(*t)) { + listener->visitTerminal(downCast<TerminalNode*>(t)); + return; + } + + enterRule(listener, t); + for (auto &child : t->children) { + walk(listener, child); + } + exitRule(listener, t); +} + +void ParseTreeWalker::enterRule(ParseTreeListener *listener, ParseTree *r) const { + auto *ctx = downCast<ParserRuleContext*>(r); + listener->enterEveryRule(ctx); + ctx->enterRule(listener); +} + +void ParseTreeWalker::exitRule(ParseTreeListener *listener, ParseTree *r) const { + auto *ctx = downCast<ParserRuleContext*>(r); + ctx->exitRule(listener); + listener->exitEveryRule(ctx); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeWalker.h b/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeWalker.h new file mode 100644 index 0000000000..718cbbd1e4 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeWalker.h @@ -0,0 +1,55 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace tree { + + /** + * Drives a ParseTreeListener over a parse tree. walk() and the enter/exit hooks are + * virtual, so the traversal can be overridden. + */ + class ANTLR4CPP_PUBLIC ParseTreeWalker { + public: + /** Shared walker instance; ParseTreeWalker.cpp binds it to a file-static IterativeParseTreeWalker. */ + static ParseTreeWalker &DEFAULT; + + virtual ~ParseTreeWalker() = default; + + /** + * <summary> + * Performs a walk on the given parse tree starting at the root and going down recursively + * with depth-first search. On each rule node, <seealso cref="ParseTreeWalker#enterRule"/> is called before + * recursively walking down into child nodes, then + * <seealso cref="ParseTreeWalker#exitRule"/> is called after the recursive call to wind up. + * Error nodes and terminal nodes are not entered or exited; they are reported through + * <seealso cref="ParseTreeListener#visitErrorNode"/> and <seealso cref="ParseTreeListener#visitTerminal"/>. + * </summary> + * <param name='listener'> The listener used by the walker to process grammar rules </param> + * <param name='t'> The parse tree to be walked on; it is dereferenced without a null check </param> + */ + virtual void walk(ParseTreeListener *listener, ParseTree *t) const; + + protected: + + /** + * <summary> + * Enters a grammar rule by first triggering the generic event <seealso cref="ParseTreeListener#enterEveryRule"/> + * then by triggering the event specific to the given parse tree node + * </summary> + * <param name='listener'> The listener responding to the trigger events </param> + * <param name='r'> The grammar rule containing the rule context; it is down-cast to ParserRuleContext </param> + */ + virtual void enterRule(ParseTreeListener *listener, ParseTree *r) const; + + /** + * <summary> + * Exits a grammar rule by first triggering the event specific to the given parse tree node + * then by triggering the generic event <seealso cref="ParseTreeListener#exitEveryRule"/> + * </summary> + * <param name='listener'> The listener responding to the trigger events </param> + * <param name='r'> The grammar rule containing the rule context; it is down-cast to ParserRuleContext </param> + */ + virtual void exitRule(ParseTreeListener *listener, ParseTree *r) const; + }; + +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/TerminalNode.h b/contrib/libs/antlr4_cpp_runtime/src/tree/TerminalNode.h new file mode 100644 index 0000000000..9f7466edc5 --- 
/dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/TerminalNode.h @@ -0,0 +1,40 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "tree/ParseTree.h" + +namespace antlr4 { +namespace tree { + + /** Abstract leaf of a parse tree: a ParseTree node that exposes a Token through getSymbol(). */ + class ANTLR4CPP_PUBLIC TerminalNode : public ParseTree { + public: + /** + * Type test based on the node's ParseTreeType tag. Returns true for both TERMINAL + * and ERROR nodes, so callers that need to tell the two apart must test for error nodes first. + */ + static bool is(const tree::ParseTree &parseTree) { + const auto treeType = parseTree.getTreeType(); + return treeType == ParseTreeType::TERMINAL || treeType == ParseTreeType::ERROR; + } + + /** Null-tolerant overload: a null pointer is reported as "not a terminal node". */ + static bool is(const tree::ParseTree *parseTree) { return parseTree != nullptr && is(*parseTree); } + + /** Returns the token attached to this node. */ + virtual Token* getSymbol() const = 0; + + /** Set the parent for this leaf node. + * + * Technically, this is not backward compatible as it changes + * the interface but no one was able to create custom + * TerminalNodes anyway so I'm adding as it improves internal + * code quality. + * + * @since 4.7 + */ + virtual void setParent(RuleContext *parent) = 0; + + protected: + /** Inherit the ParseTree constructors; being protected, they are reachable from subclasses only. */ + using ParseTree::ParseTree; + }; + +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/TerminalNodeImpl.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/TerminalNodeImpl.cpp new file mode 100644 index 0000000000..8eeb299fee --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/TerminalNodeImpl.cpp @@ -0,0 +1,54 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/Interval.h" +#include "Token.h" +#include "RuleContext.h" +#include "tree/ParseTreeVisitor.h" + +#include "tree/TerminalNodeImpl.h" + +using namespace antlr4; +using namespace antlr4::tree; + +/* Returns the token wrapped by this node; may be null. */ +Token* TerminalNodeImpl::getSymbol() const { + return symbol; +} + +/* Sets this node's parent. */ +void TerminalNodeImpl::setParent(RuleContext *parent_) { + this->parent = parent_; +} + +/* Returns the single-token interval [tokenIndex, tokenIndex], or Interval::INVALID when no token is attached. */ +misc::Interval TerminalNodeImpl::getSourceInterval() { + if (symbol == nullptr) { + return misc::Interval::INVALID; + } + + size_t tokenIndex = symbol->getTokenIndex(); + return misc::Interval(tokenIndex, tokenIndex); +} + +/* Double dispatch: forwards to ParseTreeVisitor::visitTerminal(). */ +std::any TerminalNodeImpl::accept(ParseTreeVisitor *visitor) { + return visitor->visitTerminal(this); +} + +/* + * Returns the text of the wrapped token. A node without a token yields an empty + * string, matching the null tolerance of getSourceInterval() instead of dereferencing null. + */ +std::string TerminalNodeImpl::getText() { + if (symbol == nullptr) { + return std::string(); + } + return symbol->getText(); +} + +/* A terminal has no children, so its tree form is just its own string form. */ +std::string TerminalNodeImpl::toStringTree(Parser * /*parser*/, bool /*pretty*/) { + return toString(); +} + +/* + * Returns "<EOF>" for an end-of-file token and the token text otherwise. + * A node without a token yields an empty string rather than dereferencing null. + */ +std::string TerminalNodeImpl::toString() { + if (symbol == nullptr) { + return std::string(); + } + if (symbol->getType() == Token::EOF) { + return "<EOF>"; + } + return symbol->getText(); +} + +/* Same as the two-argument overload: both parameters are irrelevant for a leaf. */ +std::string TerminalNodeImpl::toStringTree(bool /*pretty*/) { + return toString(); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/TerminalNodeImpl.h b/contrib/libs/antlr4_cpp_runtime/src/tree/TerminalNodeImpl.h new file mode 100644 index 0000000000..1f8adacc6a --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/TerminalNodeImpl.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "tree/TerminalNode.h" + +namespace antlr4 { +namespace tree { + + /** Concrete TerminalNode that wraps a single Token and is tagged ParseTreeType::TERMINAL. */ + class ANTLR4CPP_PUBLIC TerminalNodeImpl : public TerminalNode { + public: + /** The wrapped token. Stored as a raw pointer; nothing in this class deletes it. */ + Token *symbol; + + /** Wraps the given token; the pointer is stored as-is, with no validation. */ + explicit TerminalNodeImpl(Token *symbol) : TerminalNode(ParseTreeType::TERMINAL), symbol(symbol) {} + + /** Returns the wrapped token. */ + virtual Token* getSymbol() const override; + /** Sets this node's parent. */ + virtual void setParent(RuleContext *parent) override; + /** Returns [tokenIndex, tokenIndex] of the wrapped token, or Interval::INVALID when symbol is null. */ + virtual misc::Interval getSourceInterval() override; + + /** Forwards to ParseTreeVisitor::visitTerminal() and returns its result. */ + virtual std::any accept(ParseTreeVisitor *visitor) override; + + /** Returns the text of the wrapped token. */ + virtual std::string getText() override; + /** Returns toString(); both arguments are ignored. */ + virtual std::string toStringTree(Parser *parser, bool pretty = false) override; + /** Returns "<EOF>" when the token type is Token::EOF, otherwise the token text. */ + virtual std::string toString() override; + /** Returns toString(); the argument is ignored. */ + virtual std::string toStringTree(bool pretty = false) override; + }; + +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/Trees.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/Trees.cpp new file mode 100644 index 0000000000..f4065949b2 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/Trees.cpp @@ -0,0 +1,241 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "tree/ErrorNode.h" +#include "Parser.h" +#include "ParserRuleContext.h" +#include "support/CPPUtils.h" +#include "tree/TerminalNodeImpl.h" +#include "atn/ATN.h" +#include "misc/Interval.h" +#include "Token.h" +#include "CommonToken.h" +#include "misc/Predicate.h" + +#include "tree/Trees.h" + +using namespace antlr4; +using namespace antlr4::misc; +using namespace antlr4::tree; + +using namespace antlrcpp; + +Trees::Trees() { +} + +std::string Trees::toStringTree(ParseTree *t, bool pretty) { + return toStringTree(t, nullptr, pretty); +} + +std::string Trees::toStringTree(ParseTree *t, Parser *recog, bool pretty) { + if (recog == nullptr) + return toStringTree(t, std::vector<std::string>(), pretty); + return toStringTree(t, recog->getRuleNames(), pretty); +} + +std::string Trees::toStringTree(ParseTree *t, const std::vector<std::string> &ruleNames, bool pretty) { + std::string temp = antlrcpp::escapeWhitespace(Trees::getNodeText(t, ruleNames), false); + if (t->children.empty()) { + return temp; + } + + std::stringstream ss; + ss << "(" << temp << ' '; + + // Implement the recursive walk as iteration to avoid trouble with deep nesting. + std::stack<size_t> stack; + size_t childIndex = 0; + ParseTree *run = t; + size_t indentationLevel = 1; + while (childIndex < run->children.size()) { + if (childIndex > 0) { + ss << ' '; + } + ParseTree *child = run->children[childIndex]; + temp = antlrcpp::escapeWhitespace(Trees::getNodeText(child, ruleNames), false); + if (!child->children.empty()) { + // Go deeper one level. + stack.push(childIndex); + run = child; + childIndex = 0; + if (pretty) { + ++indentationLevel; + ss << std::endl; + for (size_t i = 0; i < indentationLevel; ++i) { + ss << " "; + } + } + ss << "(" << temp << " "; + } else { + ss << temp; + while (++childIndex == run->children.size()) { + if (stack.size() > 0) { + // Reached the end of the current level. See if we can step up from here. 
+ childIndex = stack.top(); + stack.pop(); + run = run->parent; + if (pretty) { + --indentationLevel; + } + ss << ")"; + } else { + break; + } + } + } + } + + ss << ")"; + return ss.str(); +} + +std::string Trees::getNodeText(ParseTree *t, Parser *recog) { + return getNodeText(t, recog->getRuleNames()); +} + +std::string Trees::getNodeText(ParseTree *t, const std::vector<std::string> &ruleNames) { + if (ruleNames.size() > 0) { + if (is<RuleContext *>(t)) { + size_t ruleIndex = dynamic_cast<RuleContext *>(t)->getRuleIndex(); + std::string ruleName = ruleNames[ruleIndex]; + size_t altNumber = dynamic_cast<RuleContext *>(t)->getAltNumber(); + if (altNumber != atn::ATN::INVALID_ALT_NUMBER) { + return ruleName + ":" + std::to_string(altNumber); + } + return ruleName; + } else if (is<ErrorNode *>(t)) { + return t->toString(); + } else if (is<TerminalNode *>(t)) { + Token *symbol = dynamic_cast<TerminalNode *>(t)->getSymbol(); + if (symbol != nullptr) { + std::string s = symbol->getText(); + return s; + } + } + } + // no recog for rule names + if (is<RuleContext *>(t)) { + return dynamic_cast<RuleContext *>(t)->getText(); + } + + if (is<TerminalNodeImpl *>(t)) { + return dynamic_cast<TerminalNodeImpl *>(t)->getSymbol()->getText(); + } + + return ""; +} + +std::vector<ParseTree *> Trees::getAncestors(ParseTree *t) { + std::vector<ParseTree *> ancestors; + ParseTree *parent = t->parent; + while (parent != nullptr) { + ancestors.insert(ancestors.begin(), parent); // insert at start + parent = parent->parent; + } + return ancestors; +} + +template<typename T> +static void _findAllNodes(ParseTree *t, size_t index, bool findTokens, std::vector<T> &nodes) { + // check this node (the root) first + if (findTokens && is<TerminalNode *>(t)) { + TerminalNode *tnode = dynamic_cast<TerminalNode *>(t); + if (tnode->getSymbol()->getType() == index) { + nodes.push_back(t); + } + } else if (!findTokens && is<ParserRuleContext *>(t)) { + ParserRuleContext *ctx = 
dynamic_cast<ParserRuleContext *>(t); + if (ctx->getRuleIndex() == index) { + nodes.push_back(t); + } + } + // check children + for (size_t i = 0; i < t->children.size(); i++) { + _findAllNodes(t->children[i], index, findTokens, nodes); + } +} + +bool Trees::isAncestorOf(ParseTree *t, ParseTree *u) { + if (t == nullptr || u == nullptr || t->parent == nullptr) { + return false; + } + + ParseTree *p = u->parent; + while (p != nullptr) { + if (t == p) { + return true; + } + p = p->parent; + } + return false; +} + +std::vector<ParseTree *> Trees::findAllTokenNodes(ParseTree *t, size_t ttype) { + return findAllNodes(t, ttype, true); +} + +std::vector<ParseTree *> Trees::findAllRuleNodes(ParseTree *t, size_t ruleIndex) { + return findAllNodes(t, ruleIndex, false); +} + +std::vector<ParseTree *> Trees::findAllNodes(ParseTree *t, size_t index, bool findTokens) { + std::vector<ParseTree *> nodes; + _findAllNodes<ParseTree *>(t, index, findTokens, nodes); + return nodes; +} + +std::vector<ParseTree *> Trees::getDescendants(ParseTree *t) { + std::vector<ParseTree *> nodes; + nodes.push_back(t); + std::size_t n = t->children.size(); + for (size_t i = 0 ; i < n ; i++) { + auto descentants = getDescendants(t->children[i]); + for (auto *entry: descentants) { + nodes.push_back(entry); + } + } + return nodes; +} + +std::vector<ParseTree *> Trees::descendants(ParseTree *t) { + return getDescendants(t); +} + +ParserRuleContext* Trees::getRootOfSubtreeEnclosingRegion(ParseTree *t, size_t startTokenIndex, size_t stopTokenIndex) { + size_t n = t->children.size(); + for (size_t i = 0; i < n; i++) { + ParserRuleContext *r = getRootOfSubtreeEnclosingRegion(t->children[i], startTokenIndex, stopTokenIndex); + if (r != nullptr) { + return r; + } + } + + if (is<ParserRuleContext *>(t)) { + ParserRuleContext *r = dynamic_cast<ParserRuleContext *>(t); + if (startTokenIndex >= r->getStart()->getTokenIndex() && // is range fully contained in t? 
+ (r->getStop() == nullptr || stopTokenIndex <= r->getStop()->getTokenIndex())) { + // note: r.getStop()==null likely implies that we bailed out of parser and there's nothing to the right + return r; + } + } + return nullptr; +} + +ParseTree * Trees::findNodeSuchThat(ParseTree *t, Ref<Predicate> const& pred) { + if (pred->test(t)) { + return t; + } + + size_t n = t->children.size(); + for (size_t i = 0 ; i < n ; ++i) { + ParseTree *u = findNodeSuchThat(t->children[i], pred); + if (u != nullptr) { + return u; + } + } + + return nullptr; +} + diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/Trees.h b/contrib/libs/antlr4_cpp_runtime/src/tree/Trees.h new file mode 100644 index 0000000000..f779158d01 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/Trees.h @@ -0,0 +1,78 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "tree/TerminalNode.h" +#include "ParserRuleContext.h" +#include "Recognizer.h" + +namespace antlr4 { +namespace tree { + + /// A set of utility routines useful for all kinds of ANTLR trees. + class ANTLR4CPP_PUBLIC Trees { + public: + /// Print out a whole tree in LISP form. getNodeText is used on the + /// node payloads to get the text for the nodes. Detect + /// parse trees and extract data appropriately. + static std::string toStringTree(ParseTree *t, bool pretty = false); + + /// Print out a whole tree in LISP form. getNodeText is used on the + /// node payloads to get the text for the nodes. Detect + /// parse trees and extract data appropriately. + static std::string toStringTree(ParseTree *t, Parser *recog, bool pretty = false); + + /// Print out a whole tree in LISP form. getNodeText is used on the + /// node payloads to get the text for the nodes. Detect + /// parse trees and extract data appropriately. 
+ static std::string toStringTree(ParseTree *t, const std::vector<std::string> &ruleNames, bool pretty = false); + static std::string getNodeText(ParseTree *t, Parser *recog); + static std::string getNodeText(ParseTree *t, const std::vector<std::string> &ruleNames); + + /// Return a list of all ancestors of this node. The first node of + /// list is the root and the last is the parent of this node. + static std::vector<ParseTree *> getAncestors(ParseTree *t); + + /** Return true if t is u's parent or a node on path to root from u. + * Use == not equals(). + * + * @since 4.5.1 + */ + static bool isAncestorOf(ParseTree *t, ParseTree *u); + static std::vector<ParseTree *> findAllTokenNodes(ParseTree *t, size_t ttype); + static std::vector<ParseTree *> findAllRuleNodes(ParseTree *t, size_t ruleIndex); + static std::vector<ParseTree *> findAllNodes(ParseTree *t, size_t index, bool findTokens); + + /** Get all descendents; includes t itself. + * + * @since 4.5.1 + */ + static std::vector<ParseTree *> getDescendants(ParseTree *t); + + /** @deprecated */ + static std::vector<ParseTree *> descendants(ParseTree *t); + + /** Find smallest subtree of t enclosing range startTokenIndex..stopTokenIndex + * inclusively using postorder traversal. Recursive depth-first-search. 
+ * + * @since 4.5.1 + */ + static ParserRuleContext* getRootOfSubtreeEnclosingRegion(ParseTree *t, + size_t startTokenIndex, // inclusive + size_t stopTokenIndex); // inclusive + + /** Return first node satisfying the pred + * + * @since 4.5.1 + */ + static ParseTree* findNodeSuchThat(ParseTree *t, Ref<misc::Predicate> const& pred); + + private: + Trees(); + }; + +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/Chunk.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/Chunk.cpp new file mode 100644 index 0000000000..5320f910b9 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/Chunk.cpp @@ -0,0 +1,9 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "tree/pattern/Chunk.h" + +antlr4::tree::pattern::Chunk::~Chunk() { +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/Chunk.h b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/Chunk.h new file mode 100644 index 0000000000..61079a8ca8 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/Chunk.h @@ -0,0 +1,44 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace tree { +namespace pattern { + + /// <summary> + /// A chunk is either a token tag, a rule tag, or a span of literal text within a + /// tree pattern. + /// <p/> + /// The method <seealso cref="ParseTreePatternMatcher#split(String)"/> returns a list of + /// chunks in preparation for creating a token stream by + /// <seealso cref="ParseTreePatternMatcher#tokenize(String)"/>. 
From there, we get a parse + /// tree with <seealso cref="ParseTreePatternMatcher#compile(String, int)"/>. These + /// chunks are converted to <seealso cref="RuleTagToken"/>, <seealso cref="TokenTagToken"/>, or the + /// regular tokens of the text surrounding the tags. + /// </summary> + class ANTLR4CPP_PUBLIC Chunk { + public: + Chunk() = default; + Chunk(Chunk const&) = default; + virtual ~Chunk(); + + Chunk& operator=(Chunk const&) = default; + + /// This method returns a text representation of the tag chunk. Labeled tags + /// are returned in the form {@code label:tag}, and unlabeled tags are + /// returned as just the tag name. + virtual std::string toString() { + std::string str; + return str; + } + }; + +} // namespace pattern +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreeMatch.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreeMatch.cpp new file mode 100644 index 0000000000..41896d6df7 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreeMatch.cpp @@ -0,0 +1,69 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "Exceptions.h" + +#include "tree/pattern/ParseTreeMatch.h" + +using namespace antlr4::tree; +using namespace antlr4::tree::pattern; + +/* + * Builds a match result. The label map is copied; the pattern is bound by reference + * (the _pattern member is a const reference), so it must outlive this object. + * Throws IllegalArgumentException when tree is null. + */ +ParseTreeMatch::ParseTreeMatch(ParseTree *tree, const ParseTreePattern &pattern, + const std::map<std::string, std::vector<ParseTree *>> &labels, + ParseTree *mismatchedNode) + : _tree(tree), _pattern(pattern), _labels(labels), _mismatchedNode(mismatchedNode) { + if (tree == nullptr) { + throw IllegalArgumentException("tree cannot be null"); + } +} + +ParseTreeMatch::~ParseTreeMatch() { +} + +/* Returns the last node recorded under label, or nullptr when the label is unknown or has no nodes. */ +ParseTree* ParseTreeMatch::get(const std::string &label) { + auto iterator = _labels.find(label); + if (iterator == _labels.end() || iterator->second.empty()) { + return nullptr; + } + + return iterator->second.back(); // return last if multiple +} + +/* Returns a copy of the node list recorded under label; the list is empty when the label is unknown. */ +std::vector<ParseTree *> ParseTreeMatch::getAll(const std::string &label) { + auto iterator = _labels.find(label); + if (iterator == _labels.end()) { + return {}; + } + + return iterator->second; +} + +/* Returns a mutable reference to the internal label map. */ +std::map<std::string, std::vector<ParseTree *>>& ParseTreeMatch::getLabels() { + return _labels; +} + +/* Returns the node at which matching failed, or nullptr when the match succeeded. */ +ParseTree *ParseTreeMatch::getMismatchedNode() { + return _mismatchedNode; +} + +/* A match succeeded exactly when no mismatched node was recorded. */ +bool ParseTreeMatch::succeeded() { + return _mismatchedNode == nullptr; +} + +/* Returns the pattern this result was matched against. */ +const ParseTreePattern& ParseTreeMatch::getPattern() { + return _pattern; +} + +/* Returns the tree that was matched against the pattern. */ +ParseTree * ParseTreeMatch::getTree() { + return _tree; +} + +/* One-line summary: the outcome followed by the number of distinct labels. */ +std::string ParseTreeMatch::toString() { + if (succeeded()) { + return "Match succeeded; found " + std::to_string(_labels.size()) + " labels"; + } else { + return "Match failed; found " + std::to_string(_labels.size()) + " labels"; + } +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreeMatch.h b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreeMatch.h new file mode 100644 index 0000000000..eefde46c83 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreeMatch.h @@ -0,0 +1,132 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. 
All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace tree { +namespace pattern { + + /// Represents the result of matching a ParseTree against a tree pattern. + class ANTLR4CPP_PUBLIC ParseTreeMatch { + private: + /// This is the backing field for getTree(). + ParseTree *_tree; + + /// This is the backing field for getPattern(). + const ParseTreePattern &_pattern; + + /// This is the backing field for getLabels(). + std::map<std::string, std::vector<ParseTree *>> _labels; + + /// This is the backing field for getMismatchedNode(). + ParseTree *_mismatchedNode; + + public: + /// <summary> + /// Constructs a new instance of <seealso cref="ParseTreeMatch"/> from the specified + /// parse tree and pattern. + /// </summary> + /// <param name="tree"> The parse tree to match against the pattern. </param> + /// <param name="pattern"> The parse tree pattern. </param> + /// <param name="labels"> A mapping from label names to collections of + /// <seealso cref="ParseTree"/> objects located by the tree pattern matching process. </param> + /// <param name="mismatchedNode"> The first node which failed to match the tree + /// pattern during the matching process. + /// </param> + /// <exception cref="IllegalArgumentException"> if {@code tree} is {@code null} </exception> + /// <exception cref="IllegalArgumentException"> if {@code pattern} is {@code null} </exception> + /// <exception cref="IllegalArgumentException"> if {@code labels} is {@code null} </exception> + ParseTreeMatch(ParseTree *tree, ParseTreePattern const& pattern, + const std::map<std::string, std::vector<ParseTree *>> &labels, ParseTree *mismatchedNode); + ParseTreeMatch(ParseTreeMatch const&) = default; + virtual ~ParseTreeMatch(); + + /// <summary> + /// Get the last node associated with a specific {@code label}. 
+ /// <p/> + /// For example, for pattern {@code <id:ID>}, {@code get("id")} returns the + /// node matched for that {@code ID}. If more than one node + /// matched the specified label, only the last is returned. If there is + /// no node associated with the label, this returns {@code null}. + /// <p/> + /// Pattern tags like {@code <ID>} and {@code <expr>} without labels are + /// considered to be labeled with {@code ID} and {@code expr}, respectively. + /// </summary> + /// <param name="label"> The label to check. + /// </param> + /// <returns> The last <seealso cref="ParseTree"/> to match a tag with the specified + /// label, or {@code null} if no parse tree matched a tag with the label. </returns> + virtual ParseTree* get(const std::string &label); + + /// <summary> + /// Return all nodes matching a rule or token tag with the specified label. + /// <p/> + /// If the {@code label} is the name of a parser rule or token in the + /// grammar, the resulting list will contain both the parse trees matching + /// rule or tags explicitly labeled with the label and the complete set of + /// parse trees matching the labeled and unlabeled tags in the pattern for + /// the parser rule or token. For example, if {@code label} is {@code "foo"}, + /// the result will contain <em>all</em> of the following. + /// + /// <ul> + /// <li>Parse tree nodes matching tags of the form {@code <foo:anyRuleName>} and + /// {@code <foo:AnyTokenName>}.</li> + /// <li>Parse tree nodes matching tags of the form {@code <anyLabel:foo>}.</li> + /// <li>Parse tree nodes matching tags of the form {@code <foo>}.</li> + /// </ul> + /// </summary> + /// <param name="label"> The label. + /// </param> + /// <returns> A collection of all <seealso cref="ParseTree"/> nodes matching tags with + /// the specified {@code label}. If no nodes matched the label, an empty list + /// is returned. 
</returns> + virtual std::vector<ParseTree *> getAll(const std::string &label); + + /// <summary> + /// Return a mapping from label → [list of nodes]. + /// <p/> + /// The map includes special entries corresponding to the names of rules and + /// tokens referenced in tags in the original pattern. For additional + /// information, see the description of <seealso cref="#getAll(String)"/>. + /// </summary> + /// <returns> A mapping from labels to parse tree nodes. If the parse tree + /// pattern did not contain any rule or token tags, this map will be empty. </returns> + virtual std::map<std::string, std::vector<ParseTree *>>& getLabels(); + + /// <summary> + /// Get the node at which we first detected a mismatch. + /// </summary> + /// <returns> the node at which we first detected a mismatch, or {@code null} + /// if the match was successful. </returns> + virtual ParseTree* getMismatchedNode(); + + /// <summary> + /// Gets a value indicating whether the match operation succeeded. + /// </summary> + /// <returns> {@code true} if the match operation succeeded; otherwise, + /// {@code false}. </returns> + virtual bool succeeded(); + + /// <summary> + /// Get the tree pattern we are matching against. + /// </summary> + /// <returns> The tree pattern we are matching against. </returns> + virtual const ParseTreePattern& getPattern(); + + /// <summary> + /// Get the parse tree we are trying to match to a pattern. + /// </summary> + /// <returns> The <seealso cref="ParseTree"/> we are trying to match to a pattern. 
</returns> + virtual ParseTree* getTree(); + + virtual std::string toString(); + }; + +} // namespace pattern +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreePattern.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreePattern.cpp new file mode 100644 index 0000000000..ca7f8f20d6 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreePattern.cpp @@ -0,0 +1,64 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "tree/ParseTree.h" +#include "tree/pattern/ParseTreePatternMatcher.h" +#include "tree/pattern/ParseTreeMatch.h" + +#include "tree/xpath/XPath.h" +#include "tree/xpath/XPathElement.h" + +#include "tree/pattern/ParseTreePattern.h" + +using namespace antlr4::tree; +using namespace antlr4::tree::pattern; + +using namespace antlrcpp; + +ParseTreePattern::ParseTreePattern(ParseTreePatternMatcher *matcher, const std::string &pattern, int patternRuleIndex_, + ParseTree *patternTree) + : patternRuleIndex(patternRuleIndex_), _pattern(pattern), _patternTree(patternTree), _matcher(matcher) { +} + +ParseTreePattern::~ParseTreePattern() { +} + +ParseTreeMatch ParseTreePattern::match(ParseTree *tree) { + return _matcher->match(tree, *this); +} + +bool ParseTreePattern::matches(ParseTree *tree) { + return _matcher->match(tree, *this).succeeded(); +} + +std::vector<ParseTreeMatch> ParseTreePattern::findAll(ParseTree *tree, const std::string &xpath) { + xpath::XPath finder(_matcher->getParser(), xpath); + std::vector<ParseTree *> subtrees = finder.evaluate(tree); + std::vector<ParseTreeMatch> matches; + for (auto *t : subtrees) { + ParseTreeMatch aMatch = match(t); + if (aMatch.succeeded()) { + matches.push_back(aMatch); + } + } + return matches; +} + + +ParseTreePatternMatcher *ParseTreePattern::getMatcher() const { 
+ return _matcher; +} + +std::string ParseTreePattern::getPattern() const { + return _pattern; +} + +int ParseTreePattern::getPatternRuleIndex() const { + return patternRuleIndex; +} + +ParseTree* ParseTreePattern::getPatternTree() const { + return _patternTree; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreePattern.h b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreePattern.h new file mode 100644 index 0000000000..d5b86ff473 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreePattern.h @@ -0,0 +1,105 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace tree { +namespace pattern { + + /// <summary> + /// A pattern like {@code <ID> = <expr>;} converted to a <seealso cref="ParseTree"/> by + /// <seealso cref="ParseTreePatternMatcher#compile(String, int)"/>. + /// </summary> + class ANTLR4CPP_PUBLIC ParseTreePattern { + public: + /// <summary> + /// Construct a new instance of the <seealso cref="ParseTreePattern"/> class. + /// </summary> + /// <param name="matcher"> The <seealso cref="ParseTreePatternMatcher"/> which created this + /// tree pattern. </param> + /// <param name="pattern"> The tree pattern in concrete syntax form. </param> + /// <param name="patternRuleIndex"> The parser rule which serves as the root of the + /// tree pattern. </param> + /// <param name="patternTree"> The tree pattern in <seealso cref="ParseTree"/> form. </param> + ParseTreePattern(ParseTreePatternMatcher *matcher, const std::string &pattern, int patternRuleIndex, + ParseTree *patternTree); + ParseTreePattern(ParseTreePattern const&) = default; + virtual ~ParseTreePattern(); + + /// <summary> + /// Match a specific parse tree against this tree pattern. 
+ /// </summary> + /// <param name="tree"> The parse tree to match against this tree pattern. </param> + /// <returns> A <seealso cref="ParseTreeMatch"/> object describing the result of the + /// match operation. The <seealso cref="ParseTreeMatch#succeeded()"/> method can be + /// used to determine whether or not the match was successful. </returns> + virtual ParseTreeMatch match(ParseTree *tree); + + /// <summary> + /// Determine whether or not a parse tree matches this tree pattern. + /// </summary> + /// <param name="tree"> The parse tree to match against this tree pattern. </param> + /// <returns> {@code true} if {@code tree} is a match for the current tree + /// pattern; otherwise, {@code false}. </returns> + virtual bool matches(ParseTree *tree); + + /// Find all nodes using XPath and then try to match those subtrees against + /// this tree pattern. + /// @param tree The ParseTree to match against this pattern. + /// @param xpath An expression matching the nodes + /// + /// @returns A collection of ParseTreeMatch objects describing the + /// successful matches. Unsuccessful matches are omitted from the result, + /// regardless of the reason for the failure. + virtual std::vector<ParseTreeMatch> findAll(ParseTree *tree, const std::string &xpath); + + /// <summary> + /// Get the <seealso cref="ParseTreePatternMatcher"/> which created this tree pattern. + /// </summary> + /// <returns> The <seealso cref="ParseTreePatternMatcher"/> which created this tree + /// pattern. </returns> + virtual ParseTreePatternMatcher *getMatcher() const; + + /// <summary> + /// Get the tree pattern in concrete syntax form. + /// </summary> + /// <returns> The tree pattern in concrete syntax form. </returns> + virtual std::string getPattern() const; + + /// <summary> + /// Get the parser rule which serves as the outermost rule for the tree + /// pattern. + /// </summary> + /// <returns> The parser rule which serves as the outermost rule for the tree + /// pattern. 
</returns> + virtual int getPatternRuleIndex() const; + + /// <summary> + /// Get the tree pattern as a <seealso cref="ParseTree"/>. The rule and token tags from + /// the pattern are present in the parse tree as terminal nodes with a symbol + /// of type <seealso cref="RuleTagToken"/> or <seealso cref="TokenTagToken"/>. + /// </summary> + /// <returns> The tree pattern as a <seealso cref="ParseTree"/>. </returns> + virtual ParseTree* getPatternTree() const; + + private: + const int patternRuleIndex; + + /// This is the backing field for <seealso cref="#getPattern()"/>. + const std::string _pattern; + + /// This is the backing field for <seealso cref="#getPatternTree()"/>. + ParseTree *_patternTree; + + /// This is the backing field for <seealso cref="#getMatcher()"/>. + ParseTreePatternMatcher *const _matcher; + }; + +} // namespace pattern +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreePatternMatcher.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreePatternMatcher.cpp new file mode 100644 index 0000000000..4c28658954 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreePatternMatcher.cpp @@ -0,0 +1,370 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "tree/pattern/ParseTreePattern.h" +#include "tree/pattern/ParseTreeMatch.h" +#include "tree/TerminalNode.h" +#include "CommonTokenStream.h" +#include "ParserInterpreter.h" +#include "tree/pattern/TokenTagToken.h" +#include "ParserRuleContext.h" +#include "tree/pattern/RuleTagToken.h" +#include "tree/pattern/TagChunk.h" +#include "atn/ATN.h" +#include "Lexer.h" +#include "BailErrorStrategy.h" + +#include "ListTokenSource.h" +#include "tree/pattern/TextChunk.h" +#include "ANTLRInputStream.h" +#include "support/Arrays.h" +#include "Exceptions.h" +#include "support/CPPUtils.h" + +#include "tree/pattern/ParseTreePatternMatcher.h" + +using namespace antlr4; +using namespace antlr4::tree; +using namespace antlr4::tree::pattern; +using namespace antlrcpp; + +ParseTreePatternMatcher::CannotInvokeStartRule::CannotInvokeStartRule(const RuntimeException &e) : RuntimeException(e.what()) { +} + +ParseTreePatternMatcher::CannotInvokeStartRule::~CannotInvokeStartRule() { +} + +ParseTreePatternMatcher::StartRuleDoesNotConsumeFullPattern::~StartRuleDoesNotConsumeFullPattern() { +} + +ParseTreePatternMatcher::ParseTreePatternMatcher(Lexer *lexer, Parser *parser) : _lexer(lexer), _parser(parser) { + InitializeInstanceFields(); +} + +ParseTreePatternMatcher::~ParseTreePatternMatcher() { +} + +void ParseTreePatternMatcher::setDelimiters(const std::string &start, const std::string &stop, const std::string &escapeLeft) { + if (start.empty()) { + throw IllegalArgumentException("start cannot be null or empty"); + } + + if (stop.empty()) { + throw IllegalArgumentException("stop cannot be null or empty"); + } + + _start = start; + _stop = stop; + _escape = escapeLeft; +} + +bool ParseTreePatternMatcher::matches(ParseTree *tree, const std::string &pattern, int patternRuleIndex) { + ParseTreePattern p = compile(pattern, patternRuleIndex); + return matches(tree, p); +} + +bool ParseTreePatternMatcher::matches(ParseTree *tree, const ParseTreePattern &pattern) { + 
std::map<std::string, std::vector<ParseTree *>> labels; + ParseTree *mismatchedNode = matchImpl(tree, pattern.getPatternTree(), labels); + return mismatchedNode == nullptr; +} + +ParseTreeMatch ParseTreePatternMatcher::match(ParseTree *tree, const std::string &pattern, int patternRuleIndex) { + ParseTreePattern p = compile(pattern, patternRuleIndex); + return match(tree, p); +} + +ParseTreeMatch ParseTreePatternMatcher::match(ParseTree *tree, const ParseTreePattern &pattern) { + std::map<std::string, std::vector<ParseTree *>> labels; + tree::ParseTree *mismatchedNode = matchImpl(tree, pattern.getPatternTree(), labels); + return ParseTreeMatch(tree, pattern, labels, mismatchedNode); +} + +ParseTreePattern ParseTreePatternMatcher::compile(const std::string &pattern, int patternRuleIndex) { + ListTokenSource tokenSrc(tokenize(pattern)); + CommonTokenStream tokens(&tokenSrc); + + ParserInterpreter parserInterp(_parser->getGrammarFileName(), _parser->getVocabulary(), + _parser->getRuleNames(), _parser->getATNWithBypassAlts(), &tokens); + + ParserRuleContext *tree = nullptr; + try { + parserInterp.setErrorHandler(std::make_shared<BailErrorStrategy>()); + tree = parserInterp.parse(patternRuleIndex); + } catch (ParseCancellationException &e) { +#if defined(_MSC_FULL_VER) && _MSC_FULL_VER < 190023026 + // rethrow_if_nested is not available before VS 2015. + throw e; +#else + std::rethrow_if_nested(e); // Unwrap the nested exception. +#endif + } catch (RecognitionException &re) { + throw re; +#if defined(_MSC_FULL_VER) && _MSC_FULL_VER < 190023026 + } catch (std::exception &e) { + // throw_with_nested is not available before VS 2015. + throw e; +#else + } catch (std::exception & /*e*/) { + std::throw_with_nested(RuntimeException("Cannot invoke start rule")); // Wrap any other exception. 
+#endif + } + + // Make sure tree pattern compilation checks for a complete parse + if (tokens.LA(1) != Token::EOF) { + throw StartRuleDoesNotConsumeFullPattern(); + } + + return ParseTreePattern(this, pattern, patternRuleIndex, tree); +} + +Lexer* ParseTreePatternMatcher::getLexer() { + return _lexer; +} + +Parser* ParseTreePatternMatcher::getParser() { + return _parser; +} + +ParseTree* ParseTreePatternMatcher::matchImpl(ParseTree *tree, ParseTree *patternTree, + std::map<std::string, std::vector<ParseTree *>> &labels) { + if (tree == nullptr) { + throw IllegalArgumentException("tree cannot be nul"); + } + + if (patternTree == nullptr) { + throw IllegalArgumentException("patternTree cannot be nul"); + } + + // x and <ID>, x and y, or x and x; or could be mismatched types + if (is<TerminalNode *>(tree) && is<TerminalNode *>(patternTree)) { + TerminalNode *t1 = dynamic_cast<TerminalNode *>(tree); + TerminalNode *t2 = dynamic_cast<TerminalNode *>(patternTree); + + ParseTree *mismatchedNode = nullptr; + // both are tokens and they have same type + if (t1->getSymbol()->getType() == t2->getSymbol()->getType()) { + if (is<TokenTagToken *>(t2->getSymbol())) { // x and <ID> + TokenTagToken *tokenTagToken = dynamic_cast<TokenTagToken *>(t2->getSymbol()); + + // track label->list-of-nodes for both token name and label (if any) + labels[tokenTagToken->getTokenName()].push_back(tree); + if (tokenTagToken->getLabel() != "") { + labels[tokenTagToken->getLabel()].push_back(tree); + } + } else if (t1->getText() == t2->getText()) { + // x and x + } else { + // x and y + if (mismatchedNode == nullptr) { + mismatchedNode = t1; + } + } + } else { + if (mismatchedNode == nullptr) { + mismatchedNode = t1; + } + } + + return mismatchedNode; + } + + if (is<ParserRuleContext *>(tree) && is<ParserRuleContext *>(patternTree)) { + ParserRuleContext *r1 = dynamic_cast<ParserRuleContext *>(tree); + ParserRuleContext *r2 = dynamic_cast<ParserRuleContext *>(patternTree); + ParseTree 
*mismatchedNode = nullptr; + + // (expr ...) and <expr> + RuleTagToken *ruleTagToken = getRuleTagToken(r2); + if (ruleTagToken != nullptr) { + //ParseTreeMatch *m = nullptr; // unused? + if (r1->getRuleIndex() == r2->getRuleIndex()) { + // track label->list-of-nodes for both rule name and label (if any) + labels[ruleTagToken->getRuleName()].push_back(tree); + if (ruleTagToken->getLabel() != "") { + labels[ruleTagToken->getLabel()].push_back(tree); + } + } else { + if (!mismatchedNode) { + mismatchedNode = r1; + } + } + + return mismatchedNode; + } + + // (expr ...) and (expr ...) + if (r1->children.size() != r2->children.size()) { + if (mismatchedNode == nullptr) { + mismatchedNode = r1; + } + + return mismatchedNode; + } + + std::size_t n = r1->children.size(); + for (size_t i = 0; i < n; i++) { + ParseTree *childMatch = matchImpl(r1->children[i], patternTree->children[i], labels); + if (childMatch) { + return childMatch; + } + } + + return mismatchedNode; + } + + // if nodes aren't both tokens or both rule nodes, can't match + return tree; +} + +RuleTagToken* ParseTreePatternMatcher::getRuleTagToken(ParseTree *t) { + if (t->children.size() == 1 && is<TerminalNode *>(t->children[0])) { + TerminalNode *c = dynamic_cast<TerminalNode *>(t->children[0]); + if (is<RuleTagToken *>(c->getSymbol())) { + return dynamic_cast<RuleTagToken *>(c->getSymbol()); + } + } + return nullptr; +} + +std::vector<std::unique_ptr<Token>> ParseTreePatternMatcher::tokenize(const std::string &pattern) { + // split pattern into chunks: sea (raw input) and islands (<ID>, <expr>) + std::vector<Chunk> chunks = split(pattern); + + // create token stream from text and tags + std::vector<std::unique_ptr<Token>> tokens; + for (auto chunk : chunks) { + if (is<TagChunk *>(&chunk)) { + TagChunk &tagChunk = (TagChunk&)chunk; + // add special rule token or conjure up new token from name + if (isupper(tagChunk.getTag()[0])) { + size_t ttype = _parser->getTokenType(tagChunk.getTag()); + if (ttype == 
Token::INVALID_TYPE) { + throw IllegalArgumentException("Unknown token " + tagChunk.getTag() + " in pattern: " + pattern); + } + tokens.emplace_back(new TokenTagToken(tagChunk.getTag(), (int)ttype, tagChunk.getLabel())); + } else if (islower(tagChunk.getTag()[0])) { + size_t ruleIndex = _parser->getRuleIndex(tagChunk.getTag()); + if (ruleIndex == INVALID_INDEX) { + throw IllegalArgumentException("Unknown rule " + tagChunk.getTag() + " in pattern: " + pattern); + } + size_t ruleImaginaryTokenType = _parser->getATNWithBypassAlts().ruleToTokenType[ruleIndex]; + tokens.emplace_back(new RuleTagToken(tagChunk.getTag(), ruleImaginaryTokenType, tagChunk.getLabel())); + } else { + throw IllegalArgumentException("invalid tag: " + tagChunk.getTag() + " in pattern: " + pattern); + } + } else { + TextChunk &textChunk = (TextChunk&)chunk; + ANTLRInputStream input(textChunk.getText()); + _lexer->setInputStream(&input); + std::unique_ptr<Token> t(_lexer->nextToken()); + while (t->getType() != Token::EOF) { + tokens.push_back(std::move(t)); + t = _lexer->nextToken(); + } + _lexer->setInputStream(nullptr); + } + } + + return tokens; +} + +std::vector<Chunk> ParseTreePatternMatcher::split(const std::string &pattern) { + size_t p = 0; + size_t n = pattern.length(); + std::vector<Chunk> chunks; + + // find all start and stop indexes first, then collect + std::vector<size_t> starts; + std::vector<size_t> stops; + while (p < n) { + if (p == pattern.find(_escape + _start,p)) { + p += _escape.length() + _start.length(); + } else if (p == pattern.find(_escape + _stop,p)) { + p += _escape.length() + _stop.length(); + } else if (p == pattern.find(_start,p)) { + starts.push_back(p); + p += _start.length(); + } else if (p == pattern.find(_stop,p)) { + stops.push_back(p); + p += _stop.length(); + } else { + p++; + } + } + + if (starts.size() > stops.size()) { + throw IllegalArgumentException("unterminated tag in pattern: " + pattern); + } + + if (starts.size() < stops.size()) { + throw 
IllegalArgumentException("missing start tag in pattern: " + pattern); + } + + size_t ntags = starts.size(); + for (size_t i = 0; i < ntags; i++) { + if (starts[i] >= stops[i]) { + throw IllegalArgumentException("tag delimiters out of order in pattern: " + pattern); + } + } + + // collect into chunks now + if (ntags == 0) { + std::string text = pattern.substr(0, n); + chunks.push_back(TextChunk(text)); + } + + if (ntags > 0 && starts[0] > 0) { // copy text up to first tag into chunks + std::string text = pattern.substr(0, starts[0]); + chunks.push_back(TextChunk(text)); + } + + for (size_t i = 0; i < ntags; i++) { + // copy inside of <tag> + std::string tag = pattern.substr(starts[i] + _start.length(), stops[i] - (starts[i] + _start.length())); + std::string ruleOrToken = tag; + std::string label = ""; + size_t colon = tag.find(':'); + if (colon != std::string::npos) { + label = tag.substr(0,colon); + ruleOrToken = tag.substr(colon + 1, tag.length() - (colon + 1)); + } + chunks.push_back(TagChunk(label, ruleOrToken)); + if (i + 1 < ntags) { + // copy from end of <tag> to start of next + std::string text = pattern.substr(stops[i] + _stop.length(), starts[i + 1] - (stops[i] + _stop.length())); + chunks.push_back(TextChunk(text)); + } + } + + if (ntags > 0) { + size_t afterLastTag = stops[ntags - 1] + _stop.length(); + if (afterLastTag < n) { // copy text from end of last tag to end + std::string text = pattern.substr(afterLastTag, n - afterLastTag); + chunks.push_back(TextChunk(text)); + } + } + + // strip out all backslashes from text chunks but not tags + for (size_t i = 0; i < chunks.size(); i++) { + Chunk &c = chunks[i]; + if (is<TextChunk *>(&c)) { + TextChunk &tc = (TextChunk&)c; + std::string unescaped = tc.getText(); + unescaped.erase(std::remove(unescaped.begin(), unescaped.end(), '\\'), unescaped.end()); + if (unescaped.length() < tc.getText().length()) { + chunks[i] = TextChunk(unescaped); + } + } + } + + return chunks; +} + +void 
ParseTreePatternMatcher::InitializeInstanceFields() { + _start = "<"; + _stop = ">"; + _escape = "\\"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreePatternMatcher.h b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreePatternMatcher.h new file mode 100644 index 0000000000..8641fc9a00 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreePatternMatcher.h @@ -0,0 +1,185 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Exceptions.h" + +namespace antlr4 { +namespace tree { +namespace pattern { + + /// <summary> + /// A tree pattern matching mechanism for ANTLR <seealso cref="ParseTree"/>s. + /// <p/> + /// Patterns are strings of source input text with special tags representing + /// token or rule references such as: + /// <p/> + /// {@code <ID> = <expr>;} + /// <p/> + /// Given a pattern start rule such as {@code statement}, this object constructs + /// a <seealso cref="ParseTree"/> with placeholders for the {@code ID} and {@code expr} + /// subtree. Then the <seealso cref="#match"/> routines can compare an actual + /// <seealso cref="ParseTree"/> from a parse with this pattern. Tag {@code <ID>} matches + /// any {@code ID} token and tag {@code <expr>} references the result of the + /// {@code expr} rule (generally an instance of {@code ExprContext}. + /// <p/> + /// Pattern {@code x = 0;} is a similar pattern that matches the same pattern + /// except that it requires the identifier to be {@code x} and the expression to + /// be {@code 0}. + /// <p/> + /// The <seealso cref="#matches"/> routines return {@code true} or {@code false} based + /// upon a match for the tree rooted at the parameter sent in. 
The + /// <seealso cref="#match"/> routines return a <seealso cref="ParseTreeMatch"/> object that + /// contains the parse tree, the parse tree pattern, and a map from tag name to + /// matched nodes (more below). A subtree that fails to match, returns with + /// <seealso cref="ParseTreeMatch#mismatchedNode"/> set to the first tree node that did not + /// match. + /// <p/> + /// For efficiency, you can compile a tree pattern in string form to a + /// <seealso cref="ParseTreePattern"/> object. + /// <p/> + /// See {@code TestParseTreeMatcher} for lots of examples. + /// <seealso cref="ParseTreePattern"/> has two static helper methods: + /// <seealso cref="ParseTreePattern#findAll"/> and <seealso cref="ParseTreePattern#match"/> that + /// are easy to use but not super efficient because they create new + /// <seealso cref="ParseTreePatternMatcher"/> objects each time and have to compile the + /// pattern in string form before using it. + /// <p/> + /// The lexer and parser that you pass into the <seealso cref="ParseTreePatternMatcher"/> + /// constructor are used to parse the pattern in string form. The lexer converts + /// the {@code <ID> = <expr>;} into a sequence of four tokens (assuming lexer + /// throws out whitespace or puts it on a hidden channel). Be aware that the + /// input stream is reset for the lexer (but not the parser; a + /// <seealso cref="ParserInterpreter"/> is created to parse the input.). Any user-defined + /// fields you have put into the lexer might get changed when this mechanism asks + /// it to scan the pattern string. + /// <p/> + /// Normally a parser does not accept token {@code <expr>} as a valid + /// {@code expr} but, from the parser passed in, we create a special version of + /// the underlying grammar representation (an <seealso cref="ATN"/>) that allows imaginary + /// tokens representing rules ({@code <expr>}) to match entire rules. We call + /// these <em>bypass alternatives</em>. 
+ /// <p/> + /// Delimiters are {@code <} and {@code >}, with {@code \} as the escape string + /// by default, but you can set them to whatever you want using + /// <seealso cref="#setDelimiters"/>. You must escape both start and stop strings + /// {@code \<} and {@code \>}. + /// </summary> + class ANTLR4CPP_PUBLIC ParseTreePatternMatcher { + public: + class CannotInvokeStartRule : public RuntimeException { + public: + CannotInvokeStartRule(const RuntimeException &e); + ~CannotInvokeStartRule(); + }; + + // Fixes https://github.com/antlr/antlr4/issues/413 + // "Tree pattern compilation doesn't check for a complete parse" + class StartRuleDoesNotConsumeFullPattern : public RuntimeException { + public: + StartRuleDoesNotConsumeFullPattern() = default; + StartRuleDoesNotConsumeFullPattern(StartRuleDoesNotConsumeFullPattern const&) = default; + ~StartRuleDoesNotConsumeFullPattern(); + + StartRuleDoesNotConsumeFullPattern& operator=(StartRuleDoesNotConsumeFullPattern const&) = default; + }; + + /// Constructs a <seealso cref="ParseTreePatternMatcher"/> or from a <seealso cref="Lexer"/> and + /// <seealso cref="Parser"/> object. The lexer input stream is altered for tokenizing + /// the tree patterns. The parser is used as a convenient mechanism to get + /// the grammar name, plus token, rule names. + ParseTreePatternMatcher(Lexer *lexer, Parser *parser); + virtual ~ParseTreePatternMatcher(); + + /// <summary> + /// Set the delimiters used for marking rule and token tags within concrete + /// syntax used by the tree pattern parser. + /// </summary> + /// <param name="start"> The start delimiter. </param> + /// <param name="stop"> The stop delimiter. </param> + /// <param name="escapeLeft"> The escape sequence to use for escaping a start or stop delimiter. + /// </param> + /// <exception cref="IllegalArgumentException"> if {@code start} is {@code null} or empty. </exception> + /// <exception cref="IllegalArgumentException"> if {@code stop} is {@code null} or empty. 
</exception> + virtual void setDelimiters(const std::string &start, const std::string &stop, const std::string &escapeLeft); + + /// <summary> + /// Does {@code pattern} matched as rule {@code patternRuleIndex} match {@code tree}? </summary> + virtual bool matches(ParseTree *tree, const std::string &pattern, int patternRuleIndex); + + /// <summary> + /// Does {@code pattern} matched as rule patternRuleIndex match tree? Pass in a + /// compiled pattern instead of a string representation of a tree pattern. + /// </summary> + virtual bool matches(ParseTree *tree, const ParseTreePattern &pattern); + + /// <summary> + /// Compare {@code pattern} matched as rule {@code patternRuleIndex} against + /// {@code tree} and return a <seealso cref="ParseTreeMatch"/> object that contains the + /// matched elements, or the node at which the match failed. + /// </summary> + virtual ParseTreeMatch match(ParseTree *tree, const std::string &pattern, int patternRuleIndex); + + /// <summary> + /// Compare {@code pattern} matched against {@code tree} and return a + /// <seealso cref="ParseTreeMatch"/> object that contains the matched elements, or the + /// node at which the match failed. Pass in a compiled pattern instead of a + /// string representation of a tree pattern. + /// </summary> + virtual ParseTreeMatch match(ParseTree *tree, const ParseTreePattern &pattern); + + /// <summary> + /// For repeated use of a tree pattern, compile it to a + /// <seealso cref="ParseTreePattern"/> using this method. + /// </summary> + virtual ParseTreePattern compile(const std::string &pattern, int patternRuleIndex); + + /// <summary> + /// Used to convert the tree pattern string into a series of tokens. The + /// input stream is reset. + /// </summary> + virtual Lexer* getLexer(); + + /// <summary> + /// Used to collect to the grammar file name, token names, rule names for + /// used to parse the pattern into a parse tree. 
+ /// </summary> + virtual Parser* getParser(); + + // ---- SUPPORT CODE ---- + + virtual std::vector<std::unique_ptr<Token>> tokenize(const std::string &pattern); + + /// Split "<ID> = <e:expr>;" into 4 chunks for tokenizing by tokenize(). + virtual std::vector<Chunk> split(const std::string &pattern); + + protected: + std::string _start; + std::string _stop; + std::string _escape; // e.g., \< and \> must escape BOTH! + + /// Recursively walk {@code tree} against {@code patternTree}, filling + /// {@code match.}<seealso cref="ParseTreeMatch#labels labels"/>. + /// + /// <returns> the first node encountered in {@code tree} which does not match + /// a corresponding node in {@code patternTree}, or {@code null} if the match + /// was successful. The specific node returned depends on the matching + /// algorithm used by the implementation, and may be overridden. </returns> + virtual ParseTree* matchImpl(ParseTree *tree, ParseTree *patternTree, std::map<std::string, std::vector<ParseTree *>> &labels); + + /// Is t <expr> subtree? + virtual RuleTagToken* getRuleTagToken(ParseTree *t); + + private: + Lexer *_lexer; + Parser *_parser; + + void InitializeInstanceFields(); + }; + +} // namespace pattern +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/RuleTagToken.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/RuleTagToken.cpp new file mode 100644 index 0000000000..6f3fb73446 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/RuleTagToken.cpp @@ -0,0 +1,77 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "Exceptions.h" + +#include "tree/pattern/RuleTagToken.h" + +using namespace antlr4::tree::pattern; + +RuleTagToken::RuleTagToken(const std::string &/*ruleName*/, int _bypassTokenType) : bypassTokenType(_bypassTokenType) { +} + +RuleTagToken::RuleTagToken(const std::string &ruleName, size_t bypassTokenType, const std::string &label) + : ruleName(ruleName), bypassTokenType(bypassTokenType), label(label) { + if (ruleName.empty()) { + throw IllegalArgumentException("ruleName cannot be null or empty."); + } + +} + +std::string RuleTagToken::getRuleName() const { + return ruleName; +} + +std::string RuleTagToken::getLabel() const { + return label; +} + +size_t RuleTagToken::getChannel() const { + return DEFAULT_CHANNEL; +} + +std::string RuleTagToken::getText() const { + if (label != "") { + return std::string("<") + label + std::string(":") + ruleName + std::string(">"); + } + + return std::string("<") + ruleName + std::string(">"); +} + +size_t RuleTagToken::getType() const { + return bypassTokenType; +} + +size_t RuleTagToken::getLine() const { + return 0; +} + +size_t RuleTagToken::getCharPositionInLine() const { + return INVALID_INDEX; +} + +size_t RuleTagToken::getTokenIndex() const { + return INVALID_INDEX; +} + +size_t RuleTagToken::getStartIndex() const { + return INVALID_INDEX; +} + +size_t RuleTagToken::getStopIndex() const { + return INVALID_INDEX; +} + +antlr4::TokenSource *RuleTagToken::getTokenSource() const { + return nullptr; +} + +antlr4::CharStream *RuleTagToken::getInputStream() const { + return nullptr; +} + +std::string RuleTagToken::toString() const { + return ruleName + ":" + std::to_string(bypassTokenType); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/RuleTagToken.h b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/RuleTagToken.h new file mode 100644 index 0000000000..cb0e50399e --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/RuleTagToken.h @@ -0,0 +1,117 @@ +/* Copyright (c) 2012-2017 
The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Token.h" + +namespace antlr4 { +namespace tree { +namespace pattern { + + /// <summary> + /// A <seealso cref="Token"/> object representing an entire subtree matched by a parser + /// rule; e.g., {@code <expr>}. These tokens are created for <seealso cref="TagChunk"/> + /// chunks where the tag corresponds to a parser rule. + /// </summary> + class ANTLR4CPP_PUBLIC RuleTagToken : public Token { + /// <summary> + /// This is the backing field for <seealso cref="#getRuleName"/>. + /// </summary> + private: + const std::string ruleName; + + /// The token type for the current token. This is the token type assigned to + /// the bypass alternative for the rule during ATN deserialization. + const size_t bypassTokenType; + + /// This is the backing field for <seealso cref="#getLabe"/>. + const std::string label; + + public: + /// <summary> + /// Constructs a new instance of <seealso cref="RuleTagToken"/> with the specified rule + /// name and bypass token type and no label. + /// </summary> + /// <param name="ruleName"> The name of the parser rule this rule tag matches. </param> + /// <param name="bypassTokenType"> The bypass token type assigned to the parser rule. + /// </param> + /// <exception cref="IllegalArgumentException"> if {@code ruleName} is {@code null} + /// or empty. </exception> + RuleTagToken(const std::string &ruleName, int bypassTokenType); //this(ruleName, bypassTokenType, nullptr); + + /// <summary> + /// Constructs a new instance of <seealso cref="RuleTagToken"/> with the specified rule + /// name, bypass token type, and label. + /// </summary> + /// <param name="ruleName"> The name of the parser rule this rule tag matches. </param> + /// <param name="bypassTokenType"> The bypass token type assigned to the parser rule. 
</param> + /// <param name="label"> The label associated with the rule tag, or {@code null} if + /// the rule tag is unlabeled. + /// </param> + /// <exception cref="IllegalArgumentException"> if {@code ruleName} is {@code null} + /// or empty. </exception> + RuleTagToken(const std::string &ruleName, size_t bypassTokenType, const std::string &label); + + /// <summary> + /// Gets the name of the rule associated with this rule tag. + /// </summary> + /// <returns> The name of the parser rule associated with this rule tag. </returns> + std::string getRuleName() const; + + /// <summary> + /// Gets the label associated with the rule tag. + /// </summary> + /// <returns> The name of the label associated with the rule tag, or + /// {@code null} if this is an unlabeled rule tag. </returns> + std::string getLabel() const; + + /// <summary> + /// {@inheritDoc} + /// <p/> + /// Rule tag tokens are always placed on the <seealso cref="#DEFAULT_CHANNE"/>. + /// </summary> + virtual size_t getChannel() const override; + + /// <summary> + /// {@inheritDoc} + /// <p/> + /// This method returns the rule tag formatted with {@code <} and {@code >} + /// delimiters. + /// </summary> + virtual std::string getText() const override; + + /// Rule tag tokens have types assigned according to the rule bypass + /// transitions created during ATN deserialization. + virtual size_t getType() const override; + + /// The implementation for <seealso cref="RuleTagToken"/> always returns 0. + virtual size_t getLine() const override; + + /// The implementation for <seealso cref="RuleTagToken"/> always returns INVALID_INDEX. + virtual size_t getCharPositionInLine() const override; + + /// The implementation for <seealso cref="RuleTagToken"/> always returns INVALID_INDEX. + virtual size_t getTokenIndex() const override; + + /// The implementation for <seealso cref="RuleTagToken"/> always returns INVALID_INDEX. 
+ virtual size_t getStartIndex() const override; + + /// The implementation for <seealso cref="RuleTagToken"/> always returns INVALID_INDEX. + virtual size_t getStopIndex() const override; + + /// The implementation for <seealso cref="RuleTagToken"/> always returns {@code null}. + virtual TokenSource *getTokenSource() const override; + + /// The implementation for <seealso cref="RuleTagToken"/> always returns {@code null}. + virtual CharStream *getInputStream() const override; + + /// The implementation for <seealso cref="RuleTagToken"/> returns a string of the form {@code ruleName:bypassTokenType}. + virtual std::string toString() const override; + }; + +} // namespace pattern +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TagChunk.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TagChunk.cpp new file mode 100644 index 0000000000..63e97aeaa2 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TagChunk.cpp @@ -0,0 +1,39 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "Exceptions.h" + +#include "tree/pattern/TagChunk.h" + +using namespace antlr4::tree::pattern; + +TagChunk::TagChunk(const std::string &tag) : TagChunk("", tag) { +} + +TagChunk::TagChunk(const std::string &label, const std::string &tag) : _tag(tag), _label(label) { + if (tag.empty()) { + throw IllegalArgumentException("tag cannot be null or empty"); + } + +} + +TagChunk::~TagChunk() { +} + +std::string TagChunk::getTag() { + return _tag; +} + +std::string TagChunk::getLabel() { + return _label; +} + +std::string TagChunk::toString() { + if (!_label.empty()) { + return _label + ":" + _tag; + } + + return _tag; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TagChunk.h b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TagChunk.h new file mode 100644 index 0000000000..1cdae78995 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TagChunk.h @@ -0,0 +1,86 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Chunk.h" + +namespace antlr4 { +namespace tree { +namespace pattern { + + /// <summary> + /// Represents a placeholder tag in a tree pattern. A tag can have any of the + /// following forms. + /// + /// <ul> + /// <li>{@code expr}: An unlabeled placeholder for a parser rule {@code expr}.</li> + /// <li>{@code ID}: An unlabeled placeholder for a token of type {@code ID}.</li> + /// <li>{@code e:expr}: A labeled placeholder for a parser rule {@code expr}.</li> + /// <li>{@code id:ID}: A labeled placeholder for a token of type {@code ID}.</li> + /// </ul> + /// + /// This class does not perform any validation on the tag or label names aside + /// from ensuring that the tag is a non-null, non-empty string. 
+ /// </summary> + class ANTLR4CPP_PUBLIC TagChunk : public Chunk { + public: + /// <summary> + /// Construct a new instance of <seealso cref="TagChunk"/> using the specified tag and + /// no label. + /// </summary> + /// <param name="tag"> The tag, which should be the name of a parser rule or token + /// type. + /// </param> + /// <exception cref="IllegalArgumentException"> if {@code tag} is {@code null} or + /// empty. </exception> + TagChunk(const std::string &tag); + virtual ~TagChunk(); + + /// <summary> + /// Construct a new instance of <seealso cref="TagChunk"/> using the specified label + /// and tag. + /// </summary> + /// <param name="label"> The label for the tag. If this is {@code null}, the + /// <seealso cref="TagChunk"/> represents an unlabeled tag. </param> + /// <param name="tag"> The tag, which should be the name of a parser rule or token + /// type. + /// </param> + /// <exception cref="IllegalArgumentException"> if {@code tag} is {@code null} or + /// empty. </exception> + TagChunk(const std::string &label, const std::string &tag); + + /// <summary> + /// Get the tag for this chunk. + /// </summary> + /// <returns> The tag for the chunk. </returns> + std::string getTag(); + + /// <summary> + /// Get the label, if any, assigned to this chunk. + /// </summary> + /// <returns> The label assigned to this chunk, or {@code null} if no label is + /// assigned to the chunk. </returns> + std::string getLabel(); + + /// <summary> + /// This method returns a text representation of the tag chunk. Labeled tags + /// are returned in the form {@code label:tag}, and unlabeled tags are + /// returned as just the tag name. + /// </summary> + virtual std::string toString() override; + + private: + /// This is the backing field for <seealso cref="#getTag"/>. + const std::string _tag; + /// <summary> + /// This is the backing field for <seealso cref="#getLabe"/>. 
+ /// </summary> + const std::string _label; + }; + +} // namespace pattern +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TextChunk.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TextChunk.cpp new file mode 100644 index 0000000000..8e2e6689d7 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TextChunk.cpp @@ -0,0 +1,28 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "Exceptions.h" + +#include "tree/pattern/TextChunk.h" + +using namespace antlr4::tree::pattern; + +TextChunk::TextChunk(const std::string &text) : text(text) { + if (text.empty()) { // a std::string cannot be null, so the empty string is what gets rejected + throw IllegalArgumentException("text cannot be null or empty"); + } + +} + +TextChunk::~TextChunk() { +} + +std::string TextChunk::getText() { + return text; +} + +std::string TextChunk::toString() { + return std::string("'") + text + std::string("'"); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TextChunk.h b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TextChunk.h new file mode 100644 index 0000000000..bb7fc7f966 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TextChunk.h @@ -0,0 +1,51 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Chunk.h" + +namespace antlr4 { +namespace tree { +namespace pattern { + + /// <summary> + /// Represents a span of raw text (concrete syntax) between tags in a tree + /// pattern string. + /// </summary> + class ANTLR4CPP_PUBLIC TextChunk : public Chunk { + private: + /// <summary> + /// This is the backing field for <seealso cref="#getText"/>. 
+ /// </summary> + const std::string text; + + /// <summary> + /// Constructs a new instance of <seealso cref="TextChunk"/> with the specified text. + /// </summary> + /// <param name="text"> The text of this chunk. </param> + /// <exception cref="IllegalArgumentException"> if {@code text} is {@code null}. </exception> + public: + TextChunk(const std::string &text); + virtual ~TextChunk(); + + /// <summary> + /// Gets the raw text of this chunk. + /// </summary> + /// <returns> The text of the chunk. </returns> + std::string getText(); + + /// <summary> + /// {@inheritDoc} + /// <p/> + /// The implementation for <seealso cref="TextChunk"/> returns the result of + /// <seealso cref="#getText()"/> in single quotes. + /// </summary> + virtual std::string toString() override; + }; + +} // namespace pattern +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TokenTagToken.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TokenTagToken.cpp new file mode 100644 index 0000000000..f5153c8357 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TokenTagToken.cpp @@ -0,0 +1,36 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "tree/pattern/TokenTagToken.h" + +using namespace antlr4::tree::pattern; + +TokenTagToken::TokenTagToken(const std::string &/*tokenName*/, int type) + : CommonToken(type), tokenName(""), label("") { +} + +TokenTagToken::TokenTagToken(const std::string &tokenName, int type, const std::string &label) + : CommonToken(type), tokenName(tokenName), label(label) { +} + +std::string TokenTagToken::getTokenName() const { + return tokenName; +} + +std::string TokenTagToken::getLabel() const { + return label; +} + +std::string TokenTagToken::getText() const { + if (!label.empty()) { + return "<" + label + ":" + tokenName + ">"; + } + + return "<" + tokenName + ">"; +} + +std::string TokenTagToken::toString() const { + return tokenName + ":" + std::to_string(_type); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TokenTagToken.h b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TokenTagToken.h new file mode 100644 index 0000000000..da9e11cd36 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TokenTagToken.h @@ -0,0 +1,80 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "CommonToken.h" + +namespace antlr4 { +namespace tree { +namespace pattern { + + /// <summary> + /// A <seealso cref="Token"/> object representing a token of a particular type; e.g., + /// {@code <ID>}. These tokens are created for <seealso cref="TagChunk"/> chunks where the + /// tag corresponds to a lexer rule or token type. + /// </summary> + class ANTLR4CPP_PUBLIC TokenTagToken : public CommonToken { + /// <summary> + /// This is the backing field for <seealso cref="#getTokenName"/>. + /// </summary> + private: + const std::string tokenName; + /// <summary> + /// This is the backing field for <seealso cref="#getLabe"/>. 
+ /// </summary> + const std::string label; + + /// <summary> + /// Constructs a new instance of <seealso cref="TokenTagToken"/> for an unlabeled tag + /// with the specified token name and type. + /// </summary> + /// <param name="tokenName"> The token name. </param> + /// <param name="type"> The token type. </param> + public: + TokenTagToken(const std::string &tokenName, int type); //this(tokenName, type, nullptr); + + /// <summary> + /// Constructs a new instance of <seealso cref="TokenTagToken"/> with the specified + /// token name, type, and label. + /// </summary> + /// <param name="tokenName"> The token name. </param> + /// <param name="type"> The token type. </param> + /// <param name="label"> The label associated with the token tag, or {@code null} if + /// the token tag is unlabeled. </param> + TokenTagToken(const std::string &tokenName, int type, const std::string &label); + + /// <summary> + /// Gets the token name. </summary> + /// <returns> The token name. </returns> + std::string getTokenName() const; + + /// <summary> + /// Gets the label associated with the rule tag. + /// </summary> + /// <returns> The name of the label associated with the rule tag, or + /// {@code null} if this is an unlabeled rule tag. </returns> + std::string getLabel() const; + + /// <summary> + /// {@inheritDoc} + /// <p/> + /// The implementation for <seealso cref="TokenTagToken"/> returns the token tag + /// formatted with {@code <} and {@code >} delimiters. + /// </summary> + virtual std::string getText() const override; + + /// <summary> + /// {@inheritDoc} + /// <p/> + /// The implementation for <seealso cref="TokenTagToken"/> returns a string of the form + /// {@code tokenName:type}. 
+ /// </summary> + virtual std::string toString() const override; + }; + +} // namespace pattern +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPath.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPath.cpp new file mode 100644 index 0000000000..c0398962ec --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPath.cpp @@ -0,0 +1,154 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "XPathLexer.h" +#include "XPathLexerErrorListener.h" +#include "XPathElement.h" +#include "XPathWildcardAnywhereElement.h" +#include "XPathWildcardElement.h" +#include "XPathTokenAnywhereElement.h" +#include "XPathTokenElement.h" +#include "XPathRuleAnywhereElement.h" +#include "XPathRuleElement.h" + +#include "XPath.h" + +using namespace antlr4; +using namespace antlr4::tree; +using namespace antlr4::tree::xpath; + +const std::string XPath::WILDCARD = "*"; +const std::string XPath::NOT = "!"; + +XPath::XPath(Parser *parser, const std::string &path) { + _parser = parser; + _path = path; +} + +std::vector<std::unique_ptr<XPathElement>> XPath::split(const std::string &path) { + ANTLRInputStream in(path); + XPathLexer lexer(&in); + lexer.removeErrorListeners(); + XPathLexerErrorListener listener; + lexer.addErrorListener(&listener); + CommonTokenStream tokenStream(&lexer); + try { + tokenStream.fill(); + } catch (LexerNoViableAltException &) { + size_t pos = lexer.getCharPositionInLine(); + std::string msg = "Invalid tokens or characters at index " + std::to_string(pos) + " in path '" + path + "'"; + throw IllegalArgumentException(msg); + } + + std::vector<Token *> tokens = tokenStream.getTokens(); + std::vector<std::unique_ptr<XPathElement>> elements; + size_t n = tokens.size(); + size_t i = 0; + bool done = false; + while (!done && i < n) { + Token 
*el = tokens[i]; + Token *next = nullptr; + switch (el->getType()) { + case XPathLexer::ROOT: + case XPathLexer::ANYWHERE: { + bool anywhere = el->getType() == XPathLexer::ANYWHERE; + i++; + next = tokens[i]; + bool invert = next->getType() == XPathLexer::BANG; + if (invert) { + i++; + next = tokens[i]; + } + std::unique_ptr<XPathElement> pathElement = getXPathElement(next, anywhere); + pathElement->setInvert(invert); + elements.push_back(std::move(pathElement)); + i++; + break; + + } + case XPathLexer::TOKEN_REF: + case XPathLexer::RULE_REF: + case XPathLexer::WILDCARD: + elements.push_back(getXPathElement(el, false)); + i++; + break; + + case Token::EOF: + done = true; + break; + + default : + throw IllegalArgumentException("Unknown path element " + el->toString()); + } + } + + return elements; +} + +std::unique_ptr<XPathElement> XPath::getXPathElement(Token *wordToken, bool anywhere) { + if (wordToken->getType() == Token::EOF) { + throw IllegalArgumentException("Missing path element at end of path"); + } + + std::string word = wordToken->getText(); + size_t ttype = _parser->getTokenType(word); + ssize_t ruleIndex = _parser->getRuleIndex(word); + switch (wordToken->getType()) { + case XPathLexer::WILDCARD : + if (anywhere) + return std::unique_ptr<XPathWildcardAnywhereElement>(new XPathWildcardAnywhereElement()); + return std::unique_ptr<XPathWildcardElement>(new XPathWildcardElement()); + + case XPathLexer::TOKEN_REF: + case XPathLexer::STRING : + if (ttype == Token::INVALID_TYPE) { + throw IllegalArgumentException(word + " at index " + std::to_string(wordToken->getStartIndex()) + " isn't a valid token name"); + } + if (anywhere) + return std::unique_ptr<XPathTokenAnywhereElement>(new XPathTokenAnywhereElement(word, (int)ttype)); + return std::unique_ptr<XPathTokenElement>(new XPathTokenElement(word, (int)ttype)); + + default : + if (ruleIndex == -1) { + throw IllegalArgumentException(word + " at index " + std::to_string(wordToken->getStartIndex()) + " isn't a 
valid rule name"); + } + if (anywhere) + return std::unique_ptr<XPathRuleAnywhereElement>(new XPathRuleAnywhereElement(word, (int)ruleIndex)); + return std::unique_ptr<XPathRuleElement>(new XPathRuleElement(word, (int)ruleIndex)); + } +} + +static ParserRuleContext dummyRoot; + +std::vector<ParseTree *> XPath::findAll(ParseTree *tree, std::string const& xpath, Parser *parser) { + XPath p(parser, xpath); + return p.evaluate(tree); +} + +std::vector<ParseTree *> XPath::evaluate(ParseTree *t) { + dummyRoot.children = { t }; // don't set t's parent. + + std::vector<ParseTree *> work = { &dummyRoot }; + + size_t i = 0; + std::vector<std::unique_ptr<XPathElement>> elements = split(_path); + + while (i < elements.size()) { + std::vector<ParseTree *> next; + for (auto *node : work) { + if (!node->children.empty()) { + // only try to match next element if it has children + // e.g., //func/*/stat might have a token node for which + // we can't go looking for stat nodes. + auto matching = elements[i]->evaluate(node); + next.insert(next.end(), matching.begin(), matching.end()); + } + } + i++; + work = next; + } + + return work; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPath.h b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPath.h new file mode 100644 index 0000000000..e38d482d58 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPath.h @@ -0,0 +1,86 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace tree { +namespace xpath { + + /// Represent a subset of XPath XML path syntax for use in identifying nodes in + /// parse trees. + /// + /// <para> + /// Split path into words and separators {@code /} and {@code //} via ANTLR + /// itself then walk path elements from left to right. 
At each separator-word + /// pair, find set of nodes. Next stage uses those as work list.</para> + /// + /// <para> + /// The basic interface is + /// <seealso cref="XPath#findAll ParseTree.findAll"/>{@code (tree, pathString, parser)}. + /// But that is just shorthand for:</para> + /// + /// <pre> + /// <seealso cref="XPath"/> p = new <seealso cref="XPath#XPath XPath"/>(parser, pathString); + /// return p.<seealso cref="#evaluate evaluate"/>(tree); + /// </pre> + /// + /// <para> + /// See {@code org.antlr.v4.test.TestXPath} for descriptions. In short, this + /// allows operators:</para> + /// + /// <dl> + /// <dt>/</dt> <dd>root</dd> + /// <dt>//</dt> <dd>anywhere</dd> + /// <dt>!</dt> <dd>invert; this must appear directly after root or anywhere + /// operator</dd> + /// </dl> + /// + /// <para> + /// and path elements:</para> + /// + /// <dl> + /// <dt>ID</dt> <dd>token name</dd> + /// <dt>'string'</dt> <dd>any string literal token from the grammar</dd> + /// <dt>expr</dt> <dd>rule name</dd> + /// <dt>*</dt> <dd>wildcard matching any node</dd> + /// </dl> + /// + /// <para> + /// Whitespace is not allowed.</para> + + class ANTLR4CPP_PUBLIC XPath { + public: + static const std::string WILDCARD; // word not operator/separator + static const std::string NOT; // word for invert operator + + XPath(Parser *parser, const std::string &path); + virtual ~XPath() {} + + // TODO: check for invalid token/rule names, bad syntax + virtual std::vector<std::unique_ptr<XPathElement>> split(const std::string &path); + + static std::vector<ParseTree *> findAll(ParseTree *tree, std::string const& xpath, Parser *parser); + + /// Return a list of all nodes starting at {@code t} as root that satisfy the + /// path. The root {@code /} is relative to the node passed to + /// <seealso cref="#evaluate"/>. 
+ virtual std::vector<ParseTree *> evaluate(ParseTree *t); + + protected: + std::string _path; + Parser *_parser; + + /// Convert word like {@code *} or {@code ID} or {@code expr} to a path + /// element. {@code anywhere} is {@code true} if {@code //} precedes the + /// word. + virtual std::unique_ptr<XPathElement> getXPathElement(Token *wordToken, bool anywhere); + }; + +} // namespace xpath +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathElement.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathElement.cpp new file mode 100644 index 0000000000..64b122df13 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathElement.cpp @@ -0,0 +1,31 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "support/CPPUtils.h" + +#include "XPathElement.h" + +using namespace antlr4::tree; +using namespace antlr4::tree::xpath; + +XPathElement::XPathElement(const std::string &nodeName) { + _nodeName = nodeName; +} + +XPathElement::~XPathElement() { +} + +std::vector<ParseTree *> XPathElement::evaluate(ParseTree * /*t*/) { + return {}; +} + +std::string XPathElement::toString() const { + std::string inv = _invert ? "!" : ""; + return antlrcpp::toString(*this) + "[" + inv + _nodeName + "]"; +} + +void XPathElement::setInvert(bool value) { + _invert = value; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathElement.h b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathElement.h new file mode 100644 index 0000000000..f339117d7f --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathElement.h @@ -0,0 +1,40 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace tree { + class ParseTree; + +namespace xpath { + + class ANTLR4CPP_PUBLIC XPathElement { + public: + /// Construct element like {@code /ID} or {@code ID} or {@code /*} etc... + /// op is null if just node + XPathElement(const std::string &nodeName); + XPathElement(XPathElement const&) = default; + virtual ~XPathElement(); + + XPathElement& operator=(XPathElement const&) = default; + + /// Given tree rooted at {@code t} return all nodes matched by this path + /// element. + virtual std::vector<ParseTree *> evaluate(ParseTree *t); + virtual std::string toString() const; + + void setInvert(bool value); + + protected: + std::string _nodeName; + bool _invert = false; + }; + +} // namespace xpath +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathLexer.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathLexer.cpp new file mode 100644 index 0000000000..506d2e1179 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathLexer.cpp @@ -0,0 +1,182 @@ + +// Generated from XPathLexer.g4 by ANTLR 4.9.3 + + +#include "XPathLexer.h" + + +using namespace antlr4; + +namespace { + +struct XPathLexerStaticData final { + XPathLexerStaticData(std::vector<std::string> ruleNames, + std::vector<std::string> channelNames, + std::vector<std::string> modeNames, + std::vector<std::string> literalNames, + std::vector<std::string> symbolicNames) + : ruleNames(std::move(ruleNames)), channelNames(std::move(channelNames)), + modeNames(std::move(modeNames)), literalNames(std::move(literalNames)), + symbolicNames(std::move(symbolicNames)), + vocabulary(this->literalNames, this->symbolicNames) {} + + XPathLexerStaticData(const XPathLexerStaticData&) = delete; + XPathLexerStaticData(XPathLexerStaticData&&) = delete; + XPathLexerStaticData& operator=(const XPathLexerStaticData&) = delete; + XPathLexerStaticData& 
operator=(XPathLexerStaticData&&) = delete; + + std::vector<antlr4::dfa::DFA> decisionToDFA; + antlr4::atn::PredictionContextCache sharedContextCache; + const std::vector<std::string> ruleNames; + const std::vector<std::string> channelNames; + const std::vector<std::string> modeNames; + const std::vector<std::string> literalNames; + const std::vector<std::string> symbolicNames; + const antlr4::dfa::Vocabulary vocabulary; + antlr4::atn::SerializedATNView serializedATN; + std::unique_ptr<antlr4::atn::ATN> atn; +}; + +::antlr4::internal::OnceFlag xpathLexerOnceFlag; +XPathLexerStaticData *xpathLexerStaticData = nullptr; + +void xpathLexerInitialize() { + assert(xpathLexerStaticData == nullptr); + auto staticData = std::make_unique<XPathLexerStaticData>( + std::vector<std::string>{ + "ANYWHERE", "ROOT", "WILDCARD", "BANG", "ID", "NameChar", "NameStartChar", + "STRING" + }, + std::vector<std::string>{ + "DEFAULT_TOKEN_CHANNEL", "HIDDEN" + }, + std::vector<std::string>{ + "DEFAULT_MODE" + }, + std::vector<std::string>{ + "", "", "", "'//'", "'/'", "'*'", "'!'" + }, + std::vector<std::string>{ + "", "TOKEN_REF", "RULE_REF", "ANYWHERE", "ROOT", "WILDCARD", "BANG", "ID", + "STRING" + } + ); + static const int32_t serializedATNSegment[] = { + 0x4, 0x0, 0x8, 0x32, 0x6, -1, 0x2, 0x0, 0x7, 0x0, 0x2, 0x1, 0x7, + 0x1, 0x2, 0x2, 0x7, 0x2, 0x2, 0x3, 0x7, 0x3, 0x2, 0x4, 0x7, 0x4, + 0x2, 0x5, 0x7, 0x5, 0x2, 0x6, 0x7, 0x6, 0x2, 0x7, 0x7, 0x7, 0x1, + 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x1, 0x1, 0x1, 0x1, 0x2, 0x1, 0x2, + 0x1, 0x3, 0x1, 0x3, 0x1, 0x4, 0x1, 0x4, 0x5, 0x4, 0x1d, 0x8, 0x4, + 0xa, 0x4, 0xc, 0x4, 0x20, 0x9, 0x4, 0x1, 0x4, 0x1, 0x4, 0x1, 0x5, + 0x1, 0x5, 0x3, 0x5, 0x26, 0x8, 0x5, 0x1, 0x6, 0x1, 0x6, 0x1, 0x7, + 0x1, 0x7, 0x5, 0x7, 0x2c, 0x8, 0x7, 0xa, 0x7, 0xc, 0x7, 0x2f, 0x9, + 0x7, 0x1, 0x7, 0x1, 0x7, 0x1, 0x2d, 0x0, 0x8, 0x1, 0x3, 0x3, 0x4, + 0x5, 0x5, 0x7, 0x6, 0x9, 0x7, 0xb, 0x0, 0xd, 0x0, 0xf, 0x8, 0x1, + 0x0, 0x2, 0x5, 0x0, 0x30, 0x39, 0x5f, 0x5f, 0xb7, 0xb7, 0x300, 0x36f, 
+ 0x203f, 0x2040, 0xd, 0x0, 0x41, 0x5a, 0x61, 0x7a, 0xc0, 0xd6, 0xd8, + 0xf6, 0xf8, 0x2ff, 0x370, 0x37d, 0x37f, 0x1fff, 0x200c, 0x200d, 0x2070, + 0x218f, 0x2c00, 0x2fef, 0x3001, 0xd7ff, 0xf900, 0xfdcf, 0xfdf0, -1, + 0x0, 0x32, 0x0, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x3, 0x1, 0x0, 0x0, + 0x0, 0x0, 0x5, 0x1, 0x0, 0x0, 0x0, 0x0, 0x7, 0x1, 0x0, 0x0, 0x0, + 0x0, 0x9, 0x1, 0x0, 0x0, 0x0, 0x0, 0xf, 0x1, 0x0, 0x0, 0x0, 0x1, + 0x11, 0x1, 0x0, 0x0, 0x0, 0x3, 0x14, 0x1, 0x0, 0x0, 0x0, 0x5, 0x16, + 0x1, 0x0, 0x0, 0x0, 0x7, 0x18, 0x1, 0x0, 0x0, 0x0, 0x9, 0x1a, 0x1, + 0x0, 0x0, 0x0, 0xb, 0x25, 0x1, 0x0, 0x0, 0x0, 0xd, 0x27, 0x1, 0x0, + 0x0, 0x0, 0xf, 0x29, 0x1, 0x0, 0x0, 0x0, 0x11, 0x12, 0x5, 0x2f, 0x0, + 0x0, 0x12, 0x13, 0x5, 0x2f, 0x0, 0x0, 0x13, 0x2, 0x1, 0x0, 0x0, 0x0, + 0x14, 0x15, 0x5, 0x2f, 0x0, 0x0, 0x15, 0x4, 0x1, 0x0, 0x0, 0x0, 0x16, + 0x17, 0x5, 0x2a, 0x0, 0x0, 0x17, 0x6, 0x1, 0x0, 0x0, 0x0, 0x18, 0x19, + 0x5, 0x21, 0x0, 0x0, 0x19, 0x8, 0x1, 0x0, 0x0, 0x0, 0x1a, 0x1e, 0x3, + 0xd, 0x6, 0x0, 0x1b, 0x1d, 0x3, 0xb, 0x5, 0x0, 0x1c, 0x1b, 0x1, 0x0, + 0x0, 0x0, 0x1d, 0x20, 0x1, 0x0, 0x0, 0x0, 0x1e, 0x1c, 0x1, 0x0, 0x0, + 0x0, 0x1e, 0x1f, 0x1, 0x0, 0x0, 0x0, 0x1f, 0x21, 0x1, 0x0, 0x0, 0x0, + 0x20, 0x1e, 0x1, 0x0, 0x0, 0x0, 0x21, 0x22, 0x6, 0x4, 0x0, 0x0, 0x22, + 0xa, 0x1, 0x0, 0x0, 0x0, 0x23, 0x26, 0x3, 0xd, 0x6, 0x0, 0x24, 0x26, + 0x7, 0x0, 0x0, 0x0, 0x25, 0x23, 0x1, 0x0, 0x0, 0x0, 0x25, 0x24, 0x1, + 0x0, 0x0, 0x0, 0x26, 0xc, 0x1, 0x0, 0x0, 0x0, 0x27, 0x28, 0x7, 0x1, + 0x0, 0x0, 0x28, 0xe, 0x1, 0x0, 0x0, 0x0, 0x29, 0x2d, 0x5, 0x27, 0x0, + 0x0, 0x2a, 0x2c, 0x9, 0x0, 0x0, 0x0, 0x2b, 0x2a, 0x1, 0x0, 0x0, 0x0, + 0x2c, 0x2f, 0x1, 0x0, 0x0, 0x0, 0x2d, 0x2e, 0x1, 0x0, 0x0, 0x0, 0x2d, + 0x2b, 0x1, 0x0, 0x0, 0x0, 0x2e, 0x30, 0x1, 0x0, 0x0, 0x0, 0x2f, 0x2d, + 0x1, 0x0, 0x0, 0x0, 0x30, 0x31, 0x5, 0x27, 0x0, 0x0, 0x31, 0x10, + 0x1, 0x0, 0x0, 0x0, 0x4, 0x0, 0x1e, 0x25, 0x2d, 0x1, 0x1, 0x4, 0x0, + }; + + staticData->serializedATN = antlr4::atn::SerializedATNView(serializedATNSegment, 
sizeof(serializedATNSegment) / sizeof(serializedATNSegment[0])); + + atn::ATNDeserializer deserializer; + staticData->atn = deserializer.deserialize(staticData->serializedATN); + + size_t count = staticData->atn->getNumberOfDecisions(); + staticData->decisionToDFA.reserve(count); + for (size_t i = 0; i < count; i++) { + staticData->decisionToDFA.emplace_back(staticData->atn->getDecisionState(i), i); + } + xpathLexerStaticData = staticData.release(); +} + +} + +XPathLexer::XPathLexer(CharStream *input) : Lexer(input) { + XPathLexer::initialize(); + _interpreter = new atn::LexerATNSimulator(this, *xpathLexerStaticData->atn, xpathLexerStaticData->decisionToDFA, xpathLexerStaticData->sharedContextCache); +} + +XPathLexer::~XPathLexer() { + delete _interpreter; +} + +std::string XPathLexer::getGrammarFileName() const { + return "XPathLexer.g4"; +} + +const std::vector<std::string>& XPathLexer::getRuleNames() const { + return xpathLexerStaticData->ruleNames; +} + +const std::vector<std::string>& XPathLexer::getChannelNames() const { + return xpathLexerStaticData->channelNames; +} + +const std::vector<std::string>& XPathLexer::getModeNames() const { + return xpathLexerStaticData->modeNames; +} + +const dfa::Vocabulary& XPathLexer::getVocabulary() const { + return xpathLexerStaticData->vocabulary; +} + +antlr4::atn::SerializedATNView XPathLexer::getSerializedATN() const { + return xpathLexerStaticData->serializedATN; +} + +const atn::ATN& XPathLexer::getATN() const { + return *xpathLexerStaticData->atn; +} + +void XPathLexer::action(RuleContext *context, size_t ruleIndex, size_t actionIndex) { + switch (ruleIndex) { + case 4: IDAction(antlrcpp::downCast<antlr4::RuleContext *>(context), actionIndex); break; + + default: + break; + } +} + +void XPathLexer::IDAction(antlr4::RuleContext *context, size_t actionIndex) { + switch (actionIndex) { + case 0: + if (isupper(getText()[0])) + setType(TOKEN_REF); + else + setType(RULE_REF); + break; + + default: + break; + } +} + +void 
XPathLexer::initialize() { + ::antlr4::internal::call_once(xpathLexerOnceFlag, xpathLexerInitialize); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathLexer.h b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathLexer.h new file mode 100644 index 0000000000..6926d2161e --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathLexer.h @@ -0,0 +1,47 @@ + +// Generated from XPathLexer.g4 by ANTLR 4.9.3 + +#pragma once + + +#include "antlr4-runtime.h" + + +class XPathLexer : public antlr4::Lexer { +public: + enum { + TOKEN_REF = 1, RULE_REF = 2, ANYWHERE = 3, ROOT = 4, WILDCARD = 5, BANG = 6, + ID = 7, STRING = 8 + }; + + explicit XPathLexer(antlr4::CharStream *input); + + ~XPathLexer() override; + + virtual std::string getGrammarFileName() const override; + + virtual const std::vector<std::string>& getRuleNames() const override; + + virtual const std::vector<std::string>& getChannelNames() const override; + + virtual const std::vector<std::string>& getModeNames() const override; + + virtual const antlr4::dfa::Vocabulary& getVocabulary() const override; + + virtual antlr4::atn::SerializedATNView getSerializedATN() const override; + + virtual const antlr4::atn::ATN& getATN() const override; + + virtual void action(antlr4::RuleContext *context, size_t ruleIndex, size_t actionIndex) override; + + // By default the static state used to implement the lexer is lazily initialized during the first + // call to the constructor. You can call this function if you wish to initialize the static state + // ahead of time. + static void initialize(); +private: + // Individual action functions triggered by action() above. + void IDAction(antlr4::RuleContext *context, size_t actionIndex); + + // Individual semantic predicate functions triggered by sempred() above. 
+}; + diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathLexerErrorListener.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathLexerErrorListener.cpp new file mode 100644 index 0000000000..2804c8ee3d --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathLexerErrorListener.cpp @@ -0,0 +1,13 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "XPathLexerErrorListener.h" + +using namespace antlr4; +using namespace antlr4::tree::xpath; + +void XPathLexerErrorListener::syntaxError(Recognizer * /*recognizer*/, Token * /*offendingSymbol*/, + size_t /*line*/, size_t /*charPositionInLine*/, const std::string &/*msg*/, std::exception_ptr /*e*/) { +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathLexerErrorListener.h b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathLexerErrorListener.h new file mode 100644 index 0000000000..c0c3eaaca7 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathLexerErrorListener.h @@ -0,0 +1,22 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "BaseErrorListener.h" + +namespace antlr4 { +namespace tree { +namespace xpath { + + class ANTLR4CPP_PUBLIC XPathLexerErrorListener : public BaseErrorListener { + public: + virtual void syntaxError(Recognizer *recognizer, Token *offendingSymbol, size_t line, + size_t charPositionInLine, const std::string &msg, std::exception_ptr e) override; + }; + +} // namespace xpath +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathRuleAnywhereElement.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathRuleAnywhereElement.cpp new file mode 100644 index 0000000000..9ca910df2e --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathRuleAnywhereElement.cpp @@ -0,0 +1,20 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "tree/ParseTree.h" +#include "tree/Trees.h" + +#include "tree/xpath/XPathRuleAnywhereElement.h" + +using namespace antlr4::tree; +using namespace antlr4::tree::xpath; + +XPathRuleAnywhereElement::XPathRuleAnywhereElement(const std::string &ruleName, int ruleIndex) : XPathElement(ruleName) { + _ruleIndex = ruleIndex; +} + +std::vector<ParseTree *> XPathRuleAnywhereElement::evaluate(ParseTree *t) { + return Trees::findAllRuleNodes(t, _ruleIndex); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathRuleAnywhereElement.h b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathRuleAnywhereElement.h new file mode 100644 index 0000000000..2ceb75ceed --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathRuleAnywhereElement.h @@ -0,0 +1,27 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "XPathElement.h" + +namespace antlr4 { +namespace tree { +namespace xpath { + + /// Either {@code ID} at start of path or {@code ...//ID} in middle of path. + class ANTLR4CPP_PUBLIC XPathRuleAnywhereElement : public XPathElement { + public: + XPathRuleAnywhereElement(const std::string &ruleName, int ruleIndex); + + virtual std::vector<ParseTree *> evaluate(ParseTree *t) override; + + protected: + int _ruleIndex = 0; + }; + +} // namespace xpath +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathRuleElement.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathRuleElement.cpp new file mode 100644 index 0000000000..1d145fb575 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathRuleElement.cpp @@ -0,0 +1,30 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "tree/ParseTree.h" +#include "tree/Trees.h" + +#include "XPathRuleElement.h" + +using namespace antlr4::tree; +using namespace antlr4::tree::xpath; + +XPathRuleElement::XPathRuleElement(const std::string &ruleName, size_t ruleIndex) : XPathElement(ruleName) { + _ruleIndex = ruleIndex; +} + +std::vector<ParseTree *> XPathRuleElement::evaluate(ParseTree *t) { + // return all children of t that match nodeName + std::vector<ParseTree *> nodes; + for (auto *c : t->children) { + if (antlrcpp::is<ParserRuleContext *>(c)) { + ParserRuleContext *ctx = dynamic_cast<ParserRuleContext *>(c); + if ((ctx->getRuleIndex() == _ruleIndex && !_invert) || (ctx->getRuleIndex() != _ruleIndex && _invert)) { + nodes.push_back(ctx); + } + } + } + return nodes; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathRuleElement.h b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathRuleElement.h new file mode 100644 index 0000000000..b57276f033 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathRuleElement.h @@ -0,0 +1,26 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "XPathElement.h" + +namespace antlr4 { +namespace tree { +namespace xpath { + + class ANTLR4CPP_PUBLIC XPathRuleElement : public XPathElement { + public: + XPathRuleElement(const std::string &ruleName, size_t ruleIndex); + + virtual std::vector<ParseTree *> evaluate(ParseTree *t) override; + + protected: + size_t _ruleIndex = 0; + }; + +} // namespace xpath +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathTokenAnywhereElement.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathTokenAnywhereElement.cpp new file mode 100644 index 0000000000..c557c9d675 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathTokenAnywhereElement.cpp @@ -0,0 +1,20 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "tree/ParseTree.h" +#include "tree/Trees.h" + +#include "XPathTokenAnywhereElement.h" + +using namespace antlr4::tree; +using namespace antlr4::tree::xpath; + +XPathTokenAnywhereElement::XPathTokenAnywhereElement(const std::string &tokenName, int tokenType) : XPathElement(tokenName) { + this->tokenType = tokenType; +} + +std::vector<ParseTree *> XPathTokenAnywhereElement::evaluate(ParseTree *t) { + return Trees::findAllTokenNodes(t, tokenType); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathTokenAnywhereElement.h b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathTokenAnywhereElement.h new file mode 100644 index 0000000000..2045d91b32 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathTokenAnywhereElement.h @@ -0,0 +1,25 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "XPathElement.h" + +namespace antlr4 { +namespace tree { +namespace xpath { + + class ANTLR4CPP_PUBLIC XPathTokenAnywhereElement : public XPathElement { + protected: + int tokenType = 0; + public: + XPathTokenAnywhereElement(const std::string &tokenName, int tokenType); + + virtual std::vector<ParseTree *> evaluate(ParseTree *t) override; + }; + +} // namespace xpath +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathTokenElement.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathTokenElement.cpp new file mode 100644 index 0000000000..d52fc26afd --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathTokenElement.cpp @@ -0,0 +1,33 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "tree/ParseTree.h" +#include "tree/Trees.h" +#include "support/CPPUtils.h" +#include "Token.h" + +#include "XPathTokenElement.h" + +using namespace antlr4; +using namespace antlr4::tree; +using namespace antlr4::tree::xpath; + +XPathTokenElement::XPathTokenElement(const std::string &tokenName, size_t tokenType) : XPathElement(tokenName) { + _tokenType = tokenType; +} + +std::vector<ParseTree *> XPathTokenElement::evaluate(ParseTree *t) { + // return all children of t that match nodeName + std::vector<ParseTree *> nodes; + for (auto *c : t->children) { + if (antlrcpp::is<TerminalNode *>(c)) { + TerminalNode *tnode = dynamic_cast<TerminalNode *>(c); + if ((tnode->getSymbol()->getType() == _tokenType && !_invert) || (tnode->getSymbol()->getType() != _tokenType && _invert)) { + nodes.push_back(tnode); + } + } + } + return nodes; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathTokenElement.h b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathTokenElement.h new file mode 100644 index 
0000000000..7221530ce6 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathTokenElement.h @@ -0,0 +1,26 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "XPathElement.h" + +namespace antlr4 { +namespace tree { +namespace xpath { + + class ANTLR4CPP_PUBLIC XPathTokenElement : public XPathElement { + public: + XPathTokenElement(const std::string &tokenName, size_t tokenType); + + virtual std::vector<ParseTree *> evaluate(ParseTree *t) override; + + protected: + size_t _tokenType = 0; + }; + +} // namespace xpath +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathWildcardAnywhereElement.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathWildcardAnywhereElement.cpp new file mode 100644 index 0000000000..4ff424f056 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathWildcardAnywhereElement.cpp @@ -0,0 +1,23 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "XPath.h" +#include "tree/ParseTree.h" +#include "tree/Trees.h" + +#include "XPathWildcardAnywhereElement.h" + +using namespace antlr4::tree; +using namespace antlr4::tree::xpath; + +XPathWildcardAnywhereElement::XPathWildcardAnywhereElement() : XPathElement(XPath::WILDCARD) { +} + +std::vector<ParseTree *> XPathWildcardAnywhereElement::evaluate(ParseTree *t) { + if (_invert) { + return {}; // !* is weird but valid (empty) + } + return Trees::getDescendants(t); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathWildcardAnywhereElement.h b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathWildcardAnywhereElement.h new file mode 100644 index 0000000000..dc5d1e5a29 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathWildcardAnywhereElement.h @@ -0,0 +1,23 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "XPathElement.h" + +namespace antlr4 { +namespace tree { +namespace xpath { + + class ANTLR4CPP_PUBLIC XPathWildcardAnywhereElement : public XPathElement { + public: + XPathWildcardAnywhereElement(); + + virtual std::vector<ParseTree *> evaluate(ParseTree *t) override; + }; + +} // namespace xpath +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathWildcardElement.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathWildcardElement.cpp new file mode 100644 index 0000000000..aabda5a9be --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathWildcardElement.cpp @@ -0,0 +1,24 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "XPath.h" +#include "tree/ParseTree.h" +#include "tree/Trees.h" + +#include "XPathWildcardElement.h" + +using namespace antlr4::tree; +using namespace antlr4::tree::xpath; + +XPathWildcardElement::XPathWildcardElement() : XPathElement(XPath::WILDCARD) { +} + +std::vector<ParseTree *> XPathWildcardElement::evaluate(ParseTree *t) { + if (_invert) { + return {}; // !* is weird but valid (empty) + } + + return t->children; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathWildcardElement.h b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathWildcardElement.h new file mode 100644 index 0000000000..accb461de2 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathWildcardElement.h @@ -0,0 +1,23 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "XPathElement.h" + +namespace antlr4 { +namespace tree { +namespace xpath { + + class ANTLR4CPP_PUBLIC XPathWildcardElement : public XPathElement { + public: + XPathWildcardElement(); + + virtual std::vector<ParseTree *> evaluate(ParseTree *t) override; + }; + +} // namespace xpath +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/ya.make b/contrib/libs/antlr4_cpp_runtime/ya.make new file mode 100644 index 0000000000..cad37b0d49 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/ya.make @@ -0,0 +1,169 @@ +# Generated by devtools/yamaker from nixpkgs 22.05. 
+
+LIBRARY()
+
+LICENSE(BSD-3-Clause) # same license the "BSD 3-clause" notices in the source headers refer to
+
+LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
+
+SUBSCRIBER(
+    g:cpp-contrib
+    g:ymake
+)
+
+VERSION(4.11.1) # same version as the ORIGINAL_SOURCE archive below
+
+ORIGINAL_SOURCE(https://github.com/antlr/antlr4/archive/4.11.1.tar.gz)
+
+ADDINCL(
+    GLOBAL contrib/libs/antlr4_cpp_runtime/src
+)
+
+NO_COMPILER_WARNINGS()
+
+NO_UTIL() # NOTE(review): presumably opts this library out of the implicit util dependency - confirm in ya.make docs
+
+CFLAGS(
+    GLOBAL -DANTLR4CPP_STATIC # NOTE(review): presumably selects the static-library form of ANTLR4CPP_PUBLIC - confirm in antlr4-common.h
+)
+
+SRCS(
+    src/ANTLRErrorListener.cpp
+    src/ANTLRErrorStrategy.cpp
+    src/ANTLRFileStream.cpp
+    src/ANTLRInputStream.cpp
+    src/BailErrorStrategy.cpp
+    src/BaseErrorListener.cpp
+    src/BufferedTokenStream.cpp
+    src/CharStream.cpp
+    src/CommonToken.cpp
+    src/CommonTokenFactory.cpp
+    src/CommonTokenStream.cpp
+    src/ConsoleErrorListener.cpp
+    src/DefaultErrorStrategy.cpp
+    src/DiagnosticErrorListener.cpp
+    src/Exceptions.cpp
+    src/FailedPredicateException.cpp
+    src/InputMismatchException.cpp
+    src/IntStream.cpp
+    src/InterpreterRuleContext.cpp
+    src/Lexer.cpp
+    src/LexerInterpreter.cpp
+    src/LexerNoViableAltException.cpp
+    src/ListTokenSource.cpp
+    src/NoViableAltException.cpp
+    src/Parser.cpp
+    src/ParserInterpreter.cpp
+    src/ParserRuleContext.cpp
+    src/ProxyErrorListener.cpp
+    src/RecognitionException.cpp
+    src/Recognizer.cpp
+    src/RuleContext.cpp
+    src/RuleContextWithAltNum.cpp
+    src/RuntimeMetaData.cpp
+    src/Token.cpp
+    src/TokenSource.cpp
+    src/TokenStream.cpp
+    src/TokenStreamRewriter.cpp
+    src/UnbufferedCharStream.cpp
+    src/UnbufferedTokenStream.cpp
+    src/Vocabulary.cpp
+    src/WritableToken.cpp
+    src/atn/ATN.cpp
+    src/atn/ATNConfig.cpp
+    src/atn/ATNConfigSet.cpp
+    src/atn/ATNDeserializationOptions.cpp
+    src/atn/ATNDeserializer.cpp
+    src/atn/ATNSimulator.cpp
+    src/atn/ATNState.cpp
+    src/atn/ATNStateType.cpp
+    src/atn/ActionTransition.cpp
+    src/atn/AmbiguityInfo.cpp
+    src/atn/ArrayPredictionContext.cpp
+    src/atn/AtomTransition.cpp
+    src/atn/ContextSensitivityInfo.cpp
+    src/atn/DecisionEventInfo.cpp
+    src/atn/DecisionInfo.cpp
+    src/atn/DecisionState.cpp
+    src/atn/EpsilonTransition.cpp
+    src/atn/ErrorInfo.cpp
+    src/atn/LL1Analyzer.cpp
+    src/atn/LexerATNConfig.cpp
+    src/atn/LexerATNSimulator.cpp
+    src/atn/LexerAction.cpp
+    src/atn/LexerActionExecutor.cpp
+    src/atn/LexerChannelAction.cpp
+    src/atn/LexerCustomAction.cpp
+    src/atn/LexerIndexedCustomAction.cpp
+    src/atn/LexerModeAction.cpp
+    src/atn/LexerMoreAction.cpp
+    src/atn/LexerPopModeAction.cpp
+    src/atn/LexerPushModeAction.cpp
+    src/atn/LexerSkipAction.cpp
+    src/atn/LexerTypeAction.cpp
+    src/atn/LookaheadEventInfo.cpp
+    src/atn/NotSetTransition.cpp
+    src/atn/OrderedATNConfigSet.cpp
+    src/atn/ParseInfo.cpp
+    src/atn/ParserATNSimulator.cpp
+    src/atn/PrecedencePredicateTransition.cpp
+    src/atn/PredicateEvalInfo.cpp
+    src/atn/PredicateTransition.cpp
+    src/atn/PredictionContext.cpp
+    src/atn/PredictionContextCache.cpp
+    src/atn/PredictionContextMergeCache.cpp
+    src/atn/PredictionMode.cpp
+    src/atn/ProfilingATNSimulator.cpp
+    src/atn/RangeTransition.cpp
+    src/atn/RuleTransition.cpp
+    src/atn/SemanticContext.cpp
+    src/atn/SetTransition.cpp
+    src/atn/SingletonPredictionContext.cpp
+    src/atn/StarLoopbackState.cpp
+    src/atn/Transition.cpp
+    src/atn/TransitionType.cpp
+    src/atn/WildcardTransition.cpp
+    src/dfa/DFA.cpp
+    src/dfa/DFASerializer.cpp
+    src/dfa/DFAState.cpp
+    src/dfa/LexerDFASerializer.cpp
+    src/internal/Synchronization.cpp
+    src/misc/InterpreterDataReader.cpp
+    src/misc/Interval.cpp
+    src/misc/IntervalSet.cpp
+    src/misc/MurmurHash.cpp
+    src/misc/Predicate.cpp
+    src/support/Any.cpp
+    src/support/Arrays.cpp
+    src/support/CPPUtils.cpp
+    src/support/StringUtils.cpp
+    src/support/Utf8.cpp
+    src/tree/ErrorNodeImpl.cpp
+    src/tree/IterativeParseTreeWalker.cpp
+    src/tree/ParseTree.cpp
+    src/tree/ParseTreeListener.cpp
+    src/tree/ParseTreeVisitor.cpp
+    src/tree/ParseTreeWalker.cpp
+    src/tree/TerminalNodeImpl.cpp
+    src/tree/Trees.cpp
+    src/tree/pattern/Chunk.cpp
+    src/tree/pattern/ParseTreeMatch.cpp
+    src/tree/pattern/ParseTreePattern.cpp
+    src/tree/pattern/ParseTreePatternMatcher.cpp
+    src/tree/pattern/RuleTagToken.cpp
+    src/tree/pattern/TagChunk.cpp
+    src/tree/pattern/TextChunk.cpp
+    src/tree/pattern/TokenTagToken.cpp
+    src/tree/xpath/XPath.cpp
+    src/tree/xpath/XPathElement.cpp
+    src/tree/xpath/XPathLexer.cpp
+    src/tree/xpath/XPathLexerErrorListener.cpp
+    src/tree/xpath/XPathRuleAnywhereElement.cpp
+    src/tree/xpath/XPathRuleElement.cpp
+    src/tree/xpath/XPathTokenAnywhereElement.cpp
+    src/tree/xpath/XPathTokenElement.cpp
+    src/tree/xpath/XPathWildcardAnywhereElement.cpp
+    src/tree/xpath/XPathWildcardElement.cpp
+)
+
+END() |