#ifndef JINJA2CPP_SRC_TEMPLATE_PARSER_H #define JINJA2CPP_SRC_TEMPLATE_PARSER_H #include "error_handling.h" #include "expression_parser.h" #include "helpers.h" #include "lexer.h" #include "lexertk.h" #include "renderer.h" #include "statements.h" #include "template_parser.h" #include "value_visitors.h" #include <boost/algorithm/string/classification.hpp> #include <jinja2cpp/error_info.h> #include <jinja2cpp/template_env.h> #include <contrib/restricted/expected-lite/include/nonstd/expected.hpp> #include <list> #include <sstream> #include <string> #include <vector> #ifdef JINJA2CPP_USE_REGEX_BOOST #include <boost/regex.hpp> template <typename CharType> using BasicRegex = boost::basic_regex<CharType>; using Regex = boost::regex; using WideRegex = boost::wregex; template <typename CharIterator> using RegexIterator = boost::regex_iterator<CharIterator>; #else #include <regex> template <typename CharType> using BasicRegex = std::basic_regex<CharType>; using Regex = std::regex; using WideRegex = std::wregex; template <typename CharIterator> using RegexIterator = std::regex_iterator<CharIterator>; #endif namespace jinja2 { template<typename CharT> struct ParserTraits; struct KeywordsInfo { MultiStringLiteral name; Keyword type; }; struct TokenStrInfo : MultiStringLiteral { template<typename CharT> auto GetName() const { return MultiStringLiteral::template GetValue<CharT>(); } }; template<typename T = void> struct ParserTraitsBase { static Token::Type s_keywords[]; static KeywordsInfo s_keywordsInfo[41]; static std::unordered_map<int, MultiStringLiteral> s_tokens; static MultiStringLiteral s_regexp; }; template<typename T> MultiStringLiteral ParserTraitsBase<T>::s_regexp = UNIVERSAL_STR( R"((\{\{)|(\}\})|(\{%[\+\-]?\s+raw\s+[\+\-]?%\})|(\{%[\+\-]?\s+endraw\s+[\+\-]?%\})|(\{%\s+meta\s+%\})|(\{%\s+endmeta\s+%\})|(\{%)|(%\})|(\{#)|(#\})|(\n))"); template<> struct ParserTraits<char> : public ParserTraitsBase<> { static Regex GetRoughTokenizer() { return Regex(s_regexp.GetValueStr<char>()); } static Regex GetKeywords() { std::string pattern; std::string prefix("(^"); std::string postfix("$)"); bool isFirst = true; for (auto& info : s_keywordsInfo) { if (!isFirst) pattern += "|"; else isFirst = false; pattern += prefix + info.name.charValue + postfix; } return Regex(pattern); } static std::string GetAsString(const std::string& str, CharRange range) { return str.substr(range.startOffset, range.size()); } static InternalValue RangeToNum(const std::string& str, CharRange range, Token::Type hint) { char buff[std::max(std::numeric_limits<int64_t>::max_digits10, std::numeric_limits<double>::max_digits10) * 2 + 1]; std::copy(str.data() + range.startOffset, str.data() + range.endOffset, buff); buff[range.size()] = 0; InternalValue result; if (hint == Token::IntegerNum) { result = InternalValue(static_cast<int64_t>(strtoll(buff, nullptr, 0))); } else { char* endBuff = nullptr; int64_t val = strtoll(buff, &endBuff, 10); if ((errno == ERANGE) || *endBuff) { endBuff = nullptr; double dblVal = strtod(buff, nullptr); result = static_cast<double>(dblVal); } else result = static_cast<int64_t>(val); } return result; } }; template<> struct ParserTraits<wchar_t> : public ParserTraitsBase<> { static WideRegex GetRoughTokenizer() { return WideRegex(s_regexp.GetValueStr<wchar_t>()); } static WideRegex GetKeywords() { std::wstring pattern; std::wstring prefix(L"(^"); std::wstring postfix(L"$)"); bool isFirst = true; for (auto& info : s_keywordsInfo) { if (!isFirst) pattern += L"|"; else isFirst = false; pattern += prefix + info.name.wcharValue + postfix; } return WideRegex(pattern); } static std::string GetAsString(const std::wstring& str, CharRange range) { auto srcStr = str.substr(range.startOffset, range.size()); return detail::StringConverter<std::wstring, std::string>::DoConvert(srcStr); } static InternalValue RangeToNum(const std::wstring& str, CharRange range, Token::Type hint) { wchar_t buff[std::max(std::numeric_limits<int64_t>::max_digits10, std::numeric_limits<double>::max_digits10) * 2 + 1]; std::copy(str.data() + range.startOffset, str.data() + range.endOffset, buff); buff[range.size()] = 0; InternalValue result; if (hint == Token::IntegerNum) { result = static_cast<int64_t>(wcstoll(buff, nullptr, 0)); } else { wchar_t* endBuff = nullptr; int64_t val = wcstoll(buff, &endBuff, 10); if ((errno == ERANGE) || *endBuff) { endBuff = nullptr; double dblVal = wcstod(buff, nullptr); result = static_cast<double>(dblVal); } else result = static_cast<int64_t>(val); } return result; } }; struct StatementInfo { enum Type { TemplateRoot, IfStatement, ElseIfStatement, ForStatement, SetStatement, ExtendsStatement, BlockStatement, ParentBlockStatement, MacroStatement, MacroCallStatement, WithStatement, FilterStatement }; using ComposedPtr = std::shared_ptr<ComposedRenderer>; Type type; ComposedPtr currentComposition; std::vector<ComposedPtr> compositions; Token token; RendererPtr renderer; static StatementInfo Create(Type type, const Token& tok, ComposedPtr renderers = std::make_shared<ComposedRenderer>()) { StatementInfo result; result.type = type; result.currentComposition = renderers; result.compositions.push_back(renderers); result.token = tok; return result; } }; using StatementInfoList = std::list<StatementInfo>; class StatementsParser { public: using ParseResult = nonstd::expected<void, ParseError>; StatementsParser(const Settings& settings, TemplateEnv* env) : m_settings(settings) , m_env(env) { } ParseResult Parse(LexScanner& lexer, StatementInfoList& statementsInfo); private: ParseResult ParseFor(LexScanner& lexer, StatementInfoList& statementsInfo, const Token& stmtTok); ParseResult ParseEndFor(LexScanner& lexer, StatementInfoList& statementsInfo, const Token& stmtTok); ParseResult ParseIf(LexScanner& lexer, StatementInfoList& statementsInfo, const Token& stmtTok); ParseResult ParseElse(LexScanner& lexer, StatementInfoList& statementsInfo, const Token& stmtTok); ParseResult ParseElIf(LexScanner& lexer, StatementInfoList& statementsInfo, const Token& stmtTok); ParseResult ParseEndIf(LexScanner& lexer, StatementInfoList& statementsInfo, const Token& pos); ParseResult ParseSet(LexScanner& lexer, StatementInfoList& statementsInfo, const Token& pos); ParseResult ParseEndSet(LexScanner& lexer, StatementInfoList& statementsInfo, const Token& stmtTok); ParseResult ParseBlock(LexScanner& lexer, StatementInfoList& statementsInfo, const Token& stmtTok); ParseResult ParseEndBlock(LexScanner& lexer, StatementInfoList& statementsInfo, const Token& stmtTok); ParseResult ParseExtends(LexScanner& lexer, StatementInfoList& statementsInfo, const Token& stmtTok); ParseResult ParseMacro(LexScanner& lexer, StatementInfoList& statementsInfo, const Token& stmtTok); nonstd::expected<MacroParams, ParseError> ParseMacroParams(LexScanner& lexer); ParseResult ParseEndMacro(LexScanner& lexer, StatementInfoList& statementsInfo, const Token& stmtTok); ParseResult ParseCall(LexScanner& lexer, StatementInfoList& statementsInfo, const Token& stmtTok); ParseResult ParseEndCall(LexScanner& lexer, StatementInfoList& statementsInfo, const Token& stmtTok); ParseResult ParseInclude(LexScanner& lexer, StatementInfoList& statementsInfo, const Token& stmtTok); ParseResult ParseImport(LexScanner& lexer, StatementInfoList& statementsInfo, const Token& stmtTok); ParseResult ParseFrom(LexScanner& lexer, StatementInfoList& statementsInfo, const Token& stmtTok); ParseResult ParseDo(LexScanner& lexer, StatementInfoList& statementsInfo, const Token& stmtTok); ParseResult ParseWith(LexScanner& lexer, StatementInfoList& statementsInfo, const Token& token); ParseResult ParseEndWith(LexScanner& lexer, StatementInfoList& statementsInfo, const Token& stmtTok); ParseResult ParseFilter(LexScanner& lexer, StatementInfoList& statementsInfo, const Token& stmtTok); ParseResult ParseEndFilter(LexScanner& lexer, StatementInfoList& statementsInfo, const Token& stmtTok); private: Settings m_settings; TemplateEnv* m_env; }; template<typename CharT> class TemplateParser : public LexerHelper { public: using string_t = std::basic_string<CharT>; using traits_t = ParserTraits<CharT>; using sregex_iterator = RegexIterator<typename string_t::const_iterator>; using ErrorInfo = ErrorInfoTpl<CharT>; using ParseResult = nonstd::expected<RendererPtr, std::vector<ErrorInfo>>; TemplateParser(const string_t* tpl, const Settings& setts, TemplateEnv* env, std::string tplName) : m_template(tpl) , m_templateName(std::move(tplName)) , m_settings(setts) , m_env(env) , m_roughTokenizer(traits_t::GetRoughTokenizer()) , m_keywords(traits_t::GetKeywords()) , m_metadataType(setts.m_defaultMetadataType) { } ParseResult Parse() { auto roughResult = DoRoughParsing(); if (!roughResult) { return ParseErrorsToErrorInfo(roughResult.error()); } auto composeRenderer = std::make_shared<ComposedRenderer>(); auto fineResult = DoFineParsing(composeRenderer); if (!fineResult) return ParseErrorsToErrorInfo(fineResult.error()); return composeRenderer; } MetadataInfo<CharT> GetMetadataInfo() const { MetadataInfo<CharT> result; result.metadataType = m_metadataType; result.metadata = m_metadata; result.location = m_metadataLocation; return result; } private: enum { RM_Unknown = 0, RM_ExprBegin = 1, RM_ExprEnd, RM_RawBegin, RM_RawEnd, RM_MetaBegin, RM_MetaEnd, RM_StmtBegin, RM_StmtEnd, RM_CommentBegin, RM_CommentEnd, RM_NewLine }; struct LineInfo { CharRange range; unsigned lineNumber; }; enum class TextBlockType { RawText, Expression, Statement, Comment, LineStatement, RawBlock, MetaBlock }; struct TextBlockInfo { CharRange range; TextBlockType type; }; nonstd::expected<void, std::vector<ParseError>> DoRoughParsing() { std::vector<ParseError> foundErrors; auto matchBegin = sregex_iterator(m_template->begin(), m_template->end(), m_roughTokenizer); auto matchEnd = sregex_iterator(); auto matches = std::distance(matchBegin, matchEnd); // One line, no customization if (matches == 0) { CharRange range{ 0ULL, m_template->size() }; m_lines.push_back(LineInfo{ range, 0 }); m_textBlocks.push_back( TextBlockInfo{ range, (!m_template->empty() && m_template->front() == '#') ? TextBlockType::LineStatement : TextBlockType::RawText }); return nonstd::expected<void, std::vector<ParseError>>(); } m_currentBlockInfo.range.startOffset = 0; m_currentBlockInfo.range.endOffset = 0; m_currentLineInfo.range = m_currentBlockInfo.range; m_currentLineInfo.lineNumber = 0; if (m_settings.useLineStatements) m_currentBlockInfo.type = m_template->front() == '#' ? TextBlockType::LineStatement : TextBlockType::RawText; else m_currentBlockInfo.type = TextBlockType::RawText; do { auto result = ParseRoughMatch(matchBegin, matchEnd); if (!result) { foundErrors.push_back(result.error()); return nonstd::make_unexpected(std::move(foundErrors)); } } while (matchBegin != matchEnd); FinishCurrentLine(m_template->size()); if (m_currentBlockInfo.type == TextBlockType::RawBlock) { nonstd::expected<void, ParseError> result = MakeParseError(ErrorCode::ExpectedRawEnd, MakeToken(Token::RawEnd, { m_template->size(), m_template->size() })); foundErrors.push_back(result.error()); return nonstd::make_unexpected(std::move(foundErrors)); } else if (m_currentBlockInfo.type == TextBlockType::MetaBlock) { nonstd::expected<void, ParseError> result = MakeParseError(ErrorCode::ExpectedMetaEnd, MakeToken(Token::RawEnd, { m_template->size(), m_template->size() })); foundErrors.push_back(result.error()); return nonstd::make_unexpected(std::move(foundErrors)); } FinishCurrentBlock(m_template->size(), TextBlockType::RawText); if (!foundErrors.empty()) return nonstd::make_unexpected(std::move(foundErrors)); return nonstd::expected<void, std::vector<ParseError>>(); } nonstd::expected<void, ParseError> ParseRoughMatch(sregex_iterator& curMatch, const sregex_iterator& /*endMatch*/) { auto match = *curMatch; ++curMatch; unsigned matchType = RM_Unknown; for (unsigned idx = 1; idx != match.size(); ++idx) { if (match.length(idx) != 0) { matchType = idx; break; } } size_t matchStart = static_cast<size_t>(match.position()); switch (matchType) { case RM_NewLine: FinishCurrentLine(match.position()); m_currentLineInfo.range.startOffset = m_currentLineInfo.range.endOffset + 1; if (m_currentLineInfo.range.startOffset < m_template->size() && (m_currentBlockInfo.type == TextBlockType::RawText || m_currentBlockInfo.type == TextBlockType::LineStatement)) { if (m_currentBlockInfo.type == TextBlockType::LineStatement) { FinishCurrentBlock(matchStart, TextBlockType::RawText); m_currentBlockInfo.range.startOffset = m_currentLineInfo.range.startOffset; } if (m_settings.useLineStatements) m_currentBlockInfo.type = (*m_template)[m_currentLineInfo.range.startOffset] == '#' ? TextBlockType::LineStatement : TextBlockType::RawText; else m_currentBlockInfo.type = TextBlockType::RawText; } break; case RM_CommentBegin: if (m_currentBlockInfo.type == TextBlockType::RawBlock) break; if (m_currentBlockInfo.type != TextBlockType::RawText) { FinishCurrentLine(match.position() + 2); return MakeParseError(ErrorCode::UnexpectedCommentBegin, MakeToken(Token::CommentBegin, { matchStart, matchStart + 2 })); } FinishCurrentBlock(matchStart, TextBlockType::Comment); m_currentBlockInfo.range.startOffset = matchStart + 2; m_currentBlockInfo.type = TextBlockType::Comment; break; case RM_CommentEnd: if (m_currentBlockInfo.type == TextBlockType::RawBlock) break; if (m_currentBlockInfo.type != TextBlockType::Comment) { FinishCurrentLine(match.position() + 2); return MakeParseError(ErrorCode::UnexpectedCommentEnd, MakeToken(Token::CommentEnd, { matchStart, matchStart + 2 })); } m_currentBlockInfo.range.startOffset = FinishCurrentBlock(matchStart, TextBlockType::RawText); break; case RM_ExprBegin: StartControlBlock(TextBlockType::Expression, matchStart); break; case RM_ExprEnd: if (m_currentBlockInfo.type == TextBlockType::RawText) { FinishCurrentLine(match.position() + 2); return MakeParseError(ErrorCode::UnexpectedExprEnd, MakeToken(Token::ExprEnd, { matchStart, matchStart + 2 })); } else if (m_currentBlockInfo.type != TextBlockType::Expression || (*m_template)[match.position() - 1] == '\'') break; m_currentBlockInfo.range.startOffset = FinishCurrentBlock(matchStart, TextBlockType::RawText); break; case RM_StmtBegin: StartControlBlock(TextBlockType::Statement, matchStart); break; case RM_StmtEnd: if (m_currentBlockInfo.type == TextBlockType::RawText) { FinishCurrentLine(match.position() + 2); return MakeParseError(ErrorCode::UnexpectedStmtEnd, MakeToken(Token::StmtEnd, { matchStart, matchStart + 2 })); } else if (m_currentBlockInfo.type != TextBlockType::Statement || (*m_template)[match.position() - 1] == '\'') break; m_currentBlockInfo.range.startOffset = FinishCurrentBlock(matchStart, TextBlockType::RawText); break; case RM_RawBegin: if (m_currentBlockInfo.type == TextBlockType::RawBlock) break; else if (m_currentBlockInfo.type != TextBlockType::RawText && m_currentBlockInfo.type != TextBlockType::Comment) { FinishCurrentLine(match.position() + match.length()); return MakeParseError(ErrorCode::UnexpectedRawBegin, MakeToken(Token::RawBegin, { matchStart, matchStart + match.length() })); } StartControlBlock(TextBlockType::RawBlock, matchStart, matchStart + match.length()); break; case RM_RawEnd: if (m_currentBlockInfo.type == TextBlockType::Comment) break; else if (m_currentBlockInfo.type != TextBlockType::RawBlock) { FinishCurrentLine(match.position() + match.length()); return MakeParseError(ErrorCode::UnexpectedRawEnd, MakeToken(Token::RawEnd, { matchStart, matchStart + match.length() })); } m_currentBlockInfo.range.startOffset = FinishCurrentBlock(matchStart + match.length() - 2, TextBlockType::RawText, matchStart); break; case RM_MetaBegin: if (m_currentBlockInfo.type == TextBlockType::Comment) break; if ((m_currentBlockInfo.type != TextBlockType::RawText && m_currentBlockInfo.type != TextBlockType::Comment) || m_hasMetaBlock) { FinishCurrentLine(match.position() + match.length()); return MakeParseError(ErrorCode::UnexpectedMetaBegin, MakeToken(Token::MetaBegin, { matchStart, matchStart + match.length() })); } StartControlBlock(TextBlockType::MetaBlock, matchStart, matchStart + match.length()); m_metadataLocation.line = m_currentLineInfo.lineNumber + 1; m_metadataLocation.col = static_cast<unsigned>(match.position() - m_currentLineInfo.range.startOffset + 1); m_metadataLocation.fileName = m_templateName; break; case RM_MetaEnd: if (m_currentBlockInfo.type == TextBlockType::Comment) break; if (m_currentBlockInfo.type != TextBlockType::MetaBlock) { FinishCurrentLine(match.position() + match.length()); return MakeParseError(ErrorCode::UnexpectedMetaEnd, MakeToken(Token::MetaEnd, { matchStart, matchStart + match.length() })); } m_currentBlockInfo.range.startOffset = FinishCurrentBlock(matchStart + match.length() - 2, TextBlockType::MetaBlock, matchStart); m_hasMetaBlock = true; break; } return nonstd::expected<void, ParseError>(); } void StartControlBlock(TextBlockType blockType, size_t matchStart, size_t startOffset = 0) { if (!startOffset) startOffset = matchStart + 2; size_t endOffset = matchStart; if (m_currentBlockInfo.type != TextBlockType::RawText || m_currentBlockInfo.type == TextBlockType::RawBlock) return; else endOffset = StripBlockLeft(m_currentBlockInfo, startOffset, endOffset, blockType == TextBlockType::Expression ? false : m_settings.lstripBlocks); FinishCurrentBlock(endOffset, blockType); if (startOffset < m_template->size() && blockType != TextBlockType::MetaBlock) { if ((*m_template)[startOffset] == '+' || (*m_template)[startOffset] == '-') ++startOffset; } m_currentBlockInfo.type = blockType; if (blockType == TextBlockType::RawBlock) startOffset = StripBlockRight(m_currentBlockInfo, startOffset - 2, m_settings.trimBlocks); m_currentBlockInfo.range.startOffset = startOffset; } size_t StripBlockRight(TextBlockInfo& /* currentBlockInfo */, size_t position, bool trimBlocks) { bool doTrim = trimBlocks; size_t newPos = position + 2; if ((m_currentBlockInfo.type != TextBlockType::RawText) && position != 0) { auto ctrlChar = (*m_template)[position - 1]; doTrim = ctrlChar == '-' ? true : (ctrlChar == '+' ? false : doTrim); } if (doTrim) { auto locale = std::locale(); for (; newPos < m_template->size(); ++newPos) { auto ch = (*m_template)[newPos]; if (ch == '\n') { ++newPos; break; } if (!std::isspace(ch, locale)) break; } } return newPos; } size_t StripBlockLeft(TextBlockInfo& currentBlockInfo, size_t ctrlCharPos, size_t endOffset, bool doStrip) { bool doTotalStrip = false; if (ctrlCharPos < m_template->size()) { auto ctrlChar = (*m_template)[ctrlCharPos]; if (ctrlChar == '+') doStrip = false; else doTotalStrip = ctrlChar == '-'; doStrip |= doTotalStrip; } if (!doStrip || (currentBlockInfo.type != TextBlockType::RawText && currentBlockInfo.type != TextBlockType::RawBlock)) return endOffset; auto locale = std::locale(); auto& tpl = *m_template; auto originalOffset = endOffset; bool sameLine = true; for (; endOffset != currentBlockInfo.range.startOffset && endOffset > 0; --endOffset) { auto ch = tpl[endOffset - 1]; if (!std::isspace(ch, locale)) { if (!sameLine) break; return doTotalStrip ? endOffset : originalOffset; } if (ch == '\n') { if (!doTotalStrip) break; sameLine = false; } } return endOffset; } nonstd::expected<void, std::vector<ParseError>> DoFineParsing(std::shared_ptr<ComposedRenderer> renderers) { std::vector<ParseError> errors; StatementInfoList statementsStack; StatementInfo root = StatementInfo::Create(StatementInfo::TemplateRoot, Token(), renderers); statementsStack.push_back(root); for (auto& origBlock : m_textBlocks) { auto block = origBlock; if (block.type == TextBlockType::LineStatement) ++block.range.startOffset; switch (block.type) { case TextBlockType::RawBlock: case TextBlockType::RawText: { auto range = block.range; if (range.size() == 0) break; auto renderer = std::make_shared<RawTextRenderer>(m_template->data() + range.startOffset, range.size()); statementsStack.back().currentComposition->AddRenderer(renderer); break; } case TextBlockType::MetaBlock: { auto range = block.range; if (range.size() == 0) break; auto metadata = std::basic_string_view<CharT>(m_template->data() + range.startOffset, range.size()); if (!boost::algorithm::all(metadata, boost::algorithm::is_space())) m_metadata = metadata; break; } case TextBlockType::Expression: { auto parseResult = InvokeParser<RendererPtr, ExpressionParser>(block); if (parseResult) statementsStack.back().currentComposition->AddRenderer(*parseResult); else errors.push_back(parseResult.error()); break; } case TextBlockType::Statement: case TextBlockType::LineStatement: { auto parseResult = InvokeParser<void, StatementsParser>(block, statementsStack); if (!parseResult) errors.push_back(parseResult.error()); break; } default: break; } } if (!errors.empty()) return nonstd::make_unexpected(std::move(errors)); return nonstd::expected<void, std::vector<ParseError>>(); } template<typename R, typename P, typename... Args> nonstd::expected<R, ParseError> InvokeParser(const TextBlockInfo& block, Args&&... args) { lexertk::generator<CharT> tokenizer; auto range = block.range; auto start = m_template->data(); if (!tokenizer.process(start + range.startOffset, start + range.endOffset)) return MakeParseError(ErrorCode::Unspecified, MakeToken(Token::Unknown, { range.startOffset, range.startOffset + 1 })); tokenizer.begin(); Lexer lexer( [&tokenizer, adjust = range.startOffset]() mutable { lexertk::token tok = tokenizer.next_token(); tok.position += adjust; return tok; }, this); if (!lexer.Preprocess()) return MakeParseError(ErrorCode::Unspecified, MakeToken(Token::Unknown, { range.startOffset, range.startOffset + 1 })); P praser(m_settings, m_env); LexScanner scanner(lexer); auto result = praser.Parse(scanner, std::forward<Args>(args)...); if (!result) return result.get_unexpected(); return result; } nonstd::unexpected_type<std::vector<ErrorInfo>> ParseErrorsToErrorInfo(const std::vector<ParseError>& errors) { std::vector<ErrorInfo> resultErrors; for (auto& e : errors) { typename ErrorInfo::Data errInfoData; errInfoData.code = e.errorCode; errInfoData.srcLoc.fileName = m_templateName; OffsetToLinePos(e.errorToken.range.startOffset, errInfoData.srcLoc.line, errInfoData.srcLoc.col); errInfoData.locationDescr = GetLocationDescr(errInfoData.srcLoc.line, errInfoData.srcLoc.col); errInfoData.extraParams.emplace_back(TokenToString(e.errorToken)); for (auto& tok : e.relatedTokens) { errInfoData.extraParams.emplace_back(TokenToString(tok)); if (tok.range.startOffset != e.errorToken.range.startOffset) { SourceLocation relLoc; relLoc.fileName = m_templateName; OffsetToLinePos(tok.range.startOffset, relLoc.line, relLoc.col); errInfoData.relatedLocs.push_back(std::move(relLoc)); } } resultErrors.emplace_back(errInfoData); } return nonstd::make_unexpected(std::move(resultErrors)); } Token MakeToken(Token::Type type, const CharRange& range, string_t value = string_t()) { Token tok; tok.type = type; tok.range = range; tok.value = TargetString(static_cast<string_t>(value)); return tok; } auto TokenToString(const Token& tok) { auto p = traits_t::s_tokens.find(tok.type); if (p != traits_t::s_tokens.end()) return p->second.template GetValueStr<CharT>(); if (tok.range.size() != 0) return string_t(m_template->substr(tok.range.startOffset, tok.range.size())); else if (tok.type == Token::Identifier) { if (!tok.value.IsEmpty()) { std::basic_string<CharT> tpl; return GetAsSameString(tpl, tok.value).value_or(std::basic_string<CharT>()); } return UNIVERSAL_STR("<<Identifier>>").template GetValueStr<CharT>(); } else if (tok.type == Token::String) return UNIVERSAL_STR("<<String>>").template GetValueStr<CharT>(); return string_t(); } size_t FinishCurrentBlock(size_t position, TextBlockType nextBlockType, size_t matchStart = 0) { size_t newPos = position; if (m_currentBlockInfo.type == TextBlockType::RawBlock || m_currentBlockInfo.type == TextBlockType::MetaBlock) { size_t currentPosition = matchStart ? matchStart : position; auto origPos = position; position = StripBlockLeft(m_currentBlockInfo, currentPosition + 2, currentPosition, m_settings.lstripBlocks); newPos = StripBlockRight(m_currentBlockInfo, origPos, m_settings.trimBlocks); } else { if (m_currentBlockInfo.type == TextBlockType::RawText) position = StripBlockLeft(m_currentBlockInfo, position + 2, position, nextBlockType == TextBlockType::Expression ? false : m_settings.lstripBlocks); else if (nextBlockType == TextBlockType::RawText) newPos = StripBlockRight(m_currentBlockInfo, position, m_currentBlockInfo.type == TextBlockType::Expression ? false : m_settings.trimBlocks); if ((m_currentBlockInfo.type != TextBlockType::RawText) && position != 0) { auto ctrlChar = (*m_template)[position - 1]; if (ctrlChar == '+' || ctrlChar == '-') --position; } } m_currentBlockInfo.range.endOffset = position; m_textBlocks.push_back(m_currentBlockInfo); m_currentBlockInfo.type = TextBlockType::RawText; return newPos; } void FinishCurrentLine(int64_t position) { m_currentLineInfo.range.endOffset = static_cast<size_t>(position); m_lines.push_back(m_currentLineInfo); m_currentLineInfo.lineNumber++; } void OffsetToLinePos(size_t offset, unsigned& line, unsigned& col) { auto p = std::find_if( m_lines.begin(), m_lines.end(), [offset](const LineInfo& info) { return offset >= info.range.startOffset && offset < info.range.endOffset; }); if (p == m_lines.end()) { if (m_lines.empty() || offset != m_lines.back().range.endOffset) { line = 1; col = 1; return; } p = m_lines.end() - 1; } line = p->lineNumber + 1; col = static_cast<unsigned>(offset - p->range.startOffset + 1); } string_t GetLocationDescr(unsigned line, unsigned col) { if (line == 0 && col == 0) return string_t(); --line; --col; auto toCharT = [](char ch) { return static_cast<CharT>(ch); }; auto& lineInfo = m_lines[line]; std::basic_ostringstream<CharT> os; auto origLine = m_template->substr(lineInfo.range.startOffset, lineInfo.range.size()); os << origLine << std::endl; string_t spacePrefix; auto locale = std::locale(); for (auto ch : origLine) { if (!std::isspace(ch, locale)) break; spacePrefix.append(1, ch); } const int headLen = 3; const int tailLen = 7; auto spacePrefixLen = spacePrefix.size(); if (col < spacePrefixLen) { for (unsigned i = 0; i < col; ++i) os << toCharT(' '); os << toCharT('^'); for (int i = 0; i < tailLen; ++i) os << toCharT('-'); return os.str(); } os << spacePrefix; int actualHeadLen = std::min(static_cast<int>(col - spacePrefixLen), headLen); if (actualHeadLen == headLen) { for (std::size_t i = 0; i < col - actualHeadLen - spacePrefixLen; ++i) os << toCharT(' '); } for (int i = 0; i < actualHeadLen; ++i) os << toCharT('-'); os << toCharT('^'); for (int i = 0; i < tailLen; ++i) os << toCharT('-'); return os.str(); } // LexerHelper interface std::string GetAsString(const CharRange& range) override { return traits_t::GetAsString(*m_template, range); } InternalValue GetAsValue(const CharRange& range, Token::Type type) override { if (type == Token::String) { auto rawValue = CompileEscapes(m_template->substr(range.startOffset, range.size())); return InternalValue(TargetString(std::move(rawValue))); } if (type == Token::IntegerNum || type == Token::FloatNum) return traits_t::RangeToNum(*m_template, range, type); return InternalValue(); } Keyword GetKeyword(const CharRange& range) override { auto matchBegin = sregex_iterator(m_template->begin() + range.startOffset, m_template->begin() + range.endOffset, m_keywords); auto matchEnd = sregex_iterator(); auto matches = std::distance(matchBegin, matchEnd); // One line, no customization if (matches == 0) return Keyword::Unknown; auto& match = *matchBegin; for (size_t idx = 1; idx != match.size(); ++idx) { if (match.length(idx) != 0) { return traits_t::s_keywordsInfo[idx - 1].type; } } return Keyword::Unknown; } char GetCharAt(size_t /*pos*/) override { return '\0'; } private: const string_t* m_template; std::string m_templateName; const Settings& m_settings; TemplateEnv* m_env = nullptr; BasicRegex<CharT> m_roughTokenizer; BasicRegex<CharT> m_keywords; std::vector<LineInfo> m_lines; std::vector<TextBlockInfo> m_textBlocks; LineInfo m_currentLineInfo = {}; TextBlockInfo m_currentBlockInfo = {}; bool m_hasMetaBlock = false; std::basic_string_view<CharT> m_metadata; std::string m_metadataType; SourceLocation m_metadataLocation; }; template<typename T> KeywordsInfo ParserTraitsBase<T>::s_keywordsInfo[41] = { { UNIVERSAL_STR("for"), Keyword::For }, { UNIVERSAL_STR("endfor"), Keyword::Endfor }, { UNIVERSAL_STR("in"), Keyword::In }, { UNIVERSAL_STR("if"), Keyword::If }, { UNIVERSAL_STR("else"), Keyword::Else }, { UNIVERSAL_STR("elif"), Keyword::ElIf }, { UNIVERSAL_STR("endif"), Keyword::EndIf }, { UNIVERSAL_STR("or"), Keyword::LogicalOr }, { UNIVERSAL_STR("and"), Keyword::LogicalAnd }, { UNIVERSAL_STR("not"), Keyword::LogicalNot }, { UNIVERSAL_STR("is"), Keyword::Is }, { UNIVERSAL_STR("block"), Keyword::Block }, { UNIVERSAL_STR("endblock"), Keyword::EndBlock }, { UNIVERSAL_STR("extends"), Keyword::Extends }, { UNIVERSAL_STR("macro"), Keyword::Macro }, { UNIVERSAL_STR("endmacro"), Keyword::EndMacro }, { UNIVERSAL_STR("call"), Keyword::Call }, { UNIVERSAL_STR("endcall"), Keyword::EndCall }, { UNIVERSAL_STR("filter"), Keyword::Filter }, { UNIVERSAL_STR("endfilter"), Keyword::EndFilter }, { UNIVERSAL_STR("set"), Keyword::Set }, { UNIVERSAL_STR("endset"), Keyword::EndSet }, { UNIVERSAL_STR("include"), Keyword::Include }, { UNIVERSAL_STR("import"), Keyword::Import }, { UNIVERSAL_STR("true"), Keyword::True }, { UNIVERSAL_STR("false"), Keyword::False }, { UNIVERSAL_STR("True"), Keyword::True }, { UNIVERSAL_STR("False"), Keyword::False }, { UNIVERSAL_STR("none"), Keyword::None }, { UNIVERSAL_STR("None"), Keyword::None }, { UNIVERSAL_STR("recursive"), Keyword::Recursive }, { UNIVERSAL_STR("scoped"), Keyword::Scoped }, { UNIVERSAL_STR("with"), Keyword::With }, { UNIVERSAL_STR("endwith"), Keyword::EndWith }, { UNIVERSAL_STR("without"), Keyword::Without }, { UNIVERSAL_STR("ignore"), Keyword::Ignore }, { UNIVERSAL_STR("missing"), Keyword::Missing }, { UNIVERSAL_STR("context"), Keyword::Context }, { UNIVERSAL_STR("from"), Keyword::From }, { UNIVERSAL_STR("as"), Keyword::As }, { UNIVERSAL_STR("do"), Keyword::Do }, }; template<typename T> std::unordered_map<int, MultiStringLiteral> ParserTraitsBase<T>::s_tokens = { { Token::Unknown, UNIVERSAL_STR("<<Unknown>>") }, { Token::Lt, UNIVERSAL_STR("<") }, { Token::Gt, UNIVERSAL_STR(">") }, { Token::Plus, UNIVERSAL_STR("+") }, { Token::Minus, UNIVERSAL_STR("-") }, { Token::Percent, UNIVERSAL_STR("%") }, { Token::Mul, UNIVERSAL_STR("*") }, { Token::Div, UNIVERSAL_STR("/") }, { Token::LBracket, UNIVERSAL_STR("(") }, { Token::RBracket, UNIVERSAL_STR(")") }, { Token::LSqBracket, UNIVERSAL_STR("[") }, { Token::RSqBracket, UNIVERSAL_STR("]") }, { Token::LCrlBracket, UNIVERSAL_STR("{") }, { Token::RCrlBracket, UNIVERSAL_STR("}") }, { Token::Assign, UNIVERSAL_STR("=") }, { Token::Comma, UNIVERSAL_STR(",") }, { Token::Eof, UNIVERSAL_STR("<<End of block>>") }, { Token::Equal, UNIVERSAL_STR("==") }, { Token::NotEqual, UNIVERSAL_STR("!=") }, { Token::LessEqual, UNIVERSAL_STR("<=") }, { Token::GreaterEqual, UNIVERSAL_STR(">=") }, { Token::StarStar, UNIVERSAL_STR("**") }, { Token::DashDash, UNIVERSAL_STR("//") }, { Token::LogicalOr, UNIVERSAL_STR("or") }, { Token::LogicalAnd, UNIVERSAL_STR("and") }, { Token::LogicalNot, UNIVERSAL_STR("not") }, { Token::MulMul, UNIVERSAL_STR("**") }, { Token::DivDiv, UNIVERSAL_STR("//") }, { Token::True, UNIVERSAL_STR("true") }, { Token::False, UNIVERSAL_STR("false") }, { Token::None, UNIVERSAL_STR("none") }, { Token::In, UNIVERSAL_STR("in") }, { Token::Is, UNIVERSAL_STR("is") }, { Token::For, UNIVERSAL_STR("for") }, { Token::Endfor, UNIVERSAL_STR("endfor") }, { Token::If, UNIVERSAL_STR("if") }, { Token::Else, UNIVERSAL_STR("else") }, { Token::ElIf, UNIVERSAL_STR("elif") }, { Token::EndIf, UNIVERSAL_STR("endif") }, { Token::Block, UNIVERSAL_STR("block") }, { Token::EndBlock, UNIVERSAL_STR("endblock") }, { Token::Extends, UNIVERSAL_STR("extends") }, { Token::Macro, UNIVERSAL_STR("macro") }, { Token::EndMacro, UNIVERSAL_STR("endmacro") }, { Token::Call, UNIVERSAL_STR("call") }, { Token::EndCall, UNIVERSAL_STR("endcall") }, { Token::Filter, UNIVERSAL_STR("filter") }, { Token::EndFilter, UNIVERSAL_STR("endfilter") }, { Token::Set, UNIVERSAL_STR("set") }, { Token::EndSet, UNIVERSAL_STR("endset") }, { Token::Include, UNIVERSAL_STR("include") }, { Token::Import, UNIVERSAL_STR("import") }, { Token::Recursive, UNIVERSAL_STR("recursive") }, { Token::Scoped, UNIVERSAL_STR("scoped") }, { Token::With, UNIVERSAL_STR("with") }, { Token::EndWith, UNIVERSAL_STR("endwith") }, { Token::Without, UNIVERSAL_STR("without") }, { Token::Ignore, UNIVERSAL_STR("ignore") }, { Token::Missing, UNIVERSAL_STR("missing") }, { Token::Context, UNIVERSAL_STR("context") }, { Token::From, UNIVERSAL_STR("form") }, { Token::As, UNIVERSAL_STR("as") }, { Token::Do, UNIVERSAL_STR("do") }, { Token::RawBegin, UNIVERSAL_STR("{% raw %}") }, { Token::RawEnd, UNIVERSAL_STR("{% endraw %}") }, { Token::MetaBegin, UNIVERSAL_STR("{% meta %}") }, { Token::MetaEnd, UNIVERSAL_STR("{% endmeta %}") }, { Token::CommentBegin, UNIVERSAL_STR("{#") }, { Token::CommentEnd, UNIVERSAL_STR("#}") }, { Token::StmtBegin, UNIVERSAL_STR("{%") }, { Token::StmtEnd, UNIVERSAL_STR("%}") }, { Token::ExprBegin, UNIVERSAL_STR("{{") }, { Token::ExprEnd, UNIVERSAL_STR("}}") }, }; } // namespace jinja2 #endif // JINJA2CPP_SRC_TEMPLATE_PARSER_H