diff options
author | snaury <snaury@yandex-team.com> | 2024-10-16 12:16:48 +0300 |
---|---|---|
committer | snaury <snaury@yandex-team.com> | 2024-10-16 12:32:13 +0300 |
commit | e0fb25470a47f0c243091ed28bf54a186f732f6a (patch) | |
tree | e85dfe628401f4f21749ab95b9d711242e3b49cd /contrib/libs/jinja2cpp/src/lexertk.h | |
parent | b3b4a0b9681eb0981f9958a426c95a53f79169a7 (diff) | |
download | ydb-e0fb25470a47f0c243091ed28bf54a186f732f6a.tar.gz |
ydblib: add jinja2cpp
commit_hash:f3563041f6f6f7443e75fc99acd2c967d0debb04
Diffstat (limited to 'contrib/libs/jinja2cpp/src/lexertk.h')
-rw-r--r-- | contrib/libs/jinja2cpp/src/lexertk.h | 1842 |
1 files changed, 1842 insertions, 0 deletions
// diff --git a/contrib/libs/jinja2cpp/src/lexertk.h b/contrib/libs/jinja2cpp/src/lexertk.h
// new file mode 100644
// index 0000000000..c318cc7f3d
// --- /dev/null
// +++ b/contrib/libs/jinja2cpp/src/lexertk.h
// @@ -0,0 +1,1842 @@
/*
 *****************************************************************
 *                 Simple C++ Lexer Toolkit Library              *
 *                                                               *
 * Author: Arash Partow (2001)                                   *
 * Modified: Flex Ferrum (2018)                                  *
 * URL: http://www.partow.net/programming/lexertk/index.html     *
 *                                                               *
 * Copyright notice:                                             *
 * Free use of the Simple C++ Lexer Toolkit Library is permitted *
 * under the guidelines and in accordance with the MIT License.  *
 * http://www.opensource.org/licenses/MIT                        *
 *                                                               *
 *                                                               *
 * The lexer will tokenize input against the following BNF:      *
 *                                                               *
 * expression ::= term { +|- term }                              *
 * term       ::= (symbol | factor) {operator symbol | factor}   *
 * factor     ::= symbol | ( '(' {-} expression ')' )            *
 * symbol     ::= number | gensymb | string                      *
 * gensymb    ::= alphabet {alphabet | digit}                    *
 * string     ::= '"' {alphabet | digit | operator } '"'         *
 * operator   ::= * | / | % | ^ | < | > | <= | >= | << | >> | != *
 * alphabet   ::= a | b | .. | z | A | B | .. | Z                *
 * digit      ::= 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9          *
 * sign       ::= + | -                                          *
 * edef       ::= e | E                                          *
 * decimal    ::= {digit} (digit [.] | [.] digit) {digit}        *
 * exponent   ::= edef [sign] digit {digit}                      *
 * real       ::= [sign] decimal [exponent]                      *
 * integer    ::= [sign] {digit}                                 *
 * number     ::= real | integer                                 *
 *                                                               *
 *                                                               *
 * Note: This lexer has been taken from the ExprTk Library.      *
 *                                                               *
 *****************************************************************
*/


#ifndef JINJA2CPP_SRC_LEXERTK_H
#define JINJA2CPP_SRC_LEXERTK_H

#include <algorithm>
#include <cctype>
#include <clocale>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <deque>
#include <exception>
#include <iterator>
#include <limits>
#include <locale>
#include <map>
#include <set>
#include <stack>
#include <stdexcept>
#include <string>
#include <vector>


namespace lexertk
{
    /**
     * Non-owning view over the half-open character range [start, end).
     * The view never allocates or frees; it only does pointer arithmetic
     * on the two pointers it is given.
     */
    template<typename CharT>
    struct CharRange
    {
        CharT* start;
        CharT* end;

        /// Number of characters between `start` and `end` (signed difference).
        auto length() const {return end - start;}
        /// Signed distance of `start` from the pointer `from`.
        auto offset(CharT* from) const {return start - from;}
        /// Unchecked element access relative to `start`.
        auto operator[] (size_t idx) const {return start[idx];}
    };

    namespace details
    {
#if 0
        inline bool is_whitespace(const char c)
        {
            return (' '  == c) || ('\n' == c) ||
                   ('\r' == c) || ('\t' == c) ||
                   ('\b' == c) || ('\v' == c) ||
                   ('\f' == c) ;
        }


        inline bool is_letter(const char c)
        {
            return (('a' <= c) && (c <= 'z')) || (('A' <= c) && (c <= 'Z'));
        }

        inline bool is_digit(const char c)
        {
            return ('0' <= c) && (c <= '9');
        }

        inline bool is_letter_or_digit(const char c)
        {
            return is_letter(c) || is_digit(c);
        }
#endif

        /**
         * Returns true when `c` equals one of the characters of the
         * NUL-terminated narrow string `set`. Each set member is converted to
         * CharT before comparing, which is the same narrow-vs-CharT comparison
         * the scanner itself performs (e.g. `c0 == '<'`).
         */
        template<typename CharT>
        inline bool is_one_of(const CharT c, const char* set)
        {
            for (; '\0' != *set; ++set)
            {
                if (static_cast<CharT>(*set) == c)
                    return true;
            }

            return false;
        }

        /**
         * Character classification used by the generator.
         *
         * Letter/digit/space tests and case mapping go through the <locale>
         * overloads with the locale returned by get_locale(). The punctuation
         * tests are written once for every CharT; they previously existed as
         * two identical explicit specializations (char and wchar_t).
         */
        template<typename CharT>
        struct lexer_traits
        {
            /// Function-local static copy of std::locale(), created on first call.
            static auto& get_locale()
            {
                static auto locale = std::locale();
                return locale;
            }

            static bool is_whitespace(const CharT c)
            {
                return std::isspace(c, get_locale());
            }
            static bool is_letter(const CharT c)
            {
                return std::isalpha(c, get_locale());
            }
            static bool is_digit(const CharT c)
            {
                return std::isdigit(c, get_locale());
            }
            static bool is_letter_or_digit(CharT c)
            {
                return std::isalnum(c, get_locale());
            }
            /// True for any character that scan_token() routes to scan_operator().
            static bool is_operator_char(const CharT c)
            {
                return is_one_of(c, "+-*/^<>=,!()[]{}%:?&|;~");
            }
            static bool is_left_bracket(const CharT c)
            {
                return is_one_of(c, "([{");
            }
            static bool is_right_bracket(const CharT c)
            {
                return is_one_of(c, ")]}");
            }
            static bool is_sign(const CharT c)
            {
                return is_one_of(c, "+-");
            }
            /// True when `c` belongs to none of the classes tested below.
            static bool is_invalid(const CharT c)
            {
                return !is_whitespace(c)    &&
                       !is_operator_char(c) &&
                       !is_letter(c)        &&
                       !is_digit(c)         &&
                       !is_one_of(c, "._$~'");
            }
            static bool is_bracket(const CharT c)
            {
                return is_left_bracket(c) || is_right_bracket(c);
            }
            static CharT tolower(const CharT c)
            {
                return std::tolower(c, get_locale());
            }
            static CharT toupper(const CharT c)
            {
                return std::toupper(c, get_locale());
            }
            /// Case-insensitive comparison of two characters.
            static inline bool imatch(const CharT c1, const CharT c2)
            {
                return tolower(c1) == tolower(c2);
            }

            /// Case-insensitive comparison of two ranges; ranges of different length never match.
            static inline bool imatch(const CharRange<CharT>& s1, const CharRange<CharT>& s2)
            {
                const auto len1 = s1.length();

                if (len1 != s2.length())
                    return false;

                for (std::size_t i = 0; i < static_cast<std::size_t>(len1); ++i)
                {
                    if (tolower(s1[i]) != tolower(s2[i]))
                        return false;
                }

                return true;
            }
        };

        /**
         * Case-insensitive "less than" over CharRange: lexicographic on the
         * lower-cased characters, the shorter range ordering first on a tie.
         */
        template<typename CharT>
        struct ilesscompare
        {
            inline bool operator()(const CharRange<CharT>& s1, const CharRange<CharT>& s2) const
            {
                using traits = lexer_traits<CharT>;
                const std::size_t length = static_cast<std::size_t>(std::min(s1.length(),s2.length()));

                for (std::size_t i = 0; i < length;  ++i)
                {
                    const CharT c1 = traits::tolower(s1[i]);
                    const CharT c2 = traits::tolower(s2[i]);

                    if (c1 != c2)
                        return c1 < c2;
                }

                return s1.length() < s2.length();
            }
        };

    } // namespace details

    /**
     * One lexical token: a kind plus the [position, position + length) span it
     * occupies in the scanned buffer. The token stores no text of its own.
     */
    struct token
    {

        enum token_type
        {
            e_none        =   0, e_error       =   1, e_err_symbol  =   2,
            e_err_number  =   3, e_err_string  =   4, e_err_sfunc   =   5,
            e_eof         =   6, e_number      =   7, e_symbol      =   8,
            e_string      =   9, e_eq          =  10, e_shr         =  11,
            e_shl         =  12, e_lte         =  13, e_ne          =  14,
            e_gte         =  15, e_lt          = '<', e_gt          = '>',
            e_rbracket    = ')', e_lbracket    = '(', e_tilda       = '~',
            e_rsqrbracket = ']', e_lsqrbracket = '[', e_rcrlbracket = '}',
            e_lcrlbracket = '{', e_comma       = ',', e_add         = '+',
            e_sub         = '-', e_div         = '/', e_mul         = '*',
            e_mod         = '%', e_pow         = '^', e_colon       = ':',
            e_dot         = '.', e_divdiv      =  16, e_mulmul      =  17,
            e_assign      = '=', e_pipe        = '|',
        };

        token()
        : type(e_none),
          position(std::numeric_limits<std::size_t>::max()),
          length(0)
        {}

        /// Restores the default-constructed state (including `length`, which
        /// the original left stale).
        void clear()
        {
            type     = e_none;
            position = std::numeric_limits<std::size_t>::max();
            length   = 0;
        }

        /**
         * Marks this token as kind `tt` covering [begin, end).
         * `position` is the distance of `begin` from `base_begin`.
         */
        template <typename Iterator>
        inline token& set_operator(const token_type tt, const Iterator begin, const Iterator end, const Iterator base_begin)
        {
            type     = tt;
            position = static_cast<std::size_t>(std::distance(base_begin,begin));
            length   = static_cast<std::size_t>(end - begin);
            return *this;
        }

        template <typename Iterator>
        inline token& set_symbol(const Iterator begin, const Iterator end, const Iterator base_begin)
        {
            return set_operator(e_symbol,begin,end,base_begin);
        }

        template <typename Iterator>
        inline token& set_numeric(const Iterator begin, const Iterator end, const Iterator base_begin)
        {
            return set_operator(e_number,begin,end,base_begin);
        }

        template <typename Iterator>
        inline token& set_string(const Iterator begin, const Iterator end, const Iterator base_begin)
        {
            return set_operator(e_string,begin,end,base_begin);
        }

        /**
         * Marks this token as an error covering [begin, end).
         *
         * NOTE(review): the original tested `et` against the four error kinds
         * but stored e_error in both branches, so the specific kind was never
         * recorded. That observable behaviour is kept; confirm with callers
         * before storing `et` instead.
         */
        template <typename Iterator>
        inline token& set_error(const token_type et, const Iterator begin, const Iterator end, const Iterator base_begin)
        {
            static_cast<void>(et);
            return set_operator(e_error,begin,end,base_begin);
        }

        /// Printable name of a token kind; "UNKNOWN" for values without a case.
        static inline const char* to_str(token_type t)
        {
            switch (t)
            {
                case e_none        : return "NONE";
                case e_error       : return "ERROR";
                case e_err_symbol  : return "ERROR_SYMBOL";
                case e_err_number  : return "ERROR_NUMBER";
                case e_err_string  : return "ERROR_STRING";
                case e_eof         : return "EOF";
                case e_number      : return "NUMBER";
                case e_symbol      : return "SYMBOL";
                case e_string      : return "STRING";
                case e_assign      : return "=";
                case e_shr         : return ">>";
                case e_shl         : return "<<";
                case e_lte         : return "<=";
                case e_ne          : return "!=";
                case e_gte         : return ">=";
                case e_lt          : return "<";
                case e_gt          : return ">";
                case e_eq          : return "==";
                case e_rbracket    : return ")";
                case e_lbracket    : return "(";
                case e_rsqrbracket : return "]";
                case e_lsqrbracket : return "[";
                case e_rcrlbracket : return "}";
                case e_lcrlbracket : return "{";
                case e_comma       : return ",";
                case e_dot         : return ".";
                case e_add         : return "+";
                case e_sub         : return "-";
                case e_div         : return "/";
                case e_mul         : return "*";
                case e_mod         : return "%";
                case e_pow         : return "^";
                case e_colon       : return ":";
                case e_divdiv      : return "//";
                case e_mulmul      : return "**";
                case e_pipe        : return "|";   // previously fell through to "UNKNOWN"
                case e_tilda       : return "~";   // previously fell through to "UNKNOWN"
                default            : return "UNKNOWN";
            }
        }

        inline bool is_error() const
        {
            return (
                     (e_error      == type) ||
                     (e_err_symbol == type) ||
                     (e_err_number == type) ||
                     (e_err_string == type)
                   );
        }

        token_type type;
        size_t position;
        size_t length;
    };

    /**
     * Tokenizer. process() scans a caller-owned buffer into a token list;
     * the remaining members iterate over, index into, or query that list.
     * The buffer is referenced, not copied (only raw pointers are stored).
     */
    template<typename CharT>
    class generator
    {
    public:

        typedef token token_t;
        typedef std::vector<token_t> token_list_t;
        typedef std::vector<token_t>::iterator token_list_itr_t;
        typedef details::lexer_traits<CharT> traits;

        generator()
        : base_itr_(nullptr),
          s_itr_(nullptr),
          s_end_(nullptr)
        {
            clear();
        }

        /// Drops all tokens and forgets the scanned buffer.
        inline void clear()
        {
            base_itr_ = nullptr;
            s_itr_    = nullptr;
            s_end_    = nullptr;
            token_list_.clear();
            token_itr_ = token_list_.end();
            store_token_itr_ = token_itr_;
        }

        inline bool process(const std::basic_string<CharT>& str)
        {
            return process(str.data(), str.data() + str.size());
        }

        /**
         * Tokenizes [begin, end).
         * @return false as soon as an error token has been produced (it is the
         *         last element of the list), true otherwise.
         *
         * The read cursor and the stored cursor are re-seated on the first
         * token before returning: the list was rebuilt, so iterators taken
         * before this call no longer refer to it.
         */
        inline bool process(const CharT* begin, const CharT* end)
        {
            base_itr_ = begin;
            s_itr_    = begin;
            s_end_    = end;

            eof_token_.set_operator(token_t::e_eof,s_end_,s_end_,base_itr_);
            token_list_.clear();

            bool ok = true;

            while (!is_end(s_itr_))
            {
                scan_token();

                if (token_list_.empty())
                    break;              // nothing but whitespace was consumed
                else if (token_list_.back().is_error())
                {
                    ok = false;
                    break;
                }
            }

            token_itr_ = token_list_.begin();
            store_token_itr_ = token_itr_;

            return ok;
        }

        inline bool empty() const
        {
            return token_list_.empty();
        }

        inline size_t size() const
        {
            return token_list_.size();
        }

        /// Moves the read cursor (and the stored cursor) to the first token.
        inline void begin()
        {
            token_itr_ = token_list_.begin();
            store_token_itr_ = token_itr_;
        }

        /// Remembers the current read cursor for a later restore().
        inline void store()
        {
            store_token_itr_ = token_itr_;
        }

        inline void restore()
        {
            token_itr_ = store_token_itr_;
        }

        /// Returns the token under the cursor and advances; the EOF token once exhausted.
        inline token_t& next_token()
        {
            if (token_list_.end() != token_itr_)
            {
                return *token_itr_++;
            }
            else
                return eof_token_;
        }

        /// Returns the token under the cursor without advancing; the EOF token once exhausted.
        inline token_t& peek_next_token()
        {
            if (token_list_.end() != token_itr_)
            {
                return *token_itr_;
            }
            else
                return eof_token_;
        }

        /// Indexed access; an out-of-range index yields the EOF token.
        inline token_t& operator[](const std::size_t& index)
        {
            if (index < token_list_.size())
                return token_list_[index];
            else
                return eof_token_;
        }

        inline token_t operator[](const std::size_t& index) const
        {
            if (index < token_list_.size())
                return token_list_[index];
            else
                return eof_token_;
        }

        inline bool finished() const
        {
            return (token_list_.end() == token_itr_);
        }

        /**
         * Text from the most recently returned token (or from the first token
         * when none has been returned yet) up to the end of the buffer; empty
         * when the cursor is at the end of the list.
         */
        inline std::basic_string<CharT> remaining() const
        {
            using string = std::basic_string<CharT>;
            if (finished())
                return string();
            else if (token_list_.begin() != token_itr_)
                return string(base_itr_ + (token_itr_ - 1)->position,s_end_);
            else
                return string(base_itr_ + token_itr_->position,s_end_);
        }

    private:

        inline bool is_end(const CharT* itr)
        {
            return (s_end_ == itr);
        }

        inline void skip_whitespace()
        {
            while (!is_end(s_itr_) && traits::is_whitespace(*s_itr_))
            {
                ++s_itr_;
            }
        }

        /// Skips whitespace, then dispatches on the first character of the next token.
        inline void scan_token()
        {
            skip_whitespace();

            if (is_end(s_itr_))
            {
                return;
            }
            else if (traits::is_operator_char(*s_itr_))
            {
                scan_operator();
                return;
            }
            else if (traits::is_letter(*s_itr_) || ('_' == (*s_itr_)))
            {
                scan_symbol();
                return;
            }
            else if (traits::is_digit((*s_itr_)) || ('.' == (*s_itr_)))
            {
                scan_number();
                return;
            }
            else if ('\'' == (*s_itr_) || '\"' == (*s_itr_))
            {
                scan_string();
                return;
            }
            else
            {
                // The error span is two characters wide as before, but it is
                // clamped to the buffer: with the offending character in last
                // position the original reported a span ending past s_end_.
                const CharT* err_end = is_end(s_itr_ + 1) ? (s_itr_ + 1) : (s_itr_ + 2);

                token_t t;
                t.set_error(token::e_error,s_itr_,err_end,base_itr_);
                token_list_.push_back(t);
                ++s_itr_;
            }
        }

        /// Emits a two-character operator when one matches, otherwise a one-character token.
        inline void scan_operator()
        {
            token_t t;

            if (!is_end(s_itr_ + 1))
            {
                token_t::token_type ttype = token_t::e_none;

                const CharT c0 = s_itr_[0];
                const CharT c1 = s_itr_[1];

                     if ((c0 == '<') && (c1 == '=')) ttype = token_t::e_lte;
                else if ((c0 == '>') && (c1 == '=')) ttype = token_t::e_gte;
                else if ((c0 == '<') && (c1 == '>')) ttype = token_t::e_ne;
                else if ((c0 == '!') && (c1 == '=')) ttype = token_t::e_ne;
                else if ((c0 == '=') && (c1 == '=')) ttype = token_t::e_eq;
                else if ((c0 == ':') && (c1 == '=')) ttype = token_t::e_assign;
                else if ((c0 == '<') && (c1 == '<')) ttype = token_t::e_shl;
                else if ((c0 == '>') && (c1 == '>')) ttype = token_t::e_shr;
                else if ((c0 == '*') && (c1 == '*')) ttype = token_t::e_mulmul;
                else if ((c0 == '/') && (c1 == '/')) ttype = token_t::e_divdiv;

                if (token_t::e_none != ttype)
                {
                    t.set_operator(ttype,s_itr_,s_itr_ + 2,base_itr_);
                    token_list_.push_back(t);
                    s_itr_ += 2;
                    return;
                }
            }

            if ('<' == *s_itr_)
                t.set_operator(token_t::e_lt ,s_itr_,s_itr_ + 1,base_itr_);
            else if ('>' == *s_itr_)
                t.set_operator(token_t::e_gt ,s_itr_,s_itr_ + 1,base_itr_);
            else if (';' == *s_itr_)
                t.set_operator(token_t::e_eof,s_itr_,s_itr_ + 1,base_itr_);
            else if ('&' == *s_itr_)
                t.set_symbol(s_itr_,s_itr_ + 1,base_itr_);
            else if ('|' == *s_itr_)
                t.set_operator(token::e_pipe,s_itr_,s_itr_ + 1,base_itr_);
            else
                // Every other character becomes a token whose kind is its own code.
                t.set_operator(token_t::token_type(*s_itr_),s_itr_,s_itr_ + 1,base_itr_);

            token_list_.push_back(t);

            ++s_itr_;
        }

        /// Consumes a run of letters, digits and underscores as one symbol token.
        inline void scan_symbol()
        {
            const CharT* begin = s_itr_;
            while (
                    (!is_end(s_itr_)) &&
                    (traits::is_letter_or_digit(*s_itr_) || ((*s_itr_) == '_'))
                  )
            {
                ++s_itr_;
            }
            token_t t;
            t.set_symbol(begin,s_itr_,base_itr_);
            token_list_.push_back(t);
        }

        inline void scan_number()
        {
            /*
               Attempt to match a valid numeric value in one of the following formats:
               01. 123456
               02. 123.456
               03. 123.456e3
               04. 123.456E3
               05. 123.456e+3
               06. 123.456E+3
               07. 123.456e-3
               08. 123.456E-3
               09. .1234
               10. .1234e3
               11. .1234E+3
               12. .1234e+3
               13. .1234E-3
               14. .1234e-3
            */
            const CharT* begin      = s_itr_;
            bool dot_found          = false;
            bool e_found            = false;
            bool post_e_sign_found  = false;
            bool post_e_digit_found = false;
            token_t t;

            // A '.' that is not followed by a digit is the dot operator. This
            // now includes a '.' in last position, which the original let
            // through and emitted as a number token.
            if (('.' == *begin) && (is_end(begin + 1) || !traits::is_digit(begin[1])))
            {
                scan_operator();
                return;
            }

            while (!is_end(s_itr_))
            {
                if ('.' == (*s_itr_))
                {
                    if (dot_found)
                    {
                        t.set_error(token::e_err_number,begin,s_itr_,base_itr_);
                        token_list_.push_back(t);

                        return;
                    }

                    dot_found = true;
                    ++s_itr_;

                    continue;
                }
                else if (traits::imatch('e',(*s_itr_)))
                {
                    // The exponent marker must be followed by a sign or a
                    // digit. The end test comes first so the following
                    // character is only touched when it exists (the original
                    // dereferenced s_itr_ + 1 before checking).
                    if (
                         is_end(s_itr_ + 1) ||
                         !(traits::is_sign(s_itr_[1]) || traits::is_digit(s_itr_[1]))
                       )
                    {
                        t.set_error(token::e_err_number,begin,s_itr_,base_itr_);
                        token_list_.push_back(t);

                        return;
                    }

                    e_found = true;
                    ++s_itr_;

                    continue;
                }
                else if (e_found && traits::is_sign(*s_itr_) && !post_e_digit_found)
                {
                    if (post_e_sign_found)
                    {
                        t.set_error(token::e_err_number,begin,s_itr_,base_itr_);
                        token_list_.push_back(t);

                        return;
                    }

                    post_e_sign_found = true;
                    ++s_itr_;

                    continue;
                }
                else if (e_found && traits::is_digit(*s_itr_))
                {
                    post_e_digit_found = true;
                    ++s_itr_;

                    continue;
                }
                else if (('.' != (*s_itr_)) && !traits::is_digit(*s_itr_))
                    break;
                else
                    ++s_itr_;
            }

            t.set_numeric(begin,s_itr_,base_itr_);

            token_list_.push_back(t);

            return;
        }

        /**
         * Scans a quoted string. The closing quote must be the same character
         * as the opening one; a backslash makes the next character literal.
         * The token span excludes both quotes.
         */
        inline void scan_string()
        {
            const CharT endChar = *s_itr_;
            const CharT* begin = s_itr_ + 1;

            token_t t;

            if (std::distance(s_itr_,s_end_) < 2)
            {
                t.set_error(token::e_err_string,s_itr_,s_end_,base_itr_);
                token_list_.push_back(t);

                return;
            }

            ++s_itr_;

            bool escaped = false;

            while (!is_end(s_itr_))
            {
                if (!escaped && ('\\' == *s_itr_))
                {
                    escaped = true;
                    ++s_itr_;

                    continue;
                }
                else if (!escaped)
                {
                    if (endChar == *s_itr_)
                        break;
                }
                else
                    escaped = false;

                ++s_itr_;
            }

            // Reaching the end of the buffer means the closing quote is missing.
            if (is_end(s_itr_))
            {
                t.set_error(token::e_err_string,begin,s_itr_,base_itr_);
                token_list_.push_back(t);

                return;
            }

            t.set_string(begin,s_itr_,base_itr_);
            token_list_.push_back(t);
            ++s_itr_;

            return;
        }

    private:

        token_list_t     token_list_;
        token_list_itr_t token_itr_;
        token_list_itr_t store_token_itr_;
        token_t          eof_token_;
        const CharT*     base_itr_;
        const CharT*     s_itr_;
        const CharT*     s_end_;

        friend class token_scanner;
        friend class token_modifier;
        friend class token_inserter;
        friend class token_joiner;
    };
#if 0
    // Disabled helper layer. Only the part visible on this line of the page is
    // reproduced; the section is cut by the page's line break and continues
    // on the following lines, which are left as they are.
    class helper_interface
    {
    public:

        virtual void init()                     {              }
        virtual void reset()                    {              }
        virtual bool result()                   { return true; }
        virtual std::size_t process(generator&) { return 0;    }
        virtual ~helper_interface()             {              }
    };

    class token_scanner : public helper_interface
    {
    public:

        virtual ~token_scanner()
        {}

        explicit token_scanner(const std::size_t& stride)
        : stride_(stride)
        {
            if (stride > 4)
            {
                throw std::invalid_argument("token_scanner() - Invalid stride value");
            }
        }

        inline std::size_t process(generator& g)
        {
            if (!g.token_list_.empty())
            {
                for (std::size_t i = 0; i < (g.token_list_.size() - stride_ + 1); ++i)
                {
                    token t;
                    switch (stride_)
                    {
                        case 1 :
                                  {
                                      const token&
#endif
} // namespace lexertk

#endif // JINJA2CPP_SRC_LEXERTK_H
t0 = g.token_list_[i]; + + if (!operator()(t0)) return i; + } + break; + + case 2 : + { + const token& t0 = g.token_list_[i ]; + const token& t1 = g.token_list_[i + 1]; + + if (!operator()(t0,t1)) return i; + } + break; + + case 3 : + { + const token& t0 = g.token_list_[i ]; + const token& t1 = g.token_list_[i + 1]; + const token& t2 = g.token_list_[i + 2]; + + if (!operator()(t0,t1,t2)) return i; + } + break; + + case 4 : + { + const token& t0 = g.token_list_[i ]; + const token& t1 = g.token_list_[i + 1]; + const token& t2 = g.token_list_[i + 2]; + const token& t3 = g.token_list_[i + 3]; + + if (!operator()(t0,t1,t2,t3)) return i; + } + break; + } + } + } + + return (g.token_list_.size() - stride_ + 1); + } + + virtual bool operator()(const token&) + { + return false; + } + + virtual bool operator()(const token&, const token&) + { + return false; + } + + virtual bool operator()(const token&, const token&, const token&) + { + return false; + } + + virtual bool operator()(const token&, const token&, const token&, const token&) + { + return false; + } + + private: + + std::size_t stride_; + }; + + class token_modifier : public helper_interface + { + public: + + inline std::size_t process(generator& g) + { + std::size_t changes = 0; + + for (std::size_t i = 0; i < g.token_list_.size(); ++i) + { + if (modify(g.token_list_[i])) changes++; + } + + return changes; + } + + virtual bool modify(token& t) = 0; + }; + + class token_inserter : public helper_interface + { + public: + + explicit token_inserter(const std::size_t& stride) + : stride_(stride) + { + if (stride > 5) + { + throw std::invalid_argument("token_inserter() - Invalid stride value"); + } + } + + inline std::size_t process(generator& g) + { + if (g.token_list_.empty()) + return 0; + + std::size_t changes = 0; + + for (std::size_t i = 0; i < (g.token_list_.size() - stride_ + 1); ++i) + { + token t; + int insert_index = -1; + + switch (stride_) + { + case 1 : insert_index = insert(g.token_list_[i],t); + break; + 
+ case 2 : insert_index = insert(g.token_list_[i],g.token_list_[i + 1],t); + break; + + case 3 : insert_index = insert(g.token_list_[i],g.token_list_[i + 1],g.token_list_[i + 2],t); + break; + + case 4 : insert_index = insert(g.token_list_[i],g.token_list_[i + 1],g.token_list_[i + 2],g.token_list_[i + 3],t); + break; + + case 5 : insert_index = insert(g.token_list_[i],g.token_list_[i + 1],g.token_list_[i + 2],g.token_list_[i + 3],g.token_list_[i + 4],t); + break; + } + + if ((insert_index >= 0) && (insert_index <= (static_cast<int>(stride_) + 1))) + { + g.token_list_.insert(g.token_list_.begin() + (i + insert_index),t); + changes++; + } + } + + return changes; + } + + virtual inline int insert(const token&, token& ) + { + return -1; + } + + virtual inline int insert(const token&, const token&, token&) + { + return -1; + } + + virtual inline int insert(const token&, const token&, const token&, token&) + { + return -1; + } + + virtual inline int insert(const token&, const token&, const token&, const token&, token&) + { + return -1; + } + + virtual inline int insert(const token&, const token&, const token&, const token&, const token&, token&) + { + return -1; + } + + private: + + std::size_t stride_; + }; + + class token_joiner : public helper_interface + { + public: + + inline std::size_t process(generator& g) + { + if (g.token_list_.empty()) + return 0; + + std::size_t changes = 0; + + for (std::size_t i = 0; i < g.token_list_.size() - 1; ++i) + { + token t; + + if (join(g.token_list_[i],g.token_list_[i + 1],t)) + { + g.token_list_[i] = t; + g.token_list_.erase(g.token_list_.begin() + (i + 1)); + + ++changes; + } + } + + return changes; + } + + virtual bool join(const token&, const token&, token&) = 0; + }; + + namespace helper + { + + inline void dump(lexertk::generator& generator) + { + for (std::size_t i = 0; i < generator.size(); ++i) + { + lexertk::token t = generator[i]; + printf("Token[%02d] @ %03d %6s --> '%s'\n", + static_cast<unsigned int>(i), + 
static_cast<unsigned int>(t.position), + t.to_str(t.type).c_str(), + t.value.c_str()); + } + } + + class commutative_inserter : public token_inserter + { + public: + + commutative_inserter() + : lexertk::token_inserter(2) + {} + + inline void ignore_symbol(const std::string& symbol) + { + ignore_set_.insert(symbol); + } + + inline int insert(const lexertk::token& t0, const lexertk::token& t1, lexertk::token& new_token) + { + new_token.type = lexertk::token::e_mul; + new_token.value = "*"; + new_token.position = t1.position; + bool match = false; + + if (t0.type == lexertk::token::e_symbol) + { + if (ignore_set_.end() != ignore_set_.find(t0.value)) + { + return -1; + } + else if (!t0.value.empty() && ('$' == t0.value[0])) + { + return -1; + } + } + + if (t1.type == lexertk::token::e_symbol) + { + if (ignore_set_.end() != ignore_set_.find(t1.value)) + { + return -1; + } + } + + if ((t0.type == lexertk::token::e_number ) && (t1.type == lexertk::token::e_symbol )) match = true; + else if ((t0.type == lexertk::token::e_number ) && (t1.type == lexertk::token::e_lbracket )) match = true; + else if ((t0.type == lexertk::token::e_number ) && (t1.type == lexertk::token::e_lcrlbracket)) match = true; + else if ((t0.type == lexertk::token::e_number ) && (t1.type == lexertk::token::e_lsqrbracket)) match = true; + else if ((t0.type == lexertk::token::e_symbol ) && (t1.type == lexertk::token::e_number )) match = true; + else if ((t0.type == lexertk::token::e_rbracket ) && (t1.type == lexertk::token::e_number )) match = true; + else if ((t0.type == lexertk::token::e_rcrlbracket) && (t1.type == lexertk::token::e_number )) match = true; + else if ((t0.type == lexertk::token::e_rsqrbracket) && (t1.type == lexertk::token::e_number )) match = true; + else if ((t0.type == lexertk::token::e_rbracket ) && (t1.type == lexertk::token::e_symbol )) match = true; + else if ((t0.type == lexertk::token::e_rcrlbracket) && (t1.type == lexertk::token::e_symbol )) match = true; + else if ((t0.type 
== lexertk::token::e_rsqrbracket) && (t1.type == lexertk::token::e_symbol )) match = true; + + return (match) ? 1 : -1; + } + + private: + + std::set<std::string,details::ilesscompare> ignore_set_; + }; + + class operator_joiner : public token_joiner + { + public: + + inline bool join(const lexertk::token& t0, const lexertk::token& t1, lexertk::token& t) + { + //': =' --> ':=' + if ((t0.type == lexertk::token::e_colon) && (t1.type == lexertk::token::e_eq)) + { + t.type = lexertk::token::e_assign; + t.value = ":="; + t.position = t0.position; + + return true; + } + //'> =' --> '>=' + else if ((t0.type == lexertk::token::e_gt) && (t1.type == lexertk::token::e_eq)) + { + t.type = lexertk::token::e_gte; + t.value = ">="; + t.position = t0.position; + + return true; + } + //'< =' --> '<=' + else if ((t0.type == lexertk::token::e_lt) && (t1.type == lexertk::token::e_eq)) + { + t.type = lexertk::token::e_lte; + t.value = "<="; + t.position = t0.position; + + return true; + } + //'= =' --> '==' + else if ((t0.type == lexertk::token::e_eq) && (t1.type == lexertk::token::e_eq)) + { + t.type = lexertk::token::e_eq; + t.value = "=="; + t.position = t0.position; + + return true; + } + //'! =' --> '!=' + else if ((static_cast<char>(t0.type) == '!') && (t1.type == lexertk::token::e_eq)) + { + t.type = lexertk::token::e_ne; + t.value = "!="; + t.position = t0.position; + + return true; + } + //'< >' --> '<>' + else if ((t0.type == lexertk::token::e_lt) && (t1.type == lexertk::token::e_gt)) + { + t.type = lexertk::token::e_ne; + t.value = "<>"; + t.position = t0.position; + + return true; + } + else + return false; + } + }; + + class bracket_checker : public token_scanner + { + public: + + bracket_checker() + : token_scanner(1), + state_(true) + {} + + bool result() + { + return state_ && stack_.empty(); + } + + lexertk::token error_token() + { + return error_token_; + } + + void reset() + { + //why? because msvc doesn't support swap properly. 
+ stack_ = std::stack<char>(); + state_ = true; + error_token_.clear(); + } + + bool operator()(const lexertk::token& t) + { + if ( + !t.value.empty() && + (lexertk::token::e_string != t.type) && + (lexertk::token::e_symbol != t.type) && + details::is_bracket(t.value[0]) + ) + { + char c = t.value[0]; + + if (t.type == lexertk::token::e_lbracket) stack_.push(')'); + else if (t.type == lexertk::token::e_lcrlbracket) stack_.push('}'); + else if (t.type == lexertk::token::e_lsqrbracket) stack_.push(']'); + else if (details::is_right_bracket(c)) + { + if (stack_.empty()) + { + state_ = false; + error_token_ = t; + + return false; + } + else if (c != stack_.top()) + { + state_ = false; + error_token_ = t; + + return false; + } + else + stack_.pop(); + } + } + + return true; + } + + private: + + bool state_; + std::stack<char> stack_; + lexertk::token error_token_; + }; + + class symbol_replacer : public token_modifier + { + private: + + typedef std::map<std::string,std::pair<std::string,token::token_type>,details::ilesscompare> replace_map_t; + + public: + + bool remove(const std::string& target_symbol) + { + replace_map_t::iterator itr = replace_map_.find(target_symbol); + + if (replace_map_.end() == itr) + return false; + + replace_map_.erase(itr); + + return true; + } + + bool add_replace(const std::string& target_symbol, + const std::string& replace_symbol, + const lexertk::token::token_type token_type = lexertk::token::e_symbol) + { + replace_map_t::iterator itr = replace_map_.find(target_symbol); + + if (replace_map_.end() != itr) + { + return false; + } + + replace_map_[target_symbol] = std::make_pair(replace_symbol,token_type); + + return true; + } + + void clear() + { + replace_map_.clear(); + } + + private: + + bool modify(lexertk::token& t) + { + if (lexertk::token::e_symbol == t.type) + { + if (replace_map_.empty()) + return false; + + replace_map_t::iterator itr = replace_map_.find(t.value); + + if (replace_map_.end() != itr) + { + t.value = 
itr->second.first; + t.type = itr->second.second; + + return true; + } + } + + return false; + } + + replace_map_t replace_map_; + }; + + class sequence_validator : public token_scanner + { + private: + + typedef std::pair<lexertk::token::token_type,lexertk::token::token_type> token_pair_t; + typedef std::set<token_pair_t> set_t; + + public: + + sequence_validator() + : lexertk::token_scanner(2) + { + add_invalid(lexertk::token::e_number,lexertk::token::e_number); + add_invalid(lexertk::token::e_string,lexertk::token::e_string); + add_invalid(lexertk::token::e_number,lexertk::token::e_string); + add_invalid(lexertk::token::e_string,lexertk::token::e_number); + add_invalid(lexertk::token::e_string,lexertk::token::e_colon); + add_invalid(lexertk::token::e_colon,lexertk::token::e_string); + add_invalid_set1(lexertk::token::e_assign); + add_invalid_set1(lexertk::token::e_shr ); + add_invalid_set1(lexertk::token::e_shl ); + add_invalid_set1(lexertk::token::e_lte ); + add_invalid_set1(lexertk::token::e_ne ); + add_invalid_set1(lexertk::token::e_gte ); + add_invalid_set1(lexertk::token::e_lt ); + add_invalid_set1(lexertk::token::e_gt ); + add_invalid_set1(lexertk::token::e_eq ); + add_invalid_set1(lexertk::token::e_comma ); + add_invalid_set1(lexertk::token::e_add ); + add_invalid_set1(lexertk::token::e_sub ); + add_invalid_set1(lexertk::token::e_div ); + add_invalid_set1(lexertk::token::e_mul ); + add_invalid_set1(lexertk::token::e_mod ); + add_invalid_set1(lexertk::token::e_pow ); + add_invalid_set1(lexertk::token::e_colon ); + } + + bool result() + { + return error_list_.empty(); + } + + bool operator()(const lexertk::token& t0, const lexertk::token& t1) + { + set_t::value_type p = std::make_pair(t0.type,t1.type); + + if (invalid_bracket_check(t0.type,t1.type)) + { + error_list_.push_back(std::make_pair(t0,t1)); + } + else if (invalid_comb_.find(p) != invalid_comb_.end()) + error_list_.push_back(std::make_pair(t0,t1)); + + return true; + } + + std::size_t error_count() 
+ { + return error_list_.size(); + } + + std::pair<lexertk::token,lexertk::token> error(const std::size_t index) + { + if (index < error_list_.size()) + { + return error_list_[index]; + } + else + { + static const lexertk::token error_token; + return std::make_pair(error_token,error_token); + } + } + + void clear_errors() + { + error_list_.clear(); + } + + private: + + void add_invalid(lexertk::token::token_type base, lexertk::token::token_type t) + { + invalid_comb_.insert(std::make_pair(base,t)); + } + + void add_invalid_set1(lexertk::token::token_type t) + { + add_invalid(t,lexertk::token::e_assign); + add_invalid(t,lexertk::token::e_shr ); + add_invalid(t,lexertk::token::e_shl ); + add_invalid(t,lexertk::token::e_lte ); + add_invalid(t,lexertk::token::e_ne ); + add_invalid(t,lexertk::token::e_gte ); + add_invalid(t,lexertk::token::e_lt ); + add_invalid(t,lexertk::token::e_gt ); + add_invalid(t,lexertk::token::e_eq ); + add_invalid(t,lexertk::token::e_comma ); + add_invalid(t,lexertk::token::e_div ); + add_invalid(t,lexertk::token::e_mul ); + add_invalid(t,lexertk::token::e_mod ); + add_invalid(t,lexertk::token::e_pow ); + add_invalid(t,lexertk::token::e_colon ); + } + + bool invalid_bracket_check(lexertk::token::token_type base, lexertk::token::token_type t) + { + if (details::is_right_bracket(static_cast<char>(base))) + { + switch (t) + { + case lexertk::token::e_string : return true; + case lexertk::token::e_assign : return true; + default : return false; + } + } + else if (details::is_left_bracket(static_cast<char>(base))) + { + if (details::is_right_bracket(static_cast<char>(t))) + return false; + else if (details::is_left_bracket(static_cast<char>(t))) + return false; + else + { + switch (t) + { + case lexertk::token::e_number : return false; + case lexertk::token::e_symbol : return false; + case lexertk::token::e_string : return false; + case lexertk::token::e_add : return false; + case lexertk::token::e_sub : return false; + case lexertk::token::e_colon 
: return false; + default : return true; + } + } + } + else if (details::is_right_bracket(static_cast<char>(t))) + { + switch (base) + { + case lexertk::token::e_number : return false; + case lexertk::token::e_symbol : return false; + case lexertk::token::e_string : return false; + case lexertk::token::e_eof : return false; + case lexertk::token::e_colon : return false; + default : return true; + } + } + else if (details::is_left_bracket(static_cast<char>(t))) + { + switch (base) + { + case lexertk::token::e_rbracket : return true; + case lexertk::token::e_rsqrbracket : return true; + case lexertk::token::e_rcrlbracket : return true; + default : return false; + } + } + + return false; + } + + set_t invalid_comb_; + std::deque<std::pair<lexertk::token,lexertk::token> > error_list_; + + }; + + struct helper_assembly + { + inline bool register_scanner(lexertk::token_scanner* scanner) + { + if (token_scanner_list.end() != std::find(token_scanner_list.begin(), + token_scanner_list.end(), + scanner)) + { + return false; + } + + token_scanner_list.push_back(scanner); + + return true; + } + + inline bool register_modifier(lexertk::token_modifier* modifier) + { + if (token_modifier_list.end() != std::find(token_modifier_list.begin(), + token_modifier_list.end(), + modifier)) + { + return false; + } + + token_modifier_list.push_back(modifier); + + return true; + } + + inline bool register_joiner(lexertk::token_joiner* joiner) + { + if (token_joiner_list.end() != std::find(token_joiner_list.begin(), + token_joiner_list.end(), + joiner)) + { + return false; + } + + token_joiner_list.push_back(joiner); + + return true; + } + + inline bool register_inserter(lexertk::token_inserter* inserter) + { + if (token_inserter_list.end() != std::find(token_inserter_list.begin(), + token_inserter_list.end(), + inserter)) + { + return false; + } + + token_inserter_list.push_back(inserter); + + return true; + } + + inline bool run_modifiers(lexertk::generator& g) + { + error_token_modifier = 
reinterpret_cast<lexertk::token_modifier*>(0); + + for (std::size_t i = 0; i < token_modifier_list.size(); ++i) + { + lexertk::token_modifier& modifier = (*token_modifier_list[i]); + + modifier.reset(); + modifier.process(g); + + if (!modifier.result()) + { + error_token_modifier = token_modifier_list[i]; + + return false; + } + } + + return true; + } + + inline bool run_joiners(lexertk::generator& g) + { + error_token_joiner = reinterpret_cast<lexertk::token_joiner*>(0); + + for (std::size_t i = 0; i < token_joiner_list.size(); ++i) + { + lexertk::token_joiner& joiner = (*token_joiner_list[i]); + + joiner.reset(); + joiner.process(g); + + if (!joiner.result()) + { + error_token_joiner = token_joiner_list[i]; + + return false; + } + } + + return true; + } + + inline bool run_inserters(lexertk::generator& g) + { + error_token_inserter = reinterpret_cast<lexertk::token_inserter*>(0); + + for (std::size_t i = 0; i < token_inserter_list.size(); ++i) + { + lexertk::token_inserter& inserter = (*token_inserter_list[i]); + + inserter.reset(); + inserter.process(g); + + if (!inserter.result()) + { + error_token_inserter = token_inserter_list[i]; + + return false; + } + } + + return true; + } + + inline bool run_scanners(lexertk::generator& g) + { + error_token_scanner = reinterpret_cast<lexertk::token_scanner*>(0); + + for (std::size_t i = 0; i < token_scanner_list.size(); ++i) + { + lexertk::token_scanner& scanner = (*token_scanner_list[i]); + + scanner.reset(); + scanner.process(g); + + if (!scanner.result()) + { + error_token_scanner = token_scanner_list[i]; + + return false; + } + } + + return true; + } + + std::deque<lexertk::token_scanner*> token_scanner_list; + std::deque<lexertk::token_modifier*> token_modifier_list; + std::deque<lexertk::token_joiner*> token_joiner_list; + std::deque<lexertk::token_inserter*> token_inserter_list; + + lexertk::token_scanner* error_token_scanner; + lexertk::token_modifier* error_token_modifier; + lexertk::token_joiner* 
error_token_joiner; + lexertk::token_inserter* error_token_inserter; + }; + } + + class parser_helper + { + public: + + typedef token token_t; + typedef generator generator_t; + + inline bool init(const std::string& str) + { + if (!lexer_.process(str)) + { + return false; + } + + lexer_.begin(); + + next_token(); + + return true; + } + + inline generator_t& lexer() + { + return lexer_; + } + + inline const generator_t& lexer() const + { + return lexer_; + } + + inline void next_token() + { + current_token_ = lexer_.next_token(); + } + + inline const token_t& current_token() const + { + return current_token_; + } + + enum token_advance_mode + { + e_hold = 0, + e_advance = 1 + }; + + inline void advance_token(const token_advance_mode mode) + { + if (e_advance == mode) + { + next_token(); + } + } + + inline bool token_is(const token_t::token_type& ttype, const token_advance_mode mode = e_advance) + { + if (current_token().type != ttype) + { + return false; + } + + advance_token(mode); + + return true; + } + + inline bool token_is(const token_t::token_type& ttype, + const std::string& value, + const token_advance_mode mode = e_advance) + { + if ( + (current_token().type != ttype) || + !details::imatch(value,current_token().value) + ) + { + return false; + } + + advance_token(mode); + + return true; + } + + inline bool token_is_then_assign(const token_t::token_type& ttype, + std::string& token, + const token_advance_mode mode = e_advance) + { + if (current_token_.type != ttype) + { + return false; + } + + token = current_token_.value; + + advance_token(mode); + + return true; + } + + template <typename Allocator, + template <typename,typename> class Container> + inline bool token_is_then_assign(const token_t::token_type& ttype, + Container<std::string,Allocator>& token_list, + const token_advance_mode mode = e_advance) + { + if (current_token_.type != ttype) + { + return false; + } + + token_list.push_back(current_token_.value); + + advance_token(mode); + + return true; 
+ } + + inline bool peek_token_is(const token_t::token_type& ttype) + { + return (lexer_.peek_next_token().type == ttype); + } + + inline bool peek_token_is(const std::string& s) + { + return (details::imatch(lexer_.peek_next_token().value,s)); + } + + private: + + generator_t lexer_; + token_t current_token_; + }; +#endif +} // namespace lexertk + +#endif |