/*
*****************************************************************
* Simple C++ Lexer Toolkit Library *
* *
* Author: Arash Partow (2001) *
* Modified: Flex Ferrum (2018) *
* URL: http://www.partow.net/programming/lexertk/index.html *
* *
* Copyright notice: *
* Free use of the Simple C++ Lexer Toolkit Library is permitted *
* under the guidelines and in accordance with the MIT License. *
* http://www.opensource.org/licenses/MIT *
* *
* *
* The lexer will tokenize input against the following BNF: *
* *
* expression ::= term { +|- term } *
* term ::= (symbol | factor) {operator symbol | factor} *
* factor ::= symbol | ( '(' {-} expression ')' ) *
* symbol ::= number | gensymb | string *
* gensymb ::= alphabet {alphabet | digit} *
* string ::= '"' {alphabet | digit | operator } '"' *
* operator ::= * | / | % | ^ | < | > | <= | >= | << | >> | != *
* alphabet ::= a | b | .. | z | A | B | .. | Z *
* digit ::= 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 *
* sign ::= + | - *
* edef ::= e | E *
* decimal ::= {digit} (digit [.] | [.] digit) {digit} *
* exponent ::= edef [sign] digit {digit} *
* real ::= [sign] decimal [exponent] *
* integer ::= [sign] {digit} *
* number ::= real | integer *
* *
* *
* Note: This lexer has been taken from the ExprTk Library. *
* *
*****************************************************************
*/
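/*
   Example usage (a minimal, illustrative sketch; the input string and the
   printed format below are arbitrary examples, not part of the library API):

      lexertk::generator<char> gen;
      if (gen.process(std::string("x + 12.5e-3 >= y")))
      {
         for (std::size_t i = 0; i < gen.size(); ++i)
         {
            const lexertk::token& t = gen[i];
            std::printf("token[%02u] type: %-6s position: %u length: %u\n",
                        static_cast<unsigned>(i),
                        lexertk::token::to_str(t.type),
                        static_cast<unsigned>(t.position),
                        static_cast<unsigned>(t.length));
         }
      }
*/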
#ifndef JINJA2CPP_SRC_LEXERTK_H
#define JINJA2CPP_SRC_LEXERTK_H
#include <algorithm>
#include <cctype>
#include <clocale>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <deque>
#include <exception>
#include <limits>
#include <locale>
#include <map>
#include <set>
#include <stack>
#include <stdexcept>
#include <string>
#include <vector>
namespace lexertk
{
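// Non-owning view over the half-open character range [start, end).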
template<typename CharT>
struct CharRange
{
CharT* start;
CharT* end;
auto length() const {return end - start;}
auto offset(CharT* from) const {return start - from;}
auto operator[] (size_t idx) const {return start[idx];}
};
namespace details
{
#if 0
inline bool is_whitespace(const char c)
{
return (' ' == c) || ('\n' == c) ||
('\r' == c) || ('\t' == c) ||
('\b' == c) || ('\v' == c) ||
('\f' == c) ;
}
inline bool is_letter(const char c)
{
return (('a' <= c) && (c <= 'z')) || (('A' <= c) && (c <= 'Z'));
}
inline bool is_digit(const char c)
{
return ('0' <= c) && (c <= '9');
}
inline bool is_letter_or_digit(const char c)
{
return is_letter(c) || is_digit(c);
}
#endif
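// Locale-aware character classification and case-insensitive comparison helpers.
// The operator, bracket, sign and "invalid character" predicates are specialised
// for char and wchar_t below.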
template<typename CharT>
struct lexer_traits
{
static auto& get_locale()
{
static auto locale = std::locale();
return locale;
}
static bool is_whitespace(const CharT c)
{
return std::isspace(c, get_locale());
}
static bool is_letter(const CharT c)
{
return std::isalpha(c, get_locale());
}
static bool is_digit(const CharT c)
{
return std::isdigit(c, get_locale());
}
static bool is_letter_or_digit(CharT c)
{
return std::isalnum(c, get_locale());
}
static bool is_operator_char(const CharT c);
static bool is_left_bracket(const CharT c);
static bool is_right_bracket(const CharT c);
static bool is_sign(const CharT c);
static bool is_invalid(const CharT c);
static bool is_bracket(const CharT c)
{
return is_left_bracket(c) || is_right_bracket(c);
}
static CharT tolower(const CharT c)
{
return std::tolower(c, get_locale());
}
static CharT toupper(const CharT c)
{
return std::toupper(c, get_locale());
}
static inline bool imatch(const CharT c1, const CharT c2)
{
return tolower(c1) == tolower(c2);
}
static inline bool imatch(const CharRange<CharT>& s1, const CharRange<CharT>& s2)
{
if (s1.length() == s2.length())
{
for (std::size_t i = 0; i < s1.length(); ++i)
{
if (tolower(s1[i]) != tolower(s2[i]))
{
return false;
}
}
return true;
}
return false;
}
};
template<>
inline bool lexer_traits<char>::is_operator_char(const char c)
{
return ('+' == c) || ('-' == c) ||
('*' == c) || ('/' == c) ||
('^' == c) || ('<' == c) ||
('>' == c) || ('=' == c) ||
(',' == c) || ('!' == c) ||
('(' == c) || (')' == c) ||
('[' == c) || (']' == c) ||
('{' == c) || ('}' == c) ||
('%' == c) || (':' == c) ||
('?' == c) || ('&' == c) ||
('|' == c) || (';' == c) ||
('~' == c);
}
template<>
inline bool lexer_traits<wchar_t>::is_operator_char(const wchar_t c)
{
return (L'+' == c) || (L'-' == c) ||
(L'*' == c) || (L'/' == c) ||
(L'^' == c) || (L'<' == c) ||
(L'>' == c) || (L'=' == c) ||
(L',' == c) || (L'!' == c) ||
(L'(' == c) || (L')' == c) ||
(L'[' == c) || (L']' == c) ||
(L'{' == c) || (L'}' == c) ||
(L'%' == c) || (L':' == c) ||
(L'?' == c) || (L'&' == c) ||
(L'|' == c) || (L';' == c) ||
(L'~' == c);
}
template<>
inline bool lexer_traits<char>::is_left_bracket(const char c)
{
return ('(' == c) || ('[' == c) || ('{' == c);
}
template<>
inline bool lexer_traits<wchar_t>::is_left_bracket(const wchar_t c)
{
return (L'(' == c) || (L'[' == c) || (L'{' == c);
}
template<>
inline bool lexer_traits<char>::is_right_bracket(const char c)
{
return (')' == c) || (']' == c) || ('}' == c);
}
template<>
inline bool lexer_traits<wchar_t>::is_right_bracket(const wchar_t c)
{
return (L')' == c) || (L']' == c) || (L'}' == c);
}
template<>
inline bool lexer_traits<char>::is_sign(const char c)
{
return ('+' == c) || ('-' == c);
}
template<>
inline bool lexer_traits<wchar_t>::is_sign(const wchar_t c)
{
return (L'+' == c) || (L'-' == c);
}
template<>
inline bool lexer_traits<char>::is_invalid(const char c)
{
return !is_whitespace(c) &&
!is_operator_char(c) &&
!is_letter(c) &&
!is_digit(c) &&
('.' != c) &&
('_' != c) &&
('$' != c) &&
('~' != c) &&
('\'' != c);
}
template<>
inline bool lexer_traits<wchar_t>::is_invalid(const wchar_t c)
{
return !is_whitespace(c) &&
!is_operator_char(c) &&
!is_letter(c) &&
!is_digit(c) &&
(L'.' != c) &&
(L'_' != c) &&
(L'$' != c) &&
(L'~' != c) &&
(L'\'' != c);
}
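// Case-insensitive lexicographic "less than" comparator for CharRange values,
// usable as the ordering predicate of std::map/std::set.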
template<typename CharT>
struct ilesscompare
{
inline bool operator()(const CharRange<CharT>& s1, const CharRange<CharT>& s2) const
{
using traits = lexer_traits<CharT>;
const std::size_t length = std::min(s1.length(),s2.length());
for (std::size_t i = 0; i < length; ++i)
{
if (traits::tolower(s1[i]) > traits::tolower(s2[i]))
return false;
else if (traits::tolower(s1[i]) < traits::tolower(s2[i]))
return true;
}
return s1.length() < s2.length();
}
};
} // namespace details
struct token
{
enum token_type
{
e_none = 0, e_error = 1, e_err_symbol = 2,
e_err_number = 3, e_err_string = 4, e_err_sfunc = 5,
e_eof = 6, e_number = 7, e_symbol = 8,
e_string = 9, e_eq = 10, e_shr = 11,
e_shl = 12, e_lte = 13, e_ne = 14,
e_gte = 15, e_lt = '<', e_gt = '>',
e_rbracket = ')', e_lbracket = '(', e_tilda = '~',
e_rsqrbracket = ']', e_lsqrbracket = '[', e_rcrlbracket = '}',
e_lcrlbracket = '{', e_comma = ',', e_add = '+',
e_sub = '-', e_div = '/', e_mul = '*',
e_mod = '%', e_pow = '^', e_colon = ':',
e_dot = '.', e_divdiv = 16 , e_mulmul = 17 ,
e_assign = '=', e_pipe = '|',
};
token()
: type(e_none),
position(std::numeric_limits<std::size_t>::max()),
length(0)
{}
void clear()
{
type = e_none;
position = std::numeric_limits<std::size_t>::max();
length = 0;
}
template <typename Iterator>
inline token& set_operator(const token_type tt, const Iterator begin, const Iterator end, const Iterator base_begin)
{
type = tt;
position = std::distance(base_begin,begin);
length = end - begin;
return *this;
}
template <typename Iterator>
inline token& set_symbol(const Iterator begin, const Iterator end, const Iterator base_begin)
{
type = e_symbol;
position = std::distance(base_begin,begin);
length = end - begin;
return *this;
}
template <typename Iterator>
inline token& set_numeric(const Iterator begin, const Iterator end, const Iterator base_begin)
{
type = e_number;
position = std::distance(base_begin,begin);
length = end - begin;
return *this;
}
template <typename Iterator>
inline token& set_string(const Iterator begin, const Iterator end, const Iterator base_begin)
{
type = e_string;
position = std::distance(base_begin,begin);
length = end - begin;
return *this;
}
template <typename Iterator>
inline token& set_error(const token_type et, const Iterator begin, const Iterator end, const Iterator base_begin)
{
if (
(e_error == et) ||
(e_err_symbol == et) ||
(e_err_number == et) ||
(e_err_string == et)
)
{
type = et;
}
else
type = e_error;
position = std::distance(base_begin,begin);
length = end - begin;
return *this;
}
static inline const char* to_str(token_type t)
{
switch (t)
{
case e_none : return "NONE";
case e_error : return "ERROR";
case e_err_symbol : return "ERROR_SYMBOL";
case e_err_number : return "ERROR_NUMBER";
case e_err_string : return "ERROR_STRING";
case e_eof : return "EOF";
case e_number : return "NUMBER";
case e_symbol : return "SYMBOL";
case e_string : return "STRING";
case e_assign : return "=";
case e_shr : return ">>";
case e_shl : return "<<";
case e_lte : return "<=";
case e_ne : return "!=";
case e_gte : return ">=";
case e_lt : return "<";
case e_gt : return ">";
case e_eq : return "==";
case e_rbracket : return ")";
case e_lbracket : return "(";
case e_rsqrbracket : return "]";
case e_lsqrbracket : return "[";
case e_rcrlbracket : return "}";
case e_lcrlbracket : return "{";
case e_comma : return ",";
case e_dot : return ".";
case e_add : return "+";
case e_sub : return "-";
case e_div : return "/";
case e_mul : return "*";
case e_mod : return "%";
case e_pow : return "^";
case e_colon : return ":";
case e_divdiv : return "//";
case e_mulmul : return "**";
default : return "UNKNOWN";
}
}
inline bool is_error() const
{
return (
(e_error == type) ||
(e_err_symbol == type) ||
(e_err_number == type) ||
(e_err_string == type)
);
}
token_type type;
size_t position;
size_t length;
};
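// Scans a character range into a flat token list and exposes it either by index
// (operator[]) or as a forward stream (next_token/peek_next_token) whose read
// position can be saved and restored via store()/restore().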
template<typename CharT>
class generator
{
public:
typedef token token_t;
typedef std::vector<token_t> token_list_t;
typedef std::vector<token_t>::iterator token_list_itr_t;
typedef details::lexer_traits<CharT> traits;
generator()
: base_itr_(0),
s_itr_(0),
s_end_(0)
{
clear();
}
inline void clear()
{
base_itr_ = 0;
s_itr_ = 0;
s_end_ = 0;
token_list_.clear();
token_itr_ = token_list_.end();
store_token_itr_ = token_itr_;
}
inline bool process(const std::basic_string<CharT>& str)
{
return process(str.data(), str.data() + str.size());
}
inline bool process(const CharT* begin, const CharT* end)
{
base_itr_ = begin;
s_itr_ = begin;
s_end_ = end;
eof_token_.set_operator(token_t::e_eof,s_end_,s_end_,base_itr_);
token_list_.clear();
while (!is_end(s_itr_))
{
scan_token();
if (token_list_.empty())
return true;
else if (token_list_.back().is_error())
{
return false;
}
}
return true;
}
inline bool empty() const
{
return token_list_.empty();
}
inline size_t size() const
{
return token_list_.size();
}
inline void begin()
{
token_itr_ = token_list_.begin();
store_token_itr_ = token_itr_;
}
inline void store()
{
store_token_itr_ = token_itr_;
}
inline void restore()
{
token_itr_ = store_token_itr_;
}
inline token_t& next_token()
{
if (token_list_.end() != token_itr_)
{
return *token_itr_++;
}
else
return eof_token_;
}
inline token_t& peek_next_token()
{
if (token_list_.end() != token_itr_)
{
return *token_itr_;
}
else
return eof_token_;
}
inline token_t& operator[](const std::size_t& index)
{
if (index < token_list_.size())
return token_list_[index];
else
return eof_token_;
}
inline token_t operator[](const std::size_t& index) const
{
if (index < token_list_.size())
return token_list_[index];
else
return eof_token_;
}
inline bool finished() const
{
return (token_list_.end() == token_itr_);
}
inline std::basic_string<CharT> remaining() const
{
using string = std::basic_string<CharT>;
if (finished())
return string();
else if (token_list_.begin() != token_itr_)
return string(base_itr_ + (token_itr_ - 1)->position,s_end_);
else
return string(base_itr_ + token_itr_->position,s_end_);
}
private:
inline bool is_end(const CharT* itr)
{
return (s_end_ == itr);
}
inline void skip_whitespace()
{
while (!is_end(s_itr_) && traits::is_whitespace(*s_itr_))
{
++s_itr_;
}
}
inline void scan_token()
{
skip_whitespace();
if (is_end(s_itr_))
{
return;
}
else if (traits::is_operator_char(*s_itr_))
{
scan_operator();
return;
}
else if (traits::is_letter(*s_itr_) || ('_' == (*s_itr_)))
{
scan_symbol();
return;
}
else if (traits::is_digit((*s_itr_)) || ('.' == (*s_itr_)))
{
scan_number();
return;
}
else if ('\'' == (*s_itr_) || '\"' == (*s_itr_))
{
scan_string();
return;
}
else
{
token_t t;
t.set_error(token::e_error,s_itr_,s_itr_ + 1,base_itr_);
token_list_.push_back(t);
++s_itr_;
}
}
inline void scan_operator()
{
token_t t;
if (!is_end(s_itr_ + 1))
{
token_t::token_type ttype = token_t::e_none;
CharT c0 = s_itr_[0];
CharT c1 = s_itr_[1];
if ((c0 == '<') && (c1 == '=')) ttype = token_t::e_lte;
else if ((c0 == '>') && (c1 == '=')) ttype = token_t::e_gte;
else if ((c0 == '<') && (c1 == '>')) ttype = token_t::e_ne;
else if ((c0 == '!') && (c1 == '=')) ttype = token_t::e_ne;
else if ((c0 == '=') && (c1 == '=')) ttype = token_t::e_eq;
else if ((c0 == ':') && (c1 == '=')) ttype = token_t::e_assign;
else if ((c0 == '<') && (c1 == '<')) ttype = token_t::e_shl;
else if ((c0 == '>') && (c1 == '>')) ttype = token_t::e_shr;
else if ((c0 == '*') && (c1 == '*')) ttype = token_t::e_mulmul;
else if ((c0 == '/') && (c1 == '/')) ttype = token_t::e_divdiv;
if (token_t::e_none != ttype)
{
t.set_operator(ttype,s_itr_,s_itr_ + 2,base_itr_);
token_list_.push_back(t);
s_itr_ += 2;
return;
}
}
if ('<' == *s_itr_)
t.set_operator(token_t::e_lt ,s_itr_,s_itr_ + 1,base_itr_);
else if ('>' == *s_itr_)
t.set_operator(token_t::e_gt ,s_itr_,s_itr_ + 1,base_itr_);
else if (';' == *s_itr_)
t.set_operator(token_t::e_eof,s_itr_,s_itr_ + 1,base_itr_);
else if ('&' == *s_itr_)
t.set_symbol(s_itr_,s_itr_ + 1,base_itr_);
else if ('|' == *s_itr_)
t.set_operator(token::e_pipe,s_itr_,s_itr_ + 1,base_itr_);
else
t.set_operator(token_t::token_type(*s_itr_),s_itr_,s_itr_ + 1,base_itr_);
token_list_.push_back(t);
++s_itr_;
}
inline void scan_symbol()
{
const CharT* begin = s_itr_;
while (
(!is_end(s_itr_)) &&
(traits::is_letter_or_digit(*s_itr_) || ((*s_itr_) == '_'))
)
{
++s_itr_;
}
token_t t;
t.set_symbol(begin,s_itr_,base_itr_);
token_list_.push_back(t);
}
inline void scan_number()
{
/*
Attempt to match a valid numeric value in one of the following formats:
01. 123456
02. 123.456
03. 123.456e3
04. 123.456E3
05. 123.456e+3
06. 123.456E+3
07. 123.456e-3
08. 123.456E-3
09. .1234
10. .1234e3
11. .1234E+3
12. .1234e+3
13. .1234E-3
14. .1234e-3
*/
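// A second decimal point (e.g. "1.2.3") or an exponent marker that is not
// followed by a sign or digit (e.g. "1e") is reported as e_err_number.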
const CharT* begin = s_itr_;
bool dot_found = false;
bool e_found = false;
bool post_e_sign_found = false;
bool post_e_digit_found = false;
token_t t;
if (('.' == *begin) && (is_end(begin + 1) || !traits::is_digit(begin[1])))
{
scan_operator();
return;
}
while (!is_end(s_itr_))
{
if ('.' == (*s_itr_))
{
if (dot_found)
{
t.set_error(token::e_err_number,begin,s_itr_,base_itr_);
token_list_.push_back(t);
return;
}
dot_found = true;
++s_itr_;
continue;
}
else if (traits::imatch('e',(*s_itr_)))
{
if (is_end(s_itr_ + 1))
{
t.set_error(token::e_err_number,begin,s_itr_,base_itr_);
token_list_.push_back(t);
return;
}
const CharT c = *(s_itr_ + 1);
if (
('+' != c) &&
('-' != c) &&
!traits::is_digit(c)
)
{
t.set_error(token::e_err_number,begin,s_itr_,base_itr_);
token_list_.push_back(t);
return;
}
e_found = true;
++s_itr_;
continue;
}
else if (e_found && traits::is_sign(*s_itr_) && !post_e_digit_found)
{
if (post_e_sign_found)
{
t.set_error(token::e_err_number,begin,s_itr_,base_itr_);
token_list_.push_back(t);
return;
}
post_e_sign_found = true;
++s_itr_;
continue;
}
else if (e_found && traits::is_digit(*s_itr_))
{
post_e_digit_found = true;
++s_itr_;
continue;
}
else if (('.' != (*s_itr_)) && !traits::is_digit(*s_itr_))
break;
else
++s_itr_;
}
t.set_numeric(begin,s_itr_,base_itr_);
token_list_.push_back(t);
return;
}
inline void scan_string()
{
CharT endChar = *s_itr_;
const CharT* begin = s_itr_ + 1;
token_t t;
if (std::distance(s_itr_,s_end_) < 2)
{
t.set_error(token::e_err_string,s_itr_,s_end_,base_itr_);
token_list_.push_back(t);
return;
}
++s_itr_;
bool escaped = false;
while (!is_end(s_itr_))
{
if (!escaped && ('\\' == *s_itr_))
{
escaped = true;
++s_itr_;
continue;
}
else if (!escaped)
{
if (endChar == *s_itr_)
break;
}
else
escaped = false;
++s_itr_;
}
if (is_end(s_itr_))
{
t.set_error(token::e_err_string,begin,s_itr_,base_itr_);
token_list_.push_back(t);
return;
}
t.set_string(begin,s_itr_,base_itr_);
token_list_.push_back(t);
++s_itr_;
return;
}
private:
token_list_t token_list_;
token_list_itr_t token_itr_;
token_list_itr_t store_token_itr_;
token_t eof_token_;
const CharT* base_itr_;
const CharT* s_itr_;
const CharT* s_end_;
friend class token_scanner;
friend class token_modifier;
friend class token_inserter;
friend class token_joiner;
};
#if 0
class helper_interface
{
public:
virtual void init() { }
virtual void reset() { }
virtual bool result() { return true; }
virtual std::size_t process(generator&) { return 0; }
virtual ~helper_interface() { }
};
class token_scanner : public helper_interface
{
public:
virtual ~token_scanner()
{}
explicit token_scanner(const std::size_t& stride)
: stride_(stride)
{
if (stride > 4)
{
throw std::invalid_argument("token_scanner() - Invalid stride value");
}
}
inline std::size_t process(generator& g)
{
if (!g.token_list_.empty())
{
for (std::size_t i = 0; i < (g.token_list_.size() - stride_ + 1); ++i)
{
token t;
switch (stride_)
{
case 1 :
{
const token& t0 = g.token_list_[i];
if (!operator()(t0)) return i;
}
break;
case 2 :
{
const token& t0 = g.token_list_[i ];
const token& t1 = g.token_list_[i + 1];
if (!operator()(t0,t1)) return i;
}
break;
case 3 :
{
const token& t0 = g.token_list_[i ];
const token& t1 = g.token_list_[i + 1];
const token& t2 = g.token_list_[i + 2];
if (!operator()(t0,t1,t2)) return i;
}
break;
case 4 :
{
const token& t0 = g.token_list_[i ];
const token& t1 = g.token_list_[i + 1];
const token& t2 = g.token_list_[i + 2];
const token& t3 = g.token_list_[i + 3];
if (!operator()(t0,t1,t2,t3)) return i;
}
break;
}
}
}
return (g.token_list_.size() - stride_ + 1);
}
virtual bool operator()(const token&)
{
return false;
}
virtual bool operator()(const token&, const token&)
{
return false;
}
virtual bool operator()(const token&, const token&, const token&)
{
return false;
}
virtual bool operator()(const token&, const token&, const token&, const token&)
{
return false;
}
private:
std::size_t stride_;
};
class token_modifier : public helper_interface
{
public:
inline std::size_t process(generator& g)
{
std::size_t changes = 0;
for (std::size_t i = 0; i < g.token_list_.size(); ++i)
{
if (modify(g.token_list_[i])) changes++;
}
return changes;
}
virtual bool modify(token& t) = 0;
};
class token_inserter : public helper_interface
{
public:
explicit token_inserter(const std::size_t& stride)
: stride_(stride)
{
if (stride > 5)
{
throw std::invalid_argument("token_inserter() - Invalid stride value");
}
}
inline std::size_t process(generator& g)
{
if (g.token_list_.empty())
return 0;
std::size_t changes = 0;
for (std::size_t i = 0; i < (g.token_list_.size() - stride_ + 1); ++i)
{
token t;
int insert_index = -1;
switch (stride_)
{
case 1 : insert_index = insert(g.token_list_[i],t);
break;
case 2 : insert_index = insert(g.token_list_[i],g.token_list_[i + 1],t);
break;
case 3 : insert_index = insert(g.token_list_[i],g.token_list_[i + 1],g.token_list_[i + 2],t);
break;
case 4 : insert_index = insert(g.token_list_[i],g.token_list_[i + 1],g.token_list_[i + 2],g.token_list_[i + 3],t);
break;
case 5 : insert_index = insert(g.token_list_[i],g.token_list_[i + 1],g.token_list_[i + 2],g.token_list_[i + 3],g.token_list_[i + 4],t);
break;
}
if ((insert_index >= 0) && (insert_index <= (static_cast<int>(stride_) + 1)))
{
g.token_list_.insert(g.token_list_.begin() + (i + insert_index),t);
changes++;
}
}
return changes;
}
virtual inline int insert(const token&, token& )
{
return -1;
}
virtual inline int insert(const token&, const token&, token&)
{
return -1;
}
virtual inline int insert(const token&, const token&, const token&, token&)
{
return -1;
}
virtual inline int insert(const token&, const token&, const token&, const token&, token&)
{
return -1;
}
virtual inline int insert(const token&, const token&, const token&, const token&, const token&, token&)
{
return -1;
}
private:
std::size_t stride_;
};
class token_joiner : public helper_interface
{
public:
inline std::size_t process(generator& g)
{
if (g.token_list_.empty())
return 0;
std::size_t changes = 0;
for (std::size_t i = 0; i < g.token_list_.size() - 1; ++i)
{
token t;
if (join(g.token_list_[i],g.token_list_[i + 1],t))
{
g.token_list_[i] = t;
g.token_list_.erase(g.token_list_.begin() + (i + 1));
++changes;
}
}
return changes;
}
virtual bool join(const token&, const token&, token&) = 0;
};
namespace helper
{
inline void dump(lexertk::generator& generator)
{
for (std::size_t i = 0; i < generator.size(); ++i)
{
lexertk::token t = generator[i];
printf("Token[%02d] @ %03d %6s --> '%s'\n",
static_cast<unsigned int>(i),
static_cast<unsigned int>(t.position),
t.to_str(t.type).c_str(),
t.value.c_str());
}
}
class commutative_inserter : public token_inserter
{
public:
commutative_inserter()
: lexertk::token_inserter(2)
{}
inline void ignore_symbol(const std::string& symbol)
{
ignore_set_.insert(symbol);
}
inline int insert(const lexertk::token& t0, const lexertk::token& t1, lexertk::token& new_token)
{
new_token.type = lexertk::token::e_mul;
new_token.value = "*";
new_token.position = t1.position;
bool match = false;
if (t0.type == lexertk::token::e_symbol)
{
if (ignore_set_.end() != ignore_set_.find(t0.value))
{
return -1;
}
else if (!t0.value.empty() && ('$' == t0.value[0]))
{
return -1;
}
}
if (t1.type == lexertk::token::e_symbol)
{
if (ignore_set_.end() != ignore_set_.find(t1.value))
{
return -1;
}
}
if ((t0.type == lexertk::token::e_number ) && (t1.type == lexertk::token::e_symbol )) match = true;
else if ((t0.type == lexertk::token::e_number ) && (t1.type == lexertk::token::e_lbracket )) match = true;
else if ((t0.type == lexertk::token::e_number ) && (t1.type == lexertk::token::e_lcrlbracket)) match = true;
else if ((t0.type == lexertk::token::e_number ) && (t1.type == lexertk::token::e_lsqrbracket)) match = true;
else if ((t0.type == lexertk::token::e_symbol ) && (t1.type == lexertk::token::e_number )) match = true;
else if ((t0.type == lexertk::token::e_rbracket ) && (t1.type == lexertk::token::e_number )) match = true;
else if ((t0.type == lexertk::token::e_rcrlbracket) && (t1.type == lexertk::token::e_number )) match = true;
else if ((t0.type == lexertk::token::e_rsqrbracket) && (t1.type == lexertk::token::e_number )) match = true;
else if ((t0.type == lexertk::token::e_rbracket ) && (t1.type == lexertk::token::e_symbol )) match = true;
else if ((t0.type == lexertk::token::e_rcrlbracket) && (t1.type == lexertk::token::e_symbol )) match = true;
else if ((t0.type == lexertk::token::e_rsqrbracket) && (t1.type == lexertk::token::e_symbol )) match = true;
return (match) ? 1 : -1;
}
private:
std::set<std::string,details::ilesscompare> ignore_set_;
};
class operator_joiner : public token_joiner
{
public:
inline bool join(const lexertk::token& t0, const lexertk::token& t1, lexertk::token& t)
{
//': =' --> ':='
if ((t0.type == lexertk::token::e_colon) && (t1.type == lexertk::token::e_eq))
{
t.type = lexertk::token::e_assign;
t.value = ":=";
t.position = t0.position;
return true;
}
//'> =' --> '>='
else if ((t0.type == lexertk::token::e_gt) && (t1.type == lexertk::token::e_eq))
{
t.type = lexertk::token::e_gte;
t.value = ">=";
t.position = t0.position;
return true;
}
//'< =' --> '<='
else if ((t0.type == lexertk::token::e_lt) && (t1.type == lexertk::token::e_eq))
{
t.type = lexertk::token::e_lte;
t.value = "<=";
t.position = t0.position;
return true;
}
//'= =' --> '=='
else if ((t0.type == lexertk::token::e_eq) && (t1.type == lexertk::token::e_eq))
{
t.type = lexertk::token::e_eq;
t.value = "==";
t.position = t0.position;
return true;
}
//'! =' --> '!='
else if ((static_cast<char>(t0.type) == '!') && (t1.type == lexertk::token::e_eq))
{
t.type = lexertk::token::e_ne;
t.value = "!=";
t.position = t0.position;
return true;
}
//'< >' --> '<>'
else if ((t0.type == lexertk::token::e_lt) && (t1.type == lexertk::token::e_gt))
{
t.type = lexertk::token::e_ne;
t.value = "<>";
t.position = t0.position;
return true;
}
else
return false;
}
};
class bracket_checker : public token_scanner
{
public:
bracket_checker()
: token_scanner(1),
state_(true)
{}
bool result()
{
return state_ && stack_.empty();
}
lexertk::token error_token()
{
return error_token_;
}
void reset()
{
//why? because msvc doesn't support swap properly.
stack_ = std::stack<char>();
state_ = true;
error_token_.clear();
}
bool operator()(const lexertk::token& t)
{
if (
!t.value.empty() &&
(lexertk::token::e_string != t.type) &&
(lexertk::token::e_symbol != t.type) &&
details::is_bracket(t.value[0])
)
{
char c = t.value[0];
if (t.type == lexertk::token::e_lbracket) stack_.push(')');
else if (t.type == lexertk::token::e_lcrlbracket) stack_.push('}');
else if (t.type == lexertk::token::e_lsqrbracket) stack_.push(']');
else if (details::is_right_bracket(c))
{
if (stack_.empty())
{
state_ = false;
error_token_ = t;
return false;
}
else if (c != stack_.top())
{
state_ = false;
error_token_ = t;
return false;
}
else
stack_.pop();
}
}
return true;
}
private:
bool state_;
std::stack<char> stack_;
lexertk::token error_token_;
};
class symbol_replacer : public token_modifier
{
private:
typedef std::map<std::string,std::pair<std::string,token::token_type>,details::ilesscompare> replace_map_t;
public:
bool remove(const std::string& target_symbol)
{
replace_map_t::iterator itr = replace_map_.find(target_symbol);
if (replace_map_.end() == itr)
return false;
replace_map_.erase(itr);
return true;
}
bool add_replace(const std::string& target_symbol,
const std::string& replace_symbol,
const lexertk::token::token_type token_type = lexertk::token::e_symbol)
{
replace_map_t::iterator itr = replace_map_.find(target_symbol);
if (replace_map_.end() != itr)
{
return false;
}
replace_map_[target_symbol] = std::make_pair(replace_symbol,token_type);
return true;
}
void clear()
{
replace_map_.clear();
}
private:
bool modify(lexertk::token& t)
{
if (lexertk::token::e_symbol == t.type)
{
if (replace_map_.empty())
return false;
replace_map_t::iterator itr = replace_map_.find(t.value);
if (replace_map_.end() != itr)
{
t.value = itr->second.first;
t.type = itr->second.second;
return true;
}
}
return false;
}
replace_map_t replace_map_;
};
class sequence_validator : public token_scanner
{
private:
typedef std::pair<lexertk::token::token_type,lexertk::token::token_type> token_pair_t;
typedef std::set<token_pair_t> set_t;
public:
sequence_validator()
: lexertk::token_scanner(2)
{
add_invalid(lexertk::token::e_number,lexertk::token::e_number);
add_invalid(lexertk::token::e_string,lexertk::token::e_string);
add_invalid(lexertk::token::e_number,lexertk::token::e_string);
add_invalid(lexertk::token::e_string,lexertk::token::e_number);
add_invalid(lexertk::token::e_string,lexertk::token::e_colon);
add_invalid(lexertk::token::e_colon,lexertk::token::e_string);
add_invalid_set1(lexertk::token::e_assign);
add_invalid_set1(lexertk::token::e_shr );
add_invalid_set1(lexertk::token::e_shl );
add_invalid_set1(lexertk::token::e_lte );
add_invalid_set1(lexertk::token::e_ne );
add_invalid_set1(lexertk::token::e_gte );
add_invalid_set1(lexertk::token::e_lt );
add_invalid_set1(lexertk::token::e_gt );
add_invalid_set1(lexertk::token::e_eq );
add_invalid_set1(lexertk::token::e_comma );
add_invalid_set1(lexertk::token::e_add );
add_invalid_set1(lexertk::token::e_sub );
add_invalid_set1(lexertk::token::e_div );
add_invalid_set1(lexertk::token::e_mul );
add_invalid_set1(lexertk::token::e_mod );
add_invalid_set1(lexertk::token::e_pow );
add_invalid_set1(lexertk::token::e_colon );
}
bool result()
{
return error_list_.empty();
}
bool operator()(const lexertk::token& t0, const lexertk::token& t1)
{
set_t::value_type p = std::make_pair(t0.type,t1.type);
if (invalid_bracket_check(t0.type,t1.type))
{
error_list_.push_back(std::make_pair(t0,t1));
}
else if (invalid_comb_.find(p) != invalid_comb_.end())
error_list_.push_back(std::make_pair(t0,t1));
return true;
}
std::size_t error_count()
{
return error_list_.size();
}
std::pair<lexertk::token,lexertk::token> error(const std::size_t index)
{
if (index < error_list_.size())
{
return error_list_[index];
}
else
{
static const lexertk::token error_token;
return std::make_pair(error_token,error_token);
}
}
void clear_errors()
{
error_list_.clear();
}
private:
void add_invalid(lexertk::token::token_type base, lexertk::token::token_type t)
{
invalid_comb_.insert(std::make_pair(base,t));
}
void add_invalid_set1(lexertk::token::token_type t)
{
add_invalid(t,lexertk::token::e_assign);
add_invalid(t,lexertk::token::e_shr );
add_invalid(t,lexertk::token::e_shl );
add_invalid(t,lexertk::token::e_lte );
add_invalid(t,lexertk::token::e_ne );
add_invalid(t,lexertk::token::e_gte );
add_invalid(t,lexertk::token::e_lt );
add_invalid(t,lexertk::token::e_gt );
add_invalid(t,lexertk::token::e_eq );
add_invalid(t,lexertk::token::e_comma );
add_invalid(t,lexertk::token::e_div );
add_invalid(t,lexertk::token::e_mul );
add_invalid(t,lexertk::token::e_mod );
add_invalid(t,lexertk::token::e_pow );
add_invalid(t,lexertk::token::e_colon );
}
bool invalid_bracket_check(lexertk::token::token_type base, lexertk::token::token_type t)
{
if (details::is_right_bracket(static_cast<char>(base)))
{
switch (t)
{
case lexertk::token::e_string : return true;
case lexertk::token::e_assign : return true;
default : return false;
}
}
else if (details::is_left_bracket(static_cast<char>(base)))
{
if (details::is_right_bracket(static_cast<char>(t)))
return false;
else if (details::is_left_bracket(static_cast<char>(t)))
return false;
else
{
switch (t)
{
case lexertk::token::e_number : return false;
case lexertk::token::e_symbol : return false;
case lexertk::token::e_string : return false;
case lexertk::token::e_add : return false;
case lexertk::token::e_sub : return false;
case lexertk::token::e_colon : return false;
default : return true;
}
}
}
else if (details::is_right_bracket(static_cast<char>(t)))
{
switch (base)
{
case lexertk::token::e_number : return false;
case lexertk::token::e_symbol : return false;
case lexertk::token::e_string : return false;
case lexertk::token::e_eof : return false;
case lexertk::token::e_colon : return false;
default : return true;
}
}
else if (details::is_left_bracket(static_cast<char>(t)))
{
switch (base)
{
case lexertk::token::e_rbracket : return true;
case lexertk::token::e_rsqrbracket : return true;
case lexertk::token::e_rcrlbracket : return true;
default : return false;
}
}
return false;
}
set_t invalid_comb_;
std::deque<std::pair<lexertk::token,lexertk::token> > error_list_;
};
struct helper_assembly
{
inline bool register_scanner(lexertk::token_scanner* scanner)
{
if (token_scanner_list.end() != std::find(token_scanner_list.begin(),
token_scanner_list.end(),
scanner))
{
return false;
}
token_scanner_list.push_back(scanner);
return true;
}
inline bool register_modifier(lexertk::token_modifier* modifier)
{
if (token_modifier_list.end() != std::find(token_modifier_list.begin(),
token_modifier_list.end(),
modifier))
{
return false;
}
token_modifier_list.push_back(modifier);
return true;
}
inline bool register_joiner(lexertk::token_joiner* joiner)
{
if (token_joiner_list.end() != std::find(token_joiner_list.begin(),
token_joiner_list.end(),
joiner))
{
return false;
}
token_joiner_list.push_back(joiner);
return true;
}
inline bool register_inserter(lexertk::token_inserter* inserter)
{
if (token_inserter_list.end() != std::find(token_inserter_list.begin(),
token_inserter_list.end(),
inserter))
{
return false;
}
token_inserter_list.push_back(inserter);
return true;
}
inline bool run_modifiers(lexertk::generator& g)
{
error_token_modifier = reinterpret_cast<lexertk::token_modifier*>(0);
for (std::size_t i = 0; i < token_modifier_list.size(); ++i)
{
lexertk::token_modifier& modifier = (*token_modifier_list[i]);
modifier.reset();
modifier.process(g);
if (!modifier.result())
{
error_token_modifier = token_modifier_list[i];
return false;
}
}
return true;
}
inline bool run_joiners(lexertk::generator& g)
{
error_token_joiner = reinterpret_cast<lexertk::token_joiner*>(0);
for (std::size_t i = 0; i < token_joiner_list.size(); ++i)
{
lexertk::token_joiner& joiner = (*token_joiner_list[i]);
joiner.reset();
joiner.process(g);
if (!joiner.result())
{
error_token_joiner = token_joiner_list[i];
return false;
}
}
return true;
}
inline bool run_inserters(lexertk::generator& g)
{
error_token_inserter = reinterpret_cast<lexertk::token_inserter*>(0);
for (std::size_t i = 0; i < token_inserter_list.size(); ++i)
{
lexertk::token_inserter& inserter = (*token_inserter_list[i]);
inserter.reset();
inserter.process(g);
if (!inserter.result())
{
error_token_inserter = token_inserter_list[i];
return false;
}
}
return true;
}
inline bool run_scanners(lexertk::generator& g)
{
error_token_scanner = reinterpret_cast<lexertk::token_scanner*>(0);
for (std::size_t i = 0; i < token_scanner_list.size(); ++i)
{
lexertk::token_scanner& scanner = (*token_scanner_list[i]);
scanner.reset();
scanner.process(g);
if (!scanner.result())
{
error_token_scanner = token_scanner_list[i];
return false;
}
}
return true;
}
std::deque<lexertk::token_scanner*> token_scanner_list;
std::deque<lexertk::token_modifier*> token_modifier_list;
std::deque<lexertk::token_joiner*> token_joiner_list;
std::deque<lexertk::token_inserter*> token_inserter_list;
lexertk::token_scanner* error_token_scanner;
lexertk::token_modifier* error_token_modifier;
lexertk::token_joiner* error_token_joiner;
lexertk::token_inserter* error_token_inserter;
};
}
class parser_helper
{
public:
typedef token token_t;
typedef generator generator_t;
inline bool init(const std::string& str)
{
if (!lexer_.process(str))
{
return false;
}
lexer_.begin();
next_token();
return true;
}
inline generator_t& lexer()
{
return lexer_;
}
inline const generator_t& lexer() const
{
return lexer_;
}
inline void next_token()
{
current_token_ = lexer_.next_token();
}
inline const token_t& current_token() const
{
return current_token_;
}
enum token_advance_mode
{
e_hold = 0,
e_advance = 1
};
inline void advance_token(const token_advance_mode mode)
{
if (e_advance == mode)
{
next_token();
}
}
inline bool token_is(const token_t::token_type& ttype, const token_advance_mode mode = e_advance)
{
if (current_token().type != ttype)
{
return false;
}
advance_token(mode);
return true;
}
inline bool token_is(const token_t::token_type& ttype,
const std::string& value,
const token_advance_mode mode = e_advance)
{
if (
(current_token().type != ttype) ||
!details::imatch(value,current_token().value)
)
{
return false;
}
advance_token(mode);
return true;
}
inline bool token_is_then_assign(const token_t::token_type& ttype,
std::string& token,
const token_advance_mode mode = e_advance)
{
if (current_token_.type != ttype)
{
return false;
}
token = current_token_.value;
advance_token(mode);
return true;
}
template <typename Allocator,
template <typename,typename> class Container>
inline bool token_is_then_assign(const token_t::token_type& ttype,
Container<std::string,Allocator>& token_list,
const token_advance_mode mode = e_advance)
{
if (current_token_.type != ttype)
{
return false;
}
token_list.push_back(current_token_.value);
advance_token(mode);
return true;
}
inline bool peek_token_is(const token_t::token_type& ttype)
{
return (lexer_.peek_next_token().type == ttype);
}
inline bool peek_token_is(const std::string& s)
{
return (details::imatch(lexer_.peek_next_token().value,s));
}
private:
generator_t lexer_;
token_t current_token_;
};
#endif
} // namespace lexertk
#endif // JINJA2CPP_SRC_LEXERTK_H