aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/yaml-cpp/src/scanner.h
blob: 6c1ebcecfecf8ac1f070c3022e0cf24ea2cacb64 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
#ifndef SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#define SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66

#if defined(_MSC_VER) ||                                            \
    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
#pragma once
#endif

#include <cstddef>
#include <ios>
#include <map>
#include <queue>
#include <set>
#include <stack>
#include <string>

#include "ptr_vector.h"
#include "stream.h"
#include "token.h"
#include "yaml-cpp/mark.h"

namespace YAML {
class Node;
class RegEx;

/**
 * Converts a character stream into a stream of tokens.
 */
class Scanner {
 public:
  explicit Scanner(std::istream &in);
  ~Scanner();

  /** True when there are no more tokens left to read. */
  bool empty();

  /** Drops the token at the head of the queue. */
  void pop();

  /** Gives access to the head token of the queue without removing it. */
  Token &peek();

  /** Reports the current mark in the input stream. */
  Mark mark() const;

 private:
  /**
   * Describes one indentation level (see PushIndentTo, which produces these).
   */
  struct IndentMarker {
    enum INDENT_TYPE { MAP, SEQ, NONE };
    enum STATUS { VALID, INVALID, UNKNOWN };

    int column;
    INDENT_TYPE type;
    STATUS status;
    Token *pStartToken;

    /** A fresh marker starts out VALID and with a null start token. */
    IndentMarker(int col, INDENT_TYPE kind)
        : column(col), type(kind), status(VALID), pStartToken(0) {}
  };

  enum FLOW_MARKER { FLOW_MAP, FLOW_SEQ };

 private:
  // scanning

  /**
   * Keeps scanning until either a valid token sits at the front of the queue
   * or the queue is empty. Use {@link #empty} to find out which, and
   * {@link #peek} to fetch the token.
   */
  void EnsureTokensInQueue();

  /**
   * Central scanning routine: dispatches to whichever scanner handles the
   * token that comes next.
   */
  void ScanNextToken();

  /** Consumes input up to the next token-like thing. */
  void ScanToNextToken();

  /** Establishes the initial conditions for starting a stream. */
  void StartStream();

  /** Finishes up and closes out the stream. */
  void EndStream();

  Token *PushToken(Token::TYPE type);

  /** Number of entries currently on the flow stack. */
  std::size_t GetFlowLevel() const { return m_flows.size(); }

  /** True while at least one flow marker is on the flow stack. */
  bool InFlowContext() const { return GetFlowLevel() != 0; }

  /** True while the flow stack is empty. */
  bool InBlockContext() const { return GetFlowLevel() == 0; }

  Token::TYPE GetStartTokenFor(IndentMarker::INDENT_TYPE type) const;

  /**
   * Pushes an indentation onto the stack and enqueues the matching token
   * (sequence start or mapping start).
   *
   * @return the indent marker that was generated (if any).
   */
  IndentMarker *PushIndentTo(int column, IndentMarker::INDENT_TYPE type);

  /**
   * Pops indentations until the current indentation level is reached,
   * enqueuing the proper token for each one, then pops off every invalid
   * indentation.
   */
  void PopIndentToHere();

  /**
   * Pops every indentation except the base empty one, enqueuing the proper
   * token for each.
   */
  void PopAllIndents();

  /** Pops exactly one indent and pushes the proper token. */
  void PopIndent();
  int GetTopIndent() const;

  // checking input
  bool CanInsertPotentialSimpleKey() const;
  bool ExistsActiveSimpleKey() const;
  void InsertPotentialSimpleKey();
  void InvalidateSimpleKey();
  bool VerifySimpleKey();
  void PopAllSimpleKeys();

  /**
   * Throws a ParserException carrying the current token location (when one is
   * available); no further tokens are parsed.
   */
  void ThrowParserException(const std::string &msg) const;

  bool IsWhitespaceToBeEaten(char ch);

  /**
   * Picks the regex used to decide whether the next token is a value token.
   */
  const RegEx &GetValueRegex() const;

  /** Bookkeeping entry stored on the simple-key stack (m_simpleKeys). */
  struct SimpleKey {
    SimpleKey(const Mark &mark_, std::size_t flowLevel_);

    void Validate();
    void Invalidate();

    Mark mark;
    std::size_t flowLevel;
    IndentMarker *pIndent;
    Token *pMapStart;
    Token *pKey;
  };

  // one scanner per kind of token
  void ScanDirective();
  void ScanDocStart();
  void ScanDocEnd();
  void ScanBlockSeqStart();
  // NOTE(review): the capital 'T' in "MapSTart" is kept exactly as declared;
  // any definition or caller lives outside this header - confirm before
  // renaming.
  void ScanBlockMapSTart();
  void ScanBlockEnd();
  void ScanBlockEntry();
  void ScanFlowStart();
  void ScanFlowEnd();
  void ScanFlowEntry();
  void ScanKey();
  void ScanValue();
  void ScanAnchorOrAlias();
  void ScanTag();
  void ScanPlainScalar();
  void ScanQuotedScalar();
  void ScanBlockScalar();

 private:
  // input: the stream being scanned
  Stream INPUT;

  // output: the tokens produced so far
  std::queue<Token> m_tokens;

  // scanner state
  bool m_startedStream;
  bool m_endedStream;
  bool m_simpleKeyAllowed;
  bool m_canBeJSONFlow;
  std::stack<SimpleKey> m_simpleKeys;
  std::stack<IndentMarker *> m_indents;
  ptr_vector<IndentMarker> m_indentRefs;  // for "garbage collection"
  std::stack<FLOW_MARKER> m_flows;
};
}  // namespace YAML

#endif  // SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66