Restoring authorship annotation for <[email protected]>. Commit 1 of 2.

author: bnagaev <[email protected]> 2022-02-10 16:47:04 +0300
committer: Daniil Cherednik <[email protected]> 2022-02-10 16:47:04 +0300
commit: d6449ba66291ff0c0d352c82e6eb3efb4c8a7e8d (patch)
tree: d5dca6d44593f5e52556a1cc7b1ab0386e096ebe /contrib/libs/yaml-cpp/src/stream.cpp
parent: 1861d4c1402bb2c67a3e6b43b51706081b74508a (diff)
1 files changed, 448 insertions, 448 deletions
diff --git a/contrib/libs/yaml-cpp/src/stream.cpp b/contrib/libs/yaml-cpp/src/stream.cpp
index 3b013cfa7d3..070eda1ad9b 100644
--- a/contrib/libs/yaml-cpp/src/stream.cpp
+++ b/contrib/libs/yaml-cpp/src/stream.cpp
@@ -1,448 +1,448 @@
-#include <iostream>
-
-#include "stream.h"
-
-#ifndef YAML_PREFETCH_SIZE
-#define YAML_PREFETCH_SIZE 2048
-#endif
-
-#define S_ARRAY_SIZE(A) (sizeof(A) / sizeof(*(A)))
-#define S_ARRAY_END(A) ((A) + S_ARRAY_SIZE(A))
-
-#define CP_REPLACEMENT_CHARACTER (0xFFFD)
-
-namespace YAML {
-enum UtfIntroState {
-  uis_start,
-  uis_utfbe_b1,
-  uis_utf32be_b2,
-  uis_utf32be_bom3,
-  uis_utf32be,
-  uis_utf16be,
-  uis_utf16be_bom1,
-  uis_utfle_bom1,
-  uis_utf16le_bom2,
-  uis_utf32le_bom3,
-  uis_utf16le,
-  uis_utf32le,
-  uis_utf8_imp,
-  uis_utf16le_imp,
-  uis_utf32le_imp3,
-  uis_utf8_bom1,
-  uis_utf8_bom2,
-  uis_utf8,
-  uis_error
-};
-
-enum UtfIntroCharType {
-  uict00,
-  uictBB,
-  uictBF,
-  uictEF,
-  uictFE,
-  uictFF,
-  uictAscii,
-  uictOther,
-  uictMax
-};
-
-static bool s_introFinalState[] = {
-    false,  // uis_start
-    false,  // uis_utfbe_b1
-    false,  // uis_utf32be_b2
-    false,  // uis_utf32be_bom3
-    true,   // uis_utf32be
-    true,   // uis_utf16be
-    false,  // uis_utf16be_bom1
-    false,  // uis_utfle_bom1
-    false,  // uis_utf16le_bom2
-    false,  // uis_utf32le_bom3
-    true,   // uis_utf16le
-    true,   // uis_utf32le
-    false,  // uis_utf8_imp
-    false,  // uis_utf16le_imp
-    false,  // uis_utf32le_imp3
-    false,  // uis_utf8_bom1
-    false,  // uis_utf8_bom2
-    true,   // uis_utf8
-    true,   // uis_error
-};
-
-static UtfIntroState s_introTransitions[][uictMax] = {
-    // uict00,           uictBB,           uictBF,           uictEF,
-    // uictFE,           uictFF,           uictAscii,        uictOther
-    {uis_utfbe_b1, uis_utf8, uis_utf8, uis_utf8_bom1, uis_utf16be_bom1,
-     uis_utfle_bom1, uis_utf8_imp, uis_utf8},
-    {uis_utf32be_b2, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8,
-     uis_utf16be, uis_utf8},
-    {uis_utf32be, uis_utf8, uis_utf8, uis_utf8, uis_utf32be_bom3, uis_utf8,
-     uis_utf8, uis_utf8},
-    {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf32be, uis_utf8,
-     uis_utf8},
-    {uis_utf32be, uis_utf32be, uis_utf32be, uis_utf32be, uis_utf32be,
-     uis_utf32be, uis_utf32be, uis_utf32be},
-    {uis_utf16be, uis_utf16be, uis_utf16be, uis_utf16be, uis_utf16be,
-     uis_utf16be, uis_utf16be, uis_utf16be},
-    {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf16be, uis_utf8,
-     uis_utf8},
-    {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf16le_bom2, uis_utf8,
-     uis_utf8, uis_utf8},
-    {uis_utf32le_bom3, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le,
-     uis_utf16le, uis_utf16le, uis_utf16le},
-    {uis_utf32le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le,
-     uis_utf16le, uis_utf16le, uis_utf16le},
-    {uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le,
-     uis_utf16le, uis_utf16le, uis_utf16le},
-    {uis_utf32le, uis_utf32le, uis_utf32le, uis_utf32le, uis_utf32le,
-     uis_utf32le, uis_utf32le, uis_utf32le},
-    {uis_utf16le_imp, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8,
-     uis_utf8, uis_utf8},
-    {uis_utf32le_imp3, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le,
-     uis_utf16le, uis_utf16le, uis_utf16le},
-    {uis_utf32le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le,
-     uis_utf16le, uis_utf16le, uis_utf16le},
-    {uis_utf8, uis_utf8_bom2, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8,
-     uis_utf8},
-    {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8,
-     uis_utf8},
-    {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8,
-     uis_utf8},
-};
-
-static char s_introUngetCount[][uictMax] = {
-    // uict00, uictBB, uictBF, uictEF, uictFE, uictFF, uictAscii, uictOther
-    {0, 1, 1, 0, 0, 0, 0, 1},
-    {0, 2, 2, 2, 2, 2, 2, 2},
-    {3, 3, 3, 3, 0, 3, 3, 3},
-    {4, 4, 4, 4, 4, 0, 4, 4},
-    {1, 1, 1, 1, 1, 1, 1, 1},
-    {1, 1, 1, 1, 1, 1, 1, 1},
-    {2, 2, 2, 2, 2, 0, 2, 2},
-    {2, 2, 2, 2, 0, 2, 2, 2},
-    {0, 1, 1, 1, 1, 1, 1, 1},
-    {0, 2, 2, 2, 2, 2, 2, 2},
-    {1, 1, 1, 1, 1, 1, 1, 1},
-    {1, 1, 1, 1, 1, 1, 1, 1},
-    {0, 2, 2, 2, 2, 2, 2, 2},
-    {0, 3, 3, 3, 3, 3, 3, 3},
-    {4, 4, 4, 4, 4, 4, 4, 4},
-    {2, 0, 2, 2, 2, 2, 2, 2},
-    {3, 3, 0, 3, 3, 3, 3, 3},
-    {1, 1, 1, 1, 1, 1, 1, 1},
-};
-
-inline UtfIntroCharType IntroCharTypeOf(std::istream::int_type ch) {
-  if (std::istream::traits_type::eof() == ch) {
-    return uictOther;
-  }
-
-  switch (ch) {
-    case 0:
-      return uict00;
-    case 0xBB:
-      return uictBB;
-    case 0xBF:
-      return uictBF;
-    case 0xEF:
-      return uictEF;
-    case 0xFE:
-      return uictFE;
-    case 0xFF:
-      return uictFF;
-  }
-
-  if ((ch > 0) && (ch < 0xFF)) {
-    return uictAscii;
-  }
-
-  return uictOther;
-}
-
-inline char Utf8Adjust(unsigned long ch, unsigned char lead_bits,
-                       unsigned char rshift) {
-  const unsigned char header = ((1 << lead_bits) - 1) << (8 - lead_bits);
-  const unsigned char mask = (0xFF >> (lead_bits + 1));
-  return static_cast<char>(
-      static_cast<unsigned char>(header | ((ch >> rshift) & mask)));
-}
-
-inline void QueueUnicodeCodepoint(std::deque<char>& q, unsigned long ch) {
-  // We are not allowed to queue the Stream::eof() codepoint, so
-  // replace it with CP_REPLACEMENT_CHARACTER
-  if (static_cast<unsigned long>(Stream::eof()) == ch) {
-    ch = CP_REPLACEMENT_CHARACTER;
-  }
-
-  if (ch < 0x80) {
-    q.push_back(Utf8Adjust(ch, 0, 0));
-  } else if (ch < 0x800) {
-    q.push_back(Utf8Adjust(ch, 2, 6));
-    q.push_back(Utf8Adjust(ch, 1, 0));
-  } else if (ch < 0x10000) {
-    q.push_back(Utf8Adjust(ch, 3, 12));
-    q.push_back(Utf8Adjust(ch, 1, 6));
-    q.push_back(Utf8Adjust(ch, 1, 0));
-  } else {
-    q.push_back(Utf8Adjust(ch, 4, 18));
-    q.push_back(Utf8Adjust(ch, 1, 12));
-    q.push_back(Utf8Adjust(ch, 1, 6));
-    q.push_back(Utf8Adjust(ch, 1, 0));
-  }
-}
-
-Stream::Stream(std::istream& input)
-    : m_input(input),
-      m_pPrefetched(new unsigned char[YAML_PREFETCH_SIZE]),
-      m_nPrefetchedAvailable(0),
-      m_nPrefetchedUsed(0) {
-  typedef std::istream::traits_type char_traits;
-
-  if (!input)
-    return;
-
-  // Determine (or guess) the character-set by reading the BOM, if any.  See
-  // the YAML specification for the determination algorithm.
-  char_traits::int_type intro[4];
-  int nIntroUsed = 0;
-  UtfIntroState state = uis_start;
-  for (; !s_introFinalState[state];) {
-    std::istream::int_type ch = input.get();
-    intro[nIntroUsed++] = ch;
-    UtfIntroCharType charType = IntroCharTypeOf(ch);
-    UtfIntroState newState = s_introTransitions[state][charType];
-    int nUngets = s_introUngetCount[state][charType];
-    if (nUngets > 0) {
-      input.clear();
-      for (; nUngets > 0; --nUngets) {
-        if (char_traits::eof() != intro[--nIntroUsed])
-          input.putback(char_traits::to_char_type(intro[nIntroUsed]));
-      }
-    }
-    state = newState;
-  }
-
-  switch (state) {
-    case uis_utf8:
-      m_charSet = utf8;
-      break;
-    case uis_utf16le:
-      m_charSet = utf16le;
-      break;
-    case uis_utf16be:
-      m_charSet = utf16be;
-      break;
-    case uis_utf32le:
-      m_charSet = utf32le;
-      break;
-    case uis_utf32be:
-      m_charSet = utf32be;
-      break;
-    default:
-      m_charSet = utf8;
-      break;
-  }
-
-  ReadAheadTo(0);
-}
-
-Stream::~Stream() { delete[] m_pPrefetched; }
-
-char Stream::peek() const {
-  if (m_readahead.empty()) {
-    return Stream::eof();
-  }
-
-  return m_readahead[0];
-}
-
-Stream::operator bool() const {
-  return m_input.good() ||
-         (!m_readahead.empty() && m_readahead[0] != Stream::eof());
-}
-
-// get
-// . Extracts a character from the stream and updates our position
-char Stream::get() {
-  char ch = peek();
-  AdvanceCurrent();
-  m_mark.column++;
-
-  if (ch == '\n') {
-    m_mark.column = 0;
-    m_mark.line++;
-  }
-
-  return ch;
-}
-
-// get
-// . Extracts 'n' characters from the stream and updates our position
-std::string Stream::get(int n) {
-  std::string ret;
-  ret.reserve(n);
-  for (int i = 0; i < n; i++)
-    ret += get();
-  return ret;
-}
-
-// eat
-// . Eats 'n' characters and updates our position.
-void Stream::eat(int n) {
-  for (int i = 0; i < n; i++)
-    get();
-}
-
-void Stream::AdvanceCurrent() {
-  if (!m_readahead.empty()) {
-    m_readahead.pop_front();
-    m_mark.pos++;
-  }
-
-  ReadAheadTo(0);
-}
-
-bool Stream::_ReadAheadTo(size_t i) const {
-  while (m_input.good() && (m_readahead.size() <= i)) {
-    switch (m_charSet) {
-      case utf8:
-        StreamInUtf8();
-        break;
-      case utf16le:
-        StreamInUtf16();
-        break;
-      case utf16be:
-        StreamInUtf16();
-        break;
-      case utf32le:
-        StreamInUtf32();
-        break;
-      case utf32be:
-        StreamInUtf32();
-        break;
-    }
-  }
-
-  // signal end of stream
-  if (!m_input.good())
-    m_readahead.push_back(Stream::eof());
-
-  return m_readahead.size() > i;
-}
-
-void Stream::StreamInUtf8() const {
-  unsigned char b = GetNextByte();
-  if (m_input.good()) {
-    m_readahead.push_back(b);
-  }
-}
-
-void Stream::StreamInUtf16() const {
-  unsigned long ch = 0;
-  unsigned char bytes[2];
-  int nBigEnd = (m_charSet == utf16be) ? 0 : 1;
-
-  bytes[0] = GetNextByte();
-  bytes[1] = GetNextByte();
-  if (!m_input.good()) {
-    return;
-  }
-  ch = (static_cast<unsigned long>(bytes[nBigEnd]) << 8) |
-       static_cast<unsigned long>(bytes[1 ^ nBigEnd]);
-
-  if (ch >= 0xDC00 && ch < 0xE000) {
-    // Trailing (low) surrogate...ugh, wrong order
-    QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER);
-    return;
-  } else if (ch >= 0xD800 && ch < 0xDC00) {
-    // ch is a leading (high) surrogate
-
-    // Four byte UTF-8 code point
-
-    // Read the trailing (low) surrogate
-    for (;;) {
-      bytes[0] = GetNextByte();
-      bytes[1] = GetNextByte();
-      if (!m_input.good()) {
-        QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER);
-        return;
-      }
-      unsigned long chLow = (static_cast<unsigned long>(bytes[nBigEnd]) << 8) |
-                            static_cast<unsigned long>(bytes[1 ^ nBigEnd]);
-      if (chLow < 0xDC00 || chLow >= 0xE000) {
-        // Trouble...not a low surrogate.  Dump a REPLACEMENT CHARACTER into the
-        // stream.
-        QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER);
-
-        // Deal with the next UTF-16 unit
-        if (chLow < 0xD800 || chLow >= 0xE000) {
-          // Easiest case: queue the codepoint and return
-          QueueUnicodeCodepoint(m_readahead, ch);
-          return;
-        } else {
-          // Start the loop over with the new high surrogate
-          ch = chLow;
-          continue;
-        }
-      }
-
-      // Select the payload bits from the high surrogate
-      ch &= 0x3FF;
-      ch <<= 10;
-
-      // Include bits from low surrogate
-      ch |= (chLow & 0x3FF);
-
-      // Add the surrogacy offset
-      ch += 0x10000;
-      break;
-    }
-  }
-
-  QueueUnicodeCodepoint(m_readahead, ch);
-}
-
-inline char* ReadBuffer(unsigned char* pBuffer) {
-  return reinterpret_cast<char*>(pBuffer);
-}
-
-unsigned char Stream::GetNextByte() const {
-  if (m_nPrefetchedUsed >= m_nPrefetchedAvailable) {
-    std::streambuf* pBuf = m_input.rdbuf();
-    m_nPrefetchedAvailable = static_cast<std::size_t>(
-        pBuf->sgetn(ReadBuffer(m_pPrefetched), YAML_PREFETCH_SIZE));
-    m_nPrefetchedUsed = 0;
-    if (!m_nPrefetchedAvailable) {
-      m_input.setstate(std::ios_base::eofbit);
-    }
-
-    if (0 == m_nPrefetchedAvailable) {
-      return 0;
-    }
-  }
-
-  return m_pPrefetched[m_nPrefetchedUsed++];
-}
-
-void Stream::StreamInUtf32() const {
-  static int indexes[2][4] = {{3, 2, 1, 0}, {0, 1, 2, 3}};
-
-  unsigned long ch = 0;
-  unsigned char bytes[4];
-  int* pIndexes = (m_charSet == utf32be) ? indexes[1] : indexes[0];
-
-  bytes[0] = GetNextByte();
-  bytes[1] = GetNextByte();
-  bytes[2] = GetNextByte();
-  bytes[3] = GetNextByte();
-  if (!m_input.good()) {
-    return;
-  }
-
-  for (int i = 0; i < 4; ++i) {
-    ch <<= 8;
-    ch |= bytes[pIndexes[i]];
-  }
-
-  QueueUnicodeCodepoint(m_readahead, ch);
-}
-}
+#include <iostream> 
+ 
+#include "stream.h" 
+ 
+#ifndef YAML_PREFETCH_SIZE 
+#define YAML_PREFETCH_SIZE 2048 
+#endif 
+ 
+#define S_ARRAY_SIZE(A) (sizeof(A) / sizeof(*(A))) 
+#define S_ARRAY_END(A) ((A) + S_ARRAY_SIZE(A)) 
+ 
+#define CP_REPLACEMENT_CHARACTER (0xFFFD) 
+ 
+namespace YAML { 
+enum UtfIntroState { 
+  uis_start, 
+  uis_utfbe_b1, 
+  uis_utf32be_b2, 
+  uis_utf32be_bom3, 
+  uis_utf32be, 
+  uis_utf16be, 
+  uis_utf16be_bom1, 
+  uis_utfle_bom1, 
+  uis_utf16le_bom2, 
+  uis_utf32le_bom3, 
+  uis_utf16le, 
+  uis_utf32le, 
+  uis_utf8_imp, 
+  uis_utf16le_imp, 
+  uis_utf32le_imp3, 
+  uis_utf8_bom1, 
+  uis_utf8_bom2, 
+  uis_utf8, 
+  uis_error 
+}; 
+ 
+enum UtfIntroCharType { 
+  uict00, 
+  uictBB, 
+  uictBF, 
+  uictEF, 
+  uictFE, 
+  uictFF, 
+  uictAscii, 
+  uictOther, 
+  uictMax 
+}; 
+ 
+static bool s_introFinalState[] = { 
+    false,  // uis_start 
+    false,  // uis_utfbe_b1 
+    false,  // uis_utf32be_b2 
+    false,  // uis_utf32be_bom3 
+    true,   // uis_utf32be 
+    true,   // uis_utf16be 
+    false,  // uis_utf16be_bom1 
+    false,  // uis_utfle_bom1 
+    false,  // uis_utf16le_bom2 
+    false,  // uis_utf32le_bom3 
+    true,   // uis_utf16le 
+    true,   // uis_utf32le 
+    false,  // uis_utf8_imp 
+    false,  // uis_utf16le_imp 
+    false,  // uis_utf32le_imp3 
+    false,  // uis_utf8_bom1 
+    false,  // uis_utf8_bom2 
+    true,   // uis_utf8 
+    true,   // uis_error 
+}; 
+ 
+static UtfIntroState s_introTransitions[][uictMax] = { 
+    // uict00,           uictBB,           uictBF,           uictEF, 
+    // uictFE,           uictFF,           uictAscii,        uictOther 
+    {uis_utfbe_b1, uis_utf8, uis_utf8, uis_utf8_bom1, uis_utf16be_bom1, 
+     uis_utfle_bom1, uis_utf8_imp, uis_utf8}, 
+    {uis_utf32be_b2, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, 
+     uis_utf16be, uis_utf8}, 
+    {uis_utf32be, uis_utf8, uis_utf8, uis_utf8, uis_utf32be_bom3, uis_utf8, 
+     uis_utf8, uis_utf8}, 
+    {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf32be, uis_utf8, 
+     uis_utf8}, 
+    {uis_utf32be, uis_utf32be, uis_utf32be, uis_utf32be, uis_utf32be, 
+     uis_utf32be, uis_utf32be, uis_utf32be}, 
+    {uis_utf16be, uis_utf16be, uis_utf16be, uis_utf16be, uis_utf16be, 
+     uis_utf16be, uis_utf16be, uis_utf16be}, 
+    {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf16be, uis_utf8, 
+     uis_utf8}, 
+    {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf16le_bom2, uis_utf8, 
+     uis_utf8, uis_utf8}, 
+    {uis_utf32le_bom3, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, 
+     uis_utf16le, uis_utf16le, uis_utf16le}, 
+    {uis_utf32le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, 
+     uis_utf16le, uis_utf16le, uis_utf16le}, 
+    {uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, 
+     uis_utf16le, uis_utf16le, uis_utf16le}, 
+    {uis_utf32le, uis_utf32le, uis_utf32le, uis_utf32le, uis_utf32le, 
+     uis_utf32le, uis_utf32le, uis_utf32le}, 
+    {uis_utf16le_imp, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, 
+     uis_utf8, uis_utf8}, 
+    {uis_utf32le_imp3, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, 
+     uis_utf16le, uis_utf16le, uis_utf16le}, 
+    {uis_utf32le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, 
+     uis_utf16le, uis_utf16le, uis_utf16le}, 
+    {uis_utf8, uis_utf8_bom2, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, 
+     uis_utf8}, 
+    {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, 
+     uis_utf8}, 
+    {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, 
+     uis_utf8}, 
+}; 
+ 
+static char s_introUngetCount[][uictMax] = { 
+    // uict00, uictBB, uictBF, uictEF, uictFE, uictFF, uictAscii, uictOther 
+    {0, 1, 1, 0, 0, 0, 0, 1}, 
+    {0, 2, 2, 2, 2, 2, 2, 2}, 
+    {3, 3, 3, 3, 0, 3, 3, 3}, 
+    {4, 4, 4, 4, 4, 0, 4, 4}, 
+    {1, 1, 1, 1, 1, 1, 1, 1}, 
+    {1, 1, 1, 1, 1, 1, 1, 1}, 
+    {2, 2, 2, 2, 2, 0, 2, 2}, 
+    {2, 2, 2, 2, 0, 2, 2, 2}, 
+    {0, 1, 1, 1, 1, 1, 1, 1}, 
+    {0, 2, 2, 2, 2, 2, 2, 2}, 
+    {1, 1, 1, 1, 1, 1, 1, 1}, 
+    {1, 1, 1, 1, 1, 1, 1, 1}, 
+    {0, 2, 2, 2, 2, 2, 2, 2}, 
+    {0, 3, 3, 3, 3, 3, 3, 3}, 
+    {4, 4, 4, 4, 4, 4, 4, 4}, 
+    {2, 0, 2, 2, 2, 2, 2, 2}, 
+    {3, 3, 0, 3, 3, 3, 3, 3}, 
+    {1, 1, 1, 1, 1, 1, 1, 1}, 
+}; 
+ 
+inline UtfIntroCharType IntroCharTypeOf(std::istream::int_type ch) { 
+  if (std::istream::traits_type::eof() == ch) { 
+    return uictOther; 
+  } 
+ 
+  switch (ch) { 
+    case 0: 
+      return uict00; 
+    case 0xBB: 
+      return uictBB; 
+    case 0xBF: 
+      return uictBF; 
+    case 0xEF: 
+      return uictEF; 
+    case 0xFE: 
+      return uictFE; 
+    case 0xFF: 
+      return uictFF; 
+  } 
+ 
+  if ((ch > 0) && (ch < 0xFF)) { 
+    return uictAscii; 
+  } 
+ 
+  return uictOther; 
+} 
+ 
+inline char Utf8Adjust(unsigned long ch, unsigned char lead_bits, 
+                       unsigned char rshift) { 
+  const unsigned char header = ((1 << lead_bits) - 1) << (8 - lead_bits); 
+  const unsigned char mask = (0xFF >> (lead_bits + 1)); 
+  return static_cast<char>( 
+      static_cast<unsigned char>(header | ((ch >> rshift) & mask))); 
+} 
+ 
+inline void QueueUnicodeCodepoint(std::deque<char>& q, unsigned long ch) { 
+  // We are not allowed to queue the Stream::eof() codepoint, so 
+  // replace it with CP_REPLACEMENT_CHARACTER 
+  if (static_cast<unsigned long>(Stream::eof()) == ch) { 
+    ch = CP_REPLACEMENT_CHARACTER; 
+  } 
+ 
+  if (ch < 0x80) { 
+    q.push_back(Utf8Adjust(ch, 0, 0)); 
+  } else if (ch < 0x800) { 
+    q.push_back(Utf8Adjust(ch, 2, 6)); 
+    q.push_back(Utf8Adjust(ch, 1, 0)); 
+  } else if (ch < 0x10000) { 
+    q.push_back(Utf8Adjust(ch, 3, 12)); 
+    q.push_back(Utf8Adjust(ch, 1, 6)); 
+    q.push_back(Utf8Adjust(ch, 1, 0)); 
+  } else { 
+    q.push_back(Utf8Adjust(ch, 4, 18)); 
+    q.push_back(Utf8Adjust(ch, 1, 12)); 
+    q.push_back(Utf8Adjust(ch, 1, 6)); 
+    q.push_back(Utf8Adjust(ch, 1, 0)); 
+  } 
+} 
+ 
+Stream::Stream(std::istream& input) 
+    : m_input(input), 
+      m_pPrefetched(new unsigned char[YAML_PREFETCH_SIZE]), 
+      m_nPrefetchedAvailable(0), 
+      m_nPrefetchedUsed(0) { 
+  typedef std::istream::traits_type char_traits; 
+ 
+  if (!input) 
+    return; 
+ 
+  // Determine (or guess) the character-set by reading the BOM, if any.  See 
+  // the YAML specification for the determination algorithm. 
+  char_traits::int_type intro[4]; 
+  int nIntroUsed = 0; 
+  UtfIntroState state = uis_start; 
+  for (; !s_introFinalState[state];) { 
+    std::istream::int_type ch = input.get(); 
+    intro[nIntroUsed++] = ch; 
+    UtfIntroCharType charType = IntroCharTypeOf(ch); 
+    UtfIntroState newState = s_introTransitions[state][charType]; 
+    int nUngets = s_introUngetCount[state][charType]; 
+    if (nUngets > 0) { 
+      input.clear(); 
+      for (; nUngets > 0; --nUngets) { 
+        if (char_traits::eof() != intro[--nIntroUsed]) 
+          input.putback(char_traits::to_char_type(intro[nIntroUsed])); 
+      } 
+    } 
+    state = newState; 
+  } 
+ 
+  switch (state) { 
+    case uis_utf8: 
+      m_charSet = utf8; 
+      break; 
+    case uis_utf16le: 
+      m_charSet = utf16le; 
+      break; 
+    case uis_utf16be: 
+      m_charSet = utf16be; 
+      break; 
+    case uis_utf32le: 
+      m_charSet = utf32le; 
+      break; 
+    case uis_utf32be: 
+      m_charSet = utf32be; 
+      break; 
+    default: 
+      m_charSet = utf8; 
+      break; 
+  } 
+ 
+  ReadAheadTo(0); 
+} 
+ 
+Stream::~Stream() { delete[] m_pPrefetched; } 
+ 
+char Stream::peek() const { 
+  if (m_readahead.empty()) { 
+    return Stream::eof(); 
+  } 
+ 
+  return m_readahead[0]; 
+} 
+ 
+Stream::operator bool() const { 
+  return m_input.good() || 
+         (!m_readahead.empty() && m_readahead[0] != Stream::eof()); 
+} 
+ 
+// get 
+// . Extracts a character from the stream and updates our position 
+char Stream::get() { 
+  char ch = peek(); 
+  AdvanceCurrent(); 
+  m_mark.column++; 
+ 
+  if (ch == '\n') { 
+    m_mark.column = 0; 
+    m_mark.line++; 
+  } 
+ 
+  return ch; 
+} 
+ 
+// get 
+// . Extracts 'n' characters from the stream and updates our position 
+std::string Stream::get(int n) { 
+  std::string ret; 
+  ret.reserve(n); 
+  for (int i = 0; i < n; i++) 
+    ret += get(); 
+  return ret; 
+} 
+ 
+// eat 
+// . Eats 'n' characters and updates our position. 
+void Stream::eat(int n) { 
+  for (int i = 0; i < n; i++) 
+    get(); 
+} 
+ 
+void Stream::AdvanceCurrent() { 
+  if (!m_readahead.empty()) { 
+    m_readahead.pop_front(); 
+    m_mark.pos++; 
+  } 
+ 
+  ReadAheadTo(0); 
+} 
+ 
+bool Stream::_ReadAheadTo(size_t i) const { 
+  while (m_input.good() && (m_readahead.size() <= i)) { 
+    switch (m_charSet) { 
+      case utf8: 
+        StreamInUtf8(); 
+        break; 
+      case utf16le: 
+        StreamInUtf16(); 
+        break; 
+      case utf16be: 
+        StreamInUtf16(); 
+        break; 
+      case utf32le: 
+        StreamInUtf32(); 
+        break; 
+      case utf32be: 
+        StreamInUtf32(); 
+        break; 
+    } 
+  } 
+ 
+  // signal end of stream 
+  if (!m_input.good()) 
+    m_readahead.push_back(Stream::eof()); 
+ 
+  return m_readahead.size() > i; 
+} 
+ 
+void Stream::StreamInUtf8() const { 
+  unsigned char b = GetNextByte(); 
+  if (m_input.good()) { 
+    m_readahead.push_back(b); 
+  } 
+} 
+ 
+void Stream::StreamInUtf16() const { 
+  unsigned long ch = 0; 
+  unsigned char bytes[2]; 
+  int nBigEnd = (m_charSet == utf16be) ? 0 : 1; 
+ 
+  bytes[0] = GetNextByte(); 
+  bytes[1] = GetNextByte(); 
+  if (!m_input.good()) { 
+    return; 
+  } 
+  ch = (static_cast<unsigned long>(bytes[nBigEnd]) << 8) | 
+       static_cast<unsigned long>(bytes[1 ^ nBigEnd]); 
+ 
+  if (ch >= 0xDC00 && ch < 0xE000) { 
+    // Trailing (low) surrogate...ugh, wrong order 
+    QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER); 
+    return; 
+  } else if (ch >= 0xD800 && ch < 0xDC00) { 
+    // ch is a leading (high) surrogate 
+ 
+    // Four byte UTF-8 code point 
+ 
+    // Read the trailing (low) surrogate 
+    for (;;) { 
+      bytes[0] = GetNextByte(); 
+      bytes[1] = GetNextByte(); 
+      if (!m_input.good()) { 
+        QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER); 
+        return; 
+      } 
+      unsigned long chLow = (static_cast<unsigned long>(bytes[nBigEnd]) << 8) | 
+                            static_cast<unsigned long>(bytes[1 ^ nBigEnd]); 
+      if (chLow < 0xDC00 || chLow >= 0xE000) { 
+        // Trouble...not a low surrogate.  Dump a REPLACEMENT CHARACTER into the 
+        // stream. 
+        QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER); 
+ 
+        // Deal with the next UTF-16 unit 
+        if (chLow < 0xD800 || chLow >= 0xE000) { 
+          // Easiest case: queue the codepoint and return 
+          QueueUnicodeCodepoint(m_readahead, ch); 
+          return; 
+        } else { 
+          // Start the loop over with the new high surrogate 
+          ch = chLow; 
+          continue; 
+        } 
+      } 
+ 
+      // Select the payload bits from the high surrogate 
+      ch &= 0x3FF; 
+      ch <<= 10; 
+ 
+      // Include bits from low surrogate 
+      ch |= (chLow & 0x3FF); 
+ 
+      // Add the surrogacy offset 
+      ch += 0x10000; 
+      break; 
+    } 
+  } 
+ 
+  QueueUnicodeCodepoint(m_readahead, ch); 
+} 
+ 
+inline char* ReadBuffer(unsigned char* pBuffer) { 
+  return reinterpret_cast<char*>(pBuffer); 
+} 
+ 
+unsigned char Stream::GetNextByte() const { 
+  if (m_nPrefetchedUsed >= m_nPrefetchedAvailable) { 
+    std::streambuf* pBuf = m_input.rdbuf(); 
+    m_nPrefetchedAvailable = static_cast<std::size_t>( 
+        pBuf->sgetn(ReadBuffer(m_pPrefetched), YAML_PREFETCH_SIZE)); 
+    m_nPrefetchedUsed = 0; 
+    if (!m_nPrefetchedAvailable) { 
+      m_input.setstate(std::ios_base::eofbit); 
+    } 
+ 
+    if (0 == m_nPrefetchedAvailable) { 
+      return 0; 
+    } 
+  } 
+ 
+  return m_pPrefetched[m_nPrefetchedUsed++]; 
+} 
+ 
+void Stream::StreamInUtf32() const { 
+  static int indexes[2][4] = {{3, 2, 1, 0}, {0, 1, 2, 3}}; 
+ 
+  unsigned long ch = 0; 
+  unsigned char bytes[4]; 
+  int* pIndexes = (m_charSet == utf32be) ? indexes[1] : indexes[0]; 
+ 
+  bytes[0] = GetNextByte(); 
+  bytes[1] = GetNextByte(); 
+  bytes[2] = GetNextByte(); 
+  bytes[3] = GetNextByte(); 
+  if (!m_input.good()) { 
+    return; 
+  } 
+ 
+  for (int i = 0; i < 4; ++i) { 
+    ch <<= 8; 
+    ch |= bytes[pIndexes[i]]; 
+  } 
+ 
+  QueueUnicodeCodepoint(m_readahead, ch); 
+} 
+}
author	bnagaev <[email protected]>	2022-02-10 16:47:04 +0300
committer	Daniil Cherednik <[email protected]>	2022-02-10 16:47:04 +0300
commit	d6449ba66291ff0c0d352c82e6eb3efb4c8a7e8d (patch)
tree	d5dca6d44593f5e52556a1cc7b1ab0386e096ebe /contrib/libs/yaml-cpp/src/stream.cpp
parent	1861d4c1402bb2c67a3e6b43b51706081b74508a (diff)