aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/cxxsupp/libcxx/src/filesystem/path_parser.h
blob: 630391fe6b5bd50540413b05382091cb0cad9286 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef PATH_PARSER_H
#define PATH_PARSER_H

#include <__config>
#include <__utility/unreachable.h>
#include <cstddef>
#include <filesystem>
#include <utility>

#include "format_string.h"

_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM

/// \brief Report whether \p C is a directory separator.
///
/// '/' is always a separator; '\\' is accepted as well, but only when
/// building against the Win32 API.
inline bool isSeparator(path::value_type C) {
#if defined(_LIBCPP_WIN32API)
  return C == '/' || C == '\\';
#else
  return C == '/';
#endif
}

/// \brief Report whether \p C lies in 'a'..'z' or 'A'..'Z', i.e. whether it
///   can serve as the letter of a drive specifier.
inline bool isDriveLetter(path::value_type C) {
  const bool IsLower = 'a' <= C && C <= 'z';
  const bool IsUpper = 'A' <= C && C <= 'Z';
  return IsLower || IsUpper;
}

namespace parser {

// View over the characters of a path, using the path's own string-view type.
using string_view_t = path::__string_view;
// Two views returned together (see separate_filename below).
using string_view_pair = pair<string_view_t, string_view_t>;
// Pointer to a read-only path character; used as a cursor while lexing.
using PosPtr = path::value_type const*;

/// \brief Bidirectional lexer that walks the elements of a path string.
///
/// The parser keeps a view of the whole path (`Path`), a view of the element
/// it is currently positioned on (`RawEntry`) and a state describing what kind
/// of element that is. `increment()` moves to the next element and
/// `decrement()` to the previous one.
///
/// The private `consume*` helpers work in both directions: they scan forwards
/// when `P < End` and backwards otherwise. For backwards scans the "end"
/// sentinel is the position one before the first character of the path
/// (see getBeforeFront()).
struct PathParser {
  enum ParserState : unsigned char {
    // Zero is a special sentinel value used by default constructed iterators.
    // The values below mirror the state constants declared by path::iterator.
    PS_BeforeBegin = path::iterator::_BeforeBegin,
    PS_InRootName = path::iterator::_InRootName,
    PS_InRootDir = path::iterator::_InRootDir,
    PS_InFilenames = path::iterator::_InFilenames,
    PS_InTrailingSep = path::iterator::_InTrailingSep,
    PS_AtEnd = path::iterator::_AtEnd
  };

  // The complete path being parsed; never changes after construction.
  const string_view_t Path;
  // The characters of the current element exactly as they appear in Path.
  // Empty in the PS_BeforeBegin and PS_AtEnd states.
  string_view_t RawEntry;
  // Kind of element the parser is currently positioned on.
  ParserState State;

private:
  // Builds a parser in the given state with an empty RawEntry. Only used by
  // CreateBegin() and CreateEnd().
  PathParser(string_view_t P, ParserState State) noexcept : Path(P),
                                                            State(State) {}

public:
  /// \brief Rebuild a parser from an explicit position: the full path \p P,
  ///   the raw text of the current element \p E and the state value \p S.
  PathParser(string_view_t P, string_view_t E, unsigned char S)
      : Path(P), RawEntry(E), State(static_cast<ParserState>(S)) {
    // S cannot be '0' or PS_BeforeBegin. This is a precondition on the
    // caller; it is not checked here.
  }

  /// \brief Create a parser positioned on the first element of \p P, or at
  ///   the end when \p P is empty.
  static PathParser CreateBegin(string_view_t P) noexcept {
    PathParser PP(P, PS_BeforeBegin);
    PP.increment();
    return PP;
  }

  /// \brief Create a parser positioned past the last element of \p P.
  static PathParser CreateEnd(string_view_t P) noexcept {
    PathParser PP(P, PS_AtEnd);
    return PP;
  }

  /// \brief Return a pointer to the first character after the current
  ///   element, or nullptr when nothing follows it.
  PosPtr peek() const noexcept {
    auto TkEnd = getNextTokenStartPos();
    auto End = getAfterBack();
    return TkEnd == End ? nullptr : TkEnd;
  }

  /// \brief Advance to the next element, or to PS_AtEnd when the input is
  ///   exhausted.
  void increment() noexcept {
    const PosPtr End = getAfterBack();
    const PosPtr Start = getNextTokenStartPos();
    if (Start == End)
      return makeState(PS_AtEnd);

    switch (State) {
    case PS_BeforeBegin: {
      // A root name can only appear as the very first element.
      PosPtr TkEnd = consumeRootName(Start, End);
      if (TkEnd)
        return makeState(PS_InRootName, Start, TkEnd);
    }
      // No root name: continue as if we had just stepped past one.
      _LIBCPP_FALLTHROUGH();
    case PS_InRootName: {
      // A run of separators here forms the root directory; otherwise the
      // next element is an ordinary filename.
      PosPtr TkEnd = consumeAllSeparators(Start, End);
      if (TkEnd)
        return makeState(PS_InRootDir, Start, TkEnd);
      else
        return makeState(PS_InFilenames, Start, consumeName(Start, End));
    }
    case PS_InRootDir:
      return makeState(PS_InFilenames, Start, consumeName(Start, End));

    case PS_InFilenames: {
      // Skip the separators following the current filename. If a name
      // follows them it becomes the new element; if the separators run to
      // the end of the path they are reported as a trailing separator.
      PosPtr SepEnd = consumeAllSeparators(Start, End);
      if (SepEnd != End) {
        PosPtr TkEnd = consumeName(SepEnd, End);
        if (TkEnd)
          return makeState(PS_InFilenames, SepEnd, TkEnd);
      }
      return makeState(PS_InTrailingSep, Start, SepEnd);
    }

    case PS_InTrailingSep:
      return makeState(PS_AtEnd);

    case PS_AtEnd:
      __libcpp_unreachable();
    }
  }

  /// \brief Step back to the previous element, or to PS_BeforeBegin when the
  ///   current element starts at the front of the path.
  void decrement() noexcept {
    // REnd is the backwards-scan sentinel (one before the first character);
    // RStart is the character immediately preceding the current element.
    const PosPtr REnd = getBeforeFront();
    const PosPtr RStart = getCurrentTokenStartPos() - 1;
    if (RStart == REnd) // we're decrementing the begin
      return makeState(PS_BeforeBegin);

    switch (State) {
    case PS_AtEnd: {
      // Try to consume a trailing separator or root directory first.
      if (PosPtr SepEnd = consumeAllSeparators(RStart, REnd)) {
        // The separators reach the front of the path: root directory.
        if (SepEnd == REnd)
          return makeState(PS_InRootDir, Path.data(), RStart + 1);
        // Everything before the separators is a root name, so the
        // separators are the root directory; the entry recorded is the
        // single character at RStart.
        PosPtr TkStart = consumeRootName(SepEnd, REnd);
        if (TkStart == REnd)
          return makeState(PS_InRootDir, RStart, RStart + 1);
        // Otherwise the separators follow a filename: trailing separator.
        return makeState(PS_InTrailingSep, SepEnd + 1, RStart + 1);
      } else {
        // The path does not end in a separator: the last element is either
        // a root name that spans the whole path or a filename.
        PosPtr TkStart = consumeRootName(RStart, REnd);
        if (TkStart == REnd)
          return makeState(PS_InRootName, TkStart + 1, RStart + 1);
        TkStart = consumeName(RStart, REnd);
        return makeState(PS_InFilenames, TkStart + 1, RStart + 1);
      }
    }
    case PS_InTrailingSep:
      // The element preceding a trailing separator is a filename.
      return makeState(PS_InFilenames, consumeName(RStart, REnd) + 1,
                       RStart + 1);
    case PS_InFilenames: {
      // Skip (backwards) the separators in front of the current filename.
      PosPtr SepEnd = consumeAllSeparators(RStart, REnd);
      if (SepEnd == REnd)
        return makeState(PS_InRootDir, Path.data(), RStart + 1);
      // Check whether what precedes is a root name reaching the front of
      // the path. SepEnd is null when no separator precedes the filename.
      PosPtr TkStart = consumeRootName(SepEnd ? SepEnd : RStart, REnd);
      if (TkStart == REnd) {
        if (SepEnd)
          return makeState(PS_InRootDir, SepEnd + 1, RStart + 1);
        return makeState(PS_InRootName, TkStart + 1, RStart + 1);
      }
      // Otherwise the previous element is another filename.
      TkStart = consumeName(SepEnd, REnd);
      return makeState(PS_InFilenames, TkStart + 1, SepEnd + 1);
    }
    case PS_InRootDir:
      // Everything in front of the root directory is the root name.
      return makeState(PS_InRootName, Path.data(), RStart + 1);
    case PS_InRootName:
    case PS_BeforeBegin:
      __libcpp_unreachable();
    }
  }

  /// \brief Return a view with the "preferred representation" of the current
  ///   element. A root directory is represented by a single separator
  ///   (matching the first character of the raw entry), a trailing separator
  ///   by the empty string, and root names and filenames by their raw text.
  ///   The empty string is also returned before the beginning and at the end.
  string_view_t operator*() const noexcept {
    switch (State) {
    case PS_BeforeBegin:
    case PS_AtEnd:
      return PATHSTR("");
    case PS_InRootDir:
      if (RawEntry[0] == '\\')
        return PATHSTR("\\");
      else
        return PATHSTR("/");
    case PS_InTrailingSep:
      return PATHSTR("");
    case PS_InRootName:
    case PS_InFilenames:
      return RawEntry;
    }
    __libcpp_unreachable();
  }

  /// \brief True while the parser is positioned on an element, i.e. it is
  ///   neither before the beginning nor at the end.
  explicit operator bool() const noexcept {
    return State != PS_BeforeBegin && State != PS_AtEnd;
  }

  /// \brief Pre-increment: same as increment().
  PathParser& operator++() noexcept {
    increment();
    return *this;
  }

  /// \brief Pre-decrement: same as decrement().
  PathParser& operator--() noexcept {
    decrement();
    return *this;
  }

  /// \brief True when the parser is past the last element.
  bool atEnd() const noexcept {
    return State == PS_AtEnd;
  }

  /// \brief True when the current element is the root directory.
  bool inRootDir() const noexcept {
    return State == PS_InRootDir;
  }

  /// \brief True when the current element is the root name.
  bool inRootName() const noexcept {
    return State == PS_InRootName;
  }

  /// \brief True when the current element is the root name or the root
  ///   directory.
  bool inRootPath() const noexcept {
    return inRootName() || inRootDir();
  }

private:
  // Switch to NewState with the current element covering [Start, End).
  void makeState(ParserState NewState, PosPtr Start, PosPtr End) noexcept {
    State = NewState;
    RawEntry = string_view_t(Start, End - Start);
  }
  // Switch to NewState with no current element (empty RawEntry).
  void makeState(ParserState NewState) noexcept {
    State = NewState;
    RawEntry = {};
  }

  // One past the last character of Path: the end sentinel for forward scans.
  PosPtr getAfterBack() const noexcept { return Path.data() + Path.size(); }

  // One before the first character of Path: the end sentinel for backward
  // scans. It is only compared against, never dereferenced, by the helpers
  // below.
  PosPtr getBeforeFront() const noexcept { return Path.data() - 1; }

  /// \brief Return a pointer to the first character after the currently
  ///   lexed element.
  PosPtr getNextTokenStartPos() const noexcept {
    switch (State) {
    case PS_BeforeBegin:
      return Path.data();
    case PS_InRootName:
    case PS_InRootDir:
    case PS_InFilenames:
      return &RawEntry.back() + 1;
    case PS_InTrailingSep:
    case PS_AtEnd:
      return getAfterBack();
    }
    __libcpp_unreachable();
  }

  /// \brief Return a pointer to the first character in the currently lexed
  ///   element. At the end this is the position one past the last character
  ///   of the path.
  PosPtr getCurrentTokenStartPos() const noexcept {
    switch (State) {
    case PS_BeforeBegin:
    case PS_InRootName:
      return &Path.front();
    case PS_InRootDir:
    case PS_InFilenames:
    case PS_InTrailingSep:
      return &RawEntry.front();
    case PS_AtEnd:
      return &Path.back() + 1;
    }
    __libcpp_unreachable();
  }

  // Consume all consecutive separators.
  // Scans forwards when P < End and backwards otherwise. Returns nullptr when
  // P is null, equals End, or does not point at a separator; otherwise
  // returns the first position (in scan direction) that is not a separator,
  // which may be End.
  PosPtr consumeAllSeparators(PosPtr P, PosPtr End) const noexcept {
    if (P == nullptr || P == End || !isSeparator(*P))
      return nullptr;
    const int Inc = P < End ? 1 : -1;
    P += Inc;
    while (P != End && isSeparator(*P))
      P += Inc;
    return P;
  }

  // Consume exactly N separators, or return nullptr.
  // The run of separators starting at P must be exactly N long, in either
  // scan direction.
  PosPtr consumeNSeparators(PosPtr P, PosPtr End, int N) const noexcept {
    PosPtr Ret = consumeAllSeparators(P, End);
    if (Ret == nullptr)
      return nullptr;
    if (P < End) {
      if (Ret == P + N)
        return Ret;
    } else {
      if (Ret == P - N)
        return Ret;
    }
    return nullptr;
  }

  // Consume a run of non-separator characters starting at P, in either scan
  // direction. Returns nullptr when P is null, equals End, or points at a
  // separator; otherwise returns the position where the run stops.
  PosPtr consumeName(PosPtr P, PosPtr End) const noexcept {
    PosPtr Start = P;
    if (P == nullptr || P == End || isSeparator(*P))
      return nullptr;
    const int Inc = P < End ? 1 : -1;
    P += Inc;
    while (P != End && !isSeparator(*P))
      P += Inc;
    if (P == End && Inc < 0) {
      // Iterating backwards and consumed all the rest of the input.
      // Check if the start of the string would have been considered
      // a root name.
      // If so, stop the name right after that root name instead of
      // swallowing it.
      PosPtr RootEnd = consumeRootName(End + 1, Start);
      if (RootEnd)
        return RootEnd - 1;
    }
    return P;
  }

  // Consume a drive specifier: a drive letter followed by ':'.
  // Forwards, P must point at the letter and P + 2 is returned. Backwards,
  // P must point at the ':' and P - 2 is returned. Returns nullptr when the
  // two characters are not available or do not match.
  PosPtr consumeDriveLetter(PosPtr P, PosPtr End) const noexcept {
    if (P == End)
      return nullptr;
    if (P < End) {
      if (P + 1 == End || !isDriveLetter(P[0]) || P[1] != ':')
        return nullptr;
      return P + 2;
    } else {
      if (P - 1 == End || !isDriveLetter(P[-1]) || P[0] != ':')
        return nullptr;
      return P - 2;
    }
  }

  // Consume a network root: exactly two separators followed by a name when
  // scanning forwards, or a name preceded by exactly two separators when
  // scanning backwards. A failed first step yields nullptr, which the second
  // helper propagates.
  PosPtr consumeNetworkRoot(PosPtr P, PosPtr End) const noexcept {
    if (P == End)
      return nullptr;
    if (P < End)
      return consumeName(consumeNSeparators(P, End, 2), End);
    else
      return consumeNSeparators(consumeName(P, End), End, 2);
  }

#if defined(_LIBCPP_WIN32API)
  // Consume a root name: either a drive specifier or a network root.
  // Returns nullptr when neither matches.
  PosPtr consumeRootName(PosPtr P, PosPtr End) const noexcept {
    if (PosPtr Ret = consumeDriveLetter(P, End))
      return Ret;
    if (PosPtr Ret = consumeNetworkRoot(P, End))
      return Ret;
    return nullptr;
  }
#else
  // Without the Win32 API no root names are recognised: always nullptr.
  PosPtr consumeRootName(PosPtr /*P*/, PosPtr /*End*/) const noexcept {
    return nullptr;
  }
#endif
};

/// \brief Split a filename into a (stem, extension) pair.
///
/// ".", ".." and the empty string are returned whole with an empty
/// extension. A name without a '.', or whose only '.' is its first
/// character, is likewise returned whole. Otherwise the name is split at its
/// last '.', and the extension keeps that '.'.
inline string_view_pair separate_filename(string_view_t const& s) {
  const bool IsSpecialName =
      s.empty() || s == PATHSTR(".") || s == PATHSTR("..");
  if (IsSpecialName)
    return string_view_pair{s, PATHSTR("")};

  const auto DotPos = s.find_last_of('.');
  const bool HasExtension = DotPos != string_view_t::npos && DotPos != 0;
  if (!HasExtension)
    return string_view_pair{s, string_view_t{}};

  const string_view_t Stem = s.substr(0, DotPos);
  const string_view_t Extension = s.substr(DotPos);
  return string_view_pair{Stem, Extension};
}

/// \brief Build a view over the inclusive character range [S, E].
inline string_view_t createView(PosPtr S, PosPtr E) noexcept {
  // E addresses the last character of the range, hence the extra one.
  const size_t Length = static_cast<size_t>(E - S) + 1;
  return string_view_t(S, Length);
}

} // namespace parser

_LIBCPP_END_NAMESPACE_FILESYSTEM

#endif // PATH_PARSER_H