1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
|
#pragma once
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#endif
//===- StringRef.h - Constant String Reference Wrapper ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_STRINGREF_H
#define LLVM_ADT_STRINGREF_H
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/Compiler.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstring>
#include <limits>
#include <string>
#if __cplusplus > 201402L
#include <string_view>
#endif
#include <type_traits>
#include <utility>
// Declare the __builtin_strlen intrinsic for MSVC so it can be used in
// constexpr context.
#if defined(_MSC_VER)
extern "C" size_t __builtin_strlen(const char *);
#endif
namespace llvm {
class APInt;
class hash_code;
template <typename T> class SmallVectorImpl;
class StringRef;
/// Helper functions for StringRef::getAsInteger.
bool getAsUnsignedInteger(StringRef Str, unsigned Radix,
unsigned long long &Result);
bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result);
bool consumeUnsignedInteger(StringRef &Str, unsigned Radix,
unsigned long long &Result);
bool consumeSignedInteger(StringRef &Str, unsigned Radix, long long &Result);
/// StringRef - Represent a constant reference to a string, i.e. a character
/// array and a length, which need not be null terminated.
///
/// This class does not own the string data, it is expected to be used in
/// situations where the character data resides in some other buffer, whose
/// lifetime extends past that of the StringRef. For this reason, it is not in
/// general safe to store a StringRef.
class LLVM_GSL_POINTER StringRef {
public:
static constexpr size_t npos = ~size_t(0);
using iterator = const char *;
using const_iterator = const char *;
using size_type = size_t;
private:
/// The start of the string, in an external buffer.
const char *Data = nullptr;
/// The length of the string.
size_t Length = 0;
// Workaround memcmp issue with null pointers (undefined behavior)
// by providing a specialized version
static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) {
if (Length == 0) { return 0; }
return ::memcmp(Lhs,Rhs,Length);
}
// Constexpr version of std::strlen.
static constexpr size_t strLen(const char *Str) {
#if __cplusplus > 201402L
return std::char_traits<char>::length(Str);
#elif __has_builtin(__builtin_strlen) || defined(__GNUC__) || \
(defined(_MSC_VER) && _MSC_VER >= 1916)
return __builtin_strlen(Str);
#else
const char *Begin = Str;
while (*Str != '\0')
++Str;
return Str - Begin;
#endif
}
public:
/// @name Constructors
/// @{
/// Construct an empty string ref.
/*implicit*/ StringRef() = default;
/// Disable conversion from nullptr. This prevents things like
/// if (S == nullptr)
StringRef(std::nullptr_t) = delete;
/// Construct a string ref from a cstring.
/*implicit*/ constexpr StringRef(const char *Str)
: Data(Str), Length(Str ? strLen(Str) : 0) {}
/// Construct a string ref from a pointer and length.
/*implicit*/ constexpr StringRef(const char *data, size_t length)
: Data(data), Length(length) {}
/// Construct a string ref from an std::string.
/*implicit*/ StringRef(const std::string &Str)
: Data(Str.data()), Length(Str.length()) {}
#if __cplusplus > 201402L
/// Construct a string ref from an std::string_view.
/*implicit*/ constexpr StringRef(std::string_view Str)
: Data(Str.data()), Length(Str.size()) {}
#endif
/// @}
/// @name Iterators
/// @{
iterator begin() const { return Data; }
iterator end() const { return Data + Length; }
const unsigned char *bytes_begin() const {
return reinterpret_cast<const unsigned char *>(begin());
}
const unsigned char *bytes_end() const {
return reinterpret_cast<const unsigned char *>(end());
}
iterator_range<const unsigned char *> bytes() const {
return make_range(bytes_begin(), bytes_end());
}
/// @}
/// @name String Operations
/// @{
/// data - Get a pointer to the start of the string (which may not be null
/// terminated).
LLVM_NODISCARD
const char *data() const { return Data; }
/// empty - Check if the string is empty.
LLVM_NODISCARD
constexpr bool empty() const { return Length == 0; }
/// size - Get the string size.
LLVM_NODISCARD
constexpr size_t size() const { return Length; }
/// front - Get the first character in the string.
LLVM_NODISCARD
char front() const {
assert(!empty());
return Data[0];
}
/// back - Get the last character in the string.
LLVM_NODISCARD
char back() const {
assert(!empty());
return Data[Length-1];
}
// copy - Allocate copy in Allocator and return StringRef to it.
template <typename Allocator>
LLVM_NODISCARD StringRef copy(Allocator &A) const {
// Don't request a length 0 copy from the allocator.
if (empty())
return StringRef();
char *S = A.template Allocate<char>(Length);
std::copy(begin(), end(), S);
return StringRef(S, Length);
}
/// equals - Check for string equality, this is more efficient than
/// compare() when the relative ordering of inequal strings isn't needed.
LLVM_NODISCARD
bool equals(StringRef RHS) const {
return (Length == RHS.Length &&
compareMemory(Data, RHS.Data, RHS.Length) == 0);
}
/// Check for string equality, ignoring case.
LLVM_NODISCARD
bool equals_insensitive(StringRef RHS) const {
return Length == RHS.Length && compare_insensitive(RHS) == 0;
}
/// compare - Compare two strings; the result is -1, 0, or 1 if this string
/// is lexicographically less than, equal to, or greater than the \p RHS.
LLVM_NODISCARD
int compare(StringRef RHS) const {
// Check the prefix for a mismatch.
if (int Res = compareMemory(Data, RHS.Data, std::min(Length, RHS.Length)))
return Res < 0 ? -1 : 1;
// Otherwise the prefixes match, so we only need to check the lengths.
if (Length == RHS.Length)
return 0;
return Length < RHS.Length ? -1 : 1;
}
/// Compare two strings, ignoring case.
LLVM_NODISCARD
int compare_insensitive(StringRef RHS) const;
/// compare_numeric - Compare two strings, treating sequences of digits as
/// numbers.
LLVM_NODISCARD
int compare_numeric(StringRef RHS) const;
/// Determine the edit distance between this string and another
/// string.
///
/// \param Other the string to compare this string against.
///
/// \param AllowReplacements whether to allow character
/// replacements (change one character into another) as a single
/// operation, rather than as two operations (an insertion and a
/// removal).
///
/// \param MaxEditDistance If non-zero, the maximum edit distance that
/// this routine is allowed to compute. If the edit distance will exceed
/// that maximum, returns \c MaxEditDistance+1.
///
/// \returns the minimum number of character insertions, removals,
/// or (if \p AllowReplacements is \c true) replacements needed to
/// transform one of the given strings into the other. If zero,
/// the strings are identical.
LLVM_NODISCARD
unsigned edit_distance(StringRef Other, bool AllowReplacements = true,
unsigned MaxEditDistance = 0) const;
/// str - Get the contents as an std::string.
LLVM_NODISCARD
std::string str() const {
if (!Data) return std::string();
return std::string(Data, Length);
}
/// @}
/// @name Operator Overloads
/// @{
LLVM_NODISCARD
char operator[](size_t Index) const {
assert(Index < Length && "Invalid index!");
return Data[Index];
}
/// Disallow accidental assignment from a temporary std::string.
///
/// The declaration here is extra complicated so that `stringRef = {}`
/// and `stringRef = "abc"` continue to select the move assignment operator.
template <typename T>
std::enable_if_t<std::is_same<T, std::string>::value, StringRef> &
operator=(T &&Str) = delete;
/// @}
/// @name Type Conversions
/// @{
explicit operator std::string() const { return str(); }
#if __cplusplus > 201402L
operator std::string_view() const {
return std::string_view(data(), size());
}
#endif
/// @}
/// @name String Predicates
/// @{
/// Check if this string starts with the given \p Prefix.
LLVM_NODISCARD
bool startswith(StringRef Prefix) const {
return Length >= Prefix.Length &&
compareMemory(Data, Prefix.Data, Prefix.Length) == 0;
}
/// Check if this string starts with the given \p Prefix, ignoring case.
LLVM_NODISCARD
bool startswith_insensitive(StringRef Prefix) const;
/// Check if this string ends with the given \p Suffix.
LLVM_NODISCARD
bool endswith(StringRef Suffix) const {
return Length >= Suffix.Length &&
compareMemory(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
}
/// Check if this string ends with the given \p Suffix, ignoring case.
LLVM_NODISCARD
bool endswith_insensitive(StringRef Suffix) const;
/// @}
/// @name String Searching
/// @{
/// Search for the first character \p C in the string.
///
/// \returns The index of the first occurrence of \p C, or npos if not
/// found.
LLVM_NODISCARD
size_t find(char C, size_t From = 0) const {
size_t FindBegin = std::min(From, Length);
if (FindBegin < Length) { // Avoid calling memchr with nullptr.
// Just forward to memchr, which is faster than a hand-rolled loop.
if (const void *P = ::memchr(Data + FindBegin, C, Length - FindBegin))
return static_cast<const char *>(P) - Data;
}
return npos;
}
/// Search for the first character \p C in the string, ignoring case.
///
/// \returns The index of the first occurrence of \p C, or npos if not
/// found.
LLVM_NODISCARD
size_t find_insensitive(char C, size_t From = 0) const;
/// Search for the first character satisfying the predicate \p F
///
/// \returns The index of the first character satisfying \p F starting from
/// \p From, or npos if not found.
LLVM_NODISCARD
size_t find_if(function_ref<bool(char)> F, size_t From = 0) const {
StringRef S = drop_front(From);
while (!S.empty()) {
if (F(S.front()))
return size() - S.size();
S = S.drop_front();
}
return npos;
}
/// Search for the first character not satisfying the predicate \p F
///
/// \returns The index of the first character not satisfying \p F starting
/// from \p From, or npos if not found.
LLVM_NODISCARD
size_t find_if_not(function_ref<bool(char)> F, size_t From = 0) const {
return find_if([F](char c) { return !F(c); }, From);
}
/// Search for the first string \p Str in the string.
///
/// \returns The index of the first occurrence of \p Str, or npos if not
/// found.
LLVM_NODISCARD
size_t find(StringRef Str, size_t From = 0) const;
/// Search for the first string \p Str in the string, ignoring case.
///
/// \returns The index of the first occurrence of \p Str, or npos if not
/// found.
LLVM_NODISCARD
size_t find_insensitive(StringRef Str, size_t From = 0) const;
/// Search for the last character \p C in the string.
///
/// \returns The index of the last occurrence of \p C, or npos if not
/// found.
LLVM_NODISCARD
size_t rfind(char C, size_t From = npos) const {
From = std::min(From, Length);
size_t i = From;
while (i != 0) {
--i;
if (Data[i] == C)
return i;
}
return npos;
}
/// Search for the last character \p C in the string, ignoring case.
///
/// \returns The index of the last occurrence of \p C, or npos if not
/// found.
LLVM_NODISCARD
size_t rfind_insensitive(char C, size_t From = npos) const;
/// Search for the last string \p Str in the string.
///
/// \returns The index of the last occurrence of \p Str, or npos if not
/// found.
LLVM_NODISCARD
size_t rfind(StringRef Str) const;
/// Search for the last string \p Str in the string, ignoring case.
///
/// \returns The index of the last occurrence of \p Str, or npos if not
/// found.
LLVM_NODISCARD
size_t rfind_insensitive(StringRef Str) const;
/// Find the first character in the string that is \p C, or npos if not
/// found. Same as find.
LLVM_NODISCARD
size_t find_first_of(char C, size_t From = 0) const {
return find(C, From);
}
/// Find the first character in the string that is in \p Chars, or npos if
/// not found.
///
/// Complexity: O(size() + Chars.size())
LLVM_NODISCARD
size_t find_first_of(StringRef Chars, size_t From = 0) const;
/// Find the first character in the string that is not \p C or npos if not
/// found.
LLVM_NODISCARD
size_t find_first_not_of(char C, size_t From = 0) const;
/// Find the first character in the string that is not in the string
/// \p Chars, or npos if not found.
///
/// Complexity: O(size() + Chars.size())
LLVM_NODISCARD
size_t find_first_not_of(StringRef Chars, size_t From = 0) const;
/// Find the last character in the string that is \p C, or npos if not
/// found.
LLVM_NODISCARD
size_t find_last_of(char C, size_t From = npos) const {
return rfind(C, From);
}
/// Find the last character in the string that is in \p C, or npos if not
/// found.
///
/// Complexity: O(size() + Chars.size())
LLVM_NODISCARD
size_t find_last_of(StringRef Chars, size_t From = npos) const;
/// Find the last character in the string that is not \p C, or npos if not
/// found.
LLVM_NODISCARD
size_t find_last_not_of(char C, size_t From = npos) const;
/// Find the last character in the string that is not in \p Chars, or
/// npos if not found.
///
/// Complexity: O(size() + Chars.size())
LLVM_NODISCARD
size_t find_last_not_of(StringRef Chars, size_t From = npos) const;
/// Return true if the given string is a substring of *this, and false
/// otherwise.
LLVM_NODISCARD
bool contains(StringRef Other) const { return find(Other) != npos; }
/// Return true if the given character is contained in *this, and false
/// otherwise.
LLVM_NODISCARD
bool contains(char C) const { return find_first_of(C) != npos; }
/// Return true if the given string is a substring of *this, and false
/// otherwise.
LLVM_NODISCARD
bool contains_insensitive(StringRef Other) const {
return find_insensitive(Other) != npos;
}
/// Return true if the given character is contained in *this, and false
/// otherwise.
LLVM_NODISCARD
bool contains_insensitive(char C) const {
return find_insensitive(C) != npos;
}
/// @}
/// @name Helpful Algorithms
/// @{
/// Return the number of occurrences of \p C in the string.
LLVM_NODISCARD
size_t count(char C) const {
size_t Count = 0;
for (size_t i = 0, e = Length; i != e; ++i)
if (Data[i] == C)
++Count;
return Count;
}
/// Return the number of non-overlapped occurrences of \p Str in
/// the string.
size_t count(StringRef Str) const;
/// Parse the current string as an integer of the specified radix. If
/// \p Radix is specified as zero, this does radix autosensing using
/// extended C rules: 0 is octal, 0x is hex, 0b is binary.
///
/// If the string is invalid or if only a subset of the string is valid,
/// this returns true to signify the error. The string is considered
/// erroneous if empty or if it overflows T.
template <typename T>
std::enable_if_t<std::numeric_limits<T>::is_signed, bool>
getAsInteger(unsigned Radix, T &Result) const {
long long LLVal;
if (getAsSignedInteger(*this, Radix, LLVal) ||
static_cast<T>(LLVal) != LLVal)
return true;
Result = LLVal;
return false;
}
template <typename T>
std::enable_if_t<!std::numeric_limits<T>::is_signed, bool>
getAsInteger(unsigned Radix, T &Result) const {
unsigned long long ULLVal;
// The additional cast to unsigned long long is required to avoid the
// Visual C++ warning C4805: '!=' : unsafe mix of type 'bool' and type
// 'unsigned __int64' when instantiating getAsInteger with T = bool.
if (getAsUnsignedInteger(*this, Radix, ULLVal) ||
static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
return true;
Result = ULLVal;
return false;
}
/// Parse the current string as an integer of the specified radix. If
/// \p Radix is specified as zero, this does radix autosensing using
/// extended C rules: 0 is octal, 0x is hex, 0b is binary.
///
/// If the string does not begin with a number of the specified radix,
/// this returns true to signify the error. The string is considered
/// erroneous if empty or if it overflows T.
/// The portion of the string representing the discovered numeric value
/// is removed from the beginning of the string.
template <typename T>
std::enable_if_t<std::numeric_limits<T>::is_signed, bool>
consumeInteger(unsigned Radix, T &Result) {
long long LLVal;
if (consumeSignedInteger(*this, Radix, LLVal) ||
static_cast<long long>(static_cast<T>(LLVal)) != LLVal)
return true;
Result = LLVal;
return false;
}
template <typename T>
std::enable_if_t<!std::numeric_limits<T>::is_signed, bool>
consumeInteger(unsigned Radix, T &Result) {
unsigned long long ULLVal;
if (consumeUnsignedInteger(*this, Radix, ULLVal) ||
static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
return true;
Result = ULLVal;
return false;
}
/// Parse the current string as an integer of the specified \p Radix, or of
/// an autosensed radix if the \p Radix given is 0. The current value in
/// \p Result is discarded, and the storage is changed to be wide enough to
/// store the parsed integer.
///
/// \returns true if the string does not solely consist of a valid
/// non-empty number in the appropriate base.
///
/// APInt::fromString is superficially similar but assumes the
/// string is well-formed in the given radix.
bool getAsInteger(unsigned Radix, APInt &Result) const;
/// Parse the current string as an IEEE double-precision floating
/// point value. The string must be a well-formed double.
///
/// If \p AllowInexact is false, the function will fail if the string
/// cannot be represented exactly. Otherwise, the function only fails
/// in case of an overflow or underflow, or an invalid floating point
/// representation.
bool getAsDouble(double &Result, bool AllowInexact = true) const;
/// @}
/// @name String Operations
/// @{
// Convert the given ASCII string to lowercase.
LLVM_NODISCARD
std::string lower() const;
/// Convert the given ASCII string to uppercase.
LLVM_NODISCARD
std::string upper() const;
/// @}
/// @name Substring Operations
/// @{
/// Return a reference to the substring from [Start, Start + N).
///
/// \param Start The index of the starting character in the substring; if
/// the index is npos or greater than the length of the string then the
/// empty substring will be returned.
///
/// \param N The number of characters to included in the substring. If N
/// exceeds the number of characters remaining in the string, the string
/// suffix (starting with \p Start) will be returned.
LLVM_NODISCARD
StringRef substr(size_t Start, size_t N = npos) const {
Start = std::min(Start, Length);
return StringRef(Data + Start, std::min(N, Length - Start));
}
/// Return a StringRef equal to 'this' but with only the first \p N
/// elements remaining. If \p N is greater than the length of the
/// string, the entire string is returned.
LLVM_NODISCARD
StringRef take_front(size_t N = 1) const {
if (N >= size())
return *this;
return drop_back(size() - N);
}
/// Return a StringRef equal to 'this' but with only the last \p N
/// elements remaining. If \p N is greater than the length of the
/// string, the entire string is returned.
LLVM_NODISCARD
StringRef take_back(size_t N = 1) const {
if (N >= size())
return *this;
return drop_front(size() - N);
}
/// Return the longest prefix of 'this' such that every character
/// in the prefix satisfies the given predicate.
LLVM_NODISCARD
StringRef take_while(function_ref<bool(char)> F) const {
return substr(0, find_if_not(F));
}
/// Return the longest prefix of 'this' such that no character in
/// the prefix satisfies the given predicate.
LLVM_NODISCARD
StringRef take_until(function_ref<bool(char)> F) const {
return substr(0, find_if(F));
}
/// Return a StringRef equal to 'this' but with the first \p N elements
/// dropped.
LLVM_NODISCARD
StringRef drop_front(size_t N = 1) const {
assert(size() >= N && "Dropping more elements than exist");
return substr(N);
}
/// Return a StringRef equal to 'this' but with the last \p N elements
/// dropped.
LLVM_NODISCARD
StringRef drop_back(size_t N = 1) const {
assert(size() >= N && "Dropping more elements than exist");
return substr(0, size()-N);
}
/// Return a StringRef equal to 'this', but with all characters satisfying
/// the given predicate dropped from the beginning of the string.
LLVM_NODISCARD
StringRef drop_while(function_ref<bool(char)> F) const {
return substr(find_if_not(F));
}
/// Return a StringRef equal to 'this', but with all characters not
/// satisfying the given predicate dropped from the beginning of the string.
LLVM_NODISCARD
StringRef drop_until(function_ref<bool(char)> F) const {
return substr(find_if(F));
}
/// Returns true if this StringRef has the given prefix and removes that
/// prefix.
bool consume_front(StringRef Prefix) {
if (!startswith(Prefix))
return false;
*this = drop_front(Prefix.size());
return true;
}
/// Returns true if this StringRef has the given prefix, ignoring case,
/// and removes that prefix.
bool consume_front_insensitive(StringRef Prefix) {
if (!startswith_insensitive(Prefix))
return false;
*this = drop_front(Prefix.size());
return true;
}
/// Returns true if this StringRef has the given suffix and removes that
/// suffix.
bool consume_back(StringRef Suffix) {
if (!endswith(Suffix))
return false;
*this = drop_back(Suffix.size());
return true;
}
/// Returns true if this StringRef has the given suffix, ignoring case,
/// and removes that suffix.
bool consume_back_insensitive(StringRef Suffix) {
if (!endswith_insensitive(Suffix))
return false;
*this = drop_back(Suffix.size());
return true;
}
/// Return a reference to the substring from [Start, End).
///
/// \param Start The index of the starting character in the substring; if
/// the index is npos or greater than the length of the string then the
/// empty substring will be returned.
///
/// \param End The index following the last character to include in the
/// substring. If this is npos or exceeds the number of characters
/// remaining in the string, the string suffix (starting with \p Start)
/// will be returned. If this is less than \p Start, an empty string will
/// be returned.
LLVM_NODISCARD
StringRef slice(size_t Start, size_t End) const {
Start = std::min(Start, Length);
End = std::min(std::max(Start, End), Length);
return StringRef(Data + Start, End - Start);
}
/// Split into two substrings around the first occurrence of a separator
/// character.
///
/// If \p Separator is in the string, then the result is a pair (LHS, RHS)
/// such that (*this == LHS + Separator + RHS) is true and RHS is
/// maximal. If \p Separator is not in the string, then the result is a
/// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
///
/// \param Separator The character to split on.
/// \returns The split substrings.
LLVM_NODISCARD
std::pair<StringRef, StringRef> split(char Separator) const {
return split(StringRef(&Separator, 1));
}
/// Split into two substrings around the first occurrence of a separator
/// string.
///
/// If \p Separator is in the string, then the result is a pair (LHS, RHS)
/// such that (*this == LHS + Separator + RHS) is true and RHS is
/// maximal. If \p Separator is not in the string, then the result is a
/// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
///
/// \param Separator - The string to split on.
/// \return - The split substrings.
LLVM_NODISCARD
std::pair<StringRef, StringRef> split(StringRef Separator) const {
size_t Idx = find(Separator);
if (Idx == npos)
return std::make_pair(*this, StringRef());
return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos));
}
/// Split into two substrings around the last occurrence of a separator
/// string.
///
/// If \p Separator is in the string, then the result is a pair (LHS, RHS)
/// such that (*this == LHS + Separator + RHS) is true and RHS is
/// minimal. If \p Separator is not in the string, then the result is a
/// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
///
/// \param Separator - The string to split on.
/// \return - The split substrings.
LLVM_NODISCARD
std::pair<StringRef, StringRef> rsplit(StringRef Separator) const {
size_t Idx = rfind(Separator);
if (Idx == npos)
return std::make_pair(*this, StringRef());
return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos));
}
/// Split into substrings around the occurrences of a separator string.
///
/// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
/// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
/// elements are added to A.
/// If \p KeepEmpty is false, empty strings are not added to \p A. They
/// still count when considering \p MaxSplit
/// An useful invariant is that
/// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true
///
/// \param A - Where to put the substrings.
/// \param Separator - The string to split on.
/// \param MaxSplit - The maximum number of times the string is split.
/// \param KeepEmpty - True if empty substring should be added.
void split(SmallVectorImpl<StringRef> &A,
StringRef Separator, int MaxSplit = -1,
bool KeepEmpty = true) const;
/// Split into substrings around the occurrences of a separator character.
///
/// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
/// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
/// elements are added to A.
/// If \p KeepEmpty is false, empty strings are not added to \p A. They
/// still count when considering \p MaxSplit
/// An useful invariant is that
/// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true
///
/// \param A - Where to put the substrings.
/// \param Separator - The string to split on.
/// \param MaxSplit - The maximum number of times the string is split.
/// \param KeepEmpty - True if empty substring should be added.
void split(SmallVectorImpl<StringRef> &A, char Separator, int MaxSplit = -1,
bool KeepEmpty = true) const;
/// Split into two substrings around the last occurrence of a separator
/// character.
///
/// If \p Separator is in the string, then the result is a pair (LHS, RHS)
/// such that (*this == LHS + Separator + RHS) is true and RHS is
/// minimal. If \p Separator is not in the string, then the result is a
/// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
///
/// \param Separator - The character to split on.
/// \return - The split substrings.
LLVM_NODISCARD
std::pair<StringRef, StringRef> rsplit(char Separator) const {
return rsplit(StringRef(&Separator, 1));
}
/// Return string with consecutive \p Char characters starting from the
/// the left removed.
LLVM_NODISCARD
StringRef ltrim(char Char) const {
return drop_front(std::min(Length, find_first_not_of(Char)));
}
/// Return string with consecutive characters in \p Chars starting from
/// the left removed.
LLVM_NODISCARD
StringRef ltrim(StringRef Chars = " \t\n\v\f\r") const {
return drop_front(std::min(Length, find_first_not_of(Chars)));
}
/// Return string with consecutive \p Char characters starting from the
/// right removed.
LLVM_NODISCARD
StringRef rtrim(char Char) const {
return drop_back(Length - std::min(Length, find_last_not_of(Char) + 1));
}
/// Return string with consecutive characters in \p Chars starting from
/// the right removed.
LLVM_NODISCARD
StringRef rtrim(StringRef Chars = " \t\n\v\f\r") const {
return drop_back(Length - std::min(Length, find_last_not_of(Chars) + 1));
}
/// Return string with consecutive \p Char characters starting from the
/// left and right removed.
LLVM_NODISCARD
StringRef trim(char Char) const {
return ltrim(Char).rtrim(Char);
}
/// Return string with consecutive characters in \p Chars starting from
/// the left and right removed.
LLVM_NODISCARD
StringRef trim(StringRef Chars = " \t\n\v\f\r") const {
return ltrim(Chars).rtrim(Chars);
}
/// Detect the line ending style of the string.
///
/// If the string contains a line ending, return the line ending character
/// sequence that is detected. Otherwise return '\n' for unix line endings.
///
/// \return - The line ending character sequence.
LLVM_NODISCARD
StringRef detectEOL() const {
size_t Pos = find('\r');
if (Pos == npos) {
// If there is no carriage return, assume unix
return "\n";
}
if (Pos + 1 < Length && Data[Pos + 1] == '\n')
return "\r\n"; // Windows
if (Pos > 0 && Data[Pos - 1] == '\n')
return "\n\r"; // You monster!
return "\r"; // Classic Mac
}
/// @}
};
/// A wrapper around a string literal that serves as a proxy for constructing
/// global tables of StringRefs with the length computed at compile time.
/// In order to avoid the invocation of a global constructor, StringLiteral
/// should *only* be used in a constexpr context, as such:
///
/// constexpr StringLiteral S("test");
///
class StringLiteral : public StringRef {
private:
constexpr StringLiteral(const char *Str, size_t N) : StringRef(Str, N) {
}
public:
template <size_t N>
constexpr StringLiteral(const char (&Str)[N])
#if defined(__clang__) && __has_attribute(enable_if)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wgcc-compat"
__attribute((enable_if(__builtin_strlen(Str) == N - 1,
"invalid string literal")))
#pragma clang diagnostic pop
#endif
: StringRef(Str, N - 1) {
}
// Explicit construction for strings like "foo\0bar".
template <size_t N>
static constexpr StringLiteral withInnerNUL(const char (&Str)[N]) {
return StringLiteral(Str, N - 1);
}
};
/// @name StringRef Comparison Operators
/// @{
inline bool operator==(StringRef LHS, StringRef RHS) {
return LHS.equals(RHS);
}
inline bool operator!=(StringRef LHS, StringRef RHS) { return !(LHS == RHS); }
inline bool operator<(StringRef LHS, StringRef RHS) {
return LHS.compare(RHS) == -1;
}
inline bool operator<=(StringRef LHS, StringRef RHS) {
return LHS.compare(RHS) != 1;
}
inline bool operator>(StringRef LHS, StringRef RHS) {
return LHS.compare(RHS) == 1;
}
inline bool operator>=(StringRef LHS, StringRef RHS) {
return LHS.compare(RHS) != -1;
}
inline std::string &operator+=(std::string &buffer, StringRef string) {
return buffer.append(string.data(), string.size());
}
/// @}
/// Compute a hash_code for a StringRef.
LLVM_NODISCARD
hash_code hash_value(StringRef S);
// Provide DenseMapInfo for StringRefs.
template <> struct DenseMapInfo<StringRef, void> {
static inline StringRef getEmptyKey() {
return StringRef(
reinterpret_cast<const char *>(~static_cast<uintptr_t>(0)), 0);
}
static inline StringRef getTombstoneKey() {
return StringRef(
reinterpret_cast<const char *>(~static_cast<uintptr_t>(1)), 0);
}
static unsigned getHashValue(StringRef Val);
static bool isEqual(StringRef LHS, StringRef RHS) {
if (RHS.data() == getEmptyKey().data())
return LHS.data() == getEmptyKey().data();
if (RHS.data() == getTombstoneKey().data())
return LHS.data() == getTombstoneKey().data();
return LHS == RHS;
}
};
} // end namespace llvm
#endif // LLVM_ADT_STRINGREF_H
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
|