aboutsummaryrefslogtreecommitdiffstats
path: root/util/string/strip.h
blob: c9172ef19a951f9c4960dc12c35b0912232474cb (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
#pragma once

#include "ascii.h"

#include <util/generic/string.h>
#include <util/generic/strbuf.h>
#include <utility>

/// Strip criterion that reports whether the element an iterator points at
/// is an ASCII space, as decided by IsAsciiSpace().
/// @tparam It iterator type the criterion is invoked with.
template <class It>
struct TIsAsciiSpaceAdapter {
    /// Dereferences @p it and passes the element to IsAsciiSpace().
    bool operator()(const It& it) const noexcept {
        const auto& symbol = *it;
        return IsAsciiSpace(symbol);
    }
};

/// Creates a TIsAsciiSpaceAdapter for the iterator type of the argument.
/// The argument value itself is ignored; it only drives template deduction.
template <class It>
TIsAsciiSpaceAdapter<It> IsAsciiSpaceAdapter(It) {
    return TIsAsciiSpaceAdapter<It>();
}

/// Strip criterion that matches elements equal to one fixed character.
/// @tparam TChar type of the character to compare against.
template <class TChar>
struct TEqualsStripAdapter {
    /// The character that elements are compared with.
    const TChar Ch;

    /// Remembers @p ch as the character to match.
    TEqualsStripAdapter(TChar ch)
        : Ch(ch)
    {
    }

    /// Returns true when the element @p it points at compares equal to Ch.
    template <class It>
    bool operator()(const It& it) const noexcept {
        const bool matches = (*it == Ch);
        return matches;
    }
};

/// Creates a TEqualsStripAdapter that matches elements equal to @p ch.
template <class TChar>
TEqualsStripAdapter<TChar> EqualsStripAdapter(TChar ch) {
    return TEqualsStripAdapter<TChar>(ch);
}

/// Advances @p b towards @p e past every leading element accepted by @p criterion.
/// @param b         start of the range; moved forward in place.
/// @param e         end of the range; never modified.
/// @param criterion callable invoked with the iterator itself (not the element);
///                  a true result means the element is stripped.
template <class It, class TStripCriterion>
inline void StripRangeBegin(It& b, const It& e, TStripCriterion&& criterion) noexcept {
    for (; b < e; ++b) {
        if (!criterion(b)) {
            break;
        }
    }
}

/// Advances @p b towards @p e past leading elements, using the criterion
/// produced by IsAsciiSpaceAdapter().
template <class It>
inline void StripRangeBegin(It& b, const It& e) noexcept {
    auto isSpace = IsAsciiSpaceAdapter(b);
    StripRangeBegin(b, e, isSpace);
}

/// Moves @p e back towards @p b past every trailing element accepted by @p criterion.
/// @param b         start of the range; never modified.
/// @param e         end of the range; moved backward in place.
/// @param criterion callable invoked with an iterator to the last remaining element
///                  (e - 1); a true result means the element is stripped.
template <class It, class TStripCriterion>
inline void StripRangeEnd(const It& b, It& e, TStripCriterion&& criterion) noexcept {
    for (; b < e; --e) {
        if (!criterion(e - 1)) {
            break;
        }
    }
}

/// Moves @p e back towards @p b past trailing elements, using the criterion
/// produced by IsAsciiSpaceAdapter().
template <class It>
inline void StripRangeEnd(const It& b, It& e) noexcept {
    auto isSpace = IsAsciiSpaceAdapter(b);
    StripRangeEnd(b, e, isSpace);
}

/// Stripping implementation parameterised by which ends of the range are processed.
/// @tparam stripBeg when true, leading elements are stripped.
/// @tparam stripEnd when true, trailing elements are stripped.
template <bool stripBeg, bool stripEnd>
struct TStripImpl {
    /// Narrows [b, e) in place according to @p criterion.
    /// @return true if the length of the range changed.
    template <class It, class TStripCriterion>
    static inline bool StripRange(It& b, It& e, TStripCriterion&& criterion) noexcept {
        const size_t lengthBefore = e - b;

        if (stripBeg) {
            StripRangeBegin(b, e, criterion);
        }
        if (stripEnd) {
            StripRangeEnd(b, e, criterion);
        }

        const size_t lengthAfter = e - b;
        return lengthBefore != lengthAfter;
    }

    /// Writes the stripped version of @p from into @p to.
    /// When something was stripped, @p to is built as T(begin, length);
    /// otherwise @p to is simply assigned @p from.
    /// @return true if anything was stripped.
    template <class T, class TStripCriterion>
    static inline bool StripString(const T& from, T& to, TStripCriterion&& criterion) {
        auto first = from.begin();
        auto last = from.end();

        const bool changed = StripRange(first, last, criterion);
        if (changed) {
            to = T(first, last - first);
        } else {
            to = from;
        }

        return changed;
    }

    /// Returns the stripped version of @p from, using @p criterion.
    template <class T, class TStripCriterion>
    static inline T StripString(const T& from, TStripCriterion&& criterion) {
        T result;
        StripString(from, result, criterion);
        return result;
    }

    /// Returns the stripped version of @p from, using the criterion
    /// produced by IsAsciiSpaceAdapter().
    template <class T>
    static inline T StripString(const T& from) {
        auto isSpace = IsAsciiSpaceAdapter(from.begin());
        return StripString(from, isSpace);
    }
};

template <class It, class TStripCriterion>
inline bool StripRange(It& b, It& e, TStripCriterion&& criterion) noexcept {
    return TStripImpl<true, true>::StripRange(b, e, criterion); 
}

/// Narrows [b, e) in place using the criterion produced by IsAsciiSpaceAdapter().
/// @return the result of the criterion-taking StripRange() call.
template <class It>
inline bool StripRange(It& b, It& e) noexcept {
    auto isSpace = IsAsciiSpaceAdapter(b);
    return StripRange(b, e, isSpace);
}

/// Strips the range that starts at @p b and spans @p len elements, using StripRange().
/// @param b         start of the range; passed to StripRange() by reference.
/// @param len       number of elements; overwritten with the new length when
///                  StripRange() reports a change.
/// @param criterion strip criterion passed on to StripRange().
/// @return the value returned by StripRange().
template <class It, class TStripCriterion>
inline bool Strip(It& b, size_t& len, TStripCriterion&& criterion) noexcept {
    It rangeEnd = b + len;

    const bool changed = StripRange(b, rangeEnd, criterion);
    if (changed) {
        len = rangeEnd - b;
    }

    return changed;
}

/// Strips the range that starts at @p b and spans @p len elements, using the
/// criterion produced by IsAsciiSpaceAdapter().
/// @return the result of the criterion-taking Strip() call.
template <class It>
inline bool Strip(It& b, size_t& len) noexcept {
    auto isSpace = IsAsciiSpaceAdapter(b);
    return Strip(b, len, isSpace);
}

template <class T, class TStripCriterion>
static inline bool StripString(const T& from, T& to, TStripCriterion&& criterion) {
    return TStripImpl<true, true>::StripString(from, to, criterion); 
}

/// Writes the stripped @p from into @p to, using the criterion produced by
/// IsAsciiSpaceAdapter().
/// @return the result of the criterion-taking StripString() call.
template <class T>
static inline bool StripString(const T& from, T& to) {
    auto isSpace = IsAsciiSpaceAdapter(from.begin());
    return StripString(from, to, isSpace);
}

template <class T, class TStripCriterion>
static inline T StripString(const T& from, TStripCriterion&& criterion) {
    return TStripImpl<true, true>::StripString(from, criterion); 
}

template <class T>
static inline T StripString(const T& from) {
    return TStripImpl<true, true>::StripString(from);
}

template <class T>
static inline T StripStringLeft(const T& from) {
    return TStripImpl<true, false>::StripString(from);
}

template <class T>
static inline T StripStringRight(const T& from) {
    return TStripImpl<false, true>::StripString(from);
}

template <class T, class TStripCriterion>
static inline T StripStringLeft(const T& from, TStripCriterion&& criterion) {
    return TStripImpl<true, false>::StripString(from, criterion); 
}

template <class T, class TStripCriterion>
static inline T StripStringRight(const T& from, TStripCriterion&& criterion) {
    return TStripImpl<false, true>::StripString(from, criterion); 
}

/// Copies @p from into @p to with leading and trailing spaces removed.
/// @return the value returned by StripString(from, to).
static inline bool Strip(const TString& from, TString& to) {
    const bool changed = StripString(from, to);
    return changed;
}

/// Removes leading and trailing spaces from @p s itself.
/// @return a reference to @p s, so calls can be chained.
inline TString& StripInPlace(TString& s) {
    const TString& source = s;
    Strip(source, s);
    return s;
}

/// Returns a copy of @p s with leading and trailing spaces removed.
/// The declaration carries Y_WARN_UNUSED_RESULT.
inline TString Strip(const TString& s) Y_WARN_UNUSED_RESULT;
inline TString Strip(const TString& s) {
    TString stripped;
    Strip(s, stripped);
    return stripped;
}

/// Collapses whitespace in the buffer [s, s + n) in place.
///
/// Every run of two or more whitespace characters, and every single whitespace
/// character that is not ' ', is replaced by one ' '. A lone ' ' and all
/// non-whitespace characters are copied unchanged.
///
/// @param s            buffer to rewrite; the result occupies its first characters.
/// @param n            number of characters to process.
/// @param isWhitespace predicate called with a single character.
/// @return length of the collapsed text; characters past it are left as they were.
template <class TChar, class TWhitespaceFunc>
size_t CollapseImpl(TChar* s, size_t n, const TWhitespaceFunc& isWhitespace) {
    size_t writePos = 0;
    size_t readPos = 0;

    while (readPos < n) {
        // Find the end of the whitespace run (possibly empty) starting at readPos.
        size_t runEnd = readPos;
        while (runEnd < n && isWhitespace(s[runEnd])) {
            ++runEnd;
        }
        const size_t runLength = runEnd - readPos;

        // An ordinary character or a lone ' ' is copied verbatim.
        const bool copyVerbatim = runLength == 0 || (runLength == 1 && s[readPos] == ' ');
        if (copyVerbatim) {
            s[writePos] = s[readPos];
            ++readPos;
        } else {
            s[writePos] = ' ';
            readPos = runEnd;
        }
        ++writePos;
    }

    return writePos;
}

/// Copies @p from into @p to and collapses whitespace within the first
/// @p maxLen characters of the copy.
///
/// The copy is scanned for the first position that needs rewriting: a whitespace
/// character other than ' ', or a ' ' immediately followed by another whitespace
/// character that is still inside the processed window. From that position on, the
/// rest of the window is collapsed in place by the three-argument CollapseImpl()
/// overload and the characters it freed are removed from @p to.
///
/// @param from         source string.
/// @param to           receives the result; it is always assigned from @p from first.
/// @param maxLen       number of leading characters to process; 0 means the whole
///                     string, and larger values are clamped to the string size.
/// @param isWhitespace predicate called with a single character.
/// @return true if a position that needs collapsing was found, false otherwise.
template <class TStringType, class TWhitespaceFunc>
bool CollapseImpl(const TStringType& from, TStringType& to, size_t maxLen, const TWhitespaceFunc& isWhitespace) {
    to = from;
    maxLen = maxLen ? Min(maxLen, to.size()) : to.size();
    for (size_t i = 0; i < maxLen; ++i) {
        // The lookahead is bounded by maxLen: a lone ' ' at the very end of the
        // processed window is not collapsible, whatever follows it. Without the
        // bound the function would peek outside the window (or at size()) and
        // report a change although nothing is rewritten.
        if (isWhitespace(to[i]) && (to[i] != ' ' || (i + 1 < maxLen && isWhitespace(to[i + 1])))) {
            size_t tailSize = maxLen - i;
            size_t newTailSize = CollapseImpl(to.begin() + i, tailSize, isWhitespace);
            to.remove(i + newTailSize, tailSize - newTailSize);
            return true;
        }
    }
    return false;
}

/// Out-of-line overload: only declared in this header, the definition lives elsewhere.
/// Takes the source string @p from, the destination @p to and an optional @p maxLen
/// (default 0).
/// NOTE(review): presumably writes the whitespace-collapsed copy of @p from into @p to and
/// returns whether anything changed — confirm against the definition.
bool Collapse(const TString& from, TString& to, size_t maxLen = 0);

/// Replaces several consecutive space symbols in @p s with one, modifying @p s itself
/// (processing is limited to maxLen bytes).
/// @return a reference to @p s, so calls can be chained.
inline TString& CollapseInPlace(TString& s, size_t maxLen = 0) {
    const TString& source = s;
    Collapse(source, s, maxLen);
    return s;
}

/// Returns a copy of @p s in which several consecutive space symbols are replaced
/// with one (processing is limited to maxLen bytes).
/// The declaration carries Y_WARN_UNUSED_RESULT and the default maxLen of 0.
inline TString Collapse(const TString& s, size_t maxLen = 0) Y_WARN_UNUSED_RESULT;
inline TString Collapse(const TString& s, size_t maxLen) {
    TString collapsed;
    Collapse(s, collapsed, maxLen);
    return collapsed;
}

/// Out-of-line overload: only declared in this header, the definition lives elsewhere.
/// Takes the source string @p from, the destination @p to and @p maxLen.
/// NOTE(review): presumably collapses whitespace like Collapse() and truncates the result
/// to @p maxLen — confirm against the definition.
void CollapseText(const TString& from, TString& to, size_t maxLen);

/// In-place variant: the same as Collapse(), plus the string is truncated to maxLen.
/// @details An ellipsis is inserted at the end of the truncated line.
///          The result is built in a temporary string and then assigned back to @p s.
inline void CollapseText(TString& s, size_t maxLen) {
    const TString& source = s;
    TString collapsed;
    CollapseText(source, collapsed, maxLen);
    s = collapsed;
}