aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/unified_agent_client/helpers.cpp
blob: 01908660f33914784b9f37953c832655c8ad6cb2 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#include "helpers.h"
#include <util/string/ascii.h>

namespace NUnifiedAgent::NPrivate {
    // Returns true only when every key and every value stored in `meta`
    // passes the IsUtf check; an empty map yields true.
    bool IsUtf8(const THashMap<TString, TString>& meta) {
        for (const auto& [key, value] : meta) {
            const bool entryIsValid = IsUtf(key) && IsUtf(value);
            if (!entryIsValid) {
                // The first offending entry settles the answer.
                return false;
            }
        }
        return true;
    }

    // Produces a sanitized copy of `message` in result.Data.
    //
    // * Bytes that SafeReadUTF8Char reports as broken are replaced by
    //   `signBrokenSymbol`.
    // * Single-byte characters that are neither ASCII alphanumeric, punctuation
    //   nor whitespace are replaced the same way.
    // * A replacement is not appended when Data already ends with
    //   `signBrokenSymbol`, so runs of replacements collapse into one character;
    //   BrokenCount is still incremented for every replacement.
    // * Once Data holds at least `maxSize` bytes while input remains, processing
    //   stops and IsTruncated is set. Valid runes are always copied whole, so
    //   Data may exceed `maxSize` by the trailing bytes of the last rune.
    // * With `maxSize == 0` nothing is copied and IsTruncated reports whether
    //   `message` was non-empty.
    ResultReplacingNonUTF ReplaceNonUTF(TStringBuf message, char signBrokenSymbol, size_t maxSize) {
        ResultReplacingNonUTF sanitized;
        if (maxSize == 0) {
            sanitized.IsTruncated = !message.empty();
            return sanitized;
        }
        if (message.empty()) {
            return sanitized;
        }

        const unsigned char* cursor = reinterpret_cast<const unsigned char*>(message.data());
        const unsigned char* const inputEnd = cursor + message.size();

        // Records one replaced character; the placeholder itself is appended only
        // when the output does not already end with it.
        auto markBroken = [&sanitized, signBrokenSymbol]() {
            const bool endsWithPlaceholder = !sanitized.Data.empty() && sanitized.Data.back() == signBrokenSymbol;
            if (!endsWithPlaceholder) {
                sanitized.Data.push_back(signBrokenSymbol);
            }
            ++sanitized.BrokenCount;
        };

        while (cursor < inputEnd) {
            wchar32 codePoint = 0;
            size_t byteCount = 0;
            const auto status = SafeReadUTF8Char(codePoint, byteCount, cursor, inputEnd);

            if (status != RECODE_OK && status != RECODE_BROKENSYMBOL) {
                // Any other status ends processing (presumably an incomplete
                // sequence at the end of the input - confirm against
                // SafeReadUTF8Char); the remainder counts as one replacement.
                markBroken();
                break;
            }

            if (status == RECODE_BROKENSYMBOL) {
                // Skip exactly one byte and retry decoding from the next one.
                ++cursor;
                markBroken();
            } else if (byteCount == 1 && !(IsAsciiAlnum(*cursor) || IsAsciiPunct(*cursor) || IsAsciiSpace(*cursor))) {
                // Well-formed single byte outside the accepted ASCII classes.
                ++cursor;
                markBroken();
            } else {
                // Copy the whole rune byte by byte.
                for (const unsigned char* const runeEnd = cursor + byteCount; cursor != runeEnd; ++cursor) {
                    sanitized.Data.push_back(*cursor);
                }
            }

            if (cursor < inputEnd && sanitized.Data.size() >= maxSize) {
                sanitized.IsTruncated = true;
                break;
            }
        }
        return sanitized;
    }
}