// path: root/yql/essentials/utils/log/ut/log_parser.cpp
// blob: 55f23ba0f576266849330e88c54b45c332b7152a
#include "log_parser.h"

#include <regex>

namespace NYql::NLog {

/// Parses one JSON-formatted log line into a TLogRow.
///
/// Layout read by this function: a top-level object with a "message" entry
/// and an "@fields" object holding "datetime", "level", "procname", "pid",
/// "tid", "component", "filename", "line" and "path". Every entry is read
/// with GetStringSafe() except "path", which is read with GetStringRobust().
///
/// Asserts (UNIT_ASSERT_C) that `str` parses as JSON.
TLogRow ParseJsonLogRow(TStringBuf str) {
    NJson::TJsonMap root;
    UNIT_ASSERT_C(NJson::ReadJsonTree(str, &root), "invalid json '" << str << "'");

    // All structured attributes live under the "@fields" object.
    auto& fields = root["@fields"];

    // The thread id is hexadecimal when prefixed with "0x", decimal otherwise.
    // Kept as a lambda so it runs at its original position among the
    // initializers below.
    const auto parseThreadId = [&fields] {
        const TString tidText = fields["tid"].GetStringSafe();
        const bool isHex = tidText.substr(0, 2) == "0x";
        return isHex
                   ? IntFromString<ui64, 16, TStringBuf>(tidText.substr(2))
                   : IntFromString<ui64, 10, TStringBuf>(tidText);
    };

    return {
        // NOTE(review): the fixed 4-hour shift presumably compensates a
        // timezone offset used by the tests — confirm.
        .Time = TInstant::ParseIso8601(fields["datetime"].GetStringSafe()) - TDuration::Hours(4),
        .Level = ELevelHelpers::FromString(fields["level"].GetStringSafe()),
        .ProcName = fields["procname"].GetStringSafe(),
        .ProcId = FromString<pid_t>(fields["pid"].GetStringSafe()),
        .ThreadId = parseThreadId(),
        .Component = EComponentHelpers::FromString(fields["component"].GetStringSafe()),
        .FileName = fields["filename"].GetStringSafe(),
        .LineNumber = FromString<ui32>(fields["line"].GetStringSafe()),
        .Path = fields["path"].GetStringRobust(),
        .Message = root["message"].GetStringSafe(),
    };
}

/// Parses one legacy (plain-text) log line into a TLogRow.
///
/// The line must match `rowRe` below, whose ten capture groups are, in order:
/// time, level, process name, process id, thread id, component name,
/// file name, line number, optional "{path} " and message. An optional
/// trailing newline is accepted.
///
/// Asserts (UNIT_ASSERT_C / UNIT_ASSERT_EQUAL_C) that the line matches and
/// that the match exposes 11 sub-matches (whole match + 10 groups).
TLogRow ParseLegacyLogRow(TStringBuf str) {
    static const std::regex rowRe(
        "^([0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}\\.[0-9]{3}) " // (1) time
        "([A-Z ]{5}) "                                                         // (2) level
        "([a-zA-Z0-9_\\.-]+)"                                                  // (3) process name
        ".pid=([0-9]+),"                                                       // (4) process id
        " tid=(0?x?[0-9a-fA-F]+). "                                            // (5) thread id
        ".([a-zA-Z0-9_\\. ]+). "                                               // (6) component name
        "([^:]+):"                                                             // (7) file name
        "([0-9]+): "                                                           // (8) line number
        "(\\{[^\n]*\\} )?"                                                     // (9) path
        "([^\n]*)\n?$"                                                         // (10) message
        , std::regex_constants::extended);

    // TStringBuf is a non-owning view and is not guaranteed to be
    // NUL-terminated, so match over the explicit [data, data + size) range
    // instead of the C-string overload, which would scan until a '\0'.
    std::cmatch match;
    const bool isMatch = std::regex_match(str.data(), str.data() + str.size(), match, rowRe);

    UNIT_ASSERT_C(isMatch, "log row does not match format: '" << str << '\'');
    UNIT_ASSERT_EQUAL_C(match.size(), 11, "expected 11 groups in log row: '" << str << '\'');

    // Materialize the groups that are inspected more than once.
    const std::string tidText = match[5].str();
    const std::string pathGroup = match[9].str();

    return {
        // NOTE(review): the fixed 4-hour shift presumably compensates a
        // timezone offset used by the tests — confirm.
        .Time = TInstant::ParseIso8601(match[1].str()) - TDuration::Hours(4),
        .Level = ELevelHelpers::FromString(match[2].str()),
        .ProcName = match[3].str(),
        .ProcId = FromString<pid_t>(match[4].str()),
        // Hexadecimal when prefixed with "0x", decimal otherwise.
        .ThreadId = tidText.substr(0, 2) == "0x"
                        ? IntFromString<ui64, 16, TStringBuf>(tidText.substr(2))
                        : IntFromString<ui64, 10, TStringBuf>(tidText),
        .Component = EComponentHelpers::FromString(match[6].str()),
        .FileName = match[7].str(),
        .LineNumber = FromString<ui32>(match[8].str()),
        // A matched group 9 has the form "{...} ": drop the leading '{' and
        // the trailing "} " (3 characters in total) to keep only the content.
        .Path = pathGroup.empty()
                    ? std::string()
                    : pathGroup.substr(1, pathGroup.size() - 3),
        .Message = match[10].str(),
    };
}

} // namespace NYql::NLog