aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/tokenizer/special_tokens.cpp
blob: 141ea9ac06584b78e8531cc9cc4a9844ef3c96a3 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#include "special_tokens.h"

#include <library/cpp/containers/comptrie/set.h>

#include <util/generic/singleton.h>

namespace {
    // Serialized trie data and its size in bytes. Both symbols are only
    // declared here (C linkage); their definitions live outside this file.
    extern "C" {
        extern const unsigned char SpecialTokens[];
        extern const ui32 SpecialTokensSize;
    }

    // Trie-backed set of wchar16 strings constructed directly over the
    // SpecialTokens blob.
    class TSpecialTokensSet: public TCompactTrieSet<wchar16> {
    public:
        TSpecialTokensSet(): TCompactTrieSet<wchar16>(reinterpret_cast<const char*>(SpecialTokens), SpecialTokensSize)
        {
        }
    };
}

// Looks up the longest prefix of the buffer `text` (at most `maxLen` wchar16
// units are passed to the trie) that is present in the special tokens set.
//
// Returns the prefix length reported by FindLongestPrefix; the result starts
// at 0, so 0 is returned whenever the lookup does not write a length.
//
// The set is obtained through Singleton<>() on every call rather than cached
// in a namespace-scope variable: such a variable is initialized dynamically,
// so a call made from another translation unit's static initializer could
// otherwise run before it is set and dereference a null pointer.
size_t GetSpecialTokenLength(const wchar16* text, size_t maxLen) {
    size_t resultLen = 0;
    Singleton<TSpecialTokensSet>()->FindLongestPrefix(text, maxLen, &resultLen);
    return resultLen;
}