aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/containers/comptrie/comptrie_trie.h
diff options
context:
space:
mode:
authorsereglond <sereglond@yandex-team.ru>2022-02-10 16:47:47 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:47:47 +0300
commit73bb02f2495181e0719a800f979df508924f4b71 (patch)
treec0748b5dcbade83af788c0abfa89c0383d6b779c /library/cpp/containers/comptrie/comptrie_trie.h
parenteb3d925534734c808602b31b38b953677f0a279f (diff)
downloadydb-73bb02f2495181e0719a800f979df508924f4b71.tar.gz
Restoring authorship annotation for <sereglond@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/containers/comptrie/comptrie_trie.h')
-rw-r--r--library/cpp/containers/comptrie/comptrie_trie.h322
1 files changed, 161 insertions, 161 deletions
diff --git a/library/cpp/containers/comptrie/comptrie_trie.h b/library/cpp/containers/comptrie/comptrie_trie.h
index 2b1a42aa0a..40ec1e52b3 100644
--- a/library/cpp/containers/comptrie/comptrie_trie.h
+++ b/library/cpp/containers/comptrie/comptrie_trie.h
@@ -33,8 +33,8 @@ template <class T = char, class D = ui64, class S = TCompactTriePacker<D>>
class TCompactTrie {
public:
typedef T TSymbol;
- typedef D TData;
- typedef S TPacker;
+ typedef D TData;
+ typedef S TPacker;
typedef typename TCompactTrieKeySelector<TSymbol>::TKey TKey;
typedef typename TCompactTrieKeySelector<TSymbol>::TKeyBuf TKeyBuf;
@@ -42,7 +42,7 @@ public:
typedef std::pair<TKey, TData> TValueType;
typedef std::pair<size_t, TData> TPhraseMatch;
typedef TVector<TPhraseMatch> TPhraseMatchVector;
-
+
typedef TCompactTrieBuilder<T, D, S> TBuilder;
protected:
@@ -77,8 +77,8 @@ public:
void Init(const char* d, size_t len, TPacker packer = TPacker());
void Init(const TBlob& data, TPacker packer = TPacker());
- bool IsInitialized() const;
- bool IsEmpty() const;
+ bool IsInitialized() const;
+ bool IsEmpty() const;
bool Find(const TSymbol* key, size_t keylen, TData* value = nullptr) const;
bool Find(const TKeyBuf& key, TData* value = nullptr) const {
@@ -118,7 +118,7 @@ public:
return Skipper.GetPacker() == &Packer;
}
- void FindPhrases(const TSymbol* key, size_t keylen, TPhraseMatchVector& matches, TSymbol separator = TSymbol(' ')) const;
+ void FindPhrases(const TSymbol* key, size_t keylen, TPhraseMatchVector& matches, TSymbol separator = TSymbol(' ')) const;
void FindPhrases(const TKeyBuf& key, TPhraseMatchVector& matches, TSymbol separator = TSymbol(' ')) const {
return FindPhrases(key.data(), key.size(), matches, separator);
}
@@ -141,14 +141,14 @@ public:
// return false, if no arc with @label exists
inline bool FindTails(TSymbol label, TCompactTrie<T, D, S>& res) const;
- class TConstIterator {
+ class TConstIterator {
private:
typedef NCompactTrie::TOpaqueTrieIterator TOpaqueTrieIterator;
typedef NCompactTrie::TOpaqueTrie TOpaqueTrie;
friend class TCompactTrie;
TConstIterator(const TOpaqueTrie& trie, const char* emptyValue, bool atend, TPacker packer); // only usable from Begin() and End() methods
TConstIterator(const TOpaqueTrie& trie, const char* emptyValue, const TKeyBuf& key, TPacker packer); // only usable from UpperBound() method
-
+
public:
TConstIterator() = default;
bool IsEmpty() const {
@@ -157,11 +157,11 @@ public:
bool operator==(const TConstIterator& other) const;
bool operator!=(const TConstIterator& other) const;
- TConstIterator& operator++();
- TConstIterator operator++(int /*unused*/);
+ TConstIterator& operator++();
+ TConstIterator operator++(int /*unused*/);
TConstIterator& operator--();
TConstIterator operator--(int /*unused*/);
- TValueType operator*();
+ TValueType operator*();
TKey GetKey() const;
size_t GetKeySize() const;
@@ -169,14 +169,14 @@ public:
void GetValue(TData& data) const;
const char* GetValuePtr() const;
- private:
+ private:
TPacker Packer;
TCopyPtr<TOpaqueTrieIterator> Impl;
};
- TConstIterator Begin() const;
+ TConstIterator Begin() const;
TConstIterator begin() const;
- TConstIterator End() const;
+ TConstIterator End() const;
TConstIterator end() const;
// Returns an iterator pointing to the smallest key in the trie >= the argument.
@@ -194,9 +194,9 @@ public:
friend class TPrefixIterator<TCompactTrie>;
protected:
- explicit TCompactTrie(const char* emptyValue);
+ explicit TCompactTrie(const char* emptyValue);
TCompactTrie(const TBlob& data, const char* emptyValue, TPacker packer = TPacker());
-
+
bool LookupLongestPrefix(const TSymbol* key, size_t keylen, size_t& prefixLen, const char*& valuepos, bool& hasNext) const;
bool LookupLongestPrefix(const TSymbol* key, size_t keylen, size_t& prefixLen, const char*& valuepos) const {
bool hasNext;
@@ -207,49 +207,49 @@ protected:
template <class T = char, class D = ui64, class S = TCompactTriePacker<D>>
class TCompactTrieHolder: public TCompactTrie<T, D, S>, NNonCopyable::TNonCopyable {
-private:
+private:
typedef TCompactTrie<T, D, S> TBase;
TArrayHolder<char> Storage;
-public:
+public:
TCompactTrieHolder(IInputStream& is, size_t len);
-};
+};
+
+//------------------------//
+// Implementation section //
+//------------------------//
-//------------------------//
-// Implementation section //
-//------------------------//
+// TCompactTrie
-// TCompactTrie
-
template <class T, class D, class S>
TCompactTrie<T, D, S>::TCompactTrie(const TBlob& data, TPacker packer)
- : DataHolder(data)
+ : DataHolder(data)
, Packer(packer)
-{
- Init(data, packer);
-}
-
+{
+ Init(data, packer);
+}
+
template <class T, class D, class S>
TCompactTrie<T, D, S>::TCompactTrie(const char* d, size_t len, TPacker packer)
: Packer(packer)
-{
+{
Init(d, len, packer);
-}
-
+}
+
template <class T, class D, class S>
TCompactTrie<T, D, S>::TCompactTrie(const char* emptyValue)
- : EmptyValue(emptyValue)
+ : EmptyValue(emptyValue)
{
}
-
+
template <class T, class D, class S>
TCompactTrie<T, D, S>::TCompactTrie(const TBlob& data, const char* emptyValue, TPacker packer)
- : DataHolder(data)
- , EmptyValue(emptyValue)
+ : DataHolder(data)
+ , EmptyValue(emptyValue)
, Packer(packer)
{
}
-
+
template <class T, class D, class S>
TCompactTrie<T, D, S>::TCompactTrie(const TCompactTrie& other)
: DataHolder(other.DataHolder)
@@ -289,43 +289,43 @@ TCompactTrie<T, D, S>& TCompactTrie<T, D, S>::operator=(TCompactTrie&& other) no
template <class T, class D, class S>
void TCompactTrie<T, D, S>::Init(const char* d, size_t len, TPacker packer) {
Init(TBlob::NoCopy(d, len), packer);
-}
-
+}
+
template <class T, class D, class S>
void TCompactTrie<T, D, S>::Init(const TBlob& data, TPacker packer) {
- using namespace NCompactTrie;
-
- DataHolder = data;
+ using namespace NCompactTrie;
+
+ DataHolder = data;
Packer = packer;
-
+
const char* datapos = DataHolder.AsCharPtr();
size_t len = DataHolder.Length();
- if (!len)
- return;
-
- const char* const dataend = datapos + len;
-
- const char* emptypos = datapos;
- char flags = LeapByte(emptypos, dataend, 0);
- if (emptypos && (flags & MT_FINAL)) {
+ if (!len)
+ return;
+
+ const char* const dataend = datapos + len;
+
+ const char* emptypos = datapos;
+ char flags = LeapByte(emptypos, dataend, 0);
+ if (emptypos && (flags & MT_FINAL)) {
Y_ASSERT(emptypos <= dataend);
- EmptyValue = emptypos;
- }
-}
-
+ EmptyValue = emptypos;
+ }
+}
+
template <class T, class D, class S>
bool TCompactTrie<T, D, S>::IsInitialized() const {
return DataHolder.Data() != nullptr;
-}
-
+}
+
template <class T, class D, class S>
bool TCompactTrie<T, D, S>::IsEmpty() const {
return DataHolder.Size() == 0 && EmptyValue == nullptr;
-}
-
+}
+
template <class T, class D, class S>
bool TCompactTrie<T, D, S>::Find(const TSymbol* key, size_t keylen, TData* value) const {
- size_t prefixLen = 0;
+ size_t prefixLen = 0;
const char* valuepos = nullptr;
bool hasNext;
if (!LookupLongestPrefix(key, keylen, prefixLen, valuepos, hasNext) || prefixLen != keylen)
@@ -333,13 +333,13 @@ bool TCompactTrie<T, D, S>::Find(const TSymbol* key, size_t keylen, TData* value
if (value)
Packer.UnpackLeaf(valuepos, *value);
return true;
-}
-
+}
+
template <class T, class D, class S>
void TCompactTrie<T, D, S>::FindPhrases(const TSymbol* key, size_t keylen, TPhraseMatchVector& matches, TSymbol separator) const {
LookupPhrases(DataHolder.AsCharPtr(), DataHolder.Length(), key, keylen, matches, separator);
-}
-
+}
+
template <class T, class D, class S>
inline TCompactTrie<T, D, S> TCompactTrie<T, D, S>::FindTails(const TSymbol* key, size_t keylen) const {
TCompactTrie<T, D, S> ret;
@@ -349,46 +349,46 @@ inline TCompactTrie<T, D, S> TCompactTrie<T, D, S>::FindTails(const TSymbol* key
template <class T, class D, class S>
bool TCompactTrie<T, D, S>::FindTails(const TSymbol* key, size_t keylen, TCompactTrie<T, D, S>& res) const {
- using namespace NCompactTrie;
-
- size_t len = DataHolder.Length();
-
- if (!key || !len)
+ using namespace NCompactTrie;
+
+ size_t len = DataHolder.Length();
+
+ if (!key || !len)
return false;
-
- if (!keylen) {
+
+ if (!keylen) {
res = *this;
return true;
- }
-
+ }
+
const char* datastart = DataHolder.AsCharPtr();
const char* datapos = datastart;
const char* const dataend = datapos + len;
- const TSymbol* keyend = key + keylen;
+ const TSymbol* keyend = key + keylen;
const char* value = nullptr;
- while (key != keyend) {
- T label = *(key++);
+ while (key != keyend) {
+ T label = *(key++);
if (!NCompactTrie::Advance(datapos, dataend, value, label, Packer))
return false;
-
+
if (key == keyend) {
if (datapos) {
Y_ASSERT(datapos >= datastart);
res = TCompactTrie<T, D, S>(TBlob::NoCopy(datapos, dataend - datapos), value);
} else {
res = TCompactTrie<T, D, S>(value);
- }
+ }
return true;
} else if (!datapos) {
return false; // No further way
- }
- }
-
+ }
+ }
+
return false;
-}
-
+}
+
template <class T, class D, class S>
inline bool TCompactTrie<T, D, S>::FindTails(TSymbol label, TCompactTrie<T, D, S>& res) const {
using namespace NCompactTrie;
@@ -419,8 +419,8 @@ template <class T, class D, class S>
typename TCompactTrie<T, D, S>::TConstIterator TCompactTrie<T, D, S>::Begin() const {
NCompactTrie::TOpaqueTrie self(DataHolder.AsCharPtr(), DataHolder.Length(), Skipper);
return TConstIterator(self, EmptyValue, false, Packer);
-}
-
+}
+
template <class T, class D, class S>
typename TCompactTrie<T, D, S>::TConstIterator TCompactTrie<T, D, S>::begin() const {
return Begin();
@@ -430,8 +430,8 @@ template <class T, class D, class S>
typename TCompactTrie<T, D, S>::TConstIterator TCompactTrie<T, D, S>::End() const {
NCompactTrie::TOpaqueTrie self(DataHolder.AsCharPtr(), DataHolder.Length(), Skipper);
return TConstIterator(self, EmptyValue, true, Packer);
-}
-
+}
+
template <class T, class D, class S>
typename TCompactTrie<T, D, S>::TConstIterator TCompactTrie<T, D, S>::end() const {
return End();
@@ -462,11 +462,11 @@ void TCompactTrie<T, D, S>::Print(IOutputStream& os) {
template <class T, class D, class S>
bool TCompactTrie<T, D, S>::FindLongestPrefix(const TSymbol* key, size_t keylen, size_t* prefixLen, TData* value, bool* hasNext) const {
const char* valuepos = nullptr;
- size_t tempPrefixLen = 0;
+ size_t tempPrefixLen = 0;
bool tempHasNext;
bool found = LookupLongestPrefix(key, keylen, tempPrefixLen, valuepos, tempHasNext);
- if (prefixLen)
- *prefixLen = tempPrefixLen;
+ if (prefixLen)
+ *prefixLen = tempPrefixLen;
if (found && value)
Packer.UnpackLeaf(valuepos, *value);
if (hasNext)
@@ -476,38 +476,38 @@ bool TCompactTrie<T, D, S>::FindLongestPrefix(const TSymbol* key, size_t keylen,
template <class T, class D, class S>
bool TCompactTrie<T, D, S>::LookupLongestPrefix(const TSymbol* key, size_t keylen, size_t& prefixLen, const char*& valuepos, bool& hasNext) const {
- using namespace NCompactTrie;
-
+ using namespace NCompactTrie;
+
const char* datapos = DataHolder.AsCharPtr();
size_t len = DataHolder.Length();
- prefixLen = 0;
+ prefixLen = 0;
hasNext = false;
bool found = false;
- if (EmptyValue) {
- valuepos = EmptyValue;
- found = true;
- }
-
- if (!key || !len)
+ if (EmptyValue) {
+ valuepos = EmptyValue;
+ found = true;
+ }
+
+ if (!key || !len)
return found;
-
- const char* const dataend = datapos + len;
-
+
+ const char* const dataend = datapos + len;
+
const T* keyend = key + keylen;
- while (key != keyend) {
- T label = *(key++);
+ while (key != keyend) {
+ T label = *(key++);
for (i64 i = (i64)ExtraBits<TSymbol>(); i >= 0; i -= 8) {
const char flags = LeapByte(datapos, dataend, (char)(label >> i));
if (!datapos) {
return found; // no such arc
}
-
+
Y_ASSERT(datapos <= dataend);
if ((flags & MT_FINAL)) {
prefixLen = keylen - (keyend - key) - (i ? 1 : 0);
- valuepos = datapos;
+ valuepos = datapos;
hasNext = flags & MT_NEXT;
found = true;
@@ -516,67 +516,67 @@ bool TCompactTrie<T, D, S>::LookupLongestPrefix(const TSymbol* key, size_t keyle
}
datapos += Packer.SkipLeaf(datapos); // skip intermediate leaf nodes
}
-
+
if (!(flags & MT_NEXT)) {
return found; // no further way
- }
- }
- }
-
+ }
+ }
+ }
+
return found;
-}
-
+}
+
template <class T, class D, class S>
void TCompactTrie<T, D, S>::LookupPhrases(
- const char* datapos, size_t len, const TSymbol* key, size_t keylen,
+ const char* datapos, size_t len, const TSymbol* key, size_t keylen,
TVector<TPhraseMatch>& matches, TSymbol separator) const {
- using namespace NCompactTrie;
-
- matches.clear();
-
- if (!key || !len)
- return;
-
- const T* const keystart = key;
- const T* const keyend = key + keylen;
- const char* const dataend = datapos + len;
+ using namespace NCompactTrie;
+
+ matches.clear();
+
+ if (!key || !len)
+ return;
+
+ const T* const keystart = key;
+ const T* const keyend = key + keylen;
+ const char* const dataend = datapos + len;
while (datapos && key != keyend) {
- T label = *(key++);
+ T label = *(key++);
const char* value = nullptr;
if (!Advance(datapos, dataend, value, label, Packer)) {
return;
- }
+ }
if (value && (key == keyend || *key == separator)) {
size_t matchlength = (size_t)(key - keystart);
D data;
Packer.UnpackLeaf(value, data);
matches.push_back(TPhraseMatch(matchlength, data));
}
- }
-}
-
-// TCompactTrieHolder
-
-template <class T, class D, class S>
+ }
+}
+
+// TCompactTrieHolder
+
+template <class T, class D, class S>
TCompactTrieHolder<T, D, S>::TCompactTrieHolder(IInputStream& is, size_t len)
- : Storage(new char[len])
-{
- if (is.Load(Storage.Get(), len) != len) {
+ : Storage(new char[len])
+{
+ if (is.Load(Storage.Get(), len) != len) {
ythrow yexception() << "bad data load";
- }
- TBase::Init(Storage.Get(), len);
-}
-
+ }
+ TBase::Init(Storage.Get(), len);
+}
+
//----------------------------------------------------------------------------------------------------------------
-// TCompactTrie::TConstIterator
-
+// TCompactTrie::TConstIterator
+
template <class T, class D, class S>
TCompactTrie<T, D, S>::TConstIterator::TConstIterator(const TOpaqueTrie& trie, const char* emptyValue, bool atend, TPacker packer)
: Packer(packer)
, Impl(new TOpaqueTrieIterator(trie, emptyValue, atend))
{
}
-
+
template <class T, class D, class S>
TCompactTrie<T, D, S>::TConstIterator::TConstIterator(const TOpaqueTrie& trie, const char* emptyValue, const TKeyBuf& key, TPacker packer)
: Packer(packer)
@@ -591,27 +591,27 @@ bool TCompactTrie<T, D, S>::TConstIterator::operator==(const TConstIterator& oth
return !other.Impl;
if (!other.Impl)
return false;
- return *Impl == *other.Impl;
-}
-
+ return *Impl == *other.Impl;
+}
+
template <class T, class D, class S>
bool TCompactTrie<T, D, S>::TConstIterator::operator!=(const TConstIterator& other) const {
return !operator==(other);
-}
-
+}
+
template <class T, class D, class S>
typename TCompactTrie<T, D, S>::TConstIterator& TCompactTrie<T, D, S>::TConstIterator::operator++() {
- Impl->Forward();
- return *this;
-}
-
+ Impl->Forward();
+ return *this;
+}
+
template <class T, class D, class S>
typename TCompactTrie<T, D, S>::TConstIterator TCompactTrie<T, D, S>::TConstIterator::operator++(int /*unused*/) {
- TConstIterator copy(*this);
- Impl->Forward();
- return copy;
-}
-
+ TConstIterator copy(*this);
+ Impl->Forward();
+ return copy;
+}
+
template <class T, class D, class S>
typename TCompactTrie<T, D, S>::TConstIterator& TCompactTrie<T, D, S>::TConstIterator::operator--() {
Impl->Backward();
@@ -627,14 +627,14 @@ typename TCompactTrie<T, D, S>::TConstIterator TCompactTrie<T, D, S>::TConstIter
template <class T, class D, class S>
typename TCompactTrie<T, D, S>::TValueType TCompactTrie<T, D, S>::TConstIterator::operator*() {
- return TValueType(GetKey(), GetValue());
-}
-
+ return TValueType(GetKey(), GetValue());
+}
+
template <class T, class D, class S>
typename TCompactTrie<T, D, S>::TKey TCompactTrie<T, D, S>::TConstIterator::GetKey() const {
return Impl->GetKey<TSymbol>();
-}
-
+}
+
template <class T, class D, class S>
size_t TCompactTrie<T, D, S>::TConstIterator::GetKeySize() const {
return Impl->MeasureKey<TSymbol>();