aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/containers/comptrie/write_trie_backwards.cpp
blob: f7459b1c497ed9486f60e02c6c14cfbe191bfb26 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#include "write_trie_backwards.h"

#include "comptrie_impl.h"
#include "leaf_skipper.h"

#include <util/generic/buffer.h>
#include <util/generic/vector.h>

namespace NCompactTrie {
    size_t WriteTrieBackwards(IOutputStream& os, TReverseNodeEnumerator& enumerator, bool verbose) { 
        if (verbose) { 
            Cerr << "Writing down the trie..." << Endl; 
        } 

        // Rewrite everything from the back, removing unused pieces. 
        const size_t chunksize = 0x10000; 
        TVector<char*> resultData; 

        resultData.push_back(new char[chunksize]); 
        char* chunkend = resultData.back() + chunksize; 

        size_t resultLength = 0; 
        size_t chunkLength = 0; 

        size_t counter = 0; 
        TBuffer bufferHolder; 
        while (enumerator.Move()) { 
            if (verbose) 
                ShowProgress(++counter); 

            size_t bufferLength = 64 + enumerator.GetLeafLength(); // never know how big leaf data can be 
            bufferHolder.Clear(); 
            bufferHolder.Resize(bufferLength); 
            char* buffer = bufferHolder.Data(); 
 
            size_t nodelength = enumerator.RecreateNode(buffer, resultLength); 
            Y_ASSERT(nodelength <= bufferLength); 

            resultLength += nodelength; 

            if (chunkLength + nodelength <= chunksize) { 
                chunkLength += nodelength; 
                memcpy(chunkend - chunkLength, buffer, nodelength); 
            } else { // allocate a new chunk 
                memcpy(chunkend - chunksize, buffer + nodelength - (chunksize - chunkLength), chunksize - chunkLength); 
                chunkLength = chunkLength + nodelength - chunksize; 

                resultData.push_back(new char[chunksize]); 
                chunkend = resultData.back() + chunksize; 

                while (chunkLength > chunksize) { // allocate a new chunks 
                    chunkLength -= chunksize; 
                    memcpy(chunkend - chunksize, buffer + chunkLength, chunksize); 

                    resultData.push_back(new char[chunksize]); 
                    chunkend = resultData.back() + chunksize; 
                } 

                memcpy(chunkend - chunkLength, buffer, chunkLength); 
            }
        } 

        if (verbose) 
            Cerr << counter << Endl; 
 
        // Write the whole thing down 
        while (!resultData.empty()) { 
            char* chunk = resultData.back(); 
            os.Write(chunk + chunksize - chunkLength, chunkLength); 
            chunkLength = chunksize; 
            delete[] chunk; 
            resultData.pop_back(); 
        }
 
        return resultLength; 
    }

    size_t WriteTrieBackwardsNoAlloc(IOutputStream& os, TReverseNodeEnumerator& enumerator, TOpaqueTrie& trie, EMinimizeMode mode) { 
        char* data = const_cast<char*>(trie.Data); 
        char* end = data + trie.Length; 
        char* pos = end; 

        TVector<char> buf(64); 
        while (enumerator.Move()) { 
            size_t nodeLength = enumerator.RecreateNode(nullptr, end - pos); 
            if (nodeLength > buf.size()) 
                buf.resize(nodeLength); 

            size_t realLength = enumerator.RecreateNode(buf.data(), end - pos); 
            Y_ASSERT(realLength == nodeLength); 

            pos -= nodeLength; 
            memcpy(pos, buf.data(), nodeLength); 
        } 

        switch (mode) { 
            case MM_NOALLOC: 
                os.Write(pos, end - pos); 
                break; 
            case MM_INPLACE: 
                memmove(data, pos, end - pos); 
                break; 
            default: 
                Y_VERIFY(false); 
        } 

        return end - pos; 
    }

}