aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/clickhouse/src/Storages/MergeTree/LevelMergeSelector.cpp
blob: 67495c1372d411619f60e51259bca4c17e11959b (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#include <Storages/MergeTree/LevelMergeSelector.h>

#include <cmath>


namespace DB
{

namespace
{

/** Estimates best set of parts to merge within passed alternatives.
  * It is selected simply: by minimal size.
  */
struct Estimator
{
    using Iterator = LevelMergeSelector::PartsRange::const_iterator;

    void consider(Iterator begin, Iterator end, size_t sum_size)
    {
        double current_score = sum_size;

        if (min_score == 0.0 || current_score < min_score)
        {
            min_score = current_score;
            best_begin = begin;
            best_end = end;
        }
    }

    LevelMergeSelector::PartsRange getBest() const
    {
        return LevelMergeSelector::PartsRange(best_begin, best_end);
    }

    double min_score = 0.0;
    Iterator best_begin {};
    Iterator best_end {};
};


void selectWithinPartition(
    const LevelMergeSelector::PartsRange & parts,
    const size_t max_total_size_to_merge,
    Estimator & estimator,
    const LevelMergeSelector::Settings & settings)
{
    size_t parts_size = parts.size();
    if (parts_size < settings.parts_to_merge)
        return;

    /// To easily calculate sum size in any range.
    size_t parts_count = parts.size();
    size_t prefix_sum = 0;
    std::vector<size_t> prefix_sums(parts.size() + 1);

    for (size_t i = 0; i < parts_count; ++i)
    {
        prefix_sum += parts[i].size;
        prefix_sums[i + 1] = prefix_sum;
    }

    /// Use "corrected" level. It will be non-decreasing while traversing parts right to left.
    /// This is done for compatibility with another algorithms.
    size_t corrected_level_at_left = 0;
    size_t corrected_level_at_right = 0;

    size_t range_end = parts_size;
    size_t range_begin = range_end - settings.parts_to_merge;

    for (size_t i = range_begin; i < range_end; ++i)
        if (corrected_level_at_left < parts[i].level)
            corrected_level_at_left = parts[i].level;

    while (true)
    {
        if (corrected_level_at_left < parts[range_begin].level)
            corrected_level_at_left = parts[range_begin].level;

        if (corrected_level_at_right < parts[range_end - 1].level)
            corrected_level_at_right = parts[range_end - 1].level;

        /// Leftmost range of same corrected level.
        if (corrected_level_at_left == corrected_level_at_right
            && (range_begin == 0 || parts[range_begin - 1].level > corrected_level_at_left))
        {
            size_t range_size = prefix_sums[range_end] - prefix_sums[range_begin];

            if (range_size <= max_total_size_to_merge)
                estimator.consider(parts.begin() + range_begin, parts.begin() + range_end, range_size);

            break;    /// Minimum level is enough.
        }

        if (range_begin == 0)
            break;

        --range_begin;
        --range_end;
    }
}

}


LevelMergeSelector::PartsRange LevelMergeSelector::select(
    const PartsRanges & parts_ranges,
    size_t max_total_size_to_merge)
{
    Estimator estimator;

    for (const auto & parts_range: parts_ranges)
        selectWithinPartition(parts_range, max_total_size_to_merge, estimator, settings);

    return estimator.getBest();
}

}