aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/python/marisa-trie/marisa/grimoire/vector/flat-vector.h
blob: 14b25d7d8282984218085b94b0818aac5c656179 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
#pragma once
#ifndef MARISA_GRIMOIRE_VECTOR_FLAT_VECTOR_H_
#define MARISA_GRIMOIRE_VECTOR_FLAT_VECTOR_H_

#include "vector.h"

namespace marisa {
namespace grimoire {
namespace vector {

class FlatVector {
 public:
#if MARISA_WORD_SIZE == 64
  typedef UInt64 Unit;
#else  // MARISA_WORD_SIZE == 64
  typedef UInt32 Unit;
#endif  // MARISA_WORD_SIZE == 64

  FlatVector() : units_(), value_size_(0), mask_(0), size_(0) {}

  void build(const Vector<UInt32> &values) {
    FlatVector temp;
    temp.build_(values);
    swap(temp);
  }

  void map(Mapper &mapper) {
    FlatVector temp;
    temp.map_(mapper);
    swap(temp);
  }
  void read(Reader &reader) {
    FlatVector temp;
    temp.read_(reader);
    swap(temp);
  }
  void write(Writer &writer) const {
    write_(writer);
  }

  UInt32 operator[](std::size_t i) const {
    MARISA_DEBUG_IF(i >= size_, MARISA_BOUND_ERROR);

    const std::size_t pos = i * value_size_;
    const std::size_t unit_id = pos / MARISA_WORD_SIZE;
    const std::size_t unit_offset = pos % MARISA_WORD_SIZE;

    if ((unit_offset + value_size_) <= MARISA_WORD_SIZE) {
      return (UInt32)(units_[unit_id] >> unit_offset) & mask_;
    } else {
      return (UInt32)((units_[unit_id] >> unit_offset)
          | (units_[unit_id + 1] << (MARISA_WORD_SIZE - unit_offset))) & mask_;
    }
  }

  std::size_t value_size() const {
    return value_size_;
  }
  UInt32 mask() const {
    return mask_;
  }

  bool empty() const {
    return size_ == 0;
  }
  std::size_t size() const {
    return size_;
  }
  std::size_t total_size() const {
    return units_.total_size();
  }
  std::size_t io_size() const {
    return units_.io_size() + (sizeof(UInt32) * 2) + sizeof(UInt64);
  }

  void clear() {
    FlatVector().swap(*this);
  }
  void swap(FlatVector &rhs) {
    units_.swap(rhs.units_);
    marisa::swap(value_size_, rhs.value_size_);
    marisa::swap(mask_, rhs.mask_);
    marisa::swap(size_, rhs.size_);
  }

 private:
  Vector<Unit> units_;
  std::size_t value_size_;
  UInt32 mask_;
  std::size_t size_;

  void build_(const Vector<UInt32> &values) {
    UInt32 max_value = 0;
    for (std::size_t i = 0; i < values.size(); ++i) {
      if (values[i] > max_value) {
        max_value = values[i];
      }
    }

    std::size_t value_size = 0;
    while (max_value != 0) {
      ++value_size;
      max_value >>= 1;
    }

    std::size_t num_units = values.empty() ? 0 : (64 / MARISA_WORD_SIZE);
    if (value_size != 0) {
      num_units = (std::size_t)(
          (((UInt64)value_size * values.size()) + (MARISA_WORD_SIZE - 1))
          / MARISA_WORD_SIZE);
      num_units += num_units % (64 / MARISA_WORD_SIZE);
    }

    units_.resize(num_units);
    if (num_units > 0) {
      units_.back() = 0;
    }

    value_size_ = value_size;
    if (value_size != 0) {
      mask_ = MARISA_UINT32_MAX >> (32 - value_size);
    }
    size_ = values.size();

    for (std::size_t i = 0; i < values.size(); ++i) {
      set(i, values[i]);
    }
  }

  void map_(Mapper &mapper) {
    units_.map(mapper);
    {
      UInt32 temp_value_size;
      mapper.map(&temp_value_size);
      MARISA_THROW_IF(temp_value_size > 32, MARISA_FORMAT_ERROR);
      value_size_ = temp_value_size;
    }
    {
      UInt32 temp_mask;
      mapper.map(&temp_mask);
      mask_ = temp_mask;
    }
    {
      UInt64 temp_size;
      mapper.map(&temp_size);
      MARISA_THROW_IF(temp_size > MARISA_SIZE_MAX, MARISA_SIZE_ERROR);
      size_ = (std::size_t)temp_size;
    }
  }

  void read_(Reader &reader) {
    units_.read(reader);
    {
      UInt32 temp_value_size;
      reader.read(&temp_value_size);
      MARISA_THROW_IF(temp_value_size > 32, MARISA_FORMAT_ERROR);
      value_size_ = temp_value_size;
    }
    {
      UInt32 temp_mask;
      reader.read(&temp_mask);
      mask_ = temp_mask;
    }
    {
      UInt64 temp_size;
      reader.read(&temp_size);
      MARISA_THROW_IF(temp_size > MARISA_SIZE_MAX, MARISA_SIZE_ERROR);
      size_ = (std::size_t)temp_size;
    }
  }

  void write_(Writer &writer) const {
    units_.write(writer);
    writer.write((UInt32)value_size_);
    writer.write((UInt32)mask_);
    writer.write((UInt64)size_);
  }

  void set(std::size_t i, UInt32 value) {
    MARISA_DEBUG_IF(i >= size_, MARISA_BOUND_ERROR);
    MARISA_DEBUG_IF(value > mask_, MARISA_RANGE_ERROR);

    const std::size_t pos = i * value_size_;
    const std::size_t unit_id = pos / MARISA_WORD_SIZE;
    const std::size_t unit_offset = pos % MARISA_WORD_SIZE;

    units_[unit_id] &= ~((Unit)mask_ << unit_offset);
    units_[unit_id] |= (Unit)(value & mask_) << unit_offset;
    if ((unit_offset + value_size_) > MARISA_WORD_SIZE) {
      units_[unit_id + 1] &=
          ~((Unit)mask_ >> (MARISA_WORD_SIZE - unit_offset));
      units_[unit_id + 1] |=
          (Unit)(value & mask_) >> (MARISA_WORD_SIZE - unit_offset);
    }
  }

  // Disallows copy and assignment.
  FlatVector(const FlatVector &);
  FlatVector &operator=(const FlatVector &);
};

}  // namespace vector
}  // namespace grimoire
}  // namespace marisa

#endif  // MARISA_GRIMOIRE_VECTOR_FLAT_VECTOR_H_