1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
|
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_Decoder_hh__
#define avro_Decoder_hh__
#include "Config.hh"
#include <stdint.h>
#include <string>
#include <vector>
#include <memory>
#include "ValidSchema.hh"
#include "Stream.hh"
/// \file
///
/// Low-level support for decoding Avro values.
/// The Decoder class has two kinds of functions. One kind supports
/// decoding of leaf values (for example, decodeLong and
/// decodeString). These functions have analogs in Encoder.
///
/// The other kind supports decoding of maps and arrays.
/// These functions are arrayStart, arrayNext, and skipArray
/// (and similar functions for maps).
namespace avro {
/**
 * Decoder is an interface implemented by every decoder capable
 * of decoding Avro data.
 *
 * Leaf values are read with the decodeXxx() functions; strings, bytes and
 * fixed values can alternatively be skipped with skipString(), skipBytes()
 * and skipFixed(). Arrays and maps are read chunk by chunk with
 * arrayStart()/arrayNext() and mapStart()/mapNext().
 *
 * Note: decodeString(), decodeBytes() and decodeFixed() each have a
 * non-virtual convenience overload that returns the value. By the usual C++
 * name-hiding rules, a derived class that overrides the virtual overload
 * hides the convenience overload for calls made through the derived type
 * unless it adds a matching using-declaration.
 */
class AVRO_DECL Decoder {
public:
    /// Virtual so that an implementation can be destroyed through a
    /// pointer or reference to Decoder.
    virtual ~Decoder() = default;

    /// All future decoding will come from is, which should be valid
    /// until replaced by another call to init() or this Decoder is
    /// destructed.
    virtual void init(InputStream& is) = 0;

    /// Decodes a null from the current stream.
    virtual void decodeNull() = 0;

    /// Decodes a bool from the current stream.
    virtual bool decodeBool() = 0;

    /// Decodes a 32-bit int from the current stream.
    virtual int32_t decodeInt() = 0;

    /// Decodes a 64-bit signed int from the current stream.
    virtual int64_t decodeLong() = 0;

    /// Decodes a single-precision floating point number from current stream.
    virtual float decodeFloat() = 0;

    /// Decodes a double-precision floating point number from current stream.
    virtual double decodeDouble() = 0;

    /// Decodes a UTF-8 string from the current stream and returns it.
    /// Convenience wrapper around decodeString(std::string&).
    std::string decodeString() {
        std::string result;
        decodeString(result);
        return result;
    }

    /**
     * Decodes a UTF-8 string from the stream and assigns it to value.
     */
    virtual void decodeString(std::string& value) = 0;

    /// Skips a string on the current stream.
    virtual void skipString() = 0;

    /// Decodes arbitrary binary data from the current stream and returns it.
    /// Convenience wrapper around decodeBytes(std::vector<uint8_t>&).
    std::vector<uint8_t> decodeBytes() {
        std::vector<uint8_t> result;
        decodeBytes(result);
        return result;
    }

    /// Decodes arbitrary binary data from the current stream and puts it
    /// in value.
    virtual void decodeBytes(std::vector<uint8_t>& value) = 0;

    /// Skips bytes on the current stream.
    virtual void skipBytes() = 0;

    /**
     * Decodes fixed length binary from the current stream.
     * Convenience wrapper around decodeFixed(size_t, std::vector<uint8_t>&).
     * \param[in] n The size (byte count) of the fixed being read.
     * \return The fixed data that has been read. The size of the returned
     * vector is guaranteed to be equal to \p n.
     */
    std::vector<uint8_t> decodeFixed(size_t n) {
        std::vector<uint8_t> result;
        decodeFixed(n, result);
        return result;
    }

    /**
     * Decodes a fixed from the current stream.
     * \param[in] n The size (byte count) of the fixed being read.
     * \param[out] value The value that receives the fixed. The vector will
     * be size-adjusted based on the fixed's size.
     */
    virtual void decodeFixed(size_t n, std::vector<uint8_t>& value) = 0;

    /// Skips fixed length binary on the current stream.
    virtual void skipFixed(size_t n) = 0;

    /// Decodes enum from the current stream.
    virtual size_t decodeEnum() = 0;

    /// Start decoding an array. Returns the number of entries in first chunk.
    virtual size_t arrayStart() = 0;

    /// Returns the number of entries in next chunk. 0 if last.
    virtual size_t arrayNext() = 0;

    /// Tries to skip an array. If it can, it returns 0. Otherwise
    /// it returns the number of elements to be skipped. The client
    /// should skip the individual items. In such cases, skipArray
    /// is identical to arrayStart.
    virtual size_t skipArray() = 0;

    /// Start decoding a map. Returns the number of entries in first chunk.
    virtual size_t mapStart() = 0;

    /// Returns the number of entries in next chunk. 0 if last.
    virtual size_t mapNext() = 0;

    /// Tries to skip a map. If it can, it returns 0. Otherwise
    /// it returns the number of elements to be skipped. The client
    /// should skip the individual items. In such cases, skipMap
    /// is identical to mapStart.
    virtual size_t skipMap() = 0;

    /// Decodes a branch of a union. The actual value is to follow.
    virtual size_t decodeUnionIndex() = 0;

    /// Drains any additional data at the end of the current entry in a stream.
    /// It also returns any unused bytes back to any underlying input stream.
    /// One situation where this happens is when the reader's schema and
    /// the writer's schema are records but are different and the writer's
    /// record has more fields at the end of the record.
    /// Leaving such data unread is usually not a problem. If multiple
    /// records are stored consecutively in a stream (e.g. Avro data file)
    /// any attempt to read the next record will automatically skip
    /// those extra fields of the current record. It would still leave
    /// the extra fields at the end of the last record in the stream.
    /// This would mean that the stream is not in a good state. For example,
    /// if some non-avro information is stored at the end of the stream,
    /// the consumers of such data would see the bytes left behind
    /// by the avro decoder. A similar set of problems occurs if the Decoder
    /// consumes more than what it should.
    virtual void drain() = 0;
};
/**
 * Alias for a std::shared_ptr that refers to a Decoder.
 */
using DecoderPtr = std::shared_ptr<Decoder>;
/**
* ResolvingDecoder is derived from \ref Decoder, with one additional
* function, fieldOrder(), to obtain the ordering of fields within a record.
*/
class AVRO_DECL ResolvingDecoder : public Decoder {
public:
/// Returns the order of fields for records.
/// This order may differ from the order of the fields in the schema
/// because the writer's field order could be different. To avoid
/// buffering values for later use, the values are returned in the
/// writer's field order.
/// NOTE(review): the elements are presumably field indices -- confirm
/// against the implementation.
virtual const std::vector<size_t>& fieldOrder() = 0;
};
/**
 * Alias for a std::shared_ptr that refers to a ResolvingDecoder.
 */
using ResolvingDecoderPtr = std::shared_ptr<ResolvingDecoder>;
/**
* Returns a decoder that can decode the binary Avro standard.
*/
AVRO_DECL DecoderPtr binaryDecoder();
/**
* Returns a decoder that validates the sequence of calls to an underlying
* Decoder against the given schema.
* \param[in] schema The schema the sequence of calls is validated against.
* \param[in] base The underlying Decoder.
*/
AVRO_DECL DecoderPtr validatingDecoder(const ValidSchema& schema,
const DecoderPtr& base);
/**
* Returns a decoder that can decode the Avro standard for JSON.
* \param[in] schema The schema supplied to the JSON decoder.
*/
AVRO_DECL DecoderPtr jsonDecoder(const ValidSchema& schema);
/**
* Returns a decoder that decodes avro data from base written according to
* the writer schema and resolves against the reader schema.
* The client uses the decoder as if the data were written using the
* reader schema.
* \param[in] writer The schema the data was written with.
* \param[in] reader The schema the data is resolved against.
* \param[in] base The decoder the avro data is decoded from.
* // FIXME: Handle out of order fields.
*/
AVRO_DECL ResolvingDecoderPtr resolvingDecoder(const ValidSchema& writer,
const ValidSchema& reader, const DecoderPtr& base);
} // namespace avro
#endif
|