1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
|
//
// Copyright (c) 2016-2017 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
// Official repository: https://github.com/boostorg/beast
//
#ifndef BOOST_BEAST_HTTP_BASIC_PARSER_HPP
#define BOOST_BEAST_HTTP_BASIC_PARSER_HPP
#include <boost/beast/core/detail/config.hpp>
#include <boost/beast/core/error.hpp>
#include <boost/beast/core/string.hpp>
#include <boost/beast/http/field.hpp>
#include <boost/beast/http/verb.hpp>
#include <boost/beast/http/detail/basic_parser.hpp>
#include <boost/asio/buffer.hpp>
#include <boost/optional.hpp>
#include <boost/assert.hpp>
#include <limits>
#include <memory>
#include <type_traits>
#include <utility>
namespace boost {
namespace beast {
namespace http {
/** A parser for decoding HTTP/1 wire format messages.
This parser is designed to efficiently parse messages in the
HTTP/1 wire format. It allocates no memory when input is
presented as a single contiguous buffer, and uses minimal
state. It will handle chunked encoding and it understands
the semantics of the Connection, Content-Length, and Upgrade
fields.
The parser is optimized for the case where the input buffer
sequence consists of a single contiguous buffer. The
@ref flat_buffer class is provided, which guarantees
that the input sequence of the stream buffer will be represented
by exactly one contiguous buffer. To ensure the optimum performance
of the parser, use @ref flat_buffer with HTTP algorithms
such as @ref read, @ref read_some, @ref async_read, and @ref async_read_some.
Alternatively, the caller may use custom techniques to ensure that
the structured portion of the HTTP message (header or chunk header)
is contained in a linear buffer.
The interface uses CRTP (Curiously Recurring Template Pattern).
To use this class directly, derive from @ref basic_parser. When
bytes are presented, the implementation will make a series of zero
or more calls to derived class member functions (termed "callbacks"
in this context) matching a specific signature.
Every callback must be provided by the derived class, or else
a compilation error will be generated. This exemplar shows
the signature and description of the callbacks required in
the derived class.
For each callback, the function will ensure that `!ec` is `true`
if there was no error or set to the appropriate error code if
there was one. If an error is set, the value is propagated to
the caller of the parser.
@par Derived Class Requirements
@code
template<bool isRequest>
class derived
: public basic_parser<isRequest, derived<isRequest>>
{
private:
// The friend declaration is needed,
// otherwise the callbacks must be made public.
friend class basic_parser<isRequest, derived>;
/// Called after receiving the request-line (isRequest == true).
void
on_request_impl(
verb method, // The method verb, verb::unknown if no match
string_view method_str, // The method as a string
string_view target, // The request-target
int version, // The HTTP-version
error_code& ec); // The error returned to the caller, if any
/// Called after receiving the start-line (isRequest == false).
void
on_response_impl(
int code, // The status-code
string_view reason, // The obsolete reason-phrase
int version, // The HTTP-version
error_code& ec); // The error returned to the caller, if any
/// Called after receiving a header field.
void
on_field_impl(
field f, // The known-field enumeration constant
string_view name, // The field name string.
string_view value, // The field value
error_code& ec); // The error returned to the caller, if any
/// Called after the complete header is received.
void
on_header_impl(
error_code& ec); // The error returned to the caller, if any
/// Called just before processing the body, if a body exists.
void
on_body_init_impl(
boost::optional<
std::uint64_t> const&
content_length, // Content length if known, else `boost::none`
error_code& ec); // The error returned to the caller, if any
/// Called for each piece of the body, if a body exists.
//!
//! This is used when there is no chunked transfer coding.
//!
//! The function returns the number of bytes consumed from the
//! input buffer. Any input octets not consumed will be
//! presented on subsequent calls.
//!
std::size_t
on_body_impl(
string_view s, // A portion of the body
error_code& ec); // The error returned to the caller, if any
/// Called for each chunk header.
void
on_chunk_header_impl(
std::uint64_t size, // The size of the upcoming chunk,
// or zero for the last chunk
string_view extension, // The chunk extensions (may be empty)
error_code& ec); // The error returned to the caller, if any
/// Called to deliver the chunk body.
//!
//! This is used when there is a chunked transfer coding. The
//! implementation will automatically remove the encoding before
//! calling this function.
//!
//! The function returns the number of bytes consumed from the
//! input buffer. Any input octets not consumed will be
//! presented on subsequent calls.
//!
std::size_t
on_chunk_body_impl(
std::uint64_t remain, // The number of bytes remaining in the chunk,
// including what is being passed here.
// or zero for the last chunk
string_view body, // The next piece of the chunk body
error_code& ec); // The error returned to the caller, if any
/// Called when the complete message is parsed.
void
on_finish_impl(error_code& ec);
public:
derived() = default;
};
@endcode
@tparam isRequest A `bool` indicating whether the parser will be
presented with request or response message.
@tparam Derived The derived class type. This is part of the
Curiously Recurring Template Pattern interface.
@note If the parser encounters a field value with obs-fold
longer than 4 kilobytes in length, an error is generated.
*/
template<bool isRequest, class Derived>
class basic_parser
    : private detail::basic_parser_base
{
    // All specializations befriend one another; presumably this lets the
    // converting move constructor below reach the state of its argument.
    template<bool OtherIsRequest, class OtherDerived>
    friend class basic_parser;

    // Upper bound on the size of the stack flat buffer
    static constexpr std::size_t max_stack_buffer = 8192;

    //
    // Bit masks kept in f_
    //

    // Once the header has been read the message is complete
    static constexpr unsigned flagSkipBody = 1u << 0;

    // Input buffers are consumed across semantic boundaries
    static constexpr unsigned flagEager = 1u << 1;

    // At least one byte has been read by the parser
    static constexpr unsigned flagGotSome = 1u << 2;

    // The semantics of the message indicate that a body is
    // expected. Cleared if flagSkipBody is set.
    static constexpr unsigned flagHasBody = 1u << 3;

    // Further state bits; they are not described individually
    // in this header.
    static constexpr unsigned flagHTTP11 = 1u << 4;
    static constexpr unsigned flagNeedEOF = 1u << 5;
    static constexpr unsigned flagExpectCRLF = 1u << 6;
    static constexpr unsigned flagConnectionClose = 1u << 7;
    static constexpr unsigned flagConnectionUpgrade = 1u << 8;
    static constexpr unsigned flagConnectionKeepAlive = 1u << 9;
    static constexpr unsigned flagContentLength = 1u << 10;
    static constexpr unsigned flagChunked = 1u << 11;
    static constexpr unsigned flagUpgrade = 1u << 12;
    static constexpr unsigned flagFinalChunk = 1u << 13;

    // Default payload body limit when parsing requests (1MB)
    static constexpr std::uint64_t
    default_body_limit(std::true_type)
    {
        return 1024 * 1024;
    }

    // Default payload body limit when parsing responses (8MB)
    static constexpr std::uint64_t
    default_body_limit(std::false_type)
    {
        return 8 * 1024 * 1024;
    }

    // Largest payload body accepted
    std::uint64_t body_limit_ = default_body_limit(is_request{});

    // Size of the chunk or body
    std::uint64_t len_ = 0;

    // Temporary storage
    std::unique_ptr<char[]> buf_;

    // Size of buf_
    std::size_t buf_len_ = 0;

    // Position at which a search is resumed
    std::size_t skip_ = 0;

    // Largest header accepted
    std::uint32_t header_limit_ = 8192;

    // Status of the response
    unsigned short status_ = 0;

    // Parser state, starting out as "nothing yet"
    state state_ = state::nothing_yet;

    // Flags, a combination of the masks above
    unsigned f_ = 0;

protected:
    /// Default constructor
    basic_parser() = default;

    /// Move constructor
    basic_parser(basic_parser&&) = default;

    /// Move assignment
    basic_parser& operator=(basic_parser&&) = default;

    /** Move constructor

        Constructs from a parser which has a different derived type.

        @note
        Once moved from, the other object may only be destroyed;
        no other operation on it is valid.
    */
    template<class OtherDerived>
    basic_parser(basic_parser<isRequest, OtherDerived>&&);

public:
    /// `true` if this parser parses requests, `false` for responses.
    using is_request = std::integral_constant<bool, isRequest>;

    /// Destructor
    ~basic_parser() = default;

    /// Copy constructor (deleted)
    basic_parser(basic_parser const&) = delete;

    /// Copy assignment (deleted)
    basic_parser& operator=(basic_parser const&) = delete;

    /** Returns a reference to this object as a @ref basic_parser.

        Use this to pass a derived class where the base class is
        expected, so that the correct function overload is chosen
        when resolution would otherwise be ambiguous.
    */
    basic_parser& base()
    {
        return *this;
    }

    /** Returns a constant reference to this object as a @ref basic_parser.

        Use this to pass a derived class where the base class is
        expected, so that the correct function overload is chosen
        when resolution would otherwise be ambiguous.
    */
    basic_parser const& base() const
    {
        return *this;
    }

    /// Returns `true` if the parser has received at least one byte of input.
    bool got_some() const
    {
        return state::nothing_yet != state_;
    }

    /** Returns `true` if the message is complete.

        The message is complete after the full header is produced
        and one of the following is true:

        @li The skip body option was set.

        @li The semantics of the message indicate there is no body.

        @li The semantics of the message indicate a body is expected,
        and the entire body was parsed.
    */
    bool is_done() const
    {
        return state::complete == state_;
    }

    /** Returns `true` if the parser has produced the full header.
    */
    bool is_header_done() const
    {
        return state::fields < state_;
    }

    /** Returns `true` if the message is an upgrade message.

        @note The return value is undefined unless
        @ref is_header_done would return `true`.
    */
    bool upgrade() const
    {
        return 0 != (f_ & flagConnectionUpgrade);
    }

    /** Returns `true` if the last value for Transfer-Encoding is "chunked".

        @note The return value is undefined unless
        @ref is_header_done would return `true`.
    */
    bool chunked() const
    {
        return 0 != (f_ & flagChunked);
    }

    /** Returns `true` if the message has keep-alive connection semantics.

        This function always returns `false` if @ref need_eof would return
        `false`.

        @note The return value is undefined unless
        @ref is_header_done would return `true`.
    */
    // NOTE(review): the sentence about need_eof above looks inverted (a
    // message delimited by end of file could hardly be keep-alive); confirm
    // against the out-of-line definition before changing the documentation.
    bool keep_alive() const;

    /** Returns the optional value of Content-Length if known.

        @note The return value is undefined unless
        @ref is_header_done would return `true`.
    */
    boost::optional<std::uint64_t> content_length() const;

    /** Returns `true` if the message semantics require an end of file.

        Depending on what the header contains, the parser may need
        an end of file notification in order to know where the body
        ends. When this function returns `true`, @ref put_eof must be
        called once there will never be additional data from the
        input.
    */
    bool need_eof() const
    {
        return 0 != (f_ & flagNeedEOF);
    }

    /** Set the limit on the payload body.

        Sets the maximum allowed size of the payload body, measured
        before any encodings except chunked have been removed. One of
        the following cases applies, depending on the message
        semantics:

        @li The Content-Length is specified and exceeds the limit. The
        result @ref error::body_limit is then returned immediately
        after the header is parsed.

        @li The Content-Length is unspecified and the chunked encoding
        is not specified as the last encoding. The end of the message
        is then determined by the end of file indicator on the
        associated stream or input source. If enough body payload
        octets are presented to the parser to exceed the configured
        limit, the parse fails with the result @ref error::body_limit.

        @li The Transfer-Encoding specifies the chunked encoding as the
        last encoding. When the number of payload body octets produced
        by removing the chunked encoding exceeds the configured limit,
        the parse fails with the result @ref error::body_limit.

        Setting the limit after any body octets have been parsed
        results in undefined behavior.

        The default limit is 1MB for requests and 8MB for responses.

        @param v The payload body limit to set
    */
    void body_limit(std::uint64_t v)
    {
        body_limit_ = v;
    }

    /** Set a limit on the total size of the header.

        Sets the maximum allowed size of the header. The size counts
        all field name, value, and delimiter characters, as well as
        the CRLF sequences in the serialized input. If the end of the
        header is not found within this limit, @ref put returns the
        error @ref error::header_limit.

        Setting the limit after any header octets have been parsed
        results in undefined behavior.
    */
    void header_limit(std::uint32_t v)
    {
        header_limit_ = v;
    }

    /// Returns `true` if the eager parse option is set.
    bool eager() const
    {
        return 0 != (f_ & flagEager);
    }

    /** Set the eager parse option.

        Normally the parser returns after it successfully parses a
        structured element (header, chunk header, or chunk body), even
        when octets remain in the input. This is necessary when
        attempting to parse the header first, or when the caller wants
        to inspect information which subsequent parsing may invalidate,
        such as a chunk extension. The `eager` option controls whether
        the parser keeps going after parsing a structured element if
        octets remain in the buffer and no error occurs. Certain stream
        operations set or clear this option automatically to improve
        performance with no change in functionality.

        The default setting is `false`.

        @param v `true` to set the eager parse option or `false` to disable it.
    */
    void eager(bool v)
    {
        f_ = v ? (f_ | flagEager) : (f_ & ~flagEager);
    }

    /// Returns `true` if the skip parse option is set.
    bool skip() const
    {
        return 0 != (f_ & flagSkipBody);
    }

    /** Set the skip parse option.

        This option controls whether or not the parser expects to see
        an HTTP body, regardless of the presence or absence of certain
        fields such as Content-Length or a chunked Transfer-Encoding.
        Depending on the request, some responses do not carry a body;
        for example, a 200 response to a CONNECT request from a
        tunneling proxy, or a response to a HEAD request. In these
        cases, callers may use this function to inform the parser that
        no body is expected. The parser will then consider the message
        complete after the header has been received.

        @param v `true` to set the skip body option or `false` to disable it.

        @note This function must be called before any bytes are processed.
    */
    void skip(bool v);

    /** Write a buffer sequence to the parser.

        Attempts to incrementally parse the HTTP message data stored
        in the caller provided buffers. Upon success, a positive return
        value indicates that the parser made forward progress and
        consumed that number of bytes.

        In some cases the input buffer holds too few octets for the
        parser to make forward progress. This is indicated by the code
        @ref error::need_more. When this happens, the caller should
        place additional bytes into the buffer sequence and call
        @ref put again.

        The error code @ref error::need_more is special. When this
        error is returned, a subsequent call to @ref put may succeed
        if the buffers have been updated. Otherwise, upon error
        the parser may not be restarted.

        @param buffers An object meeting the requirements of
        @b ConstBufferSequence that represents the next chunk of
        message data. If the length of this buffer sequence is
        one, the implementation will not allocate additional memory.
        The class @ref beast::flat_buffer is provided as one way to
        meet this requirement.

        @param ec Set to the error, if any occurred.

        @return The number of octets consumed in the buffer
        sequence. The caller should remove these octets even if the
        error is set.
    */
    template<class ConstBufferSequence>
    std::size_t put(ConstBufferSequence const& buffers, error_code& ec);

#if ! BOOST_BEAST_DOXYGEN
    // Overload taking a single buffer; hidden from the generated docs
    std::size_t put(boost::asio::const_buffer const& buffer, error_code& ec);
#endif

    /** Inform the parser that the end of stream was reached.

        In certain cases, HTTP needs to know where the end of the
        stream is. For example, servers sometimes send responses
        without Content-Length and expect the client to consume input
        (for the body) until EOF. Callbacks and errors will still be
        processed as usual.

        This is typically called when a read from the underlying
        stream object sets the error code to
        `boost::asio::error::eof`.

        @note Only valid after parsing a complete header.

        @param ec Set to the error, if any occurred.
    */
    void put_eof(error_code& ec);

private:
    // Returns this object as the derived class (CRTP downcast)
    Derived& impl()
    {
        return static_cast<Derived&>(*this);
    }

    //
    // Implementation helpers. These are only declared here; the file
    // included at the end of this header is expected to define them.
    //

    template<class ConstBufferSequence>
    std::size_t put_from_stack(
        std::size_t size,
        ConstBufferSequence const& buffers,
        error_code& ec);

    void maybe_need_more(
        char const* p,
        std::size_t n,
        error_code& ec);

    // The trailing std::true_type / std::false_type parameter selects
    // the overload; presumably it is passed an is_request value.
    void parse_start_line(
        char const*& p,
        char const* last,
        error_code& ec,
        std::true_type);

    void parse_start_line(
        char const*& p,
        char const* last,
        error_code& ec,
        std::false_type);

    void parse_fields(
        char const*& p,
        char const* last,
        error_code& ec);

    void finish_header(error_code& ec, std::true_type);

    void finish_header(error_code& ec, std::false_type);

    void parse_body(
        char const*& p,
        std::size_t n,
        error_code& ec);

    void parse_body_to_eof(
        char const*& p,
        std::size_t n,
        error_code& ec);

    void parse_chunk_header(
        char const*& p,
        std::size_t n,
        error_code& ec);

    void parse_chunk_body(
        char const*& p,
        std::size_t n,
        error_code& ec);

    void do_field(
        field f,
        string_view value,
        error_code& ec);
};
} // http
} // beast
} // boost
#include <boost/beast/http/impl/basic_parser.ipp>
#endif
|