diff options
author | leo <[email protected]> | 2022-02-10 16:46:40 +0300 |
---|---|---|
committer | Daniil Cherednik <[email protected]> | 2022-02-10 16:46:40 +0300 |
commit | 99609724f661f7e21d1cb08e8d80e87c3632fdb3 (patch) | |
tree | 49e222ea1c5804306084bb3ae065bb702625360f /library/cpp/http/fetch/httpfsm.rl6 | |
parent | 980edcd3304699edf9d4e4d6a656e585028e2a72 (diff) |
Restoring authorship annotation for <[email protected]>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/http/fetch/httpfsm.rl6')
-rw-r--r-- | library/cpp/http/fetch/httpfsm.rl6 | 434 |
1 files changed, 217 insertions, 217 deletions
diff --git a/library/cpp/http/fetch/httpfsm.rl6 b/library/cpp/http/fetch/httpfsm.rl6 index 83557b144e5..eab0328b187 100644 --- a/library/cpp/http/fetch/httpfsm.rl6 +++ b/library/cpp/http/fetch/httpfsm.rl6 @@ -1,70 +1,70 @@ -#include <stdio.h> -#include <time.h> - +#include <stdio.h> +#include <time.h> + #include <library/cpp/charset/doccodes.h> #include <library/cpp/charset/codepage.h> #include <library/cpp/http/misc/httpcodes.h> #include <util/datetime/base.h> #include <util/generic/ylimits.h> #include <algorithm> // max - + #include <library/cpp/http/fetch/httpheader.h> #include <library/cpp/http/fetch/httpfsm.h> - + #ifdef _MSC_VER #pragma warning(disable: 4702) // unreachable code #endif #define c(i) I = i; #define m(i) I = std::max(I, (long)i); - -static inline int X(unsigned char c) { - return (c >= 'A' ? ((c & 0xdf) - 'A' + 10) : (c - '0')); -} - -template <typename x> -static inline void guard(x &val) { - val = (val >= -1) ? -4 - val : -2; // f(-2) = -2 -} - -template <typename x> -static inline void setguarded(x &val, long cnt) { - val = (val == -4 - -1 || cnt == -4 -val) ? cnt : -2; -} - -//////////////////////////////////////////////////////////////////// -/// HTTP PARSER -//////////////////////////////////////////////////////////////////// - -%%{ -machine http_header_parser; - + +static inline int X(unsigned char c) { + return (c >= 'A' ? ((c & 0xdf) - 'A' + 10) : (c - '0')); +} + +template <typename x> +static inline void guard(x &val) { + val = (val >= -1) ? -4 - val : -2; // f(-2) = -2 +} + +template <typename x> +static inline void setguarded(x &val, long cnt) { + val = (val == -4 - -1 || cnt == -4 -val) ? cnt : -2; +} + +//////////////////////////////////////////////////////////////////// +/// HTTP PARSER +//////////////////////////////////////////////////////////////////// + +%%{ +machine http_header_parser; + include HttpDateTimeParser "../../../../util/datetime/parser.rl6"; -alphtype unsigned char; - -################# 2.2 Basic Rules ################# -eol = '\r'? '\n'; -ws = [ \t]; -lw = '\r'? '\n'? ws; +alphtype unsigned char; + +################# 2.2 Basic Rules ################# +eol = '\r'? '\n'; +ws = [ \t]; +lw = '\r'? '\n'? ws; separator = [()<>@,;:\\"/\[\]?={}]; -token_char = [!-~] - separator; # http tokens chars -url_char = [!-~] - ["<>\[\]\\^`{}|]; # uric chars -text_char = ws | 33..126 | 128..255; -any_text_char = any - [\r\n]; - -lws = lw*; -eoh = lws eol; -token = token_char+; -ex_token = (token_char | ws)* token_char; -text = (text_char | lw)*; -any_text = (any_text_char | lw)*; -def = lws ':' lws; - +token_char = [!-~] - separator; # http tokens chars +url_char = [!-~] - ["<>\[\]\\^`{}|]; # uric chars +text_char = ws | 33..126 | 128..255; +any_text_char = any - [\r\n]; + +lws = lw*; +eoh = lws eol; +token = token_char+; +ex_token = (token_char | ws)* token_char; +text = (text_char | lw)*; +any_text = (any_text_char | lw)*; +def = lws ':' lws; + action clear_buf { buflen = 0; } action update_buf { if (buflen < sizeof(buf)) buf[buflen++] = fc; } - -################################################### + +################################################### ############ response status line ################# action set_minor { base_hd->http_minor = I; } action set_status { @@ -75,14 +75,14 @@ action set_status { return -3; } } - + status_code = int3; http_major = int; http_minor = int; reason_phrase = ws+ text_char*; http_version = "http/"i http_major '.' http_minor %set_minor; response_status_line = http_version ws+ status_code reason_phrase? eol %set_status; - + ############ request status line ################# action set_request_uri { if (request_hd && buflen < FETCHER_URL_MAX) { @@ -116,44 +116,44 @@ request_uri = (token_char | separator)+ >clear_buf $update_buf %set_request_uri; request_status_line = http_method ws+ request_uri ws+ http_version eoh; -################# connection ###################### +################# connection ###################### action beg_connection { guard(base_hd->connection_closed); I = -1; } action set_connection { setguarded(base_hd->connection_closed, I); } - -c_token = "close"i %{m(1)} - | "keep-alive"i %{m(0)}; -c_tokenlist = c_token (lws ',' lws c_token)?; + +c_token = "close"i %{m(1)} + | "keep-alive"i %{m(0)}; +c_tokenlist = c_token (lws ',' lws c_token)?; connection = "connection"i def %beg_connection c_tokenlist eoh %set_connection; - -################# content-encoding ################ + +################# content-encoding ################ action beg_content_encoding { I = HTTP_COMPRESSION_ERROR; } action set_content_encoding { base_hd->compression_method = ((base_hd->compression_method == HTTP_COMPRESSION_UNSET || base_hd->compression_method == I) ? I : (int)HTTP_COMPRESSION_ERROR); } - -ce_tokenlist = "identity"i %{c(HTTP_COMPRESSION_IDENTITY)} - | "gzip"i %{c(HTTP_COMPRESSION_GZIP)} - | "x-gzip"i %{c(HTTP_COMPRESSION_GZIP)} - | "deflate"i %{c(HTTP_COMPRESSION_DEFLATE)} - | "compress"i %{c(HTTP_COMPRESSION_COMPRESS)} - | "x-compress"i %{c(HTTP_COMPRESSION_COMPRESS)}; + +ce_tokenlist = "identity"i %{c(HTTP_COMPRESSION_IDENTITY)} + | "gzip"i %{c(HTTP_COMPRESSION_GZIP)} + | "x-gzip"i %{c(HTTP_COMPRESSION_GZIP)} + | "deflate"i %{c(HTTP_COMPRESSION_DEFLATE)} + | "compress"i %{c(HTTP_COMPRESSION_COMPRESS)} + | "x-compress"i %{c(HTTP_COMPRESSION_COMPRESS)}; content_encoding = "content-encoding"i def %beg_content_encoding ce_tokenlist eoh %set_content_encoding; - -################# transfer-encoding ############### + +################# transfer-encoding ############### action beg_encoding { guard(base_hd->transfer_chunked); } action set_encoding { setguarded(base_hd->transfer_chunked, I); } - -e_tokenlist = "identity"i %{c(0)} - | "chunked"i %{c(1)}; + +e_tokenlist = "identity"i %{c(0)} + | "chunked"i %{c(1)}; transfer_encoding = "transfer-encoding"i def %beg_encoding e_tokenlist eoh %set_encoding; - -################# content-length ################## + +################# content-length ################## action beg_content_length { guard(base_hd->content_length); } action set_content_length { setguarded(base_hd->content_length, I); } - + content_length = "content-length"i def %beg_content_length int eoh %set_content_length; - + ################# content-range ################### action beg_content_range_start { guard(base_hd->content_range_start); I = -1; } action set_content_range_start { setguarded(base_hd->content_range_start, I); } @@ -166,7 +166,7 @@ content_range = "content-range"i def "bytes"i sp %beg_content_range_start int %beg_content_range_end int '/' %set_content_range_end %beg_content_range_el int eoh %set_content_range_el; -################# accept-ranges ################### +################# accept-ranges ################### action beg_accept_ranges { if (hd) { guard(hd->accept_ranges); @@ -174,21 +174,21 @@ action beg_accept_ranges { } } action set_accept_ranges { if (hd) setguarded(hd->accept_ranges, I); } - -ar_tokenlist = "bytes"i %{c(1)} - | "none"i %{c(0)}; + +ar_tokenlist = "bytes"i %{c(1)} + | "none"i %{c(0)}; accept_ranges = "accept-ranges"i def %beg_accept_ranges ar_tokenlist eoh %set_accept_ranges; - -################# content-type #################### + +################# content-type #################### action beg_mime { guard(base_hd->mime_type); } action set_mime { setguarded(base_hd->mime_type, I); } action set_charset { if (buflen < FETCHER_URL_MAX) { - buf[buflen++] = 0; + buf[buflen++] = 0; base_hd->charset = EncodingHintByName((const char*)buf); - } -} - + } +} + mime_type = "text/plain"i %{c(MIME_TEXT)} | "text/html"i %{c(MIME_HTML)} | "application/pdf"i %{c(MIME_PDF)} @@ -234,36 +234,36 @@ mime_type = "text/plain"i %{c(MIME_TEXT)} charset_name = token_char+ >clear_buf $update_buf; mime_param = "charset"i ws* '=' ws* '"'? charset_name '"'? %set_charset @2 - | token ws* '=' ws* '"'? token '"'? @1 - | text $0; -mime_parms = (lws ';' lws mime_param)*; + | token ws* '=' ws* '"'? token '"'? @1 + | text $0; +mime_parms = (lws ';' lws mime_param)*; content_type = "content-type"i def %beg_mime mime_type mime_parms eoh %set_mime; - -################# last modified ################### + +################# last modified ################### action beg_modtime { guard(base_hd->http_time); } action set_modtime { setguarded(base_hd->http_time, DateTimeFields.ToTimeT(-1)); } - + last_modified = "last-modified"i def %beg_modtime http_date eoh %set_modtime; - -################# location ######################## + +################# location ######################## action set_location { while (buflen > 0 && (buf[buflen - 1] == ' ' || buf[buflen - 1] == '\t')) { buflen --; } if (hd && buflen < FETCHER_URL_MAX) { hd->location = TStringBuf(buf, buflen); - } -} - + } +} + action set_status_303{ if (hd) hd->http_status = 303; } - + url = url_char+ >clear_buf $update_buf; loc_url = any_text_char+ >clear_buf $update_buf; location = "location"i def loc_url eoh %set_location; refresh = "refresh"i def int ';' lws "url="i loc_url eoh %set_location; - + ################# x-robots-tag ################ action set_x_robots { if (hd && AcceptingXRobots) { @@ -349,56 +349,56 @@ action set_squid_error { squid_error = "X-Yandex-Squid-Error"i def any_text eoh %set_squid_error; -################# auth ######################## +################# auth ######################## action init_auth { - if (auth_hd) - auth_hd->use_auth=true; -} - + if (auth_hd) + auth_hd->use_auth=true; +} + action update_auth_buf - { if (auth_hd && buflen < sizeof(buf)) buf[buflen++] = *fpc; } - -quoted_str = /"/ (text_char - /"/)* /"/ >2; + { if (auth_hd && buflen < sizeof(buf)) buf[buflen++] = *fpc; } + +quoted_str = /"/ (text_char - /"/)* /"/ >2; auth_quoted_str = ( /"/ ( ( text_char - /"/ )* >clear_buf $update_auth_buf ) /"/ ) > 2; - -# do not support auth-int, too heavy procedure - -qop_auth_option = "auth"i @1 %{if(auth_hd) auth_hd->qop_auth = true; }; - -qop_option = ( qop_auth_option @1 ) | (( token-"auth"i) $0 ); - -auth_good_param = ( "nonce"i /=/ auth_quoted_str ) + +# do not support auth-int, too heavy procedure + +qop_auth_option = "auth"i @1 %{if(auth_hd) auth_hd->qop_auth = true; }; + +qop_option = ( qop_auth_option @1 ) | (( token-"auth"i) $0 ); + +auth_good_param = ( "nonce"i /=/ auth_quoted_str ) %{if (auth_hd && buflen < FETCHER_URL_MAX-1) { - buf[buflen++] = 0; - auth_hd->nonce = strdup((const char*)buf); - }} - | ( "realm"i /=/ auth_quoted_str ) + buf[buflen++] = 0; + auth_hd->nonce = strdup((const char*)buf); + }} + | ( "realm"i /=/ auth_quoted_str ) %{if (auth_hd && buflen < FETCHER_URL_MAX-1) { - buf[buflen++] = 0; - auth_hd->realm = strdup((const char*)buf); - }} - | ( "opaque"i /=/ auth_quoted_str ) + buf[buflen++] = 0; + auth_hd->realm = strdup((const char*)buf); + }} + | ( "opaque"i /=/ auth_quoted_str ) %{if (auth_hd && buflen < FETCHER_URL_MAX-1) { - buf[buflen++] = 0; - auth_hd->opaque = strdup((const char*)buf); - }} - | "stale"i /=/ "true"i - %{if (auth_hd) auth_hd->stale = true; } - | "algorithm"i /=/ "md5"i /-/ "sess"i - %{if (auth_hd) auth_hd->algorithm = 1; } - | ( "qop"i /="/ qop_option (ws* "," ws* qop_option)* /"/); - -auth_param = auth_good_param @1 | - ( (token - ( "nonce"i | "opaque"i | "realm"i | "qop"i ) ) - /=/ (token | quoted_str ) ) $0; - -auth_params = auth_param ( ws* /,/ ws* auth_param )*; - + buf[buflen++] = 0; + auth_hd->opaque = strdup((const char*)buf); + }} + | "stale"i /=/ "true"i + %{if (auth_hd) auth_hd->stale = true; } + | "algorithm"i /=/ "md5"i /-/ "sess"i + %{if (auth_hd) auth_hd->algorithm = 1; } + | ( "qop"i /="/ qop_option (ws* "," ws* qop_option)* /"/); + +auth_param = auth_good_param @1 | + ( (token - ( "nonce"i | "opaque"i | "realm"i | "qop"i ) ) + /=/ (token | quoted_str ) ) $0; + +auth_params = auth_param ( ws* /,/ ws* auth_param )*; + digest_challenge = ("digest"i %init_auth ws+ auth_params) | - ((token-"digest"i) text); - -auth = "www-authenticate"i def digest_challenge eoh; - + ((token-"digest"i) text); + +auth = "www-authenticate"i def digest_challenge eoh; + ###################### host ####################### action set_host { if (request_hd && buflen < HOST_MAX) { @@ -562,8 +562,8 @@ action set_request_priority { request_priority = "x-yandex-request-priority"i def int eoh %set_request_priority; -################# message header ################## -other_header = ( ex_token - "www-authenticate"i ) def any_text eoh; +################# message header ################## +other_header = ( ex_token - "www-authenticate"i ) def any_text eoh; message_header = other_header $0 | connection @1 | content_encoding @1 @@ -595,90 +595,90 @@ request_header = message_header $0 | request_cache_control @1 | response_timeout @1 | request_priority @1; - -################# main ############################ + +################# main ############################ action accepted { lastchar = (char*)fpc; return 2; } - + main := ((response_status_line ('\r'? response_header)*) | (request_status_line ('\r' ? request_header)*)) eol @accepted; - -}%% - -%% write data; - -int THttpHeaderParser::execute(unsigned char *inBuf, int len) { - const unsigned char *p = inBuf; - const unsigned char *pe = p + len; - %% write exec; - if (cs == http_header_parser_error) - return -1; - else if (cs == http_header_parser_first_final) - return 0; - else - return 1; -} - -void THttpHeaderParser::init() { - %% write init; -} - -%%{ -machine http_chunk_parser; - -alphtype unsigned char; - + +}%% + +%% write data; + +int THttpHeaderParser::execute(unsigned char *inBuf, int len) { + const unsigned char *p = inBuf; + const unsigned char *pe = p + len; + %% write exec; + if (cs == http_header_parser_error) + return -1; + else if (cs == http_header_parser_first_final) + return 0; + else + return 1; +} + +void THttpHeaderParser::init() { + %% write init; +} + +%%{ +machine http_chunk_parser; + +alphtype unsigned char; + action clear_hex { cnt64 = 0; } action update_hex { cnt64 = 16 * cnt64 + X(fc); if(cnt64 > Max<int>()) return -2; } action set_chunk { chunk_length = static_cast<int>(cnt64); } action accepted { lastchar = (char*)fpc; return 2; } - -eol = '\r'? '\n'; -ws = [ \t]; -sp = ' '; -lw = '\r'? '\n'? ws; -separator = [()<>@,;:\\"/\[\]?={}]; -token_char = [!-~] - separator; # http tokens chars -url_char = [!-~] - ["<>\[\]\\^`{}|]; # uric chars -text_char = ws | 33..127 | 160..255; - -lws = lw*; -eoh = lws eol; -token = token_char+; -text = (text_char | lw)*; -def = lws ':' lws; - + +eol = '\r'? '\n'; +ws = [ \t]; +sp = ' '; +lw = '\r'? '\n'? ws; +separator = [()<>@,;:\\"/\[\]?={}]; +token_char = [!-~] - separator; # http tokens chars +url_char = [!-~] - ["<>\[\]\\^`{}|]; # uric chars +text_char = ws | 33..127 | 160..255; + +lws = lw*; +eoh = lws eol; +token = token_char+; +text = (text_char | lw)*; +def = lws ':' lws; + hex = (xdigit+) >clear_hex $update_hex; -quoted_string = '"' ((text_char - '"') $0 | '\\"' @1)* '"'; - -chunk_ext_val = token | quoted_string; -chunk_ext_name = token; -chunk_extension = ws* (';' chunk_ext_name ws* '=' ws* chunk_ext_val ws*)*; - -entity_header = token def text eoh; -trailer = entity_header*; - +quoted_string = '"' ((text_char - '"') $0 | '\\"' @1)* '"'; + +chunk_ext_val = token | quoted_string; +chunk_ext_name = token; +chunk_extension = ws* (';' chunk_ext_name ws* '=' ws* chunk_ext_val ws*)*; + +entity_header = token def text eoh; +trailer = entity_header*; + chunk = (hex - '0'+) chunk_extension? %set_chunk; -last_chunk = '0'+ chunk_extension? eol trailer; +last_chunk = '0'+ chunk_extension? eol trailer; main := eol (chunk $0 | last_chunk @1) eol @accepted; - -}%% - -%% write data; - -int THttpChunkParser::execute(unsigned char *inBuf, int len) { - const unsigned char *p = inBuf; - const unsigned char *pe = p + len; - %% write exec; - if (cs == http_chunk_parser_error) - return -1; - else if (cs == http_chunk_parser_first_final) - return 0; - else - return 1; -} - -void THttpChunkParser::init() { - chunk_length = 0; - %% write init; -} + +}%% + +%% write data; + +int THttpChunkParser::execute(unsigned char *inBuf, int len) { + const unsigned char *p = inBuf; + const unsigned char *pe = p + len; + %% write exec; + if (cs == http_chunk_parser_error) + return -1; + else if (cs == http_chunk_parser_first_final) + return 0; + else + return 1; +} + +void THttpChunkParser::init() { + chunk_length = 0; + %% write init; +} |