summary | refs | log | tree | commit | diff | stats
path: root/library/cpp/http/fetch/httpfsm.rl6
diff options
context:
space:
mode:
author leo <[email protected]> 2022-02-10 16:46:40 +0300
committer Daniil Cherednik <[email protected]> 2022-02-10 16:46:40 +0300
commit 99609724f661f7e21d1cb08e8d80e87c3632fdb3 (patch)
tree 49e222ea1c5804306084bb3ae065bb702625360f /library/cpp/http/fetch/httpfsm.rl6
parent 980edcd3304699edf9d4e4d6a656e585028e2a72 (diff)
Restoring authorship annotation for <[email protected]>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/http/fetch/httpfsm.rl6')
-rw-r--r-- library/cpp/http/fetch/httpfsm.rl6 434
1 files changed, 217 insertions, 217 deletions
diff --git a/library/cpp/http/fetch/httpfsm.rl6 b/library/cpp/http/fetch/httpfsm.rl6
index 83557b144e5..eab0328b187 100644
--- a/library/cpp/http/fetch/httpfsm.rl6
+++ b/library/cpp/http/fetch/httpfsm.rl6
@@ -1,70 +1,70 @@
-#include <stdio.h>
-#include <time.h>
-
+#include <stdio.h>
+#include <time.h>
+
#include <library/cpp/charset/doccodes.h>
#include <library/cpp/charset/codepage.h>
#include <library/cpp/http/misc/httpcodes.h>
#include <util/datetime/base.h>
#include <util/generic/ylimits.h>
#include <algorithm> // max
-
+
#include <library/cpp/http/fetch/httpheader.h>
#include <library/cpp/http/fetch/httpfsm.h>
-
+
#ifdef _MSC_VER
#pragma warning(disable: 4702) // unreachable code
#endif
#define c(i) I = i;
#define m(i) I = std::max(I, (long)i);
-
-static inline int X(unsigned char c) {
- return (c >= 'A' ? ((c & 0xdf) - 'A' + 10) : (c - '0'));
-}
-
-template <typename x>
-static inline void guard(x &val) {
- val = (val >= -1) ? -4 - val : -2; // f(-2) = -2
-}
-
-template <typename x>
-static inline void setguarded(x &val, long cnt) {
- val = (val == -4 - -1 || cnt == -4 -val) ? cnt : -2;
-}
-
-////////////////////////////////////////////////////////////////////
-/// HTTP PARSER
-////////////////////////////////////////////////////////////////////
-
-%%{
-machine http_header_parser;
-
+
+static inline int X(unsigned char c) { // Converts a single hexadecimal digit character to its numeric value.
+ return (c >= 'A' ? ((c & 0xdf) - 'A' + 10) : (c - '0')); // clearing bit 0x20 folds 'a'..'f' onto 'A'..'F'; anything below 'A' is treated as a decimal digit. No validation: a non-hex character yields a meaningless value.
+}
+
+template <typename x>
+static inline void guard(x &val) { // Re-encodes the stored field value before a header value is parsed; setguarded() decodes it afterwards.
+ val = (val >= -1) ? -4 - val : -2; // f(-2) = -2; any v >= -1 becomes -4 - v (so -1 -> -3, 0 -> -4, ...), everything else, including an already encoded value, becomes -2
+}
+
+template <typename x>
+static inline void setguarded(x &val, long cnt) { // Stores the freshly parsed value cnt into a field previously encoded by guard().
+ val = (val == -4 - -1 || cnt == -4 -val) ? cnt : -2; // accept cnt if the field held -1 before guard() (encoded as -3) or if cnt equals the previously stored value (-4 - val decodes it); otherwise the field becomes -2, which stays -2 through later guard() calls. NOTE(review): -1 looks like "unset" and -2 like "conflicting/invalid" -- confirm against the header struct.
+}
+
+////////////////////////////////////////////////////////////////////
+/// HTTP PARSER
+////////////////////////////////////////////////////////////////////
+
+%%{
+machine http_header_parser;
+
include HttpDateTimeParser "../../../../util/datetime/parser.rl6";
-alphtype unsigned char;
-
-################# 2.2 Basic Rules #################
-eol = '\r'? '\n';
-ws = [ \t];
-lw = '\r'? '\n'? ws;
+alphtype unsigned char;
+
+################# 2.2 Basic Rules #################
+eol = '\r'? '\n';
+ws = [ \t];
+lw = '\r'? '\n'? ws;
separator = [()<>@,;:\\"/\[\]?={}];
-token_char = [!-~] - separator; # http tokens chars
-url_char = [!-~] - ["<>\[\]\\^`{}|]; # uric chars
-text_char = ws | 33..126 | 128..255;
-any_text_char = any - [\r\n];
-
-lws = lw*;
-eoh = lws eol;
-token = token_char+;
-ex_token = (token_char | ws)* token_char;
-text = (text_char | lw)*;
-any_text = (any_text_char | lw)*;
-def = lws ':' lws;
-
+token_char = [!-~] - separator; # http tokens chars
+url_char = [!-~] - ["<>\[\]\\^`{}|]; # uric chars
+text_char = ws | 33..126 | 128..255;
+any_text_char = any - [\r\n];
+
+lws = lw*;
+eoh = lws eol;
+token = token_char+;
+ex_token = (token_char | ws)* token_char;
+text = (text_char | lw)*;
+any_text = (any_text_char | lw)*;
+def = lws ':' lws;
+
action clear_buf { /* start a fresh capture: forget whatever was accumulated in buf */ buflen = 0; }
action update_buf { /* append the current input character (fc) to buf; once buf is full, further characters are silently dropped */ if (buflen < sizeof(buf)) buf[buflen++] = fc; }
-
-###################################################
+
+###################################################
############ response status line #################
action set_minor { base_hd->http_minor = I; }
action set_status {
@@ -75,14 +75,14 @@ action set_status {
return -3;
}
}
-
+
status_code = int3;
http_major = int;
http_minor = int;
reason_phrase = ws+ text_char*;
http_version = "http/"i http_major '.' http_minor %set_minor;
response_status_line = http_version ws+ status_code reason_phrase? eol %set_status;
-
+
############ request status line #################
action set_request_uri {
if (request_hd && buflen < FETCHER_URL_MAX) {
@@ -116,44 +116,44 @@ request_uri = (token_char | separator)+ >clear_buf $update_buf
%set_request_uri;
request_status_line = http_method ws+ request_uri ws+ http_version eoh;
-################# connection ######################
+################# connection ######################
action beg_connection { guard(base_hd->connection_closed); I = -1; }
action set_connection { setguarded(base_hd->connection_closed, I); }
-
-c_token = "close"i %{m(1)}
- | "keep-alive"i %{m(0)};
-c_tokenlist = c_token (lws ',' lws c_token)?;
+
+c_token = "close"i %{m(1)}
+ | "keep-alive"i %{m(0)};
+c_tokenlist = c_token (lws ',' lws c_token)?;
connection = "connection"i def %beg_connection c_tokenlist eoh %set_connection;
-
-################# content-encoding ################
+
+################# content-encoding ################
action beg_content_encoding { I = HTTP_COMPRESSION_ERROR; }
action set_content_encoding { base_hd->compression_method =
((base_hd->compression_method == HTTP_COMPRESSION_UNSET ||
base_hd->compression_method == I) ?
I : (int)HTTP_COMPRESSION_ERROR); }
-
-ce_tokenlist = "identity"i %{c(HTTP_COMPRESSION_IDENTITY)}
- | "gzip"i %{c(HTTP_COMPRESSION_GZIP)}
- | "x-gzip"i %{c(HTTP_COMPRESSION_GZIP)}
- | "deflate"i %{c(HTTP_COMPRESSION_DEFLATE)}
- | "compress"i %{c(HTTP_COMPRESSION_COMPRESS)}
- | "x-compress"i %{c(HTTP_COMPRESSION_COMPRESS)};
+
+ce_tokenlist = "identity"i %{c(HTTP_COMPRESSION_IDENTITY)}
+ | "gzip"i %{c(HTTP_COMPRESSION_GZIP)}
+ | "x-gzip"i %{c(HTTP_COMPRESSION_GZIP)}
+ | "deflate"i %{c(HTTP_COMPRESSION_DEFLATE)}
+ | "compress"i %{c(HTTP_COMPRESSION_COMPRESS)}
+ | "x-compress"i %{c(HTTP_COMPRESSION_COMPRESS)};
content_encoding = "content-encoding"i def %beg_content_encoding ce_tokenlist eoh %set_content_encoding;
-
-################# transfer-encoding ###############
+
+################# transfer-encoding ###############
action beg_encoding { guard(base_hd->transfer_chunked); }
action set_encoding { setguarded(base_hd->transfer_chunked, I); }
-
-e_tokenlist = "identity"i %{c(0)}
- | "chunked"i %{c(1)};
+
+e_tokenlist = "identity"i %{c(0)}
+ | "chunked"i %{c(1)};
transfer_encoding = "transfer-encoding"i def %beg_encoding e_tokenlist eoh %set_encoding;
-
-################# content-length ##################
+
+################# content-length ##################
action beg_content_length { guard(base_hd->content_length); }
action set_content_length { setguarded(base_hd->content_length, I); }
-
+
content_length = "content-length"i def %beg_content_length int eoh %set_content_length;
-
+
################# content-range ###################
action beg_content_range_start { guard(base_hd->content_range_start); I = -1; }
action set_content_range_start { setguarded(base_hd->content_range_start, I); }
@@ -166,7 +166,7 @@ content_range = "content-range"i def "bytes"i sp %beg_content_range_start int
%beg_content_range_end int '/' %set_content_range_end
%beg_content_range_el int eoh %set_content_range_el;
-################# accept-ranges ###################
+################# accept-ranges ###################
action beg_accept_ranges {
if (hd) {
guard(hd->accept_ranges);
@@ -174,21 +174,21 @@ action beg_accept_ranges {
}
}
action set_accept_ranges { if (hd) setguarded(hd->accept_ranges, I); }
-
-ar_tokenlist = "bytes"i %{c(1)}
- | "none"i %{c(0)};
+
+ar_tokenlist = "bytes"i %{c(1)}
+ | "none"i %{c(0)};
accept_ranges = "accept-ranges"i def %beg_accept_ranges ar_tokenlist eoh %set_accept_ranges;
-
-################# content-type ####################
+
+################# content-type ####################
action beg_mime { guard(base_hd->mime_type); }
action set_mime { setguarded(base_hd->mime_type, I); }
action set_charset {
if (buflen < FETCHER_URL_MAX) {
- buf[buflen++] = 0;
+ buf[buflen++] = 0;
base_hd->charset = EncodingHintByName((const char*)buf);
- }
-}
-
+ }
+}
+
mime_type = "text/plain"i %{c(MIME_TEXT)}
| "text/html"i %{c(MIME_HTML)}
| "application/pdf"i %{c(MIME_PDF)}
@@ -234,36 +234,36 @@ mime_type = "text/plain"i %{c(MIME_TEXT)}
charset_name = token_char+ >clear_buf $update_buf;
mime_param = "charset"i ws* '=' ws* '"'? charset_name '"'? %set_charset @2
- | token ws* '=' ws* '"'? token '"'? @1
- | text $0;
-mime_parms = (lws ';' lws mime_param)*;
+ | token ws* '=' ws* '"'? token '"'? @1
+ | text $0;
+mime_parms = (lws ';' lws mime_param)*;
content_type = "content-type"i def %beg_mime mime_type mime_parms eoh %set_mime;
-
-################# last modified ###################
+
+################# last modified ###################
action beg_modtime { guard(base_hd->http_time); }
action set_modtime {
setguarded(base_hd->http_time, DateTimeFields.ToTimeT(-1));
}
-
+
last_modified = "last-modified"i def %beg_modtime http_date eoh %set_modtime;
-
-################# location ########################
+
+################# location ########################
action set_location {
while (buflen > 0 && (buf[buflen - 1] == ' ' || buf[buflen - 1] == '\t')) {
buflen --;
}
if (hd && buflen < FETCHER_URL_MAX) {
hd->location = TStringBuf(buf, buflen);
- }
-}
-
+ }
+}
+
action set_status_303{ if (hd) hd->http_status = 303; }
-
+
url = url_char+ >clear_buf $update_buf;
loc_url = any_text_char+ >clear_buf $update_buf;
location = "location"i def loc_url eoh %set_location;
refresh = "refresh"i def int ';' lws "url="i loc_url eoh %set_location;
-
+
################# x-robots-tag ################
action set_x_robots {
if (hd && AcceptingXRobots) {
@@ -349,56 +349,56 @@ action set_squid_error {
squid_error = "X-Yandex-Squid-Error"i def any_text eoh %set_squid_error;
-################# auth ########################
+################# auth ########################
action init_auth {
- if (auth_hd)
- auth_hd->use_auth=true;
-}
-
+ if (auth_hd)
+ auth_hd->use_auth=true;
+}
+
action update_auth_buf
- { if (auth_hd && buflen < sizeof(buf)) buf[buflen++] = *fpc; }
-
-quoted_str = /"/ (text_char - /"/)* /"/ >2;
+ { if (auth_hd && buflen < sizeof(buf)) buf[buflen++] = *fpc; }
+
+quoted_str = /"/ (text_char - /"/)* /"/ >2;
auth_quoted_str = ( /"/ ( ( text_char - /"/ )* >clear_buf $update_auth_buf ) /"/ ) > 2;
-
-# do not support auth-int, too heavy procedure
-
-qop_auth_option = "auth"i @1 %{if(auth_hd) auth_hd->qop_auth = true; };
-
-qop_option = ( qop_auth_option @1 ) | (( token-"auth"i) $0 );
-
-auth_good_param = ( "nonce"i /=/ auth_quoted_str )
+
+# do not support auth-int, too heavy procedure
+
+qop_auth_option = "auth"i @1 %{if(auth_hd) auth_hd->qop_auth = true; };
+
+qop_option = ( qop_auth_option @1 ) | (( token-"auth"i) $0 );
+
+auth_good_param = ( "nonce"i /=/ auth_quoted_str )
%{if (auth_hd && buflen < FETCHER_URL_MAX-1) {
- buf[buflen++] = 0;
- auth_hd->nonce = strdup((const char*)buf);
- }}
- | ( "realm"i /=/ auth_quoted_str )
+ buf[buflen++] = 0;
+ auth_hd->nonce = strdup((const char*)buf);
+ }}
+ | ( "realm"i /=/ auth_quoted_str )
%{if (auth_hd && buflen < FETCHER_URL_MAX-1) {
- buf[buflen++] = 0;
- auth_hd->realm = strdup((const char*)buf);
- }}
- | ( "opaque"i /=/ auth_quoted_str )
+ buf[buflen++] = 0;
+ auth_hd->realm = strdup((const char*)buf);
+ }}
+ | ( "opaque"i /=/ auth_quoted_str )
%{if (auth_hd && buflen < FETCHER_URL_MAX-1) {
- buf[buflen++] = 0;
- auth_hd->opaque = strdup((const char*)buf);
- }}
- | "stale"i /=/ "true"i
- %{if (auth_hd) auth_hd->stale = true; }
- | "algorithm"i /=/ "md5"i /-/ "sess"i
- %{if (auth_hd) auth_hd->algorithm = 1; }
- | ( "qop"i /="/ qop_option (ws* "," ws* qop_option)* /"/);
-
-auth_param = auth_good_param @1 |
- ( (token - ( "nonce"i | "opaque"i | "realm"i | "qop"i ) )
- /=/ (token | quoted_str ) ) $0;
-
-auth_params = auth_param ( ws* /,/ ws* auth_param )*;
-
+ buf[buflen++] = 0;
+ auth_hd->opaque = strdup((const char*)buf);
+ }}
+ | "stale"i /=/ "true"i
+ %{if (auth_hd) auth_hd->stale = true; }
+ | "algorithm"i /=/ "md5"i /-/ "sess"i
+ %{if (auth_hd) auth_hd->algorithm = 1; }
+ | ( "qop"i /="/ qop_option (ws* "," ws* qop_option)* /"/);
+
+auth_param = auth_good_param @1 |
+ ( (token - ( "nonce"i | "opaque"i | "realm"i | "qop"i ) )
+ /=/ (token | quoted_str ) ) $0;
+
+auth_params = auth_param ( ws* /,/ ws* auth_param )*;
+
digest_challenge = ("digest"i %init_auth ws+ auth_params) |
- ((token-"digest"i) text);
-
-auth = "www-authenticate"i def digest_challenge eoh;
-
+ ((token-"digest"i) text);
+
+auth = "www-authenticate"i def digest_challenge eoh;
+
###################### host #######################
action set_host {
if (request_hd && buflen < HOST_MAX) {
@@ -562,8 +562,8 @@ action set_request_priority {
request_priority = "x-yandex-request-priority"i def int eoh
%set_request_priority;
-################# message header ##################
-other_header = ( ex_token - "www-authenticate"i ) def any_text eoh;
+################# message header ##################
+other_header = ( ex_token - "www-authenticate"i ) def any_text eoh;
message_header = other_header $0
| connection @1
| content_encoding @1
@@ -595,90 +595,90 @@ request_header = message_header $0
| request_cache_control @1
| response_timeout @1
| request_priority @1;
-
-################# main ############################
+
+################# main ############################
action accepted { lastchar = (char*)fpc; return 2; }
-
+
main := ((response_status_line ('\r'? response_header)*)
| (request_status_line ('\r' ? request_header)*))
eol @accepted;
-
-}%%
-
-%% write data;
-
-int THttpHeaderParser::execute(unsigned char *inBuf, int len) {
- const unsigned char *p = inBuf;
- const unsigned char *pe = p + len;
- %% write exec;
- if (cs == http_header_parser_error)
- return -1;
- else if (cs == http_header_parser_first_final)
- return 0;
- else
- return 1;
-}
-
-void THttpHeaderParser::init() {
- %% write init;
-}
-
-%%{
-machine http_chunk_parser;
-
-alphtype unsigned char;
-
+
+}%%
+
+%% write data;
+
+int THttpHeaderParser::execute(unsigned char *inBuf, int len) { // Feeds len bytes starting at inBuf to the http_header_parser state machine.
+ const unsigned char *p = inBuf; // p / pe are the cursor and end-of-data pointers that the Ragel-generated code below operates on
+ const unsigned char *pe = p + len;
+ %% write exec;
+ if (cs == http_header_parser_error) // embedded actions may already have returned from inside the generated code (e.g. `accepted` returns 2, `set_status` can return -3); the checks below only run when none did
+ return -1; // the machine is in its error state
+ else if (cs == http_header_parser_first_final)
+ return 0; // the machine stopped exactly in its first final state
+ else
+ return 1; // any other state. NOTE(review): presumably "need more input" -- confirm with callers
+}
+
+void THttpHeaderParser::init() { // Resets the header parser: the directive below expands to Ragel's generated state initialisation (it sets cs to the machine's start state).
+ %% write init;
+}
+
+%%{
+machine http_chunk_parser;
+
+alphtype unsigned char;
+
action clear_hex { /* reset the accumulator before the first hex digit of a chunk size */ cnt64 = 0; }
action update_hex { /* shift in one hex digit; leave the parser with -2 as soon as the value exceeds Max<int>() */ cnt64 = 16 * cnt64 + X(fc); if(cnt64 > Max<int>()) return -2; }
action set_chunk { /* the Max<int>() check in update_hex keeps this narrowing cast in range (assuming cnt64 is wider than int -- TODO confirm its declaration) */ chunk_length = static_cast<int>(cnt64); }
action accepted { /* remember the position of the character that completed the match and return 2 from execute() */ lastchar = (char*)fpc; return 2; }
-
-eol = '\r'? '\n';
-ws = [ \t];
-sp = ' ';
-lw = '\r'? '\n'? ws;
-separator = [()<>@,;:\\"/\[\]?={}];
-token_char = [!-~] - separator; # http tokens chars
-url_char = [!-~] - ["<>\[\]\\^`{}|]; # uric chars
-text_char = ws | 33..127 | 160..255;
-
-lws = lw*;
-eoh = lws eol;
-token = token_char+;
-text = (text_char | lw)*;
-def = lws ':' lws;
-
+
+eol = '\r'? '\n';
+ws = [ \t];
+sp = ' ';
+lw = '\r'? '\n'? ws;
+separator = [()<>@,;:\\"/\[\]?={}];
+token_char = [!-~] - separator; # http tokens chars
+url_char = [!-~] - ["<>\[\]\\^`{}|]; # uric chars
+text_char = ws | 33..127 | 160..255;
+
+lws = lw*;
+eoh = lws eol;
+token = token_char+;
+text = (text_char | lw)*;
+def = lws ':' lws;
+
hex = (xdigit+) >clear_hex $update_hex;
-quoted_string = '"' ((text_char - '"') $0 | '\\"' @1)* '"';
-
-chunk_ext_val = token | quoted_string;
-chunk_ext_name = token;
-chunk_extension = ws* (';' chunk_ext_name ws* '=' ws* chunk_ext_val ws*)*;
-
-entity_header = token def text eoh;
-trailer = entity_header*;
-
+quoted_string = '"' ((text_char - '"') $0 | '\\"' @1)* '"';
+
+chunk_ext_val = token | quoted_string;
+chunk_ext_name = token;
+chunk_extension = ws* (';' chunk_ext_name ws* '=' ws* chunk_ext_val ws*)*;
+
+entity_header = token def text eoh;
+trailer = entity_header*;
+
chunk = (hex - '0'+) chunk_extension? %set_chunk;
-last_chunk = '0'+ chunk_extension? eol trailer;
+last_chunk = '0'+ chunk_extension? eol trailer;
main := eol (chunk $0 | last_chunk @1) eol @accepted;
-
-}%%
-
-%% write data;
-
-int THttpChunkParser::execute(unsigned char *inBuf, int len) {
- const unsigned char *p = inBuf;
- const unsigned char *pe = p + len;
- %% write exec;
- if (cs == http_chunk_parser_error)
- return -1;
- else if (cs == http_chunk_parser_first_final)
- return 0;
- else
- return 1;
-}
-
-void THttpChunkParser::init() {
- chunk_length = 0;
- %% write init;
-}
+
+}%%
+
+%% write data;
+
+int THttpChunkParser::execute(unsigned char *inBuf, int len) { // Feeds len bytes starting at inBuf to the http_chunk_parser state machine.
+ const unsigned char *p = inBuf; // p / pe are the cursor and end-of-data pointers that the Ragel-generated code below operates on
+ const unsigned char *pe = p + len;
+ %% write exec;
+ if (cs == http_chunk_parser_error) // embedded actions may already have returned from inside the generated code (`accepted` returns 2, `update_hex` returns -2 when the chunk size exceeds Max<int>()); the checks below only run when none did
+ return -1; // the machine is in its error state
+ else if (cs == http_chunk_parser_first_final)
+ return 0; // the machine stopped exactly in its first final state
+ else
+ return 1; // any other state. NOTE(review): presumably "need more input" -- confirm with callers
+}
+
+void THttpChunkParser::init() { // Resets the chunk parser before a new chunk header is parsed.
+ chunk_length = 0; // clear the previously reported size; the set_chunk action overwrites it when a non-zero chunk size has been parsed
+ %% write init;
+}