diff options
| author | marakasov <[email protected]> | 2022-02-10 16:49:50 +0300 | 
|---|---|---|
| committer | Daniil Cherednik <[email protected]> | 2022-02-10 16:49:50 +0300 | 
| commit | f4edb31a1d309efb6c243b2615a587cbbdec081d (patch) | |
| tree | 5d5cb817648f650d76cf1076100726fd9b8448e8 /library/cpp/http/fetch | |
| parent | 359de4829c1d82b06fad5e0ad82470e4e9f27c89 (diff) | |
Restoring authorship annotation for <[email protected]>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/http/fetch')
| -rw-r--r-- | library/cpp/http/fetch/httpfsm.h | 4 | ||||
| -rw-r--r-- | library/cpp/http/fetch/httpfsm.rl6 | 42 | ||||
| -rw-r--r-- | library/cpp/http/fetch/httpfsm_ut.cpp | 268 | 
3 files changed, 157 insertions, 157 deletions
| diff --git a/library/cpp/http/fetch/httpfsm.h b/library/cpp/http/fetch/httpfsm.h index e1b109d26f5..c4abdcd0d23 100644 --- a/library/cpp/http/fetch/httpfsm.h +++ b/library/cpp/http/fetch/httpfsm.h @@ -58,8 +58,8 @@ struct THttpHeaderParser {      char* hreflangpos;      size_t hreflangspace; -    bool AcceptingXRobots;  -  +    bool AcceptingXRobots; +      THttpAuthHeader* auth_hd;      THttpRequestHeader* request_hd; diff --git a/library/cpp/http/fetch/httpfsm.rl6 b/library/cpp/http/fetch/httpfsm.rl6 index e2447a6dfa9..eab0328b187 100644 --- a/library/cpp/http/fetch/httpfsm.rl6 +++ b/library/cpp/http/fetch/httpfsm.rl6 @@ -227,9 +227,9 @@ mime_type       = "text/plain"i                     %{c(MIME_TEXT)}                  | "application/x-gzip"i             %{c(MIME_GZIP)}                  | "application/json"i               %{c(MIME_JSON)}                  | ("application/javascript"i | "text/javascript"i) %{c(MIME_JAVASCRIPT)} -                | "application/vnd.android.package-archive"i %{c(MIME_APK)}  -                | ("image/x-icon"i | "image/vnd.microsoft.icon"i) %{c(MIME_IMAGE_ICON)}  -                ;  +                | "application/vnd.android.package-archive"i %{c(MIME_APK)} +                | ("image/x-icon"i | "image/vnd.microsoft.icon"i) %{c(MIME_IMAGE_ICON)} +                ;  charset_name    = token_char+ >clear_buf $update_buf; @@ -266,7 +266,7 @@ refresh         = "refresh"i def int ';' lws "url="i loc_url eoh %set_location;  ################# x-robots-tag ################  action set_x_robots { -    if (hd && AcceptingXRobots) {  +    if (hd && AcceptingXRobots) {          if (I > 0)              hd->x_robots_tag |= I; @@ -277,10 +277,10 @@ action set_x_robots {      }  } -action accept_x_robots {  -    AcceptingXRobots = (bool)I;  -}  -  +action accept_x_robots { +    AcceptingXRobots = (bool)I; +} +  x_robots_directive = "none"i      %{c(3)} | "all"i     %{c(-3)}                     | "noindex"i   %{c(1)} | "index"i   %{c(-1)}                     | "nofollow"i  %{c(2)} | "follow"i  %{c(-2)} @@ -288,19 +288,19 @@ x_robots_directive = "none"i      %{c(3)} | "all"i     %{c(-3)}                     | "noyaca"i    %{c(16)}                     | "noodp"i     %{c(8)}; -any_value = (any_text_char - [, \t])+ (lws (any_text_char - [, \t])+)*;  -any_key = (any_text_char - [:, \t])+ (lws (any_text_char - [:, \t])+)*;  -  -unavailable_after_directive = "unavailable_after"i def any_value;  -  -yandex_robot = "yandex"i | "yandexbot"i;  -other_robot = any_key - "unavailable_after"i - yandex_robot;  -robot_specifier = yandex_robot %{c(1)} | other_robot %{c(0)};  -  -x_robots_value = (robot_specifier def %accept_x_robots)? (unavailable_after_directive | (x_robots_directive %set_x_robots) | any_value? );  -  -x_robots_tag = "x-robots-tag"i def >{ AcceptingXRobots = true; } x_robots_value (lws ',' lws x_robots_value)* eoh;  -  +any_value = (any_text_char - [, \t])+ (lws (any_text_char - [, \t])+)*; +any_key = (any_text_char - [:, \t])+ (lws (any_text_char - [:, \t])+)*; + +unavailable_after_directive = "unavailable_after"i def any_value; + +yandex_robot = "yandex"i | "yandexbot"i; +other_robot = any_key - "unavailable_after"i - yandex_robot; +robot_specifier = yandex_robot %{c(1)} | other_robot %{c(0)}; + +x_robots_value = (robot_specifier def %accept_x_robots)? (unavailable_after_directive | (x_robots_directive %set_x_robots) | any_value? ); + +x_robots_tag = "x-robots-tag"i def >{ AcceptingXRobots = true; } x_robots_value (lws ',' lws x_robots_value)* eoh; +  ################# rel_canonical ###############  action set_canonical {      if (hd && buflen < FETCHER_URL_MAX) { diff --git a/library/cpp/http/fetch/httpfsm_ut.cpp b/library/cpp/http/fetch/httpfsm_ut.cpp index b1904be14cd..b018e80101b 100644 --- a/library/cpp/http/fetch/httpfsm_ut.cpp +++ b/library/cpp/http/fetch/httpfsm_ut.cpp @@ -18,13 +18,13 @@ class THttpHeaderParserTestSuite: public TTestBase {      UNIT_TEST(TestResponseHeaderOnRequest);      UNIT_TEST(TestRequestHeaderOnResponse);      UNIT_TEST(TestXRobotsTagUnknownTags); -    UNIT_TEST(TestXRobotsTagMyBot);  -    UNIT_TEST(TestXRobotsTagOtherBot);  -    UNIT_TEST(TestXRobotsTagUnavailableAfterAware);  -    UNIT_TEST(TestXRobotsTagUnavailableAfterWorks);  +    UNIT_TEST(TestXRobotsTagMyBot); +    UNIT_TEST(TestXRobotsTagOtherBot); +    UNIT_TEST(TestXRobotsTagUnavailableAfterAware); +    UNIT_TEST(TestXRobotsTagUnavailableAfterWorks);      UNIT_TEST(TestXRobotsTagOverridePriority); -    UNIT_TEST(TestXRobotsTagDoesNotBreakCharset);  -    UNIT_TEST(TestXRobotsTagAllowsMultiline);  +    UNIT_TEST(TestXRobotsTagDoesNotBreakCharset); +    UNIT_TEST(TestXRobotsTagAllowsMultiline);      UNIT_TEST(TestRelCanonical);      UNIT_TEST(TestHreflang);      UNIT_TEST(TestHreflangOnLongInput); @@ -50,13 +50,13 @@ public:      void TestResponseHeaderOnRequest();      void TestRequestHeaderOnResponse();      void TestXRobotsTagUnknownTags(); -    void TestXRobotsTagMyBot();  -    void TestXRobotsTagOtherBot();  -    void TestXRobotsTagUnavailableAfterAware();  -    void TestXRobotsTagUnavailableAfterWorks();  +    void TestXRobotsTagMyBot(); +    void TestXRobotsTagOtherBot(); +    void TestXRobotsTagUnavailableAfterAware(); +    void TestXRobotsTagUnavailableAfterWorks();      void TestXRobotsTagOverridePriority(); -    void TestXRobotsTagDoesNotBreakCharset();  -    void TestXRobotsTagAllowsMultiline();  +    void TestXRobotsTagDoesNotBreakCharset(); +    void TestXRobotsTagAllowsMultiline();      void TestRelCanonical();      void TestHreflang();      void TestHreflangOnLongInput(); @@ -238,80 +238,80 @@ void THttpHeaderParserTestSuite::TestXRobotsTagUnknownTags() {      const char* headers =          "HTTP/1.1 200 OK\r\n"          "Content-Type: text/html\r\n" -        "x-robots-tag: asdfasdf asdf asdf,,, , noindex,noodpXXX , NOFOLLOW ,noodpnofollow\r\n\r\n";  +        "x-robots-tag: asdfasdf asdf asdf,,, , noindex,noodpXXX , NOFOLLOW ,noodpnofollow\r\n\r\n";      i32 result = httpHeaderParser->Execute(headers, strlen(headers));      UNIT_ASSERT_EQUAL(result, 2); -    UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 3);  -    UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "00xxx");  +    UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 3); +    UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "00xxx"); +    TestFinish(); +} + +void THttpHeaderParserTestSuite::TestXRobotsTagMyBot() { +    TestStart(); +    THttpHeader httpHeader; +    httpHeaderParser->Init(&httpHeader); +    const char* headers = +        "HTTP/1.1 200 OK\r\n" +        "Content-Type: text/html\r\n" +        "x-robots-tag: yandex: noindex, nofollow\r\n" +        "x-robots-tag: yandexbot: noarchive, noodp\r\n\r\n"; +    i32 result = httpHeaderParser->Execute(headers, strlen(headers)); +    UNIT_ASSERT_EQUAL(result, 2); +    UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 15); +    UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "0000x"); +    TestFinish(); +} + +void THttpHeaderParserTestSuite::TestXRobotsTagOtherBot() { +    TestStart(); +    THttpHeader httpHeader; +    httpHeaderParser->Init(&httpHeader); +    const char* headers = +        "HTTP/1.1 200 OK\r\n" +        "Content-Type: text/html\r\n" +        "x-robots-tag: google: noindex, nofollow\r\n" +        "x-robots-tag: googlebot: noarchive, noodp\r\n" +        "x-robots-tag: !still(-other) bot_: foo, noyaca\r\n\r\n"; +    i32 result = httpHeaderParser->Execute(headers, strlen(headers)); +    UNIT_ASSERT_EQUAL(result, 2); +    UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 0); +    UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "xxxxx"); +    TestFinish(); +} + +void THttpHeaderParserTestSuite::TestXRobotsTagUnavailableAfterAware() { +    TestStart(); +    THttpHeader httpHeader; +    httpHeaderParser->Init(&httpHeader); +    // проверяем только что unavailable_after ничего не ломает +    const char* headers = +        "HTTP/1.1 200 OK\r\n" +        "Content-Type: text/html\r\n" +        "x-robots-tag: unavailable_after: 01 Jan 2999 00:00 UTC, noindex, nofollow\r\n" +        "x-robots-tag: yandex: unavailable_after: 01 Jan 2999 00:00 UTC, noarchive, noodp\r\n\r\n"; +    i32 result = httpHeaderParser->Execute(headers, strlen(headers)); +    UNIT_ASSERT_EQUAL(result, 2); +    UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 15); +    UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "0000x"); +    TestFinish(); +} + +void THttpHeaderParserTestSuite::TestXRobotsTagUnavailableAfterWorks() { +    TestStart(); +    THttpHeader httpHeader; +    httpHeaderParser->Init(&httpHeader); +    // пока не поддерживается +    const char* headers = +        "HTTP/1.1 200 OK\r\n" +        "Content-Type: text/html\r\n" +        "x-robots-tag: unavailable_after: 01 Jan 2000 00:00 UTC\r\n\r\n"; +    i32 result = httpHeaderParser->Execute(headers, strlen(headers)); +    UNIT_ASSERT_EQUAL(result, 2); +    //UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 1); +    //UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "0xxxx");      TestFinish();  } -void THttpHeaderParserTestSuite::TestXRobotsTagMyBot() {  -    TestStart();  -    THttpHeader httpHeader;  -    httpHeaderParser->Init(&httpHeader);  -    const char* headers =  -        "HTTP/1.1 200 OK\r\n"  -        "Content-Type: text/html\r\n"  -        "x-robots-tag: yandex: noindex, nofollow\r\n"  -        "x-robots-tag: yandexbot: noarchive, noodp\r\n\r\n";  -    i32 result = httpHeaderParser->Execute(headers, strlen(headers));  -    UNIT_ASSERT_EQUAL(result, 2);  -    UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 15);  -    UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "0000x");  -    TestFinish();  -}  -  -void THttpHeaderParserTestSuite::TestXRobotsTagOtherBot() {  -    TestStart();  -    THttpHeader httpHeader;  -    httpHeaderParser->Init(&httpHeader);  -    const char* headers =  -        "HTTP/1.1 200 OK\r\n"  -        "Content-Type: text/html\r\n"  -        "x-robots-tag: google: noindex, nofollow\r\n"  -        "x-robots-tag: googlebot: noarchive, noodp\r\n"  -        "x-robots-tag: !still(-other) bot_: foo, noyaca\r\n\r\n";  -    i32 result = httpHeaderParser->Execute(headers, strlen(headers));  -    UNIT_ASSERT_EQUAL(result, 2);  -    UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 0);  -    UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "xxxxx");  -    TestFinish();  -}  -  -void THttpHeaderParserTestSuite::TestXRobotsTagUnavailableAfterAware() {  -    TestStart();  -    THttpHeader httpHeader;  -    httpHeaderParser->Init(&httpHeader);  -    // проверяем только что unavailable_after ничего не ломает  -    const char* headers =  -        "HTTP/1.1 200 OK\r\n"  -        "Content-Type: text/html\r\n"  -        "x-robots-tag: unavailable_after: 01 Jan 2999 00:00 UTC, noindex, nofollow\r\n"  -        "x-robots-tag: yandex: unavailable_after: 01 Jan 2999 00:00 UTC, noarchive, noodp\r\n\r\n";  -    i32 result = httpHeaderParser->Execute(headers, strlen(headers));  -    UNIT_ASSERT_EQUAL(result, 2);  -    UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 15);  -    UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "0000x");  -    TestFinish();  -}  -  -void THttpHeaderParserTestSuite::TestXRobotsTagUnavailableAfterWorks() {  -    TestStart();  -    THttpHeader httpHeader;  -    httpHeaderParser->Init(&httpHeader);  -    // пока не поддерживается  -    const char* headers =  -        "HTTP/1.1 200 OK\r\n"  -        "Content-Type: text/html\r\n"  -        "x-robots-tag: unavailable_after: 01 Jan 2000 00:00 UTC\r\n\r\n";  -    i32 result = httpHeaderParser->Execute(headers, strlen(headers));  -    UNIT_ASSERT_EQUAL(result, 2);  -    //UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 1);  -    //UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "0xxxx");  -    TestFinish();  -}  -   void THttpHeaderParserTestSuite::TestXRobotsTagOverridePriority() {      TestStart();      THttpHeader httpHeader; @@ -327,59 +327,59 @@ void THttpHeaderParserTestSuite::TestXRobotsTagOverridePriority() {      TestFinish();  } -void THttpHeaderParserTestSuite::TestXRobotsTagDoesNotBreakCharset() {  -    TestStart();  -    THttpHeader httpHeader;  -    httpHeaderParser->Init(&httpHeader);  -    const char* headers =  -        "HTTP/1.1 200 OK\r\n"  -        "X-Robots-Tag: noarchive\r\n"  -        "Content-Type: application/json; charset=utf-8\r\n\r\n";  -    i32 result = httpHeaderParser->Execute(headers, strlen(headers));  -    UNIT_ASSERT_EQUAL(result, 2);  -    UNIT_ASSERT_EQUAL(httpHeader.mime_type, static_cast<ui8>(MIME_JSON));  -    UNIT_ASSERT_EQUAL(httpHeader.charset, static_cast<ui8>(CODES_UTF8));  -    TestFinish();  -}  -  -void THttpHeaderParserTestSuite::TestXRobotsTagAllowsMultiline() {  -    TestStart();  -    THttpHeader httpHeader;  -    httpHeaderParser->Init(&httpHeader);  -    const char* headers =  -        "HTTP/1.1 200 OK\r\n"  -        "X-Robots-Tag\r\n"  -        " :\r\n"  -        " unavailable_since\r\n"  -        " :\r\n"  -        " ,\r\n"  -        " unavailable_since\r\n"  -        " :\r\n"  -        " 01 Jan 2000\r\n"  -        " 00:00 UTC\r\n"  -        " ,\r\n"  -        " yandexbot\r\n"  -        " :\r\n"  -        " noindex\r\n"  -        " ,\r\n"  -        " garbage\r\n"  -        " ,\r\n"  -        " nofollow\r\n"  -        " ,\r\n"  -        " other\r\n"  -        " bot\r\n"  -        " :\r\n"  -        " noarchive\r\n"  -        " ,\r\n"  -        "Content-Type: application/json; charset=utf-8\r\n\r\n";  -    i32 result = httpHeaderParser->Execute(headers, strlen(headers));  -    UNIT_ASSERT_EQUAL(result, 2);  -    UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "00xxx");  -    UNIT_ASSERT_EQUAL(httpHeader.mime_type, static_cast<ui8>(MIME_JSON));  -    UNIT_ASSERT_EQUAL(httpHeader.charset, static_cast<ui8>(CODES_UTF8));  -    TestFinish();  -}  -  +void THttpHeaderParserTestSuite::TestXRobotsTagDoesNotBreakCharset() { +    TestStart(); +    THttpHeader httpHeader; +    httpHeaderParser->Init(&httpHeader); +    const char* headers = +        "HTTP/1.1 200 OK\r\n" +        "X-Robots-Tag: noarchive\r\n" +        "Content-Type: application/json; charset=utf-8\r\n\r\n"; +    i32 result = httpHeaderParser->Execute(headers, strlen(headers)); +    UNIT_ASSERT_EQUAL(result, 2); +    UNIT_ASSERT_EQUAL(httpHeader.mime_type, static_cast<ui8>(MIME_JSON)); +    UNIT_ASSERT_EQUAL(httpHeader.charset, static_cast<ui8>(CODES_UTF8)); +    TestFinish(); +} + +void THttpHeaderParserTestSuite::TestXRobotsTagAllowsMultiline() { +    TestStart(); +    THttpHeader httpHeader; +    httpHeaderParser->Init(&httpHeader); +    const char* headers = +        "HTTP/1.1 200 OK\r\n" +        "X-Robots-Tag\r\n" +        " :\r\n" +        " unavailable_since\r\n" +        " :\r\n" +        " ,\r\n" +        " unavailable_since\r\n" +        " :\r\n" +        " 01 Jan 2000\r\n" +        " 00:00 UTC\r\n" +        " ,\r\n" +        " yandexbot\r\n" +        " :\r\n" +        " noindex\r\n" +        " ,\r\n" +        " garbage\r\n" +        " ,\r\n" +        " nofollow\r\n" +        " ,\r\n" +        " other\r\n" +        " bot\r\n" +        " :\r\n" +        " noarchive\r\n" +        " ,\r\n" +        "Content-Type: application/json; charset=utf-8\r\n\r\n"; +    i32 result = httpHeaderParser->Execute(headers, strlen(headers)); +    UNIT_ASSERT_EQUAL(result, 2); +    UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "00xxx"); +    UNIT_ASSERT_EQUAL(httpHeader.mime_type, static_cast<ui8>(MIME_JSON)); +    UNIT_ASSERT_EQUAL(httpHeader.charset, static_cast<ui8>(CODES_UTF8)); +    TestFinish(); +} +  void THttpHeaderParserTestSuite::TestHreflang() {      TestStart();      THttpHeader httpHeader; | 
