diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/html | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/html')
-rw-r--r-- | library/cpp/html/escape/escape.cpp | 66 | ||||
-rw-r--r-- | library/cpp/html/escape/escape.h | 9 | ||||
-rw-r--r-- | library/cpp/html/escape/ut/escape_ut.cpp | 16 | ||||
-rw-r--r-- | library/cpp/html/escape/ut/ya.make | 13 | ||||
-rw-r--r-- | library/cpp/html/escape/ya.make | 9 | ||||
-rw-r--r-- | library/cpp/html/pcdata/pcdata.cpp | 81 | ||||
-rw-r--r-- | library/cpp/html/pcdata/pcdata.h | 10 | ||||
-rw-r--r-- | library/cpp/html/pcdata/pcdata_ut.cpp | 48 | ||||
-rw-r--r-- | library/cpp/html/pcdata/ut/ya.make | 9 | ||||
-rw-r--r-- | library/cpp/html/pcdata/ya.make | 10 | ||||
-rw-r--r-- | library/cpp/html/ya.make | 32 |
11 files changed, 303 insertions, 0 deletions
diff --git a/library/cpp/html/escape/escape.cpp b/library/cpp/html/escape/escape.cpp new file mode 100644 index 0000000000..5b8ed60f04 --- /dev/null +++ b/library/cpp/html/escape/escape.cpp @@ -0,0 +1,66 @@ +#include "escape.h" + +#include <util/generic/array_size.h> +#include <util/generic/strbuf.h> + +namespace NHtml { + namespace { + struct TReplace { + char Char; + bool ForText; + TStringBuf Entity; + }; + + TReplace Escapable[] = { + {'"', false, TStringBuf("&quot;")}, + {'&', true, TStringBuf("&amp;")}, + {'<', true, TStringBuf("&lt;")}, + {'>', true, TStringBuf("&gt;")}, + }; + + TString EscapeImpl(const TString& value, bool isText) { + auto ci = value.begin(); + // Looking for escapable characters. + for (; ci != value.end(); ++ci) { + for (size_t i = (isText ? 1 : 0); i < Y_ARRAY_SIZE(Escapable); ++i) { + if (*ci == Escapable[i].Char) { + goto escape; + } + } + } + + // There is no escapable characters, so return original value. + return value; + + escape: + TString tmp = TString(value.begin(), ci); + + for (; ci != value.end(); ++ci) { + size_t i = (isText ? 
1 : 0); + + for (; i < Y_ARRAY_SIZE(Escapable); ++i) { + if (*ci == Escapable[i].Char) { + tmp += Escapable[i].Entity; + break; + } + } + + if (i == Y_ARRAY_SIZE(Escapable)) { + tmp += *ci; + } + } + + return tmp; + } + + } + + TString EscapeAttributeValue(const TString& value) { + return EscapeImpl(value, false); + } + + TString EscapeText(const TString& value) { + return EscapeImpl(value, true); + } + +} diff --git a/library/cpp/html/escape/escape.h b/library/cpp/html/escape/escape.h new file mode 100644 index 0000000000..1c45fc5193 --- /dev/null +++ b/library/cpp/html/escape/escape.h @@ -0,0 +1,9 @@ +#pragma once + +#include <util/generic/string.h> + +namespace NHtml { + TString EscapeAttributeValue(const TString& value); + TString EscapeText(const TString& value); + +} diff --git a/library/cpp/html/escape/ut/escape_ut.cpp b/library/cpp/html/escape/ut/escape_ut.cpp new file mode 100644 index 0000000000..cd7b955138 --- /dev/null +++ b/library/cpp/html/escape/ut/escape_ut.cpp @@ -0,0 +1,16 @@ +#include <library/cpp/html/escape/escape.h> +#include <library/cpp/testing/unittest/registar.h> + +using namespace NHtml; + +Y_UNIT_TEST_SUITE(TEscapeHtml) { + Y_UNIT_TEST(Escape) { + UNIT_ASSERT_EQUAL(EscapeText("in & out"), "in &amp; out"); + UNIT_ASSERT_EQUAL(EscapeText("&&"), "&amp;&amp;"); + UNIT_ASSERT_EQUAL(EscapeText("&amp;"), "&amp;amp;"); + + UNIT_ASSERT_EQUAL(EscapeText("<script>"), "&lt;script&gt;"); + + UNIT_ASSERT_EQUAL(EscapeText("text"), "text"); + } +} diff --git a/library/cpp/html/escape/ut/ya.make b/library/cpp/html/escape/ut/ya.make new file mode 100644 index 0000000000..27d3a3d65a --- /dev/null +++ b/library/cpp/html/escape/ut/ya.make @@ -0,0 +1,13 @@ +UNITTEST() + +OWNER(stanly) + +SRCS( + escape_ut.cpp +) + +PEERDIR( + library/cpp/html/escape +) + +END() diff --git a/library/cpp/html/escape/ya.make b/library/cpp/html/escape/ya.make new file mode 100644 index 0000000000..93a31e33d6 --- /dev/null +++ b/library/cpp/html/escape/ya.make @@ -0,0 +1,9 @@ +LIBRARY() + +OWNER(stanly) + 
+SRCS( + escape.cpp +) + +END() diff --git a/library/cpp/html/pcdata/pcdata.cpp b/library/cpp/html/pcdata/pcdata.cpp new file mode 100644 index 0000000000..740c240fd2 --- /dev/null +++ b/library/cpp/html/pcdata/pcdata.cpp @@ -0,0 +1,81 @@ +#include "pcdata.h" + +#include <util/string/strspn.h> + +static TCompactStrSpn sspn("\"<>&'"); + +static void EncodeHtmlPcdataAppendInternal(const TStringBuf str, TString& strout, bool qAmp) { + const char* s = str.data(); + const char* e = s + str.length(); + + for (;;) { + const char* next = sspn.FindFirstOf(s, e); + + strout.AppendNoAlias(s, next - s); + s = next; + + if (s == e) + break; + + switch (*s) { + case '\"': + strout += TStringBuf("&quot;"); + ++s; + break; + + case '<': + strout += TStringBuf("&lt;"); + ++s; + break; + + case '>': + strout += TStringBuf("&gt;"); + ++s; + break; + + case '\'': + strout += TStringBuf("&#39;"); + ++s; + break; + + case '&': + if (qAmp) + strout += TStringBuf("&amp;"); + else + strout += TStringBuf("&"); + ++s; + break; + } + } +} + +void EncodeHtmlPcdataAppend(const TStringBuf str, TString& strout) { + EncodeHtmlPcdataAppendInternal(str, strout, true); +} + +TString EncodeHtmlPcdata(const TStringBuf str, bool qAmp) { + TString strout; + EncodeHtmlPcdataAppendInternal(str, strout, qAmp); + return strout; +} + +TString DecodeHtmlPcdata(const TString& sz) { + TString res; + const char* codes[] = {"&quot;", "&lt;", "&gt;", "&#39;", "&#039;", "&amp;", "&apos;", nullptr}; + const char chars[] = {'\"', '<', '>', '\'', '\'', '&', '\''}; + for (size_t i = 0; i < sz.length(); ++i) { + char c = sz[i]; + if (c == '&') { + for (const char** p = codes; *p; ++p) { + size_t len = strlen(*p); + if (strncmp(sz.c_str() + i, *p, len) == 0) { + i += len - 1; + c = chars[p - codes]; + break; + } + } + } + res += c; + } + return res; +} diff --git a/library/cpp/html/pcdata/pcdata.h b/library/cpp/html/pcdata/pcdata.h new file mode 100644 index 0000000000..7dd741f53d --- /dev/null +++ b/library/cpp/html/pcdata/pcdata.h @@ -0,0 +1,10 @@ +#pragma once + 
+#include <util/generic/fwd.h> + +/// Converts a text into HTML-code. Special characters of HTML («<», «>», ...) replaced with entities. +TString EncodeHtmlPcdata(const TStringBuf str, bool qAmp = true); +void EncodeHtmlPcdataAppend(const TStringBuf str, TString& strout); + +/// Reverse of EncodeHtmlPcdata() +TString DecodeHtmlPcdata(const TString& sz); diff --git a/library/cpp/html/pcdata/pcdata_ut.cpp b/library/cpp/html/pcdata/pcdata_ut.cpp new file mode 100644 index 0000000000..5833f8bc59 --- /dev/null +++ b/library/cpp/html/pcdata/pcdata_ut.cpp @@ -0,0 +1,48 @@ +#include "pcdata.h" + +#include <library/cpp/testing/unittest/registar.h> + +Y_UNIT_TEST_SUITE(TPcdata) { + Y_UNIT_TEST(TestStress) { + { + ui64 key = 0x000017C0B76C4E87ull; + TString res = EncodeHtmlPcdata(TStringBuf((const char*)&key, sizeof(key))); + } + + for (size_t i = 0; i < 1000; ++i) { + const TString s = NUnitTest::RandomString(i, i); + + UNIT_ASSERT_VALUES_EQUAL(DecodeHtmlPcdata(EncodeHtmlPcdata(s)), s); + } + } + + Y_UNIT_TEST(Test1) { + const TString tests[] = { + "qw&qw", + "&<", + ">&qw", + "\'&aaa"}; + + for (auto s : tests) { + UNIT_ASSERT_VALUES_EQUAL(DecodeHtmlPcdata(EncodeHtmlPcdata(s)), s); + } + } + + Y_UNIT_TEST(Test2) { + UNIT_ASSERT_VALUES_EQUAL(EncodeHtmlPcdata("&qqq"), "&amp;qqq"); + } + + Y_UNIT_TEST(TestEncodeHtmlPcdataAppend) { + TString s; + EncodeHtmlPcdataAppend("m&m", s); + EncodeHtmlPcdataAppend("'s", s); + UNIT_ASSERT_VALUES_EQUAL(EncodeHtmlPcdata("m&m's"), s); + UNIT_ASSERT_VALUES_EQUAL("m&amp;m&#39;s", s); + } + + Y_UNIT_TEST(TestStrangeAmpParameter) { + UNIT_ASSERT_VALUES_EQUAL(EncodeHtmlPcdata("m&m's", true), "m&amp;m&#39;s"); + UNIT_ASSERT_VALUES_EQUAL(EncodeHtmlPcdata("m&m's"), "m&amp;m&#39;s"); //default + UNIT_ASSERT_VALUES_EQUAL(EncodeHtmlPcdata("m&m's", false), "m&m&#39;s"); + } +} diff --git a/library/cpp/html/pcdata/ut/ya.make b/library/cpp/html/pcdata/ut/ya.make new file mode 100644 index 0000000000..bc947baa89 --- /dev/null +++ b/library/cpp/html/pcdata/ut/ya.make @@ -0,0 +1,9 @@ 
+UNITTEST_FOR(library/cpp/html/pcdata) + +OWNER(vladon) + +SRCS( + pcdata_ut.cpp +) + +END() diff --git a/library/cpp/html/pcdata/ya.make b/library/cpp/html/pcdata/ya.make new file mode 100644 index 0000000000..637220e706 --- /dev/null +++ b/library/cpp/html/pcdata/ya.make @@ -0,0 +1,10 @@ +LIBRARY() + +OWNER(vladon) + +SRCS( + pcdata.cpp + pcdata.h +) + +END() diff --git a/library/cpp/html/ya.make b/library/cpp/html/ya.make new file mode 100644 index 0000000000..ab904be3b9 --- /dev/null +++ b/library/cpp/html/ya.make @@ -0,0 +1,32 @@ +RECURSE( + dehtml + dehtml/ut + detect + blob + entity + entity/ut + escape + escape/ut + face + face/blob + html5 + html5/tests + html5/ut + lexer + lexer/ut + pcdata + pcdata/ut + pdoc + print + relalternate + relalternate/ut + sanitize + spec + storage + storage/ut + strip + tree + url + zoneconf + zoneconf/ut +) |