aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/html
diff options
context:
space:
mode:
authorDevtools Arcadia <arcadia-devtools@yandex-team.ru>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/html
downloadydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/html')
-rw-r--r--library/cpp/html/escape/escape.cpp66
-rw-r--r--library/cpp/html/escape/escape.h9
-rw-r--r--library/cpp/html/escape/ut/escape_ut.cpp16
-rw-r--r--library/cpp/html/escape/ut/ya.make13
-rw-r--r--library/cpp/html/escape/ya.make9
-rw-r--r--library/cpp/html/pcdata/pcdata.cpp81
-rw-r--r--library/cpp/html/pcdata/pcdata.h10
-rw-r--r--library/cpp/html/pcdata/pcdata_ut.cpp48
-rw-r--r--library/cpp/html/pcdata/ut/ya.make9
-rw-r--r--library/cpp/html/pcdata/ya.make10
-rw-r--r--library/cpp/html/ya.make32
11 files changed, 303 insertions, 0 deletions
diff --git a/library/cpp/html/escape/escape.cpp b/library/cpp/html/escape/escape.cpp
new file mode 100644
index 0000000000..5b8ed60f04
--- /dev/null
+++ b/library/cpp/html/escape/escape.cpp
@@ -0,0 +1,66 @@
+#include "escape.h"
+
+#include <util/generic/array_size.h>
+#include <util/generic/strbuf.h>
+
+namespace NHtml {
+    namespace {
+        // One escapable character together with the entity that replaces it.
+        struct TReplace {
+            char Char;
+            bool ForText; // true when EscapeText() escapes this character as well
+            TStringBuf Entity;
+        };
+
+        // '"' is the only attribute-only entry and stays first: text lookups start at index 1.
+        const TReplace Escapable[] = {
+            {'"', false, TStringBuf("&quot;")},
+            {'&', true, TStringBuf("&amp;")},
+            {'<', true, TStringBuf("&lt;")},
+            {'>', true, TStringBuf("&gt;")},
+        };
+
+        // Returns the table entry for ch, or nullptr when ch is emitted verbatim.
+        // With isText == true the leading '"' entry is skipped.
+        const TReplace* FindReplace(char ch, bool isText) {
+            for (size_t i = (isText ? 1 : 0); i < Y_ARRAY_SIZE(Escapable); ++i) {
+                if (ch == Escapable[i].Char) {
+                    return &Escapable[i];
+                }
+            }
+            return nullptr;
+        }
+
+        // Replaces every escapable character of value with its entity.
+        TString EscapeImpl(const TString& value, bool isText) {
+            auto ci = value.begin();
+            // Skip the leading run of characters that need no escaping.
+            while (ci != value.end() && FindReplace(*ci, isText) == nullptr) {
+                ++ci;
+            }
+            if (ci == value.end()) {
+                // There are no escapable characters, so return the original value.
+                return value;
+            }
+            TString tmp = TString(value.begin(), ci);
+            for (; ci != value.end(); ++ci) {
+                if (const TReplace* replace = FindReplace(*ci, isText)) {
+                    tmp += replace->Entity;
+                } else {
+                    tmp += *ci;
+                }
+            }
+            return tmp;
+        }
+    }
+
+    // Escapes '"', '&', '<' and '>' in value; the single quote is NOT escaped.
+    TString EscapeAttributeValue(const TString& value) {
+        return EscapeImpl(value, false);
+    }
+
+    // Escapes '&', '<' and '>' in value; both kinds of quotes are left as they are.
+    TString EscapeText(const TString& value) {
+        return EscapeImpl(value, true);
+    }
+}
diff --git a/library/cpp/html/escape/escape.h b/library/cpp/html/escape/escape.h
new file mode 100644
index 0000000000..1c45fc5193
--- /dev/null
+++ b/library/cpp/html/escape/escape.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#include <util/generic/string.h>
+
+namespace NHtml {
+ TString EscapeAttributeValue(const TString& value); // Escapes '"', '&', '<' and '>'; the single quote is not escaped.
+ TString EscapeText(const TString& value); // Escapes '&', '<' and '>' only; quotes are left untouched.
+
+}
diff --git a/library/cpp/html/escape/ut/escape_ut.cpp b/library/cpp/html/escape/ut/escape_ut.cpp
new file mode 100644
index 0000000000..cd7b955138
--- /dev/null
+++ b/library/cpp/html/escape/ut/escape_ut.cpp
@@ -0,0 +1,16 @@
+#include <library/cpp/html/escape/escape.h>
+#include <library/cpp/testing/unittest/registar.h>
+
+using namespace NHtml;
+
+Y_UNIT_TEST_SUITE(TEscapeHtml) {
+    Y_UNIT_TEST(Escape) { // Covers both public helpers declared in escape.h.
+        UNIT_ASSERT_EQUAL(EscapeText("in & out"), "in &amp; out");
+        UNIT_ASSERT_EQUAL(EscapeText("&&"), "&amp;&amp;");
+        UNIT_ASSERT_EQUAL(EscapeText("&amp;"), "&amp;amp;");
+        UNIT_ASSERT_EQUAL(EscapeText("<script>"), "&lt;script&gt;");
+        UNIT_ASSERT_EQUAL(EscapeText("text"), "text");
+        UNIT_ASSERT_EQUAL(EscapeText("\"q\""), "\"q\""); // double quotes are escaped only in attribute values
+        UNIT_ASSERT_EQUAL(EscapeAttributeValue("a=\"<&>\""), "a=&quot;&lt;&amp;&gt;&quot;");
+    }
+}
diff --git a/library/cpp/html/escape/ut/ya.make b/library/cpp/html/escape/ut/ya.make
new file mode 100644
index 0000000000..27d3a3d65a
--- /dev/null
+++ b/library/cpp/html/escape/ut/ya.make
@@ -0,0 +1,13 @@
+UNITTEST()
+# Unit tests for library/cpp/html/escape; escape_ut.cpp is the only test source.
+OWNER(stanly)
+
+SRCS(
+ escape_ut.cpp
+)
+
+PEERDIR(
+ library/cpp/html/escape
+)
+
+END()
diff --git a/library/cpp/html/escape/ya.make b/library/cpp/html/escape/ya.make
new file mode 100644
index 0000000000..93a31e33d6
--- /dev/null
+++ b/library/cpp/html/escape/ya.make
@@ -0,0 +1,9 @@
+LIBRARY()
+# HTML escaping helpers; the library is built from escape.cpp alone.
+OWNER(stanly)
+
+SRCS(
+ escape.cpp
+)
+
+END()
diff --git a/library/cpp/html/pcdata/pcdata.cpp b/library/cpp/html/pcdata/pcdata.cpp
new file mode 100644
index 0000000000..740c240fd2
--- /dev/null
+++ b/library/cpp/html/pcdata/pcdata.cpp
@@ -0,0 +1,81 @@
+#include "pcdata.h"
+
+#include <util/string/strspn.h>
+
+static TCompactStrSpn sspn("\"<>&'"); // Characters the encoder searches for: '"', '<', '>', '&' and '\''.
+
+static void EncodeHtmlPcdataAppendInternal(const TStringBuf str, TString& strout, bool qAmp) {
+    // Appends str to strout, replacing '"', '<', '>' and '\'' with entities.
+    // '&' becomes "&amp;" when qAmp is set and is copied unchanged otherwise.
+    // NOTE(review): AppendNoAlias presumably requires that str does not point into strout - confirm.
+    const char* cur = str.data();
+    const char* const end = cur + str.length();
+
+    while (true) {
+        // Copy everything up to the next special character verbatim.
+        const char* const special = sspn.FindFirstOf(cur, end);
+        strout.AppendNoAlias(cur, special - cur);
+        if (special == end) {
+            return;
+        }
+
+        // sspn is built from exactly the five characters handled below.
+        TStringBuf entity;
+        switch (*special) {
+            case '\"':
+                entity = TStringBuf("&quot;");
+                break;
+
+            case '<':
+                entity = TStringBuf("&lt;");
+                break;
+
+            case '>':
+                entity = TStringBuf("&gt;");
+                break;
+
+            case '\'':
+                entity = TStringBuf("&#39;");
+                break;
+
+            case '&':
+                entity = qAmp ? TStringBuf("&amp;") : TStringBuf("&");
+                break;
+        }
+
+        strout += entity;
+        // Step over the character that has just been replaced.
+        cur = special + 1;
+    }
+}
+
+void EncodeHtmlPcdataAppend(const TStringBuf str, TString& strout) { // Appends the encoded form of str to strout; '&' is always written as "&amp;".
+ EncodeHtmlPcdataAppendInternal(str, strout, true);
+}
+
+TString EncodeHtmlPcdata(const TStringBuf str, bool qAmp) { // Returns the encoded copy of str; qAmp selects whether '&' becomes "&amp;".
+ TString strout;
+ EncodeHtmlPcdataAppendInternal(str, strout, qAmp);
+ return strout;
+}
+
+TString DecodeHtmlPcdata(const TString& sz) { // Replaces the entities listed below with their characters; any other '&' is copied as is.
+    static const struct { TStringBuf Code; char Char; } entities[] = { // lengths are computed once, not per lookup
+        {"&quot;", '\"'}, {"&lt;", '<'}, {"&gt;", '>'}, {"&#39;", '\''}, {"&#039;", '\''}, {"&amp;", '&'}, {"&apos;", '\''}};
+    TString res;
+    res.reserve(sz.length()); // decoding never makes the string longer
+    for (size_t i = 0; i < sz.length(); ++i) {
+        char c = sz[i];
+        if (c == '&') {
+            for (const auto& entity : entities) {
+                if (strncmp(sz.c_str() + i, entity.Code.data(), entity.Code.size()) == 0) { // c_str() is NUL-terminated, so a short tail just fails to match
+                    i += entity.Code.size() - 1; // the loop's ++i consumes the last entity character
+                    c = entity.Char;
+                    break;
+                }
+            }
+        }
+        res += c;
+    }
+    return res;
+}
diff --git a/library/cpp/html/pcdata/pcdata.h b/library/cpp/html/pcdata/pcdata.h
new file mode 100644
index 0000000000..7dd741f53d
--- /dev/null
+++ b/library/cpp/html/pcdata/pcdata.h
@@ -0,0 +1,10 @@
+#pragma once
+
+#include <util/generic/fwd.h>
+
+/// Converts text into HTML code: '"', '<', '>' and '\'' are replaced with entities; '&' becomes "&amp;" only when qAmp is true.
+TString EncodeHtmlPcdata(const TStringBuf str, bool qAmp = true);
+void EncodeHtmlPcdataAppend(const TStringBuf str, TString& strout); ///< Same encoding with '&' always escaped; the result is appended to strout.
+
+/// Reverse of EncodeHtmlPcdata(); additionally decodes "&#039;" and "&apos;" as '\''.
+TString DecodeHtmlPcdata(const TString& sz);
diff --git a/library/cpp/html/pcdata/pcdata_ut.cpp b/library/cpp/html/pcdata/pcdata_ut.cpp
new file mode 100644
index 0000000000..5833f8bc59
--- /dev/null
+++ b/library/cpp/html/pcdata/pcdata_ut.cpp
@@ -0,0 +1,48 @@
+#include "pcdata.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+Y_UNIT_TEST_SUITE(TPcdata) {
+    Y_UNIT_TEST(TestStress) {
+        {
+            // Binary input with zero bytes; the result is not checked (presumably a crash/smoke check).
+            ui64 key = 0x000017C0B76C4E87ull;
+            TString res = EncodeHtmlPcdata(TStringBuf((const char*)&key, sizeof(key)));
+        }
+        for (size_t i = 0; i < 1000; ++i) {
+            const TString s = NUnitTest::RandomString(i, i);
+            UNIT_ASSERT_VALUES_EQUAL(DecodeHtmlPcdata(EncodeHtmlPcdata(s)), s);
+        }
+    }
+
+    Y_UNIT_TEST(Test1) {
+        const TString tests[] = {"qw&qw", "&<", ">&qw", "\'&aaa"};
+        for (const auto& s : tests) { // by reference: no per-iteration TString copy
+            UNIT_ASSERT_VALUES_EQUAL(DecodeHtmlPcdata(EncodeHtmlPcdata(s)), s);
+        }
+    }
+
+    Y_UNIT_TEST(Test2) {
+        UNIT_ASSERT_VALUES_EQUAL(EncodeHtmlPcdata("&qqq"), "&amp;qqq");
+    }
+
+    Y_UNIT_TEST(TestDecodeEntityAliases) {
+        // The decoder accepts apostrophe spellings the encoder never emits and keeps unknown entities.
+        UNIT_ASSERT_VALUES_EQUAL(DecodeHtmlPcdata("&#039;&apos;&#39;"), "'''");
+        UNIT_ASSERT_VALUES_EQUAL(DecodeHtmlPcdata("&nbsp;&"), "&nbsp;&");
+    }
+
+    Y_UNIT_TEST(TestEncodeHtmlPcdataAppend) {
+        TString s;
+        EncodeHtmlPcdataAppend("m&m", s);
+        EncodeHtmlPcdataAppend("'s", s);
+        UNIT_ASSERT_VALUES_EQUAL(EncodeHtmlPcdata("m&m's"), s);
+        UNIT_ASSERT_VALUES_EQUAL("m&amp;m&#39;s", s);
+    }
+
+    Y_UNIT_TEST(TestStrangeAmpParameter) {
+        UNIT_ASSERT_VALUES_EQUAL(EncodeHtmlPcdata("m&m's", true), "m&amp;m&#39;s");
+        UNIT_ASSERT_VALUES_EQUAL(EncodeHtmlPcdata("m&m's"), "m&amp;m&#39;s"); //default
+        UNIT_ASSERT_VALUES_EQUAL(EncodeHtmlPcdata("m&m's", false), "m&m&#39;s");
+    }
+}
diff --git a/library/cpp/html/pcdata/ut/ya.make b/library/cpp/html/pcdata/ut/ya.make
new file mode 100644
index 0000000000..bc947baa89
--- /dev/null
+++ b/library/cpp/html/pcdata/ut/ya.make
@@ -0,0 +1,9 @@
+UNITTEST_FOR(library/cpp/html/pcdata)
+# Unit tests for library/cpp/html/pcdata; pcdata_ut.cpp is the only test source.
+OWNER(vladon)
+
+SRCS(
+ pcdata_ut.cpp
+)
+
+END()
diff --git a/library/cpp/html/pcdata/ya.make b/library/cpp/html/pcdata/ya.make
new file mode 100644
index 0000000000..637220e706
--- /dev/null
+++ b/library/cpp/html/pcdata/ya.make
@@ -0,0 +1,10 @@
+LIBRARY()
+# HTML PCDATA encode/decode helpers: pcdata.cpp, with pcdata.h listed alongside it.
+OWNER(vladon)
+
+SRCS(
+ pcdata.cpp
+ pcdata.h
+)
+
+END()
diff --git a/library/cpp/html/ya.make b/library/cpp/html/ya.make
new file mode 100644
index 0000000000..ab904be3b9
--- /dev/null
+++ b/library/cpp/html/ya.make
@@ -0,0 +1,32 @@
+RECURSE( # Sub-directories of library/cpp/html, including their ut/tests directories.
+ dehtml
+ dehtml/ut
+ detect
+ blob
+ entity
+ entity/ut
+ escape
+ escape/ut
+ face
+ face/blob
+ html5
+ html5/tests
+ html5/ut
+ lexer
+ lexer/ut
+ pcdata
+ pcdata/ut
+ pdoc
+ print
+ relalternate
+ relalternate/ut
+ sanitize
+ spec
+ storage
+ storage/ut
+ strip
+ tree
+ url
+ zoneconf
+ zoneconf/ut
+)