aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorart-snake <art-snake@yandex-team.ru>2022-02-10 16:50:34 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:50:34 +0300
commit1700010e2088971894d12a7a16d6004866f986fd (patch)
treeac3b38289119375037d595858db9751013220a3f
parent785bc0acdf3b0c63f971ee17e845945d7381dcb7 (diff)
downloadydb-1700010e2088971894d12a7a16d6004866f986fd.tar.gz
Restoring authorship annotation for <art-snake@yandex-team.ru>. Commit 1 of 2.
-rw-r--r--library/cpp/protobuf/json/config.h24
-rw-r--r--library/cpp/protobuf/json/json2proto.cpp18
-rw-r--r--library/cpp/protobuf/json/json2proto.h24
-rw-r--r--library/cpp/protobuf/json/proto2json_printer.cpp28
-rw-r--r--library/cpp/protobuf/json/ut/json2proto_ut.cpp136
-rw-r--r--library/cpp/protobuf/json/ut/proto2json_ut.cpp114
-rw-r--r--library/cpp/protobuf/json/ut/test.proto32
-rw-r--r--util/charset/utf8.cpp188
-rw-r--r--util/charset/utf8.h16
-rw-r--r--util/charset/utf8_ut.cpp74
10 files changed, 327 insertions, 327 deletions
diff --git a/library/cpp/protobuf/json/config.h b/library/cpp/protobuf/json/config.h
index dc84fb4d5d..ae06b3d8ec 100644
--- a/library/cpp/protobuf/json/config.h
+++ b/library/cpp/protobuf/json/config.h
@@ -15,20 +15,20 @@ namespace NProtobufJson {
bool FormatOutput = false;
enum MissingKeyMode {
- // Skip missing keys
+ // Skip missing keys
MissingKeySkip = 0,
- // Fill missing keys with json null value.
+ // Fill missing keys with json null value.
MissingKeyNull,
- // Use default value in any case.
- // If default value is not explicitly defined, use default type value:
- // i.e. 0 for integers, "" for strings
- // For repeated keys, means []
- MissingKeyDefault,
- // Use default value if it is explicitly specified for optional fields.
- // Skip if no explicitly defined default value for optional fields.
- // Throw exception if required field is empty.
- // For repeated keys, same as MissingKeySkip
- MissingKeyExplicitDefaultThrowRequired
+ // Use default value in any case.
+ // If default value is not explicitly defined, use default type value:
+ // i.e. 0 for integers, "" for strings
+ // For repeated keys, means []
+ MissingKeyDefault,
+ // Use default value if it is explicitly specified for optional fields.
+ // Skip if no explicitly defined default value for optional fields.
+ // Throw exception if required field is empty.
+ // For repeated keys, same as MissingKeySkip
+ MissingKeyExplicitDefaultThrowRequired
};
MissingKeyMode MissingSingleKeyMode = MissingKeySkip;
MissingKeyMode MissingRepeatedKeyMode = MissingKeySkip;
diff --git a/library/cpp/protobuf/json/json2proto.cpp b/library/cpp/protobuf/json/json2proto.cpp
index 640c10f5a5..f19204ac35 100644
--- a/library/cpp/protobuf/json/json2proto.cpp
+++ b/library/cpp/protobuf/json/json2proto.cpp
@@ -19,10 +19,10 @@
} \
if (!json.JsonCheckType()) { \
if (config.CastFromString && json.IsString()) { \
- if (config.DoNotCastEmptyStrings && json.GetString().empty()) { \
- /* Empty string is same as "no value" for scalar types.*/ \
- break; \
- } \
+ if (config.DoNotCastEmptyStrings && json.GetString().empty()) { \
+ /* Empty string is same as "no value" for scalar types.*/ \
+ break; \
+ } \
reflection->ProtoSet(&proto, &field, FromString(json.GetString())); \
break; \
} \
@@ -335,7 +335,7 @@ Json2RepeatedField(const NJson::TJsonValue& json,
}
}
- if (fieldJson.GetType() != NJson::JSON_ARRAY && !config.MapAsObject && !config.VectorizeScalars && !config.ValueVectorizer) {
+ if (fieldJson.GetType() != NJson::JSON_ARRAY && !config.MapAsObject && !config.VectorizeScalars && !config.ValueVectorizer) {
ythrow yexception() << "JSON field doesn't represent an array for "
<< name
<< "(actual type is "
@@ -361,10 +361,10 @@ Json2RepeatedField(const NJson::TJsonValue& json,
for (const NJson::TJsonValue& jsonValue : jsonArray) {
Json2RepeatedFieldValue(jsonValue, proto, field, config, reflection);
}
- } else if (config.ValueVectorizer) {
- for (const NJson::TJsonValue& jsonValue : config.ValueVectorizer(fieldJson)) {
- Json2RepeatedFieldValue(jsonValue, proto, field, config, reflection);
- }
+ } else if (config.ValueVectorizer) {
+ for (const NJson::TJsonValue& jsonValue : config.ValueVectorizer(fieldJson)) {
+ Json2RepeatedFieldValue(jsonValue, proto, field, config, reflection);
+ }
} else if (config.VectorizeScalars) {
Json2RepeatedFieldValue(fieldJson, proto, field, config, reflection);
}
diff --git a/library/cpp/protobuf/json/json2proto.h b/library/cpp/protobuf/json/json2proto.h
index 4c33498dfa..458e0c0909 100644
--- a/library/cpp/protobuf/json/json2proto.h
+++ b/library/cpp/protobuf/json/json2proto.h
@@ -19,7 +19,7 @@ namespace google {
namespace NProtobufJson {
struct TJson2ProtoConfig {
using TSelf = TJson2ProtoConfig;
- using TValueVectorizer = std::function<NJson::TJsonValue::TArray(const NJson::TJsonValue& jsonValue)>;
+ using TValueVectorizer = std::function<NJson::TJsonValue::TArray(const NJson::TJsonValue& jsonValue)>;
enum FldNameMode {
FieldNameOriginalCase = 0, // default
@@ -58,11 +58,11 @@ namespace NProtobufJson {
return *this;
}
- TSelf& SetDoNotCastEmptyStrings(bool cast) {
- DoNotCastEmptyStrings = cast;
- return *this;
- }
-
+ TSelf& SetDoNotCastEmptyStrings(bool cast) {
+ DoNotCastEmptyStrings = cast;
+ return *this;
+ }
+
TSelf& SetCastRobust(bool cast) {
CastRobust = cast;
return *this;
@@ -115,9 +115,9 @@ namespace NProtobufJson {
/// Cast string json values to protobuf field type
bool CastFromString = false;
- /// Skip empty strings, instead casting from string into scalar types.
- /// I.e. empty string like default value for scalar types.
- bool DoNotCastEmptyStrings = false;
+ /// Skip empty strings, instead casting from string into scalar types.
+ /// I.e. empty string like default value for scalar types.
+ bool DoNotCastEmptyStrings = false;
/// Cast all json values to protobuf field types
bool CastRobust = false;
@@ -138,9 +138,9 @@ namespace NProtobufJson {
/// Append scalars to repeated fields
bool VectorizeScalars = false;
-
- /// Custom spliter non array value to repeated fields.
- TValueVectorizer ValueVectorizer;
+
+ /// Custom spliter non array value to repeated fields.
+ TValueVectorizer ValueVectorizer;
/// Allow js-style comments (both // and /**/)
bool AllowComments = false;
diff --git a/library/cpp/protobuf/json/proto2json_printer.cpp b/library/cpp/protobuf/json/proto2json_printer.cpp
index 6123eab0f2..69a0aa25f6 100644
--- a/library/cpp/protobuf/json/proto2json_printer.cpp
+++ b/library/cpp/protobuf/json/proto2json_printer.cpp
@@ -205,18 +205,18 @@ namespace NProtobufJson {
const Reflection* reflection = proto.GetReflection();
- bool shouldPrintField = reflection->HasField(proto, &field);
- if (!shouldPrintField && GetConfig().MissingSingleKeyMode == TProto2JsonConfig::MissingKeyExplicitDefaultThrowRequired) {
- if (field.has_default_value()) {
- shouldPrintField = true;
- } else if (field.is_required()) {
- ythrow yexception() << "Empty required protobuf field: "
- << field.full_name() << ".";
- }
- }
- shouldPrintField = shouldPrintField || GetConfig().MissingSingleKeyMode == TProto2JsonConfig::MissingKeyDefault;
-
- if (shouldPrintField) {
+ bool shouldPrintField = reflection->HasField(proto, &field);
+ if (!shouldPrintField && GetConfig().MissingSingleKeyMode == TProto2JsonConfig::MissingKeyExplicitDefaultThrowRequired) {
+ if (field.has_default_value()) {
+ shouldPrintField = true;
+ } else if (field.is_required()) {
+ ythrow yexception() << "Empty required protobuf field: "
+ << field.full_name() << ".";
+ }
+ }
+ shouldPrintField = shouldPrintField || GetConfig().MissingSingleKeyMode == TProto2JsonConfig::MissingKeyDefault;
+
+ if (shouldPrintField) {
switch (field.cpp_type()) {
INT_FIELD_TO_JSON(CPPTYPE_INT32, GetInt32);
INT_FIELD_TO_JSON(CPPTYPE_INT64, GetInt64);
@@ -256,7 +256,7 @@ namespace NProtobufJson {
}
case TProto2JsonConfig::MissingKeySkip:
- case TProto2JsonConfig::MissingKeyExplicitDefaultThrowRequired:
+ case TProto2JsonConfig::MissingKeyExplicitDefaultThrowRequired:
default:
break;
}
@@ -358,7 +358,7 @@ namespace NProtobufJson {
}
case TProto2JsonConfig::MissingKeySkip:
- case TProto2JsonConfig::MissingKeyExplicitDefaultThrowRequired:
+ case TProto2JsonConfig::MissingKeyExplicitDefaultThrowRequired:
default:
break;
}
diff --git a/library/cpp/protobuf/json/ut/json2proto_ut.cpp b/library/cpp/protobuf/json/ut/json2proto_ut.cpp
index 0dfe57bc7a..081072971c 100644
--- a/library/cpp/protobuf/json/ut/json2proto_ut.cpp
+++ b/library/cpp/protobuf/json/ut/json2proto_ut.cpp
@@ -690,57 +690,57 @@ Y_UNIT_TEST(TestVectorizeScalars) {
#undef DEFINE_FIELD
}
-Y_UNIT_TEST(TestValueVectorizer) {
- {
- // No ValueVectorizer
- NJson::TJsonValue json;
- json["RepeatedString"] = "123";
- TJson2ProtoConfig config;
- TSingleRepeatedString expected;
- UNIT_ASSERT_EXCEPTION(Json2Proto(json, expected, config), yexception);
- }
- {
- // ValueVectorizer replace original value by array
- NJson::TJsonValue json;
- json["RepeatedString"] = "123";
- TJson2ProtoConfig config;
-
- TSingleRepeatedString expected;
- expected.AddRepeatedString("4");
- expected.AddRepeatedString("5");
- expected.AddRepeatedString("6");
-
- config.ValueVectorizer = [](const NJson::TJsonValue& val) -> NJson::TJsonValue::TArray {
- Y_UNUSED(val);
- return {NJson::TJsonValue("4"), NJson::TJsonValue("5"), NJson::TJsonValue("6")};
- };
- TSingleRepeatedString actual;
- Json2Proto(json, actual, config);
- UNIT_ASSERT_PROTOS_EQUAL(expected, actual);
- }
- {
- // ValueVectorizer replace original value by array and cast
- NJson::TJsonValue json;
- json["RepeatedInt"] = 123;
- TJson2ProtoConfig config;
-
- TSingleRepeatedInt expected;
- expected.AddRepeatedInt(4);
- expected.AddRepeatedInt(5);
- expected.AddRepeatedInt(6);
-
- config.ValueVectorizer = [](const NJson::TJsonValue& val) -> NJson::TJsonValue::TArray {
- Y_UNUSED(val);
- return {NJson::TJsonValue("4"), NJson::TJsonValue(5), NJson::TJsonValue("6")};
- };
- config.CastFromString = true;
-
- TSingleRepeatedInt actual;
- Json2Proto(json, actual, config);
- UNIT_ASSERT_PROTOS_EQUAL(expected, actual);
- }
-}
-
+Y_UNIT_TEST(TestValueVectorizer) {
+ {
+ // No ValueVectorizer
+ NJson::TJsonValue json;
+ json["RepeatedString"] = "123";
+ TJson2ProtoConfig config;
+ TSingleRepeatedString expected;
+ UNIT_ASSERT_EXCEPTION(Json2Proto(json, expected, config), yexception);
+ }
+ {
+ // ValueVectorizer replace original value by array
+ NJson::TJsonValue json;
+ json["RepeatedString"] = "123";
+ TJson2ProtoConfig config;
+
+ TSingleRepeatedString expected;
+ expected.AddRepeatedString("4");
+ expected.AddRepeatedString("5");
+ expected.AddRepeatedString("6");
+
+ config.ValueVectorizer = [](const NJson::TJsonValue& val) -> NJson::TJsonValue::TArray {
+ Y_UNUSED(val);
+ return {NJson::TJsonValue("4"), NJson::TJsonValue("5"), NJson::TJsonValue("6")};
+ };
+ TSingleRepeatedString actual;
+ Json2Proto(json, actual, config);
+ UNIT_ASSERT_PROTOS_EQUAL(expected, actual);
+ }
+ {
+ // ValueVectorizer replace original value by array and cast
+ NJson::TJsonValue json;
+ json["RepeatedInt"] = 123;
+ TJson2ProtoConfig config;
+
+ TSingleRepeatedInt expected;
+ expected.AddRepeatedInt(4);
+ expected.AddRepeatedInt(5);
+ expected.AddRepeatedInt(6);
+
+ config.ValueVectorizer = [](const NJson::TJsonValue& val) -> NJson::TJsonValue::TArray {
+ Y_UNUSED(val);
+ return {NJson::TJsonValue("4"), NJson::TJsonValue(5), NJson::TJsonValue("6")};
+ };
+ config.CastFromString = true;
+
+ TSingleRepeatedInt actual;
+ Json2Proto(json, actual, config);
+ UNIT_ASSERT_PROTOS_EQUAL(expected, actual);
+ }
+}
+
Y_UNIT_TEST(TestMapAsObject) {
TMapType modelProto;
@@ -1103,23 +1103,23 @@ Y_UNIT_TEST(TestMergeRepeatedAppend) {
UNIT_ASSERT_PROTOS_EQUAL(proto, modelProto);
} // TestMergeRepeatedAppend
-Y_UNIT_TEST(TestEmptyStringForCastFromString) {
- NJson::TJsonValue json;
- json["I32"] = "";
- json["Bool"] = "";
- json["OneString"] = "";
-
- TJson2ProtoConfig config;
- config.SetCastFromString(true);
- config.SetDoNotCastEmptyStrings(true);
- TFlatOptional proto;
- UNIT_ASSERT_NO_EXCEPTION(Json2Proto(json, proto, config));
- UNIT_ASSERT(!proto.HasBool());
- UNIT_ASSERT(!proto.HasI32());
- UNIT_ASSERT(proto.HasOneString());
- UNIT_ASSERT_EQUAL("", proto.GetOneString());
-} // TestEmptyStringForCastFromString
-
+Y_UNIT_TEST(TestEmptyStringForCastFromString) {
+ NJson::TJsonValue json;
+ json["I32"] = "";
+ json["Bool"] = "";
+ json["OneString"] = "";
+
+ TJson2ProtoConfig config;
+ config.SetCastFromString(true);
+ config.SetDoNotCastEmptyStrings(true);
+ TFlatOptional proto;
+ UNIT_ASSERT_NO_EXCEPTION(Json2Proto(json, proto, config));
+ UNIT_ASSERT(!proto.HasBool());
+ UNIT_ASSERT(!proto.HasI32());
+ UNIT_ASSERT(proto.HasOneString());
+ UNIT_ASSERT_EQUAL("", proto.GetOneString());
+} // TestEmptyStringForCastFromString
+
Y_UNIT_TEST(TestAllowComments) {
constexpr TStringBuf json = R"(
{
diff --git a/library/cpp/protobuf/json/ut/proto2json_ut.cpp b/library/cpp/protobuf/json/ut/proto2json_ut.cpp
index 07e52d7f2f..6ae7960bb1 100644
--- a/library/cpp/protobuf/json/ut/proto2json_ut.cpp
+++ b/library/cpp/protobuf/json/ut/proto2json_ut.cpp
@@ -465,52 +465,52 @@ Y_UNIT_TEST(TestMissingSingleKeyConfig) {
UNIT_ASSERT_NO_EXCEPTION(Proto2Json(proto, json, config));
UNIT_ASSERT_JSONS_EQUAL(json, modelJson);
}
- {
- // Test MissingKeyExplicitDefaultThrowRequired for non explicit default values.
- TFlatOptional proto;
- NJson::TJsonValue modelJson(NJson::JSON_MAP);
- NJson::TJsonValue json;
- TProto2JsonConfig config;
- config.MissingSingleKeyMode = TProto2JsonConfig::MissingKeyExplicitDefaultThrowRequired;
-
- UNIT_ASSERT_NO_EXCEPTION(Proto2Json(proto, json, config));
- UNIT_ASSERT_JSONS_EQUAL(json, modelJson);
- }
- {
- // Test MissingKeyExplicitDefaultThrowRequired for explicit default values.
- NJson::TJsonValue modelJson;
- modelJson["String"] = "value";
-
- TSingleDefaultString proto;
- NJson::TJsonValue json;
- TProto2JsonConfig config;
- config.MissingSingleKeyMode = TProto2JsonConfig::MissingKeyExplicitDefaultThrowRequired;
- UNIT_ASSERT_NO_EXCEPTION(Proto2Json(proto, json, config));
- UNIT_ASSERT_JSONS_EQUAL(json, modelJson);
- }
- {
- // Test MissingKeyExplicitDefaultThrowRequired for empty required values.
- TFlatRequired proto;
- NJson::TJsonValue json;
- TProto2JsonConfig config;
- config.MissingSingleKeyMode = TProto2JsonConfig::MissingKeyExplicitDefaultThrowRequired;
- UNIT_ASSERT_EXCEPTION_CONTAINS(Proto2Json(proto, json, config), yexception, "Empty required protobuf field");
- }
- {
- // Test MissingKeyExplicitDefaultThrowRequired for required value.
- TSingleRequiredString proto;
- NJson::TJsonValue json;
- TProto2JsonConfig config;
- config.MissingSingleKeyMode = TProto2JsonConfig::MissingKeyExplicitDefaultThrowRequired;
-
- UNIT_ASSERT_EXCEPTION_CONTAINS(Proto2Json(proto, json, config), yexception, "Empty required protobuf field");
-
- NJson::TJsonValue modelJson;
- modelJson["String"] = "value";
- proto.SetString("value");
- UNIT_ASSERT_NO_EXCEPTION(Proto2Json(proto, json, config));
- UNIT_ASSERT_JSONS_EQUAL(json, modelJson);
- }
+ {
+ // Test MissingKeyExplicitDefaultThrowRequired for non explicit default values.
+ TFlatOptional proto;
+ NJson::TJsonValue modelJson(NJson::JSON_MAP);
+ NJson::TJsonValue json;
+ TProto2JsonConfig config;
+ config.MissingSingleKeyMode = TProto2JsonConfig::MissingKeyExplicitDefaultThrowRequired;
+
+ UNIT_ASSERT_NO_EXCEPTION(Proto2Json(proto, json, config));
+ UNIT_ASSERT_JSONS_EQUAL(json, modelJson);
+ }
+ {
+ // Test MissingKeyExplicitDefaultThrowRequired for explicit default values.
+ NJson::TJsonValue modelJson;
+ modelJson["String"] = "value";
+
+ TSingleDefaultString proto;
+ NJson::TJsonValue json;
+ TProto2JsonConfig config;
+ config.MissingSingleKeyMode = TProto2JsonConfig::MissingKeyExplicitDefaultThrowRequired;
+ UNIT_ASSERT_NO_EXCEPTION(Proto2Json(proto, json, config));
+ UNIT_ASSERT_JSONS_EQUAL(json, modelJson);
+ }
+ {
+ // Test MissingKeyExplicitDefaultThrowRequired for empty required values.
+ TFlatRequired proto;
+ NJson::TJsonValue json;
+ TProto2JsonConfig config;
+ config.MissingSingleKeyMode = TProto2JsonConfig::MissingKeyExplicitDefaultThrowRequired;
+ UNIT_ASSERT_EXCEPTION_CONTAINS(Proto2Json(proto, json, config), yexception, "Empty required protobuf field");
+ }
+ {
+ // Test MissingKeyExplicitDefaultThrowRequired for required value.
+ TSingleRequiredString proto;
+ NJson::TJsonValue json;
+ TProto2JsonConfig config;
+ config.MissingSingleKeyMode = TProto2JsonConfig::MissingKeyExplicitDefaultThrowRequired;
+
+ UNIT_ASSERT_EXCEPTION_CONTAINS(Proto2Json(proto, json, config), yexception, "Empty required protobuf field");
+
+ NJson::TJsonValue modelJson;
+ modelJson["String"] = "value";
+ proto.SetString("value");
+ UNIT_ASSERT_NO_EXCEPTION(Proto2Json(proto, json, config));
+ UNIT_ASSERT_JSONS_EQUAL(json, modelJson);
+ }
} // TestMissingSingleKeyConfig
Y_UNIT_TEST(TestMissingRepeatedKeyNoConfig) {
@@ -551,17 +551,17 @@ Y_UNIT_TEST(TestMissingRepeatedKeyConfig) {
UNIT_ASSERT_NO_EXCEPTION(Proto2Json(proto, json, config));
UNIT_ASSERT_JSONS_EQUAL(json, modelJson);
}
- {
- TFlatRepeated proto;
- NJson::TJsonValue modelJson(NJson::JSON_MAP);
- NJson::TJsonValue json;
- TProto2JsonConfig config;
- config.MissingRepeatedKeyMode = TProto2JsonConfig::MissingKeyExplicitDefaultThrowRequired;
-
- // SHould be same as MissingKeySkip
- UNIT_ASSERT_NO_EXCEPTION(Proto2Json(proto, json, config));
- UNIT_ASSERT_JSONS_EQUAL(json, modelJson);
- }
+ {
+ TFlatRepeated proto;
+ NJson::TJsonValue modelJson(NJson::JSON_MAP);
+ NJson::TJsonValue json;
+ TProto2JsonConfig config;
+ config.MissingRepeatedKeyMode = TProto2JsonConfig::MissingKeyExplicitDefaultThrowRequired;
+
+ // SHould be same as MissingKeySkip
+ UNIT_ASSERT_NO_EXCEPTION(Proto2Json(proto, json, config));
+ UNIT_ASSERT_JSONS_EQUAL(json, modelJson);
+ }
} // TestMissingRepeatedKeyConfig
Y_UNIT_TEST(TestEscaping) {
diff --git a/library/cpp/protobuf/json/ut/test.proto b/library/cpp/protobuf/json/ut/test.proto
index 0fa996fd41..8cf7bf8a5d 100644
--- a/library/cpp/protobuf/json/ut/test.proto
+++ b/library/cpp/protobuf/json/ut/test.proto
@@ -177,22 +177,22 @@ message TWithJsonName {
optional int32 Def_upper = 3; // json_name = "DefUpper"
optional int32 def_lower = 4; // json_name = "defLower"
}
-
-message TSingleRequiredString {
- required string String = 1;
-}
-
-message TSingleDefaultString {
- optional string String = 1 [default = "value"];
-}
-
-message TSingleRepeatedString {
- repeated string RepeatedString = 1;
-}
-
-message TSingleRepeatedInt {
- repeated int32 RepeatedInt = 1;
-}
+
+message TSingleRequiredString {
+ required string String = 1;
+}
+
+message TSingleDefaultString {
+ optional string String = 1 [default = "value"];
+}
+
+message TSingleRepeatedString {
+ repeated string RepeatedString = 1;
+}
+
+message TSingleRepeatedInt {
+ repeated int32 RepeatedInt = 1;
+}
message TExtensionField {
extensions 100 to 199;
diff --git a/util/charset/utf8.cpp b/util/charset/utf8.cpp
index efe3a52f61..21ed1adcc6 100644
--- a/util/charset/utf8.cpp
+++ b/util/charset/utf8.cpp
@@ -1,87 +1,87 @@
#include "unidata.h"
#include "utf8.h"
-namespace {
- enum class ECaseConversion {
- ToUpper,
- ToLower,
- };
-
- wchar32 ConvertChar(ECaseConversion conversion, wchar32 ch) {
- switch (conversion) {
- case ECaseConversion::ToUpper:
- return ToUpper(ch);
- case ECaseConversion::ToLower:
- return ToLower(ch);
- }
- Y_ASSERT(false); // NOTREACHED
- return 0;
- }
-
- bool ConvertCaseUTF8Impl(ECaseConversion conversion, const char* beg, size_t n,
- TString& newString) {
- const unsigned char* p = (const unsigned char*)beg;
- const unsigned char* const end = p + n;
-
- // first loop searches for the first character, which is changed by ConvertChar
- // if there is no changed character, we don't need reallocation/copy
- wchar32 cNew = 0;
- size_t cLen = 0;
- while (p < end) {
- wchar32 c;
- if (RECODE_OK != SafeReadUTF8Char(c, cLen, p, end)) {
- ythrow yexception()
- << "failed to decode UTF-8 string at pos " << ((const char*)p - beg);
- }
- cNew = ConvertChar(conversion, c);
-
- if (cNew != c)
- break;
- p += cLen;
- }
- if (p == end) {
- return false;
- }
-
- // some character changed after ToLower. Write new string to newString.
- newString.resize(n);
-
- size_t written = (char*)p - beg;
- char* writePtr = newString.begin();
- memcpy(writePtr, beg, written);
- writePtr += written;
- size_t destSpace = n - written;
-
- // before each iteration (including the first one) variable 'cNew' contains unwritten symbol
- while (true) {
- size_t cNewLen;
+namespace {
+ enum class ECaseConversion {
+ ToUpper,
+ ToLower,
+ };
+
+ wchar32 ConvertChar(ECaseConversion conversion, wchar32 ch) {
+ switch (conversion) {
+ case ECaseConversion::ToUpper:
+ return ToUpper(ch);
+ case ECaseConversion::ToLower:
+ return ToLower(ch);
+ }
+ Y_ASSERT(false); // NOTREACHED
+ return 0;
+ }
+
+ bool ConvertCaseUTF8Impl(ECaseConversion conversion, const char* beg, size_t n,
+ TString& newString) {
+ const unsigned char* p = (const unsigned char*)beg;
+ const unsigned char* const end = p + n;
+
+ // first loop searches for the first character, which is changed by ConvertChar
+ // if there is no changed character, we don't need reallocation/copy
+ wchar32 cNew = 0;
+ size_t cLen = 0;
+ while (p < end) {
+ wchar32 c;
+ if (RECODE_OK != SafeReadUTF8Char(c, cLen, p, end)) {
+ ythrow yexception()
+ << "failed to decode UTF-8 string at pos " << ((const char*)p - beg);
+ }
+ cNew = ConvertChar(conversion, c);
+
+ if (cNew != c)
+ break;
+ p += cLen;
+ }
+ if (p == end) {
+ return false;
+ }
+
+ // some character changed after ToLower. Write new string to newString.
+ newString.resize(n);
+
+ size_t written = (char*)p - beg;
+ char* writePtr = newString.begin();
+ memcpy(writePtr, beg, written);
+ writePtr += written;
+ size_t destSpace = n - written;
+
+ // before each iteration (including the first one) variable 'cNew' contains unwritten symbol
+ while (true) {
+ size_t cNewLen;
Y_ASSERT((writePtr - newString.data()) + destSpace == newString.size());
- if (RECODE_EOOUTPUT ==
- SafeWriteUTF8Char(cNew, cNewLen, (unsigned char*)writePtr, destSpace)) {
+ if (RECODE_EOOUTPUT ==
+ SafeWriteUTF8Char(cNew, cNewLen, (unsigned char*)writePtr, destSpace)) {
destSpace += newString.size();
newString.resize(newString.size() * 2);
writePtr = newString.begin() + (newString.size() - destSpace);
- continue;
- }
- destSpace -= cNewLen;
- writePtr += cNewLen;
- p += cLen;
- if (p == end) {
+ continue;
+ }
+ destSpace -= cNewLen;
+ writePtr += cNewLen;
+ p += cLen;
+ if (p == end) {
newString.resize(newString.size() - destSpace);
- return true;
- }
- wchar32 c = 0;
- if (RECODE_OK != SafeReadUTF8Char(c, cLen, p, end)) {
- ythrow yexception()
- << "failed to decode UTF-8 string at pos " << ((const char*)p - beg);
- }
- cNew = ConvertChar(conversion, c);
- }
- Y_ASSERT(false);
- return false;
- }
-} // namespace
-
+ return true;
+ }
+ wchar32 c = 0;
+ if (RECODE_OK != SafeReadUTF8Char(c, cLen, p, end)) {
+ ythrow yexception()
+ << "failed to decode UTF-8 string at pos " << ((const char*)p - beg);
+ }
+ cNew = ConvertChar(conversion, c);
+ }
+ Y_ASSERT(false);
+ return false;
+ }
+} // namespace
+
extern const wchar32 BROKEN_RUNE = 0xFFFD;
static const char* SkipUTF8Chars(const char* begin, const char* end, size_t numChars) {
@@ -130,7 +130,7 @@ EUTF8Detect UTF8Detect(const char* s, size_t len) {
}
bool ToLowerUTF8Impl(const char* beg, size_t n, TString& newString) {
- return ConvertCaseUTF8Impl(ECaseConversion::ToLower, beg, n, newString);
+ return ConvertCaseUTF8Impl(ECaseConversion::ToLower, beg, n, newString);
}
TString ToLowerUTF8(const TString& s) {
@@ -148,23 +148,23 @@ TString ToLowerUTF8(TStringBuf s) {
TString ToLowerUTF8(const char* s) {
return ToLowerUTF8(TStringBuf(s));
}
-
-bool ToUpperUTF8Impl(const char* beg, size_t n, TString& newString) {
- return ConvertCaseUTF8Impl(ECaseConversion::ToUpper, beg, n, newString);
-}
-
-TString ToUpperUTF8(const TString& s) {
- TString newString;
+
+bool ToUpperUTF8Impl(const char* beg, size_t n, TString& newString) {
+ return ConvertCaseUTF8Impl(ECaseConversion::ToUpper, beg, n, newString);
+}
+
+TString ToUpperUTF8(const TString& s) {
+ TString newString;
bool changed = ToUpperUTF8Impl(s.data(), s.size(), newString);
- return changed ? newString : s;
-}
-
-TString ToUpperUTF8(TStringBuf s) {
- TString newString;
+ return changed ? newString : s;
+}
+
+TString ToUpperUTF8(TStringBuf s) {
+ TString newString;
bool changed = ToUpperUTF8Impl(s.data(), s.size(), newString);
return changed ? newString : TString(s.data(), s.size());
-}
-
-TString ToUpperUTF8(const char* s) {
- return ToUpperUTF8(TStringBuf(s));
-}
+}
+
+TString ToUpperUTF8(const char* s) {
+ return ToUpperUTF8(TStringBuf(s));
+}
diff --git a/util/charset/utf8.h b/util/charset/utf8.h
index 5039b46ae9..5250bbeab2 100644
--- a/util/charset/utf8.h
+++ b/util/charset/utf8.h
@@ -374,15 +374,15 @@ bool ToLowerUTF8Impl(const char* beg, size_t n, TString& newString);
TString ToLowerUTF8(const TString& s);
TString ToLowerUTF8(TStringBuf s);
TString ToLowerUTF8(const char* s);
-
+
inline TString ToLowerUTF8(const std::string& s) {
return ToLowerUTF8(TStringBuf(s));
}
-//! returns true, if result is not the same as input, and put it in newString
-//! returns false, if result is unmodified
-bool ToUpperUTF8Impl(const char* beg, size_t n, TString& newString);
-
-TString ToUpperUTF8(const TString& s);
-TString ToUpperUTF8(TStringBuf s);
-TString ToUpperUTF8(const char* s);
+//! returns true, if result is not the same as input, and put it in newString
+//! returns false, if result is unmodified
+bool ToUpperUTF8Impl(const char* beg, size_t n, TString& newString);
+
+TString ToUpperUTF8(const TString& s);
+TString ToUpperUTF8(TStringBuf s);
+TString ToUpperUTF8(const char* s);
diff --git a/util/charset/utf8_ut.cpp b/util/charset/utf8_ut.cpp
index 9e68881cca..8cbb844dc7 100644
--- a/util/charset/utf8_ut.cpp
+++ b/util/charset/utf8_ut.cpp
@@ -52,46 +52,46 @@ Y_UNIT_TEST_SUITE(TUtfUtilTest) {
}
}
- Y_UNIT_TEST(TestToUpperUtfString) {
- UNIT_ASSERT_VALUES_EQUAL(ToUpperUTF8("xyz XYZ привет!"), "XYZ XYZ ПРИВЕТ!");
-
+ Y_UNIT_TEST(TestToUpperUtfString) {
+ UNIT_ASSERT_VALUES_EQUAL(ToUpperUTF8("xyz XYZ привет!"), "XYZ XYZ ПРИВЕТ!");
+
UNIT_ASSERT_VALUES_EQUAL(ToUpperUTF8(TStringBuf("XYZ")), "XYZ");
-
- {
- TString s = "ПРИВЕТ!";
- TString q = "привет!";
- TString tmp;
+
+ {
+ TString s = "ПРИВЕТ!";
+ TString q = "привет!";
+ TString tmp;
UNIT_ASSERT(ToUpperUTF8Impl(s.data(), s.size(), tmp) == false);
UNIT_ASSERT(ToUpperUTF8Impl(q.data(), q.size(), tmp) == true);
- }
-
- {
- const char* weird = "\xC8\xBE"; // 'Ⱦ', U+023E. strlen(weird)==2, strlen(ToUpper_utf8(weird)) is 3
- const char* turkI = "İ"; //strlen("İ") == 2, strlen(ToUpper_utf8("İ") == 1
- TStringBuf chars[] = {"F", "f", "б", "Б", turkI, weird};
- const int N = Y_ARRAY_SIZE(chars);
- //try all combinations of these letters.
- int numberOfVariants = 1;
- for (int len = 0; len <= 4; ++len) {
- for (int i = 0; i < numberOfVariants; ++i) {
- TString s;
- int k = i;
- for (int j = 0; j < len; ++j) {
- //Treat 'i' like number in base-N system with digits from 'chars'-array
- s += chars[k % N];
- k /= N;
- }
-
- TUtf16String tmp = UTF8ToWide(s);
- tmp.to_upper();
-
- UNIT_ASSERT_VALUES_EQUAL(ToUpperUTF8(s), WideToUTF8(tmp));
- }
- numberOfVariants *= N;
- }
- }
- }
-
+ }
+
+ {
+ const char* weird = "\xC8\xBE"; // 'Ⱦ', U+023E. strlen(weird)==2, strlen(ToUpper_utf8(weird)) is 3
+ const char* turkI = "İ"; //strlen("İ") == 2, strlen(ToUpper_utf8("İ") == 1
+ TStringBuf chars[] = {"F", "f", "б", "Б", turkI, weird};
+ const int N = Y_ARRAY_SIZE(chars);
+ //try all combinations of these letters.
+ int numberOfVariants = 1;
+ for (int len = 0; len <= 4; ++len) {
+ for (int i = 0; i < numberOfVariants; ++i) {
+ TString s;
+ int k = i;
+ for (int j = 0; j < len; ++j) {
+ //Treat 'i' like number in base-N system with digits from 'chars'-array
+ s += chars[k % N];
+ k /= N;
+ }
+
+ TUtf16String tmp = UTF8ToWide(s);
+ tmp.to_upper();
+
+ UNIT_ASSERT_VALUES_EQUAL(ToUpperUTF8(s), WideToUTF8(tmp));
+ }
+ numberOfVariants *= N;
+ }
+ }
+ }
+
Y_UNIT_TEST(TestUTF8ToWide) {
TFileInput in(ArcadiaSourceRoot() + TStringBuf("/util/charset/ut/utf8/test1.txt"));