diff options
author | monster <monster@ydb.tech> | 2022-07-07 14:41:37 +0300 |
---|---|---|
committer | monster <monster@ydb.tech> | 2022-07-07 14:41:37 +0300 |
commit | 06e5c21a835c0e923506c4ff27929f34e00761c2 (patch) | |
tree | 75efcbc6854ef9bd476eb8bf00cc5c900da436a2 /contrib/libs/protobuf-mutator/src | |
parent | 03f024c4412e3aa613bb543cf1660176320ba8f4 (diff) | |
download | ydb-06e5c21a835c0e923506c4ff27929f34e00761c2.tar.gz |
fix ya.make
Diffstat (limited to 'contrib/libs/protobuf-mutator/src')
-rw-r--r-- | contrib/libs/protobuf-mutator/src/binary_format.cc | 50 | ||||
-rw-r--r-- | contrib/libs/protobuf-mutator/src/binary_format.h | 34 | ||||
-rw-r--r-- | contrib/libs/protobuf-mutator/src/field_instance.h | 449 | ||||
-rw-r--r-- | contrib/libs/protobuf-mutator/src/libfuzzer/libfuzzer_macro.cc | 242 | ||||
-rw-r--r-- | contrib/libs/protobuf-mutator/src/libfuzzer/libfuzzer_macro.h | 129 | ||||
-rw-r--r-- | contrib/libs/protobuf-mutator/src/libfuzzer/libfuzzer_mutator.cc | 100 | ||||
-rw-r--r-- | contrib/libs/protobuf-mutator/src/libfuzzer/libfuzzer_mutator.h | 46 | ||||
-rw-r--r-- | contrib/libs/protobuf-mutator/src/mutator.cc | 812 | ||||
-rw-r--r-- | contrib/libs/protobuf-mutator/src/mutator.h | 111 | ||||
-rw-r--r-- | contrib/libs/protobuf-mutator/src/random.h | 26 | ||||
-rw-r--r-- | contrib/libs/protobuf-mutator/src/text_format.cc | 57 | ||||
-rw-r--r-- | contrib/libs/protobuf-mutator/src/text_format.h | 34 | ||||
-rw-r--r-- | contrib/libs/protobuf-mutator/src/utf8_fix.cc | 94 | ||||
-rw-r--r-- | contrib/libs/protobuf-mutator/src/utf8_fix.h | 30 | ||||
-rw-r--r-- | contrib/libs/protobuf-mutator/src/weighted_reservoir_sampler.h | 59 |
15 files changed, 2273 insertions, 0 deletions
diff --git a/contrib/libs/protobuf-mutator/src/binary_format.cc b/contrib/libs/protobuf-mutator/src/binary_format.cc new file mode 100644 index 0000000000..9bf3c3494a --- /dev/null +++ b/contrib/libs/protobuf-mutator/src/binary_format.cc @@ -0,0 +1,50 @@ +// Copyright 2017 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "src/binary_format.h" + +namespace protobuf_mutator { + +using protobuf::Message; + +bool ParseBinaryMessage(const uint8_t* data, size_t size, Message* output) { + return ParseBinaryMessage({reinterpret_cast<const char*>(data), size}, output); +} + +bool ParseBinaryMessage(const TProtoStringType& data, protobuf::Message* output) { + output->Clear(); + if (!output->ParsePartialFromString(data)) { + output->Clear(); + return false; + } + return true; +} + +size_t SaveMessageAsBinary(const Message& message, uint8_t* data, + size_t max_size) { + TProtoStringType result = SaveMessageAsBinary(message); + if (result.size() <= max_size) { + memcpy(data, result.data(), result.size()); + return result.size(); + } + return 0; +} + +TProtoStringType SaveMessageAsBinary(const protobuf::Message& message) { + String tmp; + if (!message.SerializePartialToString(&tmp)) return {}; + return tmp; +} + +} // namespace protobuf_mutator diff --git a/contrib/libs/protobuf-mutator/src/binary_format.h b/contrib/libs/protobuf-mutator/src/binary_format.h new file mode 100644 index 0000000000..e2233582eb --- /dev/null +++ 
b/contrib/libs/protobuf-mutator/src/binary_format.h @@ -0,0 +1,34 @@ +// Copyright 2017 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef SRC_BINARY_FORMAT_H_ +#define SRC_BINARY_FORMAT_H_ + +#include <string> + +#include "port/protobuf.h" + +namespace protobuf_mutator { + +// Binary serialization of protos. +bool ParseBinaryMessage(const uint8_t* data, size_t size, + protobuf::Message* output); +bool ParseBinaryMessage(const TProtoStringType& data, protobuf::Message* output); +size_t SaveMessageAsBinary(const protobuf::Message& message, uint8_t* data, + size_t max_size); +TProtoStringType SaveMessageAsBinary(const protobuf::Message& message); + +} // namespace protobuf_mutator + +#endif // SRC_BINARY_FORMAT_H_ diff --git a/contrib/libs/protobuf-mutator/src/field_instance.h b/contrib/libs/protobuf-mutator/src/field_instance.h new file mode 100644 index 0000000000..11ab465e8a --- /dev/null +++ b/contrib/libs/protobuf-mutator/src/field_instance.h @@ -0,0 +1,449 @@ +// Copyright 2016 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef SRC_FIELD_INSTANCE_H_ +#define SRC_FIELD_INSTANCE_H_ + +#include <memory> +#include <string> + +#include "port/protobuf.h" + +namespace protobuf_mutator { + +// Helper class for common protobuf fields operations. +class ConstFieldInstance { + public: + static const size_t kInvalidIndex = -1; + + struct Enum { + size_t index; + size_t count; + }; + + ConstFieldInstance() + : message_(nullptr), descriptor_(nullptr), index_(kInvalidIndex) {} + + ConstFieldInstance(const protobuf::Message* message, + const protobuf::FieldDescriptor* field, size_t index) + : message_(message), descriptor_(field), index_(index) { + assert(message_); + assert(descriptor_); + assert(index_ != kInvalidIndex); + assert(descriptor_->is_repeated()); + } + + ConstFieldInstance(const protobuf::Message* message, + const protobuf::FieldDescriptor* field) + : message_(message), descriptor_(field), index_(kInvalidIndex) { + assert(message_); + assert(descriptor_); + assert(!descriptor_->is_repeated()); + } + + void GetDefault(int32_t* out) const { + *out = descriptor_->default_value_int32(); + } + + void GetDefault(int64_t* out) const { + *out = descriptor_->default_value_int64(); + } + + void GetDefault(uint32_t* out) const { + *out = descriptor_->default_value_uint32(); + } + + void GetDefault(uint64_t* out) const { + *out = descriptor_->default_value_uint64(); + } + + void GetDefault(double* out) const { + *out = descriptor_->default_value_double(); + } + + void GetDefault(float* out) const { + *out = descriptor_->default_value_float(); + } + + void GetDefault(bool* 
out) const { *out = descriptor_->default_value_bool(); } + + void GetDefault(Enum* out) const { + const protobuf::EnumValueDescriptor* value = + descriptor_->default_value_enum(); + const protobuf::EnumDescriptor* type = value->type(); + *out = {static_cast<size_t>(value->index()), + static_cast<size_t>(type->value_count())}; + } + + void GetDefault(TProtoStringType* out) const { + *out = descriptor_->default_value_string(); + } + + void GetDefault(std::unique_ptr<protobuf::Message>* out) const { + out->reset(reflection() + .GetMessageFactory() + ->GetPrototype(descriptor_->message_type()) + ->New()); + } + + void Load(int32_t* value) const { + *value = is_repeated() + ? reflection().GetRepeatedInt32(*message_, descriptor_, index_) + : reflection().GetInt32(*message_, descriptor_); + } + + void Load(int64_t* value) const { + *value = is_repeated() + ? reflection().GetRepeatedInt64(*message_, descriptor_, index_) + : reflection().GetInt64(*message_, descriptor_); + } + + void Load(uint32_t* value) const { + *value = is_repeated() ? reflection().GetRepeatedUInt32(*message_, + descriptor_, index_) + : reflection().GetUInt32(*message_, descriptor_); + } + + void Load(uint64_t* value) const { + *value = is_repeated() ? reflection().GetRepeatedUInt64(*message_, + descriptor_, index_) + : reflection().GetUInt64(*message_, descriptor_); + } + + void Load(double* value) const { + *value = is_repeated() ? reflection().GetRepeatedDouble(*message_, + descriptor_, index_) + : reflection().GetDouble(*message_, descriptor_); + } + + void Load(float* value) const { + *value = is_repeated() + ? reflection().GetRepeatedFloat(*message_, descriptor_, index_) + : reflection().GetFloat(*message_, descriptor_); + } + + void Load(bool* value) const { + *value = is_repeated() + ? 
reflection().GetRepeatedBool(*message_, descriptor_, index_) + : reflection().GetBool(*message_, descriptor_); + } + + void Load(Enum* value) const { + const protobuf::EnumValueDescriptor* value_descriptor = + is_repeated() + ? reflection().GetRepeatedEnum(*message_, descriptor_, index_) + : reflection().GetEnum(*message_, descriptor_); + *value = {static_cast<size_t>(value_descriptor->index()), + static_cast<size_t>(value_descriptor->type()->value_count())}; + if (value->index >= value->count) GetDefault(value); + } + + void Load(TProtoStringType* value) const { + *value = is_repeated() ? reflection().GetRepeatedString(*message_, + descriptor_, index_) + : reflection().GetString(*message_, descriptor_); + } + + void Load(std::unique_ptr<protobuf::Message>* value) const { + const protobuf::Message& source = + is_repeated() + ? reflection().GetRepeatedMessage(*message_, descriptor_, index_) + : reflection().GetMessage(*message_, descriptor_); + value->reset(source.New()); + (*value)->CopyFrom(source); + } + + template <class T> + bool CanStore(const T& value) const { + return true; + } + + bool CanStore(const TProtoStringType& value) const { + if (!EnforceUtf8()) return true; + using protobuf::internal::WireFormatLite; + return WireFormatLite::VerifyUtf8String(value.data(), value.length(), + WireFormatLite::PARSE, ""); + } + + TProtoStringType name() const { return descriptor_->name(); } + + protobuf::FieldDescriptor::CppType cpp_type() const { + return descriptor_->cpp_type(); + } + + const protobuf::EnumDescriptor* enum_type() const { + return descriptor_->enum_type(); + } + + const protobuf::Descriptor* message_type() const { + return descriptor_->message_type(); + } + + bool EnforceUtf8() const { + return descriptor_->type() == protobuf::FieldDescriptor::TYPE_STRING && + descriptor()->file()->syntax() == + protobuf::FileDescriptor::SYNTAX_PROTO3; + } + + const protobuf::FieldDescriptor* descriptor() const { return descriptor_; } + + TProtoStringType 
DebugString() const { + TProtoStringType s = descriptor_->DebugString(); + if (is_repeated()) s += "[" + std::to_string(index_) + "]"; + return s + " of\n" + message_->DebugString(); + } + + protected: + bool is_repeated() const { return descriptor_->is_repeated(); } + + const protobuf::Reflection& reflection() const { + return *message_->GetReflection(); + } + + size_t index() const { return index_; } + + private: + template <class Fn, class T> + friend struct FieldFunction; + + const protobuf::Message* message_; + const protobuf::FieldDescriptor* descriptor_; + size_t index_; +}; + +class FieldInstance : public ConstFieldInstance { + public: + static const size_t kInvalidIndex = -1; + + FieldInstance() : ConstFieldInstance(), message_(nullptr) {} + + FieldInstance(protobuf::Message* message, + const protobuf::FieldDescriptor* field, size_t index) + : ConstFieldInstance(message, field, index), message_(message) {} + + FieldInstance(protobuf::Message* message, + const protobuf::FieldDescriptor* field) + : ConstFieldInstance(message, field), message_(message) {} + + void Delete() const { + if (!is_repeated()) return reflection().ClearField(message_, descriptor()); + int field_size = reflection().FieldSize(*message_, descriptor()); + // API has only method to delete the last message, so we move method from + // the + // middle to the end. 
+ for (int i = index() + 1; i < field_size; ++i) + reflection().SwapElements(message_, descriptor(), i, i - 1); + reflection().RemoveLast(message_, descriptor()); + } + + template <class T> + void Create(const T& value) const { + if (!is_repeated()) return Store(value); + InsertRepeated(value); + } + + void Store(int32_t value) const { + if (is_repeated()) + reflection().SetRepeatedInt32(message_, descriptor(), index(), value); + else + reflection().SetInt32(message_, descriptor(), value); + } + + void Store(int64_t value) const { + if (is_repeated()) + reflection().SetRepeatedInt64(message_, descriptor(), index(), value); + else + reflection().SetInt64(message_, descriptor(), value); + } + + void Store(uint32_t value) const { + if (is_repeated()) + reflection().SetRepeatedUInt32(message_, descriptor(), index(), value); + else + reflection().SetUInt32(message_, descriptor(), value); + } + + void Store(uint64_t value) const { + if (is_repeated()) + reflection().SetRepeatedUInt64(message_, descriptor(), index(), value); + else + reflection().SetUInt64(message_, descriptor(), value); + } + + void Store(double value) const { + if (is_repeated()) + reflection().SetRepeatedDouble(message_, descriptor(), index(), value); + else + reflection().SetDouble(message_, descriptor(), value); + } + + void Store(float value) const { + if (is_repeated()) + reflection().SetRepeatedFloat(message_, descriptor(), index(), value); + else + reflection().SetFloat(message_, descriptor(), value); + } + + void Store(bool value) const { + if (is_repeated()) + reflection().SetRepeatedBool(message_, descriptor(), index(), value); + else + reflection().SetBool(message_, descriptor(), value); + } + + void Store(const Enum& value) const { + assert(value.index < value.count); + const protobuf::EnumValueDescriptor* enum_value = + descriptor()->enum_type()->value(value.index); + if (is_repeated()) + reflection().SetRepeatedEnum(message_, descriptor(), index(), enum_value); + else + 
reflection().SetEnum(message_, descriptor(), enum_value); + } + + void Store(const TProtoStringType& value) const { + if (is_repeated()) + reflection().SetRepeatedString(message_, descriptor(), index(), value); + else + reflection().SetString(message_, descriptor(), value); + } + + void Store(const std::unique_ptr<protobuf::Message>& value) const { + protobuf::Message* mutable_message = + is_repeated() ? reflection().MutableRepeatedMessage( + message_, descriptor(), index()) + : reflection().MutableMessage(message_, descriptor()); + mutable_message->Clear(); + if (value) mutable_message->CopyFrom(*value); + } + + private: + template <class T> + void InsertRepeated(const T& value) const { + PushBackRepeated(value); + size_t field_size = reflection().FieldSize(*message_, descriptor()); + if (field_size == 1) return; + // API has only method to add field to the end of the list. So we add + // descriptor() + // and move it into the middle. + for (size_t i = field_size - 1; i > index(); --i) + reflection().SwapElements(message_, descriptor(), i, i - 1); + } + + void PushBackRepeated(int32_t value) const { + assert(is_repeated()); + reflection().AddInt32(message_, descriptor(), value); + } + + void PushBackRepeated(int64_t value) const { + assert(is_repeated()); + reflection().AddInt64(message_, descriptor(), value); + } + + void PushBackRepeated(uint32_t value) const { + assert(is_repeated()); + reflection().AddUInt32(message_, descriptor(), value); + } + + void PushBackRepeated(uint64_t value) const { + assert(is_repeated()); + reflection().AddUInt64(message_, descriptor(), value); + } + + void PushBackRepeated(double value) const { + assert(is_repeated()); + reflection().AddDouble(message_, descriptor(), value); + } + + void PushBackRepeated(float value) const { + assert(is_repeated()); + reflection().AddFloat(message_, descriptor(), value); + } + + void PushBackRepeated(bool value) const { + assert(is_repeated()); + reflection().AddBool(message_, descriptor(), 
value); + } + + void PushBackRepeated(const Enum& value) const { + assert(value.index < value.count); + const protobuf::EnumValueDescriptor* enum_value = + descriptor()->enum_type()->value(value.index); + assert(is_repeated()); + reflection().AddEnum(message_, descriptor(), enum_value); + } + + void PushBackRepeated(const TProtoStringType& value) const { + assert(is_repeated()); + reflection().AddString(message_, descriptor(), value); + } + + void PushBackRepeated(const std::unique_ptr<protobuf::Message>& value) const { + assert(is_repeated()); + protobuf::Message* mutable_message = + reflection().AddMessage(message_, descriptor()); + mutable_message->Clear(); + if (value) mutable_message->CopyFrom(*value); + } + + protobuf::Message* message_; +}; + +template <class Fn, class R = void> +struct FieldFunction { + template <class Field, class... Args> + R operator()(const Field& field, const Args&... args) const { + assert(field.descriptor()); + using protobuf::FieldDescriptor; + switch (field.cpp_type()) { + case FieldDescriptor::CPPTYPE_INT32: + return static_cast<const Fn*>(this)->template ForType<int32_t>(field, + args...); + case FieldDescriptor::CPPTYPE_INT64: + return static_cast<const Fn*>(this)->template ForType<int64_t>(field, + args...); + case FieldDescriptor::CPPTYPE_UINT32: + return static_cast<const Fn*>(this)->template ForType<uint32_t>( + field, args...); + case FieldDescriptor::CPPTYPE_UINT64: + return static_cast<const Fn*>(this)->template ForType<uint64_t>( + field, args...); + case FieldDescriptor::CPPTYPE_DOUBLE: + return static_cast<const Fn*>(this)->template ForType<double>(field, + args...); + case FieldDescriptor::CPPTYPE_FLOAT: + return static_cast<const Fn*>(this)->template ForType<float>(field, + args...); + case FieldDescriptor::CPPTYPE_BOOL: + return static_cast<const Fn*>(this)->template ForType<bool>(field, + args...); + case FieldDescriptor::CPPTYPE_ENUM: + return static_cast<const Fn*>(this) + ->template 
ForType<ConstFieldInstance::Enum>(field, args...); + case FieldDescriptor::CPPTYPE_STRING: + return static_cast<const Fn*>(this)->template ForType<TProtoStringType>( + field, args...); + case FieldDescriptor::CPPTYPE_MESSAGE: + return static_cast<const Fn*>(this) + ->template ForType<std::unique_ptr<protobuf::Message>>(field, + args...); + } + assert(false && "Unknown type"); + abort(); + } +}; + +} // namespace protobuf_mutator + +#endif // SRC_FIELD_INSTANCE_H_ diff --git a/contrib/libs/protobuf-mutator/src/libfuzzer/libfuzzer_macro.cc b/contrib/libs/protobuf-mutator/src/libfuzzer/libfuzzer_macro.cc new file mode 100644 index 0000000000..4e506cbe94 --- /dev/null +++ b/contrib/libs/protobuf-mutator/src/libfuzzer/libfuzzer_macro.cc @@ -0,0 +1,242 @@ +// Copyright 2017 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "src/libfuzzer/libfuzzer_macro.h" + +#include <algorithm> +#include <memory> +#include <vector> + +#include "src/binary_format.h" +#include "src/libfuzzer/libfuzzer_mutator.h" +#include "src/text_format.h" + +namespace protobuf_mutator { +namespace libfuzzer { + +namespace { + +class InputReader { + public: + InputReader(const uint8_t* data, size_t size) : data_(data), size_(size) {} + virtual ~InputReader() = default; + + virtual bool Read(protobuf::Message* message) const = 0; + + const uint8_t* data() const { return data_; } + size_t size() const { return size_; } + + private: + const uint8_t* data_; + size_t size_; +}; + +class OutputWriter { + public: + OutputWriter(uint8_t* data, size_t size) : data_(data), size_(size) {} + virtual ~OutputWriter() = default; + + virtual size_t Write(const protobuf::Message& message) = 0; + + uint8_t* data() const { return data_; } + size_t size() const { return size_; } + + private: + uint8_t* data_; + size_t size_; +}; + +class TextInputReader : public InputReader { + public: + using InputReader::InputReader; + + bool Read(protobuf::Message* message) const override { + return ParseTextMessage(data(), size(), message); + } +}; + +class TextOutputWriter : public OutputWriter { + public: + using OutputWriter::OutputWriter; + + size_t Write(const protobuf::Message& message) override { + return SaveMessageAsText(message, data(), size()); + } +}; + +class BinaryInputReader : public InputReader { + public: + using InputReader::InputReader; + + bool Read(protobuf::Message* message) const override { + return ParseBinaryMessage(data(), size(), message); + } +}; + +class BinaryOutputWriter : public OutputWriter { + public: + using OutputWriter::OutputWriter; + + size_t Write(const protobuf::Message& message) override { + return SaveMessageAsBinary(message, data(), size()); + } +}; + +class LastMutationCache { + public: + void Store(const uint8_t* data, size_t size, protobuf::Message* message) { + if (!message_) 
message_.reset(message->New()); + message->GetReflection()->Swap(message, message_.get()); + data_.assign(data, data + size); + } + + bool LoadIfSame(const uint8_t* data, size_t size, + protobuf::Message* message) { + if (!message_ || size != data_.size() || + !std::equal(data_.begin(), data_.end(), data)) + return false; + + message->GetReflection()->Swap(message, message_.get()); + message_.reset(); + return true; + } + + private: + std::vector<uint8_t> data_; + std::unique_ptr<protobuf::Message> message_; +}; + +LastMutationCache* GetCache() { + static LastMutationCache cache; + return &cache; +} + +Mutator* GetMutator() { + static Mutator mutator; + return &mutator; +} + +size_t GetMaxSize(const InputReader& input, const OutputWriter& output, + const protobuf::Message& message) { + size_t max_size = message.ByteSizeLong() + output.size(); + max_size -= std::min(max_size, input.size()); + return max_size; +} + +size_t MutateMessage(unsigned int seed, const InputReader& input, + OutputWriter* output, protobuf::Message* message) { + GetMutator()->Seed(seed); + input.Read(message); + size_t max_size = GetMaxSize(input, *output, *message); + GetMutator()->Mutate(message, max_size); + if (size_t new_size = output->Write(*message)) { + assert(new_size <= output->size()); + GetCache()->Store(output->data(), new_size, message); + return new_size; + } + return 0; +} + +size_t CrossOverMessages(unsigned int seed, const InputReader& input1, + const InputReader& input2, OutputWriter* output, + protobuf::Message* message1, + protobuf::Message* message2) { + GetMutator()->Seed(seed); + input1.Read(message1); + input2.Read(message2); + size_t max_size = GetMaxSize(input1, *output, *message1); + GetMutator()->CrossOver(*message2, message1, max_size); + if (size_t new_size = output->Write(*message1)) { + assert(new_size <= output->size()); + GetCache()->Store(output->data(), new_size, message1); + return new_size; + } + return 0; +} + +size_t MutateTextMessage(uint8_t* data, 
size_t size, size_t max_size, + unsigned int seed, protobuf::Message* message) { + TextInputReader input(data, size); + TextOutputWriter output(data, max_size); + return MutateMessage(seed, input, &output, message); +} + +size_t CrossOverTextMessages(const uint8_t* data1, size_t size1, + const uint8_t* data2, size_t size2, uint8_t* out, + size_t max_out_size, unsigned int seed, + protobuf::Message* message1, + protobuf::Message* message2) { + TextInputReader input1(data1, size1); + TextInputReader input2(data2, size2); + TextOutputWriter output(out, max_out_size); + return CrossOverMessages(seed, input1, input2, &output, message1, message2); +} + +size_t MutateBinaryMessage(uint8_t* data, size_t size, size_t max_size, + unsigned int seed, protobuf::Message* message) { + BinaryInputReader input(data, size); + BinaryOutputWriter output(data, max_size); + return MutateMessage(seed, input, &output, message); +} + +size_t CrossOverBinaryMessages(const uint8_t* data1, size_t size1, + const uint8_t* data2, size_t size2, uint8_t* out, + size_t max_out_size, unsigned int seed, + protobuf::Message* message1, + protobuf::Message* message2) { + BinaryInputReader input1(data1, size1); + BinaryInputReader input2(data2, size2); + BinaryOutputWriter output(out, max_out_size); + return CrossOverMessages(seed, input1, input2, &output, message1, message2); +} + +} // namespace + +size_t CustomProtoMutator(bool binary, uint8_t* data, size_t size, + size_t max_size, unsigned int seed, + protobuf::Message* input) { + auto mutate = binary ? &MutateBinaryMessage : &MutateTextMessage; + return mutate(data, size, max_size, seed, input); +} + +size_t CustomProtoCrossOver(bool binary, const uint8_t* data1, size_t size1, + const uint8_t* data2, size_t size2, uint8_t* out, + size_t max_out_size, unsigned int seed, + protobuf::Message* input1, + protobuf::Message* input2) { + auto cross = binary ? 
&CrossOverBinaryMessages : &CrossOverTextMessages; + return cross(data1, size1, data2, size2, out, max_out_size, seed, input1, + input2); +} + +bool LoadProtoInput(bool binary, const uint8_t* data, size_t size, + protobuf::Message* input) { + if (GetCache()->LoadIfSame(data, size, input)) return true; + auto result = binary ? ParseBinaryMessage(data, size, input) + : ParseTextMessage(data, size, input); + if (!result) return false; + GetMutator()->Seed(size); + GetMutator()->Fix(input); + return true; +} + +void RegisterPostProcessor( + const protobuf::Descriptor* desc, + std::function<void(protobuf::Message* message, unsigned int seed)> + callback) { + GetMutator()->RegisterPostProcessor(desc, callback); +} + +} // namespace libfuzzer +} // namespace protobuf_mutator diff --git a/contrib/libs/protobuf-mutator/src/libfuzzer/libfuzzer_macro.h b/contrib/libs/protobuf-mutator/src/libfuzzer/libfuzzer_macro.h new file mode 100644 index 0000000000..1a1fe0a297 --- /dev/null +++ b/contrib/libs/protobuf-mutator/src/libfuzzer/libfuzzer_macro.h @@ -0,0 +1,129 @@ +// Copyright 2017 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef SRC_LIBFUZZER_LIBFUZZER_MACRO_H_ +#define SRC_LIBFUZZER_LIBFUZZER_MACRO_H_ + +#include <stddef.h> + +#include <cstdint> +#include <functional> +#include <type_traits> + +#include "port/protobuf.h" + +// Defines custom mutator, crossover and test functions using default +// serialization format. 
Default is text. +#define DEFINE_PROTO_FUZZER(arg) DEFINE_TEXT_PROTO_FUZZER(arg) +// Defines custom mutator, crossover and test functions using text +// serialization. This format is more convenient to read. +#define DEFINE_TEXT_PROTO_FUZZER(arg) DEFINE_PROTO_FUZZER_IMPL(false, arg) +// Defines custom mutator, crossover and test functions using binary +// serialization. This makes mutations faster. However often test function is +// significantly slower than mutator, so fuzzing rate may stay unchanged. +#define DEFINE_BINARY_PROTO_FUZZER(arg) DEFINE_PROTO_FUZZER_IMPL(true, arg) + +// Registers the callback as a potential mutation performed on the parent +// message of a field. This must be called inside an initialization code block. +// libFuzzer suggests putting one-time-initialization in a function used to +// initialize a static variable inside the fuzzer target. For example: +// +// static bool Modify( +// SomeMessage* message /* Fix or additionally modify the message */, +// unsigned int seed /* If random generator is needed use this seed */) { +// ... +// } +// +// DEFINE_PROTO_FUZZER(const SomeMessage& msg) { +// static PostProcessorRegistration reg(&Modify); +// } + +// Implementation of macros above. 
+#define DEFINE_CUSTOM_PROTO_MUTATOR_IMPL(use_binary, Proto) \ + extern "C" size_t LLVMFuzzerCustomMutator( \ + uint8_t* data, size_t size, size_t max_size, unsigned int seed) { \ + using protobuf_mutator::libfuzzer::CustomProtoMutator; \ + Proto input; \ + return CustomProtoMutator(use_binary, data, size, max_size, seed, &input); \ + } + +#define DEFINE_CUSTOM_PROTO_CROSSOVER_IMPL(use_binary, Proto) \ + extern "C" size_t LLVMFuzzerCustomCrossOver( \ + const uint8_t* data1, size_t size1, const uint8_t* data2, size_t size2, \ + uint8_t* out, size_t max_out_size, unsigned int seed) { \ + using protobuf_mutator::libfuzzer::CustomProtoCrossOver; \ + Proto input1; \ + Proto input2; \ + return CustomProtoCrossOver(use_binary, data1, size1, data2, size2, out, \ + max_out_size, seed, &input1, &input2); \ + } + +#define DEFINE_TEST_ONE_PROTO_INPUT_IMPL(use_binary, Proto) \ + extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { \ + using protobuf_mutator::libfuzzer::LoadProtoInput; \ + Proto input; \ + if (LoadProtoInput(use_binary, data, size, &input)) \ + TestOneProtoInput(input); \ + return 0; \ + } + +#define DEFINE_POST_PROCESS_PROTO_MUTATION_IMPL(Proto) \ + using PostProcessorRegistration = \ + protobuf_mutator::libfuzzer::PostProcessorRegistration<Proto>; + +#define DEFINE_PROTO_FUZZER_IMPL(use_binary, arg) \ + static void TestOneProtoInput(arg); \ + using FuzzerProtoType = std::remove_const<std::remove_reference< \ + std::function<decltype(TestOneProtoInput)>::argument_type>::type>::type; \ + DEFINE_CUSTOM_PROTO_MUTATOR_IMPL(use_binary, FuzzerProtoType) \ + DEFINE_CUSTOM_PROTO_CROSSOVER_IMPL(use_binary, FuzzerProtoType) \ + DEFINE_TEST_ONE_PROTO_INPUT_IMPL(use_binary, FuzzerProtoType) \ + DEFINE_POST_PROCESS_PROTO_MUTATION_IMPL(FuzzerProtoType) \ + static void TestOneProtoInput(arg) + +namespace protobuf_mutator { +namespace libfuzzer { + +size_t CustomProtoMutator(bool binary, uint8_t* data, size_t size, + size_t max_size, unsigned int seed, + 
protobuf::Message* input); +size_t CustomProtoCrossOver(bool binary, const uint8_t* data1, size_t size1, + const uint8_t* data2, size_t size2, uint8_t* out, + size_t max_out_size, unsigned int seed, + protobuf::Message* input1, + protobuf::Message* input2); +bool LoadProtoInput(bool binary, const uint8_t* data, size_t size, + protobuf::Message* input); + +void RegisterPostProcessor( + const protobuf::Descriptor* desc, + std::function<void(protobuf::Message* message, unsigned int seed)> + callback); + +template <class Proto> +struct PostProcessorRegistration { + PostProcessorRegistration( + const std::function<void(Proto* message, unsigned int seed)>& callback) { + RegisterPostProcessor( + Proto::descriptor(), + [callback](protobuf::Message* message, unsigned int seed) { + callback(static_cast<Proto*>(message), seed); + }); + } +}; + +} // namespace libfuzzer +} // namespace protobuf_mutator + +#endif // SRC_LIBFUZZER_LIBFUZZER_MACRO_H_ diff --git a/contrib/libs/protobuf-mutator/src/libfuzzer/libfuzzer_mutator.cc b/contrib/libs/protobuf-mutator/src/libfuzzer/libfuzzer_mutator.cc new file mode 100644 index 0000000000..9c3be921e6 --- /dev/null +++ b/contrib/libs/protobuf-mutator/src/libfuzzer/libfuzzer_mutator.cc @@ -0,0 +1,100 @@ +// Copyright 2017 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "src/libfuzzer/libfuzzer_mutator.h" + +#include <string.h> + +#include <algorithm> +#include <cassert> +#include <memory> +#include <string> + +#include "port/protobuf.h" +#include "src/mutator.h" + +// see compiler-rt/lib/sanitizer-common/sanitizer_internal_defs.h; usage same as +// SANITIZER_INTERFACE_WEAK_DEF with some functionality removed +#ifdef _MSC_VER +#if defined(_M_IX86) || defined(__i386__) +#define WIN_SYM_PREFIX "_" +#else +#define WIN_SYM_PREFIX +#endif + +#define STRINGIFY_(A) #A +#define STRINGIFY(A) STRINGIFY_(A) + +#define WEAK_DEFAULT_NAME(Name) Name##__def + +// clang-format off +#define LIB_PROTO_MUTATOR_WEAK_DEF(ReturnType, Name, ...) \ + __pragma(comment(linker, "/alternatename:" \ + WIN_SYM_PREFIX STRINGIFY(Name) "=" \ + WIN_SYM_PREFIX STRINGIFY(WEAK_DEFAULT_NAME(Name))))\ + extern "C" ReturnType Name(__VA_ARGS__); \ + extern "C" ReturnType WEAK_DEFAULT_NAME(Name)(__VA_ARGS__) +// clang-format on +#else +#define LIB_PROTO_MUTATOR_WEAK_DEF(ReturnType, Name, ...) 
\ + extern "C" __attribute__((weak)) ReturnType Name(__VA_ARGS__) +#endif + +LIB_PROTO_MUTATOR_WEAK_DEF(size_t, LLVMFuzzerMutate, uint8_t*, size_t, size_t) { + return 0; +} + +namespace protobuf_mutator { +namespace libfuzzer { + +namespace { + +template <class T> +T MutateValue(T v) { + size_t size = + LLVMFuzzerMutate(reinterpret_cast<uint8_t*>(&v), sizeof(v), sizeof(v)); + memset(reinterpret_cast<uint8_t*>(&v) + size, 0, sizeof(v) - size); + return v; +} + +} // namespace + +int32_t Mutator::MutateInt32(int32_t value) { return MutateValue(value); } + +int64_t Mutator::MutateInt64(int64_t value) { return MutateValue(value); } + +uint32_t Mutator::MutateUInt32(uint32_t value) { return MutateValue(value); } + +uint64_t Mutator::MutateUInt64(uint64_t value) { return MutateValue(value); } + +float Mutator::MutateFloat(float value) { return MutateValue(value); } + +double Mutator::MutateDouble(double value) { return MutateValue(value); } + +TProtoStringType Mutator::MutateString(const TProtoStringType& value, + int size_increase_hint) { + // Randomly return empty strings as LLVMFuzzerMutate does not produce them. + // Use uint16_t because on Windows, uniform_int_distribution does not support + // any 8 bit types. + if (!std::uniform_int_distribution<uint16_t>(0, 20)(*random())) return {}; + TProtoStringType result = value; + int new_size = value.size() + size_increase_hint; + result.resize(std::max(1, new_size)); + result.resize(LLVMFuzzerMutate(reinterpret_cast<uint8_t*>(&result[0]), + value.size(), result.size())); + return result; +} + +} // namespace libfuzzer +} // namespace protobuf_mutator diff --git a/contrib/libs/protobuf-mutator/src/libfuzzer/libfuzzer_mutator.h b/contrib/libs/protobuf-mutator/src/libfuzzer/libfuzzer_mutator.h new file mode 100644 index 0000000000..722993efe8 --- /dev/null +++ b/contrib/libs/protobuf-mutator/src/libfuzzer/libfuzzer_mutator.h @@ -0,0 +1,46 @@ +// Copyright 2017 Google Inc. All rights reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef SRC_LIBFUZZER_LIBFUZZER_MUTATOR_H_ +#define SRC_LIBFUZZER_LIBFUZZER_MUTATOR_H_ + +#include <string> + +#include "src/mutator.h" + +namespace protobuf_mutator { +namespace libfuzzer { + +// Overrides the protobuf_mutator::Mutator::Mutate* methods with implementations +// that use the libFuzzer library. protobuf_mutator::Mutator itself has only a +// very basic implementation of these methods. +class Mutator : public protobuf_mutator::Mutator { + public: + // Inherits the constructors of protobuf_mutator::Mutator. + using protobuf_mutator::Mutator::Mutator; + + protected: + // Overrides of the per-type mutation hooks of the base class. + int32_t MutateInt32(int32_t value) override; + int64_t MutateInt64(int64_t value) override; + uint32_t MutateUInt32(uint32_t value) override; + uint64_t MutateUInt64(uint64_t value) override; + float MutateFloat(float value) override; + double MutateDouble(double value) override; + TProtoStringType MutateString(const TProtoStringType& value, + int size_increase_hint) override; +}; + +} // namespace libfuzzer +} // namespace protobuf_mutator + +#endif // SRC_LIBFUZZER_LIBFUZZER_MUTATOR_H_ diff --git a/contrib/libs/protobuf-mutator/src/mutator.cc b/contrib/libs/protobuf-mutator/src/mutator.cc new file mode 100644 index 0000000000..98f443e301 --- /dev/null +++ b/contrib/libs/protobuf-mutator/src/mutator.cc @@ -0,0 +1,812 @@ +// Copyright 2016 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "src/mutator.h" + +#include <algorithm> +#include <bitset> +#include <iostream> +#include <map> +#include <memory> +#include <random> +#include <string> +#include <utility> +#include <vector> + +#include "src/field_instance.h" +#include "src/utf8_fix.h" +#include "src/weighted_reservoir_sampler.h" + +namespace protobuf_mutator { + +using google::protobuf::Any; +using protobuf::Descriptor; +using protobuf::FieldDescriptor; +using protobuf::FileDescriptor; +using protobuf::Message; +using protobuf::OneofDescriptor; +using protobuf::Reflection; +using protobuf::util::MessageDifferencer; +using std::placeholders::_1; + +namespace { + +const int kMaxInitializeDepth = 200; +const uint64_t kDefaultMutateWeight = 1000000; + +enum class Mutation : uint8_t { + None, + Add, // Adds new field with default value. + Mutate, // Mutates field contents. + Delete, // Deletes field. + Copy, // Copy values copied from another field. + Clone, // Create new field with value copied from another. + + Last = Clone, +}; + +using MutationBitset = std::bitset<static_cast<size_t>(Mutation::Last) + 1>; + +using Messages = std::vector<Message*>; +using ConstMessages = std::vector<const Message*>; + +// Return random integer from [0, count) +size_t GetRandomIndex(RandomEngine* random, size_t count) { + assert(count > 0); + if (count == 1) return 0; + return std::uniform_int_distribution<size_t>(0, count - 1)(*random); +} + +// Flips random bit in the buffer. 
+void FlipBit(size_t size, uint8_t* bytes, RandomEngine* random) { + size_t bit = GetRandomIndex(random, size * 8); + bytes[bit / 8] ^= (1u << (bit % 8)); +} + +// Flips random bit in the value. +template <class T> +T FlipBit(T value, RandomEngine* random) { + FlipBit(sizeof(value), reinterpret_cast<uint8_t*>(&value), random); + return value; +} + +// Return true with probability about 1-of-n. +bool GetRandomBool(RandomEngine* random, size_t n = 2) { + return GetRandomIndex(random, n) == 0; +} + +bool IsProto3SimpleField(const FieldDescriptor& field) { + assert(field.file()->syntax() == FileDescriptor::SYNTAX_PROTO3 || + field.file()->syntax() == FileDescriptor::SYNTAX_PROTO2); + return field.file()->syntax() == FileDescriptor::SYNTAX_PROTO3 && + field.cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE && + !field.containing_oneof() && !field.is_repeated(); +} + +struct CreateDefaultField : public FieldFunction<CreateDefaultField> { + template <class T> + void ForType(const FieldInstance& field) const { + T value; + field.GetDefault(&value); + field.Create(value); + } +}; + +struct DeleteField : public FieldFunction<DeleteField> { + template <class T> + void ForType(const FieldInstance& field) const { + field.Delete(); + } +}; + +struct CopyField : public FieldFunction<CopyField> { + template <class T> + void ForType(const ConstFieldInstance& source, + const FieldInstance& field) const { + T value; + source.Load(&value); + field.Store(value); + } +}; + +struct AppendField : public FieldFunction<AppendField> { + template <class T> + void ForType(const ConstFieldInstance& source, + const FieldInstance& field) const { + T value; + source.Load(&value); + field.Create(value); + } +}; + +class CanCopyAndDifferentField + : public FieldFunction<CanCopyAndDifferentField, bool> { + public: + template <class T> + bool ForType(const ConstFieldInstance& src, const ConstFieldInstance& dst, + int size_increase_hint) const { + T s; + src.Load(&s); + if (!dst.CanStore(s)) return false; 
+ T d; + dst.Load(&d); + return SizeDiff(s, d) <= size_increase_hint && !IsEqual(s, d); + } + + private: + bool IsEqual(const ConstFieldInstance::Enum& a, + const ConstFieldInstance::Enum& b) const { + assert(a.count == b.count); + return a.index == b.index; + } + + bool IsEqual(const std::unique_ptr<Message>& a, + const std::unique_ptr<Message>& b) const { + return MessageDifferencer::Equals(*a, *b); + } + + template <class T> + bool IsEqual(const T& a, const T& b) const { + return a == b; + } + + int64_t SizeDiff(const std::unique_ptr<Message>& src, + const std::unique_ptr<Message>& dst) const { + return src->ByteSizeLong() - dst->ByteSizeLong(); + } + + int64_t SizeDiff(const TProtoStringType& src, const TProtoStringType& dst) const { + return src.size() - dst.size(); + } + + template <class T> + int64_t SizeDiff(const T&, const T&) const { + return 0; + } +}; + +// Selects random field and mutation from the given proto message. +class MutationSampler { + public: + MutationSampler(bool keep_initialized, MutationBitset allowed_mutations, + RandomEngine* random) + : keep_initialized_(keep_initialized), + allowed_mutations_(allowed_mutations), + random_(random), + sampler_(random) {} + + // Returns selected field. + const FieldInstance& field() const { return sampler_.selected().field; } + + // Returns selected mutation. 
+ Mutation mutation() const { return sampler_.selected().mutation; } + + void Sample(Message* message) { + SampleImpl(message); + assert(mutation() != Mutation::None || + !allowed_mutations_[static_cast<size_t>(Mutation::Mutate)] || + message->GetDescriptor()->field_count() == 0); + } + + private: + void SampleImpl(Message* message) { + const Descriptor* descriptor = message->GetDescriptor(); + const Reflection* reflection = message->GetReflection(); + + int field_count = descriptor->field_count(); + for (int i = 0; i < field_count; ++i) { + const FieldDescriptor* field = descriptor->field(i); + if (const OneofDescriptor* oneof = field->containing_oneof()) { + // Handle entire oneof group on the first field. + if (field->index_in_oneof() == 0) { + assert(oneof->field_count()); + const FieldDescriptor* current_field = + reflection->GetOneofFieldDescriptor(*message, oneof); + for (;;) { + const FieldDescriptor* add_field = + oneof->field(GetRandomIndex(random_, oneof->field_count())); + if (add_field != current_field) { + Try({message, add_field}, Mutation::Add); + Try({message, add_field}, Mutation::Clone); + break; + } + if (oneof->field_count() < 2) break; + } + if (current_field) { + if (current_field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) + Try({message, current_field}, Mutation::Mutate); + Try({message, current_field}, Mutation::Delete); + Try({message, current_field}, Mutation::Copy); + } + } + } else { + if (field->is_repeated()) { + int field_size = reflection->FieldSize(*message, field); + size_t random_index = GetRandomIndex(random_, field_size + 1); + Try({message, field, random_index}, Mutation::Add); + Try({message, field, random_index}, Mutation::Clone); + + if (field_size) { + size_t random_index = GetRandomIndex(random_, field_size); + if (field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) + Try({message, field, random_index}, Mutation::Mutate); + Try({message, field, random_index}, Mutation::Delete); + Try({message, field, 
random_index}, Mutation::Copy); + } + } else { + if (reflection->HasField(*message, field) || + IsProto3SimpleField(*field)) { + if (field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) + Try({message, field}, Mutation::Mutate); + if (!IsProto3SimpleField(*field) && + (!field->is_required() || !keep_initialized_)) { + Try({message, field}, Mutation::Delete); + } + Try({message, field}, Mutation::Copy); + } else { + Try({message, field}, Mutation::Add); + Try({message, field}, Mutation::Clone); + } + } + } + + if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { + if (field->is_repeated()) { + const int field_size = reflection->FieldSize(*message, field); + for (int j = 0; j < field_size; ++j) + SampleImpl(reflection->MutableRepeatedMessage(message, field, j)); + } else if (reflection->HasField(*message, field)) { + SampleImpl(reflection->MutableMessage(message, field)); + } + } + } + } + + void Try(const FieldInstance& field, Mutation mutation) { + assert(mutation != Mutation::None); + if (!allowed_mutations_[static_cast<size_t>(mutation)]) return; + sampler_.Try(kDefaultMutateWeight, {field, mutation}); + } + + bool keep_initialized_ = false; + MutationBitset allowed_mutations_; + + RandomEngine* random_; + + struct Result { + Result() = default; + Result(const FieldInstance& f, Mutation m) : field(f), mutation(m) {} + + FieldInstance field; + Mutation mutation = Mutation::None; + }; + WeightedReservoirSampler<Result, RandomEngine> sampler_; +}; + +// Selects random field of compatible type to use for clone mutations. +class DataSourceSampler { + public: + DataSourceSampler(const ConstFieldInstance& match, RandomEngine* random, + int size_increase_hint) + : match_(match), + random_(random), + size_increase_hint_(size_increase_hint), + sampler_(random) {} + + void Sample(const Message& message) { SampleImpl(message); } + + // Returns selected field. 
+ const ConstFieldInstance& field() const { + assert(!IsEmpty()); + return sampler_.selected(); + } + + bool IsEmpty() const { return sampler_.IsEmpty(); } + + private: + void SampleImpl(const Message& message) { + const Descriptor* descriptor = message.GetDescriptor(); + const Reflection* reflection = message.GetReflection(); + + int field_count = descriptor->field_count(); + for (int i = 0; i < field_count; ++i) { + const FieldDescriptor* field = descriptor->field(i); + if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { + if (field->is_repeated()) { + const int field_size = reflection->FieldSize(message, field); + for (int j = 0; j < field_size; ++j) { + SampleImpl(reflection->GetRepeatedMessage(message, field, j)); + } + } else if (reflection->HasField(message, field)) { + SampleImpl(reflection->GetMessage(message, field)); + } + } + + if (field->cpp_type() != match_.cpp_type()) continue; + if (match_.cpp_type() == FieldDescriptor::CPPTYPE_ENUM) { + if (field->enum_type() != match_.enum_type()) continue; + } else if (match_.cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { + if (field->message_type() != match_.message_type()) continue; + } + + if (field->is_repeated()) { + if (int field_size = reflection->FieldSize(message, field)) { + ConstFieldInstance source(&message, field, + GetRandomIndex(random_, field_size)); + if (CanCopyAndDifferentField()(source, match_, size_increase_hint_)) + sampler_.Try(field_size, source); + } + } else { + if (reflection->HasField(message, field)) { + ConstFieldInstance source(&message, field); + if (CanCopyAndDifferentField()(source, match_, size_increase_hint_)) + sampler_.Try(1, source); + } + } + } + } + + ConstFieldInstance match_; + RandomEngine* random_; + int size_increase_hint_; + + WeightedReservoirSampler<ConstFieldInstance, RandomEngine> sampler_; +}; + +using UnpackedAny = + std::unordered_map<const Message*, std::unique_ptr<Message>>; + +const Descriptor* GetAnyTypeDescriptor(const Any& any) { + 
TProtoStringType type_name; + if (!Any::ParseAnyTypeUrl(TProtoStringType(any.type_url()), &type_name)) + return nullptr; + return any.descriptor()->file()->pool()->FindMessageTypeByName(type_name); +} + +std::unique_ptr<Message> UnpackAny(const Any& any) { + const Descriptor* desc = GetAnyTypeDescriptor(any); + if (!desc) return {}; + std::unique_ptr<Message> message( + any.GetReflection()->GetMessageFactory()->GetPrototype(desc)->New()); + message->ParsePartialFromString(TProtoStringType(any.value())); + return message; +} + +const Any* CastToAny(const Message* message) { + return Any::GetDescriptor() == message->GetDescriptor() + ? static_cast<const Any*>(message) + : nullptr; +} + +Any* CastToAny(Message* message) { + return Any::GetDescriptor() == message->GetDescriptor() + ? static_cast<Any*>(message) + : nullptr; +} + +std::unique_ptr<Message> UnpackIfAny(const Message& message) { + if (const Any* any = CastToAny(&message)) return UnpackAny(*any); + return {}; +} + +void UnpackAny(const Message& message, UnpackedAny* result) { + if (std::unique_ptr<Message> any = UnpackIfAny(message)) { + UnpackAny(*any, result); + result->emplace(&message, std::move(any)); + return; + } + + const Descriptor* descriptor = message.GetDescriptor(); + const Reflection* reflection = message.GetReflection(); + + for (int i = 0; i < descriptor->field_count(); ++i) { + const FieldDescriptor* field = descriptor->field(i); + if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { + if (field->is_repeated()) { + const int field_size = reflection->FieldSize(message, field); + for (int j = 0; j < field_size; ++j) { + UnpackAny(reflection->GetRepeatedMessage(message, field, j), result); + } + } else if (reflection->HasField(message, field)) { + UnpackAny(reflection->GetMessage(message, field), result); + } + } + } +} + +class PostProcessing { + public: + using PostProcessors = + std::unordered_multimap<const Descriptor*, Mutator::PostProcess>; + + PostProcessing(bool 
keep_initialized, const PostProcessors& post_processors, + const UnpackedAny& any, RandomEngine* random) + : keep_initialized_(keep_initialized), + post_processors_(post_processors), + any_(any), + random_(random) {} + + void Run(Message* message, int max_depth) { + --max_depth; + const Descriptor* descriptor = message->GetDescriptor(); + + // Apply custom mutators in nested messages before packing any. + const Reflection* reflection = message->GetReflection(); + for (int i = 0; i < descriptor->field_count(); i++) { + const FieldDescriptor* field = descriptor->field(i); + if (keep_initialized_ && + (field->is_required() || descriptor->options().map_entry()) && + !reflection->HasField(*message, field)) { + CreateDefaultField()(FieldInstance(message, field)); + } + + if (field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) continue; + + if (max_depth < 0 && !field->is_required()) { + // Clear deep optional fields to avoid stack overflow. + reflection->ClearField(message, field); + if (field->is_repeated()) + assert(!reflection->FieldSize(*message, field)); + else + assert(!reflection->HasField(*message, field)); + continue; + } + + if (field->is_repeated()) { + const int field_size = reflection->FieldSize(*message, field); + for (int j = 0; j < field_size; ++j) { + Message* nested_message = + reflection->MutableRepeatedMessage(message, field, j); + Run(nested_message, max_depth); + } + } else if (reflection->HasField(*message, field)) { + Message* nested_message = reflection->MutableMessage(message, field); + Run(nested_message, max_depth); + } + } + + if (Any* any = CastToAny(message)) { + if (max_depth < 0) { + // Clear deep Any fields to avoid stack overflow. 
+ any->Clear(); + } else { + auto It = any_.find(message); + if (It != any_.end()) { + Run(It->second.get(), max_depth); + TProtoStringType value; + It->second->SerializePartialToString(&value); + *any->mutable_value() = value; + } + } + } + + // Call user callback after message trimmed, initialized and packed. + auto range = post_processors_.equal_range(descriptor); + for (auto it = range.first; it != range.second; ++it) + it->second(message, (*random_)()); + } + + private: + bool keep_initialized_; + const PostProcessors& post_processors_; + const UnpackedAny& any_; + RandomEngine* random_; +}; + +} // namespace + +class FieldMutator { + public: + FieldMutator(int size_increase_hint, bool enforce_changes, + bool enforce_utf8_strings, const ConstMessages& sources, + Mutator* mutator) + : size_increase_hint_(size_increase_hint), + enforce_changes_(enforce_changes), + enforce_utf8_strings_(enforce_utf8_strings), + sources_(sources), + mutator_(mutator) {} + + void Mutate(int32_t* value) const { + RepeatMutate(value, std::bind(&Mutator::MutateInt32, mutator_, _1)); + } + + void Mutate(int64_t* value) const { + RepeatMutate(value, std::bind(&Mutator::MutateInt64, mutator_, _1)); + } + + void Mutate(uint32_t* value) const { + RepeatMutate(value, std::bind(&Mutator::MutateUInt32, mutator_, _1)); + } + + void Mutate(uint64_t* value) const { + RepeatMutate(value, std::bind(&Mutator::MutateUInt64, mutator_, _1)); + } + + void Mutate(float* value) const { + RepeatMutate(value, std::bind(&Mutator::MutateFloat, mutator_, _1)); + } + + void Mutate(double* value) const { + RepeatMutate(value, std::bind(&Mutator::MutateDouble, mutator_, _1)); + } + + void Mutate(bool* value) const { + RepeatMutate(value, std::bind(&Mutator::MutateBool, mutator_, _1)); + } + + void Mutate(FieldInstance::Enum* value) const { + RepeatMutate(&value->index, + std::bind(&Mutator::MutateEnum, mutator_, _1, value->count)); + assert(value->index < value->count); + } + + void Mutate(TProtoStringType* 
value) const { + if (enforce_utf8_strings_) { + RepeatMutate(value, std::bind(&Mutator::MutateUtf8String, mutator_, _1, + size_increase_hint_)); + } else { + RepeatMutate(value, std::bind(&Mutator::MutateString, mutator_, _1, + size_increase_hint_)); + } + } + + void Mutate(std::unique_ptr<Message>* message) const { + assert(!enforce_changes_); + assert(*message); + if (GetRandomBool(mutator_->random(), mutator_->random_to_default_ratio_)) + return; + mutator_->MutateImpl(sources_, {message->get()}, false, + size_increase_hint_); + } + + private: + template <class T, class F> + void RepeatMutate(T* value, F mutate) const { + if (!enforce_changes_ && + GetRandomBool(mutator_->random(), mutator_->random_to_default_ratio_)) { + return; + } + T tmp = *value; + for (int i = 0; i < 10; ++i) { + *value = mutate(*value); + if (!enforce_changes_ || *value != tmp) return; + } + } + + int size_increase_hint_; + size_t enforce_changes_; + bool enforce_utf8_strings_; + const ConstMessages& sources_; + Mutator* mutator_; +}; + +namespace { + +struct MutateField : public FieldFunction<MutateField> { + template <class T> + void ForType(const FieldInstance& field, int size_increase_hint, + const ConstMessages& sources, Mutator* mutator) const { + T value; + field.Load(&value); + FieldMutator(size_increase_hint, true, field.EnforceUtf8(), sources, + mutator) + .Mutate(&value); + field.Store(value); + } +}; + +struct CreateField : public FieldFunction<CreateField> { + public: + template <class T> + void ForType(const FieldInstance& field, int size_increase_hint, + const ConstMessages& sources, Mutator* mutator) const { + T value; + field.GetDefault(&value); + FieldMutator field_mutator(size_increase_hint, + false /* defaults could be useful */, + field.EnforceUtf8(), sources, mutator); + field_mutator.Mutate(&value); + field.Create(value); + } +}; + +} // namespace + +void Mutator::Seed(uint32_t value) { random_.seed(value); } + +void Mutator::Fix(Message* message) { + UnpackedAny 
any; + UnpackAny(*message, &any); + + PostProcessing(keep_initialized_, post_processors_, any, &random_) + .Run(message, kMaxInitializeDepth); + assert(IsInitialized(*message)); +} + +void Mutator::Mutate(Message* message, size_t max_size_hint) { + UnpackedAny any; + UnpackAny(*message, &any); + + Messages messages; + messages.reserve(any.size() + 1); + messages.push_back(message); + for (const auto& kv : any) messages.push_back(kv.second.get()); + + ConstMessages sources(messages.begin(), messages.end()); + MutateImpl(sources, messages, false, + static_cast<int>(max_size_hint) - + static_cast<int>(message->ByteSizeLong())); + + PostProcessing(keep_initialized_, post_processors_, any, &random_) + .Run(message, kMaxInitializeDepth); + assert(IsInitialized(*message)); +} + +void Mutator::CrossOver(const Message& message1, Message* message2, + size_t max_size_hint) { + UnpackedAny any; + UnpackAny(*message2, &any); + + Messages messages; + messages.reserve(any.size() + 1); + messages.push_back(message2); + for (auto& kv : any) messages.push_back(kv.second.get()); + + UnpackAny(message1, &any); + + ConstMessages sources; + sources.reserve(any.size() + 2); + sources.push_back(&message1); + sources.push_back(message2); + for (const auto& kv : any) sources.push_back(kv.second.get()); + + MutateImpl(sources, messages, true, + static_cast<int>(max_size_hint) - + static_cast<int>(message2->ByteSizeLong())); + + PostProcessing(keep_initialized_, post_processors_, any, &random_) + .Run(message2, kMaxInitializeDepth); + assert(IsInitialized(*message2)); +} + +void Mutator::RegisterPostProcessor(const Descriptor* desc, + PostProcess callback) { + post_processors_.emplace(desc, callback); +} + +bool Mutator::MutateImpl(const ConstMessages& sources, const Messages& messages, + bool copy_clone_only, int size_increase_hint) { + MutationBitset mutations; + if (copy_clone_only) { + mutations[static_cast<size_t>(Mutation::Copy)] = true; + mutations[static_cast<size_t>(Mutation::Clone)] 
= true; + } else if (size_increase_hint <= 16) { + mutations[static_cast<size_t>(Mutation::Delete)] = true; + } else { + mutations.set(); + mutations[static_cast<size_t>(Mutation::Copy)] = false; + mutations[static_cast<size_t>(Mutation::Clone)] = false; + } + while (mutations.any()) { + MutationSampler mutation(keep_initialized_, mutations, &random_); + for (Message* message : messages) mutation.Sample(message); + + switch (mutation.mutation()) { + case Mutation::None: + return true; + case Mutation::Add: + CreateField()(mutation.field(), size_increase_hint, sources, this); + return true; + case Mutation::Mutate: + MutateField()(mutation.field(), size_increase_hint, sources, this); + return true; + case Mutation::Delete: + DeleteField()(mutation.field()); + return true; + case Mutation::Clone: { + CreateDefaultField()(mutation.field()); + DataSourceSampler source_sampler(mutation.field(), &random_, + size_increase_hint); + for (const Message* source : sources) source_sampler.Sample(*source); + if (source_sampler.IsEmpty()) { + if (!IsProto3SimpleField(*mutation.field().descriptor())) + return true; // CreateField is enough for proto2. + break; + } + CopyField()(source_sampler.field(), mutation.field()); + return true; + } + case Mutation::Copy: { + DataSourceSampler source_sampler(mutation.field(), &random_, + size_increase_hint); + for (const Message* source : sources) source_sampler.Sample(*source); + if (source_sampler.IsEmpty()) break; + CopyField()(source_sampler.field(), mutation.field()); + return true; + } + default: + assert(false && "unexpected mutation"); + return false; + } + + // Don't try same mutation next time. 
+ mutations[static_cast<size_t>(mutation.mutation())] = false; + } + return false; +} + +int32_t Mutator::MutateInt32(int32_t value) { return FlipBit(value, &random_); } + +int64_t Mutator::MutateInt64(int64_t value) { return FlipBit(value, &random_); } + +uint32_t Mutator::MutateUInt32(uint32_t value) { + return FlipBit(value, &random_); +} + +uint64_t Mutator::MutateUInt64(uint64_t value) { + return FlipBit(value, &random_); +} + +float Mutator::MutateFloat(float value) { return FlipBit(value, &random_); } + +double Mutator::MutateDouble(double value) { return FlipBit(value, &random_); } + +bool Mutator::MutateBool(bool value) { return !value; } + +size_t Mutator::MutateEnum(size_t index, size_t item_count) { + if (item_count <= 1) return 0; + return (index + 1 + GetRandomIndex(&random_, item_count - 1)) % item_count; +} + +TProtoStringType Mutator::MutateString(const TProtoStringType& value, + int size_increase_hint) { + TProtoStringType result = value; + + while (!result.empty() && GetRandomBool(&random_)) { + result.erase(GetRandomIndex(&random_, result.size()), 1); + } + + while (size_increase_hint > 0 && + result.size() < static_cast<size_t>(size_increase_hint) && + GetRandomBool(&random_)) { + size_t index = GetRandomIndex(&random_, result.size() + 1); + result.insert(result.begin() + index, GetRandomIndex(&random_, 1 << 8)); + } + + if (result != value) return result; + + if (result.empty()) { + result.push_back(GetRandomIndex(&random_, 1 << 8)); + return result; + } + + if (!result.empty()) + FlipBit(result.size(), reinterpret_cast<uint8_t*>(&result[0]), &random_); + return result; +} + +TProtoStringType Mutator::MutateUtf8String(const TProtoStringType& value, + int size_increase_hint) { + TProtoStringType str = MutateString(value, size_increase_hint); + FixUtf8String(&str, &random_); + return str; +} + +bool Mutator::IsInitialized(const Message& message) const { + if (!keep_initialized_ || message.IsInitialized()) return true; + std::cerr << 
"Uninitialized: " << message.DebugString() << "\n"; + return false; +} + +} // namespace protobuf_mutator diff --git a/contrib/libs/protobuf-mutator/src/mutator.h b/contrib/libs/protobuf-mutator/src/mutator.h new file mode 100644 index 0000000000..2f1d3a833c --- /dev/null +++ b/contrib/libs/protobuf-mutator/src/mutator.h @@ -0,0 +1,111 @@ +// Copyright 2016 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef SRC_MUTATOR_H_ +#define SRC_MUTATOR_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <functional> +#include <memory> +#include <random> +#include <string> +#include <unordered_map> +#include <vector> + +#include "port/protobuf.h" +#include "src/random.h" + +namespace protobuf_mutator { + +// Randomly makes incremental change in the given protobuf. +// Usage example: +// protobuf_mutator::Mutator mutator(1); +// MyMessage message; +// message.ParseFromString(encoded_message); +// mutator.Mutate(&message, 10000); +// +// Class implements very basic mutations of fields. E.g. it just flips bits for +// integers, floats and strings. Also it increases, decreases size of +// strings only by one. For better results users should override +// protobuf_mutator::Mutator::Mutate* methods with more useful logic, e.g. using +// library like libFuzzer. +class Mutator { + public: + // seed: value to initialize random number generator. 
+ Mutator() = default; + virtual ~Mutator() = default; + + // Initialized internal random number generator. + void Seed(uint32_t value); + + // message: message to mutate. + // max_size_hint: approximate max ByteSize() of resulting message. Method does + // not guarantee that real result will be strictly smaller than value. Caller + // could repeat mutation if result was larger than expected. + void Mutate(protobuf::Message* message, size_t max_size_hint); + + void CrossOver(const protobuf::Message& message1, protobuf::Message* message2, + size_t max_size_hint); + + // Makes message initialized and calls post processors to make it valid. + void Fix(protobuf::Message* message); + + // Callback to postprocess mutations. + // Implementation should use seed to initialize random number generators. + using PostProcess = + std::function<void(protobuf::Message* message, unsigned int seed)>; + + // Register callback which will be called after every message mutation. + // In this callback fuzzer may adjust content of the message or mutate some + // fields in some fuzzer specific way. + void RegisterPostProcessor(const protobuf::Descriptor* desc, + PostProcess callback); + + protected: + // TODO(vitalybuka): Consider to replace with single mutate (uint8_t*, size). 
+ virtual int32_t MutateInt32(int32_t value); + virtual int64_t MutateInt64(int64_t value); + virtual uint32_t MutateUInt32(uint32_t value); + virtual uint64_t MutateUInt64(uint64_t value); + virtual float MutateFloat(float value); + virtual double MutateDouble(double value); + virtual bool MutateBool(bool value); + virtual size_t MutateEnum(size_t index, size_t item_count); + virtual TProtoStringType MutateString(const TProtoStringType& value, + int size_increase_hint); + + // Gives subclasses access to this mutator's random engine. + RandomEngine* random() { return &random_; } + + private: + friend class FieldMutator; + friend class TestMutator; + bool MutateImpl(const std::vector<const protobuf::Message*>& sources, + const std::vector<protobuf::Message*>& messages, + bool copy_clone_only, int size_increase_hint); + TProtoStringType MutateUtf8String(const TProtoStringType& value, + int size_increase_hint); + bool IsInitialized(const protobuf::Message& message) const; + bool keep_initialized_ = true; + size_t random_to_default_ratio_ = 100; + RandomEngine random_; + // Post-processing callbacks keyed by message descriptor; a multimap, so + // several callbacks can be stored for the same descriptor. + using PostProcessors = + std::unordered_multimap<const protobuf::Descriptor*, PostProcess>; + PostProcessors post_processors_; +}; + +} // namespace protobuf_mutator + +#endif // SRC_MUTATOR_H_ diff --git a/contrib/libs/protobuf-mutator/src/random.h b/contrib/libs/protobuf-mutator/src/random.h new file mode 100644 index 0000000000..927997b9fb --- /dev/null +++ b/contrib/libs/protobuf-mutator/src/random.h @@ -0,0 +1,26 @@ +// Copyright 2017 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef SRC_RANDOM_H_ +#define SRC_RANDOM_H_ + +#include <random> + +namespace protobuf_mutator { + +// Random number engine type used by the library; an alias of +// std::minstd_rand. +using RandomEngine = std::minstd_rand; + +} // namespace protobuf_mutator + +#endif // SRC_RANDOM_H_ diff --git a/contrib/libs/protobuf-mutator/src/text_format.cc b/contrib/libs/protobuf-mutator/src/text_format.cc new file mode 100644 index 0000000000..d8a8342b19 --- /dev/null +++ b/contrib/libs/protobuf-mutator/src/text_format.cc @@ -0,0 +1,57 @@ +// Copyright 2017 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "src/text_format.h" + +#include "port/protobuf.h" + +namespace protobuf_mutator { + +using protobuf::Message; +using protobuf::TextFormat; + +// Convenience overload: wraps the |size| bytes at |data| in a string and +// forwards to the string overload below. +bool ParseTextMessage(const uint8_t* data, size_t size, Message* output) { + return ParseTextMessage({reinterpret_cast<const char*>(data), size}, output); +} + +// Clears |output| and parses |data| as protobuf text format with a recursion +// limit of 100, accepting partial messages and unknown fields. On failure +// |output| is cleared again and false is returned. +bool ParseTextMessage(const TProtoStringType& data, protobuf::Message* output) { + output->Clear(); + TextFormat::Parser parser; + parser.SetRecursionLimit(100); + parser.AllowPartialMessage(true); + parser.AllowUnknownField(true); + if (!parser.ParseFromString(data, output)) { + output->Clear(); + return false; + } + return true; +} + +// Serializes |message| to text and copies it into |data| when it fits in +// |max_size| bytes. Returns the number of bytes copied, or 0 when the text +// does not fit (an empty serialization also yields 0). +size_t SaveMessageAsText(const Message& message, uint8_t* data, + size_t max_size) { + TProtoStringType result = SaveMessageAsText(message); + if (result.size() <= max_size) { + // NOTE(review): memcpy is used without a visible #include <cstring>; + // presumably it is provided transitively -- confirm. + memcpy(data, result.data(), result.size()); + return result.size(); + } + return 0; +} + +// Returns the text-format representation of |message|, or an empty string if +// printing fails. +TProtoStringType SaveMessageAsText(const protobuf::Message& message) { + String tmp; + if (!protobuf::TextFormat::PrintToString(message, &tmp)) return {}; + return tmp; +} + +} // namespace protobuf_mutator diff --git a/contrib/libs/protobuf-mutator/src/text_format.h b/contrib/libs/protobuf-mutator/src/text_format.h new file mode 100644 index 0000000000..f4fb023ab5 --- /dev/null +++ b/contrib/libs/protobuf-mutator/src/text_format.h @@ -0,0 +1,34 @@ +// Copyright 2017 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef SRC_TEXT_FORMAT_H_ +#define SRC_TEXT_FORMAT_H_ + +#include <string> + +#include "port/protobuf.h" + +namespace protobuf_mutator { + +// Text serialization of protos. +// ParseTextMessage() returns false and leaves |output| cleared if parsing +// fails. The buffer form of SaveMessageAsText() returns the number of bytes +// written to |data|, or 0 if the text does not fit in |max_size|; the string +// form returns an empty string if printing fails. +bool ParseTextMessage(const uint8_t* data, size_t size, + protobuf::Message* output); +bool ParseTextMessage(const TProtoStringType& data, protobuf::Message* output); +size_t SaveMessageAsText(const protobuf::Message& message, uint8_t* data, + size_t max_size); +TProtoStringType SaveMessageAsText(const protobuf::Message& message); + +} // namespace protobuf_mutator + +#endif // SRC_TEXT_FORMAT_H_ diff --git a/contrib/libs/protobuf-mutator/src/utf8_fix.cc b/contrib/libs/protobuf-mutator/src/utf8_fix.cc new file mode 100644 index 0000000000..845de1cdd3 --- /dev/null +++ b/contrib/libs/protobuf-mutator/src/utf8_fix.cc @@ -0,0 +1,94 @@ +// Copyright 2017 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "src/utf8_fix.h" + +#include <google/protobuf/stubs/port.h> + +#include <algorithm> +#include <cassert> + +namespace protobuf_mutator { + +namespace { + +// Encodes |code| as a |size|-byte UTF-8 sequence that ends right before |e|. +// The size - 1 continuation bytes (0x80 | 6 payload bits) are written back to +// front, then the lead byte is formed from |prefix| and the remaining bits. +void StoreCode(char* e, char32_t code, uint8_t size, uint8_t prefix) { + while (--size) { + *(--e) = 0x80 | (code & 0x3F); + code >>= 6; + } + *(--e) = prefix | code; +} + +// Rewrites, in place, the sequence starting at |b| (the first byte plus up to +// three following continuation bytes, never reading at or past |e|) as a +// UTF-8 encoding of the same length. Values that are not valid for that +// length are replaced with a random code point from the valid range. +// Returns a pointer to the byte after the sequence. +char* FixCode(char* b, const char* e, RandomEngine* random) { + const char* start = b; + assert(b < e); + + // Look at no more than 4 bytes in total. + e = std::min<const char*>(e, b + 4); + char32_t c = *b++; + // Append the 6 payload bits of each following continuation byte (10xxxxxx). + for (; b < e && (*b & 0xC0) == 0x80; ++b) { + c = (c << 6) + (*b & 0x3F); + } + uint8_t size = b - start; + switch (size) { + case 1: + c &= 0x7F; + StoreCode(b, c, size, 0); + break; + case 2: + c &= 0x7FF; + if (c < 0x80) { + // Use uint32_t because uniform_int_distribution does not support + // char32_t on Windows. + c = std::uniform_int_distribution<uint32_t>(0x80, 0x7FF)(*random); + } + StoreCode(b, c, size, 0xC0); + break; + case 3: + c &= 0xFFFF; + + // [0xD800, 0xE000) are reserved for UTF-16 surrogate halves. 
+ if (c < 0x800 || (c >= 0xD800 && c < 0xE000)) { + uint32_t halves = 0xE000 - 0xD800; + // Draw from a range with the surrogate gap removed, then shift values at + // or above 0xD800 past the gap. + c = std::uniform_int_distribution<uint32_t>(0x800, + 0xFFFF - halves)(*random); + if (c >= 0xD800) c += halves; + } + StoreCode(b, c, size, 0xE0); + break; + case 4: + c &= 0x1FFFFF; + // Values outside [0x10000, 0x10FFFF] are replaced for the 4-byte form. + if (c < 0x10000 || c > 0x10FFFF) { + c = std::uniform_int_distribution<uint32_t>(0x10000, 0x10FFFF)(*random); + } + StoreCode(b, c, size, 0xF0); + break; + default: + assert(false && "Unexpected size of UTF-8 sequence"); + } + return b; +} + +} // namespace + +// Rewrites |str| in place, one code sequence at a time, via FixCode(); the +// byte length of the string does not change. |random| supplies replacement +// code points where the existing bytes cannot be kept. Empty strings are +// left untouched. +void FixUtf8String(TProtoStringType* str, RandomEngine* random) { + if (str->empty()) return; + char* b = &(*str)[0]; + const char* e = b + str->size(); + while (b < e) { + b = FixCode(b, e, random); + } +} + +} // namespace protobuf_mutator diff --git a/contrib/libs/protobuf-mutator/src/utf8_fix.h b/contrib/libs/protobuf-mutator/src/utf8_fix.h new file mode 100644 index 0000000000..8ec7e44285 --- /dev/null +++ b/contrib/libs/protobuf-mutator/src/utf8_fix.h @@ -0,0 +1,30 @@ +// Copyright 2017 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef SRC_UTF8_FIX_H_ +#define SRC_UTF8_FIX_H_ + +#include <string> + +#include <google/protobuf/stubs/common.h> + +#include "src/random.h" + +namespace protobuf_mutator { + +// Rewrites |str| in place as UTF-8 without changing its byte length; +// |random| is used to pick replacement code points. +void FixUtf8String(TProtoStringType* str, RandomEngine* random); + +} // namespace protobuf_mutator + +#endif // SRC_UTF8_FIX_H_ diff --git a/contrib/libs/protobuf-mutator/src/weighted_reservoir_sampler.h b/contrib/libs/protobuf-mutator/src/weighted_reservoir_sampler.h new file mode 100644 index 0000000000..d272bb5228 --- /dev/null +++ b/contrib/libs/protobuf-mutator/src/weighted_reservoir_sampler.h @@ -0,0 +1,59 @@ +// Copyright 2016 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef SRC_WEIGHTED_RESERVOIR_SAMPLER_H_ +#define SRC_WEIGHTED_RESERVOIR_SAMPLER_H_ + +#include <cassert> +#include <random> + +namespace protobuf_mutator { + +// Algorithm picks one item from the sequence of weighted items. 
+// https://en.wikipedia.org/wiki/Reservoir_sampling#Algorithm_A-Chao +// +// Example: +// std::default_random_engine random; +// WeightedReservoirSampler<int> sampler(&random); +// for (int i = 0; i < size; ++i) +// sampler.Try(weight[i], i); +// return sampler.selected(); +template <class T, class RandomEngine = std::default_random_engine> +class WeightedReservoirSampler { + public: + // |random| is stored as a pointer and used on every Try() call. + explicit WeightedReservoirSampler(RandomEngine* random) : random_(random) {} + + // Offers |item| with the given |weight|. The item replaces the current + // selection with probability weight / (sum of weights offered so far, + // including this one). Items with zero weight are ignored. + void Try(uint64_t weight, const T& item) { + if (Pick(weight)) selected_ = item; + } + + // Returns the currently selected item; a value-initialized T if nothing has + // been selected yet. + const T& selected() const { return selected_; } + + // True until an item with non-zero weight has been offered. + bool IsEmpty() const { return total_weight_ == 0; } + + private: + // Adds |weight| to the running total and decides whether the new item + // should replace the selection. The first non-zero-weight item is always + // picked without consuming a random number. + bool Pick(uint64_t weight) { + if (weight == 0) return false; + total_weight_ += weight; + return weight == total_weight_ || std::uniform_int_distribution<uint64_t>( + 1, total_weight_)(*random_) <= weight; + } + + T selected_ = {}; + uint64_t total_weight_ = 0; + RandomEngine* random_; +}; + +} // namespace protobuf_mutator + +#endif // SRC_WEIGHTED_RESERVOIR_SAMPLER_H_ |