#include "binary_annotator.h"

#include <algorithm>
#include <cctype>
#include <cstdint>
#include <iostream>
#include <limits>
#include <string>
#include <utility>
#include <vector>

#include "flatbuffers/base.h"
#include "flatbuffers/reflection.h"
#include "flatbuffers/util.h"
#include "flatbuffers/verifier.h"

namespace flatbuffers {
namespace {

// Strict-weak ordering of regions by their starting offset.
static bool BinaryRegionSort(const BinaryRegion &a, const BinaryRegion &b) {
  return a.offset < b.offset;
}

// Stores `status` and the (optional) `message` on `comment`.
static void SetError(BinaryRegionComment &comment, BinaryRegionStatus status,
                     std::string message = "") {
  comment.status = status;
  // `message` is taken by value, so hand its storage over instead of copying.
  comment.status_message = std::move(message);
}

// Builds a BinaryRegion from its individual fields. All arguments default to
// zero / Unknown / an empty comment.
static BinaryRegion MakeBinaryRegion(
    const uint64_t offset = 0, const uint64_t length = 0,
    const BinaryRegionType type = BinaryRegionType::Unknown,
    const uint64_t array_length = 0, const uint64_t points_to_offset = 0,
    BinaryRegionComment comment = {}) {
  BinaryRegion region;
  region.offset = offset;
  region.length = length;
  region.type = type;
  region.array_length = array_length;
  region.points_to_offset = points_to_offset;
  region.comment = std::move(comment);
  return region;
}

// Builds a BinarySection named `name` of kind `type` that owns `regions`.
static BinarySection MakeBinarySection(const std::string &name,
                                       const BinarySectionType type,
                                       std::vector<BinaryRegion> regions) {
  BinarySection section;
  section.name = name;
  section.type = type;
  section.regions = std::move(regions);
  return section;
}

// Convenience wrapper that builds a BinarySection holding exactly one region.
static BinarySection MakeSingleRegionBinarySection(const std::string &name,
                                                   const BinarySectionType type,
                                                   const BinaryRegion &region) {
  std::vector<BinaryRegion> regions;
  regions.push_back(region);
  return MakeBinarySection(name, type, std::move(regions));
}

// Returns true if any byte in binary[offset, offset + length) is non-zero.
// The caller is responsible for making sure the range lies inside `binary`.
static bool IsNonZeroRegion(const uint64_t offset, const uint64_t length,
                            const uint8_t *const binary) {
  for (uint64_t i = offset; i < offset + length; ++i) {
    if (binary[i] != 0) { return true; }
  }
  return false;
}

// Returns true if every byte in binary[offset, offset + length) is a printable
// character according to std::isprint. The caller is responsible for making
// sure the range lies inside `binary`.
static bool IsPrintableRegion(const uint64_t offset, const uint64_t length,
                              const uint8_t *const binary) {
  for (uint64_t i = offset; i < offset + length; ++i) {
    // std::isprint requires a value representable as unsigned char.
    if (!std::isprint(static_cast<unsigned char>(binary[i]))) { return false; }
  }
  return true;
}

// Builds a section describing `length` bytes at `offset` that no other section
// covers. All-zero bytes are reported as padding; anything else is reported as
// an unknown ("no known references") section.
static BinarySection GenerateMissingSection(const uint64_t offset,
                                            const uint64_t length,
                                            const uint8_t *const binary) {
  std::vector<BinaryRegion> regions;

  // Check if the region is all zeros or not, as that can tell us if it is
  // padding or not.
  if (IsNonZeroRegion(offset, length, binary)) {
    // Some of the padding bytes are non-zero, so this might be an unknown
    // section of the binary.
    // TODO(dbaileychess): We could be a bit smarter with different sized
    // alignments. For now, the 8 byte check encompasses all the smaller
    // alignments.
    BinaryRegionComment comment;
    comment.type = BinaryRegionCommentType::Unknown;
    if (length >= 8) {
      SetError(comment, BinaryRegionStatus::WARN_NO_REFERENCES);
    } else {
      SetError(comment, BinaryRegionStatus::WARN_CORRUPTED_PADDING);
    }

    regions.push_back(MakeBinaryRegion(offset, length * sizeof(uint8_t),
                                       BinaryRegionType::Unknown, length, 0,
                                       comment));

    return MakeBinarySection("no known references", BinarySectionType::Unknown,
                             std::move(regions));
  }

  BinaryRegionComment comment;
  comment.type = BinaryRegionCommentType::Padding;
  if (length >= 8) {
    SetError(comment, BinaryRegionStatus::WARN_PADDING_LENGTH);
  }

  // This region is most likely padding.
  regions.push_back(MakeBinaryRegion(offset, length * sizeof(uint8_t),
                                     BinaryRegionType::Uint8, length, 0,
                                     comment));

  return MakeBinarySection("", BinarySectionType::Padding, std::move(regions));
}

}  // namespace

// Annotates the whole binary and returns the resulting sections keyed by
// offset. Returns an empty map if the schema buffer fails verification or the
// binary is shorter than FLATBUFFERS_MIN_BUFFER_SIZE.
std::map<uint64_t, BinarySection> BinaryAnnotator::Annotate() {
  flatbuffers::Verifier verifier(bfbs_, static_cast<size_t>(bfbs_length_));

  if ((is_size_prefixed_ &&
       !reflection::VerifySizePrefixedSchemaBuffer(verifier)) ||
      !reflection::VerifySchemaBuffer(verifier)) {
    return {};
  }

  // The binary is too short to read as a flatbuffers.
  if (binary_length_ < FLATBUFFERS_MIN_BUFFER_SIZE) { return {}; }

  // Make sure we start with a clean slate.
  vtables_.clear();
  sections_.clear();

  // First parse the header region which always start at offset 0.
  // The returned offset will point to the root_table location.
  const uint64_t root_table_offset = BuildHeader(0);

  if (IsValidOffset(root_table_offset)) {
    // Build the root table, and all else will be referenced from it.
    BuildTable(root_table_offset, BinarySectionType::RootTable,
               schema_->root_table());
  }

  // Now that all the sections are built, make sure the binary sections are
  // contiguous.
  FixMissingRegions();

  // Then scan the area between BinarySections insert padding sections that are
  // implied.
  FixMissingSections();

  return sections_;
}

// Annotates the header starting at `header_offset`: the optional size prefix,
// the root table offset and, when its bytes are printable, the file
// identifier. Returns the absolute location of the root table, or
// std::numeric_limits<uint64_t>::max() if the root offset cannot be read.
uint64_t BinaryAnnotator::BuildHeader(const uint64_t header_offset) {
  uint64_t offset = header_offset;
  std::vector<BinaryRegion> regions;

  // If this binary is a size prefixed one, attempt to parse the size.
  if (is_size_prefixed_) {
    BinaryRegionComment prefix_length_comment;
    prefix_length_comment.type = BinaryRegionCommentType::SizePrefix;

    bool has_prefix_value = false;

    // Try a 64-bit prefix first. Only trust the value if the read succeeded;
    // dereferencing an empty optional would otherwise yield a bogus length.
    const auto prefix_length64 = ReadScalar<uoffset64_t>(offset);
    if (prefix_length64.has_value() && *prefix_length64 <= binary_length_) {
      regions.push_back(MakeBinaryRegion(offset, sizeof(uoffset64_t),
                                         BinaryRegionType::Uint64, 0, 0,
                                         prefix_length_comment));
      offset += sizeof(uoffset64_t);
      has_prefix_value = true;
    }

    // Fall back to a 32-bit prefix.
    if (!has_prefix_value) {
      const auto prefix_length32 = ReadScalar<uoffset_t>(offset);
      if (prefix_length32.has_value() && *prefix_length32 <= binary_length_) {
        regions.push_back(MakeBinaryRegion(offset, sizeof(uoffset_t),
                                           BinaryRegionType::Uint32, 0, 0,
                                           prefix_length_comment));
        offset += sizeof(uoffset_t);
        has_prefix_value = true;
      }
    }

    if (!has_prefix_value) {
      // NOTE(review): `prefix_length_comment` is not attached to any region on
      // this path, so this status is never surfaced - confirm whether an error
      // region should be emitted here.
      SetError(prefix_length_comment, BinaryRegionStatus::ERROR);
    }
  }

  const auto root_table_offset = ReadScalar<uint32_t>(offset);

  if (!root_table_offset.has_value()) {
    // This shouldn't occur, since we validate the min size of the buffer
    // before. But for completion sake, we shouldn't read past the binary end.
    return std::numeric_limits<uint64_t>::max();
  }

  // The stored offset is relative to the location it is read from.
  const auto root_table_loc = offset + *root_table_offset;

  BinaryRegionComment root_offset_comment;
  root_offset_comment.type = BinaryRegionCommentType::RootTableOffset;
  root_offset_comment.name = schema_->root_table()->name()->str();

  if (!IsValidOffset(root_table_loc)) {
    SetError(root_offset_comment,
             BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
  }

  regions.push_back(MakeBinaryRegion(offset, sizeof(uint32_t),
                                     BinaryRegionType::UOffset, 0,
                                     root_table_loc, root_offset_comment));
  offset += sizeof(uint32_t);

  if (IsValidRead(offset, flatbuffers::kFileIdentifierLength) &&
      IsPrintableRegion(offset, flatbuffers::kFileIdentifierLength, binary_)) {
    BinaryRegionComment comment;
    comment.type = BinaryRegionCommentType::FileIdentifier;
    // The bytes after the root offset are all printable, so assume they are
    // the file identifier. Otherwise, it will get filled in with padding
    // later.
    regions.push_back(MakeBinaryRegion(
        offset, flatbuffers::kFileIdentifierLength * sizeof(uint8_t),
        BinaryRegionType::Char, flatbuffers::kFileIdentifierLength, 0,
        comment));
  }

  AddSection(header_offset, MakeBinarySection("", BinarySectionType::Header,
                                              std::move(regions)));

  return root_table_loc;
}

// Returns the vtable at `vtable_offset` as interpreted for `table`, building
// and recording it first if it has not been seen for that table type.
// `offset_of_referring_table` is the location of the table that points at this
// vtable and is used to validate the table size and field offsets.
// Returns nullptr when the vtable cannot be parsed; in most of those cases an
// error section describing the problem is added.
BinaryAnnotator::VTable *BinaryAnnotator::GetOrBuildVTable(
    const uint64_t vtable_offset, const reflection::Object *const table,
    const uint64_t offset_of_referring_table) {
  // Get a list of vtables (if any) already defined at this offset.
  std::list<VTable> &vtables = vtables_[vtable_offset];

  // See if this vtable for the table type has been generated before.
  for (VTable &vtable : vtables) {
    if (vtable.referring_table == table) { return &vtable; }
  }

  // If we are trying to make a new vtable and it is already encompassed by
  // another binary section, something is corrupted.
  if (vtables.empty() && ContainsSection(vtable_offset)) { return nullptr; }

  const std::string referring_table_name = table->name()->str();

  BinaryRegionComment vtable_size_comment;
  vtable_size_comment.type = BinaryRegionCommentType::VTableSize;

  const auto vtable_length = ReadScalar<uint16_t>(vtable_offset);
  if (!vtable_length.has_value()) {
    const uint64_t remaining = RemainingBytes(vtable_offset);

    SetError(vtable_size_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
             "2");

    AddSection(vtable_offset,
               MakeSingleRegionBinarySection(
                   referring_table_name, BinarySectionType::VTable,
                   MakeBinaryRegion(vtable_offset, remaining,
                                    BinaryRegionType::Unknown, remaining, 0,
                                    vtable_size_comment)));
    return nullptr;
  }

  // Vtables start with the size of the vtable
  const uint16_t vtable_size = vtable_length.value();

  if (!IsValidOffset(vtable_offset + vtable_size - 1)) {
    SetError(vtable_size_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_LONG);
    // The vtable_size points to off the end of the binary.
    AddSection(vtable_offset,
               MakeSingleRegionBinarySection(
                   referring_table_name, BinarySectionType::VTable,
                   MakeBinaryRegion(vtable_offset, sizeof(uint16_t),
                                    BinaryRegionType::Uint16, 0, 0,
                                    vtable_size_comment)));

    return nullptr;
  } else if (vtable_size < 2 * sizeof(uint16_t)) {
    SetError(vtable_size_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_SHORT,
             "4");
    // The size includes itself and the table size which are both uint16_t.
    AddSection(vtable_offset,
               MakeSingleRegionBinarySection(
                   referring_table_name, BinarySectionType::VTable,
                   MakeBinaryRegion(vtable_offset, sizeof(uint16_t),
                                    BinaryRegionType::Uint16, 0, 0,
                                    vtable_size_comment)));
    return nullptr;
  }

  std::vector<BinaryRegion> regions;

  regions.push_back(MakeBinaryRegion(vtable_offset, sizeof(uint16_t),
                                     BinaryRegionType::Uint16, 0, 0,
                                     vtable_size_comment));
  uint64_t offset = vtable_offset + sizeof(uint16_t);

  BinaryRegionComment ref_table_len_comment;
  ref_table_len_comment.type =
      BinaryRegionCommentType::VTableRefferingTableLength;

  // Ensure we can read the next uint16_t field, which is the size of the
  // referring table.
  const auto table_length = ReadScalar<uint16_t>(offset);

  if (!table_length.has_value()) {
    const uint64_t remaining = RemainingBytes(offset);
    SetError(ref_table_len_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
             "2");

    AddSection(offset, MakeSingleRegionBinarySection(
                           referring_table_name, BinarySectionType::VTable,
                           MakeBinaryRegion(
                               offset, remaining, BinaryRegionType::Unknown,
                               remaining, 0, ref_table_len_comment)));
    return nullptr;
  }

  // Then they have the size of the table they reference.
  const uint16_t table_size = table_length.value();

  if (!IsValidOffset(offset_of_referring_table + table_size - 1)) {
    SetError(ref_table_len_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_LONG);
  } else if (table_size < 4) {
    SetError(ref_table_len_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_SHORT,
             "4");
  }

  regions.push_back(MakeBinaryRegion(offset, sizeof(uint16_t),
                                     BinaryRegionType::Uint16, 0, 0,
                                     ref_table_len_comment));
  offset += sizeof(uint16_t);

  // Location of the first field entry; entries are indexed by field id.
  const uint64_t offset_start = offset;

  // A mapping between field (and its id) to the relative offset (uint16_t)
  // from the start of the table.
  std::map<uint16_t, VTable::Entry> fields;

  // Counter for determining if the binary has more vtable entries than the
  // schema provided. This can occur if the binary was created at a newer schema
  // version and is being processed with an older one.
  uint16_t fields_processed = 0;

  // Loop over all the fields.
  ForAllFields(table, /*reverse=*/false, [&](const reflection::Field *field) {
    const uint64_t field_offset = offset_start + field->id() * sizeof(uint16_t);

    if (field_offset >= vtable_offset + vtable_size) {
      // This field_offset is too large for this vtable, so it must come from a
      // newer schema than the binary was created with or the binary writer did
      // not write it. For either case, it is safe to ignore.

      // TODO(dbaileychess): We could show which fields are not set and their
      // default values if we want. We just need a way to make it obvious that
      // it isn't part of the buffer.
      return;
    }

    BinaryRegionComment field_comment;
    field_comment.type = BinaryRegionCommentType::VTableFieldOffset;
    field_comment.name = std::string(field->name()->c_str()) +
                         "` (id: " + std::to_string(field->id()) + ")";

    const auto offset_from_table = ReadScalar<uint16_t>(field_offset);

    if (!offset_from_table.has_value()) {
      const uint64_t remaining = RemainingBytes(field_offset);

      SetError(field_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "2");
      regions.push_back(MakeBinaryRegion(field_offset, remaining,
                                         BinaryRegionType::Unknown, remaining,
                                         0, field_comment));

      return;
    }

    if (!IsValidOffset(offset_of_referring_table + offset_from_table.value() -
                       1)) {
      SetError(field_comment, BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
      regions.push_back(MakeBinaryRegion(field_offset, sizeof(uint16_t),
                                         BinaryRegionType::VOffset, 0, 0,
                                         field_comment));
      return;
    }

    VTable::Entry entry;
    entry.field = field;
    entry.offset_from_table = offset_from_table.value();
    fields.insert(std::make_pair(field->id(), entry));

    std::string default_label;
    if (offset_from_table.value() == 0) {
      // Not present, so could be default or be optional.
      if (field->required()) {
        SetError(field_comment,
                 BinaryRegionStatus::ERROR_REQUIRED_FIELD_NOT_PRESENT);
        // If this is a required field, make it known this is an error.
        regions.push_back(MakeBinaryRegion(field_offset, sizeof(uint16_t),
                                           BinaryRegionType::VOffset, 0, 0,
                                           field_comment));
        return;
      } else {
        // Its an optional field, so get the default value and interpret and
        // provided an annotation for it.
        if (IsScalar(field->type()->base_type())) {
          default_label += "<defaults to ";
          default_label += IsFloat(field->type()->base_type())
                               ? std::to_string(field->default_real())
                               : std::to_string(field->default_integer());
          default_label += "> (";
        } else {
          default_label += "<null> (";
        }
        default_label +=
            reflection::EnumNameBaseType(field->type()->base_type());
        default_label += ")";
      }
    }
    field_comment.default_value = default_label;

    regions.push_back(MakeBinaryRegion(field_offset, sizeof(uint16_t),
                                       BinaryRegionType::VOffset, 0, 0,
                                       field_comment));

    fields_processed++;
  });

  // Check if we covered all the expectant fields. If not, we need to add them
  // as unknown fields.
  uint16_t expectant_vtable_fields =
      (vtable_size - sizeof(uint16_t) - sizeof(uint16_t)) / sizeof(uint16_t);

  // Prevent a bad binary from declaring a really large vtable_size, that we can
  // not independently verify.
  expectant_vtable_fields = std::min(
      static_cast<uint16_t>(fields_processed * 3), expectant_vtable_fields);

  for (uint16_t id = fields_processed; id < expectant_vtable_fields; ++id) {
    const uint64_t field_offset = offset_start + id * sizeof(uint16_t);

    const auto offset_from_table = ReadScalar<uint16_t>(field_offset);

    BinaryRegionComment field_comment;
    field_comment.type = BinaryRegionCommentType::VTableUnknownFieldOffset;
    field_comment.index = id;

    if (!offset_from_table.has_value()) {
      const uint64_t remaining = RemainingBytes(field_offset);
      SetError(field_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "2");
      regions.push_back(MakeBinaryRegion(field_offset, remaining,
                                         BinaryRegionType::Unknown, remaining,
                                         0, field_comment));
      continue;
    }

    VTable::Entry entry;
    entry.field = nullptr;  // No field to reference.
    entry.offset_from_table = offset_from_table.value();
    fields.insert(std::make_pair(id, entry));

    regions.push_back(MakeBinaryRegion(field_offset, sizeof(uint16_t),
                                       BinaryRegionType::VOffset, 0, 0,
                                       field_comment));
  }

  // If we have never added this vtable before record the Binary section.
  if (vtables.empty()) {
    sections_[vtable_offset] = MakeBinarySection(
        referring_table_name, BinarySectionType::VTable, std::move(regions));
  } else {
    // Add the current table name to the name of the section.
    sections_[vtable_offset].name += ", " + referring_table_name;
  }

  VTable vtable;
  vtable.referring_table = table;
  vtable.fields = std::move(fields);
  vtable.table_size = table_size;
  vtable.vtable_size = vtable_size;

  // Add this vtable to the collection of vtables at this offset.
  vtables.push_back(std::move(vtable));

  // Return the vtable we just added.
  return &vtables.back();
}

// Annotates the table of schema type `table` located at `table_offset` and
// records it as a section of kind `type`. Fields that reference other data
// (tables, strings, vectors, unions) are followed and annotated as well.
// Nothing is done if a section is already recorded at `table_offset`.
void BinaryAnnotator::BuildTable(const uint64_t table_offset,
                                 const BinarySectionType type,
                                 const reflection::Object *const table) {
  if (ContainsSection(table_offset)) { return; }

  BinaryRegionComment vtable_offset_comment;
  vtable_offset_comment.type = BinaryRegionCommentType::TableVTableOffset;

  const auto vtable_soffset = ReadScalar<int32_t>(table_offset);

  if (!vtable_soffset.has_value()) {
    const uint64_t remaining = RemainingBytes(table_offset);
    SetError(vtable_offset_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
             "4");

    AddSection(
        table_offset,
        MakeSingleRegionBinarySection(
            table->name()->str(), type,
            MakeBinaryRegion(table_offset, remaining, BinaryRegionType::Unknown,
                             remaining, 0, vtable_offset_comment)));

    // If there aren't enough bytes left to read the vtable offset, there is
    // nothing we can do.
    return;
  }

  // Tables start with the vtable
  const uint64_t vtable_offset = table_offset - vtable_soffset.value();

  if (!IsValidOffset(vtable_offset)) {
    SetError(vtable_offset_comment,
             BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);

    AddSection(table_offset,
               MakeSingleRegionBinarySection(
                   table->name()->str(), type,
                   MakeBinaryRegion(table_offset, sizeof(int32_t),
                                    BinaryRegionType::SOffset, 0, vtable_offset,
                                    vtable_offset_comment)));

    // There isn't much to do with an invalid vtable offset, as we won't be able
    // to interpret the rest of the table fields.
    return;
  }

  std::vector<BinaryRegion> regions;
  regions.push_back(MakeBinaryRegion(table_offset, sizeof(int32_t),
                                     BinaryRegionType::SOffset, 0,
                                     vtable_offset, vtable_offset_comment));

  // Parse the vtable first so we know what the rest of the fields in the table
  // are.
  const VTable *const vtable =
      GetOrBuildVTable(vtable_offset, table, table_offset);

  if (vtable == nullptr) {
    // There is no valid vtable for this table, so we cannot process the rest of
    // the table entries.
    return;
  }

  // This is the size and length of this table.
  const uint16_t table_size = vtable->table_size;
  uint64_t table_end_offset = table_offset + table_size;

  if (!IsValidOffset(table_end_offset - 1)) {
    // We already validated the table size in BuildVTable, but we have to make
    // sure we don't use a bad value here.
    table_end_offset = binary_length_;
  }

  // We need to iterate over the vtable fields by their offset in the binary,
  // not by their IDs. So copy them over to another vector that we can sort on
  // the offset_from_table property.
  std::vector<VTable::Entry> fields;
  fields.reserve(vtable->fields.size());
  for (const auto &vtable_field : vtable->fields) {
    fields.push_back(vtable_field.second);
  }

  std::stable_sort(fields.begin(), fields.end(),
                   [](const VTable::Entry &a, const VTable::Entry &b) {
                     return a.offset_from_table < b.offset_from_table;
                   });

  // Iterate over all the fields by order of their offset.
  for (size_t i = 0; i < fields.size(); ++i) {
    const reflection::Field *field = fields[i].field;
    const uint16_t offset_from_table = fields[i].offset_from_table;

    if (offset_from_table == 0) {
      // Skip non-present fields.
      continue;
    }

    // The field offsets are relative to the start of the table.
    const uint64_t field_offset = table_offset + offset_from_table;

    if (!IsValidOffset(field_offset)) {
      // The field offset is larger than the binary, nothing we can do.
      continue;
    }

    // We have a vtable entry for a non-existent field, that means its a binary
    // generated by a newer schema than we are currently processing.
    if (field == nullptr) {
      // Calculate the length of this unknown field.
      const uint64_t unknown_field_length =
          // Check if there is another unknown field after this one.
          ((i + 1 < fields.size())
               ? table_offset + fields[i + 1].offset_from_table
               // Otherwise use the known end of the table.
               : table_end_offset) -
          field_offset;

      if (unknown_field_length == 0) { continue; }

      std::string hint;

      if (unknown_field_length == 4) {
        const auto relative_offset = ReadScalar<uint32_t>(field_offset);
        if (relative_offset.has_value()) {
          // The field is 4 in length, so it could be an offset? Provide a hint.
          hint += "<possibly an offset? Check Loc: +0x";
          hint += ToHex(field_offset + relative_offset.value());
          hint += ">";
        }
      }

      BinaryRegionComment unknown_field_comment;
      unknown_field_comment.type = BinaryRegionCommentType::TableUnknownField;

      if (!IsValidRead(field_offset, unknown_field_length)) {
        const uint64_t remaining = RemainingBytes(field_offset);

        SetError(unknown_field_comment,
                 BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
                 std::to_string(unknown_field_length));

        regions.push_back(MakeBinaryRegion(field_offset, remaining,
                                           BinaryRegionType::Unknown, remaining,
                                           0, unknown_field_comment));
        continue;
      }

      unknown_field_comment.default_value = hint;

      regions.push_back(MakeBinaryRegion(
          field_offset, unknown_field_length, BinaryRegionType::Unknown,
          unknown_field_length, 0, unknown_field_comment));
      continue;
    }

    if (IsScalar(field->type()->base_type())) {
      // These are the raw values store in the table.
      const uint64_t type_size = GetTypeSize(field->type()->base_type());
      const BinaryRegionType region_type =
          GetRegionType(field->type()->base_type());

      BinaryRegionComment scalar_field_comment;
      scalar_field_comment.type = BinaryRegionCommentType::TableField;
      scalar_field_comment.name =
          std::string(field->name()->c_str()) + "` (" +
          reflection::EnumNameBaseType(field->type()->base_type()) + ")";

      if (!IsValidRead(field_offset, type_size)) {
        const uint64_t remaining = RemainingBytes(field_offset);
        SetError(scalar_field_comment,
                 BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
                 std::to_string(type_size));

        regions.push_back(MakeBinaryRegion(field_offset, remaining,
                                           BinaryRegionType::Unknown, remaining,
                                           0, scalar_field_comment));
        continue;
      }

      if (IsUnionType(field)) {
        // This is a type for a union. Validate the value
        const auto enum_value = ReadScalar<uint8_t>(field_offset);

        // This should always have a value, due to the IsValidRead check above.
        if (!IsValidUnionValue(field, enum_value.value())) {
          SetError(scalar_field_comment,
                   BinaryRegionStatus::ERROR_INVALID_UNION_TYPE);

          regions.push_back(MakeBinaryRegion(field_offset, type_size,
                                             region_type, 0, 0,
                                             scalar_field_comment));
          continue;
        }
      }

      regions.push_back(MakeBinaryRegion(field_offset, type_size, region_type,
                                         0, 0, scalar_field_comment));
      continue;
    }

    // Read the offset. A failed read is folded into 0, which is rejected below
    // for non-inline fields.
    uint64_t offset = 0;
    uint64_t length = sizeof(uint32_t);
    BinaryRegionType region_type = BinaryRegionType::UOffset;

    if (field->offset64()) {
      length = sizeof(uint64_t);
      region_type = BinaryRegionType::UOffset64;
      offset = ReadScalar<uint64_t>(field_offset).value_or(0);
    } else {
      offset = ReadScalar<uint32_t>(field_offset).value_or(0);
    }

    uint64_t offset_of_next_item = 0;
    BinaryRegionComment offset_field_comment;
    offset_field_comment.type = BinaryRegionCommentType::TableOffsetField;
    offset_field_comment.name = field->name()->c_str();
    const std::string offset_prefix =
        "offset to field `" + std::string(field->name()->c_str()) + "`";

    // Validate any field that isn't inline (i.e., non-structs).
    if (!IsInlineField(field)) {
      if (offset == 0) {
        const uint64_t remaining = RemainingBytes(field_offset);

        // Report the width of the offset that was expected (4 for 32-bit
        // offsets, 8 for 64-bit offsets).
        SetError(offset_field_comment,
                 BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
                 std::to_string(length));

        regions.push_back(MakeBinaryRegion(field_offset, remaining,
                                           BinaryRegionType::Unknown, remaining,
                                           0, offset_field_comment));
        continue;
      }

      // Offsets are relative to the location they are stored at.
      offset_of_next_item = field_offset + offset;

      if (!IsValidOffset(offset_of_next_item)) {
        SetError(offset_field_comment,
                 BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
        regions.push_back(MakeBinaryRegion(field_offset, length, region_type, 0,
                                           offset_of_next_item,
                                           offset_field_comment));
        continue;
      }
    }

    switch (field->type()->base_type()) {
      case reflection::BaseType::Obj: {
        const reflection::Object *next_object =
            schema_->objects()->Get(field->type()->index());

        if (next_object->is_struct()) {
          // Structs are stored inline.
          BuildStruct(field_offset, regions, field->name()->c_str(),
                      next_object);
        } else {
          offset_field_comment.default_value = "(table)";

          regions.push_back(MakeBinaryRegion(field_offset, length, region_type,
                                             0, offset_of_next_item,
                                             offset_field_comment));

          BuildTable(offset_of_next_item, BinarySectionType::Table,
                     next_object);
        }
      } break;

      case reflection::BaseType::String: {
        offset_field_comment.default_value = "(string)";
        regions.push_back(MakeBinaryRegion(field_offset, length, region_type, 0,
                                           offset_of_next_item,
                                           offset_field_comment));
        BuildString(offset_of_next_item, table, field);
      } break;

      case reflection::BaseType::Vector: {
        offset_field_comment.default_value = "(vector)";
        regions.push_back(MakeBinaryRegion(field_offset, length, region_type, 0,
                                           offset_of_next_item,
                                           offset_field_comment));
        BuildVector(offset_of_next_item, table, field, table_offset,
                    vtable->fields);
      } break;

      case reflection::BaseType::Vector64: {
        offset_field_comment.default_value = "(vector64)";
        regions.push_back(MakeBinaryRegion(field_offset, length, region_type, 0,
                                           offset_of_next_item,
                                           offset_field_comment));
        BuildVector(offset_of_next_item, table, field, table_offset,
                    vtable->fields);
      } break;

      case reflection::BaseType::Union: {
        const uint64_t union_offset = offset_of_next_item;

        // The union type field is always one less than the union itself.
        const uint16_t union_type_id = field->id() - 1;

        auto vtable_field = vtable->fields.find(union_type_id);
        if (vtable_field == vtable->fields.end()) {
          // TODO(dbaileychess): need to capture this error condition.
          break;
        }
        offset_field_comment.default_value = "(union)";

        const uint64_t type_offset =
            table_offset + vtable_field->second.offset_from_table;

        const auto realized_type = ReadScalar<uint8_t>(type_offset);
        if (!realized_type.has_value()) {
          const uint64_t remaining = RemainingBytes(type_offset);
          SetError(offset_field_comment,
                   BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "1");
          regions.push_back(MakeBinaryRegion(
              type_offset, remaining, BinaryRegionType::Unknown, remaining, 0,
              offset_field_comment));
          // `continue` applies to the enclosing field loop, not the switch.
          continue;
        }

        if (!IsValidUnionValue(field, realized_type.value())) {
          // We already export an error in the union type field, so just skip
          // building the union itself and it will default to an unreferenced
          // Binary section.
          continue;
        }

        const std::string enum_type =
            BuildUnion(union_offset, realized_type.value(), field);

        offset_field_comment.default_value =
            "(union of type `" + enum_type + "`)";

        regions.push_back(MakeBinaryRegion(field_offset, length, region_type, 0,
                                           union_offset, offset_field_comment));

      } break;

      default: break;
    }
  }

  // Handle the case where there is padding after the last known binary
  // region. Calculate where we left off towards the expected end of the
  // table.
  const uint64_t i = regions.back().offset + regions.back().length + 1;

  if (i < table_end_offset) {
    const uint64_t pad_bytes = table_end_offset - i + 1;

    BinaryRegionComment padding_comment;
    padding_comment.type = BinaryRegionCommentType::Padding;

    regions.push_back(MakeBinaryRegion(i - 1, pad_bytes * sizeof(uint8_t),
                                       BinaryRegionType::Uint8, pad_bytes, 0,
                                       padding_comment));
  }

  AddSection(table_offset,
             MakeBinarySection(table->name()->str(), type, std::move(regions)));
}

// Appends to `regions` the annotation of the struct of schema type `object`
// stored inline at `struct_offset`. `referring_field_name` prefixes the name of
// every generated region. Returns the offset just past the annotated bytes, or
// `struct_offset` unchanged if `object` is not a struct.
uint64_t BinaryAnnotator::BuildStruct(const uint64_t struct_offset,
                                      std::vector<BinaryRegion> &regions,
                                      const std::string referring_field_name,
                                      const reflection::Object *const object) {
  if (!object->is_struct()) { return struct_offset; }
  uint64_t offset = struct_offset;

  // Loop over all the fields in increasing order
  ForAllFields(object, /*reverse=*/false, [&](const reflection::Field *field) {
    if (IsScalar(field->type()->base_type())) {
      // Structure Field value
      const uint64_t type_size = GetTypeSize(field->type()->base_type());
      const BinaryRegionType region_type =
          GetRegionType(field->type()->base_type());

      BinaryRegionComment comment;
      comment.type = BinaryRegionCommentType::StructField;
      comment.name = referring_field_name + "." + field->name()->str();
      comment.default_value = "of '" + object->name()->str() + "' (" +
                              std::string(reflection::EnumNameBaseType(
                                  field->type()->base_type())) +
                              ")";

      if (!IsValidRead(offset, type_size)) {
        const uint64_t remaining = RemainingBytes(offset);
        SetError(comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
                 std::to_string(type_size));
        regions.push_back(MakeBinaryRegion(offset, remaining,
                                           BinaryRegionType::Unknown, remaining,
                                           0, comment));

        // TODO(dbaileychess): Should I bail out here? This sets offset to the
        // end of the binary. So all other reads in the loop should fail.
        offset += remaining;
        return;
      }

      regions.push_back(
          MakeBinaryRegion(offset, type_size, region_type, 0, 0, comment));
      offset += type_size;
    } else if (field->type()->base_type() == reflection::BaseType::Obj) {
      // Structs are stored inline, even when nested.
      offset = BuildStruct(offset, regions,
                           referring_field_name + "." + field->name()->str(),
                           schema_->objects()->Get(field->type()->index()));
    } else if (field->type()->base_type() == reflection::BaseType::Array) {
      const bool is_scalar = IsScalar(field->type()->element());
      const uint64_t type_size = GetTypeSize(field->type()->element());
      const BinaryRegionType region_type =
          GetRegionType(field->type()->element());

      // Arrays are just repeated structures.
      for (uint16_t i = 0; i < field->type()->fixed_length(); ++i) {
        if (is_scalar) {
          BinaryRegionComment array_comment;
          array_comment.type = BinaryRegionCommentType::ArrayField;
          array_comment.name =
              referring_field_name + "." + field->name()->str();
          array_comment.index = i;
          array_comment.default_value =
              "of '" + object->name()->str() + "' (" +
              std::string(
                  reflection::EnumNameBaseType(field->type()->element())) +
              ")";

          if (!IsValidRead(offset, type_size)) {
            const uint64_t remaining = RemainingBytes(offset);

            SetError(array_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
                     std::to_string(type_size));

            regions.push_back(MakeBinaryRegion(offset, remaining,
                                               BinaryRegionType::Unknown,
                                               remaining, 0, array_comment));

            // TODO(dbaileychess): Should I bail out here? This sets offset to
            // the end of the binary. So all other reads in the loop should
            // fail.
            offset += remaining;
            break;
          }

          regions.push_back(MakeBinaryRegion(offset, type_size, region_type, 0,
                                             0, array_comment));

          offset += type_size;
        } else {
          // Array of Structs.
          //
          // TODO(dbaileychess): This works, but the comments on the fields lose
          // some context. Need to figure a way how to plumb the nested arrays
          // comments together that isn't too confusing.
          offset =
              BuildStruct(offset, regions,
                          referring_field_name + "." + field->name()->str(),
                          schema_->objects()->Get(field->type()->index()));
        }
      }
    }

    // Insert any padding after this field.
    const uint16_t padding = field->padding();
    if (padding > 0 && IsValidOffset(offset + padding)) {
      BinaryRegionComment padding_comment;
      padding_comment.type = BinaryRegionCommentType::Padding;

      regions.push_back(MakeBinaryRegion(offset, padding,
                                         BinaryRegionType::Uint8, padding, 0,
                                         padding_comment));
      offset += padding;
    }
  });

  return offset;
}

// Annotates the string located at `string_offset` (length, characters and
// terminator) and records it as a section named "<table>.<field>". Nothing is
// done if a section is already recorded at `string_offset`.
void BinaryAnnotator::BuildString(const uint64_t string_offset,
                                  const reflection::Object *const table,
                                  const reflection::Field *const field) {
  // Check if we have already generated this string section, and this is a
  // shared string instance.
  if (ContainsSection(string_offset)) { return; }

  std::vector<BinaryRegion> regions;
  const auto string_length = ReadScalar<uint32_t>(string_offset);

  BinaryRegionComment string_length_comment;
  string_length_comment.type = BinaryRegionCommentType::StringLength;

  if (!string_length.has_value()) {
    const uint64_t remaining = RemainingBytes(string_offset);

    SetError(string_length_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
             "4");

    regions.push_back(MakeBinaryRegion(string_offset, remaining,
                                       BinaryRegionType::Unknown, remaining, 0,
                                       string_length_comment));

  } else {
    const uint32_t string_size = string_length.value();
    // One past the terminator: length prefix + characters + terminator.
    const uint64_t string_end =
        string_offset + sizeof(uint32_t) + string_size + sizeof(char);

    if (!IsValidOffset(string_end - 1)) {
      SetError(string_length_comment,
               BinaryRegionStatus::ERROR_LENGTH_TOO_LONG);

      regions.push_back(MakeBinaryRegion(string_offset, sizeof(uint32_t),
                                         BinaryRegionType::Uint32, 0, 0,
                                         string_length_comment));
    } else {
      regions.push_back(MakeBinaryRegion(string_offset, sizeof(uint32_t),
                                         BinaryRegionType::Uint32, 0, 0,
                                         string_length_comment));

      BinaryRegionComment string_comment;
      string_comment.type = BinaryRegionCommentType::StringValue;

      regions.push_back(MakeBinaryRegion(string_offset + sizeof(uint32_t),
                                         string_size, BinaryRegionType::Char,
                                         string_size, 0, string_comment));

      BinaryRegionComment string_terminator_comment;
      string_terminator_comment.type =
          BinaryRegionCommentType::StringTerminator;

      regions.push_back(MakeBinaryRegion(
          string_offset + sizeof(uint32_t) + string_size, sizeof(char),
          BinaryRegionType::Char, 0, 0, string_terminator_comment));
    }
  }

  AddSection(string_offset,
             MakeBinarySection(std::string(table->name()->c_str()) + "." +
                                   field->name()->c_str(),
                               BinarySectionType::String, std::move(regions)));
}

void BinaryAnnotator::BuildVector(
    const uint64_t vector_offset, const reflection::Object *const table,
    const reflection::Field *const field, const uint64_t parent_table_offset,
    const std::map<uint16_t, VTable::Entry> vtable_fields) {
  if (ContainsSection(vector_offset)) { return; }

  BinaryRegionComment vector_length_comment;
  vector_length_comment.type = BinaryRegionCommentType::VectorLength;

  const bool is_64_bit_vector =
      field->type()->base_type() == reflection::BaseType::Vector64;

  flatbuffers::Optional<uint64_t> vector_length;
  uint32_t vector_length_size_type = 0;
  BinaryRegionType region_type = BinaryRegionType::Uint32;
  BinarySectionType section_type = BinarySectionType::Vector;

  if (is_64_bit_vector) {
    auto v = ReadScalar<uint64_t>(vector_offset);
    if (v.has_value()) { vector_length = v.value(); }
    vector_length_size_type = sizeof(uint64_t);
    region_type = BinaryRegionType::Uint64;
    section_type = BinarySectionType::Vector64;
  } else {
    auto v = ReadScalar<uint32_t>(vector_offset);
    if (v.has_value()) { vector_length = v.value(); }
    vector_length_size_type = sizeof(uint32_t);
    region_type = BinaryRegionType::Uint32;
    section_type = BinarySectionType::Vector;
  }

  if (!vector_length.has_value()) {
    const uint64_t remaining = RemainingBytes(vector_offset);
    SetError(vector_length_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
             "4");

    AddSection(
        vector_offset,
        MakeSingleRegionBinarySection(
            std::string(table->name()->c_str()) + "." + field->name()->c_str(),
            BinarySectionType::Vector,
            MakeBinaryRegion(vector_offset, remaining,
                             BinaryRegionType::Unknown, remaining, 0,
                             vector_length_comment)));
    return;
  }

  // Validate there are enough bytes left in the binary to process all the
  // items.
  const uint64_t last_item_offset =
      vector_offset + vector_length_size_type +
      vector_length.value() * GetElementSize(field);

  if (!IsValidOffset(last_item_offset - 1)) {
    SetError(vector_length_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_LONG);
    AddSection(
        vector_offset,
        MakeSingleRegionBinarySection(
            std::string(table->name()->c_str()) + "." + field->name()->c_str(),
            BinarySectionType::Vector,
            MakeBinaryRegion(vector_offset, vector_length_size_type,
                             region_type, 0, 0, vector_length_comment)));

    return;
  }

  std::vector<BinaryRegion> regions;

  regions.push_back(MakeBinaryRegion(vector_offset, vector_length_size_type,
                                     region_type, 0, 0, vector_length_comment));
  // Consume the vector length offset.
  uint64_t offset = vector_offset + vector_length_size_type;

  switch (field->type()->element()) {
    case reflection::BaseType::Obj: {
      const reflection::Object *object =
          schema_->objects()->Get(field->type()->index());

      if (object->is_struct()) {
        // Vector of structs
        for (size_t i = 0; i < vector_length.value(); ++i) {
          // Structs are inline to the vector.
          const uint64_t next_offset =
              BuildStruct(offset, regions, "[" + NumToString(i) + "]", object);
          if (next_offset == offset) { break; }
          offset = next_offset;
        }
      } else {
        // Vector of objects
        for (size_t i = 0; i < vector_length.value(); ++i) {
          BinaryRegionComment vector_object_comment;
          vector_object_comment.type =
              BinaryRegionCommentType::VectorTableValue;
          vector_object_comment.index = i;

          const auto table_relative_offset = ReadScalar<uint32_t>(offset);
          if (!table_relative_offset.has_value()) {
            const uint64_t remaining = RemainingBytes(offset);
            SetError(vector_object_comment,
                     BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "4");

            regions.push_back(
                MakeBinaryRegion(offset, remaining, BinaryRegionType::Unknown,
                                 remaining, 0, vector_object_comment));
            break;
          }

          // The table offset is relative from the offset location itself.
          const uint64_t table_offset = offset + table_relative_offset.value();

          if (!IsValidOffset(table_offset)) {
            SetError(vector_object_comment,
                     BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
            regions.push_back(MakeBinaryRegion(
                offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
                table_offset, vector_object_comment));

            offset += sizeof(uint32_t);
            continue;
          }

          if (table_offset == parent_table_offset) {
            SetError(vector_object_comment,
                     BinaryRegionStatus::ERROR_CYCLE_DETECTED);
            // A cycle detected where a table vector field is pointing to
            // itself. This should only happen in corrupted files.
            regions.push_back(MakeBinaryRegion(
                offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
                table_offset, vector_object_comment));

            offset += sizeof(uint32_t);
            continue;
          }

          regions.push_back(MakeBinaryRegion(
              offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
              table_offset, vector_object_comment));

          // Consume the offset to the table.
          offset += sizeof(uint32_t);

          BuildTable(table_offset, BinarySectionType::Table, object);
        }
      }
    } break;
    case reflection::BaseType::String: {
      // Vector of strings
      for (size_t i = 0; i < vector_length.value(); ++i) {
        BinaryRegionComment vector_object_comment;
        vector_object_comment.type = BinaryRegionCommentType::VectorStringValue;
        vector_object_comment.index = i;

        const auto string_relative_offset = ReadScalar<uint32_t>(offset);
        if (!string_relative_offset.has_value()) {
          const uint64_t remaining = RemainingBytes(offset);

          SetError(vector_object_comment,
                   BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "4");

          regions.push_back(
              MakeBinaryRegion(offset, remaining, BinaryRegionType::Unknown,
                               remaining, 0, vector_object_comment));
          break;
        }

        // The string offset is relative from the offset location itself.
        const uint64_t string_offset = offset + string_relative_offset.value();

        if (!IsValidOffset(string_offset)) {
          SetError(vector_object_comment,
                   BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
          regions.push_back(MakeBinaryRegion(
              offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
              string_offset, vector_object_comment));

          offset += sizeof(uint32_t);
          continue;
        }

        regions.push_back(MakeBinaryRegion(
            offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
            string_offset, vector_object_comment));

        BuildString(string_offset, table, field);

        offset += sizeof(uint32_t);
      }
    } break;
    case reflection::BaseType::Union: {
      // Vector of unions
      // Unions have both their realized type (uint8_t for now) that are
      // stored separately. These are stored in the field->index() - 1
      // location.
      const uint16_t union_type_vector_id = field->id() - 1;

      auto vtable_entry = vtable_fields.find(union_type_vector_id);
      if (vtable_entry == vtable_fields.end()) {
        // TODO(dbaileychess): need to capture this error condition.
break; } const uint64_t union_type_vector_field_offset = parent_table_offset + vtable_entry->second.offset_from_table; const auto union_type_vector_field_relative_offset = ReadScalar<uint16_t>(union_type_vector_field_offset); if (!union_type_vector_field_relative_offset.has_value()) { const uint64_t remaining = RemainingBytes(offset); BinaryRegionComment vector_union_comment; vector_union_comment.type = BinaryRegionCommentType::VectorUnionValue; SetError(vector_union_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "2"); regions.push_back(MakeBinaryRegion(offset, remaining, BinaryRegionType::Unknown, remaining, 0, vector_union_comment)); break; } // Get the offset to the first type (the + sizeof(uint32_t) is to skip // over the vector length which we already know). Validation happens // within the loop below. const uint64_t union_type_vector_data_offset = union_type_vector_field_offset + union_type_vector_field_relative_offset.value() + sizeof(uint32_t); for (size_t i = 0; i < vector_length.value(); ++i) { BinaryRegionComment comment; comment.type = BinaryRegionCommentType::VectorUnionValue; comment.index = i; const auto union_relative_offset = ReadScalar<uint32_t>(offset); if (!union_relative_offset.has_value()) { const uint64_t remaining = RemainingBytes(offset); SetError(comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "4"); regions.push_back(MakeBinaryRegion(offset, remaining, BinaryRegionType::Unknown, remaining, 0, comment)); break; } // The union offset is relative from the offset location itself. 
const uint64_t union_offset = offset + union_relative_offset.value(); if (!IsValidOffset(union_offset)) { SetError(comment, BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY); regions.push_back(MakeBinaryRegion(offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0, union_offset, comment)); continue; } const auto realized_type = ReadScalar<uint8_t>(union_type_vector_data_offset + i); if (!realized_type.has_value()) { SetError(comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "1"); regions.push_back(MakeBinaryRegion( offset, 0, BinaryRegionType::Unknown, 0, 0, comment)); continue; } if (!IsValidUnionValue(vtable_entry->second.field->type()->index(), realized_type.value())) { // We already export an error in the union type field, so just skip // building the union itself and it will default to an unreference // Binary section. offset += sizeof(uint32_t); continue; } const std::string enum_type = BuildUnion(union_offset, realized_type.value(), field); comment.default_value = "(`" + enum_type + "`)"; regions.push_back(MakeBinaryRegion(offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0, union_offset, comment)); offset += sizeof(uint32_t); } } break; default: { if (IsScalar(field->type()->element())) { const BinaryRegionType binary_region_type = GetRegionType(field->type()->element()); const uint64_t type_size = GetTypeSize(field->type()->element()); // TODO(dbaileychess): It might be nicer to user the // BinaryRegion.array_length field to indicate this. 
for (size_t i = 0; i < vector_length.value(); ++i) { BinaryRegionComment vector_scalar_comment; vector_scalar_comment.type = BinaryRegionCommentType::VectorValue; vector_scalar_comment.index = i; if (!IsValidRead(offset, type_size)) { const uint64_t remaining = RemainingBytes(offset); SetError(vector_scalar_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, std::to_string(type_size)); regions.push_back( MakeBinaryRegion(offset, remaining, BinaryRegionType::Unknown, remaining, 0, vector_scalar_comment)); break; } if (IsUnionType(field->type()->element())) { // This is a type for a union. Validate the value const auto enum_value = ReadScalar<uint8_t>(offset); // This should always have a value, due to the IsValidRead check // above. if (!IsValidUnionValue(field->type()->index(), enum_value.value())) { SetError(vector_scalar_comment, BinaryRegionStatus::ERROR_INVALID_UNION_TYPE); regions.push_back(MakeBinaryRegion(offset, type_size, binary_region_type, 0, 0, vector_scalar_comment)); offset += type_size; continue; } } regions.push_back(MakeBinaryRegion(offset, type_size, binary_region_type, 0, 0, vector_scalar_comment)); offset += type_size; } } } break; } AddSection(vector_offset, MakeBinarySection(std::string(table->name()->c_str()) + "." 
+ field->name()->c_str(), section_type, std::move(regions))); } std::string BinaryAnnotator::BuildUnion(const uint64_t union_offset, const uint8_t realized_type, const reflection::Field *const field) { const reflection::Enum *next_enum = schema_->enums()->Get(field->type()->index()); const reflection::EnumVal *enum_val = next_enum->values()->Get(realized_type); if (ContainsSection(union_offset)) { return enum_val->name()->c_str(); } const reflection::Type *union_type = enum_val->union_type(); if (union_type->base_type() == reflection::BaseType::Obj) { const reflection::Object *object = schema_->objects()->Get(union_type->index()); if (object->is_struct()) { // Union of vectors point to a new Binary section std::vector<BinaryRegion> regions; BuildStruct(union_offset, regions, field->name()->c_str(), object); AddSection( union_offset, MakeBinarySection(std::string(object->name()->c_str()) + "." + field->name()->c_str(), BinarySectionType::Union, std::move(regions))); } else { BuildTable(union_offset, BinarySectionType::Table, object); } } // TODO(dbaileychess): handle the other union types. return enum_val->name()->c_str(); } void BinaryAnnotator::FixMissingRegions() { std::vector<BinaryRegion> regions_to_insert; for (auto ¤t_section : sections_) { BinarySection §ion = current_section.second; if (section.regions.empty()) { // TODO(dbaileychess): is this possible? continue; } uint64_t offset = section.regions[0].offset + section.regions[0].length; for (size_t i = 1; i < section.regions.size(); ++i) { BinaryRegion ®ion = section.regions[i]; const uint64_t next_offset = region.offset; if (!IsValidOffset(next_offset)) { // TODO(dbaileychess): figure out how we get into this situation. 
continue; } if (offset < next_offset) { const uint64_t padding_bytes = next_offset - offset; BinaryRegionComment comment; comment.type = BinaryRegionCommentType::Padding; if (IsNonZeroRegion(offset, padding_bytes, binary_)) { SetError(comment, BinaryRegionStatus::WARN_NO_REFERENCES); regions_to_insert.push_back( MakeBinaryRegion(offset, padding_bytes, BinaryRegionType::Unknown, padding_bytes, 0, comment)); } else { regions_to_insert.push_back( MakeBinaryRegion(offset, padding_bytes, BinaryRegionType::Uint8, padding_bytes, 0, comment)); } } offset = next_offset + region.length; } if (!regions_to_insert.empty()) { section.regions.insert(section.regions.end(), regions_to_insert.begin(), regions_to_insert.end()); std::stable_sort(section.regions.begin(), section.regions.end(), BinaryRegionSort); regions_to_insert.clear(); } } } void BinaryAnnotator::FixMissingSections() { uint64_t offset = 0; std::vector<BinarySection> sections_to_insert; for (auto ¤t_section : sections_) { BinarySection §ion = current_section.second; const uint64_t section_start_offset = current_section.first; const uint64_t section_end_offset = section.regions.back().offset + section.regions.back().length; if (offset < section_start_offset) { // We are at an offset that is less then the current section. const uint64_t pad_bytes = section_start_offset - offset + 1; sections_to_insert.push_back( GenerateMissingSection(offset - 1, pad_bytes, binary_)); } offset = section_end_offset + 1; } // Handle the case where there are still bytes left in the binary that are // unaccounted for. 
if (offset < binary_length_) { const uint64_t pad_bytes = binary_length_ - offset + 1; sections_to_insert.push_back( GenerateMissingSection(offset - 1, pad_bytes, binary_)); } for (const BinarySection §ion_to_insert : sections_to_insert) { AddSection(section_to_insert.regions[0].offset, section_to_insert); } } bool BinaryAnnotator::ContainsSection(const uint64_t offset) { auto it = sections_.lower_bound(offset); // If the section is found, check that it is exactly equal its offset. if (it != sections_.end() && it->first == offset) { return true; } // If this was the first section, there are no other previous sections to // check. if (it == sections_.begin()) { return false; } // Go back one section. --it; // And check that if the offset is covered by the section. return offset >= it->first && offset < it->second.regions.back().offset + it->second.regions.back().length; } } // namespace flatbuffers