diff options
author | vitalyisaev <vitalyisaev@yandex-team.com> | 2023-06-29 10:00:50 +0300 |
---|---|---|
committer | vitalyisaev <vitalyisaev@yandex-team.com> | 2023-06-29 10:00:50 +0300 |
commit | 6ffe9e53658409f212834330e13564e4952558f6 (patch) | |
tree | 85b1e00183517648b228aafa7c8fb07f5276f419 /contrib/libs/llvm14/lib/Object | |
parent | 726057070f9c5a91fc10fde0d5024913d10f1ab9 (diff) | |
download | ydb-6ffe9e53658409f212834330e13564e4952558f6.tar.gz |
YQ Connector: support managed ClickHouse
Со стороны dqrun можно обратиться к инстансу коннектора, который работает на streaming стенде, и извлечь данные из облачного CH.
Diffstat (limited to 'contrib/libs/llvm14/lib/Object')
32 files changed, 19545 insertions, 0 deletions
diff --git a/contrib/libs/llvm14/lib/Object/Archive.cpp b/contrib/libs/llvm14/lib/Object/Archive.cpp new file mode 100644 index 0000000000..9a4ef055fa --- /dev/null +++ b/contrib/libs/llvm14/lib/Object/Archive.cpp @@ -0,0 +1,1177 @@ +//===- Archive.cpp - ar File Format implementation ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the ArchiveObjectFile class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/Archive.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/Error.h" +#include "llvm/Support/Chrono.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <memory> +#include <string> +#include <system_error> + +using namespace llvm; +using namespace object; +using namespace llvm::support::endian; + +void Archive::anchor() {} + +static Error malformedError(Twine Msg) { + std::string StringMsg = "truncated or malformed archive (" + Msg.str() + ")"; + return make_error<GenericBinaryError>(std::move(StringMsg), + object_error::parse_failed); +} + +static Error +createMemberHeaderParseError(const AbstractArchiveMemberHeader *ArMemHeader, + const char *RawHeaderPtr, uint64_t Size) { + StringRef Msg("remaining size of archive too small 
for next archive " + "member header "); + + Expected<StringRef> NameOrErr = ArMemHeader->getName(Size); + if (NameOrErr) + return malformedError(Msg + "for " + *NameOrErr); + + consumeError(NameOrErr.takeError()); + uint64_t Offset = RawHeaderPtr - ArMemHeader->Parent->getData().data(); + return malformedError(Msg + "at offset " + Twine(Offset)); +} + +template <class T, std::size_t N> +StringRef getFieldRawString(const T (&Field)[N]) { + return StringRef(Field, N).rtrim(" "); +} + +template <class T> +StringRef CommonArchiveMemberHeader<T>::getRawAccessMode() const { + return getFieldRawString(ArMemHdr->AccessMode); +} + +template <class T> +StringRef CommonArchiveMemberHeader<T>::getRawLastModified() const { + return getFieldRawString(ArMemHdr->LastModified); +} + +template <class T> StringRef CommonArchiveMemberHeader<T>::getRawUID() const { + return getFieldRawString(ArMemHdr->UID); +} + +template <class T> StringRef CommonArchiveMemberHeader<T>::getRawGID() const { + return getFieldRawString(ArMemHdr->GID); +} + +template <class T> uint64_t CommonArchiveMemberHeader<T>::getOffset() const { + return reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); +} + +template class object::CommonArchiveMemberHeader<UnixArMemHdrType>; +template class object::CommonArchiveMemberHeader<BigArMemHdrType>; + +ArchiveMemberHeader::ArchiveMemberHeader(const Archive *Parent, + const char *RawHeaderPtr, + uint64_t Size, Error *Err) + : CommonArchiveMemberHeader<UnixArMemHdrType>( + Parent, reinterpret_cast<const UnixArMemHdrType *>(RawHeaderPtr)) { + if (RawHeaderPtr == nullptr) + return; + ErrorAsOutParameter ErrAsOutParam(Err); + + if (Size < getSizeOf()) { + *Err = createMemberHeaderParseError(this, RawHeaderPtr, Size); + return; + } + if (ArMemHdr->Terminator[0] != '`' || ArMemHdr->Terminator[1] != '\n') { + if (Err) { + std::string Buf; + raw_string_ostream OS(Buf); + OS.write_escaped( + StringRef(ArMemHdr->Terminator, sizeof(ArMemHdr->Terminator))); + 
OS.flush(); + std::string Msg("terminator characters in archive member \"" + Buf + + "\" not the correct \"`\\n\" values for the archive " + "member header "); + Expected<StringRef> NameOrErr = getName(Size); + if (!NameOrErr) { + consumeError(NameOrErr.takeError()); + uint64_t Offset = RawHeaderPtr - Parent->getData().data(); + *Err = malformedError(Msg + "at offset " + Twine(Offset)); + } else + *Err = malformedError(Msg + "for " + NameOrErr.get()); + } + return; + } +} + +BigArchiveMemberHeader::BigArchiveMemberHeader(const Archive *Parent, + const char *RawHeaderPtr, + uint64_t Size, Error *Err) + : CommonArchiveMemberHeader<BigArMemHdrType>( + Parent, reinterpret_cast<const BigArMemHdrType *>(RawHeaderPtr)) { + if (RawHeaderPtr == nullptr) + return; + ErrorAsOutParameter ErrAsOutParam(Err); + + if (Size < getSizeOf()) + *Err = createMemberHeaderParseError(this, RawHeaderPtr, Size); +} + +// This gets the raw name from the ArMemHdr->Name field and checks that it is +// valid for the kind of archive. If it is not valid it returns an Error. +Expected<StringRef> ArchiveMemberHeader::getRawName() const { + char EndCond; + auto Kind = Parent->kind(); + if (Kind == Archive::K_BSD || Kind == Archive::K_DARWIN64) { + if (ArMemHdr->Name[0] == ' ') { + uint64_t Offset = + reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); + return malformedError("name contains a leading space for archive member " + "header at offset " + + Twine(Offset)); + } + EndCond = ' '; + } else if (ArMemHdr->Name[0] == '/' || ArMemHdr->Name[0] == '#') + EndCond = ' '; + else + EndCond = '/'; + StringRef::size_type end = + StringRef(ArMemHdr->Name, sizeof(ArMemHdr->Name)).find(EndCond); + if (end == StringRef::npos) + end = sizeof(ArMemHdr->Name); + assert(end <= sizeof(ArMemHdr->Name) && end > 0); + // Don't include the EndCond if there is one. 
+ return StringRef(ArMemHdr->Name, end); +} + +Expected<uint64_t> +getArchiveMemberDecField(Twine FieldName, const StringRef RawField, + const Archive *Parent, + const AbstractArchiveMemberHeader *MemHeader) { + uint64_t Value; + if (RawField.getAsInteger(10, Value)) { + uint64_t Offset = MemHeader->getOffset(); + return malformedError("characters in " + FieldName + + " field in archive member header are not " + "all decimal numbers: '" + + RawField + + "' for the archive " + "member header at offset " + + Twine(Offset)); + } + return Value; +} + +Expected<uint64_t> +getArchiveMemberOctField(Twine FieldName, const StringRef RawField, + const Archive *Parent, + const AbstractArchiveMemberHeader *MemHeader) { + uint64_t Value; + if (RawField.getAsInteger(8, Value)) { + uint64_t Offset = MemHeader->getOffset(); + return malformedError("characters in " + FieldName + + " field in archive member header are not " + "all octal numbers: '" + + RawField + + "' for the archive " + "member header at offset " + + Twine(Offset)); + } + return Value; +} + +Expected<StringRef> BigArchiveMemberHeader::getRawName() const { + Expected<uint64_t> NameLenOrErr = getArchiveMemberDecField( + "NameLen", getFieldRawString(ArMemHdr->NameLen), Parent, this); + if (!NameLenOrErr) + // TODO: Out-of-line. + return NameLenOrErr.takeError(); + uint64_t NameLen = NameLenOrErr.get(); + + // If the name length is odd, pad with '\0' to get an even length. After + // padding, there is the name terminator "`\n". + uint64_t NameLenWithPadding = alignTo(NameLen, 2); + StringRef NameTerminator = "`\n"; + StringRef NameStringWithNameTerminator = + StringRef(ArMemHdr->Name, NameLenWithPadding + NameTerminator.size()); + if (!NameStringWithNameTerminator.endswith(NameTerminator)) { + uint64_t Offset = + reinterpret_cast<const char *>(ArMemHdr->Name + NameLenWithPadding) - + Parent->getData().data(); + // TODO: Out-of-line. 
+ return malformedError( + "name does not have name terminator \"`\\n\" for archive member" + "header at offset " + + Twine(Offset)); + } + return StringRef(ArMemHdr->Name, NameLen); +} + +// member including the header, so the size of any name following the header +// is checked to make sure it does not overflow. +Expected<StringRef> ArchiveMemberHeader::getName(uint64_t Size) const { + + // This can be called from the ArchiveMemberHeader constructor when the + // archive header is truncated to produce an error message with the name. + // Make sure the name field is not truncated. + if (Size < offsetof(UnixArMemHdrType, Name) + sizeof(ArMemHdr->Name)) { + uint64_t ArchiveOffset = + reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); + return malformedError("archive header truncated before the name field " + "for archive member header at offset " + + Twine(ArchiveOffset)); + } + + // The raw name itself can be invalid. + Expected<StringRef> NameOrErr = getRawName(); + if (!NameOrErr) + return NameOrErr.takeError(); + StringRef Name = NameOrErr.get(); + + // Check if it's a special name. + if (Name[0] == '/') { + if (Name.size() == 1) // Linker member. + return Name; + if (Name.size() == 2 && Name[1] == '/') // String table. + return Name; + // It's a long name. + // Get the string table offset. + std::size_t StringOffset; + if (Name.substr(1).rtrim(' ').getAsInteger(10, StringOffset)) { + std::string Buf; + raw_string_ostream OS(Buf); + OS.write_escaped(Name.substr(1).rtrim(' ')); + OS.flush(); + uint64_t ArchiveOffset = + reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); + return malformedError("long name offset characters after the '/' are " + "not all decimal numbers: '" + + Buf + "' for archive member header at offset " + + Twine(ArchiveOffset)); + } + + // Verify it. 
+ if (StringOffset >= Parent->getStringTable().size()) { + uint64_t ArchiveOffset = + reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); + return malformedError("long name offset " + Twine(StringOffset) + + " past the end of the string table for archive " + "member header at offset " + + Twine(ArchiveOffset)); + } + + // GNU long file names end with a "/\n". + if (Parent->kind() == Archive::K_GNU || + Parent->kind() == Archive::K_GNU64) { + size_t End = Parent->getStringTable().find('\n', /*From=*/StringOffset); + if (End == StringRef::npos || End < 1 || + Parent->getStringTable()[End - 1] != '/') { + // Keep a separating space so the offset and the text do not run + // together in the diagnostic. + return malformedError("string table at long name offset " + + Twine(StringOffset) + " not terminated"); + } + return Parent->getStringTable().slice(StringOffset, End - 1); + } + return Parent->getStringTable().begin() + StringOffset; + } + + if (Name.startswith("#1/")) { + uint64_t NameLength; + if (Name.substr(3).rtrim(' ').getAsInteger(10, NameLength)) { + std::string Buf; + raw_string_ostream OS(Buf); + OS.write_escaped(Name.substr(3).rtrim(' ')); + OS.flush(); + uint64_t ArchiveOffset = + reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); + return malformedError("long name length characters after the #1/ are " + "not all decimal numbers: '" + + Buf + "' for archive member header at offset " + + Twine(ArchiveOffset)); + } + if (getSizeOf() + NameLength > Size) { + uint64_t ArchiveOffset = + reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); + return malformedError("long name length: " + Twine(NameLength) + + " extends past the end of the member or archive " + "for archive member header at offset " + + Twine(ArchiveOffset)); + } + return StringRef(reinterpret_cast<const char *>(ArMemHdr) + getSizeOf(), + NameLength) + .rtrim('\0'); + } + + // It is not a long name so trim the blanks at the end of the name. + if (Name[Name.size() - 1] != '/') + return Name.rtrim(' '); + + // It's a simple name. 
+ return Name.drop_back(1); +} + +Expected<StringRef> BigArchiveMemberHeader::getName(uint64_t Size) const { + return getRawName(); +} + +Expected<uint64_t> ArchiveMemberHeader::getSize() const { + return getArchiveMemberDecField("size", getFieldRawString(ArMemHdr->Size), + Parent, this); +} + +Expected<uint64_t> BigArchiveMemberHeader::getSize() const { + Expected<uint64_t> SizeOrErr = getArchiveMemberDecField( + "size", getFieldRawString(ArMemHdr->Size), Parent, this); + if (!SizeOrErr) + return SizeOrErr.takeError(); + + Expected<uint64_t> NameLenOrErr = getRawNameSize(); + if (!NameLenOrErr) + return NameLenOrErr.takeError(); + + return *SizeOrErr + alignTo(*NameLenOrErr, 2); +} + +Expected<uint64_t> BigArchiveMemberHeader::getRawNameSize() const { + return getArchiveMemberDecField( + "NameLen", getFieldRawString(ArMemHdr->NameLen), Parent, this); +} + +Expected<uint64_t> BigArchiveMemberHeader::getNextOffset() const { + return getArchiveMemberDecField( + "NextOffset", getFieldRawString(ArMemHdr->NextOffset), Parent, this); +} + +Expected<sys::fs::perms> AbstractArchiveMemberHeader::getAccessMode() const { + Expected<uint64_t> AccessModeOrErr = + getArchiveMemberOctField("AccessMode", getRawAccessMode(), Parent, this); + if (!AccessModeOrErr) + return AccessModeOrErr.takeError(); + return static_cast<sys::fs::perms>(*AccessModeOrErr); +} + +Expected<sys::TimePoint<std::chrono::seconds>> +AbstractArchiveMemberHeader::getLastModified() const { + Expected<uint64_t> SecondsOrErr = getArchiveMemberDecField( + "LastModified", getRawLastModified(), Parent, this); + + if (!SecondsOrErr) + return SecondsOrErr.takeError(); + + return sys::toTimePoint(*SecondsOrErr); +} + +Expected<unsigned> AbstractArchiveMemberHeader::getUID() const { + StringRef User = getRawUID(); + if (User.empty()) + return 0; + return getArchiveMemberDecField("UID", User, Parent, this); +} + +Expected<unsigned> AbstractArchiveMemberHeader::getGID() const { + StringRef Group = getRawGID(); + if 
(Group.empty()) + return 0; + return getArchiveMemberDecField("GID", Group, Parent, this); +} + +Expected<bool> ArchiveMemberHeader::isThin() const { + Expected<StringRef> NameOrErr = getRawName(); + if (!NameOrErr) + return NameOrErr.takeError(); + StringRef Name = NameOrErr.get(); + return Parent->isThin() && Name != "/" && Name != "//" && Name != "/SYM64/"; +} + +Expected<const char *> ArchiveMemberHeader::getNextChildLoc() const { + uint64_t Size = getSizeOf(); + Expected<bool> isThinOrErr = isThin(); + if (!isThinOrErr) + return isThinOrErr.takeError(); + + bool isThin = isThinOrErr.get(); + if (!isThin) { + Expected<uint64_t> MemberSize = getSize(); + if (!MemberSize) + return MemberSize.takeError(); + + Size += MemberSize.get(); + } + + // If Size is odd, add 1 to make it even. + const char *NextLoc = + reinterpret_cast<const char *>(ArMemHdr) + alignTo(Size, 2); + + if (NextLoc == Parent->getMemoryBufferRef().getBufferEnd()) + return nullptr; + + return NextLoc; +} + +Expected<const char *> BigArchiveMemberHeader::getNextChildLoc() const { + if (getOffset() == + static_cast<const BigArchive *>(Parent)->getLastChildOffset()) + return nullptr; + + Expected<uint64_t> NextOffsetOrErr = getNextOffset(); + if (!NextOffsetOrErr) + return NextOffsetOrErr.takeError(); + return Parent->getData().data() + NextOffsetOrErr.get(); +} + +Archive::Child::Child(const Archive *Parent, StringRef Data, + uint16_t StartOfFile) + : Parent(Parent), Data(Data), StartOfFile(StartOfFile) { + Header = Parent->createArchiveMemberHeader(Data.data(), Data.size(), nullptr); +} + +Archive::Child::Child(const Archive *Parent, const char *Start, Error *Err) + : Parent(Parent) { + if (!Start) { + Header = nullptr; + return; + } + + Header = Parent->createArchiveMemberHeader( + Start, + Parent ? 
Parent->getData().size() - (Start - Parent->getData().data()) + : 0, + Err); + + // If we are pointed to real data, Start is not a nullptr, then there must be + // a non-null Err pointer available to report malformed data on. Only in + // the case sentinel value is being constructed is Err is permitted to be a + // nullptr. + assert(Err && "Err can't be nullptr if Start is not a nullptr"); + + ErrorAsOutParameter ErrAsOutParam(Err); + + // If there was an error in the construction of the Header + // then just return with the error now set. + if (*Err) + return; + + uint64_t Size = Header->getSizeOf(); + Data = StringRef(Start, Size); + Expected<bool> isThinOrErr = isThinMember(); + if (!isThinOrErr) { + *Err = isThinOrErr.takeError(); + return; + } + bool isThin = isThinOrErr.get(); + if (!isThin) { + Expected<uint64_t> MemberSize = getRawSize(); + if (!MemberSize) { + *Err = MemberSize.takeError(); + return; + } + Size += MemberSize.get(); + Data = StringRef(Start, Size); + } + + // Setup StartOfFile and PaddingBytes. + StartOfFile = Header->getSizeOf(); + // Don't include attached name. + Expected<StringRef> NameOrErr = getRawName(); + if (!NameOrErr) { + *Err = NameOrErr.takeError(); + return; + } + StringRef Name = NameOrErr.get(); + + if (Parent->kind() == Archive::K_AIXBIG) { + // The actual start of the file is after the name and any necessary + // even-alignment padding. 
+ StartOfFile += ((Name.size() + 1) >> 1) << 1; + } else if (Name.startswith("#1/")) { + uint64_t NameSize; + StringRef RawNameSize = Name.substr(3).rtrim(' '); + if (RawNameSize.getAsInteger(10, NameSize)) { + uint64_t Offset = Start - Parent->getData().data(); + *Err = malformedError("long name length characters after the #1/ are " + "not all decimal numbers: '" + + RawNameSize + + "' for archive member header at offset " + + Twine(Offset)); + return; + } + StartOfFile += NameSize; + } +} + +Expected<uint64_t> Archive::Child::getSize() const { + if (Parent->IsThin) + return Header->getSize(); + return Data.size() - StartOfFile; +} + +Expected<uint64_t> Archive::Child::getRawSize() const { + return Header->getSize(); +} + +Expected<bool> Archive::Child::isThinMember() const { return Header->isThin(); } + +Expected<std::string> Archive::Child::getFullName() const { + Expected<bool> isThin = isThinMember(); + if (!isThin) + return isThin.takeError(); + assert(isThin.get()); + Expected<StringRef> NameOrErr = getName(); + if (!NameOrErr) + return NameOrErr.takeError(); + StringRef Name = *NameOrErr; + if (sys::path::is_absolute(Name)) + return std::string(Name); + + SmallString<128> FullName = sys::path::parent_path( + Parent->getMemoryBufferRef().getBufferIdentifier()); + sys::path::append(FullName, Name); + return std::string(FullName.str()); +} + +Expected<StringRef> Archive::Child::getBuffer() const { + Expected<bool> isThinOrErr = isThinMember(); + if (!isThinOrErr) + return isThinOrErr.takeError(); + bool isThin = isThinOrErr.get(); + if (!isThin) { + Expected<uint64_t> Size = getSize(); + if (!Size) + return Size.takeError(); + return StringRef(Data.data() + StartOfFile, Size.get()); + } + Expected<std::string> FullNameOrErr = getFullName(); + if (!FullNameOrErr) + return FullNameOrErr.takeError(); + const std::string &FullName = *FullNameOrErr; + ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName); + if (std::error_code EC = 
Buf.getError()) + return errorCodeToError(EC); + Parent->ThinBuffers.push_back(std::move(*Buf)); + return Parent->ThinBuffers.back()->getBuffer(); +} + +Expected<Archive::Child> Archive::Child::getNext() const { + Expected<const char *> NextLocOrErr = Header->getNextChildLoc(); + if (!NextLocOrErr) + return NextLocOrErr.takeError(); + + const char *NextLoc = *NextLocOrErr; + + // Check to see if this is at the end of the archive. + if (NextLoc == nullptr) + return Child(nullptr, nullptr, nullptr); + + // Check to see if this is past the end of the archive. + if (NextLoc > Parent->Data.getBufferEnd()) { + std::string Msg("offset to next archive member past the end of the archive " + "after member "); + Expected<StringRef> NameOrErr = getName(); + if (!NameOrErr) { + consumeError(NameOrErr.takeError()); + uint64_t Offset = Data.data() - Parent->getData().data(); + return malformedError(Msg + "at offset " + Twine(Offset)); + } else + return malformedError(Msg + NameOrErr.get()); + } + + Error Err = Error::success(); + Child Ret(Parent, NextLoc, &Err); + if (Err) + return std::move(Err); + return Ret; +} + +uint64_t Archive::Child::getChildOffset() const { + const char *a = Parent->Data.getBuffer().data(); + const char *c = Data.data(); + uint64_t offset = c - a; + return offset; +} + +Expected<StringRef> Archive::Child::getName() const { + Expected<uint64_t> RawSizeOrErr = getRawSize(); + if (!RawSizeOrErr) + return RawSizeOrErr.takeError(); + uint64_t RawSize = RawSizeOrErr.get(); + Expected<StringRef> NameOrErr = + Header->getName(Header->getSizeOf() + RawSize); + if (!NameOrErr) + return NameOrErr.takeError(); + StringRef Name = NameOrErr.get(); + return Name; +} + +Expected<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const { + Expected<StringRef> NameOrErr = getName(); + if (!NameOrErr) + return NameOrErr.takeError(); + StringRef Name = NameOrErr.get(); + Expected<StringRef> Buf = getBuffer(); + if (!Buf) + return createFileError(Name, Buf.takeError()); 
+ return MemoryBufferRef(*Buf, Name); +} + +Expected<std::unique_ptr<Binary>> +Archive::Child::getAsBinary(LLVMContext *Context) const { + Expected<MemoryBufferRef> BuffOrErr = getMemoryBufferRef(); + if (!BuffOrErr) + return BuffOrErr.takeError(); + + auto BinaryOrErr = createBinary(BuffOrErr.get(), Context); + if (BinaryOrErr) + return std::move(*BinaryOrErr); + return BinaryOrErr.takeError(); +} + +Expected<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) { + Error Err = Error::success(); + std::unique_ptr<Archive> Ret; + StringRef Buffer = Source.getBuffer(); + + if (Buffer.startswith(BigArchiveMagic)) + Ret = std::make_unique<BigArchive>(Source, Err); + else + Ret = std::make_unique<Archive>(Source, Err); + + if (Err) + return std::move(Err); + return std::move(Ret); +} + +std::unique_ptr<AbstractArchiveMemberHeader> +Archive::createArchiveMemberHeader(const char *RawHeaderPtr, uint64_t Size, + Error *Err) const { + ErrorAsOutParameter ErrAsOutParam(Err); + if (kind() != K_AIXBIG) + return std::make_unique<ArchiveMemberHeader>(this, RawHeaderPtr, Size, Err); + return std::make_unique<BigArchiveMemberHeader>(this, RawHeaderPtr, Size, + Err); +} + +uint64_t Archive::getArchiveMagicLen() const { + if (isThin()) + return sizeof(ThinArchiveMagic) - 1; + + if (Kind() == K_AIXBIG) + return sizeof(BigArchiveMagic) - 1; + + return sizeof(ArchiveMagic) - 1; +} + +void Archive::setFirstRegular(const Child &C) { + FirstRegularData = C.Data; + FirstRegularStartOfFile = C.StartOfFile; +} + +Archive::Archive(MemoryBufferRef Source, Error &Err) + : Binary(Binary::ID_Archive, Source) { + ErrorAsOutParameter ErrAsOutParam(&Err); + StringRef Buffer = Data.getBuffer(); + // Check for sufficient magic. 
+ if (Buffer.startswith(ThinArchiveMagic)) { + IsThin = true; + } else if (Buffer.startswith(ArchiveMagic)) { + IsThin = false; + } else if (Buffer.startswith(BigArchiveMagic)) { + Format = K_AIXBIG; + IsThin = false; + return; + } else { + Err = make_error<GenericBinaryError>("file too small to be an archive", + object_error::invalid_file_type); + return; + } + + // Make sure Format is initialized before any call to + // ArchiveMemberHeader::getName() is made. This could be a valid empty + // archive which is the same in all formats. So claiming it to be gnu to is + // fine if not totally correct before we look for a string table or table of + // contents. + Format = K_GNU; + + // Get the special members. + child_iterator I = child_begin(Err, false); + if (Err) + return; + child_iterator E = child_end(); + + // See if this is a valid empty archive and if so return. + if (I == E) { + Err = Error::success(); + return; + } + const Child *C = &*I; + + auto Increment = [&]() { + ++I; + if (Err) + return true; + C = &*I; + return false; + }; + + Expected<StringRef> NameOrErr = C->getRawName(); + if (!NameOrErr) { + Err = NameOrErr.takeError(); + return; + } + StringRef Name = NameOrErr.get(); + + // Below is the pattern that is used to figure out the archive format + // GNU archive format + // First member : / (may exist, if it exists, points to the symbol table ) + // Second member : // (may exist, if it exists, points to the string table) + // Note : The string table is used if the filename exceeds 15 characters + // BSD archive format + // First member : __.SYMDEF or "__.SYMDEF SORTED" (the symbol table) + // There is no string table, if the filename exceeds 15 characters or has a + // embedded space, the filename has #1/<size>, The size represents the size + // of the filename that needs to be read after the archive header + // COFF archive format + // First member : / + // Second member : / (provides a directory of symbols) + // Third member : // (may exist, if it 
exists, contains the string table) + // Note: Microsoft PE/COFF Spec 8.3 says that the third member is present + // even if the string table is empty. However, lib.exe does not in fact + // seem to create the third member if there's no member whose filename + // exceeds 15 characters. So the third member is optional. + + if (Name == "__.SYMDEF" || Name == "__.SYMDEF_64") { + if (Name == "__.SYMDEF") + Format = K_BSD; + else // Name == "__.SYMDEF_64" + Format = K_DARWIN64; + // We know that the symbol table is not an external file, but we still must + // check any Expected<> return value. + Expected<StringRef> BufOrErr = C->getBuffer(); + if (!BufOrErr) { + Err = BufOrErr.takeError(); + return; + } + SymbolTable = BufOrErr.get(); + if (Increment()) + return; + setFirstRegular(*C); + + Err = Error::success(); + return; + } + + if (Name.startswith("#1/")) { + Format = K_BSD; + // We know this is BSD, so getName will work since there is no string table. + Expected<StringRef> NameOrErr = C->getName(); + if (!NameOrErr) { + Err = NameOrErr.takeError(); + return; + } + Name = NameOrErr.get(); + if (Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF") { + // We know that the symbol table is not an external file, but we still + // must check any Expected<> return value. + Expected<StringRef> BufOrErr = C->getBuffer(); + if (!BufOrErr) { + Err = BufOrErr.takeError(); + return; + } + SymbolTable = BufOrErr.get(); + if (Increment()) + return; + } else if (Name == "__.SYMDEF_64 SORTED" || Name == "__.SYMDEF_64") { + Format = K_DARWIN64; + // We know that the symbol table is not an external file, but we still + // must check any Expected<> return value. + Expected<StringRef> BufOrErr = C->getBuffer(); + if (!BufOrErr) { + Err = BufOrErr.takeError(); + return; + } + SymbolTable = BufOrErr.get(); + if (Increment()) + return; + } + setFirstRegular(*C); + return; + } + + // MIPS 64-bit ELF archives use a special format of a symbol table. 
+ // This format is marked by `ar_name` field equals to "/SYM64/". + // For detailed description see page 96 in the following document: + // http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf + + bool has64SymTable = false; + if (Name == "/" || Name == "/SYM64/") { + // We know that the symbol table is not an external file, but we still + // must check any Expected<> return value. + Expected<StringRef> BufOrErr = C->getBuffer(); + if (!BufOrErr) { + Err = BufOrErr.takeError(); + return; + } + SymbolTable = BufOrErr.get(); + if (Name == "/SYM64/") + has64SymTable = true; + + if (Increment()) + return; + if (I == E) { + Err = Error::success(); + return; + } + Expected<StringRef> NameOrErr = C->getRawName(); + if (!NameOrErr) { + Err = NameOrErr.takeError(); + return; + } + Name = NameOrErr.get(); + } + + if (Name == "//") { + Format = has64SymTable ? K_GNU64 : K_GNU; + // The string table is never an external member, but we still + // must check any Expected<> return value. + Expected<StringRef> BufOrErr = C->getBuffer(); + if (!BufOrErr) { + Err = BufOrErr.takeError(); + return; + } + StringTable = BufOrErr.get(); + if (Increment()) + return; + setFirstRegular(*C); + Err = Error::success(); + return; + } + + if (Name[0] != '/') { + Format = has64SymTable ? K_GNU64 : K_GNU; + setFirstRegular(*C); + Err = Error::success(); + return; + } + + if (Name != "/") { + Err = errorCodeToError(object_error::parse_failed); + return; + } + + Format = K_COFF; + // We know that the symbol table is not an external file, but we still + // must check any Expected<> return value. 
+ Expected<StringRef> BufOrErr = C->getBuffer(); + if (!BufOrErr) { + Err = BufOrErr.takeError(); + return; + } + SymbolTable = BufOrErr.get(); + + if (Increment()) + return; + + if (I == E) { + setFirstRegular(*C); + Err = Error::success(); + return; + } + + NameOrErr = C->getRawName(); + if (!NameOrErr) { + Err = NameOrErr.takeError(); + return; + } + Name = NameOrErr.get(); + + if (Name == "//") { + // The string table is never an external member, but we still + // must check any Expected<> return value. + Expected<StringRef> BufOrErr = C->getBuffer(); + if (!BufOrErr) { + Err = BufOrErr.takeError(); + return; + } + StringTable = BufOrErr.get(); + if (Increment()) + return; + } + + setFirstRegular(*C); + Err = Error::success(); +} + +Archive::child_iterator Archive::child_begin(Error &Err, + bool SkipInternal) const { + if (isEmpty()) + return child_end(); + + if (SkipInternal) + return child_iterator::itr( + Child(this, FirstRegularData, FirstRegularStartOfFile), Err); + + const char *Loc = Data.getBufferStart() + getFirstChildOffset(); + Child C(this, Loc, &Err); + if (Err) + return child_end(); + return child_iterator::itr(C, Err); +} + +Archive::child_iterator Archive::child_end() const { + return child_iterator::end(Child(nullptr, nullptr, nullptr)); +} + +StringRef Archive::Symbol::getName() const { + return Parent->getSymbolTable().begin() + StringIndex; +} + +Expected<Archive::Child> Archive::Symbol::getMember() const { + const char *Buf = Parent->getSymbolTable().begin(); + const char *Offsets = Buf; + if (Parent->kind() == K_GNU64 || Parent->kind() == K_DARWIN64) + Offsets += sizeof(uint64_t); + else + Offsets += sizeof(uint32_t); + uint64_t Offset = 0; + if (Parent->kind() == K_GNU) { + Offset = read32be(Offsets + SymbolIndex * 4); + } else if (Parent->kind() == K_GNU64) { + Offset = read64be(Offsets + SymbolIndex * 8); + } else if (Parent->kind() == K_BSD) { + // The SymbolIndex is an index into the ranlib structs that start at + // Offsets (the 
first uint32_t is the number of bytes of the ranlib + // structs). The ranlib structs are a pair of uint32_t's the first + // being a string table offset and the second being the offset into + // the archive of the member that defines the symbol. Which is what + // is needed here. + Offset = read32le(Offsets + SymbolIndex * 8 + 4); + } else if (Parent->kind() == K_DARWIN64) { + // The SymbolIndex is an index into the ranlib_64 structs that start at + // Offsets (the first uint64_t is the number of bytes of the ranlib_64 + // structs). The ranlib_64 structs are a pair of uint64_t's the first + // being a string table offset and the second being the offset into + // the archive of the member that defines the symbol. Which is what + // is needed here. + Offset = read64le(Offsets + SymbolIndex * 16 + 8); + } else { + // Skip offsets. + uint32_t MemberCount = read32le(Buf); + Buf += MemberCount * 4 + 4; + + uint32_t SymbolCount = read32le(Buf); + if (SymbolIndex >= SymbolCount) + return errorCodeToError(object_error::parse_failed); + + // Skip SymbolCount to get to the indices table. + const char *Indices = Buf + 4; + + // Get the index of the offset in the file member offset table for this + // symbol. + uint16_t OffsetIndex = read16le(Indices + SymbolIndex * 2); + // Subtract 1 since OffsetIndex is 1 based. + --OffsetIndex; + + if (OffsetIndex >= MemberCount) + return errorCodeToError(object_error::parse_failed); + + Offset = read32le(Offsets + OffsetIndex * 4); + } + + const char *Loc = Parent->getData().begin() + Offset; + Error Err = Error::success(); + Child C(Parent, Loc, &Err); + if (Err) + return std::move(Err); + return C; +} + +Archive::Symbol Archive::Symbol::getNext() const { + Symbol t(*this); + if (Parent->kind() == K_BSD) { + // t.StringIndex is an offset from the start of the __.SYMDEF or + // "__.SYMDEF SORTED" member into the string table for the ranlib + // struct indexed by t.SymbolIndex . 
To change t.StringIndex to the + // offset in the string table for t.SymbolIndex+1 we subtract the + // its offset from the start of the string table for t.SymbolIndex + // and add the offset of the string table for t.SymbolIndex+1. + + // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t + // which is the number of bytes of ranlib structs that follow. The ranlib + // structs are a pair of uint32_t's the first being a string table offset + // and the second being the offset into the archive of the member that + // define the symbol. After that the next uint32_t is the byte count of + // the string table followed by the string table. + const char *Buf = Parent->getSymbolTable().begin(); + uint32_t RanlibCount = 0; + RanlibCount = read32le(Buf) / 8; + // If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount) + // don't change the t.StringIndex as we don't want to reference a ranlib + // past RanlibCount. + if (t.SymbolIndex + 1 < RanlibCount) { + const char *Ranlibs = Buf + 4; + uint32_t CurRanStrx = 0; + uint32_t NextRanStrx = 0; + CurRanStrx = read32le(Ranlibs + t.SymbolIndex * 8); + NextRanStrx = read32le(Ranlibs + (t.SymbolIndex + 1) * 8); + t.StringIndex -= CurRanStrx; + t.StringIndex += NextRanStrx; + } + } else { + // Go to one past next null. 
+ t.StringIndex = Parent->getSymbolTable().find('\0', t.StringIndex) + 1; + } + ++t.SymbolIndex; + return t; +} + +Archive::symbol_iterator Archive::symbol_begin() const { + if (!hasSymbolTable()) + return symbol_iterator(Symbol(this, 0, 0)); + + const char *buf = getSymbolTable().begin(); + if (kind() == K_GNU) { + uint32_t symbol_count = 0; + symbol_count = read32be(buf); + buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t))); + } else if (kind() == K_GNU64) { + uint64_t symbol_count = read64be(buf); + buf += sizeof(uint64_t) + (symbol_count * (sizeof(uint64_t))); + } else if (kind() == K_BSD) { + // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t + // which is the number of bytes of ranlib structs that follow. The ranlib + // structs are a pair of uint32_t's the first being a string table offset + // and the second being the offset into the archive of the member that + // define the symbol. After that the next uint32_t is the byte count of + // the string table followed by the string table. + uint32_t ranlib_count = 0; + ranlib_count = read32le(buf) / 8; + const char *ranlibs = buf + 4; + uint32_t ran_strx = 0; + ran_strx = read32le(ranlibs); + buf += sizeof(uint32_t) + (ranlib_count * (2 * (sizeof(uint32_t)))); + // Skip the byte count of the string table. + buf += sizeof(uint32_t); + buf += ran_strx; + } else if (kind() == K_DARWIN64) { + // The __.SYMDEF_64 or "__.SYMDEF_64 SORTED" member starts with a uint64_t + // which is the number of bytes of ranlib_64 structs that follow. The + // ranlib_64 structs are a pair of uint64_t's the first being a string + // table offset and the second being the offset into the archive of the + // member that define the symbol. After that the next uint64_t is the byte + // count of the string table followed by the string table. 
+ uint64_t ranlib_count = 0; + ranlib_count = read64le(buf) / 16; + const char *ranlibs = buf + 8; + uint64_t ran_strx = 0; + ran_strx = read64le(ranlibs); + buf += sizeof(uint64_t) + (ranlib_count * (2 * (sizeof(uint64_t)))); + // Skip the byte count of the string table. + buf += sizeof(uint64_t); + buf += ran_strx; + } else { + uint32_t member_count = 0; + uint32_t symbol_count = 0; + member_count = read32le(buf); + buf += 4 + (member_count * 4); // Skip offsets. + symbol_count = read32le(buf); + buf += 4 + (symbol_count * 2); // Skip indices. + } + uint32_t string_start_offset = buf - getSymbolTable().begin(); + return symbol_iterator(Symbol(this, 0, string_start_offset)); +} + +Archive::symbol_iterator Archive::symbol_end() const { + return symbol_iterator(Symbol(this, getNumberOfSymbols(), 0)); +} + +uint32_t Archive::getNumberOfSymbols() const { + if (!hasSymbolTable()) + return 0; + const char *buf = getSymbolTable().begin(); + if (kind() == K_GNU) + return read32be(buf); + if (kind() == K_GNU64) + return read64be(buf); + if (kind() == K_BSD) + return read32le(buf) / 8; + if (kind() == K_DARWIN64) + return read64le(buf) / 16; + uint32_t member_count = 0; + member_count = read32le(buf); + buf += 4 + (member_count * 4); // Skip offsets. + return read32le(buf); +} + +Expected<Optional<Archive::Child>> Archive::findSym(StringRef name) const { + Archive::symbol_iterator bs = symbol_begin(); + Archive::symbol_iterator es = symbol_end(); + + for (; bs != es; ++bs) { + StringRef SymName = bs->getName(); + if (SymName == name) { + if (auto MemberOrErr = bs->getMember()) + return Child(*MemberOrErr); + else + return MemberOrErr.takeError(); + } + } + return Optional<Child>(); +} + +// Returns true if archive file contains no member file. 
+bool Archive::isEmpty() const { + return Data.getBufferSize() == getArchiveMagicLen(); +} + +bool Archive::hasSymbolTable() const { return !SymbolTable.empty(); } + +BigArchive::BigArchive(MemoryBufferRef Source, Error &Err) + : Archive(Source, Err) { + ErrorAsOutParameter ErrAsOutParam(&Err); + StringRef Buffer = Data.getBuffer(); + ArFixLenHdr = reinterpret_cast<const FixLenHdr *>(Buffer.data()); + + StringRef RawOffset = getFieldRawString(ArFixLenHdr->FirstChildOffset); + if (RawOffset.getAsInteger(10, FirstChildOffset)) + // TODO: Out-of-line. + Err = malformedError("malformed AIX big archive: first member offset \"" + + RawOffset + "\" is not a number"); + + RawOffset = getFieldRawString(ArFixLenHdr->LastChildOffset); + if (RawOffset.getAsInteger(10, LastChildOffset)) + // TODO: Out-of-line. + Err = malformedError("malformed AIX big archive: last member offset \"" + + RawOffset + "\" is not a number"); + + child_iterator I = child_begin(Err, false); + if (Err) + return; + child_iterator E = child_end(); + if (I == E) { + Err = Error::success(); + return; + } + setFirstRegular(*I); + Err = Error::success(); +} diff --git a/contrib/libs/llvm14/lib/Object/ArchiveWriter.cpp b/contrib/libs/llvm14/lib/Object/ArchiveWriter.cpp new file mode 100644 index 0000000000..053b3dafed --- /dev/null +++ b/contrib/libs/llvm14/lib/Object/ArchiveWriter.cpp @@ -0,0 +1,704 @@ +//===- ArchiveWriter.cpp - ar File Format implementation --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the writeArchive function. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/ArchiveWriter.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Magic.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/Error.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Object/SymbolicFile.h" +#include "llvm/Support/Alignment.h" +#include "llvm/Support/EndianStream.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/SmallVectorMemoryBuffer.h" +#include "llvm/Support/ToolOutputFile.h" +#include "llvm/Support/raw_ostream.h" + +#include <map> + +#if !defined(_MSC_VER) && !defined(__MINGW32__) +#include <unistd.h> +#else +#include <io.h> +#endif + +using namespace llvm; + +NewArchiveMember::NewArchiveMember(MemoryBufferRef BufRef) + : Buf(MemoryBuffer::getMemBuffer(BufRef, false)), + MemberName(BufRef.getBufferIdentifier()) {} + +Expected<NewArchiveMember> +NewArchiveMember::getOldMember(const object::Archive::Child &OldMember, + bool Deterministic) { + Expected<llvm::MemoryBufferRef> BufOrErr = OldMember.getMemoryBufferRef(); + if (!BufOrErr) + return BufOrErr.takeError(); + + NewArchiveMember M; + M.Buf = MemoryBuffer::getMemBuffer(*BufOrErr, false); + M.MemberName = M.Buf->getBufferIdentifier(); + if (!Deterministic) { + auto ModTimeOrErr = OldMember.getLastModified(); + if (!ModTimeOrErr) + return ModTimeOrErr.takeError(); + M.ModTime = ModTimeOrErr.get(); + Expected<unsigned> UIDOrErr = OldMember.getUID(); + if (!UIDOrErr) + return UIDOrErr.takeError(); + M.UID = UIDOrErr.get(); + Expected<unsigned> GIDOrErr = OldMember.getGID(); + if (!GIDOrErr) + return GIDOrErr.takeError(); + M.GID = GIDOrErr.get(); + Expected<sys::fs::perms> AccessModeOrErr = OldMember.getAccessMode(); + if (!AccessModeOrErr) + return 
AccessModeOrErr.takeError(); + M.Perms = AccessModeOrErr.get(); + } + return std::move(M); +} + +Expected<NewArchiveMember> NewArchiveMember::getFile(StringRef FileName, + bool Deterministic) { + sys::fs::file_status Status; + auto FDOrErr = sys::fs::openNativeFileForRead(FileName); + if (!FDOrErr) + return FDOrErr.takeError(); + sys::fs::file_t FD = *FDOrErr; + assert(FD != sys::fs::kInvalidFile); + + if (auto EC = sys::fs::status(FD, Status)) + return errorCodeToError(EC); + + // Opening a directory doesn't make sense. Let it fail. + // Linux cannot open directories with open(2), although + // cygwin and *bsd can. + if (Status.type() == sys::fs::file_type::directory_file) + return errorCodeToError(make_error_code(errc::is_a_directory)); + + ErrorOr<std::unique_ptr<MemoryBuffer>> MemberBufferOrErr = + MemoryBuffer::getOpenFile(FD, FileName, Status.getSize(), false); + if (!MemberBufferOrErr) + return errorCodeToError(MemberBufferOrErr.getError()); + + if (auto EC = sys::fs::closeFile(FD)) + return errorCodeToError(EC); + + NewArchiveMember M; + M.Buf = std::move(*MemberBufferOrErr); + M.MemberName = M.Buf->getBufferIdentifier(); + if (!Deterministic) { + M.ModTime = std::chrono::time_point_cast<std::chrono::seconds>( + Status.getLastModificationTime()); + M.UID = Status.getUser(); + M.GID = Status.getGroup(); + M.Perms = Status.permissions(); + } + return std::move(M); +} + +template <typename T> +static void printWithSpacePadding(raw_ostream &OS, T Data, unsigned Size) { + uint64_t OldPos = OS.tell(); + OS << Data; + unsigned SizeSoFar = OS.tell() - OldPos; + assert(SizeSoFar <= Size && "Data doesn't fit in Size"); + OS.indent(Size - SizeSoFar); +} + +static bool isDarwin(object::Archive::Kind Kind) { + return Kind == object::Archive::K_DARWIN || + Kind == object::Archive::K_DARWIN64; +} + +static bool isBSDLike(object::Archive::Kind Kind) { + switch (Kind) { + case object::Archive::K_GNU: + case object::Archive::K_GNU64: + return false; + case 
object::Archive::K_BSD: + case object::Archive::K_DARWIN: + case object::Archive::K_DARWIN64: + return true; + case object::Archive::K_AIXBIG: + case object::Archive::K_COFF: + break; + } + llvm_unreachable("not supported for writting"); +} + +template <class T> +static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val) { + support::endian::write(Out, Val, + isBSDLike(Kind) ? support::little : support::big); +} + +static void printRestOfMemberHeader( + raw_ostream &Out, const sys::TimePoint<std::chrono::seconds> &ModTime, + unsigned UID, unsigned GID, unsigned Perms, uint64_t Size) { + printWithSpacePadding(Out, sys::toTimeT(ModTime), 12); + + // The format has only 6 chars for uid and gid. Truncate if the provided + // values don't fit. + printWithSpacePadding(Out, UID % 1000000, 6); + printWithSpacePadding(Out, GID % 1000000, 6); + + printWithSpacePadding(Out, format("%o", Perms), 8); + printWithSpacePadding(Out, Size, 10); + Out << "`\n"; +} + +static void +printGNUSmallMemberHeader(raw_ostream &Out, StringRef Name, + const sys::TimePoint<std::chrono::seconds> &ModTime, + unsigned UID, unsigned GID, unsigned Perms, + uint64_t Size) { + printWithSpacePadding(Out, Twine(Name) + "/", 16); + printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, Size); +} + +static void +printBSDMemberHeader(raw_ostream &Out, uint64_t Pos, StringRef Name, + const sys::TimePoint<std::chrono::seconds> &ModTime, + unsigned UID, unsigned GID, unsigned Perms, uint64_t Size) { + uint64_t PosAfterHeader = Pos + 60 + Name.size(); + // Pad so that even 64 bit object files are aligned. 
+ unsigned Pad = offsetToAlignment(PosAfterHeader, Align(8)); + unsigned NameWithPadding = Name.size() + Pad; + printWithSpacePadding(Out, Twine("#1/") + Twine(NameWithPadding), 16); + printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, + NameWithPadding + Size); + Out << Name; + while (Pad--) + Out.write(uint8_t(0)); +} + +static bool useStringTable(bool Thin, StringRef Name) { + return Thin || Name.size() >= 16 || Name.contains('/'); +} + +static bool is64BitKind(object::Archive::Kind Kind) { + switch (Kind) { + case object::Archive::K_GNU: + case object::Archive::K_BSD: + case object::Archive::K_DARWIN: + case object::Archive::K_COFF: + case object::Archive::K_AIXBIG: + return false; + case object::Archive::K_DARWIN64: + case object::Archive::K_GNU64: + return true; + } + llvm_unreachable("not supported for writting"); +} + +static void +printMemberHeader(raw_ostream &Out, uint64_t Pos, raw_ostream &StringTable, + StringMap<uint64_t> &MemberNames, object::Archive::Kind Kind, + bool Thin, const NewArchiveMember &M, + sys::TimePoint<std::chrono::seconds> ModTime, uint64_t Size) { + if (isBSDLike(Kind)) + return printBSDMemberHeader(Out, Pos, M.MemberName, ModTime, M.UID, M.GID, + M.Perms, Size); + if (!useStringTable(Thin, M.MemberName)) + return printGNUSmallMemberHeader(Out, M.MemberName, ModTime, M.UID, M.GID, + M.Perms, Size); + Out << '/'; + uint64_t NamePos; + if (Thin) { + NamePos = StringTable.tell(); + StringTable << M.MemberName << "/\n"; + } else { + auto Insertion = MemberNames.insert({M.MemberName, uint64_t(0)}); + if (Insertion.second) { + Insertion.first->second = StringTable.tell(); + StringTable << M.MemberName << "/\n"; + } + NamePos = Insertion.first->second; + } + printWithSpacePadding(Out, NamePos, 15); + printRestOfMemberHeader(Out, ModTime, M.UID, M.GID, M.Perms, Size); +} + +namespace { +struct MemberData { + std::vector<unsigned> Symbols; + std::string Header; + StringRef Data; + StringRef Padding; +}; +} // namespace + +static 
MemberData computeStringTable(StringRef Names) { + unsigned Size = Names.size(); + unsigned Pad = offsetToAlignment(Size, Align(2)); + std::string Header; + raw_string_ostream Out(Header); + printWithSpacePadding(Out, "//", 48); + printWithSpacePadding(Out, Size + Pad, 10); + Out << "`\n"; + Out.flush(); + return {{}, std::move(Header), Names, Pad ? "\n" : ""}; +} + +static sys::TimePoint<std::chrono::seconds> now(bool Deterministic) { + using namespace std::chrono; + + if (!Deterministic) + return time_point_cast<seconds>(system_clock::now()); + return sys::TimePoint<seconds>(); +} + +static bool isArchiveSymbol(const object::BasicSymbolRef &S) { + Expected<uint32_t> SymFlagsOrErr = S.getFlags(); + if (!SymFlagsOrErr) + // TODO: Actually report errors helpfully. + report_fatal_error(SymFlagsOrErr.takeError()); + if (*SymFlagsOrErr & object::SymbolRef::SF_FormatSpecific) + return false; + if (!(*SymFlagsOrErr & object::SymbolRef::SF_Global)) + return false; + if (*SymFlagsOrErr & object::SymbolRef::SF_Undefined) + return false; + return true; +} + +static void printNBits(raw_ostream &Out, object::Archive::Kind Kind, + uint64_t Val) { + if (is64BitKind(Kind)) + print<uint64_t>(Out, Kind, Val); + else + print<uint32_t>(Out, Kind, Val); +} + +static uint64_t computeSymbolTableSize(object::Archive::Kind Kind, + uint64_t NumSyms, uint64_t OffsetSize, + StringRef StringTable, + uint32_t *Padding = nullptr) { + assert((OffsetSize == 4 || OffsetSize == 8) && "Unsupported OffsetSize"); + uint64_t Size = OffsetSize; // Number of entries + if (isBSDLike(Kind)) + Size += NumSyms * OffsetSize * 2; // Table + else + Size += NumSyms * OffsetSize; // Table + if (isBSDLike(Kind)) + Size += OffsetSize; // byte count + Size += StringTable.size(); + // ld64 expects the members to be 8-byte aligned for 64-bit content and at + // least 4-byte aligned for 32-bit content. Opt for the larger encoding + // uniformly. 
+ // We do this for all bsd formats because it simplifies aligning members. + uint32_t Pad = offsetToAlignment(Size, Align(isBSDLike(Kind) ? 8 : 2)); + Size += Pad; + if (Padding) + *Padding = Pad; + return Size; +} + +static void writeSymbolTableHeader(raw_ostream &Out, object::Archive::Kind Kind, + bool Deterministic, uint64_t Size) { + if (isBSDLike(Kind)) { + const char *Name = is64BitKind(Kind) ? "__.SYMDEF_64" : "__.SYMDEF"; + printBSDMemberHeader(Out, Out.tell(), Name, now(Deterministic), 0, 0, 0, + Size); + } else { + const char *Name = is64BitKind(Kind) ? "/SYM64" : ""; + printGNUSmallMemberHeader(Out, Name, now(Deterministic), 0, 0, 0, Size); + } +} + +static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, + bool Deterministic, ArrayRef<MemberData> Members, + StringRef StringTable) { + // We don't write a symbol table on an archive with no members -- except on + // Darwin, where the linker will abort unless the archive has a symbol table. + if (StringTable.empty() && !isDarwin(Kind)) + return; + + unsigned NumSyms = 0; + for (const MemberData &M : Members) + NumSyms += M.Symbols.size(); + + uint64_t OffsetSize = is64BitKind(Kind) ? 
8 : 4; + uint32_t Pad; + uint64_t Size = computeSymbolTableSize(Kind, NumSyms, OffsetSize, StringTable, &Pad); + writeSymbolTableHeader(Out, Kind, Deterministic, Size); + + uint64_t Pos = Out.tell() + Size; + + if (isBSDLike(Kind)) + printNBits(Out, Kind, NumSyms * 2 * OffsetSize); + else + printNBits(Out, Kind, NumSyms); + + for (const MemberData &M : Members) { + for (unsigned StringOffset : M.Symbols) { + if (isBSDLike(Kind)) + printNBits(Out, Kind, StringOffset); + printNBits(Out, Kind, Pos); // member offset + } + Pos += M.Header.size() + M.Data.size() + M.Padding.size(); + } + + if (isBSDLike(Kind)) + // byte count of the string table + printNBits(Out, Kind, StringTable.size()); + Out << StringTable; + + while (Pad--) + Out.write(uint8_t(0)); +} + +static Expected<std::vector<unsigned>> +getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) { + std::vector<unsigned> Ret; + + // In the scenario when LLVMContext is populated SymbolicFile will contain a + // reference to it, thus SymbolicFile should be destroyed first. + LLVMContext Context; + std::unique_ptr<object::SymbolicFile> Obj; + + const file_magic Type = identify_magic(Buf.getBuffer()); + // Treat unsupported file types as having no symbols. 
/// Builds the per-member data (header text, contents, padding and symbol
/// name offsets) for every entry of \p NewMembers.
///
/// Long member names are appended to \p StringTable by printMemberHeader;
/// when \p NeedSymbols is set, symbol names are appended to \p SymNames by
/// getSymbols. Returns an error if a member exceeds
/// object::Archive::MaxMemberSize or if symbol extraction fails.
static Expected<std::vector<MemberData>>
computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
                  object::Archive::Kind Kind, bool Thin, bool Deterministic,
                  bool NeedSymbols, ArrayRef<NewArchiveMember> NewMembers) {
  // Source of newline padding bytes; each member's Padding is a prefix of it.
  static char PaddingData[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'};

  // This ignores the symbol table, but we only need the value mod 8 and the
  // symbol table is aligned to be a multiple of 8 bytes
  uint64_t Pos = 0;

  std::vector<MemberData> Ret;
  bool HasObject = false;

  // Deduplicate long member names in the string table and reuse earlier name
  // offsets. This especially saves space for COFF Import libraries where all
  // members have the same name.
  StringMap<uint64_t> MemberNames;

  // UniqueTimestamps is a special case to improve debugging on Darwin:
  //
  // The Darwin linker does not link debug info into the final
  // binary. Instead, it emits entries of type N_OSO in the output
  // binary's symbol table, containing references to the linked-in
  // object files. Using that reference, the debugger can read the
  // debug data directly from the object files. Alternatively, an
  // invocation of 'dsymutil' will link the debug data from the object
  // files into a dSYM bundle, which can be loaded by the debugger,
  // instead of the object files.
  //
  // For an object file, the N_OSO entries contain the absolute
  // path to the file, and the file's timestamp. For an object
  // included in an archive, the path is formatted like
  // "/absolute/path/to/archive.a(member.o)", and the timestamp is the
  // archive member's timestamp, rather than the archive's timestamp.
  //
  // However, this doesn't always uniquely identify an object within
  // an archive -- an archive file can have multiple entries with the
  // same filename. (This will happen commonly if the original object
  // files started in different directories.) The only way they get
  // distinguished, then, is via the timestamp. But this process is
  // unable to find the correct object file in the archive when there
  // are two files of the same name and timestamp.
  //
  // Additionally, timestamp==0 is treated specially, and causes the
  // timestamp to be ignored as a match criteria.
  //
  // That will "usually" work out okay when creating an archive not in
  // deterministic timestamp mode, because the objects will probably
  // have been created at different timestamps.
  //
  // To ameliorate this problem, in deterministic archive mode (which
  // is the default), on Darwin we will emit a unique non-zero
  // timestamp for each entry with a duplicated name. This is still
  // deterministic: the only thing affecting that timestamp is the
  // order of the files in the resultant archive.
  //
  // See also the functions that handle the lookup:
  //  in lldb: ObjectContainerBSDArchive::Archive::FindObject()
  //  in llvm/tools/dsymutil: BinaryHolder::GetArchiveMemberBuffers().
  bool UniqueTimestamps = Deterministic && isDarwin(Kind);
  std::map<StringRef, unsigned> FilenameCount;
  if (UniqueTimestamps) {
    for (const NewArchiveMember &M : NewMembers)
      FilenameCount[M.MemberName]++;
    // Turn the counts into starting timestamps: names that occur more than
    // once start at 1, names that occur once stay at 0.
    for (auto &Entry : FilenameCount)
      Entry.second = Entry.second > 1 ? 1 : 0;
  }

  for (const NewArchiveMember &M : NewMembers) {
    std::string Header;
    raw_string_ostream Out(Header);

    MemoryBufferRef Buf = M.Buf->getMemBufferRef();
    // Thin archives carry no member contents, only headers.
    StringRef Data = Thin ? "" : Buf.getBuffer();

    // ld64 expects the members to be 8-byte aligned for 64-bit content and at
    // least 4-byte aligned for 32-bit content.  Opt for the larger encoding
    // uniformly.  This matches the behaviour with cctools and ensures that ld64
    // is happy with archives that we generate.
    unsigned MemberPadding =
        isDarwin(Kind) ? offsetToAlignment(Data.size(), Align(8)) : 0;
    unsigned TailPadding =
        offsetToAlignment(Data.size() + MemberPadding, Align(2));
    StringRef Padding = StringRef(PaddingData, MemberPadding + TailPadding);

    sys::TimePoint<std::chrono::seconds> ModTime;
    if (UniqueTimestamps)
      // Increment timestamp for each file of a given name.
      ModTime = sys::toTimePoint(FilenameCount[M.MemberName]++);
    else
      ModTime = M.ModTime;

    // The size recorded in the header includes the Darwin member padding.
    uint64_t Size = Buf.getBufferSize() + MemberPadding;
    if (Size > object::Archive::MaxMemberSize) {
      std::string StringMsg =
          "File " + M.MemberName.str() + " exceeds size limit";
      return make_error<object::GenericBinaryError>(
          std::move(StringMsg), object::object_error::parse_failed);
    }

    printMemberHeader(Out, Pos, StringTable, MemberNames, Kind, Thin, M,
                      ModTime, Size);
    // Make sure the header text has reached the Header string before its
    // size is read below.
    Out.flush();

    std::vector<unsigned> Symbols;
    if (NeedSymbols) {
      Expected<std::vector<unsigned>> SymbolsOrErr =
          getSymbols(Buf, SymNames, HasObject);
      if (auto E = SymbolsOrErr.takeError())
        return std::move(E);
      Symbols = std::move(*SymbolsOrErr);
    }

    Pos += Header.size() + Data.size() + Padding.size();
    Ret.push_back({std::move(Symbols), std::move(Header), Data, Padding});
  }
  // If there are no symbols, emit an empty symbol table, to satisfy Solaris
  // tools, older versions of which expect a symbol table in a non-empty
  // archive, regardless of whether there are any symbols in it.
  if (HasObject && SymNames.tell() == 0)
    SymNames << '\0' << '\0' << '\0';
  return Ret;
}
+Expected<std::string> computeArchiveRelativePath(StringRef From, StringRef To) { + ErrorOr<SmallString<128>> PathToOrErr = canonicalizePath(To); + ErrorOr<SmallString<128>> DirFromOrErr = canonicalizePath(From); + if (!PathToOrErr || !DirFromOrErr) + return errorCodeToError(std::error_code(errno, std::generic_category())); + + const SmallString<128> &PathTo = *PathToOrErr; + const SmallString<128> &DirFrom = sys::path::parent_path(*DirFromOrErr); + + // Can't construct a relative path between different roots + if (sys::path::root_name(PathTo) != sys::path::root_name(DirFrom)) + return sys::path::convert_to_slash(PathTo); + + // Skip common prefixes + auto FromTo = + std::mismatch(sys::path::begin(DirFrom), sys::path::end(DirFrom), + sys::path::begin(PathTo)); + auto FromI = FromTo.first; + auto ToI = FromTo.second; + + // Construct relative path + SmallString<128> Relative; + for (auto FromE = sys::path::end(DirFrom); FromI != FromE; ++FromI) + sys::path::append(Relative, sys::path::Style::posix, ".."); + + for (auto ToE = sys::path::end(PathTo); ToI != ToE; ++ToI) + sys::path::append(Relative, sys::path::Style::posix, *ToI); + + return std::string(Relative.str()); +} + +static Error writeArchiveToStream(raw_ostream &Out, + ArrayRef<NewArchiveMember> NewMembers, + bool WriteSymtab, object::Archive::Kind Kind, + bool Deterministic, bool Thin) { + assert((!Thin || !isBSDLike(Kind)) && "Only the gnu format has a thin mode"); + + SmallString<0> SymNamesBuf; + raw_svector_ostream SymNames(SymNamesBuf); + SmallString<0> StringTableBuf; + raw_svector_ostream StringTable(StringTableBuf); + + Expected<std::vector<MemberData>> DataOrErr = + computeMemberData(StringTable, SymNames, Kind, Thin, Deterministic, + WriteSymtab, NewMembers); + if (Error E = DataOrErr.takeError()) + return E; + std::vector<MemberData> &Data = *DataOrErr; + + if (!StringTableBuf.empty()) + Data.insert(Data.begin(), computeStringTable(StringTableBuf)); + + // We would like to detect if we need to 
switch to a 64-bit symbol table. + if (WriteSymtab) { + uint64_t MaxOffset = 8; // For the file signature. + uint64_t LastOffset = MaxOffset; + uint64_t NumSyms = 0; + for (const auto &M : Data) { + // Record the start of the member's offset + LastOffset = MaxOffset; + // Account for the size of each part associated with the member. + MaxOffset += M.Header.size() + M.Data.size() + M.Padding.size(); + NumSyms += M.Symbols.size(); + } + + // We assume 32-bit offsets to see if 32-bit symbols are possible or not. + uint64_t SymtabSize = computeSymbolTableSize(Kind, NumSyms, 4, SymNamesBuf); + auto computeSymbolTableHeaderSize = + [=] { + SmallString<0> TmpBuf; + raw_svector_ostream Tmp(TmpBuf); + writeSymbolTableHeader(Tmp, Kind, Deterministic, SymtabSize); + return TmpBuf.size(); + }; + LastOffset += computeSymbolTableHeaderSize() + SymtabSize; + + // The SYM64 format is used when an archive's member offsets are larger than + // 32-bits can hold. The need for this shift in format is detected by + // writeArchive. To test this we need to generate a file with a member that + // has an offset larger than 32-bits but this demands a very slow test. To + // speed the test up we use this environment variable to pretend like the + // cutoff happens before 32-bits and instead happens at some much smaller + // value. + uint64_t Sym64Threshold = 1ULL << 32; + const char *Sym64Env = std::getenv("SYM64_THRESHOLD"); + if (Sym64Env) + StringRef(Sym64Env).getAsInteger(10, Sym64Threshold); + + // If LastOffset isn't going to fit in a 32-bit varible we need to switch + // to 64-bit. Note that the file can be larger than 4GB as long as the last + // member starts before the 4GB offset. 
+ if (LastOffset >= Sym64Threshold) { + if (Kind == object::Archive::K_DARWIN) + Kind = object::Archive::K_DARWIN64; + else + Kind = object::Archive::K_GNU64; + } + } + + if (Thin) + Out << "!<thin>\n"; + else + Out << "!<arch>\n"; + + if (WriteSymtab) + writeSymbolTable(Out, Kind, Deterministic, Data, SymNamesBuf); + + for (const MemberData &M : Data) + Out << M.Header << M.Data << M.Padding; + + Out.flush(); + return Error::success(); +} + +Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers, + bool WriteSymtab, object::Archive::Kind Kind, + bool Deterministic, bool Thin, + std::unique_ptr<MemoryBuffer> OldArchiveBuf) { + Expected<sys::fs::TempFile> Temp = + sys::fs::TempFile::create(ArcName + ".temp-archive-%%%%%%%.a"); + if (!Temp) + return Temp.takeError(); + raw_fd_ostream Out(Temp->FD, false); + + if (Error E = writeArchiveToStream(Out, NewMembers, WriteSymtab, Kind, + Deterministic, Thin)) { + if (Error DiscardError = Temp->discard()) + return joinErrors(std::move(E), std::move(DiscardError)); + return E; + } + + // At this point, we no longer need whatever backing memory + // was used to generate the NewMembers. On Windows, this buffer + // could be a mapped view of the file we want to replace (if + // we're updating an existing archive, say). In that case, the + // rename would still succeed, but it would leave behind a + // temporary file (actually the original file renamed) because + // a file cannot be deleted while there's a handle open on it, + // only renamed. So by freeing this buffer, this ensures that + // the last open handle on the destination file, if any, is + // closed before we attempt to rename. 
+ OldArchiveBuf.reset(); + + return Temp->keep(ArcName); +} + +Expected<std::unique_ptr<MemoryBuffer>> +writeArchiveToBuffer(ArrayRef<NewArchiveMember> NewMembers, bool WriteSymtab, + object::Archive::Kind Kind, bool Deterministic, + bool Thin) { + SmallVector<char, 0> ArchiveBufferVector; + raw_svector_ostream ArchiveStream(ArchiveBufferVector); + + if (Error E = writeArchiveToStream(ArchiveStream, NewMembers, WriteSymtab, + Kind, Deterministic, Thin)) + return std::move(E); + + return std::make_unique<SmallVectorMemoryBuffer>( + std::move(ArchiveBufferVector), /*RequiresNullTerminator=*/false); +} + +} // namespace llvm diff --git a/contrib/libs/llvm14/lib/Object/Binary.cpp b/contrib/libs/llvm14/lib/Object/Binary.cpp new file mode 100644 index 0000000000..1435543442 --- /dev/null +++ b/contrib/libs/llvm14/lib/Object/Binary.cpp @@ -0,0 +1,114 @@ +//===- Binary.cpp - A generic binary file ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the Binary class. 
/// Creates the Binary subclass that matches the contents of \p Buffer.
///
/// The file type is determined with identify_magic and dispatched to the
/// matching factory. \p Context and \p InitContent are only forwarded to
/// ObjectFile::createSymbolicFile. PDB, unknown and coff_cl_gl_object inputs
/// yield object_error::invalid_file_type.
Expected<std::unique_ptr<Binary>> object::createBinary(MemoryBufferRef Buffer,
                                                      LLVMContext *Context,
                                                      bool InitContent) {
  file_magic Type = identify_magic(Buffer.getBuffer());

  switch (Type) {
  case file_magic::archive:
    return Archive::create(Buffer);
  // Everything handled through the symbolic-file factory.
  case file_magic::elf:
  case file_magic::elf_relocatable:
  case file_magic::elf_executable:
  case file_magic::elf_shared_object:
  case file_magic::elf_core:
  case file_magic::goff_object:
  case file_magic::macho_object:
  case file_magic::macho_executable:
  case file_magic::macho_fixed_virtual_memory_shared_lib:
  case file_magic::macho_core:
  case file_magic::macho_preload_executable:
  case file_magic::macho_dynamically_linked_shared_lib:
  case file_magic::macho_dynamic_linker:
  case file_magic::macho_bundle:
  case file_magic::macho_dynamically_linked_shared_lib_stub:
  case file_magic::macho_dsym_companion:
  case file_magic::macho_kext_bundle:
  case file_magic::coff_object:
  case file_magic::coff_import_library:
  case file_magic::pecoff_executable:
  case file_magic::bitcode:
  case file_magic::xcoff_object_32:
  case file_magic::xcoff_object_64:
  case file_magic::wasm_object:
    return ObjectFile::createSymbolicFile(Buffer, Type, Context, InitContent);
  case file_magic::macho_universal_binary:
    return MachOUniversalBinary::create(Buffer);
  case file_magic::windows_resource:
    return WindowsResource::createWindowsResource(Buffer);
  case file_magic::pdb:
    // PDB does not support the Binary interface.
    return errorCodeToError(object_error::invalid_file_type);
  case file_magic::unknown:
  case file_magic::coff_cl_gl_object:
    // Unrecognized object file format.
    return errorCodeToError(object_error::invalid_file_type);
  case file_magic::minidump:
    return MinidumpFile::create(Buffer);
  case file_magic::tapi_file:
    return TapiUniversal::create(Buffer);
  }
  // Every case above returns, so reaching this point means Type held a value
  // the switch does not list.
  llvm_unreachable("Unexpected Binary File Type");
}
short import file implementation ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the writeImportLibrary function. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/COFFImportFile.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/ArchiveWriter.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/Path.h" + +#include <cstdint> +#include <string> +#include <vector> + +using namespace llvm::COFF; +using namespace llvm::object; +using namespace llvm; + +namespace llvm { +namespace object { + +static bool is32bit(MachineTypes Machine) { + switch (Machine) { + default: + llvm_unreachable("unsupported machine"); + case IMAGE_FILE_MACHINE_ARM64: + case IMAGE_FILE_MACHINE_AMD64: + return false; + case IMAGE_FILE_MACHINE_ARMNT: + case IMAGE_FILE_MACHINE_I386: + return true; + } +} + +static uint16_t getImgRelRelocation(MachineTypes Machine) { + switch (Machine) { + default: + llvm_unreachable("unsupported machine"); + case IMAGE_FILE_MACHINE_AMD64: + return IMAGE_REL_AMD64_ADDR32NB; + case IMAGE_FILE_MACHINE_ARMNT: + return IMAGE_REL_ARM_ADDR32NB; + case IMAGE_FILE_MACHINE_ARM64: + return IMAGE_REL_ARM64_ADDR32NB; + case IMAGE_FILE_MACHINE_I386: + return IMAGE_REL_I386_DIR32NB; + } +} + +template <class T> static void append(std::vector<uint8_t> &B, const T &Data) { + size_t S = B.size(); + B.resize(S + sizeof(T)); + memcpy(&B[S], &Data, sizeof(T)); +} + +static void writeStringTable(std::vector<uint8_t> &B, + ArrayRef<const std::string> Strings) { + // The COFF string table consists of a 4-byte value which is the size of the + // table, including the 
length field itself. This value is followed by the + // string content itself, which is an array of null-terminated C-style + // strings. The termination is important as they are referenced to by offset + // by the symbol entity in the file format. + + size_t Pos = B.size(); + size_t Offset = B.size(); + + // Skip over the length field, we will fill it in later as we will have + // computed the length while emitting the string content itself. + Pos += sizeof(uint32_t); + + for (const auto &S : Strings) { + B.resize(Pos + S.length() + 1); + strcpy(reinterpret_cast<char *>(&B[Pos]), S.c_str()); + Pos += S.length() + 1; + } + + // Backfill the length of the table now that it has been computed. + support::ulittle32_t Length(B.size() - Offset); + support::endian::write32le(&B[Offset], Length); +} + +static ImportNameType getNameType(StringRef Sym, StringRef ExtName, + MachineTypes Machine, bool MinGW) { + // A decorated stdcall function in MSVC is exported with the + // type IMPORT_NAME, and the exported function name includes the + // the leading underscore. In MinGW on the other hand, a decorated + // stdcall function still omits the underscore (IMPORT_NAME_NOPREFIX). + // See the comment in isDecorated in COFFModuleDefinition.cpp for more + // details. + if (ExtName.startswith("_") && ExtName.contains('@') && !MinGW) + return IMPORT_NAME; + if (Sym != ExtName) + return IMPORT_NAME_UNDECORATE; + if (Machine == IMAGE_FILE_MACHINE_I386 && Sym.startswith("_")) + return IMPORT_NAME_NOPREFIX; + return IMPORT_NAME; +} + +static Expected<std::string> replace(StringRef S, StringRef From, + StringRef To) { + size_t Pos = S.find(From); + + // From and To may be mangled, but substrings in S may not. 
+ if (Pos == StringRef::npos && From.startswith("_") && To.startswith("_")) { + From = From.substr(1); + To = To.substr(1); + Pos = S.find(From); + } + + if (Pos == StringRef::npos) { + return make_error<StringError>( + StringRef(Twine(S + ": replacing '" + From + + "' with '" + To + "' failed").str()), object_error::parse_failed); + } + + return (Twine(S.substr(0, Pos)) + To + S.substr(Pos + From.size())).str(); +} + +static const std::string NullImportDescriptorSymbolName = + "__NULL_IMPORT_DESCRIPTOR"; + +namespace { +// This class constructs various small object files necessary to support linking +// symbols imported from a DLL. The contents are pretty strictly defined and +// nearly entirely static. The details of the structures files are defined in +// WINNT.h and the PE/COFF specification. +class ObjectFactory { + using u16 = support::ulittle16_t; + using u32 = support::ulittle32_t; + MachineTypes Machine; + BumpPtrAllocator Alloc; + StringRef ImportName; + StringRef Library; + std::string ImportDescriptorSymbolName; + std::string NullThunkSymbolName; + +public: + ObjectFactory(StringRef S, MachineTypes M) + : Machine(M), ImportName(S), Library(S.drop_back(4)), + ImportDescriptorSymbolName(("__IMPORT_DESCRIPTOR_" + Library).str()), + NullThunkSymbolName(("\x7f" + Library + "_NULL_THUNK_DATA").str()) {} + + // Creates an Import Descriptor. This is a small object file which contains a + // reference to the terminators and contains the library name (entry) for the + // import name table. It will force the linker to construct the necessary + // structure to import symbols from the DLL. + NewArchiveMember createImportDescriptor(std::vector<uint8_t> &Buffer); + + // Creates a NULL import descriptor. This is a small object file whcih + // contains a NULL import descriptor. It is used to terminate the imports + // from a specific DLL. + NewArchiveMember createNullImportDescriptor(std::vector<uint8_t> &Buffer); + + // Create a NULL Thunk Entry. 
This is a small object file which contains a + // NULL Import Address Table entry and a NULL Import Lookup Table Entry. It + // is used to terminate the IAT and ILT. + NewArchiveMember createNullThunk(std::vector<uint8_t> &Buffer); + + // Create a short import file which is described in PE/COFF spec 7. Import + // Library Format. + NewArchiveMember createShortImport(StringRef Sym, uint16_t Ordinal, + ImportType Type, ImportNameType NameType); + + // Create a weak external file which is described in PE/COFF Aux Format 3. + NewArchiveMember createWeakExternal(StringRef Sym, StringRef Weak, bool Imp); +}; +} // namespace + +NewArchiveMember +ObjectFactory::createImportDescriptor(std::vector<uint8_t> &Buffer) { + const uint32_t NumberOfSections = 2; + const uint32_t NumberOfSymbols = 7; + const uint32_t NumberOfRelocations = 3; + + // COFF Header + coff_file_header Header{ + u16(Machine), + u16(NumberOfSections), + u32(0), + u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) + + // .idata$2 + sizeof(coff_import_directory_table_entry) + + NumberOfRelocations * sizeof(coff_relocation) + + // .idata$4 + (ImportName.size() + 1)), + u32(NumberOfSymbols), + u16(0), + u16(is32bit(Machine) ? 
IMAGE_FILE_32BIT_MACHINE : C_Invalid), + }; + append(Buffer, Header); + + // Section Header Table + const coff_section SectionTable[NumberOfSections] = { + {{'.', 'i', 'd', 'a', 't', 'a', '$', '2'}, + u32(0), + u32(0), + u32(sizeof(coff_import_directory_table_entry)), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section)), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) + + sizeof(coff_import_directory_table_entry)), + u32(0), + u16(NumberOfRelocations), + u16(0), + u32(IMAGE_SCN_ALIGN_4BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA | + IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, + {{'.', 'i', 'd', 'a', 't', 'a', '$', '6'}, + u32(0), + u32(0), + u32(ImportName.size() + 1), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) + + sizeof(coff_import_directory_table_entry) + + NumberOfRelocations * sizeof(coff_relocation)), + u32(0), + u32(0), + u16(0), + u16(0), + u32(IMAGE_SCN_ALIGN_2BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA | + IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, + }; + append(Buffer, SectionTable); + + // .idata$2 + const coff_import_directory_table_entry ImportDescriptor{ + u32(0), u32(0), u32(0), u32(0), u32(0), + }; + append(Buffer, ImportDescriptor); + + const coff_relocation RelocationTable[NumberOfRelocations] = { + {u32(offsetof(coff_import_directory_table_entry, NameRVA)), u32(2), + u16(getImgRelRelocation(Machine))}, + {u32(offsetof(coff_import_directory_table_entry, ImportLookupTableRVA)), + u32(3), u16(getImgRelRelocation(Machine))}, + {u32(offsetof(coff_import_directory_table_entry, ImportAddressTableRVA)), + u32(4), u16(getImgRelRelocation(Machine))}, + }; + append(Buffer, RelocationTable); + + // .idata$6 + auto S = Buffer.size(); + Buffer.resize(S + ImportName.size() + 1); + memcpy(&Buffer[S], ImportName.data(), ImportName.size()); + Buffer[S + ImportName.size()] = '\0'; + + // Symbol Table + coff_symbol16 SymbolTable[NumberOfSymbols] = { + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(1), 
+ u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + {{{'.', 'i', 'd', 'a', 't', 'a', '$', '2'}}, + u32(0), + u16(1), + u16(0), + IMAGE_SYM_CLASS_SECTION, + 0}, + {{{'.', 'i', 'd', 'a', 't', 'a', '$', '6'}}, + u32(0), + u16(2), + u16(0), + IMAGE_SYM_CLASS_STATIC, + 0}, + {{{'.', 'i', 'd', 'a', 't', 'a', '$', '4'}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_SECTION, + 0}, + {{{'.', 'i', 'd', 'a', 't', 'a', '$', '5'}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_SECTION, + 0}, + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + }; + // TODO: Name.Offset.Offset here and in the all similar places below + // suggests a names refactoring. Maybe StringTableOffset.Value? + SymbolTable[0].Name.Offset.Offset = + sizeof(uint32_t); + SymbolTable[5].Name.Offset.Offset = + sizeof(uint32_t) + ImportDescriptorSymbolName.length() + 1; + SymbolTable[6].Name.Offset.Offset = + sizeof(uint32_t) + ImportDescriptorSymbolName.length() + 1 + + NullImportDescriptorSymbolName.length() + 1; + append(Buffer, SymbolTable); + + // String Table + writeStringTable(Buffer, + {ImportDescriptorSymbolName, NullImportDescriptorSymbolName, + NullThunkSymbolName}); + + StringRef F{reinterpret_cast<const char *>(Buffer.data()), Buffer.size()}; + return {MemoryBufferRef(F, ImportName)}; +} + +NewArchiveMember +ObjectFactory::createNullImportDescriptor(std::vector<uint8_t> &Buffer) { + const uint32_t NumberOfSections = 1; + const uint32_t NumberOfSymbols = 1; + + // COFF Header + coff_file_header Header{ + u16(Machine), + u16(NumberOfSections), + u32(0), + u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) + + // .idata$3 + sizeof(coff_import_directory_table_entry)), + u32(NumberOfSymbols), + u16(0), + u16(is32bit(Machine) ? 
IMAGE_FILE_32BIT_MACHINE : C_Invalid), + }; + append(Buffer, Header); + + // Section Header Table + const coff_section SectionTable[NumberOfSections] = { + {{'.', 'i', 'd', 'a', 't', 'a', '$', '3'}, + u32(0), + u32(0), + u32(sizeof(coff_import_directory_table_entry)), + u32(sizeof(coff_file_header) + + (NumberOfSections * sizeof(coff_section))), + u32(0), + u32(0), + u16(0), + u16(0), + u32(IMAGE_SCN_ALIGN_4BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA | + IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, + }; + append(Buffer, SectionTable); + + // .idata$3 + const coff_import_directory_table_entry ImportDescriptor{ + u32(0), u32(0), u32(0), u32(0), u32(0), + }; + append(Buffer, ImportDescriptor); + + // Symbol Table + coff_symbol16 SymbolTable[NumberOfSymbols] = { + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(1), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + }; + SymbolTable[0].Name.Offset.Offset = sizeof(uint32_t); + append(Buffer, SymbolTable); + + // String Table + writeStringTable(Buffer, {NullImportDescriptorSymbolName}); + + StringRef F{reinterpret_cast<const char *>(Buffer.data()), Buffer.size()}; + return {MemoryBufferRef(F, ImportName)}; +} + +NewArchiveMember ObjectFactory::createNullThunk(std::vector<uint8_t> &Buffer) { + const uint32_t NumberOfSections = 2; + const uint32_t NumberOfSymbols = 1; + uint32_t VASize = is32bit(Machine) ? 4 : 8; + + // COFF Header + coff_file_header Header{ + u16(Machine), + u16(NumberOfSections), + u32(0), + u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) + + // .idata$5 + VASize + + // .idata$4 + VASize), + u32(NumberOfSymbols), + u16(0), + u16(is32bit(Machine) ? 
IMAGE_FILE_32BIT_MACHINE : C_Invalid), + }; + append(Buffer, Header); + + // Section Header Table + const coff_section SectionTable[NumberOfSections] = { + {{'.', 'i', 'd', 'a', 't', 'a', '$', '5'}, + u32(0), + u32(0), + u32(VASize), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section)), + u32(0), + u32(0), + u16(0), + u16(0), + u32((is32bit(Machine) ? IMAGE_SCN_ALIGN_4BYTES + : IMAGE_SCN_ALIGN_8BYTES) | + IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ | + IMAGE_SCN_MEM_WRITE)}, + {{'.', 'i', 'd', 'a', 't', 'a', '$', '4'}, + u32(0), + u32(0), + u32(VASize), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) + + VASize), + u32(0), + u32(0), + u16(0), + u16(0), + u32((is32bit(Machine) ? IMAGE_SCN_ALIGN_4BYTES + : IMAGE_SCN_ALIGN_8BYTES) | + IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ | + IMAGE_SCN_MEM_WRITE)}, + }; + append(Buffer, SectionTable); + + // .idata$5, ILT + append(Buffer, u32(0)); + if (!is32bit(Machine)) + append(Buffer, u32(0)); + + // .idata$4, IAT + append(Buffer, u32(0)); + if (!is32bit(Machine)) + append(Buffer, u32(0)); + + // Symbol Table + coff_symbol16 SymbolTable[NumberOfSymbols] = { + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(1), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + }; + SymbolTable[0].Name.Offset.Offset = sizeof(uint32_t); + append(Buffer, SymbolTable); + + // String Table + writeStringTable(Buffer, {NullThunkSymbolName}); + + StringRef F{reinterpret_cast<const char *>(Buffer.data()), Buffer.size()}; + return {MemoryBufferRef{F, ImportName}}; +} + +NewArchiveMember ObjectFactory::createShortImport(StringRef Sym, + uint16_t Ordinal, + ImportType ImportType, + ImportNameType NameType) { + size_t ImpSize = ImportName.size() + Sym.size() + 2; // +2 for NULs + size_t Size = sizeof(coff_import_header) + ImpSize; + char *Buf = Alloc.Allocate<char>(Size); + memset(Buf, 0, Size); + char *P = Buf; + + // Write short import library. 
+ auto *Imp = reinterpret_cast<coff_import_header *>(P); + P += sizeof(*Imp); + Imp->Sig2 = 0xFFFF; + Imp->Machine = Machine; + Imp->SizeOfData = ImpSize; + if (Ordinal > 0) + Imp->OrdinalHint = Ordinal; + Imp->TypeInfo = (NameType << 2) | ImportType; + + // Write symbol name and DLL name. + memcpy(P, Sym.data(), Sym.size()); + P += Sym.size() + 1; + memcpy(P, ImportName.data(), ImportName.size()); + + return {MemoryBufferRef(StringRef(Buf, Size), ImportName)}; +} + +NewArchiveMember ObjectFactory::createWeakExternal(StringRef Sym, + StringRef Weak, bool Imp) { + std::vector<uint8_t> Buffer; + const uint32_t NumberOfSections = 1; + const uint32_t NumberOfSymbols = 5; + + // COFF Header + coff_file_header Header{ + u16(Machine), + u16(NumberOfSections), + u32(0), + u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section))), + u32(NumberOfSymbols), + u16(0), + u16(0), + }; + append(Buffer, Header); + + // Section Header Table + const coff_section SectionTable[NumberOfSections] = { + {{'.', 'd', 'r', 'e', 'c', 't', 'v', 'e'}, + u32(0), + u32(0), + u32(0), + u32(0), + u32(0), + u32(0), + u16(0), + u16(0), + u32(IMAGE_SCN_LNK_INFO | IMAGE_SCN_LNK_REMOVE)}}; + append(Buffer, SectionTable); + + // Symbol Table + coff_symbol16 SymbolTable[NumberOfSymbols] = { + {{{'@', 'c', 'o', 'm', 'p', '.', 'i', 'd'}}, + u32(0), + u16(0xFFFF), + u16(0), + IMAGE_SYM_CLASS_STATIC, + 0}, + {{{'@', 'f', 'e', 'a', 't', '.', '0', '0'}}, + u32(0), + u16(0xFFFF), + u16(0), + IMAGE_SYM_CLASS_STATIC, + 0}, + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_WEAK_EXTERNAL, + 1}, + {{{2, 0, 0, 0, IMAGE_WEAK_EXTERN_SEARCH_ALIAS, 0, 0, 0}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_NULL, + 0}, + }; + SymbolTable[2].Name.Offset.Offset = sizeof(uint32_t); + + //__imp_ String Table + StringRef Prefix = Imp ? 
"__imp_" : ""; + SymbolTable[3].Name.Offset.Offset = + sizeof(uint32_t) + Sym.size() + Prefix.size() + 1; + append(Buffer, SymbolTable); + writeStringTable(Buffer, {(Prefix + Sym).str(), + (Prefix + Weak).str()}); + + // Copied here so we can still use writeStringTable + char *Buf = Alloc.Allocate<char>(Buffer.size()); + memcpy(Buf, Buffer.data(), Buffer.size()); + return {MemoryBufferRef(StringRef(Buf, Buffer.size()), ImportName)}; +} + +Error writeImportLibrary(StringRef ImportName, StringRef Path, + ArrayRef<COFFShortExport> Exports, + MachineTypes Machine, bool MinGW) { + + std::vector<NewArchiveMember> Members; + ObjectFactory OF(llvm::sys::path::filename(ImportName), Machine); + + std::vector<uint8_t> ImportDescriptor; + Members.push_back(OF.createImportDescriptor(ImportDescriptor)); + + std::vector<uint8_t> NullImportDescriptor; + Members.push_back(OF.createNullImportDescriptor(NullImportDescriptor)); + + std::vector<uint8_t> NullThunk; + Members.push_back(OF.createNullThunk(NullThunk)); + + for (COFFShortExport E : Exports) { + if (E.Private) + continue; + + ImportType ImportType = IMPORT_CODE; + if (E.Data) + ImportType = IMPORT_DATA; + if (E.Constant) + ImportType = IMPORT_CONST; + + StringRef SymbolName = E.SymbolName.empty() ? E.Name : E.SymbolName; + ImportNameType NameType = E.Noname + ? IMPORT_ORDINAL + : getNameType(SymbolName, E.Name, + Machine, MinGW); + Expected<std::string> Name = E.ExtName.empty() + ? 
std::string(SymbolName) + : replace(SymbolName, E.Name, E.ExtName); + + if (!Name) + return Name.takeError(); + + if (!E.AliasTarget.empty() && *Name != E.AliasTarget) { + Members.push_back(OF.createWeakExternal(E.AliasTarget, *Name, false)); + Members.push_back(OF.createWeakExternal(E.AliasTarget, *Name, true)); + continue; + } + + Members.push_back( + OF.createShortImport(*Name, E.Ordinal, ImportType, NameType)); + } + + return writeArchive(Path, Members, /*WriteSymtab*/ true, + object::Archive::K_GNU, + /*Deterministic*/ true, /*Thin*/ false); +} + +} // namespace object +} // namespace llvm diff --git a/contrib/libs/llvm14/lib/Object/COFFModuleDefinition.cpp b/contrib/libs/llvm14/lib/Object/COFFModuleDefinition.cpp new file mode 100644 index 0000000000..55ddd3baca --- /dev/null +++ b/contrib/libs/llvm14/lib/Object/COFFModuleDefinition.cpp @@ -0,0 +1,363 @@ +//===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Windows-specific. +// A parser for the module-definition file (.def file). 
+// +// The format of module-definition files are described in this document: +// https://msdn.microsoft.com/en-us/library/28d6s79h.aspx +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/COFFModuleDefinition.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Object/COFF.h" +#include "llvm/Object/COFFImportFile.h" +#include "llvm/Object/Error.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm::COFF; +using namespace llvm; + +namespace llvm { +namespace object { + +enum Kind { + Unknown, + Eof, + Identifier, + Comma, + Equal, + EqualEqual, + KwBase, + KwConstant, + KwData, + KwExports, + KwHeapsize, + KwLibrary, + KwName, + KwNoname, + KwPrivate, + KwStacksize, + KwVersion, +}; + +struct Token { + explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {} + Kind K; + StringRef Value; +}; + +static bool isDecorated(StringRef Sym, bool MingwDef) { + // In def files, the symbols can either be listed decorated or undecorated. + // + // - For cdecl symbols, only the undecorated form is allowed. + // - For fastcall and vectorcall symbols, both fully decorated or + // undecorated forms can be present. + // - For stdcall symbols in non-MinGW environments, the decorated form is + // fully decorated with leading underscore and trailing stack argument + // size - like "_Func@0". + // - In MinGW def files, a decorated stdcall symbol does not include the + // leading underscore though, like "Func@0". + + // This function controls whether a leading underscore should be added to + // the given symbol name or not. For MinGW, treat a stdcall symbol name such + // as "Func@0" as undecorated, i.e. a leading underscore must be added. + // For non-MinGW, look for '@' in the whole string and consider "_Func@0" + // as decorated, i.e. don't add any more leading underscores. 
+ // We can't check for a leading underscore here, since function names + // themselves can start with an underscore, while a second one still needs + // to be added. + return Sym.startswith("@") || Sym.contains("@@") || Sym.startswith("?") || + (!MingwDef && Sym.contains('@')); +} + +class Lexer { +public: + Lexer(StringRef S) : Buf(S) {} + + Token lex() { + Buf = Buf.trim(); + if (Buf.empty()) + return Token(Eof); + + switch (Buf[0]) { + case '\0': + return Token(Eof); + case ';': { + size_t End = Buf.find('\n'); + Buf = (End == Buf.npos) ? "" : Buf.drop_front(End); + return lex(); + } + case '=': + Buf = Buf.drop_front(); + if (Buf.startswith("=")) { + Buf = Buf.drop_front(); + return Token(EqualEqual, "=="); + } + return Token(Equal, "="); + case ',': + Buf = Buf.drop_front(); + return Token(Comma, ","); + case '"': { + StringRef S; + std::tie(S, Buf) = Buf.substr(1).split('"'); + return Token(Identifier, S); + } + default: { + size_t End = Buf.find_first_of("=,;\r\n \t\v"); + StringRef Word = Buf.substr(0, End); + Kind K = llvm::StringSwitch<Kind>(Word) + .Case("BASE", KwBase) + .Case("CONSTANT", KwConstant) + .Case("DATA", KwData) + .Case("EXPORTS", KwExports) + .Case("HEAPSIZE", KwHeapsize) + .Case("LIBRARY", KwLibrary) + .Case("NAME", KwName) + .Case("NONAME", KwNoname) + .Case("PRIVATE", KwPrivate) + .Case("STACKSIZE", KwStacksize) + .Case("VERSION", KwVersion) + .Default(Identifier); + Buf = (End == Buf.npos) ? 
"" : Buf.drop_front(End); + return Token(K, Word); + } + } + } + +private: + StringRef Buf; +}; + +class Parser { +public: + explicit Parser(StringRef S, MachineTypes M, bool B) + : Lex(S), Machine(M), MingwDef(B) {} + + Expected<COFFModuleDefinition> parse() { + do { + if (Error Err = parseOne()) + return std::move(Err); + } while (Tok.K != Eof); + return Info; + } + +private: + void read() { + if (Stack.empty()) { + Tok = Lex.lex(); + return; + } + Tok = Stack.back(); + Stack.pop_back(); + } + + Error readAsInt(uint64_t *I) { + read(); + if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I)) + return createError("integer expected"); + return Error::success(); + } + + Error expect(Kind Expected, StringRef Msg) { + read(); + if (Tok.K != Expected) + return createError(Msg); + return Error::success(); + } + + void unget() { Stack.push_back(Tok); } + + Error parseOne() { + read(); + switch (Tok.K) { + case Eof: + return Error::success(); + case KwExports: + for (;;) { + read(); + if (Tok.K != Identifier) { + unget(); + return Error::success(); + } + if (Error Err = parseExport()) + return Err; + } + case KwHeapsize: + return parseNumbers(&Info.HeapReserve, &Info.HeapCommit); + case KwStacksize: + return parseNumbers(&Info.StackReserve, &Info.StackCommit); + case KwLibrary: + case KwName: { + bool IsDll = Tok.K == KwLibrary; // Check before parseName. + std::string Name; + if (Error Err = parseName(&Name, &Info.ImageBase)) + return Err; + + Info.ImportName = Name; + + // Set the output file, but don't override /out if it was already passed. + if (Info.OutputFile.empty()) { + Info.OutputFile = Name; + // Append the appropriate file extension if not already present. + if (!sys::path::has_extension(Name)) + Info.OutputFile += IsDll ? 
".dll" : ".exe"; + } + + return Error::success(); + } + case KwVersion: + return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion); + default: + return createError("unknown directive: " + Tok.Value); + } + } + + Error parseExport() { + COFFShortExport E; + E.Name = std::string(Tok.Value); + read(); + if (Tok.K == Equal) { + read(); + if (Tok.K != Identifier) + return createError("identifier expected, but got " + Tok.Value); + E.ExtName = E.Name; + E.Name = std::string(Tok.Value); + } else { + unget(); + } + + if (Machine == IMAGE_FILE_MACHINE_I386) { + if (!isDecorated(E.Name, MingwDef)) + E.Name = (std::string("_").append(E.Name)); + if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef)) + E.ExtName = (std::string("_").append(E.ExtName)); + } + + for (;;) { + read(); + if (Tok.K == Identifier && Tok.Value[0] == '@') { + if (Tok.Value == "@") { + // "foo @ 10" + read(); + Tok.Value.getAsInteger(10, E.Ordinal); + } else if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) { + // "foo \n @bar" - Not an ordinal modifier at all, but the next + // export (fastcall decorated) - complete the current one. 
+ unget(); + Info.Exports.push_back(E); + return Error::success(); + } + // "foo @10" + read(); + if (Tok.K == KwNoname) { + E.Noname = true; + } else { + unget(); + } + continue; + } + if (Tok.K == KwData) { + E.Data = true; + continue; + } + if (Tok.K == KwConstant) { + E.Constant = true; + continue; + } + if (Tok.K == KwPrivate) { + E.Private = true; + continue; + } + if (Tok.K == EqualEqual) { + read(); + E.AliasTarget = std::string(Tok.Value); + if (Machine == IMAGE_FILE_MACHINE_I386 && !isDecorated(E.AliasTarget, MingwDef)) + E.AliasTarget = std::string("_").append(E.AliasTarget); + continue; + } + unget(); + Info.Exports.push_back(E); + return Error::success(); + } + } + + // HEAPSIZE/STACKSIZE reserve[,commit] + Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) { + if (Error Err = readAsInt(Reserve)) + return Err; + read(); + if (Tok.K != Comma) { + unget(); + Commit = nullptr; + return Error::success(); + } + if (Error Err = readAsInt(Commit)) + return Err; + return Error::success(); + } + + // NAME outputPath [BASE=address] + Error parseName(std::string *Out, uint64_t *Baseaddr) { + read(); + if (Tok.K == Identifier) { + *Out = std::string(Tok.Value); + } else { + *Out = ""; + unget(); + return Error::success(); + } + read(); + if (Tok.K == KwBase) { + if (Error Err = expect(Equal, "'=' expected")) + return Err; + if (Error Err = readAsInt(Baseaddr)) + return Err; + } else { + unget(); + *Baseaddr = 0; + } + return Error::success(); + } + + // VERSION major[.minor] + Error parseVersion(uint32_t *Major, uint32_t *Minor) { + read(); + if (Tok.K != Identifier) + return createError("identifier expected, but got " + Tok.Value); + StringRef V1, V2; + std::tie(V1, V2) = Tok.Value.split('.'); + if (V1.getAsInteger(10, *Major)) + return createError("integer expected, but got " + Tok.Value); + if (V2.empty()) + *Minor = 0; + else if (V2.getAsInteger(10, *Minor)) + return createError("integer expected, but got " + Tok.Value); + return Error::success(); + } + + 
Lexer Lex; + Token Tok; + std::vector<Token> Stack; + MachineTypes Machine; + COFFModuleDefinition Info; + bool MingwDef; +}; + +Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB, + MachineTypes Machine, + bool MingwDef) { + return Parser(MB.getBuffer(), Machine, MingwDef).parse(); +} + +} // namespace object +} // namespace llvm diff --git a/contrib/libs/llvm14/lib/Object/COFFObjectFile.cpp b/contrib/libs/llvm14/lib/Object/COFFObjectFile.cpp new file mode 100644 index 0000000000..354b3c0d55 --- /dev/null +++ b/contrib/libs/llvm14/lib/Object/COFFObjectFile.cpp @@ -0,0 +1,1891 @@ +//===- COFFObjectFile.cpp - COFF object file implementation ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the COFFObjectFile class. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/COFF.h" +#include "llvm/Object/Error.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/MemoryBuffer.h" +#include <algorithm> +#include <cassert> +#include <cinttypes> +#include <cstddef> +#include <cstring> +#include <limits> +#include <memory> +#include <system_error> + +using namespace llvm; +using namespace object; + +using support::ulittle16_t; +using support::ulittle32_t; +using support::ulittle64_t; +using support::little16_t; + +// Returns false if size is greater than the buffer size. And sets ec. +static bool checkSize(MemoryBufferRef M, std::error_code &EC, uint64_t Size) { + if (M.getBufferSize() < Size) { + EC = object_error::unexpected_eof; + return false; + } + return true; +} + +// Sets Obj unless any bytes in [addr, addr + size) fall outsize of m. +// Returns unexpected_eof if error. +template <typename T> +static Error getObject(const T *&Obj, MemoryBufferRef M, const void *Ptr, + const uint64_t Size = sizeof(T)) { + uintptr_t Addr = reinterpret_cast<uintptr_t>(Ptr); + if (Error E = Binary::checkOffset(M, Addr, Size)) + return E; + Obj = reinterpret_cast<const T *>(Addr); + return Error::success(); +} + +// Decode a string table entry in base 64 (//AAAAAA). Expects \arg Str without +// prefixed slashes. 
// Returns true on failure (more than 6 characters, a character outside the
// A-Z / a-z / 0-9 / '+' / '/' alphabet, or a value that does not fit in 32
// bits) and false on success, in which case Result holds the decoded value.
static bool decodeBase64StringEntry(StringRef Str, uint32_t &Result) {
  assert(Str.size() <= 6 && "String too long, possible overflow.");
  // Repeat the length check for builds where the assert is compiled out.
  if (Str.size() > 6)
    return true;

  // Accumulate in 64 bits: 6 digits of 6 bits each can exceed 32 bits.
  uint64_t Value = 0;
  while (!Str.empty()) {
    unsigned CharVal;
    if (Str[0] >= 'A' && Str[0] <= 'Z') // 0..25
      CharVal = Str[0] - 'A';
    else if (Str[0] >= 'a' && Str[0] <= 'z') // 26..51
      CharVal = Str[0] - 'a' + 26;
    else if (Str[0] >= '0' && Str[0] <= '9') // 52..61
      CharVal = Str[0] - '0' + 52;
    else if (Str[0] == '+') // 62
      CharVal = 62;
    else if (Str[0] == '/') // 63
      CharVal = 63;
    else
      return true;

    Value = (Value * 64) + CharVal;
    Str = Str.substr(1);
  }

  if (Value > std::numeric_limits<uint32_t>::max())
    return true;

  Result = static_cast<uint32_t>(Value);
  return false;
}

// Reinterprets the opaque handle Ref.p as a pointer to a symbol record of type
// coff_symbol_type. Validation happens only through assertions, so nothing is
// checked when assertions are compiled out.
template <typename coff_symbol_type>
const coff_symbol_type *COFFObjectFile::toSymb(DataRefImpl Ref) const {
  const coff_symbol_type *Addr =
      reinterpret_cast<const coff_symbol_type *>(Ref.p);

  // The record must be accepted by checkOffset() for the file buffer.
  assert(!checkOffset(Data, reinterpret_cast<uintptr_t>(Addr), sizeof(*Addr)));
#ifndef NDEBUG
  // Verify that the symbol points to a valid entry in the symbol table.
  uintptr_t Offset =
      reinterpret_cast<uintptr_t>(Addr) - reinterpret_cast<uintptr_t>(base());

  assert((Offset - getPointerToSymbolTable()) % sizeof(coff_symbol_type) == 0 &&
         "Symbol did not point to the beginning of a symbol");
#endif

  return Addr;
}

// Reinterprets the opaque handle Ref.p as a pointer to a section header.
// When NDEBUG is not defined, a pointer outside the section table is a fatal
// error and a misaligned pointer trips an assertion; otherwise nothing is
// checked.
const coff_section *COFFObjectFile::toSec(DataRefImpl Ref) const {
  const coff_section *Addr = reinterpret_cast<const coff_section*>(Ref.p);

#ifndef NDEBUG
  // Verify that the section points to a valid entry in the section table.
  if (Addr < SectionTable || Addr >= (SectionTable + getNumberOfSections()))
    report_fatal_error("Section was outside of section table.");

  uintptr_t Offset = reinterpret_cast<uintptr_t>(Addr) -
                     reinterpret_cast<uintptr_t>(SectionTable);
  assert(Offset % sizeof(coff_section) == 0 &&
         "Section did not point to the beginning of a section");
#endif

  return Addr;
}

// Advances Ref past the current symbol and all of its auxiliary records.
// The result is clamped to the address of StringTable, which is the value
// symbol_end() uses as the end-of-table handle.
void COFFObjectFile::moveSymbolNext(DataRefImpl &Ref) const {
  auto End = reinterpret_cast<uintptr_t>(StringTable);
  if (SymbolTable16) {
    const coff_symbol16 *Symb = toSymb<coff_symbol16>(Ref);
    // Skip the symbol itself plus the aux records that follow it.
    Symb += 1 + Symb->NumberOfAuxSymbols;
    Ref.p = std::min(reinterpret_cast<uintptr_t>(Symb), End);
  } else if (SymbolTable32) {
    const coff_symbol32 *Symb = toSymb<coff_symbol32>(Ref);
    // Skip the symbol itself plus the aux records that follow it.
    Symb += 1 + Symb->NumberOfAuxSymbols;
    Ref.p = std::min(reinterpret_cast<uintptr_t>(Symb), End);
  } else {
    llvm_unreachable("no symbol table pointer!");
  }
}

// Returns the name of the symbol identified by Ref by forwarding to the
// COFFSymbolRef overload.
Expected<StringRef> COFFObjectFile::getSymbolName(DataRefImpl Ref) const {
  return getSymbolName(getCOFFSymbol(Ref));
}

// Returns the value reported by the symbol record identified by Ref.
uint64_t COFFObjectFile::getSymbolValueImpl(DataRefImpl Ref) const {
  return getCOFFSymbol(Ref).getValue();
}

// Returns the symbol's value rounded up to a power of two, capped at 32.
uint32_t COFFObjectFile::getSymbolAlignment(DataRefImpl Ref) const {
  // MSVC/link.exe seems to align symbols to the next-power-of-2
  // up to 32 bytes.
  COFFSymbolRef Symb = getCOFFSymbol(Ref);
  return std::min(uint64_t(32), PowerOf2Ceil(Symb.getValue()));
}

// Returns the symbol's address. For undefined or common symbols, and for
// symbols with a reserved section number, this is the plain symbol value.
// Otherwise the containing section's VirtualAddress and the image base are
// added. Fails if the section number cannot be resolved by getSection().
Expected<uint64_t> COFFObjectFile::getSymbolAddress(DataRefImpl Ref) const {
  uint64_t Result = cantFail(getSymbolValue(Ref));
  COFFSymbolRef Symb = getCOFFSymbol(Ref);
  int32_t SectionNumber = Symb.getSectionNumber();

  if (Symb.isAnyUndefined() || Symb.isCommon() ||
      COFF::isReservedSectionNumber(SectionNumber))
    return Result;

  Expected<const coff_section *> Section = getSection(SectionNumber);
  if (!Section)
    return Section.takeError();
  Result += (*Section)->VirtualAddress;

  // The section VirtualAddress does not include ImageBase, and we want to
  // return virtual addresses.
  Result += getImageBase();

  return Result;
}

// Classifies the symbol. The checks are ordered and the first match wins:
// function, undefined, common, file record, debug/section definition, then
// any symbol in a non-reserved section (data); everything else is ST_Other.
Expected<SymbolRef::Type> COFFObjectFile::getSymbolType(DataRefImpl Ref) const {
  COFFSymbolRef Symb = getCOFFSymbol(Ref);
  int32_t SectionNumber = Symb.getSectionNumber();

  if (Symb.getComplexType() == COFF::IMAGE_SYM_DTYPE_FUNCTION)
    return SymbolRef::ST_Function;
  if (Symb.isAnyUndefined())
    return SymbolRef::ST_Unknown;
  if (Symb.isCommon())
    return SymbolRef::ST_Data;
  if (Symb.isFileRecord())
    return SymbolRef::ST_File;

  // TODO: perhaps we need a new symbol type ST_Section.
  if (SectionNumber == COFF::IMAGE_SYM_DEBUG || Symb.isSectionDefinition())
    return SymbolRef::ST_Debug;

  if (!COFF::isReservedSectionNumber(SectionNumber))
    return SymbolRef::ST_Data;

  return SymbolRef::ST_Other;
}

// Builds the SymbolRef::SF_* flag set for the symbol. The individual
// conditions are independent, so several flags may be set at once.
Expected<uint32_t> COFFObjectFile::getSymbolFlags(DataRefImpl Ref) const {
  COFFSymbolRef Symb = getCOFFSymbol(Ref);
  uint32_t Result = SymbolRef::SF_None;

  if (Symb.isExternal() || Symb.isWeakExternal())
    Result |= SymbolRef::SF_Global;

  if (const coff_aux_weak_external *AWE = Symb.getWeakExternal()) {
    Result |= SymbolRef::SF_Weak;
    // Any weak external other than a search-alias is also marked undefined.
    if (AWE->Characteristics != COFF::IMAGE_WEAK_EXTERN_SEARCH_ALIAS)
      Result |= SymbolRef::SF_Undefined;
  }

  if (Symb.getSectionNumber() == COFF::IMAGE_SYM_ABSOLUTE)
    Result |= SymbolRef::SF_Absolute;

  if (Symb.isFileRecord())
    Result |= SymbolRef::SF_FormatSpecific;

  if (Symb.isSectionDefinition())
    Result |= SymbolRef::SF_FormatSpecific;

  if (Symb.isCommon())
    Result |= SymbolRef::SF_Common;

  if (Symb.isUndefined())
    Result |= SymbolRef::SF_Undefined;

  return Result;
}

// Returns the symbol's value, which this implementation reports as the size
// of a common symbol.
uint64_t COFFObjectFile::getCommonSymbolSizeImpl(DataRefImpl Ref) const {
  COFFSymbolRef Symb = getCOFFSymbol(Ref);
  return Symb.getValue();
}

// Returns an iterator to the section containing the symbol, section_end() when
// the symbol's section number is reserved, or the error from getSection().
Expected<section_iterator>
COFFObjectFile::getSymbolSection(DataRefImpl Ref) const {
  COFFSymbolRef Symb = getCOFFSymbol(Ref);
  if (COFF::isReservedSectionNumber(Symb.getSectionNumber()))
    return section_end();
  Expected<const coff_section *> Sec = getSection(Symb.getSectionNumber());
  if (!Sec)
    return Sec.takeError();
  DataRefImpl Ret;
  Ret.p = reinterpret_cast<uintptr_t>(*Sec);
  return section_iterator(SectionRef(Ret, this));
}

// Returns the raw section number stored in the symbol record.
unsigned COFFObjectFile::getSymbolSectionID(SymbolRef Sym) const {
  COFFSymbolRef Symb = getCOFFSymbol(Sym.getRawDataRefImpl());
  return Symb.getSectionNumber();
}

// Advances Ref to the next entry of the section table.
void COFFObjectFile::moveSectionNext(DataRefImpl &Ref) const {
  const coff_section *Sec = toSec(Ref);
  Sec += 1;
  Ref.p = reinterpret_cast<uintptr_t>(Sec);
}

// Returns the name of the section identified by Ref by forwarding to the
// coff_section overload.
Expected<StringRef> COFFObjectFile::getSectionName(DataRefImpl Ref) const {
  const coff_section *Sec = toSec(Ref);
  return getSectionName(Sec);
}

// Returns the section's VirtualAddress plus the image base.
uint64_t COFFObjectFile::getSectionAddress(DataRefImpl Ref) const {
  const coff_section *Sec = toSec(Ref);
  uint64_t Result = Sec->VirtualAddress;

  // The section VirtualAddress does not include ImageBase, and we want to
  // return virtual addresses.
  Result += getImageBase();
  return Result;
}

// Returns the zero-based position of the section within the section table.
uint64_t COFFObjectFile::getSectionIndex(DataRefImpl Sec) const {
  return toSec(Sec) - SectionTable;
}

// Returns the section size by forwarding to the coff_section overload.
uint64_t COFFObjectFile::getSectionSize(DataRefImpl Ref) const {
  return getSectionSize(toSec(Ref));
}

// Returns the section's bytes, or the error reported by the coff_section
// overload of getSectionContents().
Expected<ArrayRef<uint8_t>>
COFFObjectFile::getSectionContents(DataRefImpl Ref) const {
  const coff_section *Sec = toSec(Ref);
  ArrayRef<uint8_t> Res;
  if (Error E = getSectionContents(Sec, Res))
    return std::move(E);
  return Res;
}

// Returns the alignment reported by the section header.
uint64_t COFFObjectFile::getSectionAlignment(DataRefImpl Ref) const {
  const coff_section *Sec = toSec(Ref);
  return Sec->getAlignment();
}

// Always reports false for COFF sections.
bool COFFObjectFile::isSectionCompressed(DataRefImpl Sec) const {
  return false;
}

// True if the section has the IMAGE_SCN_CNT_CODE characteristic.
bool COFFObjectFile::isSectionText(DataRefImpl Ref) const {
  const coff_section *Sec = toSec(Ref);
  return Sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE;
}

// True if the section has the IMAGE_SCN_CNT_INITIALIZED_DATA characteristic.
bool COFFObjectFile::isSectionData(DataRefImpl Ref) const {
  const coff_section *Sec = toSec(Ref);
  return Sec->Characteristics & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA;
}

// True only if the section is uninitialized data that is both readable and
// writable; all three characteristics must be present.
bool COFFObjectFile::isSectionBSS(DataRefImpl Ref) const {
  const coff_section *Sec = toSec(Ref);
  const uint32_t BssFlags = COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
                            COFF::IMAGE_SCN_MEM_READ |
                            COFF::IMAGE_SCN_MEM_WRITE;
  return (Sec->Characteristics & BssFlags) == BssFlags;
}

// The .debug sections are the only debug sections for COFF
// (\see MCObjectFileInfo.cpp).
+bool COFFObjectFile::isDebugSection(DataRefImpl Ref) const { + Expected<StringRef> SectionNameOrErr = getSectionName(Ref); + if (!SectionNameOrErr) { + // TODO: Report the error message properly. + consumeError(SectionNameOrErr.takeError()); + return false; + } + StringRef SectionName = SectionNameOrErr.get(); + return SectionName.startswith(".debug"); +} + +unsigned COFFObjectFile::getSectionID(SectionRef Sec) const { + uintptr_t Offset = + Sec.getRawDataRefImpl().p - reinterpret_cast<uintptr_t>(SectionTable); + assert((Offset % sizeof(coff_section)) == 0); + return (Offset / sizeof(coff_section)) + 1; +} + +bool COFFObjectFile::isSectionVirtual(DataRefImpl Ref) const { + const coff_section *Sec = toSec(Ref); + // In COFF, a virtual section won't have any in-file + // content, so the file pointer to the content will be zero. + return Sec->PointerToRawData == 0; +} + +static uint32_t getNumberOfRelocations(const coff_section *Sec, + MemoryBufferRef M, const uint8_t *base) { + // The field for the number of relocations in COFF section table is only + // 16-bit wide. If a section has more than 65535 relocations, 0xFFFF is set to + // NumberOfRelocations field, and the actual relocation count is stored in the + // VirtualAddress field in the first relocation entry. + if (Sec->hasExtendedRelocations()) { + const coff_relocation *FirstReloc; + if (Error E = getObject(FirstReloc, M, + reinterpret_cast<const coff_relocation *>( + base + Sec->PointerToRelocations))) { + consumeError(std::move(E)); + return 0; + } + // -1 to exclude this first relocation entry. 
+ return FirstReloc->VirtualAddress - 1; + } + return Sec->NumberOfRelocations; +} + +static const coff_relocation * +getFirstReloc(const coff_section *Sec, MemoryBufferRef M, const uint8_t *Base) { + uint64_t NumRelocs = getNumberOfRelocations(Sec, M, Base); + if (!NumRelocs) + return nullptr; + auto begin = reinterpret_cast<const coff_relocation *>( + Base + Sec->PointerToRelocations); + if (Sec->hasExtendedRelocations()) { + // Skip the first relocation entry repurposed to store the number of + // relocations. + begin++; + } + if (auto E = Binary::checkOffset(M, reinterpret_cast<uintptr_t>(begin), + sizeof(coff_relocation) * NumRelocs)) { + consumeError(std::move(E)); + return nullptr; + } + return begin; +} + +relocation_iterator COFFObjectFile::section_rel_begin(DataRefImpl Ref) const { + const coff_section *Sec = toSec(Ref); + const coff_relocation *begin = getFirstReloc(Sec, Data, base()); + if (begin && Sec->VirtualAddress != 0) + report_fatal_error("Sections with relocations should have an address of 0"); + DataRefImpl Ret; + Ret.p = reinterpret_cast<uintptr_t>(begin); + return relocation_iterator(RelocationRef(Ret, this)); +} + +relocation_iterator COFFObjectFile::section_rel_end(DataRefImpl Ref) const { + const coff_section *Sec = toSec(Ref); + const coff_relocation *I = getFirstReloc(Sec, Data, base()); + if (I) + I += getNumberOfRelocations(Sec, Data, base()); + DataRefImpl Ret; + Ret.p = reinterpret_cast<uintptr_t>(I); + return relocation_iterator(RelocationRef(Ret, this)); +} + +// Initialize the pointer to the symbol table. 
+Error COFFObjectFile::initSymbolTablePtr() { + if (COFFHeader) + if (Error E = getObject( + SymbolTable16, Data, base() + getPointerToSymbolTable(), + (uint64_t)getNumberOfSymbols() * getSymbolTableEntrySize())) + return E; + + if (COFFBigObjHeader) + if (Error E = getObject( + SymbolTable32, Data, base() + getPointerToSymbolTable(), + (uint64_t)getNumberOfSymbols() * getSymbolTableEntrySize())) + return E; + + // Find string table. The first four byte of the string table contains the + // total size of the string table, including the size field itself. If the + // string table is empty, the value of the first four byte would be 4. + uint32_t StringTableOffset = getPointerToSymbolTable() + + getNumberOfSymbols() * getSymbolTableEntrySize(); + const uint8_t *StringTableAddr = base() + StringTableOffset; + const ulittle32_t *StringTableSizePtr; + if (Error E = getObject(StringTableSizePtr, Data, StringTableAddr)) + return E; + StringTableSize = *StringTableSizePtr; + if (Error E = getObject(StringTable, Data, StringTableAddr, StringTableSize)) + return E; + + // Treat table sizes < 4 as empty because contrary to the PECOFF spec, some + // tools like cvtres write a size of 0 for an empty table instead of 4. + if (StringTableSize < 4) + StringTableSize = 4; + + // Check that the string table is null terminated if has any in it. + if (StringTableSize > 4 && StringTable[StringTableSize - 1] != 0) + return errorCodeToError(object_error::parse_failed); + return Error::success(); +} + +uint64_t COFFObjectFile::getImageBase() const { + if (PE32Header) + return PE32Header->ImageBase; + else if (PE32PlusHeader) + return PE32PlusHeader->ImageBase; + // This actually comes up in practice. + return 0; +} + +// Returns the file offset for the given VA. 
+Error COFFObjectFile::getVaPtr(uint64_t Addr, uintptr_t &Res) const { + uint64_t ImageBase = getImageBase(); + uint64_t Rva = Addr - ImageBase; + assert(Rva <= UINT32_MAX); + return getRvaPtr((uint32_t)Rva, Res); +} + +// Returns the file offset for the given RVA. +Error COFFObjectFile::getRvaPtr(uint32_t Addr, uintptr_t &Res) const { + for (const SectionRef &S : sections()) { + const coff_section *Section = getCOFFSection(S); + uint32_t SectionStart = Section->VirtualAddress; + uint32_t SectionEnd = Section->VirtualAddress + Section->VirtualSize; + if (SectionStart <= Addr && Addr < SectionEnd) { + uint32_t Offset = Addr - SectionStart; + Res = reinterpret_cast<uintptr_t>(base()) + Section->PointerToRawData + + Offset; + return Error::success(); + } + } + return errorCodeToError(object_error::parse_failed); +} + +Error COFFObjectFile::getRvaAndSizeAsBytes(uint32_t RVA, uint32_t Size, + ArrayRef<uint8_t> &Contents) const { + for (const SectionRef &S : sections()) { + const coff_section *Section = getCOFFSection(S); + uint32_t SectionStart = Section->VirtualAddress; + // Check if this RVA is within the section bounds. Be careful about integer + // overflow. + uint32_t OffsetIntoSection = RVA - SectionStart; + if (SectionStart <= RVA && OffsetIntoSection < Section->VirtualSize && + Size <= Section->VirtualSize - OffsetIntoSection) { + uintptr_t Begin = reinterpret_cast<uintptr_t>(base()) + + Section->PointerToRawData + OffsetIntoSection; + Contents = + ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(Begin), Size); + return Error::success(); + } + } + return errorCodeToError(object_error::parse_failed); +} + +// Returns hint and name fields, assuming \p Rva is pointing to a Hint/Name +// table entry. 
+Error COFFObjectFile::getHintName(uint32_t Rva, uint16_t &Hint, + StringRef &Name) const { + uintptr_t IntPtr = 0; + if (Error E = getRvaPtr(Rva, IntPtr)) + return E; + const uint8_t *Ptr = reinterpret_cast<const uint8_t *>(IntPtr); + Hint = *reinterpret_cast<const ulittle16_t *>(Ptr); + Name = StringRef(reinterpret_cast<const char *>(Ptr + 2)); + return Error::success(); +} + +Error COFFObjectFile::getDebugPDBInfo(const debug_directory *DebugDir, + const codeview::DebugInfo *&PDBInfo, + StringRef &PDBFileName) const { + ArrayRef<uint8_t> InfoBytes; + if (Error E = getRvaAndSizeAsBytes( + DebugDir->AddressOfRawData, DebugDir->SizeOfData, InfoBytes)) + return E; + if (InfoBytes.size() < sizeof(*PDBInfo) + 1) + return errorCodeToError(object_error::parse_failed); + PDBInfo = reinterpret_cast<const codeview::DebugInfo *>(InfoBytes.data()); + InfoBytes = InfoBytes.drop_front(sizeof(*PDBInfo)); + PDBFileName = StringRef(reinterpret_cast<const char *>(InfoBytes.data()), + InfoBytes.size()); + // Truncate the name at the first null byte. Ignore any padding. + PDBFileName = PDBFileName.split('\0').first; + return Error::success(); +} + +Error COFFObjectFile::getDebugPDBInfo(const codeview::DebugInfo *&PDBInfo, + StringRef &PDBFileName) const { + for (const debug_directory &D : debug_directories()) + if (D.Type == COFF::IMAGE_DEBUG_TYPE_CODEVIEW) + return getDebugPDBInfo(&D, PDBInfo, PDBFileName); + // If we get here, there is no PDB info to return. + PDBInfo = nullptr; + PDBFileName = StringRef(); + return Error::success(); +} + +// Find the import table. +Error COFFObjectFile::initImportTablePtr() { + // First, we get the RVA of the import table. If the file lacks a pointer to + // the import table, do nothing. + const data_directory *DataEntry = getDataDirectory(COFF::IMPORT_TABLE); + if (!DataEntry) + return Error::success(); + + // Do nothing if the pointer to import table is NULL. 
+ if (DataEntry->RelativeVirtualAddress == 0) + return Error::success(); + + uint32_t ImportTableRva = DataEntry->RelativeVirtualAddress; + + // Find the section that contains the RVA. This is needed because the RVA is + // the import table's memory address which is different from its file offset. + uintptr_t IntPtr = 0; + if (Error E = getRvaPtr(ImportTableRva, IntPtr)) + return E; + if (Error E = checkOffset(Data, IntPtr, DataEntry->Size)) + return E; + ImportDirectory = reinterpret_cast< + const coff_import_directory_table_entry *>(IntPtr); + return Error::success(); +} + +// Initializes DelayImportDirectory and NumberOfDelayImportDirectory. +Error COFFObjectFile::initDelayImportTablePtr() { + const data_directory *DataEntry = + getDataDirectory(COFF::DELAY_IMPORT_DESCRIPTOR); + if (!DataEntry) + return Error::success(); + if (DataEntry->RelativeVirtualAddress == 0) + return Error::success(); + + uint32_t RVA = DataEntry->RelativeVirtualAddress; + NumberOfDelayImportDirectory = DataEntry->Size / + sizeof(delay_import_directory_table_entry) - 1; + + uintptr_t IntPtr = 0; + if (Error E = getRvaPtr(RVA, IntPtr)) + return E; + DelayImportDirectory = reinterpret_cast< + const delay_import_directory_table_entry *>(IntPtr); + return Error::success(); +} + +// Find the export table. +Error COFFObjectFile::initExportTablePtr() { + // First, we get the RVA of the export table. If the file lacks a pointer to + // the export table, do nothing. + const data_directory *DataEntry = getDataDirectory(COFF::EXPORT_TABLE); + if (!DataEntry) + return Error::success(); + + // Do nothing if the pointer to export table is NULL. 
+ if (DataEntry->RelativeVirtualAddress == 0) + return Error::success(); + + uint32_t ExportTableRva = DataEntry->RelativeVirtualAddress; + uintptr_t IntPtr = 0; + if (Error E = getRvaPtr(ExportTableRva, IntPtr)) + return E; + ExportDirectory = + reinterpret_cast<const export_directory_table_entry *>(IntPtr); + return Error::success(); +} + +Error COFFObjectFile::initBaseRelocPtr() { + const data_directory *DataEntry = + getDataDirectory(COFF::BASE_RELOCATION_TABLE); + if (!DataEntry) + return Error::success(); + if (DataEntry->RelativeVirtualAddress == 0) + return Error::success(); + + uintptr_t IntPtr = 0; + if (Error E = getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr)) + return E; + BaseRelocHeader = reinterpret_cast<const coff_base_reloc_block_header *>( + IntPtr); + BaseRelocEnd = reinterpret_cast<coff_base_reloc_block_header *>( + IntPtr + DataEntry->Size); + // FIXME: Verify the section containing BaseRelocHeader has at least + // DataEntry->Size bytes after DataEntry->RelativeVirtualAddress. + return Error::success(); +} + +Error COFFObjectFile::initDebugDirectoryPtr() { + // Get the RVA of the debug directory. Do nothing if it does not exist. + const data_directory *DataEntry = getDataDirectory(COFF::DEBUG_DIRECTORY); + if (!DataEntry) + return Error::success(); + + // Do nothing if the RVA is NULL. + if (DataEntry->RelativeVirtualAddress == 0) + return Error::success(); + + // Check that the size is a multiple of the entry size. + if (DataEntry->Size % sizeof(debug_directory) != 0) + return errorCodeToError(object_error::parse_failed); + + uintptr_t IntPtr = 0; + if (Error E = getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr)) + return E; + DebugDirectoryBegin = reinterpret_cast<const debug_directory *>(IntPtr); + DebugDirectoryEnd = reinterpret_cast<const debug_directory *>( + IntPtr + DataEntry->Size); + // FIXME: Verify the section containing DebugDirectoryBegin has at least + // DataEntry->Size bytes after DataEntry->RelativeVirtualAddress. 
+ return Error::success(); +} + +Error COFFObjectFile::initTLSDirectoryPtr() { + // Get the RVA of the TLS directory. Do nothing if it does not exist. + const data_directory *DataEntry = getDataDirectory(COFF::TLS_TABLE); + if (!DataEntry) + return Error::success(); + + // Do nothing if the RVA is NULL. + if (DataEntry->RelativeVirtualAddress == 0) + return Error::success(); + + uint64_t DirSize = + is64() ? sizeof(coff_tls_directory64) : sizeof(coff_tls_directory32); + + // Check that the size is correct. + if (DataEntry->Size != DirSize) + return createStringError( + object_error::parse_failed, + "TLS Directory size (%u) is not the expected size (%" PRIu64 ").", + static_cast<uint32_t>(DataEntry->Size), DirSize); + + uintptr_t IntPtr = 0; + if (Error E = getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr)) + return E; + + if (is64()) + TLSDirectory64 = reinterpret_cast<const coff_tls_directory64 *>(IntPtr); + else + TLSDirectory32 = reinterpret_cast<const coff_tls_directory32 *>(IntPtr); + + return Error::success(); +} + +Error COFFObjectFile::initLoadConfigPtr() { + // Get the RVA of the debug directory. Do nothing if it does not exist. + const data_directory *DataEntry = getDataDirectory(COFF::LOAD_CONFIG_TABLE); + if (!DataEntry) + return Error::success(); + + // Do nothing if the RVA is NULL. 
+ if (DataEntry->RelativeVirtualAddress == 0) + return Error::success(); + uintptr_t IntPtr = 0; + if (Error E = getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr)) + return E; + + LoadConfig = (const void *)IntPtr; + return Error::success(); +} + +Expected<std::unique_ptr<COFFObjectFile>> +COFFObjectFile::create(MemoryBufferRef Object) { + std::unique_ptr<COFFObjectFile> Obj(new COFFObjectFile(std::move(Object))); + if (Error E = Obj->initialize()) + return std::move(E); + return std::move(Obj); +} + +COFFObjectFile::COFFObjectFile(MemoryBufferRef Object) + : ObjectFile(Binary::ID_COFF, Object), COFFHeader(nullptr), + COFFBigObjHeader(nullptr), PE32Header(nullptr), PE32PlusHeader(nullptr), + DataDirectory(nullptr), SectionTable(nullptr), SymbolTable16(nullptr), + SymbolTable32(nullptr), StringTable(nullptr), StringTableSize(0), + ImportDirectory(nullptr), DelayImportDirectory(nullptr), + NumberOfDelayImportDirectory(0), ExportDirectory(nullptr), + BaseRelocHeader(nullptr), BaseRelocEnd(nullptr), + DebugDirectoryBegin(nullptr), DebugDirectoryEnd(nullptr), + TLSDirectory32(nullptr), TLSDirectory64(nullptr) {} + +Error COFFObjectFile::initialize() { + // Check that we at least have enough room for a header. + std::error_code EC; + if (!checkSize(Data, EC, sizeof(coff_file_header))) + return errorCodeToError(EC); + + // The current location in the file where we are looking at. + uint64_t CurPtr = 0; + + // PE header is optional and is present only in executables. If it exists, + // it is placed right after COFF header. + bool HasPEHeader = false; + + // Check if this is a PE/COFF file. + if (checkSize(Data, EC, sizeof(dos_header) + sizeof(COFF::PEMagic))) { + // PE/COFF, seek through MS-DOS compatibility stub and 4-byte + // PE signature to find 'normal' COFF header. + const auto *DH = reinterpret_cast<const dos_header *>(base()); + if (DH->Magic[0] == 'M' && DH->Magic[1] == 'Z') { + CurPtr = DH->AddressOfNewExeHeader; + // Check the PE magic bytes. 
("PE\0\0") + if (memcmp(base() + CurPtr, COFF::PEMagic, sizeof(COFF::PEMagic)) != 0) { + return errorCodeToError(object_error::parse_failed); + } + CurPtr += sizeof(COFF::PEMagic); // Skip the PE magic bytes. + HasPEHeader = true; + } + } + + if (Error E = getObject(COFFHeader, Data, base() + CurPtr)) + return E; + + // It might be a bigobj file, let's check. Note that COFF bigobj and COFF + // import libraries share a common prefix but bigobj is more restrictive. + if (!HasPEHeader && COFFHeader->Machine == COFF::IMAGE_FILE_MACHINE_UNKNOWN && + COFFHeader->NumberOfSections == uint16_t(0xffff) && + checkSize(Data, EC, sizeof(coff_bigobj_file_header))) { + if (Error E = getObject(COFFBigObjHeader, Data, base() + CurPtr)) + return E; + + // Verify that we are dealing with bigobj. + if (COFFBigObjHeader->Version >= COFF::BigObjHeader::MinBigObjectVersion && + std::memcmp(COFFBigObjHeader->UUID, COFF::BigObjMagic, + sizeof(COFF::BigObjMagic)) == 0) { + COFFHeader = nullptr; + CurPtr += sizeof(coff_bigobj_file_header); + } else { + // It's not a bigobj. + COFFBigObjHeader = nullptr; + } + } + if (COFFHeader) { + // The prior checkSize call may have failed. This isn't a hard error + // because we were just trying to sniff out bigobj. 
+ EC = std::error_code(); + CurPtr += sizeof(coff_file_header); + + if (COFFHeader->isImportLibrary()) + return errorCodeToError(EC); + } + + if (HasPEHeader) { + const pe32_header *Header; + if (Error E = getObject(Header, Data, base() + CurPtr)) + return E; + + const uint8_t *DataDirAddr; + uint64_t DataDirSize; + if (Header->Magic == COFF::PE32Header::PE32) { + PE32Header = Header; + DataDirAddr = base() + CurPtr + sizeof(pe32_header); + DataDirSize = sizeof(data_directory) * PE32Header->NumberOfRvaAndSize; + } else if (Header->Magic == COFF::PE32Header::PE32_PLUS) { + PE32PlusHeader = reinterpret_cast<const pe32plus_header *>(Header); + DataDirAddr = base() + CurPtr + sizeof(pe32plus_header); + DataDirSize = sizeof(data_directory) * PE32PlusHeader->NumberOfRvaAndSize; + } else { + // It's neither PE32 nor PE32+. + return errorCodeToError(object_error::parse_failed); + } + if (Error E = getObject(DataDirectory, Data, DataDirAddr, DataDirSize)) + return E; + } + + if (COFFHeader) + CurPtr += COFFHeader->SizeOfOptionalHeader; + + assert(COFFHeader || COFFBigObjHeader); + + if (Error E = + getObject(SectionTable, Data, base() + CurPtr, + (uint64_t)getNumberOfSections() * sizeof(coff_section))) + return E; + + // Initialize the pointer to the symbol table. + if (getPointerToSymbolTable() != 0) { + if (Error E = initSymbolTablePtr()) { + // Recover from errors reading the symbol table. + consumeError(std::move(E)); + SymbolTable16 = nullptr; + SymbolTable32 = nullptr; + StringTable = nullptr; + StringTableSize = 0; + } + } else { + // We had better not have any symbols if we don't have a symbol table. + if (getNumberOfSymbols() != 0) { + return errorCodeToError(object_error::parse_failed); + } + } + + // Initialize the pointer to the beginning of the import table. + if (Error E = initImportTablePtr()) + return E; + if (Error E = initDelayImportTablePtr()) + return E; + + // Initialize the pointer to the export table. 
+ if (Error E = initExportTablePtr()) + return E; + + // Initialize the pointer to the base relocation table. + if (Error E = initBaseRelocPtr()) + return E; + + // Initialize the pointer to the debug directory. + if (Error E = initDebugDirectoryPtr()) + return E; + + // Initialize the pointer to the TLS directory. + if (Error E = initTLSDirectoryPtr()) + return E; + + if (Error E = initLoadConfigPtr()) + return E; + + return Error::success(); +} + +basic_symbol_iterator COFFObjectFile::symbol_begin() const { + DataRefImpl Ret; + Ret.p = getSymbolTable(); + return basic_symbol_iterator(SymbolRef(Ret, this)); +} + +basic_symbol_iterator COFFObjectFile::symbol_end() const { + // The symbol table ends where the string table begins. + DataRefImpl Ret; + Ret.p = reinterpret_cast<uintptr_t>(StringTable); + return basic_symbol_iterator(SymbolRef(Ret, this)); +} + +import_directory_iterator COFFObjectFile::import_directory_begin() const { + if (!ImportDirectory) + return import_directory_end(); + if (ImportDirectory->isNull()) + return import_directory_end(); + return import_directory_iterator( + ImportDirectoryEntryRef(ImportDirectory, 0, this)); +} + +import_directory_iterator COFFObjectFile::import_directory_end() const { + return import_directory_iterator( + ImportDirectoryEntryRef(nullptr, -1, this)); +} + +delay_import_directory_iterator +COFFObjectFile::delay_import_directory_begin() const { + return delay_import_directory_iterator( + DelayImportDirectoryEntryRef(DelayImportDirectory, 0, this)); +} + +delay_import_directory_iterator +COFFObjectFile::delay_import_directory_end() const { + return delay_import_directory_iterator( + DelayImportDirectoryEntryRef( + DelayImportDirectory, NumberOfDelayImportDirectory, this)); +} + +export_directory_iterator COFFObjectFile::export_directory_begin() const { + return export_directory_iterator( + ExportDirectoryEntryRef(ExportDirectory, 0, this)); +} + +export_directory_iterator COFFObjectFile::export_directory_end() const { + 
if (!ExportDirectory) + return export_directory_iterator(ExportDirectoryEntryRef(nullptr, 0, this)); + ExportDirectoryEntryRef Ref(ExportDirectory, + ExportDirectory->AddressTableEntries, this); + return export_directory_iterator(Ref); +} + +section_iterator COFFObjectFile::section_begin() const { + DataRefImpl Ret; + Ret.p = reinterpret_cast<uintptr_t>(SectionTable); + return section_iterator(SectionRef(Ret, this)); +} + +section_iterator COFFObjectFile::section_end() const { + DataRefImpl Ret; + int NumSections = + COFFHeader && COFFHeader->isImportLibrary() ? 0 : getNumberOfSections(); + Ret.p = reinterpret_cast<uintptr_t>(SectionTable + NumSections); + return section_iterator(SectionRef(Ret, this)); +} + +base_reloc_iterator COFFObjectFile::base_reloc_begin() const { + return base_reloc_iterator(BaseRelocRef(BaseRelocHeader, this)); +} + +base_reloc_iterator COFFObjectFile::base_reloc_end() const { + return base_reloc_iterator(BaseRelocRef(BaseRelocEnd, this)); +} + +uint8_t COFFObjectFile::getBytesInAddress() const { + return getArch() == Triple::x86_64 || getArch() == Triple::aarch64 ? 
8 : 4; +} + +StringRef COFFObjectFile::getFileFormatName() const { + switch(getMachine()) { + case COFF::IMAGE_FILE_MACHINE_I386: + return "COFF-i386"; + case COFF::IMAGE_FILE_MACHINE_AMD64: + return "COFF-x86-64"; + case COFF::IMAGE_FILE_MACHINE_ARMNT: + return "COFF-ARM"; + case COFF::IMAGE_FILE_MACHINE_ARM64: + return "COFF-ARM64"; + default: + return "COFF-<unknown arch>"; + } +} + +Triple::ArchType COFFObjectFile::getArch() const { + switch (getMachine()) { + case COFF::IMAGE_FILE_MACHINE_I386: + return Triple::x86; + case COFF::IMAGE_FILE_MACHINE_AMD64: + return Triple::x86_64; + case COFF::IMAGE_FILE_MACHINE_ARMNT: + return Triple::thumb; + case COFF::IMAGE_FILE_MACHINE_ARM64: + return Triple::aarch64; + default: + return Triple::UnknownArch; + } +} + +Expected<uint64_t> COFFObjectFile::getStartAddress() const { + if (PE32Header) + return PE32Header->AddressOfEntryPoint; + return 0; +} + +iterator_range<import_directory_iterator> +COFFObjectFile::import_directories() const { + return make_range(import_directory_begin(), import_directory_end()); +} + +iterator_range<delay_import_directory_iterator> +COFFObjectFile::delay_import_directories() const { + return make_range(delay_import_directory_begin(), + delay_import_directory_end()); +} + +iterator_range<export_directory_iterator> +COFFObjectFile::export_directories() const { + return make_range(export_directory_begin(), export_directory_end()); +} + +iterator_range<base_reloc_iterator> COFFObjectFile::base_relocs() const { + return make_range(base_reloc_begin(), base_reloc_end()); +} + +const data_directory *COFFObjectFile::getDataDirectory(uint32_t Index) const { + if (!DataDirectory) + return nullptr; + assert(PE32Header || PE32PlusHeader); + uint32_t NumEnt = PE32Header ? 
PE32Header->NumberOfRvaAndSize + : PE32PlusHeader->NumberOfRvaAndSize; + if (Index >= NumEnt) + return nullptr; + return &DataDirectory[Index]; +} + +Expected<const coff_section *> COFFObjectFile::getSection(int32_t Index) const { + // Perhaps getting the section of a reserved section index should be an error, + // but callers rely on this to return null. + if (COFF::isReservedSectionNumber(Index)) + return (const coff_section *)nullptr; + if (static_cast<uint32_t>(Index) <= getNumberOfSections()) { + // We already verified the section table data, so no need to check again. + return SectionTable + (Index - 1); + } + return errorCodeToError(object_error::parse_failed); +} + +Expected<StringRef> COFFObjectFile::getString(uint32_t Offset) const { + if (StringTableSize <= 4) + // Tried to get a string from an empty string table. + return errorCodeToError(object_error::parse_failed); + if (Offset >= StringTableSize) + return errorCodeToError(object_error::unexpected_eof); + return StringRef(StringTable + Offset); +} + +Expected<StringRef> COFFObjectFile::getSymbolName(COFFSymbolRef Symbol) const { + return getSymbolName(Symbol.getGeneric()); +} + +Expected<StringRef> +COFFObjectFile::getSymbolName(const coff_symbol_generic *Symbol) const { + // Check for string table entry. First 4 bytes are 0. + if (Symbol->Name.Offset.Zeroes == 0) + return getString(Symbol->Name.Offset.Offset); + + // Null terminated, let ::strlen figure out the length. + if (Symbol->Name.ShortName[COFF::NameSize - 1] == 0) + return StringRef(Symbol->Name.ShortName); + + // Not null terminated, use all 8 bytes. 
+ return StringRef(Symbol->Name.ShortName, COFF::NameSize); +} + +ArrayRef<uint8_t> +COFFObjectFile::getSymbolAuxData(COFFSymbolRef Symbol) const { + const uint8_t *Aux = nullptr; + + size_t SymbolSize = getSymbolTableEntrySize(); + if (Symbol.getNumberOfAuxSymbols() > 0) { + // AUX data comes immediately after the symbol in COFF + Aux = reinterpret_cast<const uint8_t *>(Symbol.getRawPtr()) + SymbolSize; +#ifndef NDEBUG + // Verify that the Aux symbol points to a valid entry in the symbol table. + uintptr_t Offset = uintptr_t(Aux) - uintptr_t(base()); + if (Offset < getPointerToSymbolTable() || + Offset >= + getPointerToSymbolTable() + (getNumberOfSymbols() * SymbolSize)) + report_fatal_error("Aux Symbol data was outside of symbol table."); + + assert((Offset - getPointerToSymbolTable()) % SymbolSize == 0 && + "Aux Symbol data did not point to the beginning of a symbol"); +#endif + } + return makeArrayRef(Aux, Symbol.getNumberOfAuxSymbols() * SymbolSize); +} + +uint32_t COFFObjectFile::getSymbolIndex(COFFSymbolRef Symbol) const { + uintptr_t Offset = + reinterpret_cast<uintptr_t>(Symbol.getRawPtr()) - getSymbolTable(); + assert(Offset % getSymbolTableEntrySize() == 0 && + "Symbol did not point to the beginning of a symbol"); + size_t Index = Offset / getSymbolTableEntrySize(); + assert(Index < getNumberOfSymbols()); + return Index; +} + +Expected<StringRef> +COFFObjectFile::getSectionName(const coff_section *Sec) const { + StringRef Name; + if (Sec->Name[COFF::NameSize - 1] == 0) + // Null terminated, let ::strlen figure out the length. + Name = Sec->Name; + else + // Not null terminated, use all 8 bytes. + Name = StringRef(Sec->Name, COFF::NameSize); + + // Check for string table entry. First byte is '/'. 
+ if (Name.startswith("/")) { + uint32_t Offset; + if (Name.startswith("//")) { + if (decodeBase64StringEntry(Name.substr(2), Offset)) + return createStringError(object_error::parse_failed, + "invalid section name"); + } else { + if (Name.substr(1).getAsInteger(10, Offset)) + return createStringError(object_error::parse_failed, + "invalid section name"); + } + return getString(Offset); + } + + return Name; +} + +uint64_t COFFObjectFile::getSectionSize(const coff_section *Sec) const { + // SizeOfRawData and VirtualSize change what they represent depending on + // whether or not we have an executable image. + // + // For object files, SizeOfRawData contains the size of section's data; + // VirtualSize should be zero but isn't due to buggy COFF writers. + // + // For executables, SizeOfRawData *must* be a multiple of FileAlignment; the + // actual section size is in VirtualSize. It is possible for VirtualSize to + // be greater than SizeOfRawData; the contents past that point should be + // considered to be zero. + if (getDOSHeader()) + return std::min(Sec->VirtualSize, Sec->SizeOfRawData); + return Sec->SizeOfRawData; +} + +Error COFFObjectFile::getSectionContents(const coff_section *Sec, + ArrayRef<uint8_t> &Res) const { + // In COFF, a virtual section won't have any in-file + // content, so the file pointer to the content will be zero. + if (Sec->PointerToRawData == 0) + return Error::success(); + // The only thing that we need to verify is that the contents is contained + // within the file bounds. We don't need to make sure it doesn't cover other + // data, as there's nothing that says that is not allowed. 
+ uintptr_t ConStart = + reinterpret_cast<uintptr_t>(base()) + Sec->PointerToRawData; + uint32_t SectionSize = getSectionSize(Sec); + if (Error E = checkOffset(Data, ConStart, SectionSize)) + return E; + Res = makeArrayRef(reinterpret_cast<const uint8_t *>(ConStart), SectionSize); + return Error::success(); +} + +const coff_relocation *COFFObjectFile::toRel(DataRefImpl Rel) const { + return reinterpret_cast<const coff_relocation*>(Rel.p); +} + +void COFFObjectFile::moveRelocationNext(DataRefImpl &Rel) const { + Rel.p = reinterpret_cast<uintptr_t>( + reinterpret_cast<const coff_relocation*>(Rel.p) + 1); +} + +uint64_t COFFObjectFile::getRelocationOffset(DataRefImpl Rel) const { + const coff_relocation *R = toRel(Rel); + return R->VirtualAddress; +} + +symbol_iterator COFFObjectFile::getRelocationSymbol(DataRefImpl Rel) const { + const coff_relocation *R = toRel(Rel); + DataRefImpl Ref; + if (R->SymbolTableIndex >= getNumberOfSymbols()) + return symbol_end(); + if (SymbolTable16) + Ref.p = reinterpret_cast<uintptr_t>(SymbolTable16 + R->SymbolTableIndex); + else if (SymbolTable32) + Ref.p = reinterpret_cast<uintptr_t>(SymbolTable32 + R->SymbolTableIndex); + else + llvm_unreachable("no symbol table pointer!"); + return symbol_iterator(SymbolRef(Ref, this)); +} + +uint64_t COFFObjectFile::getRelocationType(DataRefImpl Rel) const { + const coff_relocation* R = toRel(Rel); + return R->Type; +} + +const coff_section * +COFFObjectFile::getCOFFSection(const SectionRef &Section) const { + return toSec(Section.getRawDataRefImpl()); +} + +COFFSymbolRef COFFObjectFile::getCOFFSymbol(const DataRefImpl &Ref) const { + if (SymbolTable16) + return toSymb<coff_symbol16>(Ref); + if (SymbolTable32) + return toSymb<coff_symbol32>(Ref); + llvm_unreachable("no symbol table pointer!"); +} + +COFFSymbolRef COFFObjectFile::getCOFFSymbol(const SymbolRef &Symbol) const { + return getCOFFSymbol(Symbol.getRawDataRefImpl()); +} + +const coff_relocation * +COFFObjectFile::getCOFFRelocation(const 
RelocationRef &Reloc) const { + return toRel(Reloc.getRawDataRefImpl()); +} + +ArrayRef<coff_relocation> +COFFObjectFile::getRelocations(const coff_section *Sec) const { + return {getFirstReloc(Sec, Data, base()), + getNumberOfRelocations(Sec, Data, base())}; +} + +#define LLVM_COFF_SWITCH_RELOC_TYPE_NAME(reloc_type) \ + case COFF::reloc_type: \ + return #reloc_type; + +StringRef COFFObjectFile::getRelocationTypeName(uint16_t Type) const { + switch (getMachine()) { + case COFF::IMAGE_FILE_MACHINE_AMD64: + switch (Type) { + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_ABSOLUTE); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_ADDR64); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_ADDR32); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_ADDR32NB); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32_1); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32_2); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32_3); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32_4); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32_5); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_SECTION); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_SECREL); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_SECREL7); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_TOKEN); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_SREL32); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_PAIR); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_SSPAN32); + default: + return "Unknown"; + } + break; + case COFF::IMAGE_FILE_MACHINE_ARMNT: + switch (Type) { + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_ABSOLUTE); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_ADDR32); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_ADDR32NB); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BRANCH24); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BRANCH11); + 
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_TOKEN); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BLX24); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BLX11); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_REL32); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_SECTION); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_SECREL); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_MOV32A); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_MOV32T); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BRANCH20T); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BRANCH24T); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BLX23T); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_PAIR); + default: + return "Unknown"; + } + break; + case COFF::IMAGE_FILE_MACHINE_ARM64: + switch (Type) { + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ABSOLUTE); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ADDR32); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ADDR32NB); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_BRANCH26); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_PAGEBASE_REL21); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_REL21); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_PAGEOFFSET_12A); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_PAGEOFFSET_12L); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_SECREL); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_SECREL_LOW12A); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_SECREL_HIGH12A); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_SECREL_LOW12L); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_TOKEN); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_SECTION); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ADDR64); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_BRANCH19); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_BRANCH14); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_REL32); + default: + return "Unknown"; + } + break; + case 
COFF::IMAGE_FILE_MACHINE_I386: + switch (Type) { + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_ABSOLUTE); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_DIR16); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_REL16); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_DIR32); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_DIR32NB); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_SEG12); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_SECTION); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_SECREL); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_TOKEN); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_SECREL7); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_REL32); + default: + return "Unknown"; + } + break; + default: + return "Unknown"; + } +} + +#undef LLVM_COFF_SWITCH_RELOC_TYPE_NAME + +void COFFObjectFile::getRelocationTypeName( + DataRefImpl Rel, SmallVectorImpl<char> &Result) const { + const coff_relocation *Reloc = toRel(Rel); + StringRef Res = getRelocationTypeName(Reloc->Type); + Result.append(Res.begin(), Res.end()); +} + +bool COFFObjectFile::isRelocatableObject() const { + return !DataDirectory; +} + +StringRef COFFObjectFile::mapDebugSectionName(StringRef Name) const { + return StringSwitch<StringRef>(Name) + .Case("eh_fram", "eh_frame") + .Default(Name); +} + +bool ImportDirectoryEntryRef:: +operator==(const ImportDirectoryEntryRef &Other) const { + return ImportTable == Other.ImportTable && Index == Other.Index; +} + +void ImportDirectoryEntryRef::moveNext() { + ++Index; + if (ImportTable[Index].isNull()) { + Index = -1; + ImportTable = nullptr; + } +} + +Error ImportDirectoryEntryRef::getImportTableEntry( + const coff_import_directory_table_entry *&Result) const { + return getObject(Result, OwningObject->Data, ImportTable + Index); +} + +static imported_symbol_iterator +makeImportedSymbolIterator(const COFFObjectFile *Object, + uintptr_t Ptr, int Index) { + if (Object->getBytesInAddress() == 4) { + auto *P = 
reinterpret_cast<const import_lookup_table_entry32 *>(Ptr); + return imported_symbol_iterator(ImportedSymbolRef(P, Index, Object)); + } + auto *P = reinterpret_cast<const import_lookup_table_entry64 *>(Ptr); + return imported_symbol_iterator(ImportedSymbolRef(P, Index, Object)); +} + +static imported_symbol_iterator +importedSymbolBegin(uint32_t RVA, const COFFObjectFile *Object) { + uintptr_t IntPtr = 0; + // FIXME: Handle errors. + cantFail(Object->getRvaPtr(RVA, IntPtr)); + return makeImportedSymbolIterator(Object, IntPtr, 0); +} + +static imported_symbol_iterator +importedSymbolEnd(uint32_t RVA, const COFFObjectFile *Object) { + uintptr_t IntPtr = 0; + // FIXME: Handle errors. + cantFail(Object->getRvaPtr(RVA, IntPtr)); + // Forward the pointer to the last entry which is null. + int Index = 0; + if (Object->getBytesInAddress() == 4) { + auto *Entry = reinterpret_cast<ulittle32_t *>(IntPtr); + while (*Entry++) + ++Index; + } else { + auto *Entry = reinterpret_cast<ulittle64_t *>(IntPtr); + while (*Entry++) + ++Index; + } + return makeImportedSymbolIterator(Object, IntPtr, Index); +} + +imported_symbol_iterator +ImportDirectoryEntryRef::imported_symbol_begin() const { + return importedSymbolBegin(ImportTable[Index].ImportAddressTableRVA, + OwningObject); +} + +imported_symbol_iterator +ImportDirectoryEntryRef::imported_symbol_end() const { + return importedSymbolEnd(ImportTable[Index].ImportAddressTableRVA, + OwningObject); +} + +iterator_range<imported_symbol_iterator> +ImportDirectoryEntryRef::imported_symbols() const { + return make_range(imported_symbol_begin(), imported_symbol_end()); +} + +imported_symbol_iterator ImportDirectoryEntryRef::lookup_table_begin() const { + return importedSymbolBegin(ImportTable[Index].ImportLookupTableRVA, + OwningObject); +} + +imported_symbol_iterator ImportDirectoryEntryRef::lookup_table_end() const { + return importedSymbolEnd(ImportTable[Index].ImportLookupTableRVA, + OwningObject); +} + 
+iterator_range<imported_symbol_iterator> +ImportDirectoryEntryRef::lookup_table_symbols() const { + return make_range(lookup_table_begin(), lookup_table_end()); +} + +Error ImportDirectoryEntryRef::getName(StringRef &Result) const { + uintptr_t IntPtr = 0; + if (Error E = OwningObject->getRvaPtr(ImportTable[Index].NameRVA, IntPtr)) + return E; + Result = StringRef(reinterpret_cast<const char *>(IntPtr)); + return Error::success(); +} + +Error +ImportDirectoryEntryRef::getImportLookupTableRVA(uint32_t &Result) const { + Result = ImportTable[Index].ImportLookupTableRVA; + return Error::success(); +} + +Error ImportDirectoryEntryRef::getImportAddressTableRVA( + uint32_t &Result) const { + Result = ImportTable[Index].ImportAddressTableRVA; + return Error::success(); +} + +bool DelayImportDirectoryEntryRef:: +operator==(const DelayImportDirectoryEntryRef &Other) const { + return Table == Other.Table && Index == Other.Index; +} + +void DelayImportDirectoryEntryRef::moveNext() { + ++Index; +} + +imported_symbol_iterator +DelayImportDirectoryEntryRef::imported_symbol_begin() const { + return importedSymbolBegin(Table[Index].DelayImportNameTable, + OwningObject); +} + +imported_symbol_iterator +DelayImportDirectoryEntryRef::imported_symbol_end() const { + return importedSymbolEnd(Table[Index].DelayImportNameTable, + OwningObject); +} + +iterator_range<imported_symbol_iterator> +DelayImportDirectoryEntryRef::imported_symbols() const { + return make_range(imported_symbol_begin(), imported_symbol_end()); +} + +Error DelayImportDirectoryEntryRef::getName(StringRef &Result) const { + uintptr_t IntPtr = 0; + if (Error E = OwningObject->getRvaPtr(Table[Index].Name, IntPtr)) + return E; + Result = StringRef(reinterpret_cast<const char *>(IntPtr)); + return Error::success(); +} + +Error DelayImportDirectoryEntryRef::getDelayImportTable( + const delay_import_directory_table_entry *&Result) const { + Result = &Table[Index]; + return Error::success(); +} + +Error 
DelayImportDirectoryEntryRef::getImportAddress(int AddrIndex, + uint64_t &Result) const { + uint32_t RVA = Table[Index].DelayImportAddressTable + + AddrIndex * (OwningObject->is64() ? 8 : 4); + uintptr_t IntPtr = 0; + if (Error E = OwningObject->getRvaPtr(RVA, IntPtr)) + return E; + if (OwningObject->is64()) + Result = *reinterpret_cast<const ulittle64_t *>(IntPtr); + else + Result = *reinterpret_cast<const ulittle32_t *>(IntPtr); + return Error::success(); +} + +bool ExportDirectoryEntryRef:: +operator==(const ExportDirectoryEntryRef &Other) const { + return ExportTable == Other.ExportTable && Index == Other.Index; +} + +void ExportDirectoryEntryRef::moveNext() { + ++Index; +} + +// Returns the DLL name recorded in the export directory table, i.e. the +// null-terminated string that ExportTable->NameRVA points at. +Error ExportDirectoryEntryRef::getDllName(StringRef &Result) const { + uintptr_t IntPtr = 0; + if (Error E = OwningObject->getRvaPtr(ExportTable->NameRVA, IntPtr)) + return E; + Result = StringRef(reinterpret_cast<const char *>(IntPtr)); + return Error::success(); +} + +// Returns the starting ordinal number. +Error ExportDirectoryEntryRef::getOrdinalBase(uint32_t &Result) const { + Result = ExportTable->OrdinalBase; + return Error::success(); +} + +// Returns the export ordinal of the current export symbol. +Error ExportDirectoryEntryRef::getOrdinal(uint32_t &Result) const { + Result = ExportTable->OrdinalBase + Index; + return Error::success(); +} + +// Returns the address of the current export symbol. +Error ExportDirectoryEntryRef::getExportRVA(uint32_t &Result) const { + uintptr_t IntPtr = 0; + if (Error EC = + OwningObject->getRvaPtr(ExportTable->ExportAddressTableRVA, IntPtr)) + return EC; + const export_address_table_entry *entry = + reinterpret_cast<const export_address_table_entry *>(IntPtr); + Result = entry[Index].ExportRVA; + return Error::success(); +} + +// Returns the name of the current export symbol. 
If the symbol is exported only +// by ordinal, the empty string is set as a result. +Error +ExportDirectoryEntryRef::getSymbolName(StringRef &Result) const { + uintptr_t IntPtr = 0; + if (Error EC = + OwningObject->getRvaPtr(ExportTable->OrdinalTableRVA, IntPtr)) + return EC; + const ulittle16_t *Start = reinterpret_cast<const ulittle16_t *>(IntPtr); + + uint32_t NumEntries = ExportTable->NumberOfNamePointers; + int Offset = 0; + for (const ulittle16_t *I = Start, *E = Start + NumEntries; + I < E; ++I, ++Offset) { + if (*I != Index) + continue; + if (Error EC = + OwningObject->getRvaPtr(ExportTable->NamePointerRVA, IntPtr)) + return EC; + const ulittle32_t *NamePtr = reinterpret_cast<const ulittle32_t *>(IntPtr); + if (Error EC = OwningObject->getRvaPtr(NamePtr[Offset], IntPtr)) + return EC; + Result = StringRef(reinterpret_cast<const char *>(IntPtr)); + return Error::success(); + } + Result = ""; + return Error::success(); +} + +Error ExportDirectoryEntryRef::isForwarder(bool &Result) const { + const data_directory *DataEntry = + OwningObject->getDataDirectory(COFF::EXPORT_TABLE); + if (!DataEntry) + return errorCodeToError(object_error::parse_failed); + uint32_t RVA; + if (auto EC = getExportRVA(RVA)) + return EC; + uint32_t Begin = DataEntry->RelativeVirtualAddress; + uint32_t End = DataEntry->RelativeVirtualAddress + DataEntry->Size; + Result = (Begin <= RVA && RVA < End); + return Error::success(); +} + +Error ExportDirectoryEntryRef::getForwardTo(StringRef &Result) const { + uint32_t RVA; + if (auto EC = getExportRVA(RVA)) + return EC; + uintptr_t IntPtr = 0; + if (auto EC = OwningObject->getRvaPtr(RVA, IntPtr)) + return EC; + Result = StringRef(reinterpret_cast<const char *>(IntPtr)); + return Error::success(); +} + +bool ImportedSymbolRef:: +operator==(const ImportedSymbolRef &Other) const { + return Entry32 == Other.Entry32 && Entry64 == Other.Entry64 + && Index == Other.Index; +} + +void ImportedSymbolRef::moveNext() { + ++Index; +} + +Error 
ImportedSymbolRef::getSymbolName(StringRef &Result) const { + uint32_t RVA; + if (Entry32) { + // If a symbol is imported only by ordinal, it has no name, so Result is + // left unmodified and success is returned (the 64-bit branch below does + // the same). + if (Entry32[Index].isOrdinal()) + return Error::success(); + RVA = Entry32[Index].getHintNameRVA(); + } else { + if (Entry64[Index].isOrdinal()) + return Error::success(); + RVA = Entry64[Index].getHintNameRVA(); + } + uintptr_t IntPtr = 0; + if (Error EC = OwningObject->getRvaPtr(RVA, IntPtr)) + return EC; + // +2 because the first two bytes are the hint. + Result = StringRef(reinterpret_cast<const char *>(IntPtr + 2)); + return Error::success(); +} + +Error ImportedSymbolRef::isOrdinal(bool &Result) const { + if (Entry32) + Result = Entry32[Index].isOrdinal(); + else + Result = Entry64[Index].isOrdinal(); + return Error::success(); +} + +Error ImportedSymbolRef::getHintNameRVA(uint32_t &Result) const { + if (Entry32) + Result = Entry32[Index].getHintNameRVA(); + else + Result = Entry64[Index].getHintNameRVA(); + return Error::success(); +} + +Error ImportedSymbolRef::getOrdinal(uint16_t &Result) const { + uint32_t RVA; + if (Entry32) { + if (Entry32[Index].isOrdinal()) { + Result = Entry32[Index].getOrdinal(); + return Error::success(); + } + RVA = Entry32[Index].getHintNameRVA(); + } else { + if (Entry64[Index].isOrdinal()) { + Result = Entry64[Index].getOrdinal(); + return Error::success(); + } + RVA = Entry64[Index].getHintNameRVA(); + } + uintptr_t IntPtr = 0; + if (Error EC = OwningObject->getRvaPtr(RVA, IntPtr)) + return EC; + Result = *reinterpret_cast<const ulittle16_t *>(IntPtr); + return Error::success(); +} + +Expected<std::unique_ptr<COFFObjectFile>> +ObjectFile::createCOFFObjectFile(MemoryBufferRef Object) { + return COFFObjectFile::create(Object); +} + +bool BaseRelocRef::operator==(const BaseRelocRef &Other) const { + return Header == Other.Header && Index == Other.Index; +} + +void BaseRelocRef::moveNext() { + // Header->BlockSize is the size of the current block, including the + // size of the 
header itself. + uint32_t Size = sizeof(*Header) + + sizeof(coff_base_reloc_block_entry) * (Index + 1); + if (Size == Header->BlockSize) { + // .reloc contains a list of base relocation blocks. Each block + // consists of the header followed by entries. The header's BlockSize + // determines how many entries will follow. When we reach the end of the + // current block, proceed to the next block. + Header = reinterpret_cast<const coff_base_reloc_block_header *>( + reinterpret_cast<const uint8_t *>(Header) + Size); + Index = 0; + } else { + ++Index; + } +} + +Error BaseRelocRef::getType(uint8_t &Type) const { + auto *Entry = reinterpret_cast<const coff_base_reloc_block_entry *>(Header + 1); + Type = Entry[Index].getType(); + return Error::success(); +} + +Error BaseRelocRef::getRVA(uint32_t &Result) const { + auto *Entry = reinterpret_cast<const coff_base_reloc_block_entry *>(Header + 1); + Result = Header->PageRVA + Entry[Index].getOffset(); + return Error::success(); +} + +#define RETURN_IF_ERROR(Expr) \ + do { \ + Error E = (Expr); \ + if (E) \ + return std::move(E); \ + } while (0) + +Expected<ArrayRef<UTF16>> +ResourceSectionRef::getDirStringAtOffset(uint32_t Offset) { + BinaryStreamReader Reader = BinaryStreamReader(BBS); + Reader.setOffset(Offset); + uint16_t Length; + RETURN_IF_ERROR(Reader.readInteger(Length)); + ArrayRef<UTF16> RawDirString; + RETURN_IF_ERROR(Reader.readArray(RawDirString, Length)); + return RawDirString; +} + +Expected<ArrayRef<UTF16>> +ResourceSectionRef::getEntryNameString(const coff_resource_dir_entry &Entry) { + return getDirStringAtOffset(Entry.Identifier.getNameOffset()); +} + +Expected<const coff_resource_dir_table &> +ResourceSectionRef::getTableAtOffset(uint32_t Offset) { + const coff_resource_dir_table *Table = nullptr; + + BinaryStreamReader Reader(BBS); + Reader.setOffset(Offset); + RETURN_IF_ERROR(Reader.readObject(Table)); + assert(Table != nullptr); + return *Table; +} + +Expected<const coff_resource_dir_entry &> 
+ResourceSectionRef::getTableEntryAtOffset(uint32_t Offset) { + const coff_resource_dir_entry *Entry = nullptr; + + BinaryStreamReader Reader(BBS); + Reader.setOffset(Offset); + RETURN_IF_ERROR(Reader.readObject(Entry)); + assert(Entry != nullptr); + return *Entry; +} + +Expected<const coff_resource_data_entry &> +ResourceSectionRef::getDataEntryAtOffset(uint32_t Offset) { + const coff_resource_data_entry *Entry = nullptr; + + BinaryStreamReader Reader(BBS); + Reader.setOffset(Offset); + RETURN_IF_ERROR(Reader.readObject(Entry)); + assert(Entry != nullptr); + return *Entry; +} + +Expected<const coff_resource_dir_table &> +ResourceSectionRef::getEntrySubDir(const coff_resource_dir_entry &Entry) { + assert(Entry.Offset.isSubDir()); + return getTableAtOffset(Entry.Offset.value()); +} + +Expected<const coff_resource_data_entry &> +ResourceSectionRef::getEntryData(const coff_resource_dir_entry &Entry) { + assert(!Entry.Offset.isSubDir()); + return getDataEntryAtOffset(Entry.Offset.value()); +} + +Expected<const coff_resource_dir_table &> ResourceSectionRef::getBaseTable() { + return getTableAtOffset(0); +} + +Expected<const coff_resource_dir_entry &> +ResourceSectionRef::getTableEntry(const coff_resource_dir_table &Table, + uint32_t Index) { + if (Index >= (uint32_t)(Table.NumberOfNameEntries + Table.NumberOfIDEntries)) + return createStringError(object_error::parse_failed, "index out of range"); + const uint8_t *TablePtr = reinterpret_cast<const uint8_t *>(&Table); + ptrdiff_t TableOffset = TablePtr - BBS.data().data(); + return getTableEntryAtOffset(TableOffset + sizeof(Table) + + Index * sizeof(coff_resource_dir_entry)); +} + +Error ResourceSectionRef::load(const COFFObjectFile *O) { + for (const SectionRef &S : O->sections()) { + Expected<StringRef> Name = S.getName(); + if (!Name) + return Name.takeError(); + + if (*Name == ".rsrc" || *Name == ".rsrc$01") + return load(O, S); + } + return createStringError(object_error::parse_failed, + "no resource section found"); 
+} + +Error ResourceSectionRef::load(const COFFObjectFile *O, const SectionRef &S) { + Obj = O; + Section = S; + Expected<StringRef> Contents = Section.getContents(); + if (!Contents) + return Contents.takeError(); + BBS = BinaryByteStream(*Contents, support::little); + const coff_section *COFFSect = Obj->getCOFFSection(Section); + ArrayRef<coff_relocation> OrigRelocs = Obj->getRelocations(COFFSect); + Relocs.reserve(OrigRelocs.size()); + for (const coff_relocation &R : OrigRelocs) + Relocs.push_back(&R); + llvm::sort(Relocs, [](const coff_relocation *A, const coff_relocation *B) { + return A->VirtualAddress < B->VirtualAddress; + }); + return Error::success(); +} + +Expected<StringRef> +ResourceSectionRef::getContents(const coff_resource_data_entry &Entry) { + if (!Obj) + return createStringError(object_error::parse_failed, "no object provided"); + + // Find a potential relocation at the DataRVA field (first member of + // the coff_resource_data_entry struct). + const uint8_t *EntryPtr = reinterpret_cast<const uint8_t *>(&Entry); + ptrdiff_t EntryOffset = EntryPtr - BBS.data().data(); + coff_relocation RelocTarget{ulittle32_t(EntryOffset), ulittle32_t(0), + ulittle16_t(0)}; + auto RelocsForOffset = + std::equal_range(Relocs.begin(), Relocs.end(), &RelocTarget, + [](const coff_relocation *A, const coff_relocation *B) { + return A->VirtualAddress < B->VirtualAddress; + }); + + if (RelocsForOffset.first != RelocsForOffset.second) { + // We found a relocation with the right offset. Check that it does have + // the expected type. 
+ const coff_relocation &R = **RelocsForOffset.first; + uint16_t RVAReloc; + switch (Obj->getMachine()) { + case COFF::IMAGE_FILE_MACHINE_I386: + RVAReloc = COFF::IMAGE_REL_I386_DIR32NB; + break; + case COFF::IMAGE_FILE_MACHINE_AMD64: + RVAReloc = COFF::IMAGE_REL_AMD64_ADDR32NB; + break; + case COFF::IMAGE_FILE_MACHINE_ARMNT: + RVAReloc = COFF::IMAGE_REL_ARM_ADDR32NB; + break; + case COFF::IMAGE_FILE_MACHINE_ARM64: + RVAReloc = COFF::IMAGE_REL_ARM64_ADDR32NB; + break; + default: + return createStringError(object_error::parse_failed, + "unsupported architecture"); + } + if (R.Type != RVAReloc) + return createStringError(object_error::parse_failed, + "unexpected relocation type"); + // Get the relocation's symbol + Expected<COFFSymbolRef> Sym = Obj->getSymbol(R.SymbolTableIndex); + if (!Sym) + return Sym.takeError(); + // And the symbol's section + Expected<const coff_section *> Section = + Obj->getSection(Sym->getSectionNumber()); + if (!Section) + return Section.takeError(); + // Add the initial value of DataRVA to the symbol's offset to find the + // data it points at. + uint64_t Offset = Entry.DataRVA + Sym->getValue(); + ArrayRef<uint8_t> Contents; + if (Error E = Obj->getSectionContents(*Section, Contents)) + return std::move(E); + if (Offset + Entry.DataSize > Contents.size()) + return createStringError(object_error::parse_failed, + "data outside of section"); + // Return a reference to the data inside the section. + return StringRef(reinterpret_cast<const char *>(Contents.data()) + Offset, + Entry.DataSize); + } else { + // Relocatable objects need a relocation for the DataRVA field. + if (Obj->isRelocatableObject()) + return createStringError(object_error::parse_failed, + "no relocation found for DataRVA"); + + // Locate the section that contains the address that DataRVA points at. 
+ uint64_t VA = Entry.DataRVA + Obj->getImageBase(); + for (const SectionRef &S : Obj->sections()) { + if (VA >= S.getAddress() && + VA + Entry.DataSize <= S.getAddress() + S.getSize()) { + uint64_t Offset = VA - S.getAddress(); + Expected<StringRef> Contents = S.getContents(); + if (!Contents) + return Contents.takeError(); + return Contents->slice(Offset, Offset + Entry.DataSize); + } + } + return createStringError(object_error::parse_failed, + "address not found in image"); + } +} diff --git a/contrib/libs/llvm14/lib/Object/Decompressor.cpp b/contrib/libs/llvm14/lib/Object/Decompressor.cpp new file mode 100644 index 0000000000..11efd857d1 --- /dev/null +++ b/contrib/libs/llvm14/lib/Object/Decompressor.cpp @@ -0,0 +1,98 @@ +//===-- Decompressor.cpp --------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/Decompressor.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Support/Compression.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::support::endian; +using namespace object; + +Expected<Decompressor> Decompressor::create(StringRef Name, StringRef Data, + bool IsLE, bool Is64Bit) { + if (!zlib::isAvailable()) + return createError("zlib is not available"); + + Decompressor D(Data); + Error Err = isGnuStyle(Name) ? 
D.consumeCompressedGnuHeader() + : D.consumeCompressedZLibHeader(Is64Bit, IsLE); + if (Err) + return std::move(Err); + return D; +} + +Decompressor::Decompressor(StringRef Data) + : SectionData(Data), DecompressedSize(0) {} + +Error Decompressor::consumeCompressedGnuHeader() { + if (!SectionData.startswith("ZLIB")) + return createError("corrupted compressed section header"); + + SectionData = SectionData.substr(4); + + // Consume uncompressed section size (big-endian 8 bytes). + if (SectionData.size() < 8) + return createError("corrupted uncompressed section size"); + DecompressedSize = read64be(SectionData.data()); + SectionData = SectionData.substr(8); + + return Error::success(); +} + +Error Decompressor::consumeCompressedZLibHeader(bool Is64Bit, + bool IsLittleEndian) { + using namespace ELF; + uint64_t HdrSize = Is64Bit ? sizeof(Elf64_Chdr) : sizeof(Elf32_Chdr); + if (SectionData.size() < HdrSize) + return createError("corrupted compressed section header"); + + DataExtractor Extractor(SectionData, IsLittleEndian, 0); + uint64_t Offset = 0; + if (Extractor.getUnsigned(&Offset, Is64Bit ? sizeof(Elf64_Word) + : sizeof(Elf32_Word)) != + ELFCOMPRESS_ZLIB) + return createError("unsupported compression type"); + + // Skip Elf64_Chdr::ch_reserved field. + if (Is64Bit) + Offset += sizeof(Elf64_Word); + + DecompressedSize = Extractor.getUnsigned( + &Offset, Is64Bit ? 
sizeof(Elf64_Xword) : sizeof(Elf32_Word)); + SectionData = SectionData.substr(HdrSize); + return Error::success(); +} + +bool Decompressor::isGnuStyle(StringRef Name) { + return Name.startswith(".zdebug"); +} + +bool Decompressor::isCompressed(const object::SectionRef &Section) { + if (Section.isCompressed()) + return true; + + Expected<StringRef> SecNameOrErr = Section.getName(); + if (SecNameOrErr) + return isGnuStyle(*SecNameOrErr); + + consumeError(SecNameOrErr.takeError()); + return false; +} + +bool Decompressor::isCompressedELFSection(uint64_t Flags, StringRef Name) { + return (Flags & ELF::SHF_COMPRESSED) || isGnuStyle(Name); +} + +Error Decompressor::decompress(MutableArrayRef<char> Buffer) { + size_t Size = Buffer.size(); + return zlib::uncompress(SectionData, Buffer.data(), Size); +} diff --git a/contrib/libs/llvm14/lib/Object/ELF.cpp b/contrib/libs/llvm14/lib/Object/ELF.cpp new file mode 100644 index 0000000000..56a4262117 --- /dev/null +++ b/contrib/libs/llvm14/lib/Object/ELF.cpp @@ -0,0 +1,679 @@ +//===- ELF.cpp - ELF object file implementation ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/ELF.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/Support/DataExtractor.h" + +using namespace llvm; +using namespace object; + +#define STRINGIFY_ENUM_CASE(ns, name) \ + case ns::name: \ + return #name; + +#define ELF_RELOC(name, value) STRINGIFY_ENUM_CASE(ELF, name) + +StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine, + uint32_t Type) { + switch (Machine) { + case ELF::EM_68K: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/M68k.def" + default: + break; + } + break; + case ELF::EM_X86_64: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/x86_64.def" + default: + break; + } + break; + case ELF::EM_386: + case ELF::EM_IAMCU: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/i386.def" + default: + break; + } + break; + case ELF::EM_MIPS: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/Mips.def" + default: + break; + } + break; + case ELF::EM_AARCH64: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/AArch64.def" + default: + break; + } + break; + case ELF::EM_ARM: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/ARM.def" + default: + break; + } + break; + case ELF::EM_ARC_COMPACT: + case ELF::EM_ARC_COMPACT2: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/ARC.def" + default: + break; + } + break; + case ELF::EM_AVR: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/AVR.def" + default: + break; + } + break; + case ELF::EM_HEXAGON: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/Hexagon.def" + default: + break; + } + break; + case ELF::EM_LANAI: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/Lanai.def" + default: + break; + } + break; + case ELF::EM_PPC: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/PowerPC.def" + default: + break; + } + break; + case ELF::EM_PPC64: + switch (Type) { 
+#include "llvm/BinaryFormat/ELFRelocs/PowerPC64.def" + default: + break; + } + break; + case ELF::EM_RISCV: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/RISCV.def" + default: + break; + } + break; + case ELF::EM_S390: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/SystemZ.def" + default: + break; + } + break; + case ELF::EM_SPARC: + case ELF::EM_SPARC32PLUS: + case ELF::EM_SPARCV9: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/Sparc.def" + default: + break; + } + break; + case ELF::EM_AMDGPU: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/AMDGPU.def" + default: + break; + } + break; + case ELF::EM_BPF: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/BPF.def" + default: + break; + } + break; + case ELF::EM_MSP430: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/MSP430.def" + default: + break; + } + break; + case ELF::EM_VE: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/VE.def" + default: + break; + } + break; + case ELF::EM_CSKY: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/CSKY.def" + default: + break; + } + break; + default: + break; + } + return "Unknown"; +} + +#undef ELF_RELOC + +uint32_t llvm::object::getELFRelativeRelocationType(uint32_t Machine) { + switch (Machine) { + case ELF::EM_X86_64: + return ELF::R_X86_64_RELATIVE; + case ELF::EM_386: + case ELF::EM_IAMCU: + return ELF::R_386_RELATIVE; + case ELF::EM_MIPS: + break; + case ELF::EM_AARCH64: + return ELF::R_AARCH64_RELATIVE; + case ELF::EM_ARM: + return ELF::R_ARM_RELATIVE; + case ELF::EM_ARC_COMPACT: + case ELF::EM_ARC_COMPACT2: + return ELF::R_ARC_RELATIVE; + case ELF::EM_AVR: + break; + case ELF::EM_HEXAGON: + return ELF::R_HEX_RELATIVE; + case ELF::EM_LANAI: + break; + case ELF::EM_PPC: + break; + case ELF::EM_PPC64: + return ELF::R_PPC64_RELATIVE; + case ELF::EM_RISCV: + return ELF::R_RISCV_RELATIVE; + case ELF::EM_S390: + return ELF::R_390_RELATIVE; + case ELF::EM_SPARC: + case ELF::EM_SPARC32PLUS: + case 
ELF::EM_SPARCV9: + return ELF::R_SPARC_RELATIVE; + case ELF::EM_CSKY: + return ELF::R_CKCORE_RELATIVE; + case ELF::EM_VE: + return ELF::R_VE_RELATIVE; + case ELF::EM_AMDGPU: + break; + case ELF::EM_BPF: + break; + default: + break; + } + return 0; +} + +StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) { + switch (Machine) { + case ELF::EM_ARM: + switch (Type) { + STRINGIFY_ENUM_CASE(ELF, SHT_ARM_EXIDX); + STRINGIFY_ENUM_CASE(ELF, SHT_ARM_PREEMPTMAP); + STRINGIFY_ENUM_CASE(ELF, SHT_ARM_ATTRIBUTES); + STRINGIFY_ENUM_CASE(ELF, SHT_ARM_DEBUGOVERLAY); + STRINGIFY_ENUM_CASE(ELF, SHT_ARM_OVERLAYSECTION); + } + break; + case ELF::EM_HEXAGON: + switch (Type) { STRINGIFY_ENUM_CASE(ELF, SHT_HEX_ORDERED); } + break; + case ELF::EM_X86_64: + switch (Type) { STRINGIFY_ENUM_CASE(ELF, SHT_X86_64_UNWIND); } + break; + case ELF::EM_MIPS: + case ELF::EM_MIPS_RS3_LE: + switch (Type) { + STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_REGINFO); + STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_OPTIONS); + STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_DWARF); + STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_ABIFLAGS); + } + break; + case ELF::EM_MSP430: + switch (Type) { STRINGIFY_ENUM_CASE(ELF, SHT_MSP430_ATTRIBUTES); } + break; + case ELF::EM_RISCV: + switch (Type) { STRINGIFY_ENUM_CASE(ELF, SHT_RISCV_ATTRIBUTES); } + break; + default: + break; + } + + switch (Type) { + STRINGIFY_ENUM_CASE(ELF, SHT_NULL); + STRINGIFY_ENUM_CASE(ELF, SHT_PROGBITS); + STRINGIFY_ENUM_CASE(ELF, SHT_SYMTAB); + STRINGIFY_ENUM_CASE(ELF, SHT_STRTAB); + STRINGIFY_ENUM_CASE(ELF, SHT_RELA); + STRINGIFY_ENUM_CASE(ELF, SHT_HASH); + STRINGIFY_ENUM_CASE(ELF, SHT_DYNAMIC); + STRINGIFY_ENUM_CASE(ELF, SHT_NOTE); + STRINGIFY_ENUM_CASE(ELF, SHT_NOBITS); + STRINGIFY_ENUM_CASE(ELF, SHT_REL); + STRINGIFY_ENUM_CASE(ELF, SHT_SHLIB); + STRINGIFY_ENUM_CASE(ELF, SHT_DYNSYM); + STRINGIFY_ENUM_CASE(ELF, SHT_INIT_ARRAY); + STRINGIFY_ENUM_CASE(ELF, SHT_FINI_ARRAY); + STRINGIFY_ENUM_CASE(ELF, SHT_PREINIT_ARRAY); + STRINGIFY_ENUM_CASE(ELF, SHT_GROUP); + 
STRINGIFY_ENUM_CASE(ELF, SHT_SYMTAB_SHNDX); + STRINGIFY_ENUM_CASE(ELF, SHT_RELR); + STRINGIFY_ENUM_CASE(ELF, SHT_ANDROID_REL); + STRINGIFY_ENUM_CASE(ELF, SHT_ANDROID_RELA); + STRINGIFY_ENUM_CASE(ELF, SHT_ANDROID_RELR); + STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_ODRTAB); + STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_LINKER_OPTIONS); + STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_CALL_GRAPH_PROFILE); + STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_ADDRSIG); + STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_DEPENDENT_LIBRARIES); + STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_SYMPART); + STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_PART_EHDR); + STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_PART_PHDR); + STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_BB_ADDR_MAP); + STRINGIFY_ENUM_CASE(ELF, SHT_GNU_ATTRIBUTES); + STRINGIFY_ENUM_CASE(ELF, SHT_GNU_HASH); + STRINGIFY_ENUM_CASE(ELF, SHT_GNU_verdef); + STRINGIFY_ENUM_CASE(ELF, SHT_GNU_verneed); + STRINGIFY_ENUM_CASE(ELF, SHT_GNU_versym); + default: + return "Unknown"; + } +} + +template <class ELFT> +std::vector<typename ELFT::Rel> +ELFFile<ELFT>::decode_relrs(Elf_Relr_Range relrs) const { + // This function decodes the contents of an SHT_RELR packed relocation + // section. + // + // Proposal for adding SHT_RELR sections to generic-abi is here: + // https://groups.google.com/forum/#!topic/generic-abi/bX460iggiKg + // + // The encoded sequence of Elf64_Relr entries in a SHT_RELR section looks + // like [ AAAAAAAA BBBBBBB1 BBBBBBB1 ... AAAAAAAA BBBBBB1 ... ] + // + // i.e. start with an address, followed by any number of bitmaps. The address + // entry encodes 1 relocation. The subsequent bitmap entries encode up to 63 + // relocations each, at subsequent offsets following the last address entry. + // + // The bitmap entries must have 1 in the least significant bit. The assumption + // here is that an address cannot have 1 in lsb. Odd addresses are not + // supported. 
+ // + // Excluding the least significant bit in the bitmap, each non-zero bit in + // the bitmap represents a relocation to be applied to a corresponding machine + // word that follows the base address word. The second least significant bit + // represents the machine word immediately following the initial address, and + // each bit that follows represents the next word, in linear order. As such, + // a single bitmap can encode up to 31 relocations in a 32-bit object, and + // 63 relocations in a 64-bit object. + // + // This encoding has a couple of interesting properties: + // 1. Looking at any entry, it is clear whether it's an address or a bitmap: + // even means address, odd means bitmap. + // 2. Just a simple list of addresses is a valid encoding. + + Elf_Rel Rel; + Rel.r_info = 0; + Rel.setType(getRelativeRelocationType(), false); + std::vector<Elf_Rel> Relocs; + + // Word type: uint32_t for Elf32, and uint64_t for Elf64. + using Addr = typename ELFT::uint; + + Addr Base = 0; + for (Elf_Relr R : relrs) { + typename ELFT::uint Entry = R; + if ((Entry & 1) == 0) { + // Even entry: encodes the offset for next relocation. + Rel.r_offset = Entry; + Relocs.push_back(Rel); + // Set base offset for subsequent bitmap entries. + Base = Entry + sizeof(Addr); + } else { + // Odd entry: encodes bitmap for relocations starting at base. + for (Addr Offset = Base; (Entry >>= 1) != 0; Offset += sizeof(Addr)) + if ((Entry & 1) != 0) { + Rel.r_offset = Offset; + Relocs.push_back(Rel); + } + Base += (CHAR_BIT * sizeof(Entry) - 1) * sizeof(Addr); + } + } + + return Relocs; +} + +template <class ELFT> +Expected<std::vector<typename ELFT::Rela>> +ELFFile<ELFT>::android_relas(const Elf_Shdr &Sec) const { + // This function reads relocations in Android's packed relocation format, + // which is based on SLEB128 and delta encoding. 
+ Expected<ArrayRef<uint8_t>> ContentsOrErr = getSectionContents(Sec); + if (!ContentsOrErr) + return ContentsOrErr.takeError(); + ArrayRef<uint8_t> Content = *ContentsOrErr; + if (Content.size() < 4 || Content[0] != 'A' || Content[1] != 'P' || + Content[2] != 'S' || Content[3] != '2') + return createError("invalid packed relocation header"); + DataExtractor Data(Content, isLE(), ELFT::Is64Bits ? 8 : 4); + DataExtractor::Cursor Cur(/*Offset=*/4); + + uint64_t NumRelocs = Data.getSLEB128(Cur); + uint64_t Offset = Data.getSLEB128(Cur); + uint64_t Addend = 0; + + if (!Cur) + return std::move(Cur.takeError()); + + std::vector<Elf_Rela> Relocs; + Relocs.reserve(NumRelocs); + while (NumRelocs) { + uint64_t NumRelocsInGroup = Data.getSLEB128(Cur); + if (!Cur) + return std::move(Cur.takeError()); + if (NumRelocsInGroup > NumRelocs) + return createError("relocation group unexpectedly large"); + NumRelocs -= NumRelocsInGroup; + + uint64_t GroupFlags = Data.getSLEB128(Cur); + bool GroupedByInfo = GroupFlags & ELF::RELOCATION_GROUPED_BY_INFO_FLAG; + bool GroupedByOffsetDelta = GroupFlags & ELF::RELOCATION_GROUPED_BY_OFFSET_DELTA_FLAG; + bool GroupedByAddend = GroupFlags & ELF::RELOCATION_GROUPED_BY_ADDEND_FLAG; + bool GroupHasAddend = GroupFlags & ELF::RELOCATION_GROUP_HAS_ADDEND_FLAG; + + uint64_t GroupOffsetDelta; + if (GroupedByOffsetDelta) + GroupOffsetDelta = Data.getSLEB128(Cur); + + uint64_t GroupRInfo; + if (GroupedByInfo) + GroupRInfo = Data.getSLEB128(Cur); + + if (GroupedByAddend && GroupHasAddend) + Addend += Data.getSLEB128(Cur); + + if (!GroupHasAddend) + Addend = 0; + + for (uint64_t I = 0; Cur && I != NumRelocsInGroup; ++I) { + Elf_Rela R; + Offset += GroupedByOffsetDelta ? GroupOffsetDelta : Data.getSLEB128(Cur); + R.r_offset = Offset; + R.r_info = GroupedByInfo ? 
GroupRInfo : Data.getSLEB128(Cur); + if (GroupHasAddend && !GroupedByAddend) + Addend += Data.getSLEB128(Cur); + R.r_addend = Addend; + Relocs.push_back(R); + } + if (!Cur) + return std::move(Cur.takeError()); + } + + return Relocs; +} + +template <class ELFT> +std::string ELFFile<ELFT>::getDynamicTagAsString(unsigned Arch, + uint64_t Type) const { +#define DYNAMIC_STRINGIFY_ENUM(tag, value) \ + case value: \ + return #tag; + +#define DYNAMIC_TAG(n, v) + switch (Arch) { + case ELF::EM_AARCH64: + switch (Type) { +#define AARCH64_DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value) +#include "llvm/BinaryFormat/DynamicTags.def" +#undef AARCH64_DYNAMIC_TAG + } + break; + + case ELF::EM_HEXAGON: + switch (Type) { +#define HEXAGON_DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value) +#include "llvm/BinaryFormat/DynamicTags.def" +#undef HEXAGON_DYNAMIC_TAG + } + break; + + case ELF::EM_MIPS: + switch (Type) { +#define MIPS_DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value) +#include "llvm/BinaryFormat/DynamicTags.def" +#undef MIPS_DYNAMIC_TAG + } + break; + + case ELF::EM_PPC: + switch (Type) { +#define PPC_DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value) +#include "llvm/BinaryFormat/DynamicTags.def" +#undef PPC_DYNAMIC_TAG + } + break; + + case ELF::EM_PPC64: + switch (Type) { +#define PPC64_DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value) +#include "llvm/BinaryFormat/DynamicTags.def" +#undef PPC64_DYNAMIC_TAG + } + break; + + case ELF::EM_RISCV: + switch (Type) { +#define RISCV_DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value) +#include "llvm/BinaryFormat/DynamicTags.def" +#undef RISCV_DYNAMIC_TAG + } + break; + } +#undef DYNAMIC_TAG + switch (Type) { +// Now handle all dynamic tags except the architecture specific ones +#define AARCH64_DYNAMIC_TAG(name, value) +#define MIPS_DYNAMIC_TAG(name, value) +#define HEXAGON_DYNAMIC_TAG(name, value) +#define PPC_DYNAMIC_TAG(name, value) +#define PPC64_DYNAMIC_TAG(name, 
value) +#define RISCV_DYNAMIC_TAG(name, value) +// Also ignore marker tags such as DT_HIOS (maps to DT_VERNEEDNUM), etc. +#define DYNAMIC_TAG_MARKER(name, value) +#define DYNAMIC_TAG(name, value) case value: return #name; +#include "llvm/BinaryFormat/DynamicTags.def" +#undef DYNAMIC_TAG +#undef AARCH64_DYNAMIC_TAG +#undef MIPS_DYNAMIC_TAG +#undef HEXAGON_DYNAMIC_TAG +#undef PPC_DYNAMIC_TAG +#undef PPC64_DYNAMIC_TAG +#undef RISCV_DYNAMIC_TAG +#undef DYNAMIC_TAG_MARKER +#undef DYNAMIC_STRINGIFY_ENUM + default: + return "<unknown:>0x" + utohexstr(Type, true); + } +} + +template <class ELFT> +std::string ELFFile<ELFT>::getDynamicTagAsString(uint64_t Type) const { + return getDynamicTagAsString(getHeader().e_machine, Type); +} + +template <class ELFT> +Expected<typename ELFT::DynRange> ELFFile<ELFT>::dynamicEntries() const { + ArrayRef<Elf_Dyn> Dyn; + + auto ProgramHeadersOrError = program_headers(); + if (!ProgramHeadersOrError) + return ProgramHeadersOrError.takeError(); + + for (const Elf_Phdr &Phdr : *ProgramHeadersOrError) { + if (Phdr.p_type == ELF::PT_DYNAMIC) { + Dyn = makeArrayRef( + reinterpret_cast<const Elf_Dyn *>(base() + Phdr.p_offset), + Phdr.p_filesz / sizeof(Elf_Dyn)); + break; + } + } + + // If we can't find the dynamic section in the program headers, we just fall + // back on the sections. 
+ if (Dyn.empty()) { + auto SectionsOrError = sections(); + if (!SectionsOrError) + return SectionsOrError.takeError(); + + for (const Elf_Shdr &Sec : *SectionsOrError) { + if (Sec.sh_type == ELF::SHT_DYNAMIC) { + Expected<ArrayRef<Elf_Dyn>> DynOrError = + getSectionContentsAsArray<Elf_Dyn>(Sec); + if (!DynOrError) + return DynOrError.takeError(); + Dyn = *DynOrError; + break; + } + } + + if (!Dyn.data()) + return ArrayRef<Elf_Dyn>(); + } + + if (Dyn.empty()) + return createError("invalid empty dynamic section"); + + if (Dyn.back().d_tag != ELF::DT_NULL) + return createError("dynamic sections must be DT_NULL terminated"); + + return Dyn; +} + +template <class ELFT> +Expected<const uint8_t *> +ELFFile<ELFT>::toMappedAddr(uint64_t VAddr, WarningHandler WarnHandler) const { + auto ProgramHeadersOrError = program_headers(); + if (!ProgramHeadersOrError) + return ProgramHeadersOrError.takeError(); + + llvm::SmallVector<Elf_Phdr *, 4> LoadSegments; + + for (const Elf_Phdr &Phdr : *ProgramHeadersOrError) + if (Phdr.p_type == ELF::PT_LOAD) + LoadSegments.push_back(const_cast<Elf_Phdr *>(&Phdr)); + + auto SortPred = [](const Elf_Phdr_Impl<ELFT> *A, + const Elf_Phdr_Impl<ELFT> *B) { + return A->p_vaddr < B->p_vaddr; + }; + if (!llvm::is_sorted(LoadSegments, SortPred)) { + if (Error E = + WarnHandler("loadable segments are unsorted by virtual address")) + return std::move(E); + llvm::stable_sort(LoadSegments, SortPred); + } + + const Elf_Phdr *const *I = llvm::upper_bound( + LoadSegments, VAddr, [](uint64_t VAddr, const Elf_Phdr_Impl<ELFT> *Phdr) { + return VAddr < Phdr->p_vaddr; + }); + + if (I == LoadSegments.begin()) + return createError("virtual address is not in any segment: 0x" + + Twine::utohexstr(VAddr)); + --I; + const Elf_Phdr &Phdr = **I; + uint64_t Delta = VAddr - Phdr.p_vaddr; + if (Delta >= Phdr.p_filesz) + return createError("virtual address is not in any segment: 0x" + + Twine::utohexstr(VAddr)); + + uint64_t Offset = Phdr.p_offset + Delta; + if (Offset >= 
getBufSize()) + return createError("can't map virtual address 0x" + + Twine::utohexstr(VAddr) + " to the segment with index " + + Twine(&Phdr - (*ProgramHeadersOrError).data() + 1) + + ": the segment ends at 0x" + + Twine::utohexstr(Phdr.p_offset + Phdr.p_filesz) + + ", which is greater than the file size (0x" + + Twine::utohexstr(getBufSize()) + ")"); + + return base() + Offset; +} + +template <class ELFT> +Expected<std::vector<BBAddrMap>> +ELFFile<ELFT>::decodeBBAddrMap(const Elf_Shdr &Sec) const { + Expected<ArrayRef<uint8_t>> ContentsOrErr = getSectionContents(Sec); + if (!ContentsOrErr) + return ContentsOrErr.takeError(); + ArrayRef<uint8_t> Content = *ContentsOrErr; + DataExtractor Data(Content, isLE(), ELFT::Is64Bits ? 8 : 4); + std::vector<BBAddrMap> FunctionEntries; + + DataExtractor::Cursor Cur(0); + Error ULEBSizeErr = Error::success(); + + // Helper to extract and decode the next ULEB128 value as uint32_t. + // Returns zero and sets ULEBSizeErr if the ULEB128 value exceeds the uint32_t + // limit. + // Also returns zero if ULEBSizeErr is already in an error state. + auto ReadULEB128AsUInt32 = [&Data, &Cur, &ULEBSizeErr]() -> uint32_t { + // Bail out and do not extract data if ULEBSizeErr is already set. 
+ if (ULEBSizeErr) + return 0; + uint64_t Offset = Cur.tell(); + uint64_t Value = Data.getULEB128(Cur); + if (Value > UINT32_MAX) { + ULEBSizeErr = createError( + "ULEB128 value at offset 0x" + Twine::utohexstr(Offset) + + " exceeds UINT32_MAX (0x" + Twine::utohexstr(Value) + ")"); + return 0; + } + return static_cast<uint32_t>(Value); + }; + + while (!ULEBSizeErr && Cur && Cur.tell() < Content.size()) { + uintX_t Address = static_cast<uintX_t>(Data.getAddress(Cur)); + uint32_t NumBlocks = ReadULEB128AsUInt32(); + std::vector<BBAddrMap::BBEntry> BBEntries; + for (uint32_t BlockID = 0; !ULEBSizeErr && Cur && (BlockID < NumBlocks); + ++BlockID) { + uint32_t Offset = ReadULEB128AsUInt32(); + uint32_t Size = ReadULEB128AsUInt32(); + uint32_t Metadata = ReadULEB128AsUInt32(); + BBEntries.push_back({Offset, Size, Metadata}); + } + FunctionEntries.push_back({Address, BBEntries}); + } + // Either Cur is in the error state, or ULEBSizeError is set (not both), but + // we join the two errors here to be safe. + if (!Cur || ULEBSizeErr) + return joinErrors(Cur.takeError(), std::move(ULEBSizeErr)); + return FunctionEntries; +} + +template class llvm::object::ELFFile<ELF32LE>; +template class llvm::object::ELFFile<ELF32BE>; +template class llvm::object::ELFFile<ELF64LE>; +template class llvm::object::ELFFile<ELF64BE>; diff --git a/contrib/libs/llvm14/lib/Object/ELFObjectFile.cpp b/contrib/libs/llvm14/lib/Object/ELFObjectFile.cpp new file mode 100644 index 0000000000..cf1f12d9a9 --- /dev/null +++ b/contrib/libs/llvm14/lib/Object/ELFObjectFile.cpp @@ -0,0 +1,725 @@ +//===- ELFObjectFile.cpp - ELF object file implementation -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Part of the ELFObjectFile class implementation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCInstrAnalysis.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Object/ELF.h" +#include "llvm/Object/ELFTypes.h" +#include "llvm/Object/Error.h" +#include "llvm/Support/ARMAttributeParser.h" +#include "llvm/Support/ARMBuildAttributes.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/RISCVAttributeParser.h" +#include "llvm/Support/RISCVAttributes.h" +#include <algorithm> +#include <cstddef> +#include <cstdint> +#include <memory> +#include <string> +#include <system_error> +#include <utility> + +using namespace llvm; +using namespace object; + +const EnumEntry<unsigned> llvm::object::ElfSymbolTypes[NumElfSymbolTypes] = { + {"None", "NOTYPE", ELF::STT_NOTYPE}, + {"Object", "OBJECT", ELF::STT_OBJECT}, + {"Function", "FUNC", ELF::STT_FUNC}, + {"Section", "SECTION", ELF::STT_SECTION}, + {"File", "FILE", ELF::STT_FILE}, + {"Common", "COMMON", ELF::STT_COMMON}, + {"TLS", "TLS", ELF::STT_TLS}, + {"Unknown", "<unknown>: 7", 7}, + {"Unknown", "<unknown>: 8", 8}, + {"Unknown", "<unknown>: 9", 9}, + {"GNU_IFunc", "IFUNC", ELF::STT_GNU_IFUNC}, + {"OS Specific", "<OS specific>: 11", 11}, + {"OS Specific", "<OS specific>: 12", 12}, + {"Proc Specific", "<processor specific>: 13", 13}, + {"Proc Specific", "<processor specific>: 14", 14}, + {"Proc Specific", "<processor specific>: 15", 15} +}; + +ELFObjectFileBase::ELFObjectFileBase(unsigned int Type, MemoryBufferRef Source) + : ObjectFile(Type, Source) {} + +template <class ELFT> +static 
Expected<std::unique_ptr<ELFObjectFile<ELFT>>> +createPtr(MemoryBufferRef Object, bool InitContent) { + auto Ret = ELFObjectFile<ELFT>::create(Object, InitContent); + if (Error E = Ret.takeError()) + return std::move(E); + return std::make_unique<ELFObjectFile<ELFT>>(std::move(*Ret)); +} + +Expected<std::unique_ptr<ObjectFile>> +ObjectFile::createELFObjectFile(MemoryBufferRef Obj, bool InitContent) { + std::pair<unsigned char, unsigned char> Ident = + getElfArchType(Obj.getBuffer()); + std::size_t MaxAlignment = + 1ULL << countTrailingZeros( + reinterpret_cast<uintptr_t>(Obj.getBufferStart())); + + if (MaxAlignment < 2) + return createError("Insufficient alignment"); + + if (Ident.first == ELF::ELFCLASS32) { + if (Ident.second == ELF::ELFDATA2LSB) + return createPtr<ELF32LE>(Obj, InitContent); + else if (Ident.second == ELF::ELFDATA2MSB) + return createPtr<ELF32BE>(Obj, InitContent); + else + return createError("Invalid ELF data"); + } else if (Ident.first == ELF::ELFCLASS64) { + if (Ident.second == ELF::ELFDATA2LSB) + return createPtr<ELF64LE>(Obj, InitContent); + else if (Ident.second == ELF::ELFDATA2MSB) + return createPtr<ELF64BE>(Obj, InitContent); + else + return createError("Invalid ELF data"); + } + return createError("Invalid ELF class"); +} + +SubtargetFeatures ELFObjectFileBase::getMIPSFeatures() const { + SubtargetFeatures Features; + unsigned PlatformFlags = getPlatformFlags(); + + switch (PlatformFlags & ELF::EF_MIPS_ARCH) { + case ELF::EF_MIPS_ARCH_1: + break; + case ELF::EF_MIPS_ARCH_2: + Features.AddFeature("mips2"); + break; + case ELF::EF_MIPS_ARCH_3: + Features.AddFeature("mips3"); + break; + case ELF::EF_MIPS_ARCH_4: + Features.AddFeature("mips4"); + break; + case ELF::EF_MIPS_ARCH_5: + Features.AddFeature("mips5"); + break; + case ELF::EF_MIPS_ARCH_32: + Features.AddFeature("mips32"); + break; + case ELF::EF_MIPS_ARCH_64: + Features.AddFeature("mips64"); + break; + case ELF::EF_MIPS_ARCH_32R2: + Features.AddFeature("mips32r2"); + break; + case 
ELF::EF_MIPS_ARCH_64R2: + Features.AddFeature("mips64r2"); + break; + case ELF::EF_MIPS_ARCH_32R6: + Features.AddFeature("mips32r6"); + break; + case ELF::EF_MIPS_ARCH_64R6: + Features.AddFeature("mips64r6"); + break; + default: + llvm_unreachable("Unknown EF_MIPS_ARCH value"); + } + + switch (PlatformFlags & ELF::EF_MIPS_MACH) { + case ELF::EF_MIPS_MACH_NONE: + // No feature associated with this value. + break; + case ELF::EF_MIPS_MACH_OCTEON: + Features.AddFeature("cnmips"); + break; + default: + llvm_unreachable("Unknown EF_MIPS_ARCH value"); + } + + if (PlatformFlags & ELF::EF_MIPS_ARCH_ASE_M16) + Features.AddFeature("mips16"); + if (PlatformFlags & ELF::EF_MIPS_MICROMIPS) + Features.AddFeature("micromips"); + + return Features; +} + +SubtargetFeatures ELFObjectFileBase::getARMFeatures() const { + SubtargetFeatures Features; + ARMAttributeParser Attributes; + if (Error E = getBuildAttributes(Attributes)) { + consumeError(std::move(E)); + return SubtargetFeatures(); + } + + // both ARMv7-M and R have to support thumb hardware div + bool isV7 = false; + Optional<unsigned> Attr = + Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch); + if (Attr.hasValue()) + isV7 = Attr.getValue() == ARMBuildAttrs::v7; + + Attr = Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch_profile); + if (Attr.hasValue()) { + switch (Attr.getValue()) { + case ARMBuildAttrs::ApplicationProfile: + Features.AddFeature("aclass"); + break; + case ARMBuildAttrs::RealTimeProfile: + Features.AddFeature("rclass"); + if (isV7) + Features.AddFeature("hwdiv"); + break; + case ARMBuildAttrs::MicroControllerProfile: + Features.AddFeature("mclass"); + if (isV7) + Features.AddFeature("hwdiv"); + break; + } + } + + Attr = Attributes.getAttributeValue(ARMBuildAttrs::THUMB_ISA_use); + if (Attr.hasValue()) { + switch (Attr.getValue()) { + default: + break; + case ARMBuildAttrs::Not_Allowed: + Features.AddFeature("thumb", false); + Features.AddFeature("thumb2", false); + break; + case 
ARMBuildAttrs::AllowThumb32: + Features.AddFeature("thumb2"); + break; + } + } + + Attr = Attributes.getAttributeValue(ARMBuildAttrs::FP_arch); + if (Attr.hasValue()) { + switch (Attr.getValue()) { + default: + break; + case ARMBuildAttrs::Not_Allowed: + Features.AddFeature("vfp2sp", false); + Features.AddFeature("vfp3d16sp", false); + Features.AddFeature("vfp4d16sp", false); + break; + case ARMBuildAttrs::AllowFPv2: + Features.AddFeature("vfp2"); + break; + case ARMBuildAttrs::AllowFPv3A: + case ARMBuildAttrs::AllowFPv3B: + Features.AddFeature("vfp3"); + break; + case ARMBuildAttrs::AllowFPv4A: + case ARMBuildAttrs::AllowFPv4B: + Features.AddFeature("vfp4"); + break; + } + } + + Attr = Attributes.getAttributeValue(ARMBuildAttrs::Advanced_SIMD_arch); + if (Attr.hasValue()) { + switch (Attr.getValue()) { + default: + break; + case ARMBuildAttrs::Not_Allowed: + Features.AddFeature("neon", false); + Features.AddFeature("fp16", false); + break; + case ARMBuildAttrs::AllowNeon: + Features.AddFeature("neon"); + break; + case ARMBuildAttrs::AllowNeon2: + Features.AddFeature("neon"); + Features.AddFeature("fp16"); + break; + } + } + + Attr = Attributes.getAttributeValue(ARMBuildAttrs::MVE_arch); + if (Attr.hasValue()) { + switch (Attr.getValue()) { + default: + break; + case ARMBuildAttrs::Not_Allowed: + Features.AddFeature("mve", false); + Features.AddFeature("mve.fp", false); + break; + case ARMBuildAttrs::AllowMVEInteger: + Features.AddFeature("mve.fp", false); + Features.AddFeature("mve"); + break; + case ARMBuildAttrs::AllowMVEIntegerAndFloat: + Features.AddFeature("mve.fp"); + break; + } + } + + Attr = Attributes.getAttributeValue(ARMBuildAttrs::DIV_use); + if (Attr.hasValue()) { + switch (Attr.getValue()) { + default: + break; + case ARMBuildAttrs::DisallowDIV: + Features.AddFeature("hwdiv", false); + Features.AddFeature("hwdiv-arm", false); + break; + case ARMBuildAttrs::AllowDIVExt: + Features.AddFeature("hwdiv"); + Features.AddFeature("hwdiv-arm"); + break; + } + 
} + + return Features; +} + +SubtargetFeatures ELFObjectFileBase::getRISCVFeatures() const { + SubtargetFeatures Features; + unsigned PlatformFlags = getPlatformFlags(); + + if (PlatformFlags & ELF::EF_RISCV_RVC) { + Features.AddFeature("c"); + } + + // Add features according to the ELF attribute section. + // If there are any unrecognized features, ignore them. + RISCVAttributeParser Attributes; + if (Error E = getBuildAttributes(Attributes)) { + // TODO Propagate Error. + consumeError(std::move(E)); + return Features; // Keep "c" feature if there is one in PlatformFlags. + } + + Optional<StringRef> Attr = Attributes.getAttributeString(RISCVAttrs::ARCH); + if (Attr.hasValue()) { + // The Arch pattern is [rv32|rv64][i|e]version(_[m|a|f|d|c]version)* + // Version string pattern is (major)p(minor). Major and minor are optional. + // For example, a version number could be 2p0, 2, or p92. + StringRef Arch = Attr.getValue(); + if (Arch.consume_front("rv32")) + Features.AddFeature("64bit", false); + else if (Arch.consume_front("rv64")) + Features.AddFeature("64bit"); + + while (!Arch.empty()) { + switch (Arch[0]) { + default: + break; // Ignore unexpected features. + case 'i': + Features.AddFeature("e", false); + break; + case 'd': + Features.AddFeature("f"); // D-ext will imply F-ext. + LLVM_FALLTHROUGH; + case 'e': + case 'm': + case 'a': + case 'f': + case 'c': + Features.AddFeature(Arch.take_front()); + break; + } + + // FIXME: Handle version numbers. 
+ Arch = Arch.drop_until([](char c) { return c == '_' || c == '\0'; }); + Arch = Arch.drop_while([](char c) { return c == '_'; }); + } + } + + return Features; +} + +SubtargetFeatures ELFObjectFileBase::getFeatures() const { + switch (getEMachine()) { + case ELF::EM_MIPS: + return getMIPSFeatures(); + case ELF::EM_ARM: + return getARMFeatures(); + case ELF::EM_RISCV: + return getRISCVFeatures(); + default: + return SubtargetFeatures(); + } +} + +Optional<StringRef> ELFObjectFileBase::tryGetCPUName() const { + switch (getEMachine()) { + case ELF::EM_AMDGPU: + return getAMDGPUCPUName(); + default: + return None; + } +} + +StringRef ELFObjectFileBase::getAMDGPUCPUName() const { + assert(getEMachine() == ELF::EM_AMDGPU); + unsigned CPU = getPlatformFlags() & ELF::EF_AMDGPU_MACH; + + switch (CPU) { + // Radeon HD 2000/3000 Series (R600). + case ELF::EF_AMDGPU_MACH_R600_R600: + return "r600"; + case ELF::EF_AMDGPU_MACH_R600_R630: + return "r630"; + case ELF::EF_AMDGPU_MACH_R600_RS880: + return "rs880"; + case ELF::EF_AMDGPU_MACH_R600_RV670: + return "rv670"; + + // Radeon HD 4000 Series (R700). + case ELF::EF_AMDGPU_MACH_R600_RV710: + return "rv710"; + case ELF::EF_AMDGPU_MACH_R600_RV730: + return "rv730"; + case ELF::EF_AMDGPU_MACH_R600_RV770: + return "rv770"; + + // Radeon HD 5000 Series (Evergreen). + case ELF::EF_AMDGPU_MACH_R600_CEDAR: + return "cedar"; + case ELF::EF_AMDGPU_MACH_R600_CYPRESS: + return "cypress"; + case ELF::EF_AMDGPU_MACH_R600_JUNIPER: + return "juniper"; + case ELF::EF_AMDGPU_MACH_R600_REDWOOD: + return "redwood"; + case ELF::EF_AMDGPU_MACH_R600_SUMO: + return "sumo"; + + // Radeon HD 6000 Series (Northern Islands). + case ELF::EF_AMDGPU_MACH_R600_BARTS: + return "barts"; + case ELF::EF_AMDGPU_MACH_R600_CAICOS: + return "caicos"; + case ELF::EF_AMDGPU_MACH_R600_CAYMAN: + return "cayman"; + case ELF::EF_AMDGPU_MACH_R600_TURKS: + return "turks"; + + // AMDGCN GFX6. 
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: + return "gfx600"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: + return "gfx601"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX602: + return "gfx602"; + + // AMDGCN GFX7. + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: + return "gfx700"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: + return "gfx701"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: + return "gfx702"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: + return "gfx703"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: + return "gfx704"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX705: + return "gfx705"; + + // AMDGCN GFX8. + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: + return "gfx801"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: + return "gfx802"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: + return "gfx803"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX805: + return "gfx805"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: + return "gfx810"; + + // AMDGCN GFX9. + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: + return "gfx900"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: + return "gfx902"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: + return "gfx904"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: + return "gfx906"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX908: + return "gfx908"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: + return "gfx909"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A: + return "gfx90a"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C: + return "gfx90c"; + + // AMDGCN GFX10. 
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: + return "gfx1010"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: + return "gfx1011"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: + return "gfx1012"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013: + return "gfx1013"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030: + return "gfx1030"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031: + return "gfx1031"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032: + return "gfx1032"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033: + return "gfx1033"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034: + return "gfx1034"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035: + return "gfx1035"; + default: + llvm_unreachable("Unknown EF_AMDGPU_MACH value"); + } +} + +// FIXME Encode from a tablegen description or target parser. +void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const { + if (TheTriple.getSubArch() != Triple::NoSubArch) + return; + + ARMAttributeParser Attributes; + if (Error E = getBuildAttributes(Attributes)) { + // TODO Propagate Error. + consumeError(std::move(E)); + return; + } + + std::string Triple; + // Default to ARM, but use the triple if it's been set. 
+ if (TheTriple.isThumb()) + Triple = "thumb"; + else + Triple = "arm"; + + Optional<unsigned> Attr = + Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch); + if (Attr.hasValue()) { + switch (Attr.getValue()) { + case ARMBuildAttrs::v4: + Triple += "v4"; + break; + case ARMBuildAttrs::v4T: + Triple += "v4t"; + break; + case ARMBuildAttrs::v5T: + Triple += "v5t"; + break; + case ARMBuildAttrs::v5TE: + Triple += "v5te"; + break; + case ARMBuildAttrs::v5TEJ: + Triple += "v5tej"; + break; + case ARMBuildAttrs::v6: + Triple += "v6"; + break; + case ARMBuildAttrs::v6KZ: + Triple += "v6kz"; + break; + case ARMBuildAttrs::v6T2: + Triple += "v6t2"; + break; + case ARMBuildAttrs::v6K: + Triple += "v6k"; + break; + case ARMBuildAttrs::v7: { + Optional<unsigned> ArchProfileAttr = + Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch_profile); + if (ArchProfileAttr.hasValue() && + ArchProfileAttr.getValue() == ARMBuildAttrs::MicroControllerProfile) + Triple += "v7m"; + else + Triple += "v7"; + break; + } + case ARMBuildAttrs::v6_M: + Triple += "v6m"; + break; + case ARMBuildAttrs::v6S_M: + Triple += "v6sm"; + break; + case ARMBuildAttrs::v7E_M: + Triple += "v7em"; + break; + case ARMBuildAttrs::v8_A: + Triple += "v8a"; + break; + case ARMBuildAttrs::v8_R: + Triple += "v8r"; + break; + case ARMBuildAttrs::v8_M_Base: + Triple += "v8m.base"; + break; + case ARMBuildAttrs::v8_M_Main: + Triple += "v8m.main"; + break; + case ARMBuildAttrs::v8_1_M_Main: + Triple += "v8.1m.main"; + break; + } + } + if (!isLittleEndian()) + Triple += "eb"; + + TheTriple.setArchName(Triple); +} + +std::vector<std::pair<Optional<DataRefImpl>, uint64_t>> +ELFObjectFileBase::getPltAddresses() const { + std::string Err; + const auto Triple = makeTriple(); + const auto *T = TargetRegistry::lookupTarget(Triple.str(), Err); + if (!T) + return {}; + uint64_t JumpSlotReloc = 0; + switch (Triple.getArch()) { + case Triple::x86: + JumpSlotReloc = ELF::R_386_JUMP_SLOT; + break; + case Triple::x86_64: + 
JumpSlotReloc = ELF::R_X86_64_JUMP_SLOT; + break; + case Triple::aarch64: + case Triple::aarch64_be: + JumpSlotReloc = ELF::R_AARCH64_JUMP_SLOT; + break; + default: + return {}; + } + std::unique_ptr<const MCInstrInfo> MII(T->createMCInstrInfo()); + std::unique_ptr<const MCInstrAnalysis> MIA( + T->createMCInstrAnalysis(MII.get())); + if (!MIA) + return {}; + Optional<SectionRef> Plt = None, RelaPlt = None, GotPlt = None; + for (const SectionRef &Section : sections()) { + Expected<StringRef> NameOrErr = Section.getName(); + if (!NameOrErr) { + consumeError(NameOrErr.takeError()); + continue; + } + StringRef Name = *NameOrErr; + + if (Name == ".plt") + Plt = Section; + else if (Name == ".rela.plt" || Name == ".rel.plt") + RelaPlt = Section; + else if (Name == ".got.plt") + GotPlt = Section; + } + if (!Plt || !RelaPlt || !GotPlt) + return {}; + Expected<StringRef> PltContents = Plt->getContents(); + if (!PltContents) { + consumeError(PltContents.takeError()); + return {}; + } + auto PltEntries = MIA->findPltEntries(Plt->getAddress(), + arrayRefFromStringRef(*PltContents), + GotPlt->getAddress(), Triple); + // Build a map from GOT entry virtual address to PLT entry virtual address. + DenseMap<uint64_t, uint64_t> GotToPlt; + for (const auto &Entry : PltEntries) + GotToPlt.insert(std::make_pair(Entry.second, Entry.first)); + // Find the relocations in the dynamic relocation table that point to + // locations in the GOT for which we know the corresponding PLT entry. 
+ std::vector<std::pair<Optional<DataRefImpl>, uint64_t>> Result; + for (const auto &Relocation : RelaPlt->relocations()) { + if (Relocation.getType() != JumpSlotReloc) + continue; + auto PltEntryIter = GotToPlt.find(Relocation.getOffset()); + if (PltEntryIter != GotToPlt.end()) { + symbol_iterator Sym = Relocation.getSymbol(); + if (Sym == symbol_end()) + Result.emplace_back(None, PltEntryIter->second); + else + Result.emplace_back(Sym->getRawDataRefImpl(), PltEntryIter->second); + } + } + return Result; +} + +template <class ELFT> +static Expected<std::vector<VersionEntry>> +readDynsymVersionsImpl(const ELFFile<ELFT> &EF, + ELFObjectFileBase::elf_symbol_iterator_range Symbols) { + using Elf_Shdr = typename ELFT::Shdr; + const Elf_Shdr *VerSec = nullptr; + const Elf_Shdr *VerNeedSec = nullptr; + const Elf_Shdr *VerDefSec = nullptr; + // The user should ensure sections() can't fail here. + for (const Elf_Shdr &Sec : cantFail(EF.sections())) { + if (Sec.sh_type == ELF::SHT_GNU_versym) + VerSec = &Sec; + else if (Sec.sh_type == ELF::SHT_GNU_verdef) + VerDefSec = &Sec; + else if (Sec.sh_type == ELF::SHT_GNU_verneed) + VerNeedSec = &Sec; + } + if (!VerSec) + return std::vector<VersionEntry>(); + + Expected<SmallVector<Optional<VersionEntry>, 0>> MapOrErr = + EF.loadVersionMap(VerNeedSec, VerDefSec); + if (!MapOrErr) + return MapOrErr.takeError(); + + std::vector<VersionEntry> Ret; + size_t I = 0; + for (const ELFSymbolRef &Sym : Symbols) { + ++I; + Expected<const typename ELFT::Versym *> VerEntryOrErr = + EF.template getEntry<typename ELFT::Versym>(*VerSec, I); + if (!VerEntryOrErr) + return createError("unable to read an entry with index " + Twine(I) + + " from " + describe(EF, *VerSec) + ": " + + toString(VerEntryOrErr.takeError())); + + Expected<uint32_t> FlagsOrErr = Sym.getFlags(); + if (!FlagsOrErr) + return createError("unable to read flags for symbol with index " + + Twine(I) + ": " + toString(FlagsOrErr.takeError())); + + bool IsDefault; + Expected<StringRef> 
VerOrErr = EF.getSymbolVersionByIndex( + (*VerEntryOrErr)->vs_index, IsDefault, *MapOrErr, + (*FlagsOrErr) & SymbolRef::SF_Undefined); + if (!VerOrErr) + return createError("unable to get a version for entry " + Twine(I) + + " of " + describe(EF, *VerSec) + ": " + + toString(VerOrErr.takeError())); + + Ret.push_back({(*VerOrErr).str(), IsDefault}); + } + + return Ret; +} + +Expected<std::vector<VersionEntry>> +ELFObjectFileBase::readDynsymVersions() const { + elf_symbol_iterator_range Symbols = getDynamicSymbolIterators(); + if (const auto *Obj = dyn_cast<ELF32LEObjectFile>(this)) + return readDynsymVersionsImpl(Obj->getELFFile(), Symbols); + if (const auto *Obj = dyn_cast<ELF32BEObjectFile>(this)) + return readDynsymVersionsImpl(Obj->getELFFile(), Symbols); + if (const auto *Obj = dyn_cast<ELF64LEObjectFile>(this)) + return readDynsymVersionsImpl(Obj->getELFFile(), Symbols); + return readDynsymVersionsImpl(cast<ELF64BEObjectFile>(this)->getELFFile(), + Symbols); +} diff --git a/contrib/libs/llvm14/lib/Object/Error.cpp b/contrib/libs/llvm14/lib/Object/Error.cpp new file mode 100644 index 0000000000..bc75bc6c04 --- /dev/null +++ b/contrib/libs/llvm14/lib/Object/Error.cpp @@ -0,0 +1,94 @@ +//===- Error.cpp - system_error extensions for Object -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This defines a new error_category for the Object library. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/Error.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" + +using namespace llvm; +using namespace object; + +namespace { +// FIXME: This class is only here to support the transition to llvm::Error. It +// will be removed once this transition is complete. Clients should prefer to +// deal with the Error value directly, rather than converting to error_code. +class _object_error_category : public std::error_category { +public: + const char* name() const noexcept override; + std::string message(int ev) const override; +}; +} + +const char *_object_error_category::name() const noexcept { + return "llvm.object"; +} + +std::string _object_error_category::message(int EV) const { + object_error E = static_cast<object_error>(EV); + switch (E) { + case object_error::arch_not_found: + return "No object file for requested architecture"; + case object_error::invalid_file_type: + return "The file was not recognized as a valid object file"; + case object_error::parse_failed: + return "Invalid data was encountered while parsing the file"; + case object_error::unexpected_eof: + return "The end of the file was unexpectedly encountered"; + case object_error::string_table_non_null_end: + return "String table must end with a null terminator"; + case object_error::invalid_section_index: + return "Invalid section index"; + case object_error::bitcode_section_not_found: + return "Bitcode section not found in object file"; + case object_error::invalid_symbol_index: + return "Invalid symbol index"; + } + llvm_unreachable("An enumerator of object_error does not have a message " + "defined."); +} + +void BinaryError::anchor() {} +char BinaryError::ID = 0; +char GenericBinaryError::ID = 0; + +GenericBinaryError::GenericBinaryError(const Twine &Msg) : Msg(Msg.str()) {} + +GenericBinaryError::GenericBinaryError(const Twine 
/// Filters \p Err for the "not an object file" condition.
///
/// The handler below matches ECError payloads. One whose error code equals
/// object_error::invalid_file_type is swallowed and turned into success; any
/// other ECError is re-wrapped and returned to the caller.
llvm::Error llvm::object::isNotObjectErrorInvalidFileType(llvm::Error Err) {
  return handleErrors(std::move(Err), [](std::unique_ptr<ECError> M) -> Error {
    // Try to handle 'M'. If successful, return a success value from
    // the handler.
    if (M->convertToErrorCode() == object_error::invalid_file_type)
      return Error::success();

    // We failed to handle 'M' - return it from the handler.
    // The value returned here is propagated by handleErrors, whose result
    // is what this function returns.
    return Error(std::move(M));
  });
}
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/FaultMapParser.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +void printFaultType(FaultMapParser::FaultKind FT, raw_ostream &OS) { + switch (FT) { + default: + llvm_unreachable("unhandled fault type!"); + case FaultMapParser::FaultingLoad: + OS << "FaultingLoad"; + break; + case FaultMapParser::FaultingLoadStore: + OS << "FaultingLoadStore"; + break; + case FaultMapParser::FaultingStore: + OS << "FaultingStore"; + break; + } +} + +raw_ostream & +llvm::operator<<(raw_ostream &OS, + const FaultMapParser::FunctionFaultInfoAccessor &FFI) { + OS << "Fault kind: "; + printFaultType((FaultMapParser::FaultKind)FFI.getFaultKind(), OS); + OS << ", faulting PC offset: " << FFI.getFaultingPCOffset() + << ", handling PC offset: " << FFI.getHandlerPCOffset(); + return OS; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, + const FaultMapParser::FunctionInfoAccessor &FI) { + OS << "FunctionAddress: " << format_hex(FI.getFunctionAddr(), 8) + << ", NumFaultingPCs: " << FI.getNumFaultingPCs() << "\n"; + for (unsigned I = 0, E = FI.getNumFaultingPCs(); I != E; ++I) + OS << FI.getFunctionFaultInfoAt(I) << "\n"; + return OS; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const FaultMapParser &FMP) { + OS << "Version: " << format_hex(FMP.getFaultMapVersion(), 2) << "\n"; + OS << "NumFunctions: " << FMP.getNumFunctions() << "\n"; + + if (FMP.getNumFunctions() == 0) + return OS; + + FaultMapParser::FunctionInfoAccessor FI; + + for (unsigned I = 0, E = FMP.getNumFunctions(); I != E; ++I) { + FI = (I == 0) ? 
FMP.getFirstFunctionInfo() : FI.getNextFunctionInfo(); + OS << FI; + } + + return OS; +} diff --git a/contrib/libs/llvm14/lib/Object/IRObjectFile.cpp b/contrib/libs/llvm14/lib/Object/IRObjectFile.cpp new file mode 100644 index 0000000000..c653262791 --- /dev/null +++ b/contrib/libs/llvm14/lib/Object/IRObjectFile.cpp @@ -0,0 +1,156 @@ +//===- IRObjectFile.cpp - IR object file implementation ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Part of the IRObjectFile class implementation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/IRObjectFile.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/BinaryFormat/Magic.h" +#include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/IR/GVMaterializer.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Mangler.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; +using namespace object; + +IRObjectFile::IRObjectFile(MemoryBufferRef Object, + std::vector<std::unique_ptr<Module>> Mods) + : SymbolicFile(Binary::ID_IR, Object), Mods(std::move(Mods)) { + for (auto &M : this->Mods) + SymTab.addModule(M.get()); +} + +IRObjectFile::~IRObjectFile() {} + +static ModuleSymbolTable::Symbol getSym(DataRefImpl &Symb) { + return *reinterpret_cast<ModuleSymbolTable::Symbol *>(Symb.p); +} + +void IRObjectFile::moveSymbolNext(DataRefImpl &Symb) const { + Symb.p += sizeof(ModuleSymbolTable::Symbol); +} + +Error IRObjectFile::printSymbolName(raw_ostream &OS, DataRefImpl Symb) const { + SymTab.printSymbolName(OS, getSym(Symb)); + return Error::success(); +} 
+ +Expected<uint32_t> IRObjectFile::getSymbolFlags(DataRefImpl Symb) const { + return SymTab.getSymbolFlags(getSym(Symb)); +} + +basic_symbol_iterator IRObjectFile::symbol_begin() const { + DataRefImpl Ret; + Ret.p = reinterpret_cast<uintptr_t>(SymTab.symbols().data()); + return basic_symbol_iterator(BasicSymbolRef(Ret, this)); +} + +basic_symbol_iterator IRObjectFile::symbol_end() const { + DataRefImpl Ret; + Ret.p = reinterpret_cast<uintptr_t>(SymTab.symbols().data() + + SymTab.symbols().size()); + return basic_symbol_iterator(BasicSymbolRef(Ret, this)); +} + +StringRef IRObjectFile::getTargetTriple() const { + // Each module must have the same target triple, so we arbitrarily access the + // first one. + return Mods[0]->getTargetTriple(); +} + +Expected<MemoryBufferRef> +IRObjectFile::findBitcodeInObject(const ObjectFile &Obj) { + for (const SectionRef &Sec : Obj.sections()) { + if (Sec.isBitcode()) { + Expected<StringRef> Contents = Sec.getContents(); + if (!Contents) + return Contents.takeError(); + if (Contents->size() <= 1) + return errorCodeToError(object_error::bitcode_section_not_found); + return MemoryBufferRef(*Contents, Obj.getFileName()); + } + } + + return errorCodeToError(object_error::bitcode_section_not_found); +} + +Expected<MemoryBufferRef> +IRObjectFile::findBitcodeInMemBuffer(MemoryBufferRef Object) { + file_magic Type = identify_magic(Object.getBuffer()); + switch (Type) { + case file_magic::bitcode: + return Object; + case file_magic::elf_relocatable: + case file_magic::macho_object: + case file_magic::wasm_object: + case file_magic::coff_object: { + Expected<std::unique_ptr<ObjectFile>> ObjFile = + ObjectFile::createObjectFile(Object, Type); + if (!ObjFile) + return ObjFile.takeError(); + return findBitcodeInObject(*ObjFile->get()); + } + default: + return errorCodeToError(object_error::invalid_file_type); + } +} + +Expected<std::unique_ptr<IRObjectFile>> +IRObjectFile::create(MemoryBufferRef Object, LLVMContext &Context) { + 
Expected<MemoryBufferRef> BCOrErr = findBitcodeInMemBuffer(Object); + if (!BCOrErr) + return BCOrErr.takeError(); + + Expected<std::vector<BitcodeModule>> BMsOrErr = + getBitcodeModuleList(*BCOrErr); + if (!BMsOrErr) + return BMsOrErr.takeError(); + + std::vector<std::unique_ptr<Module>> Mods; + for (auto BM : *BMsOrErr) { + Expected<std::unique_ptr<Module>> MOrErr = + BM.getLazyModule(Context, /*ShouldLazyLoadMetadata*/ true, + /*IsImporting*/ false); + if (!MOrErr) + return MOrErr.takeError(); + + Mods.push_back(std::move(*MOrErr)); + } + + return std::unique_ptr<IRObjectFile>( + new IRObjectFile(*BCOrErr, std::move(Mods))); +} + +Expected<IRSymtabFile> object::readIRSymtab(MemoryBufferRef MBRef) { + IRSymtabFile F; + Expected<MemoryBufferRef> BCOrErr = + IRObjectFile::findBitcodeInMemBuffer(MBRef); + if (!BCOrErr) + return BCOrErr.takeError(); + + Expected<BitcodeFileContents> BFCOrErr = getBitcodeFileContents(*BCOrErr); + if (!BFCOrErr) + return BFCOrErr.takeError(); + + Expected<irsymtab::FileContents> FCOrErr = irsymtab::readBitcode(*BFCOrErr); + if (!FCOrErr) + return FCOrErr.takeError(); + + F.Mods = std::move(BFCOrErr->Mods); + F.Symtab = std::move(FCOrErr->Symtab); + F.Strtab = std::move(FCOrErr->Strtab); + F.TheReader = std::move(FCOrErr->TheReader); + return std::move(F); +} diff --git a/contrib/libs/llvm14/lib/Object/IRSymtab.cpp b/contrib/libs/llvm14/lib/Object/IRSymtab.cpp new file mode 100644 index 0000000000..dea3d90d35 --- /dev/null +++ b/contrib/libs/llvm14/lib/Object/IRSymtab.cpp @@ -0,0 +1,439 @@ +//===- IRSymtab.cpp - implementation of IR symbol tables ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
// Hidden command-line flag, off by default. irsymtab::readBitcode consults it
// before comparing the stored symbol table's version and producer against the
// current ones; when set, those comparisons are skipped. The module-count
// check in readBitcode is performed regardless of this flag.
cl::opt<bool> DisableBitcodeVersionUpgrade(
    "disable-bitcode-version-upgrade", cl::init(false), cl::Hidden,
    cl::desc("Disable automatic bitcode upgrade for version mismatch"));
+ "__ssp_canary_word", + "__stack_chk_guard", +}; + +namespace { + +const char *getExpectedProducerName() { + static char DefaultName[] = LLVM_VERSION_STRING +#ifdef LLVM_REVISION + " " LLVM_REVISION +#endif + ; + // Allows for testing of the irsymtab writer and upgrade mechanism. This + // environment variable should not be set by users. + if (char *OverrideName = getenv("LLVM_OVERRIDE_PRODUCER")) + return OverrideName; + return DefaultName; +} + +const char *kExpectedProducerName = getExpectedProducerName(); + +/// Stores the temporary state that is required to build an IR symbol table. +struct Builder { + SmallVector<char, 0> &Symtab; + StringTableBuilder &StrtabBuilder; + StringSaver Saver; + + // This ctor initializes a StringSaver using the passed in BumpPtrAllocator. + // The StringTableBuilder does not create a copy of any strings added to it, + // so this provides somewhere to store any strings that we create. + Builder(SmallVector<char, 0> &Symtab, StringTableBuilder &StrtabBuilder, + BumpPtrAllocator &Alloc) + : Symtab(Symtab), StrtabBuilder(StrtabBuilder), Saver(Alloc) {} + + DenseMap<const Comdat *, int> ComdatMap; + Mangler Mang; + Triple TT; + + std::vector<storage::Comdat> Comdats; + std::vector<storage::Module> Mods; + std::vector<storage::Symbol> Syms; + std::vector<storage::Uncommon> Uncommons; + + std::string COFFLinkerOpts; + raw_string_ostream COFFLinkerOptsOS{COFFLinkerOpts}; + + std::vector<storage::Str> DependentLibraries; + + void setStr(storage::Str &S, StringRef Value) { + S.Offset = StrtabBuilder.add(Value); + S.Size = Value.size(); + } + + template <typename T> + void writeRange(storage::Range<T> &R, const std::vector<T> &Objs) { + R.Offset = Symtab.size(); + R.Size = Objs.size(); + Symtab.insert(Symtab.end(), reinterpret_cast<const char *>(Objs.data()), + reinterpret_cast<const char *>(Objs.data() + Objs.size())); + } + + Expected<int> getComdatIndex(const Comdat *C, const Module *M); + + Error addModule(Module *M); + Error 
addSymbol(const ModuleSymbolTable &Msymtab, + const SmallPtrSet<GlobalValue *, 4> &Used, + ModuleSymbolTable::Symbol Sym); + + Error build(ArrayRef<Module *> Mods); +}; + +Error Builder::addModule(Module *M) { + if (M->getDataLayoutStr().empty()) + return make_error<StringError>("input module has no datalayout", + inconvertibleErrorCode()); + + // Symbols in the llvm.used list will get the FB_Used bit and will not be + // internalized. We do this for llvm.compiler.used as well: + // + // IR symbol table tracks module-level asm symbol references but not inline + // asm. A symbol only referenced by inline asm is not in the IR symbol table, + // so we may not know that the definition (in another translation unit) is + // referenced. That definition may have __attribute__((used)) (which lowers to + // llvm.compiler.used on ELF targets) to communicate to the compiler that it + // may be used by inline asm. The usage is perfectly fine, so we treat + // llvm.compiler.used conservatively as llvm.used to work around our own + // limitation. 
+ SmallVector<GlobalValue *, 4> UsedV; + collectUsedGlobalVariables(*M, UsedV, /*CompilerUsed=*/false); + collectUsedGlobalVariables(*M, UsedV, /*CompilerUsed=*/true); + SmallPtrSet<GlobalValue *, 4> Used(UsedV.begin(), UsedV.end()); + + ModuleSymbolTable Msymtab; + Msymtab.addModule(M); + + storage::Module Mod; + Mod.Begin = Syms.size(); + Mod.End = Syms.size() + Msymtab.symbols().size(); + Mod.UncBegin = Uncommons.size(); + Mods.push_back(Mod); + + if (TT.isOSBinFormatCOFF()) { + if (auto E = M->materializeMetadata()) + return E; + if (NamedMDNode *LinkerOptions = + M->getNamedMetadata("llvm.linker.options")) { + for (MDNode *MDOptions : LinkerOptions->operands()) + for (const MDOperand &MDOption : cast<MDNode>(MDOptions)->operands()) + COFFLinkerOptsOS << " " << cast<MDString>(MDOption)->getString(); + } + } + + if (TT.isOSBinFormatELF()) { + if (auto E = M->materializeMetadata()) + return E; + if (NamedMDNode *N = M->getNamedMetadata("llvm.dependent-libraries")) { + for (MDNode *MDOptions : N->operands()) { + const auto OperandStr = + cast<MDString>(cast<MDNode>(MDOptions)->getOperand(0))->getString(); + storage::Str Specifier; + setStr(Specifier, OperandStr); + DependentLibraries.emplace_back(Specifier); + } + } + } + + for (ModuleSymbolTable::Symbol Msym : Msymtab.symbols()) + if (Error Err = addSymbol(Msymtab, Used, Msym)) + return Err; + + return Error::success(); +} + +Expected<int> Builder::getComdatIndex(const Comdat *C, const Module *M) { + auto P = ComdatMap.insert(std::make_pair(C, Comdats.size())); + if (P.second) { + std::string Name; + if (TT.isOSBinFormatCOFF()) { + const GlobalValue *GV = M->getNamedValue(C->getName()); + if (!GV) + return make_error<StringError>("Could not find leader", + inconvertibleErrorCode()); + // Internal leaders do not affect symbol resolution, therefore they do not + // appear in the symbol table. 
+ if (GV->hasLocalLinkage()) { + P.first->second = -1; + return -1; + } + llvm::raw_string_ostream OS(Name); + Mang.getNameWithPrefix(OS, GV, false); + } else { + Name = std::string(C->getName()); + } + + storage::Comdat Comdat; + setStr(Comdat.Name, Saver.save(Name)); + Comdat.SelectionKind = C->getSelectionKind(); + Comdats.push_back(Comdat); + } + + return P.first->second; +} + +Error Builder::addSymbol(const ModuleSymbolTable &Msymtab, + const SmallPtrSet<GlobalValue *, 4> &Used, + ModuleSymbolTable::Symbol Msym) { + Syms.emplace_back(); + storage::Symbol &Sym = Syms.back(); + Sym = {}; + + storage::Uncommon *Unc = nullptr; + auto Uncommon = [&]() -> storage::Uncommon & { + if (Unc) + return *Unc; + Sym.Flags |= 1 << storage::Symbol::FB_has_uncommon; + Uncommons.emplace_back(); + Unc = &Uncommons.back(); + *Unc = {}; + setStr(Unc->COFFWeakExternFallbackName, ""); + setStr(Unc->SectionName, ""); + return *Unc; + }; + + SmallString<64> Name; + { + raw_svector_ostream OS(Name); + Msymtab.printSymbolName(OS, Msym); + } + setStr(Sym.Name, Saver.save(Name.str())); + + auto Flags = Msymtab.getSymbolFlags(Msym); + if (Flags & object::BasicSymbolRef::SF_Undefined) + Sym.Flags |= 1 << storage::Symbol::FB_undefined; + if (Flags & object::BasicSymbolRef::SF_Weak) + Sym.Flags |= 1 << storage::Symbol::FB_weak; + if (Flags & object::BasicSymbolRef::SF_Common) + Sym.Flags |= 1 << storage::Symbol::FB_common; + if (Flags & object::BasicSymbolRef::SF_Indirect) + Sym.Flags |= 1 << storage::Symbol::FB_indirect; + if (Flags & object::BasicSymbolRef::SF_Global) + Sym.Flags |= 1 << storage::Symbol::FB_global; + if (Flags & object::BasicSymbolRef::SF_FormatSpecific) + Sym.Flags |= 1 << storage::Symbol::FB_format_specific; + if (Flags & object::BasicSymbolRef::SF_Executable) + Sym.Flags |= 1 << storage::Symbol::FB_executable; + + Sym.ComdatIndex = -1; + auto *GV = Msym.dyn_cast<GlobalValue *>(); + if (!GV) { + // Undefined module asm symbols act as GC roots and are implicitly used. 
+ if (Flags & object::BasicSymbolRef::SF_Undefined) + Sym.Flags |= 1 << storage::Symbol::FB_used; + setStr(Sym.IRName, ""); + return Error::success(); + } + + setStr(Sym.IRName, GV->getName()); + + bool IsPreservedSymbol = llvm::is_contained(PreservedSymbols, GV->getName()); + + if (Used.count(GV) || IsPreservedSymbol) + Sym.Flags |= 1 << storage::Symbol::FB_used; + if (GV->isThreadLocal()) + Sym.Flags |= 1 << storage::Symbol::FB_tls; + if (GV->hasGlobalUnnamedAddr()) + Sym.Flags |= 1 << storage::Symbol::FB_unnamed_addr; + if (GV->canBeOmittedFromSymbolTable()) + Sym.Flags |= 1 << storage::Symbol::FB_may_omit; + Sym.Flags |= unsigned(GV->getVisibility()) << storage::Symbol::FB_visibility; + + if (Flags & object::BasicSymbolRef::SF_Common) { + auto *GVar = dyn_cast<GlobalVariable>(GV); + if (!GVar) + return make_error<StringError>("Only variables can have common linkage!", + inconvertibleErrorCode()); + Uncommon().CommonSize = + GV->getParent()->getDataLayout().getTypeAllocSize(GV->getValueType()); + Uncommon().CommonAlign = GVar->getAlignment(); + } + + const GlobalObject *GO = GV->getAliaseeObject(); + if (!GO) { + if (isa<GlobalIFunc>(GV)) + GO = cast<GlobalIFunc>(GV)->getResolverFunction(); + if (!GO) + return make_error<StringError>("Unable to determine comdat of alias!", + inconvertibleErrorCode()); + } + if (const Comdat *C = GO->getComdat()) { + Expected<int> ComdatIndexOrErr = getComdatIndex(C, GV->getParent()); + if (!ComdatIndexOrErr) + return ComdatIndexOrErr.takeError(); + Sym.ComdatIndex = *ComdatIndexOrErr; + } + + if (TT.isOSBinFormatCOFF()) { + emitLinkerFlagsForGlobalCOFF(COFFLinkerOptsOS, GV, TT, Mang); + + if ((Flags & object::BasicSymbolRef::SF_Weak) && + (Flags & object::BasicSymbolRef::SF_Indirect)) { + auto *Fallback = dyn_cast<GlobalValue>( + cast<GlobalAlias>(GV)->getAliasee()->stripPointerCasts()); + if (!Fallback) + return make_error<StringError>("Invalid weak external", + inconvertibleErrorCode()); + std::string FallbackName; + 
/// Serializes the symbol table for \p IRMods into Symtab.
///
/// The header's version, producer, target triple and source file name are
/// filled in first, with the latter two taken from the first module. Every
/// module is then added, and finally the per-kind tables are appended to
/// Symtab behind space reserved for the header.
///
/// \param IRMods the modules to describe; must not be empty (asserted).
/// \returns the first error reported by addModule(), otherwise success.
Error Builder::build(ArrayRef<Module *> IRMods) {
  storage::Header Hdr;

  assert(!IRMods.empty());
  Hdr.Version = storage::Header::kCurrentVersion;
  setStr(Hdr.Producer, kExpectedProducerName);
  setStr(Hdr.TargetTriple, IRMods[0]->getTargetTriple());
  setStr(Hdr.SourceFileName, IRMods[0]->getSourceFileName());
  // TT drives the COFF/ELF-specific handling in addModule/addSymbol, so it
  // has to be set before the modules are processed.
  TT = Triple(IRMods[0]->getTargetTriple());

  for (auto *M : IRMods)
    if (Error Err = addModule(M))
      return Err;

  // Flush the stream before COFFLinkerOpts is read.
  COFFLinkerOptsOS.flush();
  setStr(Hdr.COFFLinkerOpts, Saver.save(COFFLinkerOpts));

  // We are about to fill in the header's range fields, so reserve space for it
  // and copy it in afterwards.
  Symtab.resize(sizeof(storage::Header));
  // Each writeRange records the current end of Symtab as the range offset and
  // then appends the table, so the call order defines the layout.
  writeRange(Hdr.Modules, Mods);
  writeRange(Hdr.Comdats, Comdats);
  writeRange(Hdr.Symbols, Syms);
  writeRange(Hdr.Uncommons, Uncommons);
  writeRange(Hdr.DependentLibraries, DependentLibraries);
  // All ranges are known now; place the completed header into the space
  // reserved at the front.
  *reinterpret_cast<storage::Header *>(Symtab.data()) = Hdr;
  return Error::success();
}
+static Expected<FileContents> upgrade(ArrayRef<BitcodeModule> BMs) { + FileContents FC; + + LLVMContext Ctx; + std::vector<Module *> Mods; + std::vector<std::unique_ptr<Module>> OwnedMods; + for (auto BM : BMs) { + Expected<std::unique_ptr<Module>> MOrErr = + BM.getLazyModule(Ctx, /*ShouldLazyLoadMetadata*/ true, + /*IsImporting*/ false); + if (!MOrErr) + return MOrErr.takeError(); + + Mods.push_back(MOrErr->get()); + OwnedMods.push_back(std::move(*MOrErr)); + } + + StringTableBuilder StrtabBuilder(StringTableBuilder::RAW); + BumpPtrAllocator Alloc; + if (Error E = build(Mods, FC.Symtab, StrtabBuilder, Alloc)) + return std::move(E); + + StrtabBuilder.finalizeInOrder(); + FC.Strtab.resize(StrtabBuilder.getSize()); + StrtabBuilder.write((uint8_t *)FC.Strtab.data()); + + FC.TheReader = {{FC.Symtab.data(), FC.Symtab.size()}, + {FC.Strtab.data(), FC.Strtab.size()}}; + return std::move(FC); +} + +Expected<FileContents> irsymtab::readBitcode(const BitcodeFileContents &BFC) { + if (BFC.Mods.empty()) + return make_error<StringError>("Bitcode file does not contain any modules", + inconvertibleErrorCode()); + + if (!DisableBitcodeVersionUpgrade) { + if (BFC.StrtabForSymtab.empty() || + BFC.Symtab.size() < sizeof(storage::Header)) + return upgrade(BFC.Mods); + + // We cannot use the regular reader to read the version and producer, + // because it will expect the header to be in the current format. The only + // thing we can rely on is that the version and producer will be present as + // the first struct elements. 
+ auto *Hdr = reinterpret_cast<const storage::Header *>(BFC.Symtab.data()); + unsigned Version = Hdr->Version; + StringRef Producer = Hdr->Producer.get(BFC.StrtabForSymtab); + if (Version != storage::Header::kCurrentVersion || + Producer != kExpectedProducerName) + return upgrade(BFC.Mods); + } + + FileContents FC; + FC.TheReader = {{BFC.Symtab.data(), BFC.Symtab.size()}, + {BFC.StrtabForSymtab.data(), BFC.StrtabForSymtab.size()}}; + + // Finally, make sure that the number of modules in the symbol table matches + // the number of modules in the bitcode file. If they differ, it may mean that + // the bitcode file was created by binary concatenation, so we need to create + // a new symbol table from scratch. + if (FC.TheReader.getNumModules() != BFC.Mods.size()) + return upgrade(std::move(BFC.Mods)); + + return std::move(FC); +} diff --git a/contrib/libs/llvm14/lib/Object/MachOObjectFile.cpp b/contrib/libs/llvm14/lib/Object/MachOObjectFile.cpp new file mode 100644 index 0000000000..3d95b18f46 --- /dev/null +++ b/contrib/libs/llvm14/lib/Object/MachOObjectFile.cpp @@ -0,0 +1,4780 @@ +//===- MachOObjectFile.cpp - Mach-O object file binding -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the MachOObjectFile class, which binds the MachOObject +// class to the generic ObjectFile wrapper. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/BinaryFormat/Swift.h" +#include "llvm/Object/Error.h" +#include "llvm/Object/MachO.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Object/SymbolicFile.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/SwapByteOrder.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <limits> +#include <list> +#include <memory> +#include <system_error> + +using namespace llvm; +using namespace object; + +namespace { + + struct section_base { + char sectname[16]; + char segname[16]; + }; + +} // end anonymous namespace + +static Error malformedError(const Twine &Msg) { + return make_error<GenericBinaryError>("truncated or malformed object (" + + Msg + ")", + object_error::parse_failed); +} + +// FIXME: Replace all uses of this function with getStructOrErr. 
+template <typename T> +static T getStruct(const MachOObjectFile &O, const char *P) { + // Don't read before the beginning or past the end of the file + if (P < O.getData().begin() || P + sizeof(T) > O.getData().end()) + report_fatal_error("Malformed MachO file."); + + T Cmd; + memcpy(&Cmd, P, sizeof(T)); + if (O.isLittleEndian() != sys::IsLittleEndianHost) + MachO::swapStruct(Cmd); + return Cmd; +} + +template <typename T> +static Expected<T> getStructOrErr(const MachOObjectFile &O, const char *P) { + // Don't read before the beginning or past the end of the file + if (P < O.getData().begin() || P + sizeof(T) > O.getData().end()) + return malformedError("Structure read out-of-range"); + + T Cmd; + memcpy(&Cmd, P, sizeof(T)); + if (O.isLittleEndian() != sys::IsLittleEndianHost) + MachO::swapStruct(Cmd); + return Cmd; +} + +static const char * +getSectionPtr(const MachOObjectFile &O, MachOObjectFile::LoadCommandInfo L, + unsigned Sec) { + uintptr_t CommandAddr = reinterpret_cast<uintptr_t>(L.Ptr); + + bool Is64 = O.is64Bit(); + unsigned SegmentLoadSize = Is64 ? sizeof(MachO::segment_command_64) : + sizeof(MachO::segment_command); + unsigned SectionSize = Is64 ? sizeof(MachO::section_64) : + sizeof(MachO::section); + + uintptr_t SectionAddr = CommandAddr + SegmentLoadSize + Sec * SectionSize; + return reinterpret_cast<const char*>(SectionAddr); +} + +static const char *getPtr(const MachOObjectFile &O, size_t Offset) { + assert(Offset <= O.getData().size()); + return O.getData().data() + Offset; +} + +static MachO::nlist_base +getSymbolTableEntryBase(const MachOObjectFile &O, DataRefImpl DRI) { + const char *P = reinterpret_cast<const char *>(DRI.p); + return getStruct<MachO::nlist_base>(O, P); +} + +static StringRef parseSegmentOrSectionName(const char *P) { + if (P[15] == 0) + // Null terminated. + return P; + // Not null terminated, so this is a 16 char string. 
+ return StringRef(P, 16); +} + +static unsigned getCPUType(const MachOObjectFile &O) { + return O.getHeader().cputype; +} + +static unsigned getCPUSubType(const MachOObjectFile &O) { + return O.getHeader().cpusubtype; +} + +static uint32_t +getPlainRelocationAddress(const MachO::any_relocation_info &RE) { + return RE.r_word0; +} + +static unsigned +getScatteredRelocationAddress(const MachO::any_relocation_info &RE) { + return RE.r_word0 & 0xffffff; +} + +static bool getPlainRelocationPCRel(const MachOObjectFile &O, + const MachO::any_relocation_info &RE) { + if (O.isLittleEndian()) + return (RE.r_word1 >> 24) & 1; + return (RE.r_word1 >> 7) & 1; +} + +static bool +getScatteredRelocationPCRel(const MachO::any_relocation_info &RE) { + return (RE.r_word0 >> 30) & 1; +} + +static unsigned getPlainRelocationLength(const MachOObjectFile &O, + const MachO::any_relocation_info &RE) { + if (O.isLittleEndian()) + return (RE.r_word1 >> 25) & 3; + return (RE.r_word1 >> 5) & 3; +} + +static unsigned +getScatteredRelocationLength(const MachO::any_relocation_info &RE) { + return (RE.r_word0 >> 28) & 3; +} + +static unsigned getPlainRelocationType(const MachOObjectFile &O, + const MachO::any_relocation_info &RE) { + if (O.isLittleEndian()) + return RE.r_word1 >> 28; + return RE.r_word1 & 0xf; +} + +static uint32_t getSectionFlags(const MachOObjectFile &O, + DataRefImpl Sec) { + if (O.is64Bit()) { + MachO::section_64 Sect = O.getSection64(Sec); + return Sect.flags; + } + MachO::section Sect = O.getSection(Sec); + return Sect.flags; +} + +static Expected<MachOObjectFile::LoadCommandInfo> +getLoadCommandInfo(const MachOObjectFile &Obj, const char *Ptr, + uint32_t LoadCommandIndex) { + if (auto CmdOrErr = getStructOrErr<MachO::load_command>(Obj, Ptr)) { + if (CmdOrErr->cmdsize + Ptr > Obj.getData().end()) + return malformedError("load command " + Twine(LoadCommandIndex) + + " extends past end of file"); + if (CmdOrErr->cmdsize < 8) + return malformedError("load command " + 
Twine(LoadCommandIndex) + + " with size less than 8 bytes"); + return MachOObjectFile::LoadCommandInfo({Ptr, *CmdOrErr}); + } else + return CmdOrErr.takeError(); +} + +static Expected<MachOObjectFile::LoadCommandInfo> +getFirstLoadCommandInfo(const MachOObjectFile &Obj) { + unsigned HeaderSize = Obj.is64Bit() ? sizeof(MachO::mach_header_64) + : sizeof(MachO::mach_header); + if (sizeof(MachO::load_command) > Obj.getHeader().sizeofcmds) + return malformedError("load command 0 extends past the end all load " + "commands in the file"); + return getLoadCommandInfo(Obj, getPtr(Obj, HeaderSize), 0); +} + +static Expected<MachOObjectFile::LoadCommandInfo> +getNextLoadCommandInfo(const MachOObjectFile &Obj, uint32_t LoadCommandIndex, + const MachOObjectFile::LoadCommandInfo &L) { + unsigned HeaderSize = Obj.is64Bit() ? sizeof(MachO::mach_header_64) + : sizeof(MachO::mach_header); + if (L.Ptr + L.C.cmdsize + sizeof(MachO::load_command) > + Obj.getData().data() + HeaderSize + Obj.getHeader().sizeofcmds) + return malformedError("load command " + Twine(LoadCommandIndex + 1) + + " extends past the end all load commands in the file"); + return getLoadCommandInfo(Obj, L.Ptr + L.C.cmdsize, LoadCommandIndex + 1); +} + +template <typename T> +static void parseHeader(const MachOObjectFile &Obj, T &Header, + Error &Err) { + if (sizeof(T) > Obj.getData().size()) { + Err = malformedError("the mach header extends past the end of the " + "file"); + return; + } + if (auto HeaderOrErr = getStructOrErr<T>(Obj, getPtr(Obj, 0))) + Header = *HeaderOrErr; + else + Err = HeaderOrErr.takeError(); +} + +// This is used to check for overlapping of Mach-O elements. 
+struct MachOElement { + uint64_t Offset; + uint64_t Size; + const char *Name; +}; + +static Error checkOverlappingElement(std::list<MachOElement> &Elements, + uint64_t Offset, uint64_t Size, + const char *Name) { + if (Size == 0) + return Error::success(); + + for (auto it = Elements.begin(); it != Elements.end(); ++it) { + const auto &E = *it; + if ((Offset >= E.Offset && Offset < E.Offset + E.Size) || + (Offset + Size > E.Offset && Offset + Size < E.Offset + E.Size) || + (Offset <= E.Offset && Offset + Size >= E.Offset + E.Size)) + return malformedError(Twine(Name) + " at offset " + Twine(Offset) + + " with a size of " + Twine(Size) + ", overlaps " + + E.Name + " at offset " + Twine(E.Offset) + " with " + "a size of " + Twine(E.Size)); + auto nt = it; + nt++; + if (nt != Elements.end()) { + const auto &N = *nt; + if (Offset + Size <= N.Offset) { + Elements.insert(nt, {Offset, Size, Name}); + return Error::success(); + } + } + } + Elements.push_back({Offset, Size, Name}); + return Error::success(); +} + +// Parses LC_SEGMENT or LC_SEGMENT_64 load command, adds addresses of all +// sections to \param Sections, and optionally sets +// \param IsPageZeroSegment to true. 
+template <typename Segment, typename Section> +static Error parseSegmentLoadCommand( + const MachOObjectFile &Obj, const MachOObjectFile::LoadCommandInfo &Load, + SmallVectorImpl<const char *> &Sections, bool &IsPageZeroSegment, + uint32_t LoadCommandIndex, const char *CmdName, uint64_t SizeOfHeaders, + std::list<MachOElement> &Elements) { + const unsigned SegmentLoadSize = sizeof(Segment); + if (Load.C.cmdsize < SegmentLoadSize) + return malformedError("load command " + Twine(LoadCommandIndex) + + " " + CmdName + " cmdsize too small"); + if (auto SegOrErr = getStructOrErr<Segment>(Obj, Load.Ptr)) { + Segment S = SegOrErr.get(); + const unsigned SectionSize = sizeof(Section); + uint64_t FileSize = Obj.getData().size(); + if (S.nsects > std::numeric_limits<uint32_t>::max() / SectionSize || + S.nsects * SectionSize > Load.C.cmdsize - SegmentLoadSize) + return malformedError("load command " + Twine(LoadCommandIndex) + + " inconsistent cmdsize in " + CmdName + + " for the number of sections"); + for (unsigned J = 0; J < S.nsects; ++J) { + const char *Sec = getSectionPtr(Obj, Load, J); + Sections.push_back(Sec); + auto SectionOrErr = getStructOrErr<Section>(Obj, Sec); + if (!SectionOrErr) + return SectionOrErr.takeError(); + Section s = SectionOrErr.get(); + if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB && + Obj.getHeader().filetype != MachO::MH_DSYM && + s.flags != MachO::S_ZEROFILL && + s.flags != MachO::S_THREAD_LOCAL_ZEROFILL && + s.offset > FileSize) + return malformedError("offset field of section " + Twine(J) + " in " + + CmdName + " command " + Twine(LoadCommandIndex) + + " extends past the end of the file"); + if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB && + Obj.getHeader().filetype != MachO::MH_DSYM && + s.flags != MachO::S_ZEROFILL && + s.flags != MachO::S_THREAD_LOCAL_ZEROFILL && S.fileoff == 0 && + s.offset < SizeOfHeaders && s.size != 0) + return malformedError("offset field of section " + Twine(J) + " in " + + CmdName + " command " + 
Twine(LoadCommandIndex) + + " not past the headers of the file"); + uint64_t BigSize = s.offset; + BigSize += s.size; + if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB && + Obj.getHeader().filetype != MachO::MH_DSYM && + s.flags != MachO::S_ZEROFILL && + s.flags != MachO::S_THREAD_LOCAL_ZEROFILL && + BigSize > FileSize) + return malformedError("offset field plus size field of section " + + Twine(J) + " in " + CmdName + " command " + + Twine(LoadCommandIndex) + + " extends past the end of the file"); + if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB && + Obj.getHeader().filetype != MachO::MH_DSYM && + s.flags != MachO::S_ZEROFILL && + s.flags != MachO::S_THREAD_LOCAL_ZEROFILL && + s.size > S.filesize) + return malformedError("size field of section " + + Twine(J) + " in " + CmdName + " command " + + Twine(LoadCommandIndex) + + " greater than the segment"); + if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB && + Obj.getHeader().filetype != MachO::MH_DSYM && s.size != 0 && + s.addr < S.vmaddr) + return malformedError("addr field of section " + Twine(J) + " in " + + CmdName + " command " + Twine(LoadCommandIndex) + + " less than the segment's vmaddr"); + BigSize = s.addr; + BigSize += s.size; + uint64_t BigEnd = S.vmaddr; + BigEnd += S.vmsize; + if (S.vmsize != 0 && s.size != 0 && BigSize > BigEnd) + return malformedError("addr field plus size of section " + Twine(J) + + " in " + CmdName + " command " + + Twine(LoadCommandIndex) + + " greater than than " + "the segment's vmaddr plus vmsize"); + if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB && + Obj.getHeader().filetype != MachO::MH_DSYM && + s.flags != MachO::S_ZEROFILL && + s.flags != MachO::S_THREAD_LOCAL_ZEROFILL) + if (Error Err = checkOverlappingElement(Elements, s.offset, s.size, + "section contents")) + return Err; + if (s.reloff > FileSize) + return malformedError("reloff field of section " + Twine(J) + " in " + + CmdName + " command " + Twine(LoadCommandIndex) + + " extends past the end of 
the file"); + BigSize = s.nreloc; + BigSize *= sizeof(struct MachO::relocation_info); + BigSize += s.reloff; + if (BigSize > FileSize) + return malformedError("reloff field plus nreloc field times sizeof(" + "struct relocation_info) of section " + + Twine(J) + " in " + CmdName + " command " + + Twine(LoadCommandIndex) + + " extends past the end of the file"); + if (Error Err = checkOverlappingElement(Elements, s.reloff, s.nreloc * + sizeof(struct + MachO::relocation_info), + "section relocation entries")) + return Err; + } + if (S.fileoff > FileSize) + return malformedError("load command " + Twine(LoadCommandIndex) + + " fileoff field in " + CmdName + + " extends past the end of the file"); + uint64_t BigSize = S.fileoff; + BigSize += S.filesize; + if (BigSize > FileSize) + return malformedError("load command " + Twine(LoadCommandIndex) + + " fileoff field plus filesize field in " + + CmdName + " extends past the end of the file"); + if (S.vmsize != 0 && S.filesize > S.vmsize) + return malformedError("load command " + Twine(LoadCommandIndex) + + " filesize field in " + CmdName + + " greater than vmsize field"); + IsPageZeroSegment |= StringRef("__PAGEZERO").equals(S.segname); + } else + return SegOrErr.takeError(); + + return Error::success(); +} + +static Error checkSymtabCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + const char **SymtabLoadCmd, + std::list<MachOElement> &Elements) { + if (Load.C.cmdsize < sizeof(MachO::symtab_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_SYMTAB cmdsize too small"); + if (*SymtabLoadCmd != nullptr) + return malformedError("more than one LC_SYMTAB command"); + auto SymtabOrErr = getStructOrErr<MachO::symtab_command>(Obj, Load.Ptr); + if (!SymtabOrErr) + return SymtabOrErr.takeError(); + MachO::symtab_command Symtab = SymtabOrErr.get(); + if (Symtab.cmdsize != sizeof(MachO::symtab_command)) + return malformedError("LC_SYMTAB 
command " + Twine(LoadCommandIndex) + + " has incorrect cmdsize"); + uint64_t FileSize = Obj.getData().size(); + if (Symtab.symoff > FileSize) + return malformedError("symoff field of LC_SYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end " + "of the file"); + uint64_t SymtabSize = Symtab.nsyms; + const char *struct_nlist_name; + if (Obj.is64Bit()) { + SymtabSize *= sizeof(MachO::nlist_64); + struct_nlist_name = "struct nlist_64"; + } else { + SymtabSize *= sizeof(MachO::nlist); + struct_nlist_name = "struct nlist"; + } + uint64_t BigSize = SymtabSize; + BigSize += Symtab.symoff; + if (BigSize > FileSize) + return malformedError("symoff field plus nsyms field times sizeof(" + + Twine(struct_nlist_name) + ") of LC_SYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end " + "of the file"); + if (Error Err = checkOverlappingElement(Elements, Symtab.symoff, SymtabSize, + "symbol table")) + return Err; + if (Symtab.stroff > FileSize) + return malformedError("stroff field of LC_SYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end " + "of the file"); + BigSize = Symtab.stroff; + BigSize += Symtab.strsize; + if (BigSize > FileSize) + return malformedError("stroff field plus strsize field of LC_SYMTAB " + "command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + if (Error Err = checkOverlappingElement(Elements, Symtab.stroff, + Symtab.strsize, "string table")) + return Err; + *SymtabLoadCmd = Load.Ptr; + return Error::success(); +} + +static Error checkDysymtabCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + const char **DysymtabLoadCmd, + std::list<MachOElement> &Elements) { + if (Load.C.cmdsize < sizeof(MachO::dysymtab_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_DYSYMTAB cmdsize too small"); + if (*DysymtabLoadCmd != nullptr) + return malformedError("more than one LC_DYSYMTAB command"); + auto 
DysymtabOrErr = + getStructOrErr<MachO::dysymtab_command>(Obj, Load.Ptr); + if (!DysymtabOrErr) + return DysymtabOrErr.takeError(); + MachO::dysymtab_command Dysymtab = DysymtabOrErr.get(); + if (Dysymtab.cmdsize != sizeof(MachO::dysymtab_command)) + return malformedError("LC_DYSYMTAB command " + Twine(LoadCommandIndex) + + " has incorrect cmdsize"); + uint64_t FileSize = Obj.getData().size(); + if (Dysymtab.tocoff > FileSize) + return malformedError("tocoff field of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + uint64_t BigSize = Dysymtab.ntoc; + BigSize *= sizeof(MachO::dylib_table_of_contents); + BigSize += Dysymtab.tocoff; + if (BigSize > FileSize) + return malformedError("tocoff field plus ntoc field times sizeof(struct " + "dylib_table_of_contents) of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, Dysymtab.tocoff, + Dysymtab.ntoc * sizeof(struct + MachO::dylib_table_of_contents), + "table of contents")) + return Err; + if (Dysymtab.modtaboff > FileSize) + return malformedError("modtaboff field of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + BigSize = Dysymtab.nmodtab; + const char *struct_dylib_module_name; + uint64_t sizeof_modtab; + if (Obj.is64Bit()) { + sizeof_modtab = sizeof(MachO::dylib_module_64); + struct_dylib_module_name = "struct dylib_module_64"; + } else { + sizeof_modtab = sizeof(MachO::dylib_module); + struct_dylib_module_name = "struct dylib_module"; + } + BigSize *= sizeof_modtab; + BigSize += Dysymtab.modtaboff; + if (BigSize > FileSize) + return malformedError("modtaboff field plus nmodtab field times sizeof(" + + Twine(struct_dylib_module_name) + ") of LC_DYSYMTAB " + "command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + if (Error Err = checkOverlappingElement(Elements, Dysymtab.modtaboff, + Dysymtab.nmodtab * 
sizeof_modtab, + "module table")) + return Err; + if (Dysymtab.extrefsymoff > FileSize) + return malformedError("extrefsymoff field of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + BigSize = Dysymtab.nextrefsyms; + BigSize *= sizeof(MachO::dylib_reference); + BigSize += Dysymtab.extrefsymoff; + if (BigSize > FileSize) + return malformedError("extrefsymoff field plus nextrefsyms field times " + "sizeof(struct dylib_reference) of LC_DYSYMTAB " + "command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + if (Error Err = checkOverlappingElement(Elements, Dysymtab.extrefsymoff, + Dysymtab.nextrefsyms * + sizeof(MachO::dylib_reference), + "reference table")) + return Err; + if (Dysymtab.indirectsymoff > FileSize) + return malformedError("indirectsymoff field of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + BigSize = Dysymtab.nindirectsyms; + BigSize *= sizeof(uint32_t); + BigSize += Dysymtab.indirectsymoff; + if (BigSize > FileSize) + return malformedError("indirectsymoff field plus nindirectsyms field times " + "sizeof(uint32_t) of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, Dysymtab.indirectsymoff, + Dysymtab.nindirectsyms * + sizeof(uint32_t), + "indirect table")) + return Err; + if (Dysymtab.extreloff > FileSize) + return malformedError("extreloff field of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + BigSize = Dysymtab.nextrel; + BigSize *= sizeof(MachO::relocation_info); + BigSize += Dysymtab.extreloff; + if (BigSize > FileSize) + return malformedError("extreloff field plus nextrel field times sizeof" + "(struct relocation_info) of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, 
Dysymtab.extreloff, + Dysymtab.nextrel * + sizeof(MachO::relocation_info), + "external relocation table")) + return Err; + if (Dysymtab.locreloff > FileSize) + return malformedError("locreloff field of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + BigSize = Dysymtab.nlocrel; + BigSize *= sizeof(MachO::relocation_info); + BigSize += Dysymtab.locreloff; + if (BigSize > FileSize) + return malformedError("locreloff field plus nlocrel field times sizeof" + "(struct relocation_info) of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, Dysymtab.locreloff, + Dysymtab.nlocrel * + sizeof(MachO::relocation_info), + "local relocation table")) + return Err; + *DysymtabLoadCmd = Load.Ptr; + return Error::success(); +} + +static Error checkLinkeditDataCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + const char **LoadCmd, const char *CmdName, + std::list<MachOElement> &Elements, + const char *ElementName) { + if (Load.C.cmdsize < sizeof(MachO::linkedit_data_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " cmdsize too small"); + if (*LoadCmd != nullptr) + return malformedError("more than one " + Twine(CmdName) + " command"); + auto LinkDataOrError = + getStructOrErr<MachO::linkedit_data_command>(Obj, Load.Ptr); + if (!LinkDataOrError) + return LinkDataOrError.takeError(); + MachO::linkedit_data_command LinkData = LinkDataOrError.get(); + if (LinkData.cmdsize != sizeof(MachO::linkedit_data_command)) + return malformedError(Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " has incorrect cmdsize"); + uint64_t FileSize = Obj.getData().size(); + if (LinkData.dataoff > FileSize) + return malformedError("dataoff field of " + Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the 
file"); + uint64_t BigSize = LinkData.dataoff; + BigSize += LinkData.datasize; + if (BigSize > FileSize) + return malformedError("dataoff field plus datasize field of " + + Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, LinkData.dataoff, + LinkData.datasize, ElementName)) + return Err; + *LoadCmd = Load.Ptr; + return Error::success(); +} + +static Error checkDyldInfoCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + const char **LoadCmd, const char *CmdName, + std::list<MachOElement> &Elements) { + if (Load.C.cmdsize < sizeof(MachO::dyld_info_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " cmdsize too small"); + if (*LoadCmd != nullptr) + return malformedError("more than one LC_DYLD_INFO and or LC_DYLD_INFO_ONLY " + "command"); + auto DyldInfoOrErr = + getStructOrErr<MachO::dyld_info_command>(Obj, Load.Ptr); + if (!DyldInfoOrErr) + return DyldInfoOrErr.takeError(); + MachO::dyld_info_command DyldInfo = DyldInfoOrErr.get(); + if (DyldInfo.cmdsize != sizeof(MachO::dyld_info_command)) + return malformedError(Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " has incorrect cmdsize"); + uint64_t FileSize = Obj.getData().size(); + if (DyldInfo.rebase_off > FileSize) + return malformedError("rebase_off field of " + Twine(CmdName) + + " command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + uint64_t BigSize = DyldInfo.rebase_off; + BigSize += DyldInfo.rebase_size; + if (BigSize > FileSize) + return malformedError("rebase_off field plus rebase_size field of " + + Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, DyldInfo.rebase_off, + DyldInfo.rebase_size, + "dyld rebase info")) + return Err; + if (DyldInfo.bind_off > 
FileSize) + return malformedError("bind_off field of " + Twine(CmdName) + + " command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + BigSize = DyldInfo.bind_off; + BigSize += DyldInfo.bind_size; + if (BigSize > FileSize) + return malformedError("bind_off field plus bind_size field of " + + Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, DyldInfo.bind_off, + DyldInfo.bind_size, + "dyld bind info")) + return Err; + if (DyldInfo.weak_bind_off > FileSize) + return malformedError("weak_bind_off field of " + Twine(CmdName) + + " command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + BigSize = DyldInfo.weak_bind_off; + BigSize += DyldInfo.weak_bind_size; + if (BigSize > FileSize) + return malformedError("weak_bind_off field plus weak_bind_size field of " + + Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, DyldInfo.weak_bind_off, + DyldInfo.weak_bind_size, + "dyld weak bind info")) + return Err; + if (DyldInfo.lazy_bind_off > FileSize) + return malformedError("lazy_bind_off field of " + Twine(CmdName) + + " command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + BigSize = DyldInfo.lazy_bind_off; + BigSize += DyldInfo.lazy_bind_size; + if (BigSize > FileSize) + return malformedError("lazy_bind_off field plus lazy_bind_size field of " + + Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, DyldInfo.lazy_bind_off, + DyldInfo.lazy_bind_size, + "dyld lazy bind info")) + return Err; + if (DyldInfo.export_off > FileSize) + return malformedError("export_off field of " + Twine(CmdName) + + " command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + BigSize = 
DyldInfo.export_off; + BigSize += DyldInfo.export_size; + if (BigSize > FileSize) + return malformedError("export_off field plus export_size field of " + + Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, DyldInfo.export_off, + DyldInfo.export_size, + "dyld export info")) + return Err; + *LoadCmd = Load.Ptr; + return Error::success(); +} + +static Error checkDylibCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, const char *CmdName) { + if (Load.C.cmdsize < sizeof(MachO::dylib_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " cmdsize too small"); + auto CommandOrErr = getStructOrErr<MachO::dylib_command>(Obj, Load.Ptr); + if (!CommandOrErr) + return CommandOrErr.takeError(); + MachO::dylib_command D = CommandOrErr.get(); + if (D.dylib.name < sizeof(MachO::dylib_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " name.offset field too small, not past " + "the end of the dylib_command struct"); + if (D.dylib.name >= D.cmdsize) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " name.offset field extends past the end " + "of the load command"); + // Make sure there is a null between the starting offset of the name and + // the end of the load command. 
+ uint32_t i; + const char *P = (const char *)Load.Ptr; + for (i = D.dylib.name; i < D.cmdsize; i++) + if (P[i] == '\0') + break; + if (i >= D.cmdsize) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " library name extends past the end of the " + "load command"); + return Error::success(); +} + +static Error checkDylibIdCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + const char **LoadCmd) { + if (Error Err = checkDylibCommand(Obj, Load, LoadCommandIndex, + "LC_ID_DYLIB")) + return Err; + if (*LoadCmd != nullptr) + return malformedError("more than one LC_ID_DYLIB command"); + if (Obj.getHeader().filetype != MachO::MH_DYLIB && + Obj.getHeader().filetype != MachO::MH_DYLIB_STUB) + return malformedError("LC_ID_DYLIB load command in non-dynamic library " + "file type"); + *LoadCmd = Load.Ptr; + return Error::success(); +} + +static Error checkDyldCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, const char *CmdName) { + if (Load.C.cmdsize < sizeof(MachO::dylinker_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " cmdsize too small"); + auto CommandOrErr = getStructOrErr<MachO::dylinker_command>(Obj, Load.Ptr); + if (!CommandOrErr) + return CommandOrErr.takeError(); + MachO::dylinker_command D = CommandOrErr.get(); + if (D.name < sizeof(MachO::dylinker_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " name.offset field too small, not past " + "the end of the dylinker_command struct"); + if (D.name >= D.cmdsize) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " name.offset field extends past the end " + "of the load command"); + // Make sure there is a null between the starting offset of the name and + // the end of the load command. 
+ uint32_t i; + const char *P = (const char *)Load.Ptr; + for (i = D.name; i < D.cmdsize; i++) + if (P[i] == '\0') + break; + if (i >= D.cmdsize) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " dyld name extends past the end of the " + "load command"); + return Error::success(); +} + +static Error checkVersCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + const char **LoadCmd, const char *CmdName) { + if (Load.C.cmdsize != sizeof(MachO::version_min_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " has incorrect cmdsize"); + if (*LoadCmd != nullptr) + return malformedError("more than one LC_VERSION_MIN_MACOSX, " + "LC_VERSION_MIN_IPHONEOS, LC_VERSION_MIN_TVOS or " + "LC_VERSION_MIN_WATCHOS command"); + *LoadCmd = Load.Ptr; + return Error::success(); +} + +static Error checkNoteCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + std::list<MachOElement> &Elements) { + if (Load.C.cmdsize != sizeof(MachO::note_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_NOTE has incorrect cmdsize"); + auto NoteCmdOrErr = getStructOrErr<MachO::note_command>(Obj, Load.Ptr); + if (!NoteCmdOrErr) + return NoteCmdOrErr.takeError(); + MachO::note_command Nt = NoteCmdOrErr.get(); + uint64_t FileSize = Obj.getData().size(); + if (Nt.offset > FileSize) + return malformedError("offset field of LC_NOTE command " + + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + uint64_t BigSize = Nt.offset; + BigSize += Nt.size; + if (BigSize > FileSize) + return malformedError("size field plus offset field of LC_NOTE command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, Nt.offset, Nt.size, + "LC_NOTE data")) + return Err; + return Error::success(); +} + 
+static Error +parseBuildVersionCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + SmallVectorImpl<const char*> &BuildTools, + uint32_t LoadCommandIndex) { + auto BVCOrErr = + getStructOrErr<MachO::build_version_command>(Obj, Load.Ptr); + if (!BVCOrErr) + return BVCOrErr.takeError(); + MachO::build_version_command BVC = BVCOrErr.get(); + if (Load.C.cmdsize != + sizeof(MachO::build_version_command) + + BVC.ntools * sizeof(MachO::build_tool_version)) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_BUILD_VERSION_COMMAND has incorrect cmdsize"); + + auto Start = Load.Ptr + sizeof(MachO::build_version_command); + BuildTools.resize(BVC.ntools); + for (unsigned i = 0; i < BVC.ntools; ++i) + BuildTools[i] = Start + i * sizeof(MachO::build_tool_version); + + return Error::success(); +} + +static Error checkRpathCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex) { + if (Load.C.cmdsize < sizeof(MachO::rpath_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_RPATH cmdsize too small"); + auto ROrErr = getStructOrErr<MachO::rpath_command>(Obj, Load.Ptr); + if (!ROrErr) + return ROrErr.takeError(); + MachO::rpath_command R = ROrErr.get(); + if (R.path < sizeof(MachO::rpath_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_RPATH path.offset field too small, not past " + "the end of the rpath_command struct"); + if (R.path >= R.cmdsize) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_RPATH path.offset field extends past the end " + "of the load command"); + // Make sure there is a null between the starting offset of the path and + // the end of the load command. 
+ uint32_t i; + const char *P = (const char *)Load.Ptr; + for (i = R.path; i < R.cmdsize; i++) + if (P[i] == '\0') + break; + if (i >= R.cmdsize) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_RPATH library name extends past the end of the " + "load command"); + return Error::success(); +} + +static Error checkEncryptCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + uint64_t cryptoff, uint64_t cryptsize, + const char **LoadCmd, const char *CmdName) { + if (*LoadCmd != nullptr) + return malformedError("more than one LC_ENCRYPTION_INFO and or " + "LC_ENCRYPTION_INFO_64 command"); + uint64_t FileSize = Obj.getData().size(); + if (cryptoff > FileSize) + return malformedError("cryptoff field of " + Twine(CmdName) + + " command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + uint64_t BigSize = cryptoff; + BigSize += cryptsize; + if (BigSize > FileSize) + return malformedError("cryptoff field plus cryptsize field of " + + Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + *LoadCmd = Load.Ptr; + return Error::success(); +} + +static Error checkLinkerOptCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex) { + if (Load.C.cmdsize < sizeof(MachO::linker_option_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_LINKER_OPTION cmdsize too small"); + auto LinkOptionOrErr = + getStructOrErr<MachO::linker_option_command>(Obj, Load.Ptr); + if (!LinkOptionOrErr) + return LinkOptionOrErr.takeError(); + MachO::linker_option_command L = LinkOptionOrErr.get(); + // Make sure the count of strings is correct. 
+ const char *string = (const char *)Load.Ptr + + sizeof(struct MachO::linker_option_command); + uint32_t left = L.cmdsize - sizeof(struct MachO::linker_option_command); + uint32_t i = 0; + while (left > 0) { + while (*string == '\0' && left > 0) { + string++; + left--; + } + if (left > 0) { + i++; + uint32_t NullPos = StringRef(string, left).find('\0'); + if (0xffffffff == NullPos) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_LINKER_OPTION string #" + Twine(i) + + " is not NULL terminated"); + uint32_t len = std::min(NullPos, left) + 1; + string += len; + left -= len; + } + } + if (L.count != i) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_LINKER_OPTION string count " + Twine(L.count) + + " does not match number of strings"); + return Error::success(); +} + +static Error checkSubCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, const char *CmdName, + size_t SizeOfCmd, const char *CmdStructName, + uint32_t PathOffset, const char *PathFieldName) { + if (PathOffset < SizeOfCmd) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " " + PathFieldName + ".offset field too " + "small, not past the end of the " + CmdStructName); + if (PathOffset >= Load.C.cmdsize) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " " + PathFieldName + ".offset field " + "extends past the end of the load command"); + // Make sure there is a null between the starting offset of the path and + // the end of the load command. 
+ uint32_t i; + const char *P = (const char *)Load.Ptr; + for (i = PathOffset; i < Load.C.cmdsize; i++) + if (P[i] == '\0') + break; + if (i >= Load.C.cmdsize) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " " + PathFieldName + " name extends past " + "the end of the load command"); + return Error::success(); +} + +static Error checkThreadCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + const char *CmdName) { + if (Load.C.cmdsize < sizeof(MachO::thread_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + + CmdName + " cmdsize too small"); + auto ThreadCommandOrErr = + getStructOrErr<MachO::thread_command>(Obj, Load.Ptr); + if (!ThreadCommandOrErr) + return ThreadCommandOrErr.takeError(); + MachO::thread_command T = ThreadCommandOrErr.get(); + const char *state = Load.Ptr + sizeof(MachO::thread_command); + const char *end = Load.Ptr + T.cmdsize; + uint32_t nflavor = 0; + uint32_t cputype = getCPUType(Obj); + while (state < end) { + if(state + sizeof(uint32_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + "flavor in " + CmdName + " extends past end of " + "command"); + uint32_t flavor; + memcpy(&flavor, state, sizeof(uint32_t)); + if (Obj.isLittleEndian() != sys::IsLittleEndianHost) + sys::swapByteOrder(flavor); + state += sizeof(uint32_t); + + if(state + sizeof(uint32_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + " count in " + CmdName + " extends past end of " + "command"); + uint32_t count; + memcpy(&count, state, sizeof(uint32_t)); + if (Obj.isLittleEndian() != sys::IsLittleEndianHost) + sys::swapByteOrder(count); + state += sizeof(uint32_t); + + if (cputype == MachO::CPU_TYPE_I386) { + if (flavor == MachO::x86_THREAD_STATE32) { + if (count != MachO::x86_THREAD_STATE32_COUNT) + return malformedError("load command " + Twine(LoadCommandIndex) + + " count not 
x86_THREAD_STATE32_COUNT for " + "flavor number " + Twine(nflavor) + " which is " + "a x86_THREAD_STATE32 flavor in " + CmdName + + " command"); + if (state + sizeof(MachO::x86_thread_state32_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + " x86_THREAD_STATE32 extends past end of " + "command in " + CmdName + " command"); + state += sizeof(MachO::x86_thread_state32_t); + } else { + return malformedError("load command " + Twine(LoadCommandIndex) + + " unknown flavor (" + Twine(flavor) + ") for " + "flavor number " + Twine(nflavor) + " in " + + CmdName + " command"); + } + } else if (cputype == MachO::CPU_TYPE_X86_64) { + if (flavor == MachO::x86_THREAD_STATE) { + if (count != MachO::x86_THREAD_STATE_COUNT) + return malformedError("load command " + Twine(LoadCommandIndex) + + " count not x86_THREAD_STATE_COUNT for " + "flavor number " + Twine(nflavor) + " which is " + "a x86_THREAD_STATE flavor in " + CmdName + + " command"); + if (state + sizeof(MachO::x86_thread_state_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + " x86_THREAD_STATE extends past end of " + "command in " + CmdName + " command"); + state += sizeof(MachO::x86_thread_state_t); + } else if (flavor == MachO::x86_FLOAT_STATE) { + if (count != MachO::x86_FLOAT_STATE_COUNT) + return malformedError("load command " + Twine(LoadCommandIndex) + + " count not x86_FLOAT_STATE_COUNT for " + "flavor number " + Twine(nflavor) + " which is " + "a x86_FLOAT_STATE flavor in " + CmdName + + " command"); + if (state + sizeof(MachO::x86_float_state_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + " x86_FLOAT_STATE extends past end of " + "command in " + CmdName + " command"); + state += sizeof(MachO::x86_float_state_t); + } else if (flavor == MachO::x86_EXCEPTION_STATE) { + if (count != MachO::x86_EXCEPTION_STATE_COUNT) + return malformedError("load command " + Twine(LoadCommandIndex) + + " count not x86_EXCEPTION_STATE_COUNT 
for " + "flavor number " + Twine(nflavor) + " which is " + "a x86_EXCEPTION_STATE flavor in " + CmdName + + " command"); + if (state + sizeof(MachO::x86_exception_state_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + " x86_EXCEPTION_STATE extends past end of " + "command in " + CmdName + " command"); + state += sizeof(MachO::x86_exception_state_t); + } else if (flavor == MachO::x86_THREAD_STATE64) { + if (count != MachO::x86_THREAD_STATE64_COUNT) + return malformedError("load command " + Twine(LoadCommandIndex) + + " count not x86_THREAD_STATE64_COUNT for " + "flavor number " + Twine(nflavor) + " which is " + "a x86_THREAD_STATE64 flavor in " + CmdName + + " command"); + if (state + sizeof(MachO::x86_thread_state64_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + " x86_THREAD_STATE64 extends past end of " + "command in " + CmdName + " command"); + state += sizeof(MachO::x86_thread_state64_t); + } else if (flavor == MachO::x86_EXCEPTION_STATE64) { + if (count != MachO::x86_EXCEPTION_STATE64_COUNT) + return malformedError("load command " + Twine(LoadCommandIndex) + + " count not x86_EXCEPTION_STATE64_COUNT for " + "flavor number " + Twine(nflavor) + " which is " + "a x86_EXCEPTION_STATE64 flavor in " + CmdName + + " command"); + if (state + sizeof(MachO::x86_exception_state64_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + " x86_EXCEPTION_STATE64 extends past end of " + "command in " + CmdName + " command"); + state += sizeof(MachO::x86_exception_state64_t); + } else { + return malformedError("load command " + Twine(LoadCommandIndex) + + " unknown flavor (" + Twine(flavor) + ") for " + "flavor number " + Twine(nflavor) + " in " + + CmdName + " command"); + } + } else if (cputype == MachO::CPU_TYPE_ARM) { + if (flavor == MachO::ARM_THREAD_STATE) { + if (count != MachO::ARM_THREAD_STATE_COUNT) + return malformedError("load command " + Twine(LoadCommandIndex) + + " count not 
ARM_THREAD_STATE_COUNT for " + "flavor number " + Twine(nflavor) + " which is " + "a ARM_THREAD_STATE flavor in " + CmdName + + " command"); + if (state + sizeof(MachO::arm_thread_state32_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + " ARM_THREAD_STATE extends past end of " + "command in " + CmdName + " command"); + state += sizeof(MachO::arm_thread_state32_t); + } else { + return malformedError("load command " + Twine(LoadCommandIndex) + + " unknown flavor (" + Twine(flavor) + ") for " + "flavor number " + Twine(nflavor) + " in " + + CmdName + " command"); + } + } else if (cputype == MachO::CPU_TYPE_ARM64 || + cputype == MachO::CPU_TYPE_ARM64_32) { + if (flavor == MachO::ARM_THREAD_STATE64) { + if (count != MachO::ARM_THREAD_STATE64_COUNT) + return malformedError("load command " + Twine(LoadCommandIndex) + + " count not ARM_THREAD_STATE64_COUNT for " + "flavor number " + Twine(nflavor) + " which is " + "a ARM_THREAD_STATE64 flavor in " + CmdName + + " command"); + if (state + sizeof(MachO::arm_thread_state64_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + " ARM_THREAD_STATE64 extends past end of " + "command in " + CmdName + " command"); + state += sizeof(MachO::arm_thread_state64_t); + } else { + return malformedError("load command " + Twine(LoadCommandIndex) + + " unknown flavor (" + Twine(flavor) + ") for " + "flavor number " + Twine(nflavor) + " in " + + CmdName + " command"); + } + } else if (cputype == MachO::CPU_TYPE_POWERPC) { + if (flavor == MachO::PPC_THREAD_STATE) { + if (count != MachO::PPC_THREAD_STATE_COUNT) + return malformedError("load command " + Twine(LoadCommandIndex) + + " count not PPC_THREAD_STATE_COUNT for " + "flavor number " + Twine(nflavor) + " which is " + "a PPC_THREAD_STATE flavor in " + CmdName + + " command"); + if (state + sizeof(MachO::ppc_thread_state32_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + " PPC_THREAD_STATE extends past 
end of " + "command in " + CmdName + " command"); + state += sizeof(MachO::ppc_thread_state32_t); + } else { + return malformedError("load command " + Twine(LoadCommandIndex) + + " unknown flavor (" + Twine(flavor) + ") for " + "flavor number " + Twine(nflavor) + " in " + + CmdName + " command"); + } + } else { + return malformedError("unknown cputype (" + Twine(cputype) + ") load " + "command " + Twine(LoadCommandIndex) + " for " + + CmdName + " command can't be checked"); + } + nflavor++; + } + return Error::success(); +} + +static Error checkTwoLevelHintsCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo + &Load, + uint32_t LoadCommandIndex, + const char **LoadCmd, + std::list<MachOElement> &Elements) { + if (Load.C.cmdsize != sizeof(MachO::twolevel_hints_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_TWOLEVEL_HINTS has incorrect cmdsize"); + if (*LoadCmd != nullptr) + return malformedError("more than one LC_TWOLEVEL_HINTS command"); + auto HintsOrErr = getStructOrErr<MachO::twolevel_hints_command>(Obj, Load.Ptr); + if(!HintsOrErr) + return HintsOrErr.takeError(); + MachO::twolevel_hints_command Hints = HintsOrErr.get(); + uint64_t FileSize = Obj.getData().size(); + if (Hints.offset > FileSize) + return malformedError("offset field of LC_TWOLEVEL_HINTS command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + uint64_t BigSize = Hints.nhints; + BigSize *= sizeof(MachO::twolevel_hint); + BigSize += Hints.offset; + if (BigSize > FileSize) + return malformedError("offset field plus nhints times sizeof(struct " + "twolevel_hint) field of LC_TWOLEVEL_HINTS command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, Hints.offset, Hints.nhints * + sizeof(MachO::twolevel_hint), + "two level hints")) + return Err; + *LoadCmd = Load.Ptr; + return Error::success(); +} + +// Returns true if the libObject code 
does not support the load command and its +// contents. The cmd value it is treated as an unknown load command but with +// an error message that says the cmd value is obsolete. +static bool isLoadCommandObsolete(uint32_t cmd) { + if (cmd == MachO::LC_SYMSEG || + cmd == MachO::LC_LOADFVMLIB || + cmd == MachO::LC_IDFVMLIB || + cmd == MachO::LC_IDENT || + cmd == MachO::LC_FVMFILE || + cmd == MachO::LC_PREPAGE || + cmd == MachO::LC_PREBOUND_DYLIB || + cmd == MachO::LC_TWOLEVEL_HINTS || + cmd == MachO::LC_PREBIND_CKSUM) + return true; + return false; +} + +Expected<std::unique_ptr<MachOObjectFile>> +MachOObjectFile::create(MemoryBufferRef Object, bool IsLittleEndian, + bool Is64Bits, uint32_t UniversalCputype, + uint32_t UniversalIndex) { + Error Err = Error::success(); + std::unique_ptr<MachOObjectFile> Obj( + new MachOObjectFile(std::move(Object), IsLittleEndian, + Is64Bits, Err, UniversalCputype, + UniversalIndex)); + if (Err) + return std::move(Err); + return std::move(Obj); +} + +MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, + bool Is64bits, Error &Err, + uint32_t UniversalCputype, + uint32_t UniversalIndex) + : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object) { + ErrorAsOutParameter ErrAsOutParam(&Err); + uint64_t SizeOfHeaders; + uint32_t cputype; + if (is64Bit()) { + parseHeader(*this, Header64, Err); + SizeOfHeaders = sizeof(MachO::mach_header_64); + cputype = Header64.cputype; + } else { + parseHeader(*this, Header, Err); + SizeOfHeaders = sizeof(MachO::mach_header); + cputype = Header.cputype; + } + if (Err) + return; + SizeOfHeaders += getHeader().sizeofcmds; + if (getData().data() + SizeOfHeaders > getData().end()) { + Err = malformedError("load commands extend past the end of the file"); + return; + } + if (UniversalCputype != 0 && cputype != UniversalCputype) { + Err = malformedError("universal header architecture: " + + Twine(UniversalIndex) + "'s cputype does not match " + "object file's mach header"); + 
return; + } + std::list<MachOElement> Elements; + Elements.push_back({0, SizeOfHeaders, "Mach-O headers"}); + + uint32_t LoadCommandCount = getHeader().ncmds; + LoadCommandInfo Load; + if (LoadCommandCount != 0) { + if (auto LoadOrErr = getFirstLoadCommandInfo(*this)) + Load = *LoadOrErr; + else { + Err = LoadOrErr.takeError(); + return; + } + } + + const char *DyldIdLoadCmd = nullptr; + const char *FuncStartsLoadCmd = nullptr; + const char *SplitInfoLoadCmd = nullptr; + const char *CodeSignDrsLoadCmd = nullptr; + const char *CodeSignLoadCmd = nullptr; + const char *VersLoadCmd = nullptr; + const char *SourceLoadCmd = nullptr; + const char *EntryPointLoadCmd = nullptr; + const char *EncryptLoadCmd = nullptr; + const char *RoutinesLoadCmd = nullptr; + const char *UnixThreadLoadCmd = nullptr; + const char *TwoLevelHintsLoadCmd = nullptr; + for (unsigned I = 0; I < LoadCommandCount; ++I) { + if (is64Bit()) { + if (Load.C.cmdsize % 8 != 0) { + // We have a hack here to allow 64-bit Mach-O core files to have + // LC_THREAD commands that are only a multiple of 4 and not 8 to be + // allowed since the macOS kernel produces them. 
+ if (getHeader().filetype != MachO::MH_CORE || + Load.C.cmd != MachO::LC_THREAD || Load.C.cmdsize % 4) { + Err = malformedError("load command " + Twine(I) + " cmdsize not a " + "multiple of 8"); + return; + } + } + } else { + if (Load.C.cmdsize % 4 != 0) { + Err = malformedError("load command " + Twine(I) + " cmdsize not a " + "multiple of 4"); + return; + } + } + LoadCommands.push_back(Load); + if (Load.C.cmd == MachO::LC_SYMTAB) { + if ((Err = checkSymtabCommand(*this, Load, I, &SymtabLoadCmd, Elements))) + return; + } else if (Load.C.cmd == MachO::LC_DYSYMTAB) { + if ((Err = checkDysymtabCommand(*this, Load, I, &DysymtabLoadCmd, + Elements))) + return; + } else if (Load.C.cmd == MachO::LC_DATA_IN_CODE) { + if ((Err = checkLinkeditDataCommand(*this, Load, I, &DataInCodeLoadCmd, + "LC_DATA_IN_CODE", Elements, + "data in code info"))) + return; + } else if (Load.C.cmd == MachO::LC_LINKER_OPTIMIZATION_HINT) { + if ((Err = checkLinkeditDataCommand(*this, Load, I, &LinkOptHintsLoadCmd, + "LC_LINKER_OPTIMIZATION_HINT", + Elements, "linker optimization " + "hints"))) + return; + } else if (Load.C.cmd == MachO::LC_FUNCTION_STARTS) { + if ((Err = checkLinkeditDataCommand(*this, Load, I, &FuncStartsLoadCmd, + "LC_FUNCTION_STARTS", Elements, + "function starts data"))) + return; + } else if (Load.C.cmd == MachO::LC_SEGMENT_SPLIT_INFO) { + if ((Err = checkLinkeditDataCommand(*this, Load, I, &SplitInfoLoadCmd, + "LC_SEGMENT_SPLIT_INFO", Elements, + "split info data"))) + return; + } else if (Load.C.cmd == MachO::LC_DYLIB_CODE_SIGN_DRS) { + if ((Err = checkLinkeditDataCommand(*this, Load, I, &CodeSignDrsLoadCmd, + "LC_DYLIB_CODE_SIGN_DRS", Elements, + "code signing RDs data"))) + return; + } else if (Load.C.cmd == MachO::LC_CODE_SIGNATURE) { + if ((Err = checkLinkeditDataCommand(*this, Load, I, &CodeSignLoadCmd, + "LC_CODE_SIGNATURE", Elements, + "code signature data"))) + return; + } else if (Load.C.cmd == MachO::LC_DYLD_INFO) { + if ((Err = checkDyldInfoCommand(*this, Load, 
I, &DyldInfoLoadCmd, + "LC_DYLD_INFO", Elements))) + return; + } else if (Load.C.cmd == MachO::LC_DYLD_INFO_ONLY) { + if ((Err = checkDyldInfoCommand(*this, Load, I, &DyldInfoLoadCmd, + "LC_DYLD_INFO_ONLY", Elements))) + return; + } else if (Load.C.cmd == MachO::LC_UUID) { + if (Load.C.cmdsize != sizeof(MachO::uuid_command)) { + Err = malformedError("LC_UUID command " + Twine(I) + " has incorrect " + "cmdsize"); + return; + } + if (UuidLoadCmd) { + Err = malformedError("more than one LC_UUID command"); + return; + } + UuidLoadCmd = Load.Ptr; + } else if (Load.C.cmd == MachO::LC_SEGMENT_64) { + if ((Err = parseSegmentLoadCommand<MachO::segment_command_64, + MachO::section_64>( + *this, Load, Sections, HasPageZeroSegment, I, + "LC_SEGMENT_64", SizeOfHeaders, Elements))) + return; + } else if (Load.C.cmd == MachO::LC_SEGMENT) { + if ((Err = parseSegmentLoadCommand<MachO::segment_command, + MachO::section>( + *this, Load, Sections, HasPageZeroSegment, I, + "LC_SEGMENT", SizeOfHeaders, Elements))) + return; + } else if (Load.C.cmd == MachO::LC_ID_DYLIB) { + if ((Err = checkDylibIdCommand(*this, Load, I, &DyldIdLoadCmd))) + return; + } else if (Load.C.cmd == MachO::LC_LOAD_DYLIB) { + if ((Err = checkDylibCommand(*this, Load, I, "LC_LOAD_DYLIB"))) + return; + Libraries.push_back(Load.Ptr); + } else if (Load.C.cmd == MachO::LC_LOAD_WEAK_DYLIB) { + if ((Err = checkDylibCommand(*this, Load, I, "LC_LOAD_WEAK_DYLIB"))) + return; + Libraries.push_back(Load.Ptr); + } else if (Load.C.cmd == MachO::LC_LAZY_LOAD_DYLIB) { + if ((Err = checkDylibCommand(*this, Load, I, "LC_LAZY_LOAD_DYLIB"))) + return; + Libraries.push_back(Load.Ptr); + } else if (Load.C.cmd == MachO::LC_REEXPORT_DYLIB) { + if ((Err = checkDylibCommand(*this, Load, I, "LC_REEXPORT_DYLIB"))) + return; + Libraries.push_back(Load.Ptr); + } else if (Load.C.cmd == MachO::LC_LOAD_UPWARD_DYLIB) { + if ((Err = checkDylibCommand(*this, Load, I, "LC_LOAD_UPWARD_DYLIB"))) + return; + Libraries.push_back(Load.Ptr); + } else if 
(Load.C.cmd == MachO::LC_ID_DYLINKER) { + if ((Err = checkDyldCommand(*this, Load, I, "LC_ID_DYLINKER"))) + return; + } else if (Load.C.cmd == MachO::LC_LOAD_DYLINKER) { + if ((Err = checkDyldCommand(*this, Load, I, "LC_LOAD_DYLINKER"))) + return; + } else if (Load.C.cmd == MachO::LC_DYLD_ENVIRONMENT) { + if ((Err = checkDyldCommand(*this, Load, I, "LC_DYLD_ENVIRONMENT"))) + return; + } else if (Load.C.cmd == MachO::LC_VERSION_MIN_MACOSX) { + if ((Err = checkVersCommand(*this, Load, I, &VersLoadCmd, + "LC_VERSION_MIN_MACOSX"))) + return; + } else if (Load.C.cmd == MachO::LC_VERSION_MIN_IPHONEOS) { + if ((Err = checkVersCommand(*this, Load, I, &VersLoadCmd, + "LC_VERSION_MIN_IPHONEOS"))) + return; + } else if (Load.C.cmd == MachO::LC_VERSION_MIN_TVOS) { + if ((Err = checkVersCommand(*this, Load, I, &VersLoadCmd, + "LC_VERSION_MIN_TVOS"))) + return; + } else if (Load.C.cmd == MachO::LC_VERSION_MIN_WATCHOS) { + if ((Err = checkVersCommand(*this, Load, I, &VersLoadCmd, + "LC_VERSION_MIN_WATCHOS"))) + return; + } else if (Load.C.cmd == MachO::LC_NOTE) { + if ((Err = checkNoteCommand(*this, Load, I, Elements))) + return; + } else if (Load.C.cmd == MachO::LC_BUILD_VERSION) { + if ((Err = parseBuildVersionCommand(*this, Load, BuildTools, I))) + return; + } else if (Load.C.cmd == MachO::LC_RPATH) { + if ((Err = checkRpathCommand(*this, Load, I))) + return; + } else if (Load.C.cmd == MachO::LC_SOURCE_VERSION) { + if (Load.C.cmdsize != sizeof(MachO::source_version_command)) { + Err = malformedError("LC_SOURCE_VERSION command " + Twine(I) + + " has incorrect cmdsize"); + return; + } + if (SourceLoadCmd) { + Err = malformedError("more than one LC_SOURCE_VERSION command"); + return; + } + SourceLoadCmd = Load.Ptr; + } else if (Load.C.cmd == MachO::LC_MAIN) { + if (Load.C.cmdsize != sizeof(MachO::entry_point_command)) { + Err = malformedError("LC_MAIN command " + Twine(I) + + " has incorrect cmdsize"); + return; + } + if (EntryPointLoadCmd) { + Err = malformedError("more than one 
LC_MAIN command"); + return; + } + EntryPointLoadCmd = Load.Ptr; + } else if (Load.C.cmd == MachO::LC_ENCRYPTION_INFO) { + if (Load.C.cmdsize != sizeof(MachO::encryption_info_command)) { + Err = malformedError("LC_ENCRYPTION_INFO command " + Twine(I) + + " has incorrect cmdsize"); + return; + } + MachO::encryption_info_command E = + getStruct<MachO::encryption_info_command>(*this, Load.Ptr); + if ((Err = checkEncryptCommand(*this, Load, I, E.cryptoff, E.cryptsize, + &EncryptLoadCmd, "LC_ENCRYPTION_INFO"))) + return; + } else if (Load.C.cmd == MachO::LC_ENCRYPTION_INFO_64) { + if (Load.C.cmdsize != sizeof(MachO::encryption_info_command_64)) { + Err = malformedError("LC_ENCRYPTION_INFO_64 command " + Twine(I) + + " has incorrect cmdsize"); + return; + } + MachO::encryption_info_command_64 E = + getStruct<MachO::encryption_info_command_64>(*this, Load.Ptr); + if ((Err = checkEncryptCommand(*this, Load, I, E.cryptoff, E.cryptsize, + &EncryptLoadCmd, "LC_ENCRYPTION_INFO_64"))) + return; + } else if (Load.C.cmd == MachO::LC_LINKER_OPTION) { + if ((Err = checkLinkerOptCommand(*this, Load, I))) + return; + } else if (Load.C.cmd == MachO::LC_SUB_FRAMEWORK) { + if (Load.C.cmdsize < sizeof(MachO::sub_framework_command)) { + Err = malformedError("load command " + Twine(I) + + " LC_SUB_FRAMEWORK cmdsize too small"); + return; + } + MachO::sub_framework_command S = + getStruct<MachO::sub_framework_command>(*this, Load.Ptr); + if ((Err = checkSubCommand(*this, Load, I, "LC_SUB_FRAMEWORK", + sizeof(MachO::sub_framework_command), + "sub_framework_command", S.umbrella, + "umbrella"))) + return; + } else if (Load.C.cmd == MachO::LC_SUB_UMBRELLA) { + if (Load.C.cmdsize < sizeof(MachO::sub_umbrella_command)) { + Err = malformedError("load command " + Twine(I) + + " LC_SUB_UMBRELLA cmdsize too small"); + return; + } + MachO::sub_umbrella_command S = + getStruct<MachO::sub_umbrella_command>(*this, Load.Ptr); + if ((Err = checkSubCommand(*this, Load, I, "LC_SUB_UMBRELLA", + 
sizeof(MachO::sub_umbrella_command), + "sub_umbrella_command", S.sub_umbrella, + "sub_umbrella"))) + return; + } else if (Load.C.cmd == MachO::LC_SUB_LIBRARY) { + if (Load.C.cmdsize < sizeof(MachO::sub_library_command)) { + Err = malformedError("load command " + Twine(I) + + " LC_SUB_LIBRARY cmdsize too small"); + return; + } + MachO::sub_library_command S = + getStruct<MachO::sub_library_command>(*this, Load.Ptr); + if ((Err = checkSubCommand(*this, Load, I, "LC_SUB_LIBRARY", + sizeof(MachO::sub_library_command), + "sub_library_command", S.sub_library, + "sub_library"))) + return; + } else if (Load.C.cmd == MachO::LC_SUB_CLIENT) { + if (Load.C.cmdsize < sizeof(MachO::sub_client_command)) { + Err = malformedError("load command " + Twine(I) + + " LC_SUB_CLIENT cmdsize too small"); + return; + } + MachO::sub_client_command S = + getStruct<MachO::sub_client_command>(*this, Load.Ptr); + if ((Err = checkSubCommand(*this, Load, I, "LC_SUB_CLIENT", + sizeof(MachO::sub_client_command), + "sub_client_command", S.client, "client"))) + return; + } else if (Load.C.cmd == MachO::LC_ROUTINES) { + if (Load.C.cmdsize != sizeof(MachO::routines_command)) { + Err = malformedError("LC_ROUTINES command " + Twine(I) + + " has incorrect cmdsize"); + return; + } + if (RoutinesLoadCmd) { + Err = malformedError("more than one LC_ROUTINES and or LC_ROUTINES_64 " + "command"); + return; + } + RoutinesLoadCmd = Load.Ptr; + } else if (Load.C.cmd == MachO::LC_ROUTINES_64) { + if (Load.C.cmdsize != sizeof(MachO::routines_command_64)) { + Err = malformedError("LC_ROUTINES_64 command " + Twine(I) + + " has incorrect cmdsize"); + return; + } + if (RoutinesLoadCmd) { + Err = malformedError("more than one LC_ROUTINES_64 and or LC_ROUTINES " + "command"); + return; + } + RoutinesLoadCmd = Load.Ptr; + } else if (Load.C.cmd == MachO::LC_UNIXTHREAD) { + if ((Err = checkThreadCommand(*this, Load, I, "LC_UNIXTHREAD"))) + return; + if (UnixThreadLoadCmd) { + Err = malformedError("more than one LC_UNIXTHREAD 
command"); + return; + } + UnixThreadLoadCmd = Load.Ptr; + } else if (Load.C.cmd == MachO::LC_THREAD) { + if ((Err = checkThreadCommand(*this, Load, I, "LC_THREAD"))) + return; + // Note: LC_TWOLEVEL_HINTS is really obsolete and is not supported. + } else if (Load.C.cmd == MachO::LC_TWOLEVEL_HINTS) { + if ((Err = checkTwoLevelHintsCommand(*this, Load, I, + &TwoLevelHintsLoadCmd, Elements))) + return; + } else if (Load.C.cmd == MachO::LC_IDENT) { + // Note: LC_IDENT is ignored. + continue; + } else if (isLoadCommandObsolete(Load.C.cmd)) { + Err = malformedError("load command " + Twine(I) + " for cmd value of: " + + Twine(Load.C.cmd) + " is obsolete and not " + "supported"); + return; + } + // TODO: generate a error for unknown load commands by default. But still + // need work out an approach to allow or not allow unknown values like this + // as an option for some uses like lldb. + if (I < LoadCommandCount - 1) { + if (auto LoadOrErr = getNextLoadCommandInfo(*this, I, Load)) + Load = *LoadOrErr; + else { + Err = LoadOrErr.takeError(); + return; + } + } + } + if (!SymtabLoadCmd) { + if (DysymtabLoadCmd) { + Err = malformedError("contains LC_DYSYMTAB load command without a " + "LC_SYMTAB load command"); + return; + } + } else if (DysymtabLoadCmd) { + MachO::symtab_command Symtab = + getStruct<MachO::symtab_command>(*this, SymtabLoadCmd); + MachO::dysymtab_command Dysymtab = + getStruct<MachO::dysymtab_command>(*this, DysymtabLoadCmd); + if (Dysymtab.nlocalsym != 0 && Dysymtab.ilocalsym > Symtab.nsyms) { + Err = malformedError("ilocalsym in LC_DYSYMTAB load command " + "extends past the end of the symbol table"); + return; + } + uint64_t BigSize = Dysymtab.ilocalsym; + BigSize += Dysymtab.nlocalsym; + if (Dysymtab.nlocalsym != 0 && BigSize > Symtab.nsyms) { + Err = malformedError("ilocalsym plus nlocalsym in LC_DYSYMTAB load " + "command extends past the end of the symbol table"); + return; + } + if (Dysymtab.nextdefsym != 0 && Dysymtab.iextdefsym > Symtab.nsyms) { + 
Err = malformedError("iextdefsym in LC_DYSYMTAB load command " + "extends past the end of the symbol table"); + return; + } + BigSize = Dysymtab.iextdefsym; + BigSize += Dysymtab.nextdefsym; + if (Dysymtab.nextdefsym != 0 && BigSize > Symtab.nsyms) { + Err = malformedError("iextdefsym plus nextdefsym in LC_DYSYMTAB " + "load command extends past the end of the symbol " + "table"); + return; + } + if (Dysymtab.nundefsym != 0 && Dysymtab.iundefsym > Symtab.nsyms) { + Err = malformedError("iundefsym in LC_DYSYMTAB load command " + "extends past the end of the symbol table"); + return; + } + BigSize = Dysymtab.iundefsym; + BigSize += Dysymtab.nundefsym; + if (Dysymtab.nundefsym != 0 && BigSize > Symtab.nsyms) { + Err = malformedError("iundefsym plus nundefsym in LC_DYSYMTAB load " + " command extends past the end of the symbol table"); + return; + } + } + if ((getHeader().filetype == MachO::MH_DYLIB || + getHeader().filetype == MachO::MH_DYLIB_STUB) && + DyldIdLoadCmd == nullptr) { + Err = malformedError("no LC_ID_DYLIB load command in dynamic library " + "filetype"); + return; + } + assert(LoadCommands.size() == LoadCommandCount); + + Err = Error::success(); +} + +Error MachOObjectFile::checkSymbolTable() const { + uint32_t Flags = 0; + if (is64Bit()) { + MachO::mach_header_64 H_64 = MachOObjectFile::getHeader64(); + Flags = H_64.flags; + } else { + MachO::mach_header H = MachOObjectFile::getHeader(); + Flags = H.flags; + } + uint8_t NType = 0; + uint8_t NSect = 0; + uint16_t NDesc = 0; + uint32_t NStrx = 0; + uint64_t NValue = 0; + uint32_t SymbolIndex = 0; + MachO::symtab_command S = getSymtabLoadCommand(); + for (const SymbolRef &Symbol : symbols()) { + DataRefImpl SymDRI = Symbol.getRawDataRefImpl(); + if (is64Bit()) { + MachO::nlist_64 STE_64 = getSymbol64TableEntry(SymDRI); + NType = STE_64.n_type; + NSect = STE_64.n_sect; + NDesc = STE_64.n_desc; + NStrx = STE_64.n_strx; + NValue = STE_64.n_value; + } else { + MachO::nlist STE = getSymbolTableEntry(SymDRI); + 
NType = STE.n_type; + NSect = STE.n_sect; + NDesc = STE.n_desc; + NStrx = STE.n_strx; + NValue = STE.n_value; + } + if ((NType & MachO::N_STAB) == 0) { + if ((NType & MachO::N_TYPE) == MachO::N_SECT) { + if (NSect == 0 || NSect > Sections.size()) + return malformedError("bad section index: " + Twine((int)NSect) + + " for symbol at index " + Twine(SymbolIndex)); + } + if ((NType & MachO::N_TYPE) == MachO::N_INDR) { + if (NValue >= S.strsize) + return malformedError("bad n_value: " + Twine((int)NValue) + " past " + "the end of string table, for N_INDR symbol at " + "index " + Twine(SymbolIndex)); + } + if ((Flags & MachO::MH_TWOLEVEL) == MachO::MH_TWOLEVEL && + (((NType & MachO::N_TYPE) == MachO::N_UNDF && NValue == 0) || + (NType & MachO::N_TYPE) == MachO::N_PBUD)) { + uint32_t LibraryOrdinal = MachO::GET_LIBRARY_ORDINAL(NDesc); + if (LibraryOrdinal != 0 && + LibraryOrdinal != MachO::EXECUTABLE_ORDINAL && + LibraryOrdinal != MachO::DYNAMIC_LOOKUP_ORDINAL && + LibraryOrdinal - 1 >= Libraries.size() ) { + return malformedError("bad library ordinal: " + Twine(LibraryOrdinal) + + " for symbol at index " + Twine(SymbolIndex)); + } + } + } + if (NStrx >= S.strsize) + return malformedError("bad string table index: " + Twine((int)NStrx) + + " past the end of string table, for symbol at " + "index " + Twine(SymbolIndex)); + SymbolIndex++; + } + return Error::success(); +} + +void MachOObjectFile::moveSymbolNext(DataRefImpl &Symb) const { + unsigned SymbolTableEntrySize = is64Bit() ? + sizeof(MachO::nlist_64) : + sizeof(MachO::nlist); + Symb.p += SymbolTableEntrySize; +} + +Expected<StringRef> MachOObjectFile::getSymbolName(DataRefImpl Symb) const { + StringRef StringTable = getStringTableData(); + MachO::nlist_base Entry = getSymbolTableEntryBase(*this, Symb); + if (Entry.n_strx == 0) + // A n_strx value of 0 indicates that no name is associated with a + // particular symbol table entry. 
+ return StringRef(); + const char *Start = &StringTable.data()[Entry.n_strx]; + if (Start < getData().begin() || Start >= getData().end()) { + return malformedError("bad string index: " + Twine(Entry.n_strx) + + " for symbol at index " + Twine(getSymbolIndex(Symb))); + } + return StringRef(Start); +} + +unsigned MachOObjectFile::getSectionType(SectionRef Sec) const { + DataRefImpl DRI = Sec.getRawDataRefImpl(); + uint32_t Flags = getSectionFlags(*this, DRI); + return Flags & MachO::SECTION_TYPE; +} + +uint64_t MachOObjectFile::getNValue(DataRefImpl Sym) const { + if (is64Bit()) { + MachO::nlist_64 Entry = getSymbol64TableEntry(Sym); + return Entry.n_value; + } + MachO::nlist Entry = getSymbolTableEntry(Sym); + return Entry.n_value; +} + +// getIndirectName() returns the name of the alias'ed symbol who's string table +// index is in the n_value field. +std::error_code MachOObjectFile::getIndirectName(DataRefImpl Symb, + StringRef &Res) const { + StringRef StringTable = getStringTableData(); + MachO::nlist_base Entry = getSymbolTableEntryBase(*this, Symb); + if ((Entry.n_type & MachO::N_TYPE) != MachO::N_INDR) + return object_error::parse_failed; + uint64_t NValue = getNValue(Symb); + if (NValue >= StringTable.size()) + return object_error::parse_failed; + const char *Start = &StringTable.data()[NValue]; + Res = StringRef(Start); + return std::error_code(); +} + +uint64_t MachOObjectFile::getSymbolValueImpl(DataRefImpl Sym) const { + return getNValue(Sym); +} + +Expected<uint64_t> MachOObjectFile::getSymbolAddress(DataRefImpl Sym) const { + return getSymbolValue(Sym); +} + +uint32_t MachOObjectFile::getSymbolAlignment(DataRefImpl DRI) const { + uint32_t Flags = cantFail(getSymbolFlags(DRI)); + if (Flags & SymbolRef::SF_Common) { + MachO::nlist_base Entry = getSymbolTableEntryBase(*this, DRI); + return 1 << MachO::GET_COMM_ALIGN(Entry.n_desc); + } + return 0; +} + +uint64_t MachOObjectFile::getCommonSymbolSizeImpl(DataRefImpl DRI) const { + return getNValue(DRI); +} 
+ +Expected<SymbolRef::Type> +MachOObjectFile::getSymbolType(DataRefImpl Symb) const { + MachO::nlist_base Entry = getSymbolTableEntryBase(*this, Symb); + uint8_t n_type = Entry.n_type; + + // If this is a STAB debugging symbol, we can do nothing more. + if (n_type & MachO::N_STAB) + return SymbolRef::ST_Debug; + + switch (n_type & MachO::N_TYPE) { + case MachO::N_UNDF : + return SymbolRef::ST_Unknown; + case MachO::N_SECT : + Expected<section_iterator> SecOrError = getSymbolSection(Symb); + if (!SecOrError) + return SecOrError.takeError(); + section_iterator Sec = *SecOrError; + if (Sec == section_end()) + return SymbolRef::ST_Other; + if (Sec->isData() || Sec->isBSS()) + return SymbolRef::ST_Data; + return SymbolRef::ST_Function; + } + return SymbolRef::ST_Other; +} + +Expected<uint32_t> MachOObjectFile::getSymbolFlags(DataRefImpl DRI) const { + MachO::nlist_base Entry = getSymbolTableEntryBase(*this, DRI); + + uint8_t MachOType = Entry.n_type; + uint16_t MachOFlags = Entry.n_desc; + + uint32_t Result = SymbolRef::SF_None; + + if ((MachOType & MachO::N_TYPE) == MachO::N_INDR) + Result |= SymbolRef::SF_Indirect; + + if (MachOType & MachO::N_STAB) + Result |= SymbolRef::SF_FormatSpecific; + + if (MachOType & MachO::N_EXT) { + Result |= SymbolRef::SF_Global; + if ((MachOType & MachO::N_TYPE) == MachO::N_UNDF) { + if (getNValue(DRI)) + Result |= SymbolRef::SF_Common; + else + Result |= SymbolRef::SF_Undefined; + } + + if (!(MachOType & MachO::N_PEXT)) + Result |= SymbolRef::SF_Exported; + } + + if (MachOFlags & (MachO::N_WEAK_REF | MachO::N_WEAK_DEF)) + Result |= SymbolRef::SF_Weak; + + if (MachOFlags & (MachO::N_ARM_THUMB_DEF)) + Result |= SymbolRef::SF_Thumb; + + if ((MachOType & MachO::N_TYPE) == MachO::N_ABS) + Result |= SymbolRef::SF_Absolute; + + return Result; +} + +Expected<section_iterator> +MachOObjectFile::getSymbolSection(DataRefImpl Symb) const { + MachO::nlist_base Entry = getSymbolTableEntryBase(*this, Symb); + uint8_t index = Entry.n_sect; + + if 
(index == 0) + return section_end(); + DataRefImpl DRI; + DRI.d.a = index - 1; + if (DRI.d.a >= Sections.size()){ + return malformedError("bad section index: " + Twine((int)index) + + " for symbol at index " + Twine(getSymbolIndex(Symb))); + } + return section_iterator(SectionRef(DRI, this)); +} + +unsigned MachOObjectFile::getSymbolSectionID(SymbolRef Sym) const { + MachO::nlist_base Entry = + getSymbolTableEntryBase(*this, Sym.getRawDataRefImpl()); + return Entry.n_sect - 1; +} + +void MachOObjectFile::moveSectionNext(DataRefImpl &Sec) const { + Sec.d.a++; +} + +Expected<StringRef> MachOObjectFile::getSectionName(DataRefImpl Sec) const { + ArrayRef<char> Raw = getSectionRawName(Sec); + return parseSegmentOrSectionName(Raw.data()); +} + +uint64_t MachOObjectFile::getSectionAddress(DataRefImpl Sec) const { + if (is64Bit()) + return getSection64(Sec).addr; + return getSection(Sec).addr; +} + +uint64_t MachOObjectFile::getSectionIndex(DataRefImpl Sec) const { + return Sec.d.a; +} + +uint64_t MachOObjectFile::getSectionSize(DataRefImpl Sec) const { + // In the case if a malformed Mach-O file where the section offset is past + // the end of the file or some part of the section size is past the end of + // the file return a size of zero or a size that covers the rest of the file + // but does not extend past the end of the file. 
+ uint32_t SectOffset, SectType; + uint64_t SectSize; + + if (is64Bit()) { + MachO::section_64 Sect = getSection64(Sec); + SectOffset = Sect.offset; + SectSize = Sect.size; + SectType = Sect.flags & MachO::SECTION_TYPE; + } else { + MachO::section Sect = getSection(Sec); + SectOffset = Sect.offset; + SectSize = Sect.size; + SectType = Sect.flags & MachO::SECTION_TYPE; + } + if (SectType == MachO::S_ZEROFILL || SectType == MachO::S_GB_ZEROFILL) + return SectSize; + uint64_t FileSize = getData().size(); + if (SectOffset > FileSize) + return 0; + if (FileSize - SectOffset < SectSize) + return FileSize - SectOffset; + return SectSize; +} + +ArrayRef<uint8_t> MachOObjectFile::getSectionContents(uint32_t Offset, + uint64_t Size) const { + return arrayRefFromStringRef(getData().substr(Offset, Size)); +} + +Expected<ArrayRef<uint8_t>> +MachOObjectFile::getSectionContents(DataRefImpl Sec) const { + uint32_t Offset; + uint64_t Size; + + if (is64Bit()) { + MachO::section_64 Sect = getSection64(Sec); + Offset = Sect.offset; + Size = Sect.size; + } else { + MachO::section Sect = getSection(Sec); + Offset = Sect.offset; + Size = Sect.size; + } + + return getSectionContents(Offset, Size); +} + +uint64_t MachOObjectFile::getSectionAlignment(DataRefImpl Sec) const { + uint32_t Align; + if (is64Bit()) { + MachO::section_64 Sect = getSection64(Sec); + Align = Sect.align; + } else { + MachO::section Sect = getSection(Sec); + Align = Sect.align; + } + + return uint64_t(1) << Align; +} + +Expected<SectionRef> MachOObjectFile::getSection(unsigned SectionIndex) const { + if (SectionIndex < 1 || SectionIndex > Sections.size()) + return malformedError("bad section index: " + Twine((int)SectionIndex)); + + DataRefImpl DRI; + DRI.d.a = SectionIndex - 1; + return SectionRef(DRI, this); +} + +Expected<SectionRef> MachOObjectFile::getSection(StringRef SectionName) const { + for (const SectionRef &Section : sections()) { + auto NameOrErr = Section.getName(); + if (!NameOrErr) + return 
NameOrErr.takeError(); + if (*NameOrErr == SectionName) + return Section; + } + return errorCodeToError(object_error::parse_failed); +} + +bool MachOObjectFile::isSectionCompressed(DataRefImpl Sec) const { + return false; +} + +bool MachOObjectFile::isSectionText(DataRefImpl Sec) const { + uint32_t Flags = getSectionFlags(*this, Sec); + return Flags & MachO::S_ATTR_PURE_INSTRUCTIONS; +} + +bool MachOObjectFile::isSectionData(DataRefImpl Sec) const { + uint32_t Flags = getSectionFlags(*this, Sec); + unsigned SectionType = Flags & MachO::SECTION_TYPE; + return !(Flags & MachO::S_ATTR_PURE_INSTRUCTIONS) && + !(SectionType == MachO::S_ZEROFILL || + SectionType == MachO::S_GB_ZEROFILL); +} + +bool MachOObjectFile::isSectionBSS(DataRefImpl Sec) const { + uint32_t Flags = getSectionFlags(*this, Sec); + unsigned SectionType = Flags & MachO::SECTION_TYPE; + return !(Flags & MachO::S_ATTR_PURE_INSTRUCTIONS) && + (SectionType == MachO::S_ZEROFILL || + SectionType == MachO::S_GB_ZEROFILL); +} + +bool MachOObjectFile::isDebugSection(DataRefImpl Sec) const { + Expected<StringRef> SectionNameOrErr = getSectionName(Sec); + if (!SectionNameOrErr) { + // TODO: Report the error message properly. 
+ consumeError(SectionNameOrErr.takeError()); + return false; + } + StringRef SectionName = SectionNameOrErr.get(); + return SectionName.startswith("__debug") || + SectionName.startswith("__zdebug") || + SectionName.startswith("__apple") || SectionName == "__gdb_index" || + SectionName == "__swift_ast"; +} + +namespace { +template <typename LoadCommandType> +ArrayRef<uint8_t> getSegmentContents(const MachOObjectFile &Obj, + MachOObjectFile::LoadCommandInfo LoadCmd, + StringRef SegmentName) { + auto SegmentOrErr = getStructOrErr<LoadCommandType>(Obj, LoadCmd.Ptr); + if (!SegmentOrErr) { + consumeError(SegmentOrErr.takeError()); + return {}; + } + auto &Segment = SegmentOrErr.get(); + if (StringRef(Segment.segname, 16).startswith(SegmentName)) + return arrayRefFromStringRef(Obj.getData().slice( + Segment.fileoff, Segment.fileoff + Segment.filesize)); + return {}; +} +} // namespace + +ArrayRef<uint8_t> +MachOObjectFile::getSegmentContents(StringRef SegmentName) const { + for (auto LoadCmd : load_commands()) { + ArrayRef<uint8_t> Contents; + switch (LoadCmd.C.cmd) { + case MachO::LC_SEGMENT: + Contents = ::getSegmentContents<MachO::segment_command>(*this, LoadCmd, + SegmentName); + break; + case MachO::LC_SEGMENT_64: + Contents = ::getSegmentContents<MachO::segment_command_64>(*this, LoadCmd, + SegmentName); + break; + default: + continue; + } + if (!Contents.empty()) + return Contents; + } + return {}; +} + +unsigned MachOObjectFile::getSectionID(SectionRef Sec) const { + return Sec.getRawDataRefImpl().d.a; +} + +bool MachOObjectFile::isSectionVirtual(DataRefImpl Sec) const { + uint32_t Flags = getSectionFlags(*this, Sec); + unsigned SectionType = Flags & MachO::SECTION_TYPE; + return SectionType == MachO::S_ZEROFILL || + SectionType == MachO::S_GB_ZEROFILL; +} + +bool MachOObjectFile::isSectionBitcode(DataRefImpl Sec) const { + StringRef SegmentName = getSectionFinalSegmentName(Sec); + if (Expected<StringRef> NameOrErr = getSectionName(Sec)) + return (SegmentName == 
"__LLVM" && *NameOrErr == "__bitcode"); + return false; +} + +bool MachOObjectFile::isSectionStripped(DataRefImpl Sec) const { + if (is64Bit()) + return getSection64(Sec).offset == 0; + return getSection(Sec).offset == 0; +} + +relocation_iterator MachOObjectFile::section_rel_begin(DataRefImpl Sec) const { + DataRefImpl Ret; + Ret.d.a = Sec.d.a; + Ret.d.b = 0; + return relocation_iterator(RelocationRef(Ret, this)); +} + +relocation_iterator +MachOObjectFile::section_rel_end(DataRefImpl Sec) const { + uint32_t Num; + if (is64Bit()) { + MachO::section_64 Sect = getSection64(Sec); + Num = Sect.nreloc; + } else { + MachO::section Sect = getSection(Sec); + Num = Sect.nreloc; + } + + DataRefImpl Ret; + Ret.d.a = Sec.d.a; + Ret.d.b = Num; + return relocation_iterator(RelocationRef(Ret, this)); +} + +relocation_iterator MachOObjectFile::extrel_begin() const { + DataRefImpl Ret; + // for DYSYMTAB symbols, Ret.d.a == 0 for external relocations + Ret.d.a = 0; // Would normally be a section index. + Ret.d.b = 0; // Index into the external relocations + return relocation_iterator(RelocationRef(Ret, this)); +} + +relocation_iterator MachOObjectFile::extrel_end() const { + MachO::dysymtab_command DysymtabLoadCmd = getDysymtabLoadCommand(); + DataRefImpl Ret; + // for DYSYMTAB symbols, Ret.d.a == 0 for external relocations + Ret.d.a = 0; // Would normally be a section index. + Ret.d.b = DysymtabLoadCmd.nextrel; // Index into the external relocations + return relocation_iterator(RelocationRef(Ret, this)); +} + +relocation_iterator MachOObjectFile::locrel_begin() const { + DataRefImpl Ret; + // for DYSYMTAB symbols, Ret.d.a == 1 for local relocations + Ret.d.a = 1; // Would normally be a section index. 
+ Ret.d.b = 0; // Index into the local relocations + return relocation_iterator(RelocationRef(Ret, this)); +} + +relocation_iterator MachOObjectFile::locrel_end() const { + MachO::dysymtab_command DysymtabLoadCmd = getDysymtabLoadCommand(); + DataRefImpl Ret; + // for DYSYMTAB symbols, Ret.d.a == 1 for local relocations + Ret.d.a = 1; // Would normally be a section index. + Ret.d.b = DysymtabLoadCmd.nlocrel; // Index into the local relocations + return relocation_iterator(RelocationRef(Ret, this)); +} + +void MachOObjectFile::moveRelocationNext(DataRefImpl &Rel) const { + ++Rel.d.b; +} + +uint64_t MachOObjectFile::getRelocationOffset(DataRefImpl Rel) const { + assert((getHeader().filetype == MachO::MH_OBJECT || + getHeader().filetype == MachO::MH_KEXT_BUNDLE) && + "Only implemented for MH_OBJECT && MH_KEXT_BUNDLE"); + MachO::any_relocation_info RE = getRelocation(Rel); + return getAnyRelocationAddress(RE); +} + +symbol_iterator +MachOObjectFile::getRelocationSymbol(DataRefImpl Rel) const { + MachO::any_relocation_info RE = getRelocation(Rel); + if (isRelocationScattered(RE)) + return symbol_end(); + + uint32_t SymbolIdx = getPlainRelocationSymbolNum(RE); + bool isExtern = getPlainRelocationExternal(RE); + if (!isExtern) + return symbol_end(); + + MachO::symtab_command S = getSymtabLoadCommand(); + unsigned SymbolTableEntrySize = is64Bit() ? 
+ sizeof(MachO::nlist_64) : + sizeof(MachO::nlist); + uint64_t Offset = S.symoff + SymbolIdx * SymbolTableEntrySize; + DataRefImpl Sym; + Sym.p = reinterpret_cast<uintptr_t>(getPtr(*this, Offset)); + return symbol_iterator(SymbolRef(Sym, this)); +} + +section_iterator +MachOObjectFile::getRelocationSection(DataRefImpl Rel) const { + return section_iterator(getAnyRelocationSection(getRelocation(Rel))); +} + +uint64_t MachOObjectFile::getRelocationType(DataRefImpl Rel) const { + MachO::any_relocation_info RE = getRelocation(Rel); + return getAnyRelocationType(RE); +} + +void MachOObjectFile::getRelocationTypeName( + DataRefImpl Rel, SmallVectorImpl<char> &Result) const { + StringRef res; + uint64_t RType = getRelocationType(Rel); + + unsigned Arch = this->getArch(); + + switch (Arch) { + case Triple::x86: { + static const char *const Table[] = { + "GENERIC_RELOC_VANILLA", + "GENERIC_RELOC_PAIR", + "GENERIC_RELOC_SECTDIFF", + "GENERIC_RELOC_PB_LA_PTR", + "GENERIC_RELOC_LOCAL_SECTDIFF", + "GENERIC_RELOC_TLV" }; + + if (RType > 5) + res = "Unknown"; + else + res = Table[RType]; + break; + } + case Triple::x86_64: { + static const char *const Table[] = { + "X86_64_RELOC_UNSIGNED", + "X86_64_RELOC_SIGNED", + "X86_64_RELOC_BRANCH", + "X86_64_RELOC_GOT_LOAD", + "X86_64_RELOC_GOT", + "X86_64_RELOC_SUBTRACTOR", + "X86_64_RELOC_SIGNED_1", + "X86_64_RELOC_SIGNED_2", + "X86_64_RELOC_SIGNED_4", + "X86_64_RELOC_TLV" }; + + if (RType > 9) + res = "Unknown"; + else + res = Table[RType]; + break; + } + case Triple::arm: { + static const char *const Table[] = { + "ARM_RELOC_VANILLA", + "ARM_RELOC_PAIR", + "ARM_RELOC_SECTDIFF", + "ARM_RELOC_LOCAL_SECTDIFF", + "ARM_RELOC_PB_LA_PTR", + "ARM_RELOC_BR24", + "ARM_THUMB_RELOC_BR22", + "ARM_THUMB_32BIT_BRANCH", + "ARM_RELOC_HALF", + "ARM_RELOC_HALF_SECTDIFF" }; + + if (RType > 9) + res = "Unknown"; + else + res = Table[RType]; + break; + } + case Triple::aarch64: + case Triple::aarch64_32: { + static const char *const Table[] = { + 
"ARM64_RELOC_UNSIGNED", "ARM64_RELOC_SUBTRACTOR", + "ARM64_RELOC_BRANCH26", "ARM64_RELOC_PAGE21", + "ARM64_RELOC_PAGEOFF12", "ARM64_RELOC_GOT_LOAD_PAGE21", + "ARM64_RELOC_GOT_LOAD_PAGEOFF12", "ARM64_RELOC_POINTER_TO_GOT", + "ARM64_RELOC_TLVP_LOAD_PAGE21", "ARM64_RELOC_TLVP_LOAD_PAGEOFF12", + "ARM64_RELOC_ADDEND" + }; + + if (RType >= array_lengthof(Table)) + res = "Unknown"; + else + res = Table[RType]; + break; + } + case Triple::ppc: { + static const char *const Table[] = { + "PPC_RELOC_VANILLA", + "PPC_RELOC_PAIR", + "PPC_RELOC_BR14", + "PPC_RELOC_BR24", + "PPC_RELOC_HI16", + "PPC_RELOC_LO16", + "PPC_RELOC_HA16", + "PPC_RELOC_LO14", + "PPC_RELOC_SECTDIFF", + "PPC_RELOC_PB_LA_PTR", + "PPC_RELOC_HI16_SECTDIFF", + "PPC_RELOC_LO16_SECTDIFF", + "PPC_RELOC_HA16_SECTDIFF", + "PPC_RELOC_JBSR", + "PPC_RELOC_LO14_SECTDIFF", + "PPC_RELOC_LOCAL_SECTDIFF" }; + + if (RType > 15) + res = "Unknown"; + else + res = Table[RType]; + break; + } + case Triple::UnknownArch: + res = "Unknown"; + break; + } + Result.append(res.begin(), res.end()); +} + +uint8_t MachOObjectFile::getRelocationLength(DataRefImpl Rel) const { + MachO::any_relocation_info RE = getRelocation(Rel); + return getAnyRelocationLength(RE); +} + +// +// guessLibraryShortName() is passed a name of a dynamic library and returns a +// guess on what the short name is. Then name is returned as a substring of the +// StringRef Name passed in. The name of the dynamic library is recognized as +// a framework if it has one of the two following forms: +// Foo.framework/Versions/A/Foo +// Foo.framework/Foo +// Where A and Foo can be any string. And may contain a trailing suffix +// starting with an underbar. If the Name is recognized as a framework then +// isFramework is set to true else it is set to false. If the Name has a +// suffix then Suffix is set to the substring in Name that contains the suffix +// else it is set to a NULL StringRef. 
+// +// The Name of the dynamic library is recognized as a library name if it has +// one of the two following forms: +// libFoo.A.dylib +// libFoo.dylib +// +// The library may have a suffix trailing the name Foo of the form: +// libFoo_profile.A.dylib +// libFoo_profile.dylib +// These dyld image suffixes are separated from the short name by a '_' +// character. Because the '_' character is commonly used to separate words in +// filenames guessLibraryShortName() cannot reliably separate a dylib's short +// name from an arbitrary image suffix; imagine if both the short name and the +// suffix contains an '_' character! To better deal with this ambiguity, +// guessLibraryShortName() will recognize only "_debug" and "_profile" as valid +// Suffix values. Calling code needs to be tolerant of guessLibraryShortName() +// guessing incorrectly. +// +// The Name of the dynamic library is also recognized as a library name if it +// has the following form: +// Foo.qtx +// +// If the Name of the dynamic library is none of the forms above then a NULL +// StringRef is returned. 
+StringRef MachOObjectFile::guessLibraryShortName(StringRef Name, + bool &isFramework, + StringRef &Suffix) { + StringRef Foo, F, DotFramework, V, Dylib, Lib, Dot, Qtx; + size_t a, b, c, d, Idx; + + isFramework = false; + Suffix = StringRef(); + + // Pull off the last component and make Foo point to it + a = Name.rfind('/'); + if (a == Name.npos || a == 0) + goto guess_library; + Foo = Name.slice(a+1, Name.npos); + + // Look for a suffix starting with a '_' + Idx = Foo.rfind('_'); + if (Idx != Foo.npos && Foo.size() >= 2) { + Suffix = Foo.slice(Idx, Foo.npos); + if (Suffix != "_debug" && Suffix != "_profile") + Suffix = StringRef(); + else + Foo = Foo.slice(0, Idx); + } + + // First look for the form Foo.framework/Foo + b = Name.rfind('/', a); + if (b == Name.npos) + Idx = 0; + else + Idx = b+1; + F = Name.slice(Idx, Idx + Foo.size()); + DotFramework = Name.slice(Idx + Foo.size(), + Idx + Foo.size() + sizeof(".framework/")-1); + if (F == Foo && DotFramework == ".framework/") { + isFramework = true; + return Foo; + } + + // Next look for the form Foo.framework/Versions/A/Foo + if (b == Name.npos) + goto guess_library; + c = Name.rfind('/', b); + if (c == Name.npos || c == 0) + goto guess_library; + V = Name.slice(c+1, Name.npos); + if (!V.startswith("Versions/")) + goto guess_library; + d = Name.rfind('/', c); + if (d == Name.npos) + Idx = 0; + else + Idx = d+1; + F = Name.slice(Idx, Idx + Foo.size()); + DotFramework = Name.slice(Idx + Foo.size(), + Idx + Foo.size() + sizeof(".framework/")-1); + if (F == Foo && DotFramework == ".framework/") { + isFramework = true; + return Foo; + } + +guess_library: + // pull off the suffix after the "." and make a point to it + a = Name.rfind('.'); + if (a == Name.npos || a == 0) + return StringRef(); + Dylib = Name.slice(a, Name.npos); + if (Dylib != ".dylib") + goto guess_qtx; + + // First pull off the version letter for the form Foo.A.dylib if any. 
+ if (a >= 3) { + Dot = Name.slice(a-2, a-1); + if (Dot == ".") + a = a - 2; + } + + b = Name.rfind('/', a); + if (b == Name.npos) + b = 0; + else + b = b+1; + // ignore any suffix after an underbar like Foo_profile.A.dylib + Idx = Name.rfind('_'); + if (Idx != Name.npos && Idx != b) { + Lib = Name.slice(b, Idx); + Suffix = Name.slice(Idx, a); + if (Suffix != "_debug" && Suffix != "_profile") { + Suffix = StringRef(); + Lib = Name.slice(b, a); + } + } + else + Lib = Name.slice(b, a); + // There are incorrect library names of the form: + // libATS.A_profile.dylib so check for these. + if (Lib.size() >= 3) { + Dot = Lib.slice(Lib.size()-2, Lib.size()-1); + if (Dot == ".") + Lib = Lib.slice(0, Lib.size()-2); + } + return Lib; + +guess_qtx: + Qtx = Name.slice(a, Name.npos); + if (Qtx != ".qtx") + return StringRef(); + b = Name.rfind('/', a); + if (b == Name.npos) + Lib = Name.slice(0, a); + else + Lib = Name.slice(b+1, a); + // There are library names of the form: QT.A.qtx so check for these. + if (Lib.size() >= 3) { + Dot = Lib.slice(Lib.size()-2, Lib.size()-1); + if (Dot == ".") + Lib = Lib.slice(0, Lib.size()-2); + } + return Lib; +} + +// getLibraryShortNameByIndex() is used to get the short name of the library +// for an undefined symbol in a linked Mach-O binary that was linked with the +// normal two-level namespace default (that is MH_TWOLEVEL in the header). +// It is passed the index (0 - based) of the library as translated from +// GET_LIBRARY_ORDINAL (1 - based). +std::error_code MachOObjectFile::getLibraryShortNameByIndex(unsigned Index, + StringRef &Res) const { + if (Index >= Libraries.size()) + return object_error::parse_failed; + + // If the cache of LibrariesShortNames is not built up do that first for + // all the Libraries. 
+ if (LibrariesShortNames.size() == 0) { + for (unsigned i = 0; i < Libraries.size(); i++) { + auto CommandOrErr = + getStructOrErr<MachO::dylib_command>(*this, Libraries[i]); + if (!CommandOrErr) + return object_error::parse_failed; + MachO::dylib_command D = CommandOrErr.get(); + if (D.dylib.name >= D.cmdsize) + return object_error::parse_failed; + const char *P = (const char *)(Libraries[i]) + D.dylib.name; + StringRef Name = StringRef(P); + if (D.dylib.name+Name.size() >= D.cmdsize) + return object_error::parse_failed; + StringRef Suffix; + bool isFramework; + StringRef shortName = guessLibraryShortName(Name, isFramework, Suffix); + if (shortName.empty()) + LibrariesShortNames.push_back(Name); + else + LibrariesShortNames.push_back(shortName); + } + } + + Res = LibrariesShortNames[Index]; + return std::error_code(); +} + +uint32_t MachOObjectFile::getLibraryCount() const { + return Libraries.size(); +} + +section_iterator +MachOObjectFile::getRelocationRelocatedSection(relocation_iterator Rel) const { + DataRefImpl Sec; + Sec.d.a = Rel->getRawDataRefImpl().d.a; + return section_iterator(SectionRef(Sec, this)); +} + +basic_symbol_iterator MachOObjectFile::symbol_begin() const { + DataRefImpl DRI; + MachO::symtab_command Symtab = getSymtabLoadCommand(); + if (!SymtabLoadCmd || Symtab.nsyms == 0) + return basic_symbol_iterator(SymbolRef(DRI, this)); + + return getSymbolByIndex(0); +} + +basic_symbol_iterator MachOObjectFile::symbol_end() const { + DataRefImpl DRI; + MachO::symtab_command Symtab = getSymtabLoadCommand(); + if (!SymtabLoadCmd || Symtab.nsyms == 0) + return basic_symbol_iterator(SymbolRef(DRI, this)); + + unsigned SymbolTableEntrySize = is64Bit() ? 
+ sizeof(MachO::nlist_64) : + sizeof(MachO::nlist); + unsigned Offset = Symtab.symoff + + Symtab.nsyms * SymbolTableEntrySize; + DRI.p = reinterpret_cast<uintptr_t>(getPtr(*this, Offset)); + return basic_symbol_iterator(SymbolRef(DRI, this)); +} + +symbol_iterator MachOObjectFile::getSymbolByIndex(unsigned Index) const { + MachO::symtab_command Symtab = getSymtabLoadCommand(); + if (!SymtabLoadCmd || Index >= Symtab.nsyms) + report_fatal_error("Requested symbol index is out of range."); + unsigned SymbolTableEntrySize = + is64Bit() ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist); + DataRefImpl DRI; + DRI.p = reinterpret_cast<uintptr_t>(getPtr(*this, Symtab.symoff)); + DRI.p += Index * SymbolTableEntrySize; + return basic_symbol_iterator(SymbolRef(DRI, this)); +} + +uint64_t MachOObjectFile::getSymbolIndex(DataRefImpl Symb) const { + MachO::symtab_command Symtab = getSymtabLoadCommand(); + if (!SymtabLoadCmd) + report_fatal_error("getSymbolIndex() called with no symbol table symbol"); + unsigned SymbolTableEntrySize = + is64Bit() ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist); + DataRefImpl DRIstart; + DRIstart.p = reinterpret_cast<uintptr_t>(getPtr(*this, Symtab.symoff)); + uint64_t Index = (Symb.p - DRIstart.p) / SymbolTableEntrySize; + return Index; +} + +section_iterator MachOObjectFile::section_begin() const { + DataRefImpl DRI; + return section_iterator(SectionRef(DRI, this)); +} + +section_iterator MachOObjectFile::section_end() const { + DataRefImpl DRI; + DRI.d.a = Sections.size(); + return section_iterator(SectionRef(DRI, this)); +} + +uint8_t MachOObjectFile::getBytesInAddress() const { + return is64Bit() ? 
8 : 4; +} + +StringRef MachOObjectFile::getFileFormatName() const { + unsigned CPUType = getCPUType(*this); + if (!is64Bit()) { + switch (CPUType) { + case MachO::CPU_TYPE_I386: + return "Mach-O 32-bit i386"; + case MachO::CPU_TYPE_ARM: + return "Mach-O arm"; + case MachO::CPU_TYPE_ARM64_32: + return "Mach-O arm64 (ILP32)"; + case MachO::CPU_TYPE_POWERPC: + return "Mach-O 32-bit ppc"; + default: + return "Mach-O 32-bit unknown"; + } + } + + switch (CPUType) { + case MachO::CPU_TYPE_X86_64: + return "Mach-O 64-bit x86-64"; + case MachO::CPU_TYPE_ARM64: + return "Mach-O arm64"; + case MachO::CPU_TYPE_POWERPC64: + return "Mach-O 64-bit ppc64"; + default: + return "Mach-O 64-bit unknown"; + } +} + +Triple::ArchType MachOObjectFile::getArch(uint32_t CPUType, uint32_t CPUSubType) { + switch (CPUType) { + case MachO::CPU_TYPE_I386: + return Triple::x86; + case MachO::CPU_TYPE_X86_64: + return Triple::x86_64; + case MachO::CPU_TYPE_ARM: + return Triple::arm; + case MachO::CPU_TYPE_ARM64: + return Triple::aarch64; + case MachO::CPU_TYPE_ARM64_32: + return Triple::aarch64_32; + case MachO::CPU_TYPE_POWERPC: + return Triple::ppc; + case MachO::CPU_TYPE_POWERPC64: + return Triple::ppc64; + default: + return Triple::UnknownArch; + } +} + +Triple MachOObjectFile::getArchTriple(uint32_t CPUType, uint32_t CPUSubType, + const char **McpuDefault, + const char **ArchFlag) { + if (McpuDefault) + *McpuDefault = nullptr; + if (ArchFlag) + *ArchFlag = nullptr; + + switch (CPUType) { + case MachO::CPU_TYPE_I386: + switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) { + case MachO::CPU_SUBTYPE_I386_ALL: + if (ArchFlag) + *ArchFlag = "i386"; + return Triple("i386-apple-darwin"); + default: + return Triple(); + } + case MachO::CPU_TYPE_X86_64: + switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) { + case MachO::CPU_SUBTYPE_X86_64_ALL: + if (ArchFlag) + *ArchFlag = "x86_64"; + return Triple("x86_64-apple-darwin"); + case MachO::CPU_SUBTYPE_X86_64_H: + if (ArchFlag) + *ArchFlag = "x86_64h"; + return 
Triple("x86_64h-apple-darwin"); + default: + return Triple(); + } + case MachO::CPU_TYPE_ARM: + switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) { + case MachO::CPU_SUBTYPE_ARM_V4T: + if (ArchFlag) + *ArchFlag = "armv4t"; + return Triple("armv4t-apple-darwin"); + case MachO::CPU_SUBTYPE_ARM_V5TEJ: + if (ArchFlag) + *ArchFlag = "armv5e"; + return Triple("armv5e-apple-darwin"); + case MachO::CPU_SUBTYPE_ARM_XSCALE: + if (ArchFlag) + *ArchFlag = "xscale"; + return Triple("xscale-apple-darwin"); + case MachO::CPU_SUBTYPE_ARM_V6: + if (ArchFlag) + *ArchFlag = "armv6"; + return Triple("armv6-apple-darwin"); + case MachO::CPU_SUBTYPE_ARM_V6M: + if (McpuDefault) + *McpuDefault = "cortex-m0"; + if (ArchFlag) + *ArchFlag = "armv6m"; + return Triple("armv6m-apple-darwin"); + case MachO::CPU_SUBTYPE_ARM_V7: + if (ArchFlag) + *ArchFlag = "armv7"; + return Triple("armv7-apple-darwin"); + case MachO::CPU_SUBTYPE_ARM_V7EM: + if (McpuDefault) + *McpuDefault = "cortex-m4"; + if (ArchFlag) + *ArchFlag = "armv7em"; + return Triple("thumbv7em-apple-darwin"); + case MachO::CPU_SUBTYPE_ARM_V7K: + if (McpuDefault) + *McpuDefault = "cortex-a7"; + if (ArchFlag) + *ArchFlag = "armv7k"; + return Triple("armv7k-apple-darwin"); + case MachO::CPU_SUBTYPE_ARM_V7M: + if (McpuDefault) + *McpuDefault = "cortex-m3"; + if (ArchFlag) + *ArchFlag = "armv7m"; + return Triple("thumbv7m-apple-darwin"); + case MachO::CPU_SUBTYPE_ARM_V7S: + if (McpuDefault) + *McpuDefault = "cortex-a7"; + if (ArchFlag) + *ArchFlag = "armv7s"; + return Triple("armv7s-apple-darwin"); + default: + return Triple(); + } + case MachO::CPU_TYPE_ARM64: + switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) { + case MachO::CPU_SUBTYPE_ARM64_ALL: + if (McpuDefault) + *McpuDefault = "cyclone"; + if (ArchFlag) + *ArchFlag = "arm64"; + return Triple("arm64-apple-darwin"); + case MachO::CPU_SUBTYPE_ARM64E: + if (McpuDefault) + *McpuDefault = "apple-a12"; + if (ArchFlag) + *ArchFlag = "arm64e"; + return Triple("arm64e-apple-darwin"); + default: + 
return Triple(); + } + case MachO::CPU_TYPE_ARM64_32: + switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) { + case MachO::CPU_SUBTYPE_ARM64_32_V8: + if (McpuDefault) + *McpuDefault = "cyclone"; + if (ArchFlag) + *ArchFlag = "arm64_32"; + return Triple("arm64_32-apple-darwin"); + default: + return Triple(); + } + case MachO::CPU_TYPE_POWERPC: + switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) { + case MachO::CPU_SUBTYPE_POWERPC_ALL: + if (ArchFlag) + *ArchFlag = "ppc"; + return Triple("ppc-apple-darwin"); + default: + return Triple(); + } + case MachO::CPU_TYPE_POWERPC64: + switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) { + case MachO::CPU_SUBTYPE_POWERPC_ALL: + if (ArchFlag) + *ArchFlag = "ppc64"; + return Triple("ppc64-apple-darwin"); + default: + return Triple(); + } + default: + return Triple(); + } +} + +Triple MachOObjectFile::getHostArch() { + return Triple(sys::getDefaultTargetTriple()); +} + +bool MachOObjectFile::isValidArch(StringRef ArchFlag) { + auto validArchs = getValidArchs(); + return llvm::is_contained(validArchs, ArchFlag); +} + +ArrayRef<StringRef> MachOObjectFile::getValidArchs() { + static const std::array<StringRef, 18> ValidArchs = {{ + "i386", + "x86_64", + "x86_64h", + "armv4t", + "arm", + "armv5e", + "armv6", + "armv6m", + "armv7", + "armv7em", + "armv7k", + "armv7m", + "armv7s", + "arm64", + "arm64e", + "arm64_32", + "ppc", + "ppc64", + }}; + + return ValidArchs; +} + +Triple::ArchType MachOObjectFile::getArch() const { + return getArch(getCPUType(*this), getCPUSubType(*this)); +} + +Triple MachOObjectFile::getArchTriple(const char **McpuDefault) const { + return getArchTriple(Header.cputype, Header.cpusubtype, McpuDefault); +} + +relocation_iterator MachOObjectFile::section_rel_begin(unsigned Index) const { + DataRefImpl DRI; + DRI.d.a = Index; + return section_rel_begin(DRI); +} + +relocation_iterator MachOObjectFile::section_rel_end(unsigned Index) const { + DataRefImpl DRI; + DRI.d.a = Index; + return section_rel_end(DRI); +} + 
+dice_iterator MachOObjectFile::begin_dices() const { + DataRefImpl DRI; + if (!DataInCodeLoadCmd) + return dice_iterator(DiceRef(DRI, this)); + + MachO::linkedit_data_command DicLC = getDataInCodeLoadCommand(); + DRI.p = reinterpret_cast<uintptr_t>(getPtr(*this, DicLC.dataoff)); + return dice_iterator(DiceRef(DRI, this)); +} + +dice_iterator MachOObjectFile::end_dices() const { + DataRefImpl DRI; + if (!DataInCodeLoadCmd) + return dice_iterator(DiceRef(DRI, this)); + + MachO::linkedit_data_command DicLC = getDataInCodeLoadCommand(); + unsigned Offset = DicLC.dataoff + DicLC.datasize; + DRI.p = reinterpret_cast<uintptr_t>(getPtr(*this, Offset)); + return dice_iterator(DiceRef(DRI, this)); +} + +ExportEntry::ExportEntry(Error *E, const MachOObjectFile *O, + ArrayRef<uint8_t> T) : E(E), O(O), Trie(T) {} + +void ExportEntry::moveToFirst() { + ErrorAsOutParameter ErrAsOutParam(E); + pushNode(0); + if (*E) + return; + pushDownUntilBottom(); +} + +void ExportEntry::moveToEnd() { + Stack.clear(); + Done = true; +} + +bool ExportEntry::operator==(const ExportEntry &Other) const { + // Common case, one at end, other iterating from begin. + if (Done || Other.Done) + return (Done == Other.Done); + // Not equal if different stack sizes. + if (Stack.size() != Other.Stack.size()) + return false; + // Not equal if different cumulative strings. + if (!CumulativeString.equals(Other.CumulativeString)) + return false; + // Equal if all nodes in both stacks match. 
+ for (unsigned i=0; i < Stack.size(); ++i) { + if (Stack[i].Start != Other.Stack[i].Start) + return false; + } + return true; +} + +uint64_t ExportEntry::readULEB128(const uint8_t *&Ptr, const char **error) { + unsigned Count; + uint64_t Result = decodeULEB128(Ptr, &Count, Trie.end(), error); + Ptr += Count; + if (Ptr > Trie.end()) + Ptr = Trie.end(); + return Result; +} + +StringRef ExportEntry::name() const { + return CumulativeString; +} + +uint64_t ExportEntry::flags() const { + return Stack.back().Flags; +} + +uint64_t ExportEntry::address() const { + return Stack.back().Address; +} + +uint64_t ExportEntry::other() const { + return Stack.back().Other; +} + +StringRef ExportEntry::otherName() const { + const char* ImportName = Stack.back().ImportName; + if (ImportName) + return StringRef(ImportName); + return StringRef(); +} + +uint32_t ExportEntry::nodeOffset() const { + return Stack.back().Start - Trie.begin(); +} + +ExportEntry::NodeState::NodeState(const uint8_t *Ptr) + : Start(Ptr), Current(Ptr) {} + +void ExportEntry::pushNode(uint64_t offset) { + ErrorAsOutParameter ErrAsOutParam(E); + const uint8_t *Ptr = Trie.begin() + offset; + NodeState State(Ptr); + const char *error; + uint64_t ExportInfoSize = readULEB128(State.Current, &error); + if (error) { + *E = malformedError("export info size " + Twine(error) + + " in export trie data at node: 0x" + + Twine::utohexstr(offset)); + moveToEnd(); + return; + } + State.IsExportNode = (ExportInfoSize != 0); + const uint8_t* Children = State.Current + ExportInfoSize; + if (Children > Trie.end()) { + *E = malformedError( + "export info size: 0x" + Twine::utohexstr(ExportInfoSize) + + " in export trie data at node: 0x" + Twine::utohexstr(offset) + + " too big and extends past end of trie data"); + moveToEnd(); + return; + } + if (State.IsExportNode) { + const uint8_t *ExportStart = State.Current; + State.Flags = readULEB128(State.Current, &error); + if (error) { + *E = malformedError("flags " + Twine(error) + + " 
in export trie data at node: 0x" + + Twine::utohexstr(offset)); + moveToEnd(); + return; + } + uint64_t Kind = State.Flags & MachO::EXPORT_SYMBOL_FLAGS_KIND_MASK; + if (State.Flags != 0 && + (Kind != MachO::EXPORT_SYMBOL_FLAGS_KIND_REGULAR && + Kind != MachO::EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE && + Kind != MachO::EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL)) { + *E = malformedError( + "unsupported exported symbol kind: " + Twine((int)Kind) + + " in flags: 0x" + Twine::utohexstr(State.Flags) + + " in export trie data at node: 0x" + Twine::utohexstr(offset)); + moveToEnd(); + return; + } + if (State.Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) { + State.Address = 0; + State.Other = readULEB128(State.Current, &error); // dylib ordinal + if (error) { + *E = malformedError("dylib ordinal of re-export " + Twine(error) + + " in export trie data at node: 0x" + + Twine::utohexstr(offset)); + moveToEnd(); + return; + } + if (O != nullptr) { + if (State.Other > O->getLibraryCount()) { + *E = malformedError( + "bad library ordinal: " + Twine((int)State.Other) + " (max " + + Twine((int)O->getLibraryCount()) + + ") in export trie data at node: 0x" + Twine::utohexstr(offset)); + moveToEnd(); + return; + } + } + State.ImportName = reinterpret_cast<const char*>(State.Current); + if (*State.ImportName == '\0') { + State.Current++; + } else { + const uint8_t *End = State.Current + 1; + if (End >= Trie.end()) { + *E = malformedError("import name of re-export in export trie data at " + "node: 0x" + + Twine::utohexstr(offset) + + " starts past end of trie data"); + moveToEnd(); + return; + } + while(*End != '\0' && End < Trie.end()) + End++; + if (*End != '\0') { + *E = malformedError("import name of re-export in export trie data at " + "node: 0x" + + Twine::utohexstr(offset) + + " extends past end of trie data"); + moveToEnd(); + return; + } + State.Current = End + 1; + } + } else { + State.Address = readULEB128(State.Current, &error); + if (error) { + *E = malformedError("address " + 
Twine(error) + + " in export trie data at node: 0x" + + Twine::utohexstr(offset)); + moveToEnd(); + return; + } + if (State.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) { + State.Other = readULEB128(State.Current, &error); + if (error) { + *E = malformedError("resolver of stub and resolver " + Twine(error) + + " in export trie data at node: 0x" + + Twine::utohexstr(offset)); + moveToEnd(); + return; + } + } + } + if(ExportStart + ExportInfoSize != State.Current) { + *E = malformedError( + "inconsistant export info size: 0x" + + Twine::utohexstr(ExportInfoSize) + " where actual size was: 0x" + + Twine::utohexstr(State.Current - ExportStart) + + " in export trie data at node: 0x" + Twine::utohexstr(offset)); + moveToEnd(); + return; + } + } + State.ChildCount = *Children; + if (State.ChildCount != 0 && Children + 1 >= Trie.end()) { + *E = malformedError("byte for count of childern in export trie data at " + "node: 0x" + + Twine::utohexstr(offset) + + " extends past end of trie data"); + moveToEnd(); + return; + } + State.Current = Children + 1; + State.NextChildIndex = 0; + State.ParentStringLength = CumulativeString.size(); + Stack.push_back(State); +} + +void ExportEntry::pushDownUntilBottom() { + ErrorAsOutParameter ErrAsOutParam(E); + const char *error; + while (Stack.back().NextChildIndex < Stack.back().ChildCount) { + NodeState &Top = Stack.back(); + CumulativeString.resize(Top.ParentStringLength); + for (;*Top.Current != 0 && Top.Current < Trie.end(); Top.Current++) { + char C = *Top.Current; + CumulativeString.push_back(C); + } + if (Top.Current >= Trie.end()) { + *E = malformedError("edge sub-string in export trie data at node: 0x" + + Twine::utohexstr(Top.Start - Trie.begin()) + + " for child #" + Twine((int)Top.NextChildIndex) + + " extends past end of trie data"); + moveToEnd(); + return; + } + Top.Current += 1; + uint64_t childNodeIndex = readULEB128(Top.Current, &error); + if (error) { + *E = malformedError("child node offset " + Twine(error) + 
+ " in export trie data at node: 0x" + + Twine::utohexstr(Top.Start - Trie.begin())); + moveToEnd(); + return; + } + for (const NodeState &node : nodes()) { + if (node.Start == Trie.begin() + childNodeIndex){ + *E = malformedError("loop in childern in export trie data at node: 0x" + + Twine::utohexstr(Top.Start - Trie.begin()) + + " back to node: 0x" + + Twine::utohexstr(childNodeIndex)); + moveToEnd(); + return; + } + } + Top.NextChildIndex += 1; + pushNode(childNodeIndex); + if (*E) + return; + } + if (!Stack.back().IsExportNode) { + *E = malformedError("node is not an export node in export trie data at " + "node: 0x" + + Twine::utohexstr(Stack.back().Start - Trie.begin())); + moveToEnd(); + return; + } +} + +// We have a trie data structure and need a way to walk it that is compatible +// with the C++ iterator model. The solution is a non-recursive depth first +// traversal where the iterator contains a stack of parent nodes along with a +// string that is the accumulation of all edge strings along the parent chain +// to this point. +// +// There is one "export" node for each exported symbol. But because some +// symbols may be a prefix of another symbol (e.g. _dup and _dup2), an export +// node may have child nodes too. +// +// The algorithm for moveNext() is to keep moving down the leftmost unvisited +// child until hitting a node with no children (which is an export node or +// else the trie is malformed). On the way down, each node is pushed on the +// stack ivar. If there is no more ways down, it pops up one and tries to go +// down a sibling path until a childless node is reached. 
+void ExportEntry::moveNext() { + assert(!Stack.empty() && "ExportEntry::moveNext() with empty node stack"); + if (!Stack.back().IsExportNode) { + *E = malformedError("node is not an export node in export trie data at " + "node: 0x" + + Twine::utohexstr(Stack.back().Start - Trie.begin())); + moveToEnd(); + return; + } + + Stack.pop_back(); + while (!Stack.empty()) { + NodeState &Top = Stack.back(); + if (Top.NextChildIndex < Top.ChildCount) { + pushDownUntilBottom(); + // Now at the next export node. + return; + } else { + if (Top.IsExportNode) { + // This node has no children but is itself an export node. + CumulativeString.resize(Top.ParentStringLength); + return; + } + Stack.pop_back(); + } + } + Done = true; +} + +iterator_range<export_iterator> +MachOObjectFile::exports(Error &E, ArrayRef<uint8_t> Trie, + const MachOObjectFile *O) { + ExportEntry Start(&E, O, Trie); + if (Trie.empty()) + Start.moveToEnd(); + else + Start.moveToFirst(); + + ExportEntry Finish(&E, O, Trie); + Finish.moveToEnd(); + + return make_range(export_iterator(Start), export_iterator(Finish)); +} + +iterator_range<export_iterator> MachOObjectFile::exports(Error &Err) const { + return exports(Err, getDyldInfoExportsTrie(), this); +} + +MachORebaseEntry::MachORebaseEntry(Error *E, const MachOObjectFile *O, + ArrayRef<uint8_t> Bytes, bool is64Bit) + : E(E), O(O), Opcodes(Bytes), Ptr(Bytes.begin()), + PointerSize(is64Bit ? 8 : 4) {} + +void MachORebaseEntry::moveToFirst() { + Ptr = Opcodes.begin(); + moveNext(); +} + +void MachORebaseEntry::moveToEnd() { + Ptr = Opcodes.end(); + RemainingLoopCount = 0; + Done = true; +} + +void MachORebaseEntry::moveNext() { + ErrorAsOutParameter ErrAsOutParam(E); + // If in the middle of some loop, move to next rebasing in loop. + SegmentOffset += AdvanceAmount; + if (RemainingLoopCount) { + --RemainingLoopCount; + return; + } + // REBASE_OPCODE_DONE is only used for padding if we are not aligned to + // pointer size. 
Therefore it is possible to reach the end without ever having + // seen REBASE_OPCODE_DONE. + if (Ptr == Opcodes.end()) { + Done = true; + return; + } + bool More = true; + while (More) { + // Parse next opcode and set up next loop. + const uint8_t *OpcodeStart = Ptr; + uint8_t Byte = *Ptr++; + uint8_t ImmValue = Byte & MachO::REBASE_IMMEDIATE_MASK; + uint8_t Opcode = Byte & MachO::REBASE_OPCODE_MASK; + uint32_t Count, Skip; + const char *error = nullptr; + switch (Opcode) { + case MachO::REBASE_OPCODE_DONE: + More = false; + Done = true; + moveToEnd(); + DEBUG_WITH_TYPE("mach-o-rebase", dbgs() << "REBASE_OPCODE_DONE\n"); + break; + case MachO::REBASE_OPCODE_SET_TYPE_IMM: + RebaseType = ImmValue; + if (RebaseType > MachO::REBASE_TYPE_TEXT_PCREL32) { + *E = malformedError("for REBASE_OPCODE_SET_TYPE_IMM bad bind type: " + + Twine((int)RebaseType) + " for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + DEBUG_WITH_TYPE( + "mach-o-rebase", + dbgs() << "REBASE_OPCODE_SET_TYPE_IMM: " + << "RebaseType=" << (int) RebaseType << "\n"); + break; + case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: + SegmentIndex = ImmValue; + SegmentOffset = readULEB128(&error); + if (error) { + *E = malformedError("for REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB " + + Twine(error) + " for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->RebaseEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset, + PointerSize); + if (error) { + *E = malformedError("for REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB " + + Twine(error) + " for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + DEBUG_WITH_TYPE( + "mach-o-rebase", + dbgs() << "REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: " + << "SegmentIndex=" << SegmentIndex << ", " + << format("SegmentOffset=0x%06X", SegmentOffset) + << "\n"); + break; + case MachO::REBASE_OPCODE_ADD_ADDR_ULEB: + 
SegmentOffset += readULEB128(&error); + if (error) { + *E = malformedError("for REBASE_OPCODE_ADD_ADDR_ULEB " + Twine(error) + + " for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->RebaseEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset, + PointerSize); + if (error) { + *E = malformedError("for REBASE_OPCODE_ADD_ADDR_ULEB " + Twine(error) + + " for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + DEBUG_WITH_TYPE("mach-o-rebase", + dbgs() << "REBASE_OPCODE_ADD_ADDR_ULEB: " + << format("SegmentOffset=0x%06X", + SegmentOffset) << "\n"); + break; + case MachO::REBASE_OPCODE_ADD_ADDR_IMM_SCALED: + SegmentOffset += ImmValue * PointerSize; + error = O->RebaseEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset, + PointerSize); + if (error) { + *E = malformedError("for REBASE_OPCODE_ADD_ADDR_IMM_SCALED " + + Twine(error) + " for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + DEBUG_WITH_TYPE("mach-o-rebase", + dbgs() << "REBASE_OPCODE_ADD_ADDR_IMM_SCALED: " + << format("SegmentOffset=0x%06X", + SegmentOffset) << "\n"); + break; + case MachO::REBASE_OPCODE_DO_REBASE_IMM_TIMES: + AdvanceAmount = PointerSize; + Skip = 0; + Count = ImmValue; + if (ImmValue != 0) + RemainingLoopCount = ImmValue - 1; + else + RemainingLoopCount = 0; + error = O->RebaseEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset, + PointerSize, Count, Skip); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_IMM_TIMES " + + Twine(error) + " for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + DEBUG_WITH_TYPE( + "mach-o-rebase", + dbgs() << "REBASE_OPCODE_DO_REBASE_IMM_TIMES: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); + return; + case 
MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES: + AdvanceAmount = PointerSize; + Skip = 0; + Count = readULEB128(&error); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES " + + Twine(error) + " for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (Count != 0) + RemainingLoopCount = Count - 1; + else + RemainingLoopCount = 0; + error = O->RebaseEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset, + PointerSize, Count, Skip); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES " + + Twine(error) + " for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + DEBUG_WITH_TYPE( + "mach-o-rebase", + dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); + return; + case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: + Skip = readULEB128(&error); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + AdvanceAmount = Skip + PointerSize; + Count = 1; + RemainingLoopCount = 0; + error = O->RebaseEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset, + PointerSize, Count, Skip); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + DEBUG_WITH_TYPE( + "mach-o-rebase", + dbgs() << "REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); + return; + case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: + Count = readULEB128(&error); + if (error) { 
+ *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_" + "ULEB " + + Twine(error) + " for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (Count != 0) + RemainingLoopCount = Count - 1; + else + RemainingLoopCount = 0; + Skip = readULEB128(&error); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_" + "ULEB " + + Twine(error) + " for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + AdvanceAmount = Skip + PointerSize; + + error = O->RebaseEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset, + PointerSize, Count, Skip); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_" + "ULEB " + + Twine(error) + " for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + DEBUG_WITH_TYPE( + "mach-o-rebase", + dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); + return; + default: + *E = malformedError("bad rebase info (bad opcode value 0x" + + Twine::utohexstr(Opcode) + " for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + } +} + +uint64_t MachORebaseEntry::readULEB128(const char **error) { + unsigned Count; + uint64_t Result = decodeULEB128(Ptr, &Count, Opcodes.end(), error); + Ptr += Count; + if (Ptr > Opcodes.end()) + Ptr = Opcodes.end(); + return Result; +} + +int32_t MachORebaseEntry::segmentIndex() const { return SegmentIndex; } + +uint64_t MachORebaseEntry::segmentOffset() const { return SegmentOffset; } + +StringRef MachORebaseEntry::typeName() const { + switch (RebaseType) { + case MachO::REBASE_TYPE_POINTER: + return "pointer"; + case MachO::REBASE_TYPE_TEXT_ABSOLUTE32: + return "text abs32"; + case 
MachO::REBASE_TYPE_TEXT_PCREL32: + return "text rel32"; + } + return "unknown"; +} + +// For use with the SegIndex of a checked Mach-O Rebase entry +// to get the segment name. +StringRef MachORebaseEntry::segmentName() const { + return O->BindRebaseSegmentName(SegmentIndex); +} + +// For use with a SegIndex,SegOffset pair from a checked Mach-O Rebase entry +// to get the section name. +StringRef MachORebaseEntry::sectionName() const { + return O->BindRebaseSectionName(SegmentIndex, SegmentOffset); +} + +// For use with a SegIndex,SegOffset pair from a checked Mach-O Rebase entry +// to get the address. +uint64_t MachORebaseEntry::address() const { + return O->BindRebaseAddress(SegmentIndex, SegmentOffset); +} + +bool MachORebaseEntry::operator==(const MachORebaseEntry &Other) const { +#ifdef EXPENSIVE_CHECKS + assert(Opcodes == Other.Opcodes && "compare iterators of different files"); +#else + assert(Opcodes.data() == Other.Opcodes.data() && "compare iterators of different files"); +#endif + return (Ptr == Other.Ptr) && + (RemainingLoopCount == Other.RemainingLoopCount) && + (Done == Other.Done); +} + +iterator_range<rebase_iterator> +MachOObjectFile::rebaseTable(Error &Err, MachOObjectFile *O, + ArrayRef<uint8_t> Opcodes, bool is64) { + if (O->BindRebaseSectionTable == nullptr) + O->BindRebaseSectionTable = std::make_unique<BindRebaseSegInfo>(O); + MachORebaseEntry Start(&Err, O, Opcodes, is64); + Start.moveToFirst(); + + MachORebaseEntry Finish(&Err, O, Opcodes, is64); + Finish.moveToEnd(); + + return make_range(rebase_iterator(Start), rebase_iterator(Finish)); +} + +iterator_range<rebase_iterator> MachOObjectFile::rebaseTable(Error &Err) { + return rebaseTable(Err, this, getDyldInfoRebaseOpcodes(), is64Bit()); +} + +MachOBindEntry::MachOBindEntry(Error *E, const MachOObjectFile *O, + ArrayRef<uint8_t> Bytes, bool is64Bit, Kind BK) + : E(E), O(O), Opcodes(Bytes), Ptr(Bytes.begin()), + PointerSize(is64Bit ? 
8 : 4), TableKind(BK) {} + +void MachOBindEntry::moveToFirst() { + Ptr = Opcodes.begin(); + moveNext(); +} + +void MachOBindEntry::moveToEnd() { + Ptr = Opcodes.end(); + RemainingLoopCount = 0; + Done = true; +} + +void MachOBindEntry::moveNext() { + ErrorAsOutParameter ErrAsOutParam(E); + // If in the middle of some loop, move to next binding in loop. + SegmentOffset += AdvanceAmount; + if (RemainingLoopCount) { + --RemainingLoopCount; + return; + } + // BIND_OPCODE_DONE is only used for padding if we are not aligned to + // pointer size. Therefore it is possible to reach the end without ever having + // seen BIND_OPCODE_DONE. + if (Ptr == Opcodes.end()) { + Done = true; + return; + } + bool More = true; + while (More) { + // Parse next opcode and set up next loop. + const uint8_t *OpcodeStart = Ptr; + uint8_t Byte = *Ptr++; + uint8_t ImmValue = Byte & MachO::BIND_IMMEDIATE_MASK; + uint8_t Opcode = Byte & MachO::BIND_OPCODE_MASK; + int8_t SignExtended; + const uint8_t *SymStart; + uint32_t Count, Skip; + const char *error = nullptr; + switch (Opcode) { + case MachO::BIND_OPCODE_DONE: + if (TableKind == Kind::Lazy) { + // Lazying bindings have a DONE opcode between entries. Need to ignore + // it to advance to next entry. But need not if this is last entry. 
+ bool NotLastEntry = false; + for (const uint8_t *P = Ptr; P < Opcodes.end(); ++P) { + if (*P) { + NotLastEntry = true; + } + } + if (NotLastEntry) + break; + } + More = false; + moveToEnd(); + DEBUG_WITH_TYPE("mach-o-bind", dbgs() << "BIND_OPCODE_DONE\n"); + break; + case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_IMM: + if (TableKind == Kind::Weak) { + *E = malformedError("BIND_OPCODE_SET_DYLIB_ORDINAL_IMM not allowed in " + "weak bind table for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + Ordinal = ImmValue; + LibraryOrdinalSet = true; + if (ImmValue > O->getLibraryCount()) { + *E = malformedError("for BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB bad " + "library ordinal: " + + Twine((int)ImmValue) + " (max " + + Twine((int)O->getLibraryCount()) + + ") for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + DEBUG_WITH_TYPE( + "mach-o-bind", + dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_IMM: " + << "Ordinal=" << Ordinal << "\n"); + break; + case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: + if (TableKind == Kind::Weak) { + *E = malformedError("BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB not allowed in " + "weak bind table for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + Ordinal = readULEB128(&error); + LibraryOrdinalSet = true; + if (error) { + *E = malformedError("for BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB " + + Twine(error) + " for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (Ordinal > (int)O->getLibraryCount()) { + *E = malformedError("for BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB bad " + "library ordinal: " + + Twine((int)Ordinal) + " (max " + + Twine((int)O->getLibraryCount()) + + ") for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + DEBUG_WITH_TYPE( + "mach-o-bind", + dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: " + << 
"Ordinal=" << Ordinal << "\n"); + break; + case MachO::BIND_OPCODE_SET_DYLIB_SPECIAL_IMM: + if (TableKind == Kind::Weak) { + *E = malformedError("BIND_OPCODE_SET_DYLIB_SPECIAL_IMM not allowed in " + "weak bind table for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (ImmValue) { + SignExtended = MachO::BIND_OPCODE_MASK | ImmValue; + Ordinal = SignExtended; + if (Ordinal < MachO::BIND_SPECIAL_DYLIB_FLAT_LOOKUP) { + *E = malformedError("for BIND_OPCODE_SET_DYLIB_SPECIAL_IMM unknown " + "special ordinal: " + + Twine((int)Ordinal) + " for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + } else + Ordinal = 0; + LibraryOrdinalSet = true; + DEBUG_WITH_TYPE( + "mach-o-bind", + dbgs() << "BIND_OPCODE_SET_DYLIB_SPECIAL_IMM: " + << "Ordinal=" << Ordinal << "\n"); + break; + case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: + Flags = ImmValue; + SymStart = Ptr; + while (*Ptr && (Ptr < Opcodes.end())) { + ++Ptr; + } + if (Ptr == Opcodes.end()) { + *E = malformedError( + "for BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM " + "symbol name extends past opcodes for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + SymbolName = StringRef(reinterpret_cast<const char*>(SymStart), + Ptr-SymStart); + ++Ptr; + DEBUG_WITH_TYPE( + "mach-o-bind", + dbgs() << "BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: " + << "SymbolName=" << SymbolName << "\n"); + if (TableKind == Kind::Weak) { + if (ImmValue & MachO::BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION) + return; + } + break; + case MachO::BIND_OPCODE_SET_TYPE_IMM: + BindType = ImmValue; + if (ImmValue > MachO::BIND_TYPE_TEXT_PCREL32) { + *E = malformedError("for BIND_OPCODE_SET_TYPE_IMM bad bind type: " + + Twine((int)ImmValue) + " for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + DEBUG_WITH_TYPE( + "mach-o-bind", + dbgs() << 
"BIND_OPCODE_SET_TYPE_IMM: " + << "BindType=" << (int)BindType << "\n"); + break; + case MachO::BIND_OPCODE_SET_ADDEND_SLEB: + Addend = readSLEB128(&error); + if (error) { + *E = malformedError("for BIND_OPCODE_SET_ADDEND_SLEB " + Twine(error) + + " for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + DEBUG_WITH_TYPE( + "mach-o-bind", + dbgs() << "BIND_OPCODE_SET_ADDEND_SLEB: " + << "Addend=" << Addend << "\n"); + break; + case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: + SegmentIndex = ImmValue; + SegmentOffset = readULEB128(&error); + if (error) { + *E = malformedError("for BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB " + + Twine(error) + " for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->BindEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset, + PointerSize); + if (error) { + *E = malformedError("for BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB " + + Twine(error) + " for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + DEBUG_WITH_TYPE( + "mach-o-bind", + dbgs() << "BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: " + << "SegmentIndex=" << SegmentIndex << ", " + << format("SegmentOffset=0x%06X", SegmentOffset) + << "\n"); + break; + case MachO::BIND_OPCODE_ADD_ADDR_ULEB: + SegmentOffset += readULEB128(&error); + if (error) { + *E = malformedError("for BIND_OPCODE_ADD_ADDR_ULEB " + Twine(error) + + " for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->BindEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset, + PointerSize); + if (error) { + *E = malformedError("for BIND_OPCODE_ADD_ADDR_ULEB " + Twine(error) + + " for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + DEBUG_WITH_TYPE("mach-o-bind", + dbgs() << "BIND_OPCODE_ADD_ADDR_ULEB: " + << format("SegmentOffset=0x%06X", + SegmentOffset) << "\n"); + 
break; + case MachO::BIND_OPCODE_DO_BIND: + AdvanceAmount = PointerSize; + RemainingLoopCount = 0; + error = O->BindEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset, + PointerSize); + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND " + Twine(error) + + " for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (SymbolName == StringRef()) { + *E = malformedError( + "for BIND_OPCODE_DO_BIND missing preceding " + "BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (!LibraryOrdinalSet && TableKind != Kind::Weak) { + *E = + malformedError("for BIND_OPCODE_DO_BIND missing preceding " + "BIND_OPCODE_SET_DYLIB_ORDINAL_* for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + DEBUG_WITH_TYPE("mach-o-bind", + dbgs() << "BIND_OPCODE_DO_BIND: " + << format("SegmentOffset=0x%06X", + SegmentOffset) << "\n"); + return; + case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: + if (TableKind == Kind::Lazy) { + *E = malformedError("BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB not allowed in " + "lazy bind table for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->BindEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset, + PointerSize); + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (SymbolName == StringRef()) { + *E = malformedError( + "for BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB missing " + "preceding BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM for opcode " + "at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (!LibraryOrdinalSet && TableKind != Kind::Weak) { + *E = malformedError( + "for BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB missing " + 
"preceding BIND_OPCODE_SET_DYLIB_ORDINAL_* for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + AdvanceAmount = readULEB128(&error) + PointerSize; + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + // Note, this is not really an error until the next bind but make no sense + // for a BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB to not be followed by another + // bind operation. + error = O->BindEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset + + AdvanceAmount, PointerSize); + if (error) { + *E = malformedError("for BIND_OPCODE_ADD_ADDR_ULEB (after adding " + "ULEB) " + + Twine(error) + " for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + RemainingLoopCount = 0; + DEBUG_WITH_TYPE( + "mach-o-bind", + dbgs() << "BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); + return; + case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED: + if (TableKind == Kind::Lazy) { + *E = malformedError("BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED not " + "allowed in lazy bind table for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (SymbolName == StringRef()) { + *E = malformedError( + "for BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED " + "missing preceding BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM for " + "opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (!LibraryOrdinalSet && TableKind != Kind::Weak) { + *E = malformedError( + "for BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED " + "missing preceding BIND_OPCODE_SET_DYLIB_ORDINAL_* for opcode " + "at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + 
moveToEnd(); + return; + } + AdvanceAmount = ImmValue * PointerSize + PointerSize; + RemainingLoopCount = 0; + error = O->BindEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset + + AdvanceAmount, PointerSize); + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED " + + Twine(error) + " for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + DEBUG_WITH_TYPE("mach-o-bind", + dbgs() + << "BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED: " + << format("SegmentOffset=0x%06X", SegmentOffset) << "\n"); + return; + case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: + if (TableKind == Kind::Lazy) { + *E = malformedError("BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB not " + "allowed in lazy bind table for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + Count = readULEB128(&error); + if (Count != 0) + RemainingLoopCount = Count - 1; + else + RemainingLoopCount = 0; + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB " + " (count value) " + + Twine(error) + " for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + Skip = readULEB128(&error); + AdvanceAmount = Skip + PointerSize; + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB " + " (skip value) " + + Twine(error) + " for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (SymbolName == StringRef()) { + *E = malformedError( + "for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB " + "missing preceding BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM for " + "opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (!LibraryOrdinalSet && TableKind != Kind::Weak) { + *E = malformedError( + "for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB " + "missing preceding BIND_OPCODE_SET_DYLIB_ORDINAL_* for 
opcode " + "at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->BindEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset, + PointerSize, Count, Skip); + if (error) { + *E = + malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB " + + Twine(error) + " for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + DEBUG_WITH_TYPE( + "mach-o-bind", + dbgs() << "BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); + return; + default: + *E = malformedError("bad bind info (bad opcode value 0x" + + Twine::utohexstr(Opcode) + " for opcode at: 0x" + + Twine::utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + } +} + +uint64_t MachOBindEntry::readULEB128(const char **error) { + unsigned Count; + uint64_t Result = decodeULEB128(Ptr, &Count, Opcodes.end(), error); + Ptr += Count; + if (Ptr > Opcodes.end()) + Ptr = Opcodes.end(); + return Result; +} + +int64_t MachOBindEntry::readSLEB128(const char **error) { + unsigned Count; + int64_t Result = decodeSLEB128(Ptr, &Count, Opcodes.end(), error); + Ptr += Count; + if (Ptr > Opcodes.end()) + Ptr = Opcodes.end(); + return Result; +} + +int32_t MachOBindEntry::segmentIndex() const { return SegmentIndex; } + +uint64_t MachOBindEntry::segmentOffset() const { return SegmentOffset; } + +StringRef MachOBindEntry::typeName() const { + switch (BindType) { + case MachO::BIND_TYPE_POINTER: + return "pointer"; + case MachO::BIND_TYPE_TEXT_ABSOLUTE32: + return "text abs32"; + case MachO::BIND_TYPE_TEXT_PCREL32: + return "text rel32"; + } + return "unknown"; +} + +StringRef MachOBindEntry::symbolName() const { return SymbolName; } + +int64_t MachOBindEntry::addend() const { return Addend; } + +uint32_t MachOBindEntry::flags() const { return Flags; } + +int 
MachOBindEntry::ordinal() const { return Ordinal; } + +// For use with the SegIndex of a checked Mach-O Bind entry +// to get the segment name. +StringRef MachOBindEntry::segmentName() const { + return O->BindRebaseSegmentName(SegmentIndex); +} + +// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind entry +// to get the section name. +StringRef MachOBindEntry::sectionName() const { + return O->BindRebaseSectionName(SegmentIndex, SegmentOffset); +} + +// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind entry +// to get the address. +uint64_t MachOBindEntry::address() const { + return O->BindRebaseAddress(SegmentIndex, SegmentOffset); +} + +bool MachOBindEntry::operator==(const MachOBindEntry &Other) const { +#ifdef EXPENSIVE_CHECKS + assert(Opcodes == Other.Opcodes && "compare iterators of different files"); +#else + assert(Opcodes.data() == Other.Opcodes.data() && "compare iterators of different files"); +#endif + return (Ptr == Other.Ptr) && + (RemainingLoopCount == Other.RemainingLoopCount) && + (Done == Other.Done); +} + +// Build table of sections so SegIndex/SegOffset pairs can be translated. +BindRebaseSegInfo::BindRebaseSegInfo(const object::MachOObjectFile *Obj) { + uint32_t CurSegIndex = Obj->hasPageZeroSegment() ? 
1 : 0; + StringRef CurSegName; + uint64_t CurSegAddress; + for (const SectionRef &Section : Obj->sections()) { + SectionInfo Info; + Expected<StringRef> NameOrErr = Section.getName(); + if (!NameOrErr) + consumeError(NameOrErr.takeError()); + else + Info.SectionName = *NameOrErr; + Info.Address = Section.getAddress(); + Info.Size = Section.getSize(); + Info.SegmentName = + Obj->getSectionFinalSegmentName(Section.getRawDataRefImpl()); + if (!Info.SegmentName.equals(CurSegName)) { + ++CurSegIndex; + CurSegName = Info.SegmentName; + CurSegAddress = Info.Address; + } + Info.SegmentIndex = CurSegIndex - 1; + Info.OffsetInSegment = Info.Address - CurSegAddress; + Info.SegmentStartAddress = CurSegAddress; + Sections.push_back(Info); + } + MaxSegIndex = CurSegIndex; +} + +// For use with a SegIndex, SegOffset, and PointerSize triple in +// MachOBindEntry::moveNext() to validate a MachOBindEntry or MachORebaseEntry. +// +// Given a SegIndex, SegOffset, and PointerSize, verify a valid section exists +// that fully contains a pointer at that location. Multiple fixups in a bind +// (such as with the BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB opcode) can +// be tested via the Count and Skip parameters. 
+const char * BindRebaseSegInfo::checkSegAndOffsets(int32_t SegIndex, + uint64_t SegOffset, + uint8_t PointerSize, + uint32_t Count, + uint32_t Skip) { + if (SegIndex == -1) + return "missing preceding *_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB"; + if (SegIndex >= MaxSegIndex) + return "bad segIndex (too large)"; + for (uint32_t i = 0; i < Count; ++i) { + uint32_t Start = SegOffset + i * (PointerSize + Skip); + uint32_t End = Start + PointerSize; + bool Found = false; + for (const SectionInfo &SI : Sections) { + if (SI.SegmentIndex != SegIndex) + continue; + if ((SI.OffsetInSegment<=Start) && (Start<(SI.OffsetInSegment+SI.Size))) { + if (End <= SI.OffsetInSegment + SI.Size) { + Found = true; + break; + } + else + return "bad offset, extends beyond section boundary"; + } + } + if (!Found) + return "bad offset, not in section"; + } + return nullptr; +} + +// For use with the SegIndex of a checked Mach-O Bind or Rebase entry +// to get the segment name. +StringRef BindRebaseSegInfo::segmentName(int32_t SegIndex) { + for (const SectionInfo &SI : Sections) { + if (SI.SegmentIndex == SegIndex) + return SI.SegmentName; + } + llvm_unreachable("invalid SegIndex"); +} + +// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind or Rebase +// to get the SectionInfo. +const BindRebaseSegInfo::SectionInfo &BindRebaseSegInfo::findSection( + int32_t SegIndex, uint64_t SegOffset) { + for (const SectionInfo &SI : Sections) { + if (SI.SegmentIndex != SegIndex) + continue; + if (SI.OffsetInSegment > SegOffset) + continue; + if (SegOffset >= (SI.OffsetInSegment + SI.Size)) + continue; + return SI; + } + llvm_unreachable("SegIndex and SegOffset not in any section"); +} + +// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind or Rebase +// entry to get the section name. 
+StringRef BindRebaseSegInfo::sectionName(int32_t SegIndex, + uint64_t SegOffset) { + return findSection(SegIndex, SegOffset).SectionName; +} + +// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind or Rebase +// entry to get the address. +uint64_t BindRebaseSegInfo::address(uint32_t SegIndex, uint64_t OffsetInSeg) { + const SectionInfo &SI = findSection(SegIndex, OffsetInSeg); + return SI.SegmentStartAddress + OffsetInSeg; +} + +iterator_range<bind_iterator> +MachOObjectFile::bindTable(Error &Err, MachOObjectFile *O, + ArrayRef<uint8_t> Opcodes, bool is64, + MachOBindEntry::Kind BKind) { + if (O->BindRebaseSectionTable == nullptr) + O->BindRebaseSectionTable = std::make_unique<BindRebaseSegInfo>(O); + MachOBindEntry Start(&Err, O, Opcodes, is64, BKind); + Start.moveToFirst(); + + MachOBindEntry Finish(&Err, O, Opcodes, is64, BKind); + Finish.moveToEnd(); + + return make_range(bind_iterator(Start), bind_iterator(Finish)); +} + +iterator_range<bind_iterator> MachOObjectFile::bindTable(Error &Err) { + return bindTable(Err, this, getDyldInfoBindOpcodes(), is64Bit(), + MachOBindEntry::Kind::Regular); +} + +iterator_range<bind_iterator> MachOObjectFile::lazyBindTable(Error &Err) { + return bindTable(Err, this, getDyldInfoLazyBindOpcodes(), is64Bit(), + MachOBindEntry::Kind::Lazy); +} + +iterator_range<bind_iterator> MachOObjectFile::weakBindTable(Error &Err) { + return bindTable(Err, this, getDyldInfoWeakBindOpcodes(), is64Bit(), + MachOBindEntry::Kind::Weak); +} + +MachOObjectFile::load_command_iterator +MachOObjectFile::begin_load_commands() const { + return LoadCommands.begin(); +} + +MachOObjectFile::load_command_iterator +MachOObjectFile::end_load_commands() const { + return LoadCommands.end(); +} + +iterator_range<MachOObjectFile::load_command_iterator> +MachOObjectFile::load_commands() const { + return make_range(begin_load_commands(), end_load_commands()); +} + +StringRef +MachOObjectFile::getSectionFinalSegmentName(DataRefImpl Sec) const { + 
ArrayRef<char> Raw = getSectionRawFinalSegmentName(Sec); + return parseSegmentOrSectionName(Raw.data()); +} + +ArrayRef<char> +MachOObjectFile::getSectionRawName(DataRefImpl Sec) const { + assert(Sec.d.a < Sections.size() && "Should have detected this earlier"); + const section_base *Base = + reinterpret_cast<const section_base *>(Sections[Sec.d.a]); + return makeArrayRef(Base->sectname); +} + +ArrayRef<char> +MachOObjectFile::getSectionRawFinalSegmentName(DataRefImpl Sec) const { + assert(Sec.d.a < Sections.size() && "Should have detected this earlier"); + const section_base *Base = + reinterpret_cast<const section_base *>(Sections[Sec.d.a]); + return makeArrayRef(Base->segname); +} + +bool +MachOObjectFile::isRelocationScattered(const MachO::any_relocation_info &RE) + const { + if (getCPUType(*this) == MachO::CPU_TYPE_X86_64) + return false; + return getPlainRelocationAddress(RE) & MachO::R_SCATTERED; +} + +unsigned MachOObjectFile::getPlainRelocationSymbolNum( + const MachO::any_relocation_info &RE) const { + if (isLittleEndian()) + return RE.r_word1 & 0xffffff; + return RE.r_word1 >> 8; +} + +bool MachOObjectFile::getPlainRelocationExternal( + const MachO::any_relocation_info &RE) const { + if (isLittleEndian()) + return (RE.r_word1 >> 27) & 1; + return (RE.r_word1 >> 4) & 1; +} + +bool MachOObjectFile::getScatteredRelocationScattered( + const MachO::any_relocation_info &RE) const { + return RE.r_word0 >> 31; +} + +uint32_t MachOObjectFile::getScatteredRelocationValue( + const MachO::any_relocation_info &RE) const { + return RE.r_word1; +} + +uint32_t MachOObjectFile::getScatteredRelocationType( + const MachO::any_relocation_info &RE) const { + return (RE.r_word0 >> 24) & 0xf; +} + +unsigned MachOObjectFile::getAnyRelocationAddress( + const MachO::any_relocation_info &RE) const { + if (isRelocationScattered(RE)) + return getScatteredRelocationAddress(RE); + return getPlainRelocationAddress(RE); +} + +unsigned MachOObjectFile::getAnyRelocationPCRel( + const 
MachO::any_relocation_info &RE) const { + if (isRelocationScattered(RE)) + return getScatteredRelocationPCRel(RE); + return getPlainRelocationPCRel(*this, RE); +} + +unsigned MachOObjectFile::getAnyRelocationLength( + const MachO::any_relocation_info &RE) const { + if (isRelocationScattered(RE)) + return getScatteredRelocationLength(RE); + return getPlainRelocationLength(*this, RE); +} + +unsigned +MachOObjectFile::getAnyRelocationType( + const MachO::any_relocation_info &RE) const { + if (isRelocationScattered(RE)) + return getScatteredRelocationType(RE); + return getPlainRelocationType(*this, RE); +} + +SectionRef +MachOObjectFile::getAnyRelocationSection( + const MachO::any_relocation_info &RE) const { + if (isRelocationScattered(RE) || getPlainRelocationExternal(RE)) + return *section_end(); + unsigned SecNum = getPlainRelocationSymbolNum(RE); + if (SecNum == MachO::R_ABS || SecNum > Sections.size()) + return *section_end(); + DataRefImpl DRI; + DRI.d.a = SecNum - 1; + return SectionRef(DRI, this); +} + +MachO::section MachOObjectFile::getSection(DataRefImpl DRI) const { + assert(DRI.d.a < Sections.size() && "Should have detected this earlier"); + return getStruct<MachO::section>(*this, Sections[DRI.d.a]); +} + +MachO::section_64 MachOObjectFile::getSection64(DataRefImpl DRI) const { + assert(DRI.d.a < Sections.size() && "Should have detected this earlier"); + return getStruct<MachO::section_64>(*this, Sections[DRI.d.a]); +} + +MachO::section MachOObjectFile::getSection(const LoadCommandInfo &L, + unsigned Index) const { + const char *Sec = getSectionPtr(*this, L, Index); + return getStruct<MachO::section>(*this, Sec); +} + +MachO::section_64 MachOObjectFile::getSection64(const LoadCommandInfo &L, + unsigned Index) const { + const char *Sec = getSectionPtr(*this, L, Index); + return getStruct<MachO::section_64>(*this, Sec); +} + +MachO::nlist +MachOObjectFile::getSymbolTableEntry(DataRefImpl DRI) const { + const char *P = reinterpret_cast<const char *>(DRI.p); 
+ return getStruct<MachO::nlist>(*this, P); +} + +MachO::nlist_64 +MachOObjectFile::getSymbol64TableEntry(DataRefImpl DRI) const { + const char *P = reinterpret_cast<const char *>(DRI.p); + return getStruct<MachO::nlist_64>(*this, P); +} + +MachO::linkedit_data_command +MachOObjectFile::getLinkeditDataLoadCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::linkedit_data_command>(*this, L.Ptr); +} + +MachO::segment_command +MachOObjectFile::getSegmentLoadCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::segment_command>(*this, L.Ptr); +} + +MachO::segment_command_64 +MachOObjectFile::getSegment64LoadCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::segment_command_64>(*this, L.Ptr); +} + +MachO::linker_option_command +MachOObjectFile::getLinkerOptionLoadCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::linker_option_command>(*this, L.Ptr); +} + +MachO::version_min_command +MachOObjectFile::getVersionMinLoadCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::version_min_command>(*this, L.Ptr); +} + +MachO::note_command +MachOObjectFile::getNoteLoadCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::note_command>(*this, L.Ptr); +} + +MachO::build_version_command +MachOObjectFile::getBuildVersionLoadCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::build_version_command>(*this, L.Ptr); +} + +MachO::build_tool_version +MachOObjectFile::getBuildToolVersion(unsigned index) const { + return getStruct<MachO::build_tool_version>(*this, BuildTools[index]); +} + +MachO::dylib_command +MachOObjectFile::getDylibIDLoadCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::dylib_command>(*this, L.Ptr); +} + +MachO::dyld_info_command +MachOObjectFile::getDyldInfoLoadCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::dyld_info_command>(*this, L.Ptr); +} + +MachO::dylinker_command +MachOObjectFile::getDylinkerCommand(const 
LoadCommandInfo &L) const { + return getStruct<MachO::dylinker_command>(*this, L.Ptr); +} + +MachO::uuid_command +MachOObjectFile::getUuidCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::uuid_command>(*this, L.Ptr); +} + +MachO::rpath_command +MachOObjectFile::getRpathCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::rpath_command>(*this, L.Ptr); +} + +MachO::source_version_command +MachOObjectFile::getSourceVersionCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::source_version_command>(*this, L.Ptr); +} + +MachO::entry_point_command +MachOObjectFile::getEntryPointCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::entry_point_command>(*this, L.Ptr); +} + +MachO::encryption_info_command +MachOObjectFile::getEncryptionInfoCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::encryption_info_command>(*this, L.Ptr); +} + +MachO::encryption_info_command_64 +MachOObjectFile::getEncryptionInfoCommand64(const LoadCommandInfo &L) const { + return getStruct<MachO::encryption_info_command_64>(*this, L.Ptr); +} + +MachO::sub_framework_command +MachOObjectFile::getSubFrameworkCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::sub_framework_command>(*this, L.Ptr); +} + +MachO::sub_umbrella_command +MachOObjectFile::getSubUmbrellaCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::sub_umbrella_command>(*this, L.Ptr); +} + +MachO::sub_library_command +MachOObjectFile::getSubLibraryCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::sub_library_command>(*this, L.Ptr); +} + +MachO::sub_client_command +MachOObjectFile::getSubClientCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::sub_client_command>(*this, L.Ptr); +} + +MachO::routines_command +MachOObjectFile::getRoutinesCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::routines_command>(*this, L.Ptr); +} + +MachO::routines_command_64 
+MachOObjectFile::getRoutinesCommand64(const LoadCommandInfo &L) const { + return getStruct<MachO::routines_command_64>(*this, L.Ptr); +} + +MachO::thread_command +MachOObjectFile::getThreadCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::thread_command>(*this, L.Ptr); +} + +MachO::any_relocation_info +MachOObjectFile::getRelocation(DataRefImpl Rel) const { + uint32_t Offset; + if (getHeader().filetype == MachO::MH_OBJECT) { + DataRefImpl Sec; + Sec.d.a = Rel.d.a; + if (is64Bit()) { + MachO::section_64 Sect = getSection64(Sec); + Offset = Sect.reloff; + } else { + MachO::section Sect = getSection(Sec); + Offset = Sect.reloff; + } + } else { + MachO::dysymtab_command DysymtabLoadCmd = getDysymtabLoadCommand(); + if (Rel.d.a == 0) + Offset = DysymtabLoadCmd.extreloff; // Offset to the external relocations + else + Offset = DysymtabLoadCmd.locreloff; // Offset to the local relocations + } + + auto P = reinterpret_cast<const MachO::any_relocation_info *>( + getPtr(*this, Offset)) + Rel.d.b; + return getStruct<MachO::any_relocation_info>( + *this, reinterpret_cast<const char *>(P)); +} + +MachO::data_in_code_entry +MachOObjectFile::getDice(DataRefImpl Rel) const { + const char *P = reinterpret_cast<const char *>(Rel.p); + return getStruct<MachO::data_in_code_entry>(*this, P); +} + +const MachO::mach_header &MachOObjectFile::getHeader() const { + return Header; +} + +const MachO::mach_header_64 &MachOObjectFile::getHeader64() const { + assert(is64Bit()); + return Header64; +} + +uint32_t MachOObjectFile::getIndirectSymbolTableEntry( + const MachO::dysymtab_command &DLC, + unsigned Index) const { + uint64_t Offset = DLC.indirectsymoff + Index * sizeof(uint32_t); + return getStruct<uint32_t>(*this, getPtr(*this, Offset)); +} + +MachO::data_in_code_entry +MachOObjectFile::getDataInCodeTableEntry(uint32_t DataOffset, + unsigned Index) const { + uint64_t Offset = DataOffset + Index * sizeof(MachO::data_in_code_entry); + return 
getStruct<MachO::data_in_code_entry>(*this, getPtr(*this, Offset)); +} + +MachO::symtab_command MachOObjectFile::getSymtabLoadCommand() const { + if (SymtabLoadCmd) + return getStruct<MachO::symtab_command>(*this, SymtabLoadCmd); + + // If there is no SymtabLoadCmd return a load command with zero'ed fields. + MachO::symtab_command Cmd; + Cmd.cmd = MachO::LC_SYMTAB; + Cmd.cmdsize = sizeof(MachO::symtab_command); + Cmd.symoff = 0; + Cmd.nsyms = 0; + Cmd.stroff = 0; + Cmd.strsize = 0; + return Cmd; +} + +MachO::dysymtab_command MachOObjectFile::getDysymtabLoadCommand() const { + if (DysymtabLoadCmd) + return getStruct<MachO::dysymtab_command>(*this, DysymtabLoadCmd); + + // If there is no DysymtabLoadCmd return a load command with zero'ed fields. + MachO::dysymtab_command Cmd; + Cmd.cmd = MachO::LC_DYSYMTAB; + Cmd.cmdsize = sizeof(MachO::dysymtab_command); + Cmd.ilocalsym = 0; + Cmd.nlocalsym = 0; + Cmd.iextdefsym = 0; + Cmd.nextdefsym = 0; + Cmd.iundefsym = 0; + Cmd.nundefsym = 0; + Cmd.tocoff = 0; + Cmd.ntoc = 0; + Cmd.modtaboff = 0; + Cmd.nmodtab = 0; + Cmd.extrefsymoff = 0; + Cmd.nextrefsyms = 0; + Cmd.indirectsymoff = 0; + Cmd.nindirectsyms = 0; + Cmd.extreloff = 0; + Cmd.nextrel = 0; + Cmd.locreloff = 0; + Cmd.nlocrel = 0; + return Cmd; +} + +MachO::linkedit_data_command +MachOObjectFile::getDataInCodeLoadCommand() const { + if (DataInCodeLoadCmd) + return getStruct<MachO::linkedit_data_command>(*this, DataInCodeLoadCmd); + + // If there is no DataInCodeLoadCmd return a load command with zero'ed fields. + MachO::linkedit_data_command Cmd; + Cmd.cmd = MachO::LC_DATA_IN_CODE; + Cmd.cmdsize = sizeof(MachO::linkedit_data_command); + Cmd.dataoff = 0; + Cmd.datasize = 0; + return Cmd; +} + +MachO::linkedit_data_command +MachOObjectFile::getLinkOptHintsLoadCommand() const { + if (LinkOptHintsLoadCmd) + return getStruct<MachO::linkedit_data_command>(*this, LinkOptHintsLoadCmd); + + // If there is no LinkOptHintsLoadCmd return a load command with zero'ed + // fields. 
+ MachO::linkedit_data_command Cmd; + Cmd.cmd = MachO::LC_LINKER_OPTIMIZATION_HINT; + Cmd.cmdsize = sizeof(MachO::linkedit_data_command); + Cmd.dataoff = 0; + Cmd.datasize = 0; + return Cmd; +} + +ArrayRef<uint8_t> MachOObjectFile::getDyldInfoRebaseOpcodes() const { + if (!DyldInfoLoadCmd) + return None; + + auto DyldInfoOrErr = + getStructOrErr<MachO::dyld_info_command>(*this, DyldInfoLoadCmd); + if (!DyldInfoOrErr) + return None; + MachO::dyld_info_command DyldInfo = DyldInfoOrErr.get(); + const uint8_t *Ptr = + reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.rebase_off)); + return makeArrayRef(Ptr, DyldInfo.rebase_size); +} + +ArrayRef<uint8_t> MachOObjectFile::getDyldInfoBindOpcodes() const { + if (!DyldInfoLoadCmd) + return None; + + auto DyldInfoOrErr = + getStructOrErr<MachO::dyld_info_command>(*this, DyldInfoLoadCmd); + if (!DyldInfoOrErr) + return None; + MachO::dyld_info_command DyldInfo = DyldInfoOrErr.get(); + const uint8_t *Ptr = + reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.bind_off)); + return makeArrayRef(Ptr, DyldInfo.bind_size); +} + +ArrayRef<uint8_t> MachOObjectFile::getDyldInfoWeakBindOpcodes() const { + if (!DyldInfoLoadCmd) + return None; + + auto DyldInfoOrErr = + getStructOrErr<MachO::dyld_info_command>(*this, DyldInfoLoadCmd); + if (!DyldInfoOrErr) + return None; + MachO::dyld_info_command DyldInfo = DyldInfoOrErr.get(); + const uint8_t *Ptr = + reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.weak_bind_off)); + return makeArrayRef(Ptr, DyldInfo.weak_bind_size); +} + +ArrayRef<uint8_t> MachOObjectFile::getDyldInfoLazyBindOpcodes() const { + if (!DyldInfoLoadCmd) + return None; + + auto DyldInfoOrErr = + getStructOrErr<MachO::dyld_info_command>(*this, DyldInfoLoadCmd); + if (!DyldInfoOrErr) + return None; + MachO::dyld_info_command DyldInfo = DyldInfoOrErr.get(); + const uint8_t *Ptr = + reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.lazy_bind_off)); + return makeArrayRef(Ptr, 
DyldInfo.lazy_bind_size); +} + +ArrayRef<uint8_t> MachOObjectFile::getDyldInfoExportsTrie() const { + if (!DyldInfoLoadCmd) + return None; + + auto DyldInfoOrErr = + getStructOrErr<MachO::dyld_info_command>(*this, DyldInfoLoadCmd); + if (!DyldInfoOrErr) + return None; + MachO::dyld_info_command DyldInfo = DyldInfoOrErr.get(); + const uint8_t *Ptr = + reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.export_off)); + return makeArrayRef(Ptr, DyldInfo.export_size); +} + +ArrayRef<uint8_t> MachOObjectFile::getUuid() const { + if (!UuidLoadCmd) + return None; + // Returning a pointer is fine as uuid doesn't need endian swapping. + const char *Ptr = UuidLoadCmd + offsetof(MachO::uuid_command, uuid); + return makeArrayRef(reinterpret_cast<const uint8_t *>(Ptr), 16); +} + +StringRef MachOObjectFile::getStringTableData() const { + MachO::symtab_command S = getSymtabLoadCommand(); + return getData().substr(S.stroff, S.strsize); +} + +bool MachOObjectFile::is64Bit() const { + return getType() == getMachOType(false, true) || + getType() == getMachOType(true, true); +} + +void MachOObjectFile::ReadULEB128s(uint64_t Index, + SmallVectorImpl<uint64_t> &Out) const { + DataExtractor extractor(ObjectFile::getData(), true, 0); + + uint64_t offset = Index; + uint64_t data = 0; + while (uint64_t delta = extractor.getULEB128(&offset)) { + data += delta; + Out.push_back(data); + } +} + +bool MachOObjectFile::isRelocatableObject() const { + return getHeader().filetype == MachO::MH_OBJECT; +} + +Expected<std::unique_ptr<MachOObjectFile>> +ObjectFile::createMachOObjectFile(MemoryBufferRef Buffer, + uint32_t UniversalCputype, + uint32_t UniversalIndex) { + StringRef Magic = Buffer.getBuffer().slice(0, 4); + if (Magic == "\xFE\xED\xFA\xCE") + return MachOObjectFile::create(Buffer, false, false, + UniversalCputype, UniversalIndex); + if (Magic == "\xCE\xFA\xED\xFE") + return MachOObjectFile::create(Buffer, true, false, + UniversalCputype, UniversalIndex); + if (Magic == 
"\xFE\xED\xFA\xCF") + return MachOObjectFile::create(Buffer, false, true, + UniversalCputype, UniversalIndex); + if (Magic == "\xCF\xFA\xED\xFE") + return MachOObjectFile::create(Buffer, true, true, + UniversalCputype, UniversalIndex); + return make_error<GenericBinaryError>("Unrecognized MachO magic number", + object_error::invalid_file_type); +} + +StringRef MachOObjectFile::mapDebugSectionName(StringRef Name) const { + return StringSwitch<StringRef>(Name) + .Case("debug_str_offs", "debug_str_offsets") + .Default(Name); +} + +Expected<std::vector<std::string>> +MachOObjectFile::findDsymObjectMembers(StringRef Path) { + SmallString<256> BundlePath(Path); + // Normalize input path. This is necessary to accept `bundle.dSYM/`. + sys::path::remove_dots(BundlePath); + if (!sys::fs::is_directory(BundlePath) || + sys::path::extension(BundlePath) != ".dSYM") + return std::vector<std::string>(); + sys::path::append(BundlePath, "Contents", "Resources", "DWARF"); + bool IsDir; + auto EC = sys::fs::is_directory(BundlePath, IsDir); + if (EC == errc::no_such_file_or_directory || (!EC && !IsDir)) + return createStringError( + EC, "%s: expected directory 'Contents/Resources/DWARF' in dSYM bundle", + Path.str().c_str()); + if (EC) + return createFileError(BundlePath, errorCodeToError(EC)); + + std::vector<std::string> ObjectPaths; + for (sys::fs::directory_iterator Dir(BundlePath, EC), DirEnd; + Dir != DirEnd && !EC; Dir.increment(EC)) { + StringRef ObjectPath = Dir->path(); + sys::fs::file_status Status; + if (auto EC = sys::fs::status(ObjectPath, Status)) + return createFileError(ObjectPath, errorCodeToError(EC)); + switch (Status.type()) { + case sys::fs::file_type::regular_file: + case sys::fs::file_type::symlink_file: + case sys::fs::file_type::type_unknown: + ObjectPaths.push_back(ObjectPath.str()); + break; + default: /*ignore*/; + } + } + if (EC) + return createFileError(BundlePath, errorCodeToError(EC)); + if (ObjectPaths.empty()) + return 
createStringError(std::error_code(), + "%s: no objects found in dSYM bundle", + Path.str().c_str()); + return ObjectPaths; +} + +llvm::binaryformat::Swift5ReflectionSectionKind +MachOObjectFile::mapReflectionSectionNameToEnumValue( + StringRef SectionName) const { +#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF) \ + .Case(MACHO, llvm::binaryformat::Swift5ReflectionSectionKind::KIND) + return StringSwitch<llvm::binaryformat::Swift5ReflectionSectionKind>( + SectionName) +#include "llvm/BinaryFormat/Swift.def" + .Default(llvm::binaryformat::Swift5ReflectionSectionKind::unknown); +#undef HANDLE_SWIFT_SECTION +} diff --git a/contrib/libs/llvm14/lib/Object/MachOUniversal.cpp b/contrib/libs/llvm14/lib/Object/MachOUniversal.cpp new file mode 100644 index 0000000000..f3ce005e6e --- /dev/null +++ b/contrib/libs/llvm14/lib/Object/MachOUniversal.cpp @@ -0,0 +1,272 @@ +//===- MachOUniversal.cpp - Mach-O universal binary -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the MachOUniversalBinary class. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/MachOUniversal.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/IRObjectFile.h" +#include "llvm/Object/MachO.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; +using namespace object; + +static Error +malformedError(Twine Msg) { + std::string StringMsg = "truncated or malformed fat file (" + Msg.str() + ")"; + return make_error<GenericBinaryError>(std::move(StringMsg), + object_error::parse_failed); +} + +template<typename T> +static T getUniversalBinaryStruct(const char *Ptr) { + T Res; + memcpy(&Res, Ptr, sizeof(T)); + // Universal binary headers have big-endian byte order. + if (sys::IsLittleEndianHost) + swapStruct(Res); + return Res; +} + +MachOUniversalBinary::ObjectForArch::ObjectForArch( + const MachOUniversalBinary *Parent, uint32_t Index) + : Parent(Parent), Index(Index) { + // The iterators use Parent as a nullptr and an Index+1 == NumberOfObjects. + if (!Parent || Index >= Parent->getNumberOfObjects()) { + clear(); + } else { + // Parse object header. 
+ StringRef ParentData = Parent->getData(); + if (Parent->getMagic() == MachO::FAT_MAGIC) { + const char *HeaderPos = ParentData.begin() + sizeof(MachO::fat_header) + + Index * sizeof(MachO::fat_arch); + Header = getUniversalBinaryStruct<MachO::fat_arch>(HeaderPos); + } else { // Parent->getMagic() == MachO::FAT_MAGIC_64 + const char *HeaderPos = ParentData.begin() + sizeof(MachO::fat_header) + + Index * sizeof(MachO::fat_arch_64); + Header64 = getUniversalBinaryStruct<MachO::fat_arch_64>(HeaderPos); + } + } +} + +Expected<std::unique_ptr<MachOObjectFile>> +MachOUniversalBinary::ObjectForArch::getAsObjectFile() const { + if (!Parent) + report_fatal_error("MachOUniversalBinary::ObjectForArch::getAsObjectFile() " + "called when Parent is a nullptr"); + + StringRef ParentData = Parent->getData(); + StringRef ObjectData; + uint32_t cputype; + if (Parent->getMagic() == MachO::FAT_MAGIC) { + ObjectData = ParentData.substr(Header.offset, Header.size); + cputype = Header.cputype; + } else { // Parent->getMagic() == MachO::FAT_MAGIC_64 + ObjectData = ParentData.substr(Header64.offset, Header64.size); + cputype = Header64.cputype; + } + StringRef ObjectName = Parent->getFileName(); + MemoryBufferRef ObjBuffer(ObjectData, ObjectName); + return ObjectFile::createMachOObjectFile(ObjBuffer, cputype, Index); +} + +Expected<std::unique_ptr<IRObjectFile>> +MachOUniversalBinary::ObjectForArch::getAsIRObject(LLVMContext &Ctx) const { + if (!Parent) + report_fatal_error("MachOUniversalBinary::ObjectForArch::getAsIRObject() " + "called when Parent is a nullptr"); + + StringRef ParentData = Parent->getData(); + StringRef ObjectData; + if (Parent->getMagic() == MachO::FAT_MAGIC) { + ObjectData = ParentData.substr(Header.offset, Header.size); + } else { // Parent->getMagic() == MachO::FAT_MAGIC_64 + ObjectData = ParentData.substr(Header64.offset, Header64.size); + } + StringRef ObjectName = Parent->getFileName(); + MemoryBufferRef ObjBuffer(ObjectData, ObjectName); + + return 
IRObjectFile::create(ObjBuffer, Ctx); +} + +Expected<std::unique_ptr<Archive>> +MachOUniversalBinary::ObjectForArch::getAsArchive() const { + if (!Parent) + report_fatal_error("MachOUniversalBinary::ObjectForArch::getAsArchive() " + "called when Parent is a nullptr"); + + StringRef ParentData = Parent->getData(); + StringRef ObjectData; + if (Parent->getMagic() == MachO::FAT_MAGIC) + ObjectData = ParentData.substr(Header.offset, Header.size); + else // Parent->getMagic() == MachO::FAT_MAGIC_64 + ObjectData = ParentData.substr(Header64.offset, Header64.size); + StringRef ObjectName = Parent->getFileName(); + MemoryBufferRef ObjBuffer(ObjectData, ObjectName); + return Archive::create(ObjBuffer); +} + +void MachOUniversalBinary::anchor() { } + +Expected<std::unique_ptr<MachOUniversalBinary>> +MachOUniversalBinary::create(MemoryBufferRef Source) { + Error Err = Error::success(); + std::unique_ptr<MachOUniversalBinary> Ret( + new MachOUniversalBinary(Source, Err)); + if (Err) + return std::move(Err); + return std::move(Ret); +} + +MachOUniversalBinary::MachOUniversalBinary(MemoryBufferRef Source, Error &Err) + : Binary(Binary::ID_MachOUniversalBinary, Source), Magic(0), + NumberOfObjects(0) { + ErrorAsOutParameter ErrAsOutParam(&Err); + if (Data.getBufferSize() < sizeof(MachO::fat_header)) { + Err = make_error<GenericBinaryError>("File too small to be a Mach-O " + "universal file", + object_error::invalid_file_type); + return; + } + // Check for magic value and sufficient header size. 
+ StringRef Buf = getData(); + MachO::fat_header H = + getUniversalBinaryStruct<MachO::fat_header>(Buf.begin()); + Magic = H.magic; + NumberOfObjects = H.nfat_arch; + if (NumberOfObjects == 0) { + Err = malformedError("contains zero architecture types"); + return; + } + uint32_t MinSize = sizeof(MachO::fat_header); + if (Magic == MachO::FAT_MAGIC) + MinSize += sizeof(MachO::fat_arch) * NumberOfObjects; + else if (Magic == MachO::FAT_MAGIC_64) + MinSize += sizeof(MachO::fat_arch_64) * NumberOfObjects; + else { + Err = malformedError("bad magic number"); + return; + } + if (Buf.size() < MinSize) { + Err = malformedError("fat_arch" + + Twine(Magic == MachO::FAT_MAGIC ? "" : "_64") + + " structs would extend past the end of the file"); + return; + } + for (uint32_t i = 0; i < NumberOfObjects; i++) { + ObjectForArch A(this, i); + uint64_t bigSize = A.getOffset(); + bigSize += A.getSize(); + if (bigSize > Buf.size()) { + Err = malformedError("offset plus size of cputype (" + + Twine(A.getCPUType()) + ") cpusubtype (" + + Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) + + ") extends past the end of the file"); + return; + } + + if (A.getAlign() > MaxSectionAlignment) { + Err = malformedError("align (2^" + Twine(A.getAlign()) + + ") too large for cputype (" + Twine(A.getCPUType()) + + ") cpusubtype (" + + Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) + + ") (maximum 2^" + Twine(MaxSectionAlignment) + ")"); + return; + } + if(A.getOffset() % (1ull << A.getAlign()) != 0){ + Err = malformedError("offset: " + Twine(A.getOffset()) + + " for cputype (" + Twine(A.getCPUType()) + ") cpusubtype (" + + Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) + + ") not aligned on it's alignment (2^" + Twine(A.getAlign()) + ")"); + return; + } + if (A.getOffset() < MinSize) { + Err = malformedError("cputype (" + Twine(A.getCPUType()) + ") " + "cpusubtype (" + Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) + + ") offset " + Twine(A.getOffset()) + " overlaps universal 
headers"); + return; + } + } + for (uint32_t i = 0; i < NumberOfObjects; i++) { + ObjectForArch A(this, i); + for (uint32_t j = i + 1; j < NumberOfObjects; j++) { + ObjectForArch B(this, j); + if (A.getCPUType() == B.getCPUType() && + (A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) == + (B.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK)) { + Err = malformedError("contains two of the same architecture (cputype " + "(" + Twine(A.getCPUType()) + ") cpusubtype (" + + Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) + "))"); + return; + } + if ((A.getOffset() >= B.getOffset() && + A.getOffset() < B.getOffset() + B.getSize()) || + (A.getOffset() + A.getSize() > B.getOffset() && + A.getOffset() + A.getSize() < B.getOffset() + B.getSize()) || + (A.getOffset() <= B.getOffset() && + A.getOffset() + A.getSize() >= B.getOffset() + B.getSize())) { + Err = malformedError("cputype (" + Twine(A.getCPUType()) + ") " + "cpusubtype (" + Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) + + ") at offset " + Twine(A.getOffset()) + " with a size of " + + Twine(A.getSize()) + ", overlaps cputype (" + Twine(B.getCPUType()) + + ") cpusubtype (" + Twine(B.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) + + ") at offset " + Twine(B.getOffset()) + " with a size of " + + Twine(B.getSize())); + return; + } + } + } + Err = Error::success(); +} + +Expected<MachOUniversalBinary::ObjectForArch> +MachOUniversalBinary::getObjectForArch(StringRef ArchName) const { + if (Triple(ArchName).getArch() == Triple::ArchType::UnknownArch) + return make_error<GenericBinaryError>("Unknown architecture " + "named: " + + ArchName, + object_error::arch_not_found); + for (const auto &Obj : objects()) + if (Obj.getArchFlagName() == ArchName) + return Obj; + return make_error<GenericBinaryError>("fat file does not " + "contain " + + ArchName, + object_error::arch_not_found); +} + +Expected<std::unique_ptr<MachOObjectFile>> +MachOUniversalBinary::getMachOObjectForArch(StringRef ArchName) const { + Expected<ObjectForArch> O 
= getObjectForArch(ArchName); + if (!O) + return O.takeError(); + return O->getAsObjectFile(); +} + +Expected<std::unique_ptr<IRObjectFile>> +MachOUniversalBinary::getIRObjectForArch(StringRef ArchName, + LLVMContext &Ctx) const { + Expected<ObjectForArch> O = getObjectForArch(ArchName); + if (!O) + return O.takeError(); + return O->getAsIRObject(Ctx); +} + +Expected<std::unique_ptr<Archive>> +MachOUniversalBinary::getArchiveForArch(StringRef ArchName) const { + Expected<ObjectForArch> O = getObjectForArch(ArchName); + if (!O) + return O.takeError(); + return O->getAsArchive(); +} diff --git a/contrib/libs/llvm14/lib/Object/MachOUniversalWriter.cpp b/contrib/libs/llvm14/lib/Object/MachOUniversalWriter.cpp new file mode 100644 index 0000000000..ae1ff09a4f --- /dev/null +++ b/contrib/libs/llvm14/lib/Object/MachOUniversalWriter.cpp @@ -0,0 +1,325 @@ +//===- MachOUniversalWriter.cpp - MachO universal binary writer---*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines the Slice class and writeUniversalBinary function for writing a MachO +// universal binary file. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/MachOUniversalWriter.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/Error.h" +#include "llvm/Object/IRObjectFile.h" +#include "llvm/Object/MachO.h" +#include "llvm/Object/MachOUniversal.h" + +using namespace llvm; +using namespace object; + +// For compatibility with cctools lipo, a file's alignment is calculated as the +// minimum aligment of all segments. For object files, the file's alignment is +// the maximum alignment of its sections. 
+static uint32_t calculateFileAlignment(const MachOObjectFile &O) { + uint32_t P2CurrentAlignment; + uint32_t P2MinAlignment = MachOUniversalBinary::MaxSectionAlignment; + const bool Is64Bit = O.is64Bit(); + + for (const auto &LC : O.load_commands()) { + if (LC.C.cmd != (Is64Bit ? MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT)) + continue; + if (O.getHeader().filetype == MachO::MH_OBJECT) { + unsigned NumberOfSections = + (Is64Bit ? O.getSegment64LoadCommand(LC).nsects + : O.getSegmentLoadCommand(LC).nsects); + P2CurrentAlignment = NumberOfSections ? 2 : P2MinAlignment; + for (unsigned SI = 0; SI < NumberOfSections; ++SI) { + P2CurrentAlignment = std::max(P2CurrentAlignment, + (Is64Bit ? O.getSection64(LC, SI).align + : O.getSection(LC, SI).align)); + } + } else { + P2CurrentAlignment = + countTrailingZeros(Is64Bit ? O.getSegment64LoadCommand(LC).vmaddr + : O.getSegmentLoadCommand(LC).vmaddr); + } + P2MinAlignment = std::min(P2MinAlignment, P2CurrentAlignment); + } + // return a value >= 4 byte aligned, and less than MachO MaxSectionAlignment + return std::max( + static_cast<uint32_t>(2), + std::min(P2MinAlignment, static_cast<uint32_t>( + MachOUniversalBinary::MaxSectionAlignment))); +} + +static uint32_t calculateAlignment(const MachOObjectFile &ObjectFile) { + switch (ObjectFile.getHeader().cputype) { + case MachO::CPU_TYPE_I386: + case MachO::CPU_TYPE_X86_64: + case MachO::CPU_TYPE_POWERPC: + case MachO::CPU_TYPE_POWERPC64: + return 12; // log2 value of page size(4k) for x86 and PPC + case MachO::CPU_TYPE_ARM: + case MachO::CPU_TYPE_ARM64: + case MachO::CPU_TYPE_ARM64_32: + return 14; // log2 value of page size(16k) for Darwin ARM + default: + return calculateFileAlignment(ObjectFile); + } +} + +Slice::Slice(const Archive &A, uint32_t CPUType, uint32_t CPUSubType, + std::string ArchName, uint32_t Align) + : B(&A), CPUType(CPUType), CPUSubType(CPUSubType), + ArchName(std::move(ArchName)), P2Alignment(Align) {} + +Slice::Slice(const MachOObjectFile &O, uint32_t Align) 
+ : B(&O), CPUType(O.getHeader().cputype), + CPUSubType(O.getHeader().cpusubtype), + ArchName(std::string(O.getArchTriple().getArchName())), + P2Alignment(Align) {} + +Slice::Slice(const IRObjectFile &IRO, uint32_t CPUType, uint32_t CPUSubType, + std::string ArchName, uint32_t Align) + : B(&IRO), CPUType(CPUType), CPUSubType(CPUSubType), + ArchName(std::move(ArchName)), P2Alignment(Align) {} + +Slice::Slice(const MachOObjectFile &O) : Slice(O, calculateAlignment(O)) {} + +using MachoCPUTy = std::pair<unsigned, unsigned>; + +static Expected<MachoCPUTy> getMachoCPUFromTriple(Triple TT) { + auto CPU = std::make_pair(MachO::getCPUType(TT), MachO::getCPUSubType(TT)); + if (!CPU.first) { + return CPU.first.takeError(); + } + if (!CPU.second) { + return CPU.second.takeError(); + } + return std::make_pair(*CPU.first, *CPU.second); +} + +static Expected<MachoCPUTy> getMachoCPUFromTriple(StringRef TT) { + return getMachoCPUFromTriple(Triple{TT}); +} + +Expected<Slice> Slice::create(const Archive &A, LLVMContext *LLVMCtx) { + Error Err = Error::success(); + std::unique_ptr<MachOObjectFile> MFO = nullptr; + std::unique_ptr<IRObjectFile> IRFO = nullptr; + for (const Archive::Child &Child : A.children(Err)) { + Expected<std::unique_ptr<Binary>> ChildOrErr = Child.getAsBinary(LLVMCtx); + if (!ChildOrErr) + return createFileError(A.getFileName(), ChildOrErr.takeError()); + Binary *Bin = ChildOrErr.get().get(); + if (Bin->isMachOUniversalBinary()) + return createStringError(std::errc::invalid_argument, + ("archive member " + Bin->getFileName() + + " is a fat file (not allowed in an archive)") + .str() + .c_str()); + if (Bin->isMachO()) { + MachOObjectFile *O = cast<MachOObjectFile>(Bin); + if (IRFO) { + return createStringError( + std::errc::invalid_argument, + "archive member %s is a MachO, while previous archive member " + "%s was an IR LLVM object", + O->getFileName().str().c_str(), IRFO->getFileName().str().c_str()); + } + if (MFO && + std::tie(MFO->getHeader().cputype, 
MFO->getHeader().cpusubtype) != + std::tie(O->getHeader().cputype, O->getHeader().cpusubtype)) { + return createStringError( + std::errc::invalid_argument, + ("archive member " + O->getFileName() + " cputype (" + + Twine(O->getHeader().cputype) + ") and cpusubtype(" + + Twine(O->getHeader().cpusubtype) + + ") does not match previous archive members cputype (" + + Twine(MFO->getHeader().cputype) + ") and cpusubtype(" + + Twine(MFO->getHeader().cpusubtype) + + ") (all members must match) " + MFO->getFileName()) + .str() + .c_str()); + } + if (!MFO) { + ChildOrErr.get().release(); + MFO.reset(O); + } + } else if (Bin->isIR()) { + IRObjectFile *O = cast<IRObjectFile>(Bin); + if (MFO) { + return createStringError(std::errc::invalid_argument, + "archive member '%s' is an LLVM IR object, " + "while previous archive member " + "'%s' was a MachO", + O->getFileName().str().c_str(), + MFO->getFileName().str().c_str()); + } + if (IRFO) { + Expected<MachoCPUTy> CPUO = getMachoCPUFromTriple(O->getTargetTriple()); + Expected<MachoCPUTy> CPUFO = + getMachoCPUFromTriple(IRFO->getTargetTriple()); + if (!CPUO) + return CPUO.takeError(); + if (!CPUFO) + return CPUFO.takeError(); + if (*CPUO != *CPUFO) { + return createStringError( + std::errc::invalid_argument, + ("archive member " + O->getFileName() + " cputype (" + + Twine(CPUO->first) + ") and cpusubtype(" + Twine(CPUO->second) + + ") does not match previous archive members cputype (" + + Twine(CPUFO->first) + ") and cpusubtype(" + + Twine(CPUFO->second) + ") (all members must match) " + + IRFO->getFileName()) + .str() + .c_str()); + } + } else { + ChildOrErr.get().release(); + IRFO.reset(O); + } + } else + return createStringError(std::errc::invalid_argument, + ("archive member " + Bin->getFileName() + + " is neither a MachO file or an LLVM IR file " + "(not allowed in an archive)") + .str() + .c_str()); + } + if (Err) + return createFileError(A.getFileName(), std::move(Err)); + if (!MFO && !IRFO) + return createStringError( + 
std::errc::invalid_argument, + ("empty archive with no architecture specification: " + + A.getFileName() + " (can't determine architecture for it)") + .str() + .c_str()); + + if (MFO) { + Slice ArchiveSlice(*(MFO.get()), MFO->is64Bit() ? 3 : 2); + ArchiveSlice.B = &A; + return ArchiveSlice; + } + + // For IR objects + Expected<Slice> ArchiveSliceOrErr = Slice::create(*IRFO, 0); + if (!ArchiveSliceOrErr) + return createFileError(A.getFileName(), ArchiveSliceOrErr.takeError()); + auto &ArchiveSlice = ArchiveSliceOrErr.get(); + ArchiveSlice.B = &A; + return std::move(ArchiveSlice); +} + +Expected<Slice> Slice::create(const IRObjectFile &IRO, uint32_t Align) { + Expected<MachoCPUTy> CPUOrErr = getMachoCPUFromTriple(IRO.getTargetTriple()); + if (!CPUOrErr) + return CPUOrErr.takeError(); + unsigned CPUType, CPUSubType; + std::tie(CPUType, CPUSubType) = CPUOrErr.get(); + // We don't directly use the architecture name of the target triple T, as, + // for instance, thumb is treated as ARM by the MachOUniversal object. 
+ std::string ArchName( + MachOObjectFile::getArchTriple(CPUType, CPUSubType).getArchName()); + return Slice{IRO, CPUType, CPUSubType, std::move(ArchName), Align}; +} + +static Expected<SmallVector<MachO::fat_arch, 2>> +buildFatArchList(ArrayRef<Slice> Slices) { + SmallVector<MachO::fat_arch, 2> FatArchList; + uint64_t Offset = + sizeof(MachO::fat_header) + Slices.size() * sizeof(MachO::fat_arch); + + for (const auto &S : Slices) { + Offset = alignTo(Offset, 1ull << S.getP2Alignment()); + if (Offset > UINT32_MAX) + return createStringError( + std::errc::invalid_argument, + ("fat file too large to be created because the offset " + "field in struct fat_arch is only 32-bits and the offset " + + Twine(Offset) + " for " + S.getBinary()->getFileName() + + " for architecture " + S.getArchString() + "exceeds that.") + .str() + .c_str()); + + MachO::fat_arch FatArch; + FatArch.cputype = S.getCPUType(); + FatArch.cpusubtype = S.getCPUSubType(); + FatArch.offset = Offset; + FatArch.size = S.getBinary()->getMemoryBufferRef().getBufferSize(); + FatArch.align = S.getP2Alignment(); + Offset += FatArch.size; + FatArchList.push_back(FatArch); + } + return FatArchList; +} + +Error object::writeUniversalBinaryToStream(ArrayRef<Slice> Slices, + raw_ostream &Out) { + MachO::fat_header FatHeader; + FatHeader.magic = MachO::FAT_MAGIC; + FatHeader.nfat_arch = Slices.size(); + + Expected<SmallVector<MachO::fat_arch, 2>> FatArchListOrErr = + buildFatArchList(Slices); + if (!FatArchListOrErr) + return FatArchListOrErr.takeError(); + SmallVector<MachO::fat_arch, 2> FatArchList = *FatArchListOrErr; + + if (sys::IsLittleEndianHost) + MachO::swapStruct(FatHeader); + Out.write(reinterpret_cast<const char *>(&FatHeader), + sizeof(MachO::fat_header)); + + if (sys::IsLittleEndianHost) + for (MachO::fat_arch &FA : FatArchList) + MachO::swapStruct(FA); + Out.write(reinterpret_cast<const char *>(FatArchList.data()), + sizeof(MachO::fat_arch) * FatArchList.size()); + + if (sys::IsLittleEndianHost) + for 
(MachO::fat_arch &FA : FatArchList) + MachO::swapStruct(FA); + + size_t Offset = + sizeof(MachO::fat_header) + sizeof(MachO::fat_arch) * FatArchList.size(); + for (size_t Index = 0, Size = Slices.size(); Index < Size; ++Index) { + MemoryBufferRef BufferRef = Slices[Index].getBinary()->getMemoryBufferRef(); + assert((Offset <= FatArchList[Index].offset) && "Incorrect slice offset"); + Out.write_zeros(FatArchList[Index].offset - Offset); + Out.write(BufferRef.getBufferStart(), BufferRef.getBufferSize()); + Offset = FatArchList[Index].offset + BufferRef.getBufferSize(); + } + + Out.flush(); + return Error::success(); +} + +Error object::writeUniversalBinary(ArrayRef<Slice> Slices, + StringRef OutputFileName) { + const bool IsExecutable = any_of(Slices, [](Slice S) { + return sys::fs::can_execute(S.getBinary()->getFileName()); + }); + unsigned Mode = sys::fs::all_read | sys::fs::all_write; + if (IsExecutable) + Mode |= sys::fs::all_exe; + Expected<sys::fs::TempFile> Temp = sys::fs::TempFile::create( + OutputFileName + ".temp-universal-%%%%%%", Mode); + if (!Temp) + return Temp.takeError(); + raw_fd_ostream Out(Temp->FD, false); + if (Error E = writeUniversalBinaryToStream(Slices, Out)) { + if (Error DiscardError = Temp->discard()) + return joinErrors(std::move(E), std::move(DiscardError)); + return E; + } + return Temp->keep(OutputFileName); +} diff --git a/contrib/libs/llvm14/lib/Object/Minidump.cpp b/contrib/libs/llvm14/lib/Object/Minidump.cpp new file mode 100644 index 0000000000..3e932fe7be --- /dev/null +++ b/contrib/libs/llvm14/lib/Object/Minidump.cpp @@ -0,0 +1,155 @@ +//===- Minidump.cpp - Minidump object file implementation -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/Minidump.h" +#include "llvm/Object/Error.h" +#include "llvm/Support/ConvertUTF.h" + +using namespace llvm; +using namespace llvm::object; +using namespace llvm::minidump; + +Optional<ArrayRef<uint8_t>> +MinidumpFile::getRawStream(minidump::StreamType Type) const { + auto It = StreamMap.find(Type); + if (It != StreamMap.end()) + return getRawStream(Streams[It->second]); + return None; +} + +Expected<std::string> MinidumpFile::getString(size_t Offset) const { + // Minidump strings consist of a 32-bit length field, which gives the size of + // the string in *bytes*. This is followed by the actual string encoded in + // UTF16. + auto ExpectedSize = + getDataSliceAs<support::ulittle32_t>(getData(), Offset, 1); + if (!ExpectedSize) + return ExpectedSize.takeError(); + size_t Size = (*ExpectedSize)[0]; + if (Size % 2 != 0) + return createError("String size not even"); + Size /= 2; + if (Size == 0) + return ""; + + Offset += sizeof(support::ulittle32_t); + auto ExpectedData = + getDataSliceAs<support::ulittle16_t>(getData(), Offset, Size); + if (!ExpectedData) + return ExpectedData.takeError(); + + SmallVector<UTF16, 32> WStr(Size); + copy(*ExpectedData, WStr.begin()); + + std::string Result; + if (!convertUTF16ToUTF8String(WStr, Result)) + return createError("String decoding failed"); + + return Result; +} + +Expected<iterator_range<MinidumpFile::MemoryInfoIterator>> +MinidumpFile::getMemoryInfoList() const { + Optional<ArrayRef<uint8_t>> Stream = getRawStream(StreamType::MemoryInfoList); + if (!Stream) + return createError("No such stream"); + auto ExpectedHeader = + getDataSliceAs<minidump::MemoryInfoListHeader>(*Stream, 0, 1); + if (!ExpectedHeader) + return ExpectedHeader.takeError(); + const minidump::MemoryInfoListHeader &H = ExpectedHeader.get()[0]; + Expected<ArrayRef<uint8_t>> Data = + 
getDataSlice(*Stream, H.SizeOfHeader, H.SizeOfEntry * H.NumberOfEntries); + if (!Data) + return Data.takeError(); + return make_range(MemoryInfoIterator(*Data, H.SizeOfEntry), + MemoryInfoIterator({}, H.SizeOfEntry)); +} + +template <typename T> +Expected<ArrayRef<T>> MinidumpFile::getListStream(StreamType Type) const { + Optional<ArrayRef<uint8_t>> Stream = getRawStream(Type); + if (!Stream) + return createError("No such stream"); + auto ExpectedSize = getDataSliceAs<support::ulittle32_t>(*Stream, 0, 1); + if (!ExpectedSize) + return ExpectedSize.takeError(); + + size_t ListSize = ExpectedSize.get()[0]; + + size_t ListOffset = 4; + // Some producers insert additional padding bytes to align the list to an + // 8-byte boundary. Check for that by comparing the list size with the overall + // stream size. + if (ListOffset + sizeof(T) * ListSize < Stream->size()) + ListOffset = 8; + + return getDataSliceAs<T>(*Stream, ListOffset, ListSize); +} +template Expected<ArrayRef<Module>> + MinidumpFile::getListStream(StreamType) const; +template Expected<ArrayRef<Thread>> + MinidumpFile::getListStream(StreamType) const; +template Expected<ArrayRef<MemoryDescriptor>> + MinidumpFile::getListStream(StreamType) const; + +Expected<ArrayRef<uint8_t>> +MinidumpFile::getDataSlice(ArrayRef<uint8_t> Data, size_t Offset, size_t Size) { + // Check for overflow. 
+ if (Offset + Size < Offset || Offset + Size < Size || + Offset + Size > Data.size()) + return createEOFError(); + return Data.slice(Offset, Size); +} + +Expected<std::unique_ptr<MinidumpFile>> +MinidumpFile::create(MemoryBufferRef Source) { + ArrayRef<uint8_t> Data = arrayRefFromStringRef(Source.getBuffer()); + auto ExpectedHeader = getDataSliceAs<minidump::Header>(Data, 0, 1); + if (!ExpectedHeader) + return ExpectedHeader.takeError(); + + const minidump::Header &Hdr = (*ExpectedHeader)[0]; + if (Hdr.Signature != Header::MagicSignature) + return createError("Invalid signature"); + if ((Hdr.Version & 0xffff) != Header::MagicVersion) + return createError("Invalid version"); + + auto ExpectedStreams = getDataSliceAs<Directory>(Data, Hdr.StreamDirectoryRVA, + Hdr.NumberOfStreams); + if (!ExpectedStreams) + return ExpectedStreams.takeError(); + + DenseMap<StreamType, std::size_t> StreamMap; + for (const auto &StreamDescriptor : llvm::enumerate(*ExpectedStreams)) { + StreamType Type = StreamDescriptor.value().Type; + const LocationDescriptor &Loc = StreamDescriptor.value().Location; + + Expected<ArrayRef<uint8_t>> Stream = + getDataSlice(Data, Loc.RVA, Loc.DataSize); + if (!Stream) + return Stream.takeError(); + + if (Type == StreamType::Unused && Loc.DataSize == 0) { + // Ignore dummy streams. This is technically ill-formed, but a number of + // existing minidumps seem to contain such streams. + continue; + } + + if (Type == DenseMapInfo<StreamType>::getEmptyKey() || + Type == DenseMapInfo<StreamType>::getTombstoneKey()) + return createError("Cannot handle one of the minidump streams"); + + // Update the directory map, checking for duplicate stream types. 
+ if (!StreamMap.try_emplace(Type, StreamDescriptor.index()).second) + return createError("Duplicate stream type"); + } + + return std::unique_ptr<MinidumpFile>( + new MinidumpFile(Source, Hdr, *ExpectedStreams, std::move(StreamMap))); +} diff --git a/contrib/libs/llvm14/lib/Object/ModuleSymbolTable.cpp b/contrib/libs/llvm14/lib/Object/ModuleSymbolTable.cpp new file mode 100644 index 0000000000..954d1f09f4 --- /dev/null +++ b/contrib/libs/llvm14/lib/Object/ModuleSymbolTable.cpp @@ -0,0 +1,230 @@ +//===- ModuleSymbolTable.cpp - symbol table for in-memory IR --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This class represents a symbol table built from in-memory IR. It provides +// access to GlobalValues and should only be used if such access is required +// (e.g. in the LTO implementation). 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/ModuleSymbolTable.h" +#include "RecordStreamer.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDirectives.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Object/SymbolicFile.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SMLoc.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <memory> +#include <string> + +using namespace llvm; +using namespace object; + +void ModuleSymbolTable::addModule(Module *M) { + if (FirstMod) + assert(FirstMod->getTargetTriple() == M->getTargetTriple()); + else + FirstMod = M; + + for (GlobalValue &GV : M->global_values()) + SymTab.push_back(&GV); + + CollectAsmSymbols(*M, [this](StringRef Name, BasicSymbolRef::Flags Flags) { + SymTab.push_back(new (AsmSymbols.Allocate()) + AsmSymbol(std::string(Name), Flags)); + }); +} + +static void +initializeRecordStreamer(const Module &M, + function_ref<void(RecordStreamer &)> Init) { + StringRef InlineAsm = M.getModuleInlineAsm(); + if (InlineAsm.empty()) + 
return; + + std::string Err; + const Triple TT(M.getTargetTriple()); + const Target *T = TargetRegistry::lookupTarget(TT.str(), Err); + assert(T && T->hasMCAsmParser()); + + std::unique_ptr<MCRegisterInfo> MRI(T->createMCRegInfo(TT.str())); + if (!MRI) + return; + + MCTargetOptions MCOptions; + std::unique_ptr<MCAsmInfo> MAI(T->createMCAsmInfo(*MRI, TT.str(), MCOptions)); + if (!MAI) + return; + + std::unique_ptr<MCSubtargetInfo> STI( + T->createMCSubtargetInfo(TT.str(), "", "")); + if (!STI) + return; + + std::unique_ptr<MCInstrInfo> MCII(T->createMCInstrInfo()); + if (!MCII) + return; + + std::unique_ptr<MemoryBuffer> Buffer(MemoryBuffer::getMemBuffer(InlineAsm)); + SourceMgr SrcMgr; + SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc()); + + MCContext MCCtx(TT, MAI.get(), MRI.get(), STI.get(), &SrcMgr); + std::unique_ptr<MCObjectFileInfo> MOFI( + T->createMCObjectFileInfo(MCCtx, /*PIC=*/false)); + MOFI->setSDKVersion(M.getSDKVersion()); + MCCtx.setObjectFileInfo(MOFI.get()); + RecordStreamer Streamer(MCCtx, M); + T->createNullTargetStreamer(Streamer); + + std::unique_ptr<MCAsmParser> Parser( + createMCAsmParser(SrcMgr, MCCtx, Streamer, *MAI)); + + std::unique_ptr<MCTargetAsmParser> TAP( + T->createMCAsmParser(*STI, *Parser, *MCII, MCOptions)); + if (!TAP) + return; + + // Module-level inline asm is assumed to use At&t syntax (see + // AsmPrinter::doInitialization()). + Parser->setAssemblerDialect(InlineAsm::AD_ATT); + + Parser->setTargetParser(*TAP); + if (Parser->Run(false)) + return; + + Init(Streamer); +} + +void ModuleSymbolTable::CollectAsmSymbols( + const Module &M, + function_ref<void(StringRef, BasicSymbolRef::Flags)> AsmSymbol) { + initializeRecordStreamer(M, [&](RecordStreamer &Streamer) { + Streamer.flushSymverDirectives(); + + for (auto &KV : Streamer) { + StringRef Key = KV.first(); + RecordStreamer::State Value = KV.second; + // FIXME: For now we just assume that all asm symbols are executable. 
+ uint32_t Res = BasicSymbolRef::SF_Executable; + switch (Value) { + case RecordStreamer::NeverSeen: + llvm_unreachable("NeverSeen should have been replaced earlier"); + case RecordStreamer::DefinedGlobal: + Res |= BasicSymbolRef::SF_Global; + break; + case RecordStreamer::Defined: + break; + case RecordStreamer::Global: + case RecordStreamer::Used: + Res |= BasicSymbolRef::SF_Undefined; + Res |= BasicSymbolRef::SF_Global; + break; + case RecordStreamer::DefinedWeak: + Res |= BasicSymbolRef::SF_Weak; + Res |= BasicSymbolRef::SF_Global; + break; + case RecordStreamer::UndefinedWeak: + Res |= BasicSymbolRef::SF_Weak; + Res |= BasicSymbolRef::SF_Undefined; + } + AsmSymbol(Key, BasicSymbolRef::Flags(Res)); + } + }); +} + +void ModuleSymbolTable::CollectAsmSymvers( + const Module &M, function_ref<void(StringRef, StringRef)> AsmSymver) { + initializeRecordStreamer(M, [&](RecordStreamer &Streamer) { + for (auto &KV : Streamer.symverAliases()) + for (auto &Alias : KV.second) + AsmSymver(KV.first->getName(), Alias); + }); +} + +void ModuleSymbolTable::printSymbolName(raw_ostream &OS, Symbol S) const { + if (S.is<AsmSymbol *>()) { + OS << S.get<AsmSymbol *>()->first; + return; + } + + auto *GV = S.get<GlobalValue *>(); + if (GV->hasDLLImportStorageClass()) + OS << "__imp_"; + + Mang.getNameWithPrefix(OS, GV, false); +} + +uint32_t ModuleSymbolTable::getSymbolFlags(Symbol S) const { + if (S.is<AsmSymbol *>()) + return S.get<AsmSymbol *>()->second; + + auto *GV = S.get<GlobalValue *>(); + + uint32_t Res = BasicSymbolRef::SF_None; + if (GV->isDeclarationForLinker()) + Res |= BasicSymbolRef::SF_Undefined; + else if (GV->hasHiddenVisibility() && !GV->hasLocalLinkage()) + Res |= BasicSymbolRef::SF_Hidden; + if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) { + if (GVar->isConstant()) + Res |= BasicSymbolRef::SF_Const; + } + if (const GlobalObject *GO = GV->getAliaseeObject()) + if (isa<Function>(GO) || isa<GlobalIFunc>(GO)) + Res |= BasicSymbolRef::SF_Executable; + if 
(isa<GlobalAlias>(GV)) + Res |= BasicSymbolRef::SF_Indirect; + if (GV->hasPrivateLinkage()) + Res |= BasicSymbolRef::SF_FormatSpecific; + if (!GV->hasLocalLinkage()) + Res |= BasicSymbolRef::SF_Global; + if (GV->hasCommonLinkage()) + Res |= BasicSymbolRef::SF_Common; + if (GV->hasLinkOnceLinkage() || GV->hasWeakLinkage() || + GV->hasExternalWeakLinkage()) + Res |= BasicSymbolRef::SF_Weak; + + if (GV->getName().startswith("llvm.")) + Res |= BasicSymbolRef::SF_FormatSpecific; + else if (auto *Var = dyn_cast<GlobalVariable>(GV)) { + if (Var->getSection() == "llvm.metadata") + Res |= BasicSymbolRef::SF_FormatSpecific; + } + + return Res; +} diff --git a/contrib/libs/llvm14/lib/Object/Object.cpp b/contrib/libs/llvm14/lib/Object/Object.cpp new file mode 100644 index 0000000000..576eb8d069 --- /dev/null +++ b/contrib/libs/llvm14/lib/Object/Object.cpp @@ -0,0 +1,354 @@ +//===- Object.cpp - C bindings to the object file library--------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the C bindings to the file-format-independent object +// library. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm-c/Object.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Object/MachOUniversal.h" +#include "llvm/Support/MemAlloc.h" + +using namespace llvm; +using namespace object; + +inline OwningBinary<ObjectFile> *unwrap(LLVMObjectFileRef OF) { + return reinterpret_cast<OwningBinary<ObjectFile> *>(OF); +} + +inline LLVMObjectFileRef wrap(const OwningBinary<ObjectFile> *OF) { + return reinterpret_cast<LLVMObjectFileRef>( + const_cast<OwningBinary<ObjectFile> *>(OF)); +} + +inline section_iterator *unwrap(LLVMSectionIteratorRef SI) { + return reinterpret_cast<section_iterator*>(SI); +} + +inline LLVMSectionIteratorRef +wrap(const section_iterator *SI) { + return reinterpret_cast<LLVMSectionIteratorRef> + (const_cast<section_iterator*>(SI)); +} + +inline symbol_iterator *unwrap(LLVMSymbolIteratorRef SI) { + return reinterpret_cast<symbol_iterator*>(SI); +} + +inline LLVMSymbolIteratorRef +wrap(const symbol_iterator *SI) { + return reinterpret_cast<LLVMSymbolIteratorRef> + (const_cast<symbol_iterator*>(SI)); +} + +inline relocation_iterator *unwrap(LLVMRelocationIteratorRef SI) { + return reinterpret_cast<relocation_iterator*>(SI); +} + +inline LLVMRelocationIteratorRef +wrap(const relocation_iterator *SI) { + return reinterpret_cast<LLVMRelocationIteratorRef> + (const_cast<relocation_iterator*>(SI)); +} + +/*--.. Operations on binary files ..........................................--*/ + +LLVMBinaryRef LLVMCreateBinary(LLVMMemoryBufferRef MemBuf, + LLVMContextRef Context, + char **ErrorMessage) { + auto maybeContext = Context ? 
unwrap(Context) : nullptr; + Expected<std::unique_ptr<Binary>> ObjOrErr( + createBinary(unwrap(MemBuf)->getMemBufferRef(), maybeContext)); + if (!ObjOrErr) { + *ErrorMessage = strdup(toString(ObjOrErr.takeError()).c_str()); + return nullptr; + } + + return wrap(ObjOrErr.get().release()); +} + +LLVMMemoryBufferRef LLVMBinaryCopyMemoryBuffer(LLVMBinaryRef BR) { + auto Buf = unwrap(BR)->getMemoryBufferRef(); + return wrap(llvm::MemoryBuffer::getMemBuffer( + Buf.getBuffer(), Buf.getBufferIdentifier(), + /*RequiresNullTerminator*/false).release()); +} + +void LLVMDisposeBinary(LLVMBinaryRef BR) { + delete unwrap(BR); +} + +LLVMBinaryType LLVMBinaryGetType(LLVMBinaryRef BR) { + class BinaryTypeMapper final : public Binary { + public: + static LLVMBinaryType mapBinaryTypeToLLVMBinaryType(unsigned Kind) { + switch (Kind) { + case ID_Archive: + return LLVMBinaryTypeArchive; + case ID_MachOUniversalBinary: + return LLVMBinaryTypeMachOUniversalBinary; + case ID_COFFImportFile: + return LLVMBinaryTypeCOFFImportFile; + case ID_IR: + return LLVMBinaryTypeIR; + case ID_WinRes: + return LLVMBinaryTypeWinRes; + case ID_COFF: + return LLVMBinaryTypeCOFF; + case ID_ELF32L: + return LLVMBinaryTypeELF32L; + case ID_ELF32B: + return LLVMBinaryTypeELF32B; + case ID_ELF64L: + return LLVMBinaryTypeELF64L; + case ID_ELF64B: + return LLVMBinaryTypeELF64B; + case ID_MachO32L: + return LLVMBinaryTypeMachO32L; + case ID_MachO32B: + return LLVMBinaryTypeMachO32B; + case ID_MachO64L: + return LLVMBinaryTypeMachO64L; + case ID_MachO64B: + return LLVMBinaryTypeMachO64B; + case ID_Wasm: + return LLVMBinaryTypeWasm; + case ID_StartObjects: + case ID_EndObjects: + llvm_unreachable("Marker types are not valid binary kinds!"); + default: + llvm_unreachable("Unknown binary kind!"); + } + } + }; + return BinaryTypeMapper::mapBinaryTypeToLLVMBinaryType(unwrap(BR)->getType()); +} + +LLVMBinaryRef LLVMMachOUniversalBinaryCopyObjectForArch(LLVMBinaryRef BR, + const char *Arch, + size_t ArchLen, + char 
**ErrorMessage) { + auto universal = cast<MachOUniversalBinary>(unwrap(BR)); + Expected<std::unique_ptr<ObjectFile>> ObjOrErr( + universal->getMachOObjectForArch({Arch, ArchLen})); + if (!ObjOrErr) { + *ErrorMessage = strdup(toString(ObjOrErr.takeError()).c_str()); + return nullptr; + } + return wrap(ObjOrErr.get().release()); +} + +LLVMSectionIteratorRef LLVMObjectFileCopySectionIterator(LLVMBinaryRef BR) { + auto OF = cast<ObjectFile>(unwrap(BR)); + auto sections = OF->sections(); + if (sections.begin() == sections.end()) + return nullptr; + return wrap(new section_iterator(sections.begin())); +} + +LLVMBool LLVMObjectFileIsSectionIteratorAtEnd(LLVMBinaryRef BR, + LLVMSectionIteratorRef SI) { + auto OF = cast<ObjectFile>(unwrap(BR)); + return (*unwrap(SI) == OF->section_end()) ? 1 : 0; +} + +LLVMSymbolIteratorRef LLVMObjectFileCopySymbolIterator(LLVMBinaryRef BR) { + auto OF = cast<ObjectFile>(unwrap(BR)); + auto symbols = OF->symbols(); + if (symbols.begin() == symbols.end()) + return nullptr; + return wrap(new symbol_iterator(symbols.begin())); +} + +LLVMBool LLVMObjectFileIsSymbolIteratorAtEnd(LLVMBinaryRef BR, + LLVMSymbolIteratorRef SI) { + auto OF = cast<ObjectFile>(unwrap(BR)); + return (*unwrap(SI) == OF->symbol_end()) ? 1 : 0; +} + +// ObjectFile creation +LLVMObjectFileRef LLVMCreateObjectFile(LLVMMemoryBufferRef MemBuf) { + std::unique_ptr<MemoryBuffer> Buf(unwrap(MemBuf)); + Expected<std::unique_ptr<ObjectFile>> ObjOrErr( + ObjectFile::createObjectFile(Buf->getMemBufferRef())); + std::unique_ptr<ObjectFile> Obj; + if (!ObjOrErr) { + // TODO: Actually report errors helpfully. 
+ consumeError(ObjOrErr.takeError()); + return nullptr; + } + + auto *Ret = new OwningBinary<ObjectFile>(std::move(ObjOrErr.get()), std::move(Buf)); + return wrap(Ret); +} + +void LLVMDisposeObjectFile(LLVMObjectFileRef ObjectFile) { + delete unwrap(ObjectFile); +} + +// ObjectFile Section iterators +LLVMSectionIteratorRef LLVMGetSections(LLVMObjectFileRef OF) { + OwningBinary<ObjectFile> *OB = unwrap(OF); + section_iterator SI = OB->getBinary()->section_begin(); + return wrap(new section_iterator(SI)); +} + +void LLVMDisposeSectionIterator(LLVMSectionIteratorRef SI) { + delete unwrap(SI); +} + +LLVMBool LLVMIsSectionIteratorAtEnd(LLVMObjectFileRef OF, + LLVMSectionIteratorRef SI) { + OwningBinary<ObjectFile> *OB = unwrap(OF); + return (*unwrap(SI) == OB->getBinary()->section_end()) ? 1 : 0; +} + +void LLVMMoveToNextSection(LLVMSectionIteratorRef SI) { + ++(*unwrap(SI)); +} + +void LLVMMoveToContainingSection(LLVMSectionIteratorRef Sect, + LLVMSymbolIteratorRef Sym) { + Expected<section_iterator> SecOrErr = (*unwrap(Sym))->getSection(); + if (!SecOrErr) { + std::string Buf; + raw_string_ostream OS(Buf); + logAllUnhandledErrors(SecOrErr.takeError(), OS); + report_fatal_error(Twine(OS.str())); + } + *unwrap(Sect) = *SecOrErr; +} + +// ObjectFile Symbol iterators +LLVMSymbolIteratorRef LLVMGetSymbols(LLVMObjectFileRef OF) { + OwningBinary<ObjectFile> *OB = unwrap(OF); + symbol_iterator SI = OB->getBinary()->symbol_begin(); + return wrap(new symbol_iterator(SI)); +} + +void LLVMDisposeSymbolIterator(LLVMSymbolIteratorRef SI) { + delete unwrap(SI); +} + +LLVMBool LLVMIsSymbolIteratorAtEnd(LLVMObjectFileRef OF, + LLVMSymbolIteratorRef SI) { + OwningBinary<ObjectFile> *OB = unwrap(OF); + return (*unwrap(SI) == OB->getBinary()->symbol_end()) ? 
1 : 0; +} + +void LLVMMoveToNextSymbol(LLVMSymbolIteratorRef SI) { + ++(*unwrap(SI)); +} + +// SectionRef accessors +const char *LLVMGetSectionName(LLVMSectionIteratorRef SI) { + auto NameOrErr = (*unwrap(SI))->getName(); + if (!NameOrErr) + report_fatal_error(NameOrErr.takeError()); + return NameOrErr->data(); +} + +uint64_t LLVMGetSectionSize(LLVMSectionIteratorRef SI) { + return (*unwrap(SI))->getSize(); +} + +const char *LLVMGetSectionContents(LLVMSectionIteratorRef SI) { + if (Expected<StringRef> E = (*unwrap(SI))->getContents()) + return E->data(); + else + report_fatal_error(E.takeError()); +} + +uint64_t LLVMGetSectionAddress(LLVMSectionIteratorRef SI) { + return (*unwrap(SI))->getAddress(); +} + +LLVMBool LLVMGetSectionContainsSymbol(LLVMSectionIteratorRef SI, + LLVMSymbolIteratorRef Sym) { + return (*unwrap(SI))->containsSymbol(**unwrap(Sym)); +} + +// Section Relocation iterators +LLVMRelocationIteratorRef LLVMGetRelocations(LLVMSectionIteratorRef Section) { + relocation_iterator SI = (*unwrap(Section))->relocation_begin(); + return wrap(new relocation_iterator(SI)); +} + +void LLVMDisposeRelocationIterator(LLVMRelocationIteratorRef SI) { + delete unwrap(SI); +} + +LLVMBool LLVMIsRelocationIteratorAtEnd(LLVMSectionIteratorRef Section, + LLVMRelocationIteratorRef SI) { + return (*unwrap(SI) == (*unwrap(Section))->relocation_end()) ? 
1 : 0; +} + +void LLVMMoveToNextRelocation(LLVMRelocationIteratorRef SI) { + ++(*unwrap(SI)); +} + + +// SymbolRef accessors +const char *LLVMGetSymbolName(LLVMSymbolIteratorRef SI) { + Expected<StringRef> Ret = (*unwrap(SI))->getName(); + if (!Ret) { + std::string Buf; + raw_string_ostream OS(Buf); + logAllUnhandledErrors(Ret.takeError(), OS); + report_fatal_error(Twine(OS.str())); + } + return Ret->data(); +} + +uint64_t LLVMGetSymbolAddress(LLVMSymbolIteratorRef SI) { + Expected<uint64_t> Ret = (*unwrap(SI))->getAddress(); + if (!Ret) { + std::string Buf; + raw_string_ostream OS(Buf); + logAllUnhandledErrors(Ret.takeError(), OS); + report_fatal_error(Twine(OS.str())); + } + return *Ret; +} + +uint64_t LLVMGetSymbolSize(LLVMSymbolIteratorRef SI) { + return (*unwrap(SI))->getCommonSize(); +} + +// RelocationRef accessors +uint64_t LLVMGetRelocationOffset(LLVMRelocationIteratorRef RI) { + return (*unwrap(RI))->getOffset(); +} + +LLVMSymbolIteratorRef LLVMGetRelocationSymbol(LLVMRelocationIteratorRef RI) { + symbol_iterator ret = (*unwrap(RI))->getSymbol(); + return wrap(new symbol_iterator(ret)); +} + +uint64_t LLVMGetRelocationType(LLVMRelocationIteratorRef RI) { + return (*unwrap(RI))->getType(); +} + +// NOTE: Caller takes ownership of returned string. +const char *LLVMGetRelocationTypeName(LLVMRelocationIteratorRef RI) { + SmallVector<char, 0> ret; + (*unwrap(RI))->getTypeName(ret); + char *str = static_cast<char*>(safe_malloc(ret.size())); + llvm::copy(ret, str); + return str; +} + +// NOTE: Caller takes ownership of returned string. 
+const char *LLVMGetRelocationValueString(LLVMRelocationIteratorRef RI) { + return strdup(""); +} + diff --git a/contrib/libs/llvm14/lib/Object/ObjectFile.cpp b/contrib/libs/llvm14/lib/Object/ObjectFile.cpp new file mode 100644 index 0000000000..6fd02f3b95 --- /dev/null +++ b/contrib/libs/llvm14/lib/Object/ObjectFile.cpp @@ -0,0 +1,200 @@ +//===- ObjectFile.cpp - File format independent object file ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a file format independent ObjectFile class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/ObjectFile.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Magic.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/COFF.h" +#include "llvm/Object/Error.h" +#include "llvm/Object/MachO.h" +#include "llvm/Object/Wasm.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cstdint> +#include <memory> +#include <system_error> + +using namespace llvm; +using namespace object; + +raw_ostream &object::operator<<(raw_ostream &OS, const SectionedAddress &Addr) { + OS << "SectionedAddress{" << format_hex(Addr.Address, 10); + if (Addr.SectionIndex != SectionedAddress::UndefSection) + OS << ", " << Addr.SectionIndex; + return OS << "}"; +} + +void ObjectFile::anchor() {} + +ObjectFile::ObjectFile(unsigned int Type, MemoryBufferRef Source) + : SymbolicFile(Type, Source) {} + +bool SectionRef::containsSymbol(SymbolRef S) const { + Expected<section_iterator> SymSec = 
S.getSection(); + if (!SymSec) { + // TODO: Actually report errors helpfully. + consumeError(SymSec.takeError()); + return false; + } + return *this == **SymSec; +} + +Expected<uint64_t> ObjectFile::getSymbolValue(DataRefImpl Ref) const { + uint32_t Flags; + if (Error E = getSymbolFlags(Ref).moveInto(Flags)) + // TODO: Test this error. + return std::move(E); + + if (Flags & SymbolRef::SF_Undefined) + return 0; + if (Flags & SymbolRef::SF_Common) + return getCommonSymbolSize(Ref); + return getSymbolValueImpl(Ref); +} + +Error ObjectFile::printSymbolName(raw_ostream &OS, DataRefImpl Symb) const { + Expected<StringRef> Name = getSymbolName(Symb); + if (!Name) + return Name.takeError(); + OS << *Name; + return Error::success(); +} + +uint32_t ObjectFile::getSymbolAlignment(DataRefImpl DRI) const { return 0; } + +bool ObjectFile::isSectionBitcode(DataRefImpl Sec) const { + Expected<StringRef> NameOrErr = getSectionName(Sec); + if (NameOrErr) + return *NameOrErr == ".llvmbc"; + consumeError(NameOrErr.takeError()); + return false; +} + +bool ObjectFile::isSectionStripped(DataRefImpl Sec) const { return false; } + +bool ObjectFile::isBerkeleyText(DataRefImpl Sec) const { + return isSectionText(Sec); +} + +bool ObjectFile::isBerkeleyData(DataRefImpl Sec) const { + return isSectionData(Sec); +} + +bool ObjectFile::isDebugSection(DataRefImpl Sec) const { return false; } + +Expected<section_iterator> +ObjectFile::getRelocatedSection(DataRefImpl Sec) const { + return section_iterator(SectionRef(Sec, this)); +} + +Triple ObjectFile::makeTriple() const { + Triple TheTriple; + auto Arch = getArch(); + TheTriple.setArch(Triple::ArchType(Arch)); + + // For ARM targets, try to use the build attributes to build determine + // the build target. Target features are also added, but later during + // disassembly. 
+ if (Arch == Triple::arm || Arch == Triple::armeb) + setARMSubArch(TheTriple); + + // TheTriple defaults to ELF, and COFF doesn't have an environment: + // something we can do here is indicate that it is mach-o. + if (isMachO()) { + TheTriple.setObjectFormat(Triple::MachO); + } else if (isCOFF()) { + const auto COFFObj = cast<COFFObjectFile>(this); + if (COFFObj->getArch() == Triple::thumb) + TheTriple.setTriple("thumbv7-windows"); + } else if (isXCOFF()) { + // XCOFF implies AIX. + TheTriple.setOS(Triple::AIX); + TheTriple.setObjectFormat(Triple::XCOFF); + } + + return TheTriple; +} + +Expected<std::unique_ptr<ObjectFile>> +ObjectFile::createObjectFile(MemoryBufferRef Object, file_magic Type, + bool InitContent) { + StringRef Data = Object.getBuffer(); + if (Type == file_magic::unknown) + Type = identify_magic(Data); + + switch (Type) { + case file_magic::unknown: + case file_magic::bitcode: + case file_magic::coff_cl_gl_object: + case file_magic::archive: + case file_magic::macho_universal_binary: + case file_magic::windows_resource: + case file_magic::pdb: + case file_magic::minidump: + case file_magic::goff_object: + return errorCodeToError(object_error::invalid_file_type); + case file_magic::tapi_file: + return errorCodeToError(object_error::invalid_file_type); + case file_magic::elf: + case file_magic::elf_relocatable: + case file_magic::elf_executable: + case file_magic::elf_shared_object: + case file_magic::elf_core: + return createELFObjectFile(Object, InitContent); + case file_magic::macho_object: + case file_magic::macho_executable: + case file_magic::macho_fixed_virtual_memory_shared_lib: + case file_magic::macho_core: + case file_magic::macho_preload_executable: + case file_magic::macho_dynamically_linked_shared_lib: + case file_magic::macho_dynamic_linker: + case file_magic::macho_bundle: + case file_magic::macho_dynamically_linked_shared_lib_stub: + case file_magic::macho_dsym_companion: + case file_magic::macho_kext_bundle: + return 
createMachOObjectFile(Object); + case file_magic::coff_object: + case file_magic::coff_import_library: + case file_magic::pecoff_executable: + return createCOFFObjectFile(Object); + case file_magic::xcoff_object_32: + return createXCOFFObjectFile(Object, Binary::ID_XCOFF32); + case file_magic::xcoff_object_64: + return createXCOFFObjectFile(Object, Binary::ID_XCOFF64); + case file_magic::wasm_object: + return createWasmObjectFile(Object); + } + llvm_unreachable("Unexpected Object File Type"); +} + +Expected<OwningBinary<ObjectFile>> +ObjectFile::createObjectFile(StringRef ObjectPath) { + ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = + MemoryBuffer::getFile(ObjectPath); + if (std::error_code EC = FileOrErr.getError()) + return errorCodeToError(EC); + std::unique_ptr<MemoryBuffer> Buffer = std::move(FileOrErr.get()); + + Expected<std::unique_ptr<ObjectFile>> ObjOrErr = + createObjectFile(Buffer->getMemBufferRef()); + if (Error Err = ObjOrErr.takeError()) + return std::move(Err); + std::unique_ptr<ObjectFile> Obj = std::move(ObjOrErr.get()); + + return OwningBinary<ObjectFile>(std::move(Obj), std::move(Buffer)); +} diff --git a/contrib/libs/llvm14/lib/Object/RecordStreamer.cpp b/contrib/libs/llvm14/lib/Object/RecordStreamer.cpp new file mode 100644 index 0000000000..2d07d34bbf --- /dev/null +++ b/contrib/libs/llvm14/lib/Object/RecordStreamer.cpp @@ -0,0 +1,233 @@ +//===-- RecordStreamer.cpp - Record asm defined and used symbols ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "RecordStreamer.h" +#include "llvm/IR/Mangler.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" + +using namespace llvm; + +void RecordStreamer::markDefined(const MCSymbol &Symbol) { + State &S = Symbols[Symbol.getName()]; + switch (S) { + case DefinedGlobal: + case Global: + S = DefinedGlobal; + break; + case NeverSeen: + case Defined: + case Used: + S = Defined; + break; + case DefinedWeak: + break; + case UndefinedWeak: + S = DefinedWeak; + } +} + +void RecordStreamer::markGlobal(const MCSymbol &Symbol, + MCSymbolAttr Attribute) { + State &S = Symbols[Symbol.getName()]; + switch (S) { + case DefinedGlobal: + case Defined: + S = (Attribute == MCSA_Weak) ? DefinedWeak : DefinedGlobal; + break; + + case NeverSeen: + case Global: + case Used: + S = (Attribute == MCSA_Weak) ? UndefinedWeak : Global; + break; + case UndefinedWeak: + case DefinedWeak: + break; + } +} + +void RecordStreamer::markUsed(const MCSymbol &Symbol) { + State &S = Symbols[Symbol.getName()]; + switch (S) { + case DefinedGlobal: + case Defined: + case Global: + case DefinedWeak: + case UndefinedWeak: + break; + + case NeverSeen: + case Used: + S = Used; + break; + } +} + +void RecordStreamer::visitUsedSymbol(const MCSymbol &Sym) { markUsed(Sym); } + +RecordStreamer::RecordStreamer(MCContext &Context, const Module &M) + : MCStreamer(Context), M(M) {} + +RecordStreamer::const_iterator RecordStreamer::begin() { + return Symbols.begin(); +} + +RecordStreamer::const_iterator RecordStreamer::end() { return Symbols.end(); } + +void RecordStreamer::emitInstruction(const MCInst &Inst, + const MCSubtargetInfo &STI) { + MCStreamer::emitInstruction(Inst, STI); +} + +void RecordStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) { + MCStreamer::emitLabel(Symbol); + markDefined(*Symbol); +} + +void 
RecordStreamer::emitAssignment(MCSymbol *Symbol, const MCExpr *Value) { + markDefined(*Symbol); + MCStreamer::emitAssignment(Symbol, Value); +} + +bool RecordStreamer::emitSymbolAttribute(MCSymbol *Symbol, + MCSymbolAttr Attribute) { + if (Attribute == MCSA_Global || Attribute == MCSA_Weak) + markGlobal(*Symbol, Attribute); + if (Attribute == MCSA_LazyReference) + markUsed(*Symbol); + return true; +} + +void RecordStreamer::emitZerofill(MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment, + SMLoc Loc) { + markDefined(*Symbol); +} + +void RecordStreamer::emitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) { + markDefined(*Symbol); +} + +RecordStreamer::State RecordStreamer::getSymbolState(const MCSymbol *Sym) { + auto SI = Symbols.find(Sym->getName()); + if (SI == Symbols.end()) + return NeverSeen; + return SI->second; +} + +void RecordStreamer::emitELFSymverDirective(const MCSymbol *OriginalSym, + StringRef Name, + bool KeepOriginalSym) { + SymverAliasMap[OriginalSym].push_back(Name); +} + +iterator_range<RecordStreamer::const_symver_iterator> +RecordStreamer::symverAliases() { + return {SymverAliasMap.begin(), SymverAliasMap.end()}; +} + +void RecordStreamer::flushSymverDirectives() { + // Mapping from mangled name to GV. + StringMap<const GlobalValue *> MangledNameMap; + // The name in the assembler will be mangled, but the name in the IR + // might not, so we first compute a mapping from mangled name to GV. + Mangler Mang; + SmallString<64> MangledName; + for (const GlobalValue &GV : M.global_values()) { + if (!GV.hasName()) + continue; + MangledName.clear(); + MangledName.reserve(GV.getName().size() + 1); + Mang.getNameWithPrefix(MangledName, &GV, /*CannotUsePrivateLabel=*/false); + MangledNameMap[MangledName] = &GV; + } + + // Walk all the recorded .symver aliases, and set up the binding + // for each alias. 
+ for (auto &Symver : SymverAliasMap) { + const MCSymbol *Aliasee = Symver.first; + MCSymbolAttr Attr = MCSA_Invalid; + bool IsDefined = false; + + // First check if the aliasee binding was recorded in the asm. + RecordStreamer::State state = getSymbolState(Aliasee); + switch (state) { + case RecordStreamer::Global: + case RecordStreamer::DefinedGlobal: + Attr = MCSA_Global; + break; + case RecordStreamer::UndefinedWeak: + case RecordStreamer::DefinedWeak: + Attr = MCSA_Weak; + break; + default: + break; + } + + switch (state) { + case RecordStreamer::Defined: + case RecordStreamer::DefinedGlobal: + case RecordStreamer::DefinedWeak: + IsDefined = true; + break; + case RecordStreamer::NeverSeen: + case RecordStreamer::Global: + case RecordStreamer::Used: + case RecordStreamer::UndefinedWeak: + break; + } + + if (Attr == MCSA_Invalid || !IsDefined) { + const GlobalValue *GV = M.getNamedValue(Aliasee->getName()); + if (!GV) { + auto MI = MangledNameMap.find(Aliasee->getName()); + if (MI != MangledNameMap.end()) + GV = MI->second; + } + if (GV) { + // If we don't have a symbol attribute from assembly, then check if + // the aliasee was defined in the IR. + if (Attr == MCSA_Invalid) { + if (GV->hasExternalLinkage()) + Attr = MCSA_Global; + else if (GV->hasLocalLinkage()) + Attr = MCSA_Local; + else if (GV->isWeakForLinker()) + Attr = MCSA_Weak; + } + IsDefined = IsDefined || !GV->isDeclarationForLinker(); + } + } + + // Set the detected binding on each alias with this aliasee. + for (auto AliasName : Symver.second) { + std::pair<StringRef, StringRef> Split = AliasName.split("@@@"); + SmallString<128> NewName; + if (!Split.second.empty() && !Split.second.startswith("@")) { + // Special processing for "@@@" according + // https://sourceware.org/binutils/docs/as/Symver.html + const char *Separator = IsDefined ? 
"@@" : "@"; + AliasName = + (Split.first + Separator + Split.second).toStringRef(NewName); + } + MCSymbol *Alias = getContext().getOrCreateSymbol(AliasName); + // TODO: Handle "@@@". Depending on SymbolAttribute value it needs to be + // converted into @ or @@. + const MCExpr *Value = MCSymbolRefExpr::create(Aliasee, getContext()); + if (IsDefined) + markDefined(*Alias); + // Don't use EmitAssignment override as it always marks alias as defined. + MCStreamer::emitAssignment(Alias, Value); + if (Attr != MCSA_Invalid) + emitSymbolAttribute(Alias, Attr); + } + } +} diff --git a/contrib/libs/llvm14/lib/Object/RecordStreamer.h b/contrib/libs/llvm14/lib/Object/RecordStreamer.h new file mode 100644 index 0000000000..957d80f33b --- /dev/null +++ b/contrib/libs/llvm14/lib/Object/RecordStreamer.h @@ -0,0 +1,85 @@ +//===- RecordStreamer.h - Record asm defined and used symbols ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_OBJECT_RECORDSTREAMER_H +#define LLVM_LIB_OBJECT_RECORDSTREAMER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/MC/MCDirectives.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/SMLoc.h" +#include <vector> + +namespace llvm { + +class MCSymbol; +class Module; + +class RecordStreamer : public MCStreamer { +public: + enum State { NeverSeen, Global, Defined, DefinedGlobal, DefinedWeak, Used, + UndefinedWeak}; + +private: + const Module &M; + StringMap<State> Symbols; + // Map of aliases created by .symver directives, saved so we can update + // their symbol binding after parsing complete. This maps from each + // aliasee to its list of aliases. 
+ DenseMap<const MCSymbol *, std::vector<StringRef>> SymverAliasMap; + + /// Get the state recorded for the given symbol. + State getSymbolState(const MCSymbol *Sym); + + void markDefined(const MCSymbol &Symbol); + void markGlobal(const MCSymbol &Symbol, MCSymbolAttr Attribute); + void markUsed(const MCSymbol &Symbol); + void visitUsedSymbol(const MCSymbol &Sym) override; + +public: + RecordStreamer(MCContext &Context, const Module &M); + + void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override; + void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override; + void emitAssignment(MCSymbol *Symbol, const MCExpr *Value) override; + bool emitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override; + void emitZerofill(MCSection *Section, MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment, SMLoc Loc = SMLoc()) override; + void emitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) override; + + // Ignore COFF-specific directives; we do not need any information from them, + // but the default implementation of these methods crashes, so we override + // them with versions that do nothing. + void BeginCOFFSymbolDef(const MCSymbol *Symbol) override {} + void EmitCOFFSymbolStorageClass(int StorageClass) override {} + void EmitCOFFSymbolType(int Type) override {} + void EndCOFFSymbolDef() override {} + + /// Record .symver aliases for later processing. + void emitELFSymverDirective(const MCSymbol *OriginalSym, StringRef Name, + bool KeepOriginalSym) override; + + // Emit ELF .symver aliases and ensure they have the same binding as the + // defined symbol they alias with. 
+ void flushSymverDirectives(); + + // Symbols iterators + using const_iterator = StringMap<State>::const_iterator; + const_iterator begin(); + const_iterator end(); + + // SymverAliasMap iterators + using const_symver_iterator = decltype(SymverAliasMap)::const_iterator; + iterator_range<const_symver_iterator> symverAliases(); +}; + +} // end namespace llvm + +#endif // LLVM_LIB_OBJECT_RECORDSTREAMER_H diff --git a/contrib/libs/llvm14/lib/Object/RelocationResolver.cpp b/contrib/libs/llvm14/lib/Object/RelocationResolver.cpp new file mode 100644 index 0000000000..00a45e2c5d --- /dev/null +++ b/contrib/libs/llvm14/lib/Object/RelocationResolver.cpp @@ -0,0 +1,775 @@ +//===- RelocationResolver.cpp ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines utilities to resolve relocations in object files. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/RelocationResolver.h" + +namespace llvm { +namespace object { + +static int64_t getELFAddend(RelocationRef R) { + Expected<int64_t> AddendOrErr = ELFRelocationRef(R).getAddend(); + handleAllErrors(AddendOrErr.takeError(), [](const ErrorInfoBase &EI) { + report_fatal_error(Twine(EI.message())); + }); + return *AddendOrErr; +} + +static bool supportsX86_64(uint64_t Type) { + switch (Type) { + case ELF::R_X86_64_NONE: + case ELF::R_X86_64_64: + case ELF::R_X86_64_DTPOFF32: + case ELF::R_X86_64_DTPOFF64: + case ELF::R_X86_64_PC32: + case ELF::R_X86_64_PC64: + case ELF::R_X86_64_32: + case ELF::R_X86_64_32S: + return true; + default: + return false; + } +} + +static uint64_t resolveX86_64(uint64_t Type, uint64_t Offset, uint64_t S, + uint64_t LocData, int64_t Addend) { + switch (Type) { + case ELF::R_X86_64_NONE: + return LocData; + case ELF::R_X86_64_64: + case ELF::R_X86_64_DTPOFF32: + case ELF::R_X86_64_DTPOFF64: + return S + Addend; + case ELF::R_X86_64_PC32: + case ELF::R_X86_64_PC64: + return S + Addend - Offset; + case ELF::R_X86_64_32: + case ELF::R_X86_64_32S: + return (S + Addend) & 0xFFFFFFFF; + default: + llvm_unreachable("Invalid relocation type"); + } +} + +static bool supportsAArch64(uint64_t Type) { + switch (Type) { + case ELF::R_AARCH64_ABS32: + case ELF::R_AARCH64_ABS64: + case ELF::R_AARCH64_PREL32: + case ELF::R_AARCH64_PREL64: + return true; + default: + return false; + } +} + +static uint64_t resolveAArch64(uint64_t Type, uint64_t Offset, uint64_t S, + uint64_t /*LocData*/, int64_t Addend) { + switch (Type) { + case ELF::R_AARCH64_ABS32: + return (S + Addend) & 0xFFFFFFFF; + case ELF::R_AARCH64_ABS64: + return S + Addend; + case ELF::R_AARCH64_PREL32: + return (S + Addend - Offset) & 0xFFFFFFFF; + case ELF::R_AARCH64_PREL64: + return S + Addend - Offset; + default: + llvm_unreachable("Invalid relocation type"); + } +} + +static bool 
supportsBPF(uint64_t Type) { + switch (Type) { + case ELF::R_BPF_64_ABS32: + case ELF::R_BPF_64_ABS64: + return true; + default: + return false; + } +} + +static uint64_t resolveBPF(uint64_t Type, uint64_t Offset, uint64_t S, + uint64_t LocData, int64_t /*Addend*/) { + switch (Type) { + case ELF::R_BPF_64_ABS32: + return (S + LocData) & 0xFFFFFFFF; + case ELF::R_BPF_64_ABS64: + return S + LocData; + default: + llvm_unreachable("Invalid relocation type"); + } +} + +static bool supportsMips64(uint64_t Type) { + switch (Type) { + case ELF::R_MIPS_32: + case ELF::R_MIPS_64: + case ELF::R_MIPS_TLS_DTPREL64: + case ELF::R_MIPS_PC32: + return true; + default: + return false; + } +} + +static uint64_t resolveMips64(uint64_t Type, uint64_t Offset, uint64_t S, + uint64_t /*LocData*/, int64_t Addend) { + switch (Type) { + case ELF::R_MIPS_32: + return (S + Addend) & 0xFFFFFFFF; + case ELF::R_MIPS_64: + return S + Addend; + case ELF::R_MIPS_TLS_DTPREL64: + return S + Addend - 0x8000; + case ELF::R_MIPS_PC32: + return S + Addend - Offset; + default: + llvm_unreachable("Invalid relocation type"); + } +} + +static bool supportsMSP430(uint64_t Type) { + switch (Type) { + case ELF::R_MSP430_32: + case ELF::R_MSP430_16_BYTE: + return true; + default: + return false; + } +} + +static uint64_t resolveMSP430(uint64_t Type, uint64_t Offset, uint64_t S, + uint64_t /*LocData*/, int64_t Addend) { + switch (Type) { + case ELF::R_MSP430_32: + return (S + Addend) & 0xFFFFFFFF; + case ELF::R_MSP430_16_BYTE: + return (S + Addend) & 0xFFFF; + default: + llvm_unreachable("Invalid relocation type"); + } +} + +static bool supportsPPC64(uint64_t Type) { + switch (Type) { + case ELF::R_PPC64_ADDR32: + case ELF::R_PPC64_ADDR64: + case ELF::R_PPC64_REL32: + case ELF::R_PPC64_REL64: + return true; + default: + return false; + } +} + +static uint64_t resolvePPC64(uint64_t Type, uint64_t Offset, uint64_t S, + uint64_t /*LocData*/, int64_t Addend) { + switch (Type) { + case ELF::R_PPC64_ADDR32: + return (S 
+ Addend) & 0xFFFFFFFF; + case ELF::R_PPC64_ADDR64: + return S + Addend; + case ELF::R_PPC64_REL32: + return (S + Addend - Offset) & 0xFFFFFFFF; + case ELF::R_PPC64_REL64: + return S + Addend - Offset; + default: + llvm_unreachable("Invalid relocation type"); + } +} + +static bool supportsSystemZ(uint64_t Type) { + switch (Type) { + case ELF::R_390_32: + case ELF::R_390_64: + return true; + default: + return false; + } +} + +static uint64_t resolveSystemZ(uint64_t Type, uint64_t Offset, uint64_t S, + uint64_t /*LocData*/, int64_t Addend) { + switch (Type) { + case ELF::R_390_32: + return (S + Addend) & 0xFFFFFFFF; + case ELF::R_390_64: + return S + Addend; + default: + llvm_unreachable("Invalid relocation type"); + } +} + +static bool supportsSparc64(uint64_t Type) { + switch (Type) { + case ELF::R_SPARC_32: + case ELF::R_SPARC_64: + case ELF::R_SPARC_UA32: + case ELF::R_SPARC_UA64: + return true; + default: + return false; + } +} + +static uint64_t resolveSparc64(uint64_t Type, uint64_t Offset, uint64_t S, + uint64_t /*LocData*/, int64_t Addend) { + switch (Type) { + case ELF::R_SPARC_32: + case ELF::R_SPARC_64: + case ELF::R_SPARC_UA32: + case ELF::R_SPARC_UA64: + return S + Addend; + default: + llvm_unreachable("Invalid relocation type"); + } +} + +static bool supportsAmdgpu(uint64_t Type) { + switch (Type) { + case ELF::R_AMDGPU_ABS32: + case ELF::R_AMDGPU_ABS64: + return true; + default: + return false; + } +} + +static uint64_t resolveAmdgpu(uint64_t Type, uint64_t Offset, uint64_t S, + uint64_t /*LocData*/, int64_t Addend) { + switch (Type) { + case ELF::R_AMDGPU_ABS32: + case ELF::R_AMDGPU_ABS64: + return S + Addend; + default: + llvm_unreachable("Invalid relocation type"); + } +} + +static bool supportsX86(uint64_t Type) { + switch (Type) { + case ELF::R_386_NONE: + case ELF::R_386_32: + case ELF::R_386_PC32: + return true; + default: + return false; + } +} + +static uint64_t resolveX86(uint64_t Type, uint64_t Offset, uint64_t S, + uint64_t LocData, 
int64_t /*Addend*/) { + switch (Type) { + case ELF::R_386_NONE: + return LocData; + case ELF::R_386_32: + return S + LocData; + case ELF::R_386_PC32: + return S - Offset + LocData; + default: + llvm_unreachable("Invalid relocation type"); + } +} + +static bool supportsPPC32(uint64_t Type) { + switch (Type) { + case ELF::R_PPC_ADDR32: + case ELF::R_PPC_REL32: + return true; + default: + return false; + } +} + +static uint64_t resolvePPC32(uint64_t Type, uint64_t Offset, uint64_t S, + uint64_t /*LocData*/, int64_t Addend) { + switch (Type) { + case ELF::R_PPC_ADDR32: + return (S + Addend) & 0xFFFFFFFF; + case ELF::R_PPC_REL32: + return (S + Addend - Offset) & 0xFFFFFFFF; + } + llvm_unreachable("Invalid relocation type"); +} + +static bool supportsARM(uint64_t Type) { + switch (Type) { + case ELF::R_ARM_ABS32: + case ELF::R_ARM_REL32: + return true; + default: + return false; + } +} + +static uint64_t resolveARM(uint64_t Type, uint64_t Offset, uint64_t S, + uint64_t LocData, int64_t Addend) { + // Support both RELA and REL relocations. The caller is responsible + // for supplying the correct values for LocData and Addend, i.e. + // Addend == 0 for REL and LocData == 0 for RELA. 
+ assert((LocData == 0 || Addend == 0) && + "one of LocData and Addend must be 0"); + switch (Type) { + case ELF::R_ARM_ABS32: + return (S + LocData + Addend) & 0xFFFFFFFF; + case ELF::R_ARM_REL32: + return (S + LocData + Addend - Offset) & 0xFFFFFFFF; + } + llvm_unreachable("Invalid relocation type"); +} + +static bool supportsAVR(uint64_t Type) { + switch (Type) { + case ELF::R_AVR_16: + case ELF::R_AVR_32: + return true; + default: + return false; + } +} + +static uint64_t resolveAVR(uint64_t Type, uint64_t Offset, uint64_t S, + uint64_t /*LocData*/, int64_t Addend) { + switch (Type) { + case ELF::R_AVR_16: + return (S + Addend) & 0xFFFF; + case ELF::R_AVR_32: + return (S + Addend) & 0xFFFFFFFF; + default: + llvm_unreachable("Invalid relocation type"); + } +} + +static bool supportsLanai(uint64_t Type) { + return Type == ELF::R_LANAI_32; +} + +static uint64_t resolveLanai(uint64_t Type, uint64_t Offset, uint64_t S, + uint64_t /*LocData*/, int64_t Addend) { + if (Type == ELF::R_LANAI_32) + return (S + Addend) & 0xFFFFFFFF; + llvm_unreachable("Invalid relocation type"); +} + +static bool supportsMips32(uint64_t Type) { + switch (Type) { + case ELF::R_MIPS_32: + case ELF::R_MIPS_TLS_DTPREL32: + return true; + default: + return false; + } +} + +static uint64_t resolveMips32(uint64_t Type, uint64_t Offset, uint64_t S, + uint64_t LocData, int64_t /*Addend*/) { + // FIXME: Take in account implicit addends to get correct results. 
+ if (Type == ELF::R_MIPS_32) + return (S + LocData) & 0xFFFFFFFF; + if (Type == ELF::R_MIPS_TLS_DTPREL32) + return (S + LocData) & 0xFFFFFFFF; + llvm_unreachable("Invalid relocation type"); +} + +static bool supportsSparc32(uint64_t Type) { + switch (Type) { + case ELF::R_SPARC_32: + case ELF::R_SPARC_UA32: + return true; + default: + return false; + } +} + +static uint64_t resolveSparc32(uint64_t Type, uint64_t Offset, uint64_t S, + uint64_t LocData, int64_t Addend) { + if (Type == ELF::R_SPARC_32 || Type == ELF::R_SPARC_UA32) + return S + Addend; + return LocData; +} + +static bool supportsHexagon(uint64_t Type) { + return Type == ELF::R_HEX_32; +} + +static uint64_t resolveHexagon(uint64_t Type, uint64_t Offset, uint64_t S, + uint64_t /*LocData*/, int64_t Addend) { + if (Type == ELF::R_HEX_32) + return S + Addend; + llvm_unreachable("Invalid relocation type"); +} + +static bool supportsRISCV(uint64_t Type) { + switch (Type) { + case ELF::R_RISCV_NONE: + case ELF::R_RISCV_32: + case ELF::R_RISCV_32_PCREL: + case ELF::R_RISCV_64: + case ELF::R_RISCV_SET6: + case ELF::R_RISCV_SUB6: + case ELF::R_RISCV_ADD8: + case ELF::R_RISCV_SUB8: + case ELF::R_RISCV_ADD16: + case ELF::R_RISCV_SUB16: + case ELF::R_RISCV_ADD32: + case ELF::R_RISCV_SUB32: + case ELF::R_RISCV_ADD64: + case ELF::R_RISCV_SUB64: + return true; + default: + return false; + } +} + +static uint64_t resolveRISCV(uint64_t Type, uint64_t Offset, uint64_t S, + uint64_t LocData, int64_t Addend) { + int64_t RA = Addend; + uint64_t A = LocData; + switch (Type) { + case ELF::R_RISCV_NONE: + return LocData; + case ELF::R_RISCV_32: + return (S + RA) & 0xFFFFFFFF; + case ELF::R_RISCV_32_PCREL: + return (S + RA - Offset) & 0xFFFFFFFF; + case ELF::R_RISCV_64: + return S + RA; + case ELF::R_RISCV_SET6: + return (A & 0xC0) | ((S + RA) & 0x3F); + case ELF::R_RISCV_SUB6: + return (A & 0xC0) | (((A & 0x3F) - (S + RA)) & 0x3F); + case ELF::R_RISCV_ADD8: + return (A + (S + RA)) & 0xFF; + case ELF::R_RISCV_SUB8: + return (A 
- (S + RA)) & 0xFF; + case ELF::R_RISCV_ADD16: + return (A + (S + RA)) & 0xFFFF; + case ELF::R_RISCV_SUB16: + return (A - (S + RA)) & 0xFFFF; + case ELF::R_RISCV_ADD32: + return (A + (S + RA)) & 0xFFFFFFFF; + case ELF::R_RISCV_SUB32: + return (A - (S + RA)) & 0xFFFFFFFF; + case ELF::R_RISCV_ADD64: + return (A + (S + RA)); + case ELF::R_RISCV_SUB64: + return (A - (S + RA)); + default: + llvm_unreachable("Invalid relocation type"); + } +} + +static bool supportsCOFFX86(uint64_t Type) { + switch (Type) { + case COFF::IMAGE_REL_I386_SECREL: + case COFF::IMAGE_REL_I386_DIR32: + return true; + default: + return false; + } +} + +static uint64_t resolveCOFFX86(uint64_t Type, uint64_t Offset, uint64_t S, + uint64_t LocData, int64_t /*Addend*/) { + switch (Type) { + case COFF::IMAGE_REL_I386_SECREL: + case COFF::IMAGE_REL_I386_DIR32: + return (S + LocData) & 0xFFFFFFFF; + default: + llvm_unreachable("Invalid relocation type"); + } +} + +static bool supportsCOFFX86_64(uint64_t Type) { + switch (Type) { + case COFF::IMAGE_REL_AMD64_SECREL: + case COFF::IMAGE_REL_AMD64_ADDR64: + return true; + default: + return false; + } +} + +static uint64_t resolveCOFFX86_64(uint64_t Type, uint64_t Offset, uint64_t S, + uint64_t LocData, int64_t /*Addend*/) { + switch (Type) { + case COFF::IMAGE_REL_AMD64_SECREL: + return (S + LocData) & 0xFFFFFFFF; + case COFF::IMAGE_REL_AMD64_ADDR64: + return S + LocData; + default: + llvm_unreachable("Invalid relocation type"); + } +} + +static bool supportsCOFFARM(uint64_t Type) { + switch (Type) { + case COFF::IMAGE_REL_ARM_SECREL: + case COFF::IMAGE_REL_ARM_ADDR32: + return true; + default: + return false; + } +} + +static uint64_t resolveCOFFARM(uint64_t Type, uint64_t Offset, uint64_t S, + uint64_t LocData, int64_t /*Addend*/) { + switch (Type) { + case COFF::IMAGE_REL_ARM_SECREL: + case COFF::IMAGE_REL_ARM_ADDR32: + return (S + LocData) & 0xFFFFFFFF; + default: + llvm_unreachable("Invalid relocation type"); + } +} + +static bool 
supportsCOFFARM64(uint64_t Type) { + switch (Type) { + case COFF::IMAGE_REL_ARM64_SECREL: + case COFF::IMAGE_REL_ARM64_ADDR64: + return true; + default: + return false; + } +} + +static uint64_t resolveCOFFARM64(uint64_t Type, uint64_t Offset, uint64_t S, + uint64_t LocData, int64_t /*Addend*/) { + switch (Type) { + case COFF::IMAGE_REL_ARM64_SECREL: + return (S + LocData) & 0xFFFFFFFF; + case COFF::IMAGE_REL_ARM64_ADDR64: + return S + LocData; + default: + llvm_unreachable("Invalid relocation type"); + } +} + +static bool supportsMachOX86_64(uint64_t Type) { + return Type == MachO::X86_64_RELOC_UNSIGNED; +} + +static uint64_t resolveMachOX86_64(uint64_t Type, uint64_t Offset, uint64_t S, + uint64_t LocData, int64_t /*Addend*/) { + if (Type == MachO::X86_64_RELOC_UNSIGNED) + return S; + llvm_unreachable("Invalid relocation type"); +} + +static bool supportsWasm32(uint64_t Type) { + switch (Type) { + case wasm::R_WASM_FUNCTION_INDEX_LEB: + case wasm::R_WASM_TABLE_INDEX_SLEB: + case wasm::R_WASM_TABLE_INDEX_I32: + case wasm::R_WASM_MEMORY_ADDR_LEB: + case wasm::R_WASM_MEMORY_ADDR_SLEB: + case wasm::R_WASM_MEMORY_ADDR_I32: + case wasm::R_WASM_TYPE_INDEX_LEB: + case wasm::R_WASM_GLOBAL_INDEX_LEB: + case wasm::R_WASM_FUNCTION_OFFSET_I32: + case wasm::R_WASM_SECTION_OFFSET_I32: + case wasm::R_WASM_TAG_INDEX_LEB: + case wasm::R_WASM_GLOBAL_INDEX_I32: + case wasm::R_WASM_TABLE_NUMBER_LEB: + case wasm::R_WASM_MEMORY_ADDR_LOCREL_I32: + return true; + default: + return false; + } +} + +static bool supportsWasm64(uint64_t Type) { + switch (Type) { + case wasm::R_WASM_MEMORY_ADDR_LEB64: + case wasm::R_WASM_MEMORY_ADDR_SLEB64: + case wasm::R_WASM_MEMORY_ADDR_I64: + case wasm::R_WASM_TABLE_INDEX_SLEB64: + case wasm::R_WASM_TABLE_INDEX_I64: + case wasm::R_WASM_FUNCTION_OFFSET_I64: + return true; + default: + return supportsWasm32(Type); + } +} + +static uint64_t resolveWasm32(uint64_t Type, uint64_t Offset, uint64_t S, + uint64_t LocData, int64_t /*Addend*/) { + switch (Type) { + 
case wasm::R_WASM_FUNCTION_INDEX_LEB: + case wasm::R_WASM_TABLE_INDEX_SLEB: + case wasm::R_WASM_TABLE_INDEX_I32: + case wasm::R_WASM_MEMORY_ADDR_LEB: + case wasm::R_WASM_MEMORY_ADDR_SLEB: + case wasm::R_WASM_MEMORY_ADDR_I32: + case wasm::R_WASM_TYPE_INDEX_LEB: + case wasm::R_WASM_GLOBAL_INDEX_LEB: + case wasm::R_WASM_FUNCTION_OFFSET_I32: + case wasm::R_WASM_SECTION_OFFSET_I32: + case wasm::R_WASM_TAG_INDEX_LEB: + case wasm::R_WASM_GLOBAL_INDEX_I32: + case wasm::R_WASM_TABLE_NUMBER_LEB: + case wasm::R_WASM_MEMORY_ADDR_LOCREL_I32: + // For wasm section, its offset at 0 -- ignoring Value + return LocData; + default: + llvm_unreachable("Invalid relocation type"); + } +} + +static uint64_t resolveWasm64(uint64_t Type, uint64_t Offset, uint64_t S, + uint64_t LocData, int64_t Addend) { + switch (Type) { + case wasm::R_WASM_MEMORY_ADDR_LEB64: + case wasm::R_WASM_MEMORY_ADDR_SLEB64: + case wasm::R_WASM_MEMORY_ADDR_I64: + case wasm::R_WASM_TABLE_INDEX_SLEB64: + case wasm::R_WASM_TABLE_INDEX_I64: + case wasm::R_WASM_FUNCTION_OFFSET_I64: + // For wasm section, its offset at 0 -- ignoring Value + return LocData; + default: + return resolveWasm32(Type, Offset, S, LocData, Addend); + } +} + +std::pair<SupportsRelocation, RelocationResolver> +getRelocationResolver(const ObjectFile &Obj) { + if (Obj.isCOFF()) { + switch (Obj.getArch()) { + case Triple::x86_64: + return {supportsCOFFX86_64, resolveCOFFX86_64}; + case Triple::x86: + return {supportsCOFFX86, resolveCOFFX86}; + case Triple::arm: + case Triple::thumb: + return {supportsCOFFARM, resolveCOFFARM}; + case Triple::aarch64: + return {supportsCOFFARM64, resolveCOFFARM64}; + default: + return {nullptr, nullptr}; + } + } else if (Obj.isELF()) { + if (Obj.getBytesInAddress() == 8) { + switch (Obj.getArch()) { + case Triple::x86_64: + return {supportsX86_64, resolveX86_64}; + case Triple::aarch64: + case Triple::aarch64_be: + return {supportsAArch64, resolveAArch64}; + case Triple::bpfel: + case Triple::bpfeb: + return 
{supportsBPF, resolveBPF}; + case Triple::mips64el: + case Triple::mips64: + return {supportsMips64, resolveMips64}; + case Triple::ppc64le: + case Triple::ppc64: + return {supportsPPC64, resolvePPC64}; + case Triple::systemz: + return {supportsSystemZ, resolveSystemZ}; + case Triple::sparcv9: + return {supportsSparc64, resolveSparc64}; + case Triple::amdgcn: + return {supportsAmdgpu, resolveAmdgpu}; + case Triple::riscv64: + return {supportsRISCV, resolveRISCV}; + default: + return {nullptr, nullptr}; + } + } + + // 32-bit object file + assert(Obj.getBytesInAddress() == 4 && + "Invalid word size in object file"); + + switch (Obj.getArch()) { + case Triple::x86: + return {supportsX86, resolveX86}; + case Triple::ppcle: + case Triple::ppc: + return {supportsPPC32, resolvePPC32}; + case Triple::arm: + case Triple::armeb: + return {supportsARM, resolveARM}; + case Triple::avr: + return {supportsAVR, resolveAVR}; + case Triple::lanai: + return {supportsLanai, resolveLanai}; + case Triple::mipsel: + case Triple::mips: + return {supportsMips32, resolveMips32}; + case Triple::msp430: + return {supportsMSP430, resolveMSP430}; + case Triple::sparc: + return {supportsSparc32, resolveSparc32}; + case Triple::hexagon: + return {supportsHexagon, resolveHexagon}; + case Triple::riscv32: + return {supportsRISCV, resolveRISCV}; + default: + return {nullptr, nullptr}; + } + } else if (Obj.isMachO()) { + if (Obj.getArch() == Triple::x86_64) + return {supportsMachOX86_64, resolveMachOX86_64}; + return {nullptr, nullptr}; + } else if (Obj.isWasm()) { + if (Obj.getArch() == Triple::wasm32) + return {supportsWasm32, resolveWasm32}; + if (Obj.getArch() == Triple::wasm64) + return {supportsWasm64, resolveWasm64}; + return {nullptr, nullptr}; + } + + llvm_unreachable("Invalid object file"); +} + +uint64_t resolveRelocation(RelocationResolver Resolver, const RelocationRef &R, + uint64_t S, uint64_t LocData) { + if (const ObjectFile *Obj = R.getObject()) { + int64_t Addend = 0; + if 
(Obj->isELF()) { + auto GetRelSectionType = [&]() -> unsigned { + if (auto *Elf32LEObj = dyn_cast<ELF32LEObjectFile>(Obj)) + return Elf32LEObj->getRelSection(R.getRawDataRefImpl())->sh_type; + if (auto *Elf64LEObj = dyn_cast<ELF64LEObjectFile>(Obj)) + return Elf64LEObj->getRelSection(R.getRawDataRefImpl())->sh_type; + if (auto *Elf32BEObj = dyn_cast<ELF32BEObjectFile>(Obj)) + return Elf32BEObj->getRelSection(R.getRawDataRefImpl())->sh_type; + auto *Elf64BEObj = cast<ELF64BEObjectFile>(Obj); + return Elf64BEObj->getRelSection(R.getRawDataRefImpl())->sh_type; + }; + + if (GetRelSectionType() == ELF::SHT_RELA) { + Addend = getELFAddend(R); + // RISCV relocations use both LocData and Addend. + if (Obj->getArch() != Triple::riscv32 && + Obj->getArch() != Triple::riscv64) + LocData = 0; + } + } + + return Resolver(R.getType(), R.getOffset(), S, LocData, Addend); + } + + // Sometimes the caller might want to use its own specific implementation of + // the resolver function. E.g. this is used by LLD when it resolves debug + // relocations and assumes that all of them have the same computation (S + A). + // The relocation R has no owner object in this case and we don't need to + // provide Type and Offset fields. It is also assumed the DataRefImpl.p + // contains the addend, provided by the caller. + return Resolver(/*Type=*/0, /*Offset=*/0, S, LocData, + R.getRawDataRefImpl().p); +} + +} // namespace object +} // namespace llvm diff --git a/contrib/libs/llvm14/lib/Object/SymbolSize.cpp b/contrib/libs/llvm14/lib/Object/SymbolSize.cpp new file mode 100644 index 0000000000..e42dbe6f47 --- /dev/null +++ b/contrib/libs/llvm14/lib/Object/SymbolSize.cpp @@ -0,0 +1,110 @@ +//===- SymbolSize.cpp -----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/SymbolSize.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Object/COFF.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/MachO.h" +#include "llvm/Object/Wasm.h" +#include "llvm/Object/XCOFFObjectFile.h" + +using namespace llvm; +using namespace object; + +// Orders increasingly by (SectionID, Address). +int llvm::object::compareAddress(const SymEntry *A, const SymEntry *B) { + if (A->SectionID != B->SectionID) + return A->SectionID < B->SectionID ? -1 : 1; + if (A->Address != B->Address) + return A->Address < B->Address ? -1 : 1; + return 0; +} + +static unsigned getSectionID(const ObjectFile &O, SectionRef Sec) { + if (auto *M = dyn_cast<MachOObjectFile>(&O)) + return M->getSectionID(Sec); + if (isa<WasmObjectFile>(&O)) + return Sec.getIndex(); + if (isa<XCOFFObjectFile>(&O)) + return Sec.getIndex(); + return cast<COFFObjectFile>(O).getSectionID(Sec); +} + +static unsigned getSymbolSectionID(const ObjectFile &O, SymbolRef Sym) { + if (auto *M = dyn_cast<MachOObjectFile>(&O)) + return M->getSymbolSectionID(Sym); + if (const auto *M = dyn_cast<WasmObjectFile>(&O)) + return M->getSymbolSectionId(Sym); + if (const auto *M = dyn_cast<XCOFFObjectFile>(&O)) + return M->getSymbolSectionID(Sym); + return cast<COFFObjectFile>(O).getSymbolSectionID(Sym); +} + +std::vector<std::pair<SymbolRef, uint64_t>> +llvm::object::computeSymbolSizes(const ObjectFile &O) { + std::vector<std::pair<SymbolRef, uint64_t>> Ret; + + if (const auto *E = dyn_cast<ELFObjectFileBase>(&O)) { + auto Syms = E->symbols(); + if (Syms.empty()) + Syms = E->getDynamicSymbolIterators(); + for (ELFSymbolRef Sym : Syms) + Ret.push_back({Sym, Sym.getSize()}); + return Ret; + } + + // Collect sorted symbol addresses. Include dummy addresses for the end + // of each section. 
+ std::vector<SymEntry> Addresses; + unsigned SymNum = 0; + for (symbol_iterator I = O.symbol_begin(), E = O.symbol_end(); I != E; ++I) { + SymbolRef Sym = *I; + Expected<uint64_t> ValueOrErr = Sym.getValue(); + if (!ValueOrErr) + // TODO: Actually report errors helpfully. + report_fatal_error(ValueOrErr.takeError()); + Addresses.push_back({I, *ValueOrErr, SymNum, getSymbolSectionID(O, Sym)}); + ++SymNum; + } + for (SectionRef Sec : O.sections()) { + uint64_t Address = Sec.getAddress(); + uint64_t Size = Sec.getSize(); + Addresses.push_back( + {O.symbol_end(), Address + Size, 0, getSectionID(O, Sec)}); + } + + if (Addresses.empty()) + return Ret; + + array_pod_sort(Addresses.begin(), Addresses.end(), compareAddress); + + // Compute the size as the gap to the next symbol + for (unsigned I = 0, N = Addresses.size() - 1; I < N; ++I) { + auto &P = Addresses[I]; + if (P.I == O.symbol_end()) + continue; + + // If multiple symbol have the same address, give both the same size. + unsigned NextI = I + 1; + while (NextI < N && Addresses[NextI].Address == P.Address) + ++NextI; + + uint64_t Size = Addresses[NextI].Address - P.Address; + P.Address = Size; + } + + // Assign the sorted symbols in the original order. + Ret.resize(SymNum); + for (SymEntry &P : Addresses) { + if (P.I == O.symbol_end()) + continue; + Ret[P.Number] = {*P.I, P.Address}; + } + return Ret; +} diff --git a/contrib/libs/llvm14/lib/Object/SymbolicFile.cpp b/contrib/libs/llvm14/lib/Object/SymbolicFile.cpp new file mode 100644 index 0000000000..58db5b6729 --- /dev/null +++ b/contrib/libs/llvm14/lib/Object/SymbolicFile.cpp @@ -0,0 +1,129 @@ +//===- SymbolicFile.cpp - Interface that only provides symbols ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a file format independent SymbolicFile class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/SymbolicFile.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Magic.h" +#include "llvm/Object/COFFImportFile.h" +#include "llvm/Object/Error.h" +#include "llvm/Object/IRObjectFile.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include <algorithm> +#include <memory> + +using namespace llvm; +using namespace object; + +SymbolicFile::SymbolicFile(unsigned int Type, MemoryBufferRef Source) + : Binary(Type, Source) {} + +SymbolicFile::~SymbolicFile() = default; + +Expected<std::unique_ptr<SymbolicFile>> +SymbolicFile::createSymbolicFile(MemoryBufferRef Object, file_magic Type, + LLVMContext *Context, bool InitContent) { + StringRef Data = Object.getBuffer(); + if (Type == file_magic::unknown) + Type = identify_magic(Data); + + if (!isSymbolicFile(Type, Context)) + return errorCodeToError(object_error::invalid_file_type); + + switch (Type) { + case file_magic::bitcode: + // Context is guaranteed to be non-null here, because bitcode magic only + // indicates a symbolic file when Context is non-null. 
+ return IRObjectFile::create(Object, *Context); + case file_magic::elf: + case file_magic::elf_executable: + case file_magic::elf_shared_object: + case file_magic::elf_core: + case file_magic::goff_object: + case file_magic::macho_executable: + case file_magic::macho_fixed_virtual_memory_shared_lib: + case file_magic::macho_core: + case file_magic::macho_preload_executable: + case file_magic::macho_dynamically_linked_shared_lib: + case file_magic::macho_dynamic_linker: + case file_magic::macho_bundle: + case file_magic::macho_dynamically_linked_shared_lib_stub: + case file_magic::macho_dsym_companion: + case file_magic::macho_kext_bundle: + case file_magic::pecoff_executable: + case file_magic::xcoff_object_32: + case file_magic::xcoff_object_64: + case file_magic::wasm_object: + return ObjectFile::createObjectFile(Object, Type, InitContent); + case file_magic::coff_import_library: + return std::unique_ptr<SymbolicFile>(new COFFImportFile(Object)); + case file_magic::elf_relocatable: + case file_magic::macho_object: + case file_magic::coff_object: { + Expected<std::unique_ptr<ObjectFile>> Obj = + ObjectFile::createObjectFile(Object, Type, InitContent); + if (!Obj || !Context) + return std::move(Obj); + + Expected<MemoryBufferRef> BCData = + IRObjectFile::findBitcodeInObject(*Obj->get()); + if (!BCData) { + consumeError(BCData.takeError()); + return std::move(Obj); + } + + return IRObjectFile::create( + MemoryBufferRef(BCData->getBuffer(), Object.getBufferIdentifier()), + *Context); + } + default: + llvm_unreachable("Unexpected Binary File Type"); + } +} + +bool SymbolicFile::isSymbolicFile(file_magic Type, const LLVMContext *Context) { + switch (Type) { + case file_magic::bitcode: + return Context != nullptr; + case file_magic::elf: + case file_magic::elf_executable: + case file_magic::elf_shared_object: + case file_magic::elf_core: + case file_magic::goff_object: + case file_magic::macho_executable: + case file_magic::macho_fixed_virtual_memory_shared_lib: + case 
file_magic::macho_core: + case file_magic::macho_preload_executable: + case file_magic::macho_dynamically_linked_shared_lib: + case file_magic::macho_dynamic_linker: + case file_magic::macho_bundle: + case file_magic::macho_dynamically_linked_shared_lib_stub: + case file_magic::macho_dsym_companion: + case file_magic::macho_kext_bundle: + case file_magic::pecoff_executable: + case file_magic::xcoff_object_32: + case file_magic::xcoff_object_64: + case file_magic::wasm_object: + case file_magic::coff_import_library: + case file_magic::elf_relocatable: + case file_magic::macho_object: + case file_magic::coff_object: + return true; + default: + return false; + } +} diff --git a/contrib/libs/llvm14/lib/Object/TapiFile.cpp b/contrib/libs/llvm14/lib/Object/TapiFile.cpp new file mode 100644 index 0000000000..83568e8d82 --- /dev/null +++ b/contrib/libs/llvm14/lib/Object/TapiFile.cpp @@ -0,0 +1,96 @@ +//===- TapiFile.cpp -------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the Text-based Dynamic Library Stub format. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/TapiFile.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/Error.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/TextAPI/Symbol.h" + +using namespace llvm; +using namespace MachO; +using namespace object; + +static uint32_t getFlags(const Symbol *Sym) { + uint32_t Flags = BasicSymbolRef::SF_Global; + if (Sym->isUndefined()) + Flags |= BasicSymbolRef::SF_Undefined; + else + Flags |= BasicSymbolRef::SF_Exported; + + if (Sym->isWeakDefined() || Sym->isWeakReferenced()) + Flags |= BasicSymbolRef::SF_Weak; + + return Flags; +} + +TapiFile::TapiFile(MemoryBufferRef Source, const InterfaceFile &interface, + Architecture Arch) + : SymbolicFile(ID_TapiFile, Source), Arch(Arch) { + for (const auto *Symbol : interface.symbols()) { + if (!Symbol->getArchitectures().has(Arch)) + continue; + + switch (Symbol->getKind()) { + case SymbolKind::GlobalSymbol: + Symbols.emplace_back(StringRef(), Symbol->getName(), getFlags(Symbol)); + break; + case SymbolKind::ObjectiveCClass: + if (interface.getPlatforms().count(PLATFORM_MACOS) && Arch == AK_i386) { + Symbols.emplace_back(ObjC1ClassNamePrefix, Symbol->getName(), + getFlags(Symbol)); + } else { + Symbols.emplace_back(ObjC2ClassNamePrefix, Symbol->getName(), + getFlags(Symbol)); + Symbols.emplace_back(ObjC2MetaClassNamePrefix, Symbol->getName(), + getFlags(Symbol)); + } + break; + case SymbolKind::ObjectiveCClassEHType: + Symbols.emplace_back(ObjC2EHTypePrefix, Symbol->getName(), + getFlags(Symbol)); + break; + case SymbolKind::ObjectiveCInstanceVariable: + Symbols.emplace_back(ObjC2IVarPrefix, Symbol->getName(), + getFlags(Symbol)); + break; + } + } +} + +TapiFile::~TapiFile() = default; + +void TapiFile::moveSymbolNext(DataRefImpl &DRI) const { DRI.d.a++; } + +Error TapiFile::printSymbolName(raw_ostream &OS, DataRefImpl DRI) const { + assert(DRI.d.a < Symbols.size() && "Attempt to access symbol out of 
bounds"); + const Symbol &Sym = Symbols[DRI.d.a]; + OS << Sym.Prefix << Sym.Name; + return Error::success(); +} + +Expected<uint32_t> TapiFile::getSymbolFlags(DataRefImpl DRI) const { + assert(DRI.d.a < Symbols.size() && "Attempt to access symbol out of bounds"); + return Symbols[DRI.d.a].Flags; +} + +basic_symbol_iterator TapiFile::symbol_begin() const { + DataRefImpl DRI; + DRI.d.a = 0; + return BasicSymbolRef{DRI, this}; +} + +basic_symbol_iterator TapiFile::symbol_end() const { + DataRefImpl DRI; + DRI.d.a = Symbols.size(); + return BasicSymbolRef{DRI, this}; +} diff --git a/contrib/libs/llvm14/lib/Object/TapiUniversal.cpp b/contrib/libs/llvm14/lib/Object/TapiUniversal.cpp new file mode 100644 index 0000000000..d73d93f6bd --- /dev/null +++ b/contrib/libs/llvm14/lib/Object/TapiUniversal.cpp @@ -0,0 +1,61 @@ +//===- TapiUniversal.cpp --------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the Text-based Dynamic Library Stub format. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/TapiUniversal.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/Error.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/TextAPI/TextAPIReader.h" + +using namespace llvm; +using namespace MachO; +using namespace object; + +TapiUniversal::TapiUniversal(MemoryBufferRef Source, Error &Err) + : Binary(ID_TapiUniversal, Source) { + Expected<std::unique_ptr<InterfaceFile>> Result = TextAPIReader::get(Source); + ErrorAsOutParameter ErrAsOuParam(&Err); + if (!Result) { + Err = Result.takeError(); + return; + } + ParsedFile = std::move(Result.get()); + + auto FlattenObjectInfo = [this](const auto &File) { + StringRef Name = File->getInstallName(); + for (const Architecture Arch : File->getArchitectures()) + Libraries.emplace_back(Library({Name, Arch})); + }; + + FlattenObjectInfo(ParsedFile); + // Get inlined documents from tapi file. + for (const std::shared_ptr<InterfaceFile> &File : ParsedFile->documents()) + FlattenObjectInfo(File); +} + +TapiUniversal::~TapiUniversal() = default; + +Expected<std::unique_ptr<TapiFile>> +TapiUniversal::ObjectForArch::getAsObjectFile() const { + return std::unique_ptr<TapiFile>(new TapiFile(Parent->getMemoryBufferRef(), + *Parent->ParsedFile.get(), + Parent->Libraries[Index].Arch)); +} + +Expected<std::unique_ptr<TapiUniversal>> +TapiUniversal::create(MemoryBufferRef Source) { + Error Err = Error::success(); + std::unique_ptr<TapiUniversal> Ret(new TapiUniversal(Source, Err)); + if (Err) + return std::move(Err); + return std::move(Ret); +} diff --git a/contrib/libs/llvm14/lib/Object/WasmObjectFile.cpp b/contrib/libs/llvm14/lib/Object/WasmObjectFile.cpp new file mode 100644 index 0000000000..6a19b159f3 --- /dev/null +++ b/contrib/libs/llvm14/lib/Object/WasmObjectFile.cpp @@ -0,0 +1,1997 @@ +//===- WasmObjectFile.cpp - Wasm object file implementation ---------------===// +// +// Part of the LLVM Project, under the 
Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/Wasm.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/Error.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Object/SymbolicFile.h" +#include "llvm/Object/Wasm.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/ScopedPrinter.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <cstring> +#include <system_error> + +#define DEBUG_TYPE "wasm-object" + +using namespace llvm; +using namespace object; + +void WasmSymbol::print(raw_ostream &Out) const { + Out << "Name=" << Info.Name + << ", Kind=" << toString(wasm::WasmSymbolType(Info.Kind)) << ", Flags=0x" + << Twine::utohexstr(Info.Flags); + if (!isTypeData()) { + Out << ", ElemIndex=" << Info.ElementIndex; + } else if (isDefined()) { + Out << ", Segment=" << Info.DataRef.Segment; + Out << ", Offset=" << Info.DataRef.Offset; + Out << ", Size=" << Info.DataRef.Size; + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void WasmSymbol::dump() const { print(dbgs()); } +#endif + +Expected<std::unique_ptr<WasmObjectFile>> +ObjectFile::createWasmObjectFile(MemoryBufferRef Buffer) { + Error Err = Error::success(); + auto ObjectFile = std::make_unique<WasmObjectFile>(Buffer, Err); + if (Err) + return std::move(Err); + + return std::move(ObjectFile); +} + +#define VARINT7_MAX ((1 
<< 7) - 1) +#define VARINT7_MIN (-(1 << 7)) +#define VARUINT7_MAX (1 << 7) +#define VARUINT1_MAX (1) + +static uint8_t readUint8(WasmObjectFile::ReadContext &Ctx) { + if (Ctx.Ptr == Ctx.End) + report_fatal_error("EOF while reading uint8"); + return *Ctx.Ptr++; +} + +static uint32_t readUint32(WasmObjectFile::ReadContext &Ctx) { + if (Ctx.Ptr + 4 > Ctx.End) + report_fatal_error("EOF while reading uint32"); + uint32_t Result = support::endian::read32le(Ctx.Ptr); + Ctx.Ptr += 4; + return Result; +} + +static int32_t readFloat32(WasmObjectFile::ReadContext &Ctx) { + if (Ctx.Ptr + 4 > Ctx.End) + report_fatal_error("EOF while reading float64"); + int32_t Result = 0; + memcpy(&Result, Ctx.Ptr, sizeof(Result)); + Ctx.Ptr += sizeof(Result); + return Result; +} + +static int64_t readFloat64(WasmObjectFile::ReadContext &Ctx) { + if (Ctx.Ptr + 8 > Ctx.End) + report_fatal_error("EOF while reading float64"); + int64_t Result = 0; + memcpy(&Result, Ctx.Ptr, sizeof(Result)); + Ctx.Ptr += sizeof(Result); + return Result; +} + +static uint64_t readULEB128(WasmObjectFile::ReadContext &Ctx) { + unsigned Count; + const char *Error = nullptr; + uint64_t Result = decodeULEB128(Ctx.Ptr, &Count, Ctx.End, &Error); + if (Error) + report_fatal_error(Error); + Ctx.Ptr += Count; + return Result; +} + +static StringRef readString(WasmObjectFile::ReadContext &Ctx) { + uint32_t StringLen = readULEB128(Ctx); + if (Ctx.Ptr + StringLen > Ctx.End) + report_fatal_error("EOF while reading string"); + StringRef Return = + StringRef(reinterpret_cast<const char *>(Ctx.Ptr), StringLen); + Ctx.Ptr += StringLen; + return Return; +} + +static int64_t readLEB128(WasmObjectFile::ReadContext &Ctx) { + unsigned Count; + const char *Error = nullptr; + uint64_t Result = decodeSLEB128(Ctx.Ptr, &Count, Ctx.End, &Error); + if (Error) + report_fatal_error(Error); + Ctx.Ptr += Count; + return Result; +} + +static uint8_t readVaruint1(WasmObjectFile::ReadContext &Ctx) { + int64_t Result = readLEB128(Ctx); + if (Result > 
VARUINT1_MAX || Result < 0) + report_fatal_error("LEB is outside Varuint1 range"); + return Result; +} + +static int32_t readVarint32(WasmObjectFile::ReadContext &Ctx) { + int64_t Result = readLEB128(Ctx); + if (Result > INT32_MAX || Result < INT32_MIN) + report_fatal_error("LEB is outside Varint32 range"); + return Result; +} + +static uint32_t readVaruint32(WasmObjectFile::ReadContext &Ctx) { + uint64_t Result = readULEB128(Ctx); + if (Result > UINT32_MAX) + report_fatal_error("LEB is outside Varuint32 range"); + return Result; +} + +static int64_t readVarint64(WasmObjectFile::ReadContext &Ctx) { + return readLEB128(Ctx); +} + +static uint64_t readVaruint64(WasmObjectFile::ReadContext &Ctx) { + return readULEB128(Ctx); +} + +static uint8_t readOpcode(WasmObjectFile::ReadContext &Ctx) { + return readUint8(Ctx); +} + +static Error readInitExpr(wasm::WasmInitExpr &Expr, + WasmObjectFile::ReadContext &Ctx) { + Expr.Opcode = readOpcode(Ctx); + + switch (Expr.Opcode) { + case wasm::WASM_OPCODE_I32_CONST: + Expr.Value.Int32 = readVarint32(Ctx); + break; + case wasm::WASM_OPCODE_I64_CONST: + Expr.Value.Int64 = readVarint64(Ctx); + break; + case wasm::WASM_OPCODE_F32_CONST: + Expr.Value.Float32 = readFloat32(Ctx); + break; + case wasm::WASM_OPCODE_F64_CONST: + Expr.Value.Float64 = readFloat64(Ctx); + break; + case wasm::WASM_OPCODE_GLOBAL_GET: + Expr.Value.Global = readULEB128(Ctx); + break; + case wasm::WASM_OPCODE_REF_NULL: { + wasm::ValType Ty = static_cast<wasm::ValType>(readULEB128(Ctx)); + if (Ty != wasm::ValType::EXTERNREF) { + return make_error<GenericBinaryError>("invalid type for ref.null", + object_error::parse_failed); + } + break; + } + default: + return make_error<GenericBinaryError>("invalid opcode in init_expr", + object_error::parse_failed); + } + + uint8_t EndOpcode = readOpcode(Ctx); + if (EndOpcode != wasm::WASM_OPCODE_END) { + return make_error<GenericBinaryError>("invalid init_expr", + object_error::parse_failed); + } + return Error::success(); +} + 
+static wasm::WasmLimits readLimits(WasmObjectFile::ReadContext &Ctx) { + wasm::WasmLimits Result; + Result.Flags = readVaruint32(Ctx); + Result.Minimum = readVaruint64(Ctx); + if (Result.Flags & wasm::WASM_LIMITS_FLAG_HAS_MAX) + Result.Maximum = readVaruint64(Ctx); + return Result; +} + +static wasm::WasmTableType readTableType(WasmObjectFile::ReadContext &Ctx) { + wasm::WasmTableType TableType; + TableType.ElemType = readUint8(Ctx); + TableType.Limits = readLimits(Ctx); + return TableType; +} + +static Error readSection(WasmSection &Section, WasmObjectFile::ReadContext &Ctx, + WasmSectionOrderChecker &Checker) { + Section.Offset = Ctx.Ptr - Ctx.Start; + Section.Type = readUint8(Ctx); + LLVM_DEBUG(dbgs() << "readSection type=" << Section.Type << "\n"); + uint32_t Size = readVaruint32(Ctx); + if (Size == 0) + return make_error<StringError>("zero length section", + object_error::parse_failed); + if (Ctx.Ptr + Size > Ctx.End) + return make_error<StringError>("section too large", + object_error::parse_failed); + if (Section.Type == wasm::WASM_SEC_CUSTOM) { + WasmObjectFile::ReadContext SectionCtx; + SectionCtx.Start = Ctx.Ptr; + SectionCtx.Ptr = Ctx.Ptr; + SectionCtx.End = Ctx.Ptr + Size; + + Section.Name = readString(SectionCtx); + + uint32_t SectionNameSize = SectionCtx.Ptr - SectionCtx.Start; + Ctx.Ptr += SectionNameSize; + Size -= SectionNameSize; + } + + if (!Checker.isValidSectionOrder(Section.Type, Section.Name)) { + return make_error<StringError>("out of order section type: " + + llvm::to_string(Section.Type), + object_error::parse_failed); + } + + Section.Content = ArrayRef<uint8_t>(Ctx.Ptr, Size); + Ctx.Ptr += Size; + return Error::success(); +} + +WasmObjectFile::WasmObjectFile(MemoryBufferRef Buffer, Error &Err) + : ObjectFile(Binary::ID_Wasm, Buffer) { + ErrorAsOutParameter ErrAsOutParam(&Err); + Header.Magic = getData().substr(0, 4); + if (Header.Magic != StringRef("\0asm", 4)) { + Err = make_error<StringError>("invalid magic number", + 
object_error::parse_failed); + return; + } + + ReadContext Ctx; + Ctx.Start = getData().bytes_begin(); + Ctx.Ptr = Ctx.Start + 4; + Ctx.End = Ctx.Start + getData().size(); + + if (Ctx.Ptr + 4 > Ctx.End) { + Err = make_error<StringError>("missing version number", + object_error::parse_failed); + return; + } + + Header.Version = readUint32(Ctx); + if (Header.Version != wasm::WasmVersion) { + Err = make_error<StringError>("invalid version number: " + + Twine(Header.Version), + object_error::parse_failed); + return; + } + + WasmSectionOrderChecker Checker; + while (Ctx.Ptr < Ctx.End) { + WasmSection Sec; + if ((Err = readSection(Sec, Ctx, Checker))) + return; + if ((Err = parseSection(Sec))) + return; + + Sections.push_back(Sec); + } +} + +Error WasmObjectFile::parseSection(WasmSection &Sec) { + ReadContext Ctx; + Ctx.Start = Sec.Content.data(); + Ctx.End = Ctx.Start + Sec.Content.size(); + Ctx.Ptr = Ctx.Start; + switch (Sec.Type) { + case wasm::WASM_SEC_CUSTOM: + return parseCustomSection(Sec, Ctx); + case wasm::WASM_SEC_TYPE: + return parseTypeSection(Ctx); + case wasm::WASM_SEC_IMPORT: + return parseImportSection(Ctx); + case wasm::WASM_SEC_FUNCTION: + return parseFunctionSection(Ctx); + case wasm::WASM_SEC_TABLE: + return parseTableSection(Ctx); + case wasm::WASM_SEC_MEMORY: + return parseMemorySection(Ctx); + case wasm::WASM_SEC_TAG: + return parseTagSection(Ctx); + case wasm::WASM_SEC_GLOBAL: + return parseGlobalSection(Ctx); + case wasm::WASM_SEC_EXPORT: + return parseExportSection(Ctx); + case wasm::WASM_SEC_START: + return parseStartSection(Ctx); + case wasm::WASM_SEC_ELEM: + return parseElemSection(Ctx); + case wasm::WASM_SEC_CODE: + return parseCodeSection(Ctx); + case wasm::WASM_SEC_DATA: + return parseDataSection(Ctx); + case wasm::WASM_SEC_DATACOUNT: + return parseDataCountSection(Ctx); + default: + return make_error<GenericBinaryError>( + "invalid section type: " + Twine(Sec.Type), object_error::parse_failed); + } +} + +Error 
WasmObjectFile::parseDylinkSection(ReadContext &Ctx) { + // Legacy "dylink" section support. + // See parseDylink0Section for the current "dylink.0" section parsing. + HasDylinkSection = true; + DylinkInfo.MemorySize = readVaruint32(Ctx); + DylinkInfo.MemoryAlignment = readVaruint32(Ctx); + DylinkInfo.TableSize = readVaruint32(Ctx); + DylinkInfo.TableAlignment = readVaruint32(Ctx); + uint32_t Count = readVaruint32(Ctx); + while (Count--) { + DylinkInfo.Needed.push_back(readString(Ctx)); + } + + if (Ctx.Ptr != Ctx.End) + return make_error<GenericBinaryError>("dylink section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseDylink0Section(ReadContext &Ctx) { + // See + // https://github.com/WebAssembly/tool-conventions/blob/main/DynamicLinking.md + HasDylinkSection = true; + + const uint8_t *OrigEnd = Ctx.End; + while (Ctx.Ptr < OrigEnd) { + Ctx.End = OrigEnd; + uint8_t Type = readUint8(Ctx); + uint32_t Size = readVaruint32(Ctx); + LLVM_DEBUG(dbgs() << "readSubsection type=" << int(Type) << " size=" << Size + << "\n"); + Ctx.End = Ctx.Ptr + Size; + uint32_t Count; + switch (Type) { + case wasm::WASM_DYLINK_MEM_INFO: + DylinkInfo.MemorySize = readVaruint32(Ctx); + DylinkInfo.MemoryAlignment = readVaruint32(Ctx); + DylinkInfo.TableSize = readVaruint32(Ctx); + DylinkInfo.TableAlignment = readVaruint32(Ctx); + break; + case wasm::WASM_DYLINK_NEEDED: + Count = readVaruint32(Ctx); + while (Count--) { + DylinkInfo.Needed.push_back(readString(Ctx)); + } + break; + case wasm::WASM_DYLINK_EXPORT_INFO: { + uint32_t Count = readVaruint32(Ctx); + while (Count--) { + DylinkInfo.ExportInfo.push_back({readString(Ctx), readVaruint32(Ctx)}); + } + break; + } + case wasm::WASM_DYLINK_IMPORT_INFO: { + uint32_t Count = readVaruint32(Ctx); + while (Count--) { + DylinkInfo.ImportInfo.push_back( + {readString(Ctx), readString(Ctx), readVaruint32(Ctx)}); + } + break; + } + default: + LLVM_DEBUG(dbgs() << "unknown dylink.0 
sub-section: " << Type << "\n"); + Ctx.Ptr += Size; + break; + } + if (Ctx.Ptr != Ctx.End) { + return make_error<GenericBinaryError>( + "dylink.0 sub-section ended prematurely", object_error::parse_failed); + } + } + + if (Ctx.Ptr != Ctx.End) + return make_error<GenericBinaryError>("dylink.0 section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseNameSection(ReadContext &Ctx) { + llvm::DenseSet<uint64_t> SeenFunctions; + llvm::DenseSet<uint64_t> SeenGlobals; + llvm::DenseSet<uint64_t> SeenSegments; + if (Functions.size() && !SeenCodeSection) { + return make_error<GenericBinaryError>("names must come after code section", + object_error::parse_failed); + } + + while (Ctx.Ptr < Ctx.End) { + uint8_t Type = readUint8(Ctx); + uint32_t Size = readVaruint32(Ctx); + const uint8_t *SubSectionEnd = Ctx.Ptr + Size; + switch (Type) { + case wasm::WASM_NAMES_FUNCTION: + case wasm::WASM_NAMES_GLOBAL: + case wasm::WASM_NAMES_DATA_SEGMENT: { + uint32_t Count = readVaruint32(Ctx); + while (Count--) { + uint32_t Index = readVaruint32(Ctx); + StringRef Name = readString(Ctx); + wasm::NameType nameType = wasm::NameType::FUNCTION; + if (Type == wasm::WASM_NAMES_FUNCTION) { + if (!SeenFunctions.insert(Index).second) + return make_error<GenericBinaryError>( + "function named more than once", object_error::parse_failed); + if (!isValidFunctionIndex(Index) || Name.empty()) + return make_error<GenericBinaryError>("invalid name entry", + object_error::parse_failed); + + if (isDefinedFunctionIndex(Index)) + getDefinedFunction(Index).DebugName = Name; + } else if (Type == wasm::WASM_NAMES_GLOBAL) { + nameType = wasm::NameType::GLOBAL; + if (!SeenGlobals.insert(Index).second) + return make_error<GenericBinaryError>("global named more than once", + object_error::parse_failed); + if (!isValidGlobalIndex(Index) || Name.empty()) + return make_error<GenericBinaryError>("invalid name entry", + object_error::parse_failed); + } else { + 
nameType = wasm::NameType::DATA_SEGMENT; + if (!SeenSegments.insert(Index).second) + return make_error<GenericBinaryError>( + "segment named more than once", object_error::parse_failed); + if (Index > DataSegments.size()) + return make_error<GenericBinaryError>("invalid named data segment", + object_error::parse_failed); + } + DebugNames.push_back(wasm::WasmDebugName{nameType, Index, Name}); + } + break; + } + // Ignore local names for now + case wasm::WASM_NAMES_LOCAL: + default: + Ctx.Ptr += Size; + break; + } + if (Ctx.Ptr != SubSectionEnd) + return make_error<GenericBinaryError>( + "name sub-section ended prematurely", object_error::parse_failed); + } + + if (Ctx.Ptr != Ctx.End) + return make_error<GenericBinaryError>("name section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseLinkingSection(ReadContext &Ctx) { + HasLinkingSection = true; + if (Functions.size() && !SeenCodeSection) { + return make_error<GenericBinaryError>( + "linking data must come after code section", + object_error::parse_failed); + } + + LinkingData.Version = readVaruint32(Ctx); + if (LinkingData.Version != wasm::WasmMetadataVersion) { + return make_error<GenericBinaryError>( + "unexpected metadata version: " + Twine(LinkingData.Version) + + " (Expected: " + Twine(wasm::WasmMetadataVersion) + ")", + object_error::parse_failed); + } + + const uint8_t *OrigEnd = Ctx.End; + while (Ctx.Ptr < OrigEnd) { + Ctx.End = OrigEnd; + uint8_t Type = readUint8(Ctx); + uint32_t Size = readVaruint32(Ctx); + LLVM_DEBUG(dbgs() << "readSubsection type=" << int(Type) << " size=" << Size + << "\n"); + Ctx.End = Ctx.Ptr + Size; + switch (Type) { + case wasm::WASM_SYMBOL_TABLE: + if (Error Err = parseLinkingSectionSymtab(Ctx)) + return Err; + break; + case wasm::WASM_SEGMENT_INFO: { + uint32_t Count = readVaruint32(Ctx); + if (Count > DataSegments.size()) + return make_error<GenericBinaryError>("too many segment names", + object_error::parse_failed); 
+ for (uint32_t I = 0; I < Count; I++) { + DataSegments[I].Data.Name = readString(Ctx); + DataSegments[I].Data.Alignment = readVaruint32(Ctx); + DataSegments[I].Data.LinkingFlags = readVaruint32(Ctx); + } + break; + } + case wasm::WASM_INIT_FUNCS: { + uint32_t Count = readVaruint32(Ctx); + LinkingData.InitFunctions.reserve(Count); + for (uint32_t I = 0; I < Count; I++) { + wasm::WasmInitFunc Init; + Init.Priority = readVaruint32(Ctx); + Init.Symbol = readVaruint32(Ctx); + if (!isValidFunctionSymbol(Init.Symbol)) + return make_error<GenericBinaryError>("invalid function symbol: " + + Twine(Init.Symbol), + object_error::parse_failed); + LinkingData.InitFunctions.emplace_back(Init); + } + break; + } + case wasm::WASM_COMDAT_INFO: + if (Error Err = parseLinkingSectionComdat(Ctx)) + return Err; + break; + default: + Ctx.Ptr += Size; + break; + } + if (Ctx.Ptr != Ctx.End) + return make_error<GenericBinaryError>( + "linking sub-section ended prematurely", object_error::parse_failed); + } + if (Ctx.Ptr != OrigEnd) + return make_error<GenericBinaryError>("linking section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) { + uint32_t Count = readVaruint32(Ctx); + LinkingData.SymbolTable.reserve(Count); + Symbols.reserve(Count); + StringSet<> SymbolNames; + + std::vector<wasm::WasmImport *> ImportedGlobals; + std::vector<wasm::WasmImport *> ImportedFunctions; + std::vector<wasm::WasmImport *> ImportedTags; + std::vector<wasm::WasmImport *> ImportedTables; + ImportedGlobals.reserve(Imports.size()); + ImportedFunctions.reserve(Imports.size()); + ImportedTags.reserve(Imports.size()); + ImportedTables.reserve(Imports.size()); + for (auto &I : Imports) { + if (I.Kind == wasm::WASM_EXTERNAL_FUNCTION) + ImportedFunctions.emplace_back(&I); + else if (I.Kind == wasm::WASM_EXTERNAL_GLOBAL) + ImportedGlobals.emplace_back(&I); + else if (I.Kind == wasm::WASM_EXTERNAL_TAG) + 
ImportedTags.emplace_back(&I); + else if (I.Kind == wasm::WASM_EXTERNAL_TABLE) + ImportedTables.emplace_back(&I); + } + + while (Count--) { + wasm::WasmSymbolInfo Info; + const wasm::WasmSignature *Signature = nullptr; + const wasm::WasmGlobalType *GlobalType = nullptr; + const wasm::WasmTableType *TableType = nullptr; + + Info.Kind = readUint8(Ctx); + Info.Flags = readVaruint32(Ctx); + bool IsDefined = (Info.Flags & wasm::WASM_SYMBOL_UNDEFINED) == 0; + + switch (Info.Kind) { + case wasm::WASM_SYMBOL_TYPE_FUNCTION: + Info.ElementIndex = readVaruint32(Ctx); + if (!isValidFunctionIndex(Info.ElementIndex) || + IsDefined != isDefinedFunctionIndex(Info.ElementIndex)) + return make_error<GenericBinaryError>("invalid function symbol index", + object_error::parse_failed); + if (IsDefined) { + Info.Name = readString(Ctx); + unsigned FuncIndex = Info.ElementIndex - NumImportedFunctions; + wasm::WasmFunction &Function = Functions[FuncIndex]; + Signature = &Signatures[Function.SigIndex]; + if (Function.SymbolName.empty()) + Function.SymbolName = Info.Name; + } else { + wasm::WasmImport &Import = *ImportedFunctions[Info.ElementIndex]; + if ((Info.Flags & wasm::WASM_SYMBOL_EXPLICIT_NAME) != 0) { + Info.Name = readString(Ctx); + Info.ImportName = Import.Field; + } else { + Info.Name = Import.Field; + } + Signature = &Signatures[Import.SigIndex]; + if (!Import.Module.empty()) { + Info.ImportModule = Import.Module; + } + } + break; + + case wasm::WASM_SYMBOL_TYPE_GLOBAL: + Info.ElementIndex = readVaruint32(Ctx); + if (!isValidGlobalIndex(Info.ElementIndex) || + IsDefined != isDefinedGlobalIndex(Info.ElementIndex)) + return make_error<GenericBinaryError>("invalid global symbol index", + object_error::parse_failed); + if (!IsDefined && (Info.Flags & wasm::WASM_SYMBOL_BINDING_MASK) == + wasm::WASM_SYMBOL_BINDING_WEAK) + return make_error<GenericBinaryError>("undefined weak global symbol", + object_error::parse_failed); + if (IsDefined) { + Info.Name = readString(Ctx); + unsigned 
GlobalIndex = Info.ElementIndex - NumImportedGlobals; + wasm::WasmGlobal &Global = Globals[GlobalIndex]; + GlobalType = &Global.Type; + if (Global.SymbolName.empty()) + Global.SymbolName = Info.Name; + } else { + wasm::WasmImport &Import = *ImportedGlobals[Info.ElementIndex]; + if ((Info.Flags & wasm::WASM_SYMBOL_EXPLICIT_NAME) != 0) { + Info.Name = readString(Ctx); + Info.ImportName = Import.Field; + } else { + Info.Name = Import.Field; + } + GlobalType = &Import.Global; + if (!Import.Module.empty()) { + Info.ImportModule = Import.Module; + } + } + break; + + case wasm::WASM_SYMBOL_TYPE_TABLE: + Info.ElementIndex = readVaruint32(Ctx); + if (!isValidTableNumber(Info.ElementIndex) || + IsDefined != isDefinedTableNumber(Info.ElementIndex)) + return make_error<GenericBinaryError>("invalid table symbol index", + object_error::parse_failed); + if (!IsDefined && (Info.Flags & wasm::WASM_SYMBOL_BINDING_MASK) == + wasm::WASM_SYMBOL_BINDING_WEAK) + return make_error<GenericBinaryError>("undefined weak table symbol", + object_error::parse_failed); + if (IsDefined) { + Info.Name = readString(Ctx); + unsigned TableNumber = Info.ElementIndex - NumImportedTables; + wasm::WasmTable &Table = Tables[TableNumber]; + TableType = &Table.Type; + if (Table.SymbolName.empty()) + Table.SymbolName = Info.Name; + } else { + wasm::WasmImport &Import = *ImportedTables[Info.ElementIndex]; + if ((Info.Flags & wasm::WASM_SYMBOL_EXPLICIT_NAME) != 0) { + Info.Name = readString(Ctx); + Info.ImportName = Import.Field; + } else { + Info.Name = Import.Field; + } + TableType = &Import.Table; + if (!Import.Module.empty()) { + Info.ImportModule = Import.Module; + } + } + break; + + case wasm::WASM_SYMBOL_TYPE_DATA: + Info.Name = readString(Ctx); + if (IsDefined) { + auto Index = readVaruint32(Ctx); + if (Index >= DataSegments.size()) + return make_error<GenericBinaryError>("invalid data symbol index", + object_error::parse_failed); + auto Offset = readVaruint64(Ctx); + auto Size = readVaruint64(Ctx); + 
size_t SegmentSize = DataSegments[Index].Data.Content.size(); + if (Offset > SegmentSize) + return make_error<GenericBinaryError>( + "invalid data symbol offset: `" + Info.Name + "` (offset: " + + Twine(Offset) + " segment size: " + Twine(SegmentSize) + ")", + object_error::parse_failed); + Info.DataRef = wasm::WasmDataReference{Index, Offset, Size}; + } + break; + + case wasm::WASM_SYMBOL_TYPE_SECTION: { + if ((Info.Flags & wasm::WASM_SYMBOL_BINDING_MASK) != + wasm::WASM_SYMBOL_BINDING_LOCAL) + return make_error<GenericBinaryError>( + "section symbols must have local binding", + object_error::parse_failed); + Info.ElementIndex = readVaruint32(Ctx); + // Use somewhat unique section name as symbol name. + StringRef SectionName = Sections[Info.ElementIndex].Name; + Info.Name = SectionName; + break; + } + + case wasm::WASM_SYMBOL_TYPE_TAG: { + Info.ElementIndex = readVaruint32(Ctx); + if (!isValidTagIndex(Info.ElementIndex) || + IsDefined != isDefinedTagIndex(Info.ElementIndex)) + return make_error<GenericBinaryError>("invalid tag symbol index", + object_error::parse_failed); + if (!IsDefined && (Info.Flags & wasm::WASM_SYMBOL_BINDING_MASK) == + wasm::WASM_SYMBOL_BINDING_WEAK) + return make_error<GenericBinaryError>("undefined weak global symbol", + object_error::parse_failed); + if (IsDefined) { + Info.Name = readString(Ctx); + unsigned TagIndex = Info.ElementIndex - NumImportedTags; + wasm::WasmTag &Tag = Tags[TagIndex]; + Signature = &Signatures[Tag.SigIndex]; + if (Tag.SymbolName.empty()) + Tag.SymbolName = Info.Name; + + } else { + wasm::WasmImport &Import = *ImportedTags[Info.ElementIndex]; + if ((Info.Flags & wasm::WASM_SYMBOL_EXPLICIT_NAME) != 0) { + Info.Name = readString(Ctx); + Info.ImportName = Import.Field; + } else { + Info.Name = Import.Field; + } + Signature = &Signatures[Import.SigIndex]; + if (!Import.Module.empty()) { + Info.ImportModule = Import.Module; + } + } + break; + } + + default: + return make_error<GenericBinaryError>("invalid symbol type: 
" + + Twine(unsigned(Info.Kind)), + object_error::parse_failed); + } + + if ((Info.Flags & wasm::WASM_SYMBOL_BINDING_MASK) != + wasm::WASM_SYMBOL_BINDING_LOCAL && + !SymbolNames.insert(Info.Name).second) + return make_error<GenericBinaryError>("duplicate symbol name " + + Twine(Info.Name), + object_error::parse_failed); + LinkingData.SymbolTable.emplace_back(Info); + Symbols.emplace_back(LinkingData.SymbolTable.back(), GlobalType, TableType, + Signature); + LLVM_DEBUG(dbgs() << "Adding symbol: " << Symbols.back() << "\n"); + } + + return Error::success(); +} + +Error WasmObjectFile::parseLinkingSectionComdat(ReadContext &Ctx) { + uint32_t ComdatCount = readVaruint32(Ctx); + StringSet<> ComdatSet; + for (unsigned ComdatIndex = 0; ComdatIndex < ComdatCount; ++ComdatIndex) { + StringRef Name = readString(Ctx); + if (Name.empty() || !ComdatSet.insert(Name).second) + return make_error<GenericBinaryError>("bad/duplicate COMDAT name " + + Twine(Name), + object_error::parse_failed); + LinkingData.Comdats.emplace_back(Name); + uint32_t Flags = readVaruint32(Ctx); + if (Flags != 0) + return make_error<GenericBinaryError>("unsupported COMDAT flags", + object_error::parse_failed); + + uint32_t EntryCount = readVaruint32(Ctx); + while (EntryCount--) { + unsigned Kind = readVaruint32(Ctx); + unsigned Index = readVaruint32(Ctx); + switch (Kind) { + default: + return make_error<GenericBinaryError>("invalid COMDAT entry type", + object_error::parse_failed); + case wasm::WASM_COMDAT_DATA: + if (Index >= DataSegments.size()) + return make_error<GenericBinaryError>( + "COMDAT data index out of range", object_error::parse_failed); + if (DataSegments[Index].Data.Comdat != UINT32_MAX) + return make_error<GenericBinaryError>("data segment in two COMDATs", + object_error::parse_failed); + DataSegments[Index].Data.Comdat = ComdatIndex; + break; + case wasm::WASM_COMDAT_FUNCTION: + if (!isDefinedFunctionIndex(Index)) + return make_error<GenericBinaryError>( + "COMDAT function index out of 
range", object_error::parse_failed); + if (getDefinedFunction(Index).Comdat != UINT32_MAX) + return make_error<GenericBinaryError>("function in two COMDATs", + object_error::parse_failed); + getDefinedFunction(Index).Comdat = ComdatIndex; + break; + case wasm::WASM_COMDAT_SECTION: + if (Index >= Sections.size()) + return make_error<GenericBinaryError>( + "COMDAT section index out of range", object_error::parse_failed); + if (Sections[Index].Type != wasm::WASM_SEC_CUSTOM) + return make_error<GenericBinaryError>( + "non-custom section in a COMDAT", object_error::parse_failed); + Sections[Index].Comdat = ComdatIndex; + break; + } + } + } + return Error::success(); +} + +Error WasmObjectFile::parseProducersSection(ReadContext &Ctx) { + llvm::SmallSet<StringRef, 3> FieldsSeen; + uint32_t Fields = readVaruint32(Ctx); + for (size_t I = 0; I < Fields; ++I) { + StringRef FieldName = readString(Ctx); + if (!FieldsSeen.insert(FieldName).second) + return make_error<GenericBinaryError>( + "producers section does not have unique fields", + object_error::parse_failed); + std::vector<std::pair<std::string, std::string>> *ProducerVec = nullptr; + if (FieldName == "language") { + ProducerVec = &ProducerInfo.Languages; + } else if (FieldName == "processed-by") { + ProducerVec = &ProducerInfo.Tools; + } else if (FieldName == "sdk") { + ProducerVec = &ProducerInfo.SDKs; + } else { + return make_error<GenericBinaryError>( + "producers section field is not named one of language, processed-by, " + "or sdk", + object_error::parse_failed); + } + uint32_t ValueCount = readVaruint32(Ctx); + llvm::SmallSet<StringRef, 8> ProducersSeen; + for (size_t J = 0; J < ValueCount; ++J) { + StringRef Name = readString(Ctx); + StringRef Version = readString(Ctx); + if (!ProducersSeen.insert(Name).second) { + return make_error<GenericBinaryError>( + "producers section contains repeated producer", + object_error::parse_failed); + } + ProducerVec->emplace_back(std::string(Name), std::string(Version)); + } + 
} + if (Ctx.Ptr != Ctx.End) + return make_error<GenericBinaryError>("producers section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseTargetFeaturesSection(ReadContext &Ctx) { + llvm::SmallSet<std::string, 8> FeaturesSeen; + uint32_t FeatureCount = readVaruint32(Ctx); + for (size_t I = 0; I < FeatureCount; ++I) { + wasm::WasmFeatureEntry Feature; + Feature.Prefix = readUint8(Ctx); + switch (Feature.Prefix) { + case wasm::WASM_FEATURE_PREFIX_USED: + case wasm::WASM_FEATURE_PREFIX_REQUIRED: + case wasm::WASM_FEATURE_PREFIX_DISALLOWED: + break; + default: + return make_error<GenericBinaryError>("unknown feature policy prefix", + object_error::parse_failed); + } + Feature.Name = std::string(readString(Ctx)); + if (!FeaturesSeen.insert(Feature.Name).second) + return make_error<GenericBinaryError>( + "target features section contains repeated feature \"" + + Feature.Name + "\"", + object_error::parse_failed); + TargetFeatures.push_back(Feature); + } + if (Ctx.Ptr != Ctx.End) + return make_error<GenericBinaryError>( + "target features section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseRelocSection(StringRef Name, ReadContext &Ctx) { + uint32_t SectionIndex = readVaruint32(Ctx); + if (SectionIndex >= Sections.size()) + return make_error<GenericBinaryError>("invalid section index", + object_error::parse_failed); + WasmSection &Section = Sections[SectionIndex]; + uint32_t RelocCount = readVaruint32(Ctx); + uint32_t EndOffset = Section.Content.size(); + uint32_t PreviousOffset = 0; + while (RelocCount--) { + wasm::WasmRelocation Reloc = {}; + uint32_t type = readVaruint32(Ctx); + Reloc.Type = type; + Reloc.Offset = readVaruint32(Ctx); + if (Reloc.Offset < PreviousOffset) + return make_error<GenericBinaryError>("relocations not in offset order", + object_error::parse_failed); + PreviousOffset = Reloc.Offset; + Reloc.Index = readVaruint32(Ctx); + 
switch (type) { + case wasm::R_WASM_FUNCTION_INDEX_LEB: + case wasm::R_WASM_TABLE_INDEX_SLEB: + case wasm::R_WASM_TABLE_INDEX_SLEB64: + case wasm::R_WASM_TABLE_INDEX_I32: + case wasm::R_WASM_TABLE_INDEX_I64: + case wasm::R_WASM_TABLE_INDEX_REL_SLEB: + case wasm::R_WASM_TABLE_INDEX_REL_SLEB64: + if (!isValidFunctionSymbol(Reloc.Index)) + return make_error<GenericBinaryError>( + "invalid relocation function index", object_error::parse_failed); + break; + case wasm::R_WASM_TABLE_NUMBER_LEB: + if (!isValidTableSymbol(Reloc.Index)) + return make_error<GenericBinaryError>("invalid relocation table index", + object_error::parse_failed); + break; + case wasm::R_WASM_TYPE_INDEX_LEB: + if (Reloc.Index >= Signatures.size()) + return make_error<GenericBinaryError>("invalid relocation type index", + object_error::parse_failed); + break; + case wasm::R_WASM_GLOBAL_INDEX_LEB: + // R_WASM_GLOBAL_INDEX_LEB are can be used against function and data + // symbols to refer to their GOT entries. + if (!isValidGlobalSymbol(Reloc.Index) && + !isValidDataSymbol(Reloc.Index) && + !isValidFunctionSymbol(Reloc.Index)) + return make_error<GenericBinaryError>("invalid relocation global index", + object_error::parse_failed); + break; + case wasm::R_WASM_GLOBAL_INDEX_I32: + if (!isValidGlobalSymbol(Reloc.Index)) + return make_error<GenericBinaryError>("invalid relocation global index", + object_error::parse_failed); + break; + case wasm::R_WASM_TAG_INDEX_LEB: + if (!isValidTagSymbol(Reloc.Index)) + return make_error<GenericBinaryError>("invalid relocation tag index", + object_error::parse_failed); + break; + case wasm::R_WASM_MEMORY_ADDR_LEB: + case wasm::R_WASM_MEMORY_ADDR_SLEB: + case wasm::R_WASM_MEMORY_ADDR_I32: + case wasm::R_WASM_MEMORY_ADDR_REL_SLEB: + case wasm::R_WASM_MEMORY_ADDR_TLS_SLEB: + case wasm::R_WASM_MEMORY_ADDR_LOCREL_I32: + if (!isValidDataSymbol(Reloc.Index)) + return make_error<GenericBinaryError>("invalid relocation data index", + object_error::parse_failed); + Reloc.Addend 
= readVarint32(Ctx); + break; + case wasm::R_WASM_MEMORY_ADDR_LEB64: + case wasm::R_WASM_MEMORY_ADDR_SLEB64: + case wasm::R_WASM_MEMORY_ADDR_I64: + case wasm::R_WASM_MEMORY_ADDR_REL_SLEB64: + case wasm::R_WASM_MEMORY_ADDR_TLS_SLEB64: + if (!isValidDataSymbol(Reloc.Index)) + return make_error<GenericBinaryError>("invalid relocation data index", + object_error::parse_failed); + Reloc.Addend = readVarint64(Ctx); + break; + case wasm::R_WASM_FUNCTION_OFFSET_I32: + if (!isValidFunctionSymbol(Reloc.Index)) + return make_error<GenericBinaryError>( + "invalid relocation function index", object_error::parse_failed); + Reloc.Addend = readVarint32(Ctx); + break; + case wasm::R_WASM_FUNCTION_OFFSET_I64: + if (!isValidFunctionSymbol(Reloc.Index)) + return make_error<GenericBinaryError>( + "invalid relocation function index", object_error::parse_failed); + Reloc.Addend = readVarint64(Ctx); + break; + case wasm::R_WASM_SECTION_OFFSET_I32: + if (!isValidSectionSymbol(Reloc.Index)) + return make_error<GenericBinaryError>( + "invalid relocation section index", object_error::parse_failed); + Reloc.Addend = readVarint32(Ctx); + break; + default: + return make_error<GenericBinaryError>("invalid relocation type: " + + Twine(type), + object_error::parse_failed); + } + + // Relocations must fit inside the section, and must appear in order. They + // also shouldn't overlap a function/element boundary, but we don't bother + // to check that. 
// NOTE(review): the statements down to the first closing brace at column 0
// are the tail of a function whose beginning precedes this chunk; they are
// reproduced unchanged.
    uint64_t Size = 5;
    if (Reloc.Type == wasm::R_WASM_MEMORY_ADDR_LEB64 ||
        Reloc.Type == wasm::R_WASM_MEMORY_ADDR_SLEB64 ||
        Reloc.Type == wasm::R_WASM_MEMORY_ADDR_REL_SLEB64)
      Size = 10;
    if (Reloc.Type == wasm::R_WASM_TABLE_INDEX_I32 ||
        Reloc.Type == wasm::R_WASM_MEMORY_ADDR_I32 ||
        Reloc.Type == wasm::R_WASM_MEMORY_ADDR_LOCREL_I32 ||
        Reloc.Type == wasm::R_WASM_SECTION_OFFSET_I32 ||
        Reloc.Type == wasm::R_WASM_FUNCTION_OFFSET_I32 ||
        Reloc.Type == wasm::R_WASM_GLOBAL_INDEX_I32)
      Size = 4;
    if (Reloc.Type == wasm::R_WASM_TABLE_INDEX_I64 ||
        Reloc.Type == wasm::R_WASM_MEMORY_ADDR_I64 ||
        Reloc.Type == wasm::R_WASM_FUNCTION_OFFSET_I64)
      Size = 8;
    if (Reloc.Offset + Size > EndOffset)
      return make_error<GenericBinaryError>("invalid relocation offset",
                                            object_error::parse_failed);

    Section.Relocations.push_back(Reloc);
  }
  if (Ctx.Ptr != Ctx.End)
    return make_error<GenericBinaryError>("reloc section ended prematurely",
                                          object_error::parse_failed);
  return Error::success();
}

// Dispatches a custom section to the parser that matches its name. Names that
// match none of the branches below are accepted without being parsed.
Error WasmObjectFile::parseCustomSection(WasmSection &Sec, ReadContext &Ctx) {
  if (Sec.Name == "dylink") {
    if (Error Err = parseDylinkSection(Ctx))
      return Err;
  } else if (Sec.Name == "dylink.0") {
    if (Error Err = parseDylink0Section(Ctx))
      return Err;
  } else if (Sec.Name == "name") {
    if (Error Err = parseNameSection(Ctx))
      return Err;
  } else if (Sec.Name == "linking") {
    if (Error Err = parseLinkingSection(Ctx))
      return Err;
  } else if (Sec.Name == "producers") {
    if (Error Err = parseProducersSection(Ctx))
      return Err;
  } else if (Sec.Name == "target_features") {
    if (Error Err = parseTargetFeaturesSection(Ctx))
      return Err;
  } else if (Sec.Name.startswith("reloc.")) {
    if (Error Err = parseRelocSection(Sec.Name, Ctx))
      return Err;
  }
  return Error::success();
}

// Reads the type section into Signatures. Every entry must carry the
// WASM_TYPE_FUNC form byte; parsing must stop exactly at Ctx.End.
Error WasmObjectFile::parseTypeSection(ReadContext &Ctx) {
  uint32_t Count = readVaruint32(Ctx);
  Signatures.reserve(Count);
  while (Count--) {
    wasm::WasmSignature Sig;
    uint8_t Form = readUint8(Ctx);
    if (Form != wasm::WASM_TYPE_FUNC) {
      return make_error<GenericBinaryError>("invalid signature type",
                                            object_error::parse_failed);
    }
    uint32_t ParamCount = readVaruint32(Ctx);
    Sig.Params.reserve(ParamCount);
    while (ParamCount--) {
      uint32_t ParamType = readUint8(Ctx);
      Sig.Params.push_back(wasm::ValType(ParamType));
    }
    uint32_t ReturnCount = readVaruint32(Ctx);
    while (ReturnCount--) {
      uint32_t ReturnType = readUint8(Ctx);
      Sig.Returns.push_back(wasm::ValType(ReturnType));
    }
    Signatures.push_back(std::move(Sig));
  }
  if (Ctx.Ptr != Ctx.End)
    return make_error<GenericBinaryError>("type section ended prematurely",
                                          object_error::parse_failed);
  return Error::success();
}

// Reads the import section into Imports and bumps the NumImported* counter
// for each function, global, table and tag import. Function and tag imports
// must reference a signature index below Signatures.size().
Error WasmObjectFile::parseImportSection(ReadContext &Ctx) {
  uint32_t Count = readVaruint32(Ctx);
  uint32_t NumTypes = Signatures.size();
  Imports.reserve(Count);
  for (uint32_t I = 0; I < Count; I++) {
    wasm::WasmImport Im;
    Im.Module = readString(Ctx);
    Im.Field = readString(Ctx);
    Im.Kind = readUint8(Ctx);
    switch (Im.Kind) {
    case wasm::WASM_EXTERNAL_FUNCTION:
      NumImportedFunctions++;
      Im.SigIndex = readVaruint32(Ctx);
      if (Im.SigIndex >= NumTypes)
        return make_error<GenericBinaryError>("invalid function type",
                                              object_error::parse_failed);
      break;
    case wasm::WASM_EXTERNAL_GLOBAL:
      NumImportedGlobals++;
      Im.Global.Type = readUint8(Ctx);
      Im.Global.Mutable = readVaruint1(Ctx);
      break;
    case wasm::WASM_EXTERNAL_MEMORY:
      Im.Memory = readLimits(Ctx);
      // An imported 64-bit memory marks the whole object as memory64.
      if (Im.Memory.Flags & wasm::WASM_LIMITS_FLAG_IS_64)
        HasMemory64 = true;
      break;
    case wasm::WASM_EXTERNAL_TABLE: {
      Im.Table = readTableType(Ctx);
      NumImportedTables++;
      auto ElemType = Im.Table.ElemType;
      if (ElemType != wasm::WASM_TYPE_FUNCREF &&
          ElemType != wasm::WASM_TYPE_EXTERNREF)
        return make_error<GenericBinaryError>("invalid table element type",
                                              object_error::parse_failed);
      break;
    }
    case wasm::WASM_EXTERNAL_TAG:
      NumImportedTags++;
      if (readUint8(Ctx) != 0) // Reserved 'attribute' field
        return make_error<GenericBinaryError>("invalid attribute",
                                              object_error::parse_failed);
      Im.SigIndex = readVaruint32(Ctx);
      if (Im.SigIndex >= NumTypes)
        return make_error<GenericBinaryError>("invalid tag type",
                                              object_error::parse_failed);
      break;
    default:
      return make_error<GenericBinaryError>("unexpected import kind",
                                            object_error::parse_failed);
    }
    Imports.push_back(Im);
  }
  if (Ctx.Ptr != Ctx.End)
    return make_error<GenericBinaryError>("import section ended prematurely",
                                          object_error::parse_failed);
  return Error::success();
}

// Reads the function section: one signature index per defined function, each
// of which must be below Signatures.size().
Error WasmObjectFile::parseFunctionSection(ReadContext &Ctx) {
  uint32_t Count = readVaruint32(Ctx);
  Functions.reserve(Count);
  uint32_t NumTypes = Signatures.size();
  while (Count--) {
    uint32_t Type = readVaruint32(Ctx);
    if (Type >= NumTypes)
      return make_error<GenericBinaryError>("invalid function type",
                                            object_error::parse_failed);
    wasm::WasmFunction F;
    F.SigIndex = Type;
    Functions.push_back(F);
  }
  if (Ctx.Ptr != Ctx.End)
    return make_error<GenericBinaryError>("function section ended prematurely",
                                          object_error::parse_failed);
  return Error::success();
}

// Reads the table section into Tables and records the current section count
// in TableSection. Defined tables are numbered after the imported ones.
Error WasmObjectFile::parseTableSection(ReadContext &Ctx) {
  TableSection = Sections.size();
  uint32_t Count = readVaruint32(Ctx);
  Tables.reserve(Count);
  while (Count--) {
    wasm::WasmTable T;
    T.Type = readTableType(Ctx);
    T.Index = NumImportedTables + Tables.size();
    Tables.push_back(T);
    // Note: the element type is validated after the table has been stored.
    auto ElemType = Tables.back().Type.ElemType;
    if (ElemType != wasm::WASM_TYPE_FUNCREF &&
        ElemType != wasm::WASM_TYPE_EXTERNREF) {
      return make_error<GenericBinaryError>("invalid table element type",
                                            object_error::parse_failed);
    }
  }
  if (Ctx.Ptr != Ctx.End)
    return make_error<GenericBinaryError>("table section ended prematurely",
                                          object_error::parse_failed);
  return Error::success();
}

// Reads the memory section into Memories; sets HasMemory64 when any memory
// has the WASM_LIMITS_FLAG_IS_64 flag.
Error WasmObjectFile::parseMemorySection(ReadContext &Ctx) {
  uint32_t Count = readVaruint32(Ctx);
  Memories.reserve(Count);
  while (Count--) {
    auto Limits = readLimits(Ctx);
    if (Limits.Flags & wasm::WASM_LIMITS_FLAG_IS_64)
      HasMemory64 = true;
    Memories.push_back(Limits);
  }
  if (Ctx.Ptr != Ctx.End)
    return make_error<GenericBinaryError>("memory section ended prematurely",
                                          object_error::parse_failed);
  return Error::success();
}

// Reads the tag section into Tags and records the current section count in
// TagSection. Each tag has a zero attribute byte followed by a signature index.
Error WasmObjectFile::parseTagSection(ReadContext &Ctx) {
  TagSection = Sections.size();
  uint32_t Count = readVaruint32(Ctx);
  Tags.reserve(Count);
  uint32_t NumTypes = Signatures.size();
  while (Count--) {
    if (readUint8(Ctx) != 0) // Reserved 'attribute' field
      return make_error<GenericBinaryError>("invalid attribute",
                                            object_error::parse_failed);
    uint32_t Type = readVaruint32(Ctx);
    if (Type >= NumTypes)
      return make_error<GenericBinaryError>("invalid tag type",
                                            object_error::parse_failed);
    wasm::WasmTag Tag;
    Tag.Index = NumImportedTags + Tags.size();
    Tag.SigIndex = Type;
    Tags.push_back(Tag);
  }

  if (Ctx.Ptr != Ctx.End)
    return make_error<GenericBinaryError>("tag section ended prematurely",
                                          object_error::parse_failed);
  return Error::success();
}

// Reads the global section into Globals and records the current section count
// in GlobalSection. Each global is a type byte, a mutability bit and an init
// expression.
Error WasmObjectFile::parseGlobalSection(ReadContext &Ctx) {
  GlobalSection = Sections.size();
  uint32_t Count = readVaruint32(Ctx);
  Globals.reserve(Count);
  while (Count--) {
    wasm::WasmGlobal Global;
    Global.Index = NumImportedGlobals + Globals.size();
    Global.Type.Type = readUint8(Ctx);
    Global.Type.Mutable = readVaruint1(Ctx);
    if (Error Err = readInitExpr(Global.InitExpr, Ctx))
      return Err;
    Globals.push_back(Global);
  }
  if (Ctx.Ptr != Ctx.End)
    return make_error<GenericBinaryError>("global section ended prematurely",
                                          object_error::parse_failed);
  return Error::success();
}

// Reads the export section into Exports. Function, global and tag exports are
// index-checked; memory and table exports are stored without an index check.
// An exported defined function also gets its ExportName set.
Error WasmObjectFile::parseExportSection(ReadContext &Ctx) {
  uint32_t Count = readVaruint32(Ctx);
  Exports.reserve(Count);
  for (uint32_t I = 0; I < Count; I++) {
    wasm::WasmExport Ex;
    Ex.Name = readString(Ctx);
    Ex.Kind = readUint8(Ctx);
    Ex.Index = readVaruint32(Ctx);
    switch (Ex.Kind) {
    case wasm::WASM_EXTERNAL_FUNCTION:

      if (!isDefinedFunctionIndex(Ex.Index))
        return make_error<GenericBinaryError>("invalid function export",
                                              object_error::parse_failed);
      getDefinedFunction(Ex.Index).ExportName = Ex.Name;
      break;
    case wasm::WASM_EXTERNAL_GLOBAL:
      if (!isValidGlobalIndex(Ex.Index))
        return make_error<GenericBinaryError>("invalid global export",
                                              object_error::parse_failed);
      break;
    case wasm::WASM_EXTERNAL_TAG:
      if (!isValidTagIndex(Ex.Index))
        return make_error<GenericBinaryError>("invalid tag export",
                                              object_error::parse_failed);
      break;
    case wasm::WASM_EXTERNAL_MEMORY:
    case wasm::WASM_EXTERNAL_TABLE:
      break;
    default:
      return make_error<GenericBinaryError>("unexpected export kind",
                                            object_error::parse_failed);
    }
    Exports.push_back(Ex);
  }
  if (Ctx.Ptr != Ctx.End)
    return make_error<GenericBinaryError>("export section ended prematurely",
                                          object_error::parse_failed);
  return Error::success();
}

// Index predicates. "Valid" means below imported + defined count; "defined"
// additionally excludes the imported range at the bottom of the index space.
bool WasmObjectFile::isValidFunctionIndex(uint32_t Index) const {
  return Index < NumImportedFunctions + Functions.size();
}

bool WasmObjectFile::isDefinedFunctionIndex(uint32_t Index) const {
  return Index >= NumImportedFunctions && isValidFunctionIndex(Index);
}

bool WasmObjectFile::isValidGlobalIndex(uint32_t Index) const {
  return Index < NumImportedGlobals + Globals.size();
}

bool WasmObjectFile::isValidTableNumber(uint32_t Index) const {
  return Index < NumImportedTables + Tables.size();
}

bool WasmObjectFile::isDefinedGlobalIndex(uint32_t Index) const {
  return Index >= NumImportedGlobals && isValidGlobalIndex(Index);
}

bool WasmObjectFile::isDefinedTableNumber(uint32_t Index) const {
  return Index >= NumImportedTables && isValidTableNumber(Index);
}

bool WasmObjectFile::isValidTagIndex(uint32_t Index) const {
  return Index < NumImportedTags + Tags.size();
}

bool
WasmObjectFile::isDefinedTagIndex(uint32_t Index) const { + return Index >= NumImportedTags && isValidTagIndex(Index); +} + +bool WasmObjectFile::isValidFunctionSymbol(uint32_t Index) const { + return Index < Symbols.size() && Symbols[Index].isTypeFunction(); +} + +bool WasmObjectFile::isValidTableSymbol(uint32_t Index) const { + return Index < Symbols.size() && Symbols[Index].isTypeTable(); +} + +bool WasmObjectFile::isValidGlobalSymbol(uint32_t Index) const { + return Index < Symbols.size() && Symbols[Index].isTypeGlobal(); +} + +bool WasmObjectFile::isValidTagSymbol(uint32_t Index) const { + return Index < Symbols.size() && Symbols[Index].isTypeTag(); +} + +bool WasmObjectFile::isValidDataSymbol(uint32_t Index) const { + return Index < Symbols.size() && Symbols[Index].isTypeData(); +} + +bool WasmObjectFile::isValidSectionSymbol(uint32_t Index) const { + return Index < Symbols.size() && Symbols[Index].isTypeSection(); +} + +wasm::WasmFunction &WasmObjectFile::getDefinedFunction(uint32_t Index) { + assert(isDefinedFunctionIndex(Index)); + return Functions[Index - NumImportedFunctions]; +} + +const wasm::WasmFunction & +WasmObjectFile::getDefinedFunction(uint32_t Index) const { + assert(isDefinedFunctionIndex(Index)); + return Functions[Index - NumImportedFunctions]; +} + +wasm::WasmGlobal &WasmObjectFile::getDefinedGlobal(uint32_t Index) { + assert(isDefinedGlobalIndex(Index)); + return Globals[Index - NumImportedGlobals]; +} + +wasm::WasmTag &WasmObjectFile::getDefinedTag(uint32_t Index) { + assert(isDefinedTagIndex(Index)); + return Tags[Index - NumImportedTags]; +} + +Error WasmObjectFile::parseStartSection(ReadContext &Ctx) { + StartFunction = readVaruint32(Ctx); + if (!isValidFunctionIndex(StartFunction)) + return make_error<GenericBinaryError>("invalid start function", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseCodeSection(ReadContext &Ctx) { + SeenCodeSection = true; + CodeSection = Sections.size(); + 
uint32_t FunctionCount = readVaruint32(Ctx); + if (FunctionCount != Functions.size()) { + return make_error<GenericBinaryError>("invalid function count", + object_error::parse_failed); + } + + for (uint32_t i = 0; i < FunctionCount; i++) { + wasm::WasmFunction& Function = Functions[i]; + const uint8_t *FunctionStart = Ctx.Ptr; + uint32_t Size = readVaruint32(Ctx); + const uint8_t *FunctionEnd = Ctx.Ptr + Size; + + Function.CodeOffset = Ctx.Ptr - FunctionStart; + Function.Index = NumImportedFunctions + i; + Function.CodeSectionOffset = FunctionStart - Ctx.Start; + Function.Size = FunctionEnd - FunctionStart; + + uint32_t NumLocalDecls = readVaruint32(Ctx); + Function.Locals.reserve(NumLocalDecls); + while (NumLocalDecls--) { + wasm::WasmLocalDecl Decl; + Decl.Count = readVaruint32(Ctx); + Decl.Type = readUint8(Ctx); + Function.Locals.push_back(Decl); + } + + uint32_t BodySize = FunctionEnd - Ctx.Ptr; + Function.Body = ArrayRef<uint8_t>(Ctx.Ptr, BodySize); + // This will be set later when reading in the linking metadata section. 
+ Function.Comdat = UINT32_MAX; + Ctx.Ptr += BodySize; + assert(Ctx.Ptr == FunctionEnd); + } + if (Ctx.Ptr != Ctx.End) + return make_error<GenericBinaryError>("code section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseElemSection(ReadContext &Ctx) { + uint32_t Count = readVaruint32(Ctx); + ElemSegments.reserve(Count); + while (Count--) { + wasm::WasmElemSegment Segment; + Segment.Flags = readVaruint32(Ctx); + + uint32_t SupportedFlags = wasm::WASM_ELEM_SEGMENT_HAS_TABLE_NUMBER | + wasm::WASM_ELEM_SEGMENT_IS_PASSIVE | + wasm::WASM_ELEM_SEGMENT_HAS_INIT_EXPRS; + if (Segment.Flags & ~SupportedFlags) + return make_error<GenericBinaryError>( + "Unsupported flags for element segment", object_error::parse_failed); + + if (Segment.Flags & wasm::WASM_ELEM_SEGMENT_HAS_TABLE_NUMBER) + Segment.TableNumber = readVaruint32(Ctx); + else + Segment.TableNumber = 0; + if (!isValidTableNumber(Segment.TableNumber)) + return make_error<GenericBinaryError>("invalid TableNumber", + object_error::parse_failed); + + if (Segment.Flags & wasm::WASM_ELEM_SEGMENT_IS_PASSIVE) { + Segment.Offset.Opcode = wasm::WASM_OPCODE_I32_CONST; + Segment.Offset.Value.Int32 = 0; + } else { + if (Error Err = readInitExpr(Segment.Offset, Ctx)) + return Err; + } + + if (Segment.Flags & wasm::WASM_ELEM_SEGMENT_MASK_HAS_ELEM_KIND) { + Segment.ElemKind = readUint8(Ctx); + if (Segment.Flags & wasm::WASM_ELEM_SEGMENT_HAS_INIT_EXPRS) { + if (Segment.ElemKind != uint8_t(wasm::ValType::FUNCREF) && + Segment.ElemKind != uint8_t(wasm::ValType::EXTERNREF)) { + return make_error<GenericBinaryError>("invalid reference type", + object_error::parse_failed); + } + } else { + if (Segment.ElemKind != 0) + return make_error<GenericBinaryError>("invalid elemtype", + object_error::parse_failed); + Segment.ElemKind = uint8_t(wasm::ValType::FUNCREF); + } + } else { + Segment.ElemKind = uint8_t(wasm::ValType::FUNCREF); + } + + if (Segment.Flags & 
wasm::WASM_ELEM_SEGMENT_HAS_INIT_EXPRS) + return make_error<GenericBinaryError>( + "elem segment init expressions not yet implemented", + object_error::parse_failed); + + uint32_t NumElems = readVaruint32(Ctx); + while (NumElems--) { + Segment.Functions.push_back(readVaruint32(Ctx)); + } + ElemSegments.push_back(Segment); + } + if (Ctx.Ptr != Ctx.End) + return make_error<GenericBinaryError>("elem section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseDataSection(ReadContext &Ctx) { + DataSection = Sections.size(); + uint32_t Count = readVaruint32(Ctx); + if (DataCount && Count != DataCount.getValue()) + return make_error<GenericBinaryError>( + "number of data segments does not match DataCount section"); + DataSegments.reserve(Count); + while (Count--) { + WasmSegment Segment; + Segment.Data.InitFlags = readVaruint32(Ctx); + Segment.Data.MemoryIndex = + (Segment.Data.InitFlags & wasm::WASM_DATA_SEGMENT_HAS_MEMINDEX) + ? readVaruint32(Ctx) + : 0; + if ((Segment.Data.InitFlags & wasm::WASM_DATA_SEGMENT_IS_PASSIVE) == 0) { + if (Error Err = readInitExpr(Segment.Data.Offset, Ctx)) + return Err; + } else { + Segment.Data.Offset.Opcode = wasm::WASM_OPCODE_I32_CONST; + Segment.Data.Offset.Value.Int32 = 0; + } + uint32_t Size = readVaruint32(Ctx); + if (Size > (size_t)(Ctx.End - Ctx.Ptr)) + return make_error<GenericBinaryError>("invalid segment size", + object_error::parse_failed); + Segment.Data.Content = ArrayRef<uint8_t>(Ctx.Ptr, Size); + // The rest of these Data fields are set later, when reading in the linking + // metadata section. 
+ Segment.Data.Alignment = 0; + Segment.Data.LinkingFlags = 0; + Segment.Data.Comdat = UINT32_MAX; + Segment.SectionOffset = Ctx.Ptr - Ctx.Start; + Ctx.Ptr += Size; + DataSegments.push_back(Segment); + } + if (Ctx.Ptr != Ctx.End) + return make_error<GenericBinaryError>("data section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseDataCountSection(ReadContext &Ctx) { + DataCount = readVaruint32(Ctx); + return Error::success(); +} + +const wasm::WasmObjectHeader &WasmObjectFile::getHeader() const { + return Header; +} + +void WasmObjectFile::moveSymbolNext(DataRefImpl &Symb) const { Symb.d.b++; } + +Expected<uint32_t> WasmObjectFile::getSymbolFlags(DataRefImpl Symb) const { + uint32_t Result = SymbolRef::SF_None; + const WasmSymbol &Sym = getWasmSymbol(Symb); + + LLVM_DEBUG(dbgs() << "getSymbolFlags: ptr=" << &Sym << " " << Sym << "\n"); + if (Sym.isBindingWeak()) + Result |= SymbolRef::SF_Weak; + if (!Sym.isBindingLocal()) + Result |= SymbolRef::SF_Global; + if (Sym.isHidden()) + Result |= SymbolRef::SF_Hidden; + if (!Sym.isDefined()) + Result |= SymbolRef::SF_Undefined; + if (Sym.isTypeFunction()) + Result |= SymbolRef::SF_Executable; + return Result; +} + +basic_symbol_iterator WasmObjectFile::symbol_begin() const { + DataRefImpl Ref; + Ref.d.a = 1; // Arbitrary non-zero value so that Ref.p is non-null + Ref.d.b = 0; // Symbol index + return BasicSymbolRef(Ref, this); +} + +basic_symbol_iterator WasmObjectFile::symbol_end() const { + DataRefImpl Ref; + Ref.d.a = 1; // Arbitrary non-zero value so that Ref.p is non-null + Ref.d.b = Symbols.size(); // Symbol index + return BasicSymbolRef(Ref, this); +} + +const WasmSymbol &WasmObjectFile::getWasmSymbol(const DataRefImpl &Symb) const { + return Symbols[Symb.d.b]; +} + +const WasmSymbol &WasmObjectFile::getWasmSymbol(const SymbolRef &Symb) const { + return getWasmSymbol(Symb.getRawDataRefImpl()); +} + +Expected<StringRef> 
WasmObjectFile::getSymbolName(DataRefImpl Symb) const {
  return getWasmSymbol(Symb).Info.Name;
}

// For a function symbol that refers to a defined function, the address is the
// function's offset within the code section; everything else falls back to
// getSymbolValue().
Expected<uint64_t> WasmObjectFile::getSymbolAddress(DataRefImpl Symb) const {
  auto &Sym = getWasmSymbol(Symb);
  if (Sym.Info.Kind == wasm::WASM_SYMBOL_TYPE_FUNCTION &&
      isDefinedFunctionIndex(Sym.Info.ElementIndex))
    return getDefinedFunction(Sym.Info.ElementIndex).CodeSectionOffset;
  else
    return getSymbolValue(Symb);
}

// Returns the symbol's value: the element index for function/global/tag/table
// symbols, segment offset + symbol offset for data symbols, 0 for section
// symbols.
uint64_t WasmObjectFile::getWasmSymbolValue(const WasmSymbol &Sym) const {
  switch (Sym.Info.Kind) {
  case wasm::WASM_SYMBOL_TYPE_FUNCTION:
  case wasm::WASM_SYMBOL_TYPE_GLOBAL:
  case wasm::WASM_SYMBOL_TYPE_TAG:
  case wasm::WASM_SYMBOL_TYPE_TABLE:
    return Sym.Info.ElementIndex;
  case wasm::WASM_SYMBOL_TYPE_DATA: {
    // The value of a data symbol is the segment offset, plus the symbol
    // offset within the segment.
    uint32_t SegmentIndex = Sym.Info.DataRef.Segment;
    const wasm::WasmDataSegment &Segment = DataSegments[SegmentIndex].Data;
    if (Segment.Offset.Opcode == wasm::WASM_OPCODE_I32_CONST) {
      return Segment.Offset.Value.Int32 + Sym.Info.DataRef.Offset;
    } else if (Segment.Offset.Opcode == wasm::WASM_OPCODE_I64_CONST) {
      return Segment.Offset.Value.Int64 + Sym.Info.DataRef.Offset;
    } else {
      llvm_unreachable("unknown init expr opcode");
    }
  }
  case wasm::WASM_SYMBOL_TYPE_SECTION:
    return 0;
  }
  llvm_unreachable("invalid symbol type");
}

uint64_t WasmObjectFile::getSymbolValueImpl(DataRefImpl Symb) const {
  return getWasmSymbolValue(getWasmSymbol(Symb));
}

// Not implemented: calling this hits llvm_unreachable.
uint32_t WasmObjectFile::getSymbolAlignment(DataRefImpl Symb) const {
  llvm_unreachable("not yet implemented");
  return 0;
}

// Not implemented: calling this hits llvm_unreachable.
uint64_t WasmObjectFile::getCommonSymbolSizeImpl(DataRefImpl Symb) const {
  llvm_unreachable("not yet implemented");
  return 0;
}

// Maps the wasm symbol kind onto the generic SymbolRef::Type enumeration.
Expected<SymbolRef::Type>
WasmObjectFile::getSymbolType(DataRefImpl Symb) const {
  const WasmSymbol &Sym = getWasmSymbol(Symb);

  switch (Sym.Info.Kind) {
  case wasm::WASM_SYMBOL_TYPE_FUNCTION:
    return SymbolRef::ST_Function;
  case wasm::WASM_SYMBOL_TYPE_GLOBAL:
    return SymbolRef::ST_Other;
  case wasm::WASM_SYMBOL_TYPE_DATA:
    return SymbolRef::ST_Data;
  case wasm::WASM_SYMBOL_TYPE_SECTION:
    return SymbolRef::ST_Debug;
  case wasm::WASM_SYMBOL_TYPE_TAG:
    return SymbolRef::ST_Other;
  case wasm::WASM_SYMBOL_TYPE_TABLE:
    return SymbolRef::ST_Other;
  }

  llvm_unreachable("unknown WasmSymbol::SymbolType");
  return SymbolRef::ST_Other;
}

// Undefined symbols map to section_end(); defined ones to the section whose
// index getSymbolSectionIdImpl() reports.
Expected<section_iterator>
WasmObjectFile::getSymbolSection(DataRefImpl Symb) const {
  const WasmSymbol &Sym = getWasmSymbol(Symb);
  if (Sym.isUndefined())
    return section_end();

  DataRefImpl Ref;
  Ref.d.a = getSymbolSectionIdImpl(Sym);
  return section_iterator(SectionRef(Ref, this));
}

uint32_t WasmObjectFile::getSymbolSectionId(SymbolRef Symb) const {
  const WasmSymbol &Sym = getWasmSymbol(Symb);
  return getSymbolSectionIdImpl(Sym);
}

// Returns the section index recorded for the symbol's kind (the *Section
// members are set by the corresponding parse*Section functions); section
// symbols carry their section index in ElementIndex.
uint32_t WasmObjectFile::getSymbolSectionIdImpl(const WasmSymbol &Sym) const {
  switch (Sym.Info.Kind) {
  case wasm::WASM_SYMBOL_TYPE_FUNCTION:
    return CodeSection;
  case wasm::WASM_SYMBOL_TYPE_GLOBAL:
    return GlobalSection;
  case wasm::WASM_SYMBOL_TYPE_DATA:
    return DataSection;
  case wasm::WASM_SYMBOL_TYPE_SECTION:
    return Sym.Info.ElementIndex;
  case wasm::WASM_SYMBOL_TYPE_TAG:
    return TagSection;
  case wasm::WASM_SYMBOL_TYPE_TABLE:
    return TableSection;
  default:
    llvm_unreachable("unknown WasmSymbol::SymbolType");
  }
}

// The section index lives in d.a of the DataRefImpl.
void WasmObjectFile::moveSectionNext(DataRefImpl &Sec) const { Sec.d.a++; }

// Known section types are named after their WASM_SEC_* enumerator suffix;
// custom sections return their stored name; any other type is an error.
Expected<StringRef> WasmObjectFile::getSectionName(DataRefImpl Sec) const {
  const WasmSection &S = Sections[Sec.d.a];
#define ECase(X)                                                               \
  case wasm::WASM_SEC_##X:                                                     \
    return #X;
  switch (S.Type) {
    ECase(TYPE);
    ECase(IMPORT);
    ECase(FUNCTION);
    ECase(TABLE);
    ECase(MEMORY);
    ECase(GLOBAL);
    ECase(TAG);
    ECase(EXPORT);
    ECase(START);
    ECase(ELEM);
    ECase(CODE);
    ECase(DATA);
    ECase(DATACOUNT);
  case wasm::WASM_SEC_CUSTOM:
    return S.Name;
  default:
    return createStringError(object_error::invalid_section_index, "");
  }
#undef ECase
}

uint64_t WasmObjectFile::getSectionAddress(DataRefImpl Sec) const { return 0; }

uint64_t WasmObjectFile::getSectionIndex(DataRefImpl Sec) const {
  return Sec.d.a;
}

uint64_t WasmObjectFile::getSectionSize(DataRefImpl Sec) const {
  const WasmSection &S = Sections[Sec.d.a];
  return S.Content.size();
}

Expected<ArrayRef<uint8_t>>
WasmObjectFile::getSectionContents(DataRefImpl Sec) const {
  const WasmSection &S = Sections[Sec.d.a];
  // This will never fail since wasm sections can never be empty (user-sections
  // must have a name and non-user sections each have a defined structure).
  return S.Content;
}

uint64_t WasmObjectFile::getSectionAlignment(DataRefImpl Sec) const {
  return 1;
}

bool WasmObjectFile::isSectionCompressed(DataRefImpl Sec) const {
  return false;
}

bool WasmObjectFile::isSectionText(DataRefImpl Sec) const {
  return getWasmSection(Sec).Type == wasm::WASM_SEC_CODE;
}

bool WasmObjectFile::isSectionData(DataRefImpl Sec) const {
  return getWasmSection(Sec).Type == wasm::WASM_SEC_DATA;
}

bool WasmObjectFile::isSectionBSS(DataRefImpl Sec) const { return false; }

bool WasmObjectFile::isSectionVirtual(DataRefImpl Sec) const { return false; }

// Relocation iterators encode the section index in d.a and the relocation
// index within that section in d.b.
relocation_iterator WasmObjectFile::section_rel_begin(DataRefImpl Ref) const {
  DataRefImpl RelocRef;
  RelocRef.d.a = Ref.d.a;
  RelocRef.d.b = 0;
  return relocation_iterator(RelocationRef(RelocRef, this));
}

relocation_iterator WasmObjectFile::section_rel_end(DataRefImpl Ref) const {
  const WasmSection &Sec = getWasmSection(Ref);
  DataRefImpl RelocRef;
  RelocRef.d.a = Ref.d.a;
  RelocRef.d.b = Sec.Relocations.size();
  return relocation_iterator(RelocationRef(RelocRef, this));
}

void WasmObjectFile::moveRelocationNext(DataRefImpl &Rel) const { Rel.d.b++; }

uint64_t
WasmObjectFile::getRelocationOffset(DataRefImpl Ref) const {
  const wasm::WasmRelocation &Rel = getWasmRelocation(Ref);
  return Rel.Offset;
}

// R_WASM_TYPE_INDEX_LEB relocations yield symbol_end(); for every other type
// Rel.Index is used as the symbol index.
symbol_iterator WasmObjectFile::getRelocationSymbol(DataRefImpl Ref) const {
  const wasm::WasmRelocation &Rel = getWasmRelocation(Ref);
  if (Rel.Type == wasm::R_WASM_TYPE_INDEX_LEB)
    return symbol_end();
  DataRefImpl Sym;
  Sym.d.a = 1;
  Sym.d.b = Rel.Index;
  return symbol_iterator(SymbolRef(Sym, this));
}

uint64_t WasmObjectFile::getRelocationType(DataRefImpl Ref) const {
  const wasm::WasmRelocation &Rel = getWasmRelocation(Ref);
  return Rel.Type;
}

// Appends the relocation's enumerator name to Result, or "Unknown" when the
// type is not listed in WasmRelocs.def.
void WasmObjectFile::getRelocationTypeName(
    DataRefImpl Ref, SmallVectorImpl<char> &Result) const {
  const wasm::WasmRelocation &Rel = getWasmRelocation(Ref);
  StringRef Res = "Unknown";

#define WASM_RELOC(name, value)                                                \
  case wasm::name:                                                             \
    Res = #name;                                                               \
    break;

  switch (Rel.Type) {
#include "llvm/BinaryFormat/WasmRelocs.def"
  }

#undef WASM_RELOC

  Result.append(Res.begin(), Res.end());
}

section_iterator WasmObjectFile::section_begin() const {
  DataRefImpl Ref;
  Ref.d.a = 0;
  return section_iterator(SectionRef(Ref, this));
}

section_iterator WasmObjectFile::section_end() const {
  DataRefImpl Ref;
  Ref.d.a = Sections.size();
  return section_iterator(SectionRef(Ref, this));
}

uint8_t WasmObjectFile::getBytesInAddress() const {
  return HasMemory64 ? 8 : 4;
}

StringRef WasmObjectFile::getFileFormatName() const { return "WASM"; }

Triple::ArchType WasmObjectFile::getArch() const {
  return HasMemory64 ? Triple::wasm64 : Triple::wasm32;
}

SubtargetFeatures WasmObjectFile::getFeatures() const {
  return SubtargetFeatures();
}

bool WasmObjectFile::isRelocatableObject() const { return HasLinkingSection; }

bool WasmObjectFile::isSharedObject() const { return HasDylinkSection; }

// The section index in d.a is only range-checked by an assert.
const WasmSection &WasmObjectFile::getWasmSection(DataRefImpl Ref) const {
  assert(Ref.d.a < Sections.size());
  return Sections[Ref.d.a];
}

const WasmSection &
WasmObjectFile::getWasmSection(const SectionRef &Section) const {
  return getWasmSection(Section.getRawDataRefImpl());
}

const wasm::WasmRelocation &
WasmObjectFile::getWasmRelocation(const RelocationRef &Ref) const {
  return getWasmRelocation(Ref.getRawDataRefImpl());
}

// d.a selects the section, d.b the relocation inside it; both are only
// range-checked by asserts.
const wasm::WasmRelocation &
WasmObjectFile::getWasmRelocation(DataRefImpl Ref) const {
  assert(Ref.d.a < Sections.size());
  const WasmSection &Sec = Sections[Ref.d.a];
  assert(Ref.d.b < Sec.Relocations.size());
  return Sec.Relocations[Ref.d.b];
}

// Maps a section ID (and, for custom sections, its name) to a WASM_SEC_ORDER_*
// value. Unrecognised IDs and custom names map to WASM_SEC_ORDER_NONE.
int WasmSectionOrderChecker::getSectionOrder(unsigned ID,
                                             StringRef CustomSectionName) {
  switch (ID) {
  case wasm::WASM_SEC_CUSTOM:
    return StringSwitch<unsigned>(CustomSectionName)
        .Case("dylink", WASM_SEC_ORDER_DYLINK)
        .Case("dylink.0", WASM_SEC_ORDER_DYLINK)
        .Case("linking", WASM_SEC_ORDER_LINKING)
        .StartsWith("reloc.", WASM_SEC_ORDER_RELOC)
        .Case("name", WASM_SEC_ORDER_NAME)
        .Case("producers", WASM_SEC_ORDER_PRODUCERS)
        .Case("target_features", WASM_SEC_ORDER_TARGET_FEATURES)
        .Default(WASM_SEC_ORDER_NONE);
  case wasm::WASM_SEC_TYPE:
    return WASM_SEC_ORDER_TYPE;
  case wasm::WASM_SEC_IMPORT:
    return WASM_SEC_ORDER_IMPORT;
  case wasm::WASM_SEC_FUNCTION:
    return WASM_SEC_ORDER_FUNCTION;
  case wasm::WASM_SEC_TABLE:
    return WASM_SEC_ORDER_TABLE;
  case wasm::WASM_SEC_MEMORY:
    return WASM_SEC_ORDER_MEMORY;
  case wasm::WASM_SEC_GLOBAL:
    return WASM_SEC_ORDER_GLOBAL;
  case wasm::WASM_SEC_EXPORT:
    return WASM_SEC_ORDER_EXPORT;
  case wasm::WASM_SEC_START:
    return WASM_SEC_ORDER_START;
  case wasm::WASM_SEC_ELEM:
    return WASM_SEC_ORDER_ELEM;
  case wasm::WASM_SEC_CODE:
    return WASM_SEC_ORDER_CODE;
  case wasm::WASM_SEC_DATA:
    return WASM_SEC_ORDER_DATA;
  case wasm::WASM_SEC_DATACOUNT:
    return WASM_SEC_ORDER_DATACOUNT;
  case wasm::WASM_SEC_TAG:
    return WASM_SEC_ORDER_TAG;
  default:
    return WASM_SEC_ORDER_NONE;
  }
}

// Represents the edges in a directed graph where any node B reachable from node
// A is not allowed to appear before A in the section ordering, but may appear
// afterward.
// Rows shorter than WASM_NUM_SEC_ORDERS are zero-filled by aggregate
// initialisation; isValidSectionOrder() stops scanning a row at the first
// WASM_SEC_ORDER_NONE entry.
int WasmSectionOrderChecker::DisallowedPredecessors
    [WASM_NUM_SEC_ORDERS][WASM_NUM_SEC_ORDERS] = {
        // WASM_SEC_ORDER_NONE
        {},
        // WASM_SEC_ORDER_TYPE
        {WASM_SEC_ORDER_TYPE, WASM_SEC_ORDER_IMPORT},
        // WASM_SEC_ORDER_IMPORT
        {WASM_SEC_ORDER_IMPORT, WASM_SEC_ORDER_FUNCTION},
        // WASM_SEC_ORDER_FUNCTION
        {WASM_SEC_ORDER_FUNCTION, WASM_SEC_ORDER_TABLE},
        // WASM_SEC_ORDER_TABLE
        {WASM_SEC_ORDER_TABLE, WASM_SEC_ORDER_MEMORY},
        // WASM_SEC_ORDER_MEMORY
        {WASM_SEC_ORDER_MEMORY, WASM_SEC_ORDER_TAG},
        // WASM_SEC_ORDER_TAG
        {WASM_SEC_ORDER_TAG, WASM_SEC_ORDER_GLOBAL},
        // WASM_SEC_ORDER_GLOBAL
        {WASM_SEC_ORDER_GLOBAL, WASM_SEC_ORDER_EXPORT},
        // WASM_SEC_ORDER_EXPORT
        {WASM_SEC_ORDER_EXPORT, WASM_SEC_ORDER_START},
        // WASM_SEC_ORDER_START
        {WASM_SEC_ORDER_START, WASM_SEC_ORDER_ELEM},
        // WASM_SEC_ORDER_ELEM
        {WASM_SEC_ORDER_ELEM, WASM_SEC_ORDER_DATACOUNT},
        // WASM_SEC_ORDER_DATACOUNT
        {WASM_SEC_ORDER_DATACOUNT, WASM_SEC_ORDER_CODE},
        // WASM_SEC_ORDER_CODE
        {WASM_SEC_ORDER_CODE, WASM_SEC_ORDER_DATA},
        // WASM_SEC_ORDER_DATA
        {WASM_SEC_ORDER_DATA, WASM_SEC_ORDER_LINKING},

        // Custom Sections
        // WASM_SEC_ORDER_DYLINK
        {WASM_SEC_ORDER_DYLINK, WASM_SEC_ORDER_TYPE},
        // WASM_SEC_ORDER_LINKING
        {WASM_SEC_ORDER_LINKING, WASM_SEC_ORDER_RELOC, WASM_SEC_ORDER_NAME},
        // WASM_SEC_ORDER_RELOC (can be repeated)
        {},
        // WASM_SEC_ORDER_NAME
        {WASM_SEC_ORDER_NAME, WASM_SEC_ORDER_PRODUCERS},
        // WASM_SEC_ORDER_PRODUCERS
        {WASM_SEC_ORDER_PRODUCERS, WASM_SEC_ORDER_TARGET_FEATURES},
        // WASM_SEC_ORDER_TARGET_FEATURES
        {WASM_SEC_ORDER_TARGET_FEATURES}};

// Returns true if a section with this ID/name may appear at this point, given
// the sections already recorded in Seen, and records it as seen. Returns false
// (without recording) if any section reachable through DisallowedPredecessors
// has already been seen. Sections with order NONE are always accepted and are
// not recorded.
bool WasmSectionOrderChecker::isValidSectionOrder(unsigned ID,
                                                  StringRef CustomSectionName) {
  int Order = getSectionOrder(ID, CustomSectionName);
  if (Order == WASM_SEC_ORDER_NONE)
    return true;

  // Disallowed predecessors we need to check for
  SmallVector<int, WASM_NUM_SEC_ORDERS> WorkList;

  // Keep track of completed checks to avoid repeating work
  bool Checked[WASM_NUM_SEC_ORDERS] = {};

  int Curr = Order;
  while (true) {
    // Add new disallowed predecessors to work list
    for (size_t I = 0;; ++I) {
      int Next = DisallowedPredecessors[Curr][I];
      if (Next == WASM_SEC_ORDER_NONE)
        break;
      if (Checked[Next])
        continue;
      WorkList.push_back(Next);
      Checked[Next] = true;
    }

    if (WorkList.empty())
      break;

    // Consider next disallowed predecessor
    Curr = WorkList.pop_back_val();
    if (Seen[Curr])
      return false;
  }

  // Have not seen any disallowed predecessors
  Seen[Order] = true;
  return true;
}
diff --git a/contrib/libs/llvm14/lib/Object/WindowsMachineFlag.cpp b/contrib/libs/llvm14/lib/Object/WindowsMachineFlag.cpp
new file mode 100644
index 0000000000..f7f2b20ae1
--- /dev/null
+++ b/contrib/libs/llvm14/lib/Object/WindowsMachineFlag.cpp
@@ -0,0 +1,44 @@
//===- WindowsMachineFlag.cpp ---------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Functions for implementing the /machine: flag.
//
//===----------------------------------------------------------------------===//

#include "llvm/Object/WindowsMachineFlag.h"

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/COFF.h"

using namespace llvm;

// Returns /machine's value.
// The match is case-insensitive (the input is lower-cased first); anything
// not listed yields IMAGE_FILE_MACHINE_UNKNOWN.
COFF::MachineTypes llvm::getMachineType(StringRef S) {
  return StringSwitch<COFF::MachineTypes>(S.lower())
      .Cases("x64", "amd64", COFF::IMAGE_FILE_MACHINE_AMD64)
      .Cases("x86", "i386", COFF::IMAGE_FILE_MACHINE_I386)
      .Case("arm", COFF::IMAGE_FILE_MACHINE_ARMNT)
      .Case("arm64", COFF::IMAGE_FILE_MACHINE_ARM64)
      .Default(COFF::IMAGE_FILE_MACHINE_UNKNOWN);
}

// Inverse of getMachineType() for the four supported machine types. Any other
// value (including IMAGE_FILE_MACHINE_UNKNOWN) hits llvm_unreachable, so
// callers must filter those out first.
StringRef llvm::machineToStr(COFF::MachineTypes MT) {
  switch (MT) {
  case COFF::IMAGE_FILE_MACHINE_ARMNT:
    return "arm";
  case COFF::IMAGE_FILE_MACHINE_ARM64:
    return "arm64";
  case COFF::IMAGE_FILE_MACHINE_AMD64:
    return "x64";
  case COFF::IMAGE_FILE_MACHINE_I386:
    return "x86";
  default:
    llvm_unreachable("unknown machine type");
  }
}
diff --git a/contrib/libs/llvm14/lib/Object/WindowsResource.cpp b/contrib/libs/llvm14/lib/Object/WindowsResource.cpp
new file mode 100644
index 0000000000..2a69c6c46b
--- /dev/null
+++ b/contrib/libs/llvm14/lib/Object/WindowsResource.cpp
@@ -0,0 +1,1015 @@
//===-- WindowsResource.cpp -------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the .res file class.
//
//===----------------------------------------------------------------------===//

#include "llvm/Object/WindowsResource.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ScopedPrinter.h"
#include <ctime>
#include <queue>
#include <system_error>

using namespace llvm;
using namespace object;

namespace llvm {
namespace object {

// Evaluates X once and returns its result from the enclosing function if it
// converts to true (i.e. holds an error).
// NOTE(review): expands to a bare `if`, so it is not safe directly before an
// `else` belonging to an outer `if`.
#define RETURN_IF_ERROR(X)                                                     \
  if (auto EC = X)                                                             \
    return EC;

// Evaluates Expr into Name##OrErr; on failure returns its error from the
// enclosing function, otherwise binds Name as a const reference to the value.
#define UNWRAP_REF_OR_RETURN(Name, Expr)                                       \
  auto Name##OrErr = Expr;                                                     \
  if (!Name##OrErr)                                                            \
    return Name##OrErr.takeError();                                            \
  const auto &Name = *Name##OrErr;

// Same as UNWRAP_REF_OR_RETURN, but binds Name to a copy of the value.
#define UNWRAP_OR_RETURN(Name, Expr)                                           \
  auto Name##OrErr = Expr;                                                     \
  if (!Name##OrErr)                                                            \
    return Name##OrErr.takeError();                                            \
  auto Name = *Name##OrErr;

// Smallest HeaderSize that ResourceEntryRef::loadNext() accepts: seven 32-bit
// fields plus two 16-bit fields.
const uint32_t MIN_HEADER_SIZE = 7 * sizeof(uint32_t) + 2 * sizeof(uint16_t);

// COFF files seem to be inconsistent with alignment between sections, just use
// 8-byte because it makes everyone happy.
const uint32_t SECTION_ALIGNMENT = sizeof(uint64_t);

// Wraps Source as a .res binary and positions the little-endian byte stream
// just past the leading magic and null entry. The buffer length is not
// checked here; createWindowsResource() does that before constructing.
WindowsResource::WindowsResource(MemoryBufferRef Source)
    : Binary(Binary::ID_WinRes, Source) {
  size_t LeadingSize = WIN_RES_MAGIC_SIZE + WIN_RES_NULL_ENTRY_SIZE;
  BBS = BinaryByteStream(Data.getBuffer().drop_front(LeadingSize),
                         support::little);
}

// static
// Factory: rejects buffers too small to hold the magic and the null entry,
// otherwise returns a new WindowsResource over Source.
Expected<std::unique_ptr<WindowsResource>>
WindowsResource::createWindowsResource(MemoryBufferRef Source) {
  if (Source.getBufferSize() < WIN_RES_MAGIC_SIZE + WIN_RES_NULL_ENTRY_SIZE)
    return make_error<GenericBinaryError>(
        Source.getBufferIdentifier() + ": too small to be a resource file",
        object_error::invalid_file_type);
  std::unique_ptr<WindowsResource> Ret(new WindowsResource(Source));
  return std::move(Ret);
}

// Returns a reference to the first resource entry, or EmptyResError when the
// stream is too short to contain even one header prefix + suffix.
Expected<ResourceEntryRef> WindowsResource::getHeadEntry() {
  if (BBS.getLength() < sizeof(WinResHeaderPrefix) + sizeof(WinResHeaderSuffix))
    return make_error<EmptyResError>(getFileName() + " contains no entries",
                                     object_error::unexpected_eof);
  return ResourceEntryRef::create(BinaryStreamRef(BBS), this);
}

ResourceEntryRef::ResourceEntryRef(BinaryStreamRef Ref,
                                   const WindowsResource *Owner)
    : Reader(Ref), Owner(Owner) {}

// Builds an entry reference and immediately loads the first entry from BSR.
Expected<ResourceEntryRef>
ResourceEntryRef::create(BinaryStreamRef BSR, const WindowsResource *Owner) {
  auto Ref = ResourceEntryRef(BSR, Owner);
  if (auto E = Ref.loadNext())
    return std::move(E);
  return Ref;
}

// Advances to the next entry. Sets End to true (and loads nothing) when the
// reader is exhausted; End is left untouched otherwise.
Error ResourceEntryRef::moveNext(bool &End) {
  // Reached end of all the entries.
  if (Reader.bytesRemaining() == 0) {
    End = true;
    return Error::success();
  }
  RETURN_IF_ERROR(loadNext());

  return Error::success();
}

// Reads a header field that is either a numeric ID or a wide string. A
// leading 16-bit value of 0xffff marks an ID, which is read into ID; any other
// value is the first character of a string, so the reader is rewound and the
// string is read into Str. IsString reports which form was found.
static Error readStringOrId(BinaryStreamReader &Reader, uint16_t &ID,
                            ArrayRef<UTF16> &Str, bool &IsString) {
  uint16_t IDFlag;
  RETURN_IF_ERROR(Reader.readInteger(IDFlag));
  IsString = IDFlag != 0xffff;

  if (IsString) {
    Reader.setOffset(
        Reader.getOffset() -
        sizeof(uint16_t)); // Re-read the bytes which we used to check the flag.
    RETURN_IF_ERROR(Reader.readWideString(Str));
  } else
    RETURN_IF_ERROR(Reader.readInteger(ID));

  return Error::success();
}

// Parses one resource entry at the reader's current position: header prefix,
// type, name, (aligned) header suffix, data, then padding to the data
// alignment. Fails if the declared header size is below MIN_HEADER_SIZE or
// any read fails.
Error ResourceEntryRef::loadNext() {
  const WinResHeaderPrefix *Prefix;
  RETURN_IF_ERROR(Reader.readObject(Prefix));

  if (Prefix->HeaderSize < MIN_HEADER_SIZE)
    return make_error<GenericBinaryError>(Owner->getFileName() +
                                              ": header size too small",
                                          object_error::parse_failed);

  RETURN_IF_ERROR(readStringOrId(Reader, TypeID, Type, IsStringType));

  RETURN_IF_ERROR(readStringOrId(Reader, NameID, Name, IsStringName));

  RETURN_IF_ERROR(Reader.padToAlignment(WIN_RES_HEADER_ALIGNMENT));

  RETURN_IF_ERROR(Reader.readObject(Suffix));

  RETURN_IF_ERROR(Reader.readArray(Data, Prefix->DataSize));

  RETURN_IF_ERROR(Reader.padToAlignment(WIN_RES_DATA_ALIGNMENT));

  return Error::success();
}

WindowsResourceParser::WindowsResourceParser(bool MinGW)
    : Root(false), MinGW(MinGW) {}

// Writes a human-readable label for a numeric resource type to OS; IDs not
// listed below are printed as "ID <n>".
void printResourceTypeName(uint16_t TypeID, raw_ostream &OS) {
  switch (TypeID) {
  case  1: OS << "CURSOR (ID 1)"; break;
  case  2: OS << "BITMAP (ID 2)"; break;
  case  3: OS << "ICON (ID 3)"; break;
  case  4: OS << "MENU (ID 4)"; break;
  case  5: OS << "DIALOG (ID 5)"; break;
  case  6: OS << "STRINGTABLE (ID 6)"; break;
  case  7: OS << "FONTDIR (ID 7)"; break;
  case  8: OS << "FONT (ID 8)"; break;
  case  9: OS << "ACCELERATOR (ID 9)"; break;
  case 10: OS << "RCDATA (ID 10)"; break;
  case 11: OS << "MESSAGETABLE (ID 11)"; break;
  case 12: OS << "GROUP_CURSOR (ID 12)"; break;
  case 14: OS << "GROUP_ICON (ID 14)"; break;
  case 16: OS << "VERSIONINFO (ID 16)"; break;
  case 17: OS << "DLGINCLUDE (ID 17)"; break;
  case 19: OS << "PLUGPLAY (ID 19)"; break;
  case 20: OS << "VXD (ID 20)"; break;
  case 21: OS << "ANICURSOR (ID 21)"; break;
  case 22: OS << "ANIICON (ID 22)"; break;
  case 23: OS << "HTML (ID 23)"; break;
  case 24: OS << "MANIFEST (ID 24)"; break;
  default: OS << "ID " << TypeID; break;
  }
}

// Converts little-endian UTF-16 to UTF-8. On little-endian hosts the input is
// converted directly; on big-endian hosts a copy prefixed with
// UNI_UTF16_BYTE_ORDER_MARK_SWAPPED is converted instead.
static bool convertUTF16LEToUTF8String(ArrayRef<UTF16> Src, std::string &Out) {
  if (!sys::IsBigEndianHost)
    return convertUTF16ToUTF8String(Src, Out);

  std::vector<UTF16> EndianCorrectedSrc;
  EndianCorrectedSrc.resize(Src.size() + 1);
  llvm::copy(Src, EndianCorrectedSrc.begin() + 1);
  EndianCorrectedSrc[0] = UNI_UTF16_BYTE_ORDER_MARK_SWAPPED;
  return convertUTF16ToUTF8String(makeArrayRef(EndianCorrectedSrc), Out);
}

// Builds the "duplicate resource" diagnostic text for Entry, naming its type,
// name and language and the two files in which it was found. String types and
// names are shown quoted in UTF-8; a failed conversion is replaced by a
// placeholder.
static std::string makeDuplicateResourceError(
    const ResourceEntryRef &Entry, StringRef File1, StringRef File2) {
  std::string Ret;
  raw_string_ostream OS(Ret);

  OS << "duplicate resource:";

  OS << " type ";
  if (Entry.checkTypeString()) {
    std::string UTF8;
    if (!convertUTF16LEToUTF8String(Entry.getTypeString(), UTF8))
      UTF8 = "(failed conversion from UTF16)";
    OS << '\"' << UTF8 << '\"';
  } else
    printResourceTypeName(Entry.getTypeID(), OS);

  OS << "/name ";
  if (Entry.checkNameString()) {
    std::string UTF8;
    if (!convertUTF16LEToUTF8String(Entry.getNameString(), UTF8))
      UTF8 = "(failed conversion from UTF16)";
    OS << '\"' << UTF8 << '\"';
  } else {
    OS << "ID " << Entry.getNameID();
  }

  OS << "/language " << Entry.getLanguage() << ", in " << File1 << " and in "
     << File2;

  return OS.str();
}

static void printStringOrID(const WindowsResourceParser::StringOrID &S,
                            raw_string_ostream &OS, bool IsType, bool IsID) {
  if (S.IsString) {
    std::string UTF8;
    if (!convertUTF16LEToUTF8String(S.String, UTF8))
      UTF8 = "(failed conversion
from UTF16)"; + OS << '\"' << UTF8 << '\"'; + } else if (IsType) + printResourceTypeName(S.ID, OS); + else if (IsID) + OS << "ID " << S.ID; + else + OS << S.ID; +} + +static std::string makeDuplicateResourceError( + const std::vector<WindowsResourceParser::StringOrID> &Context, + StringRef File1, StringRef File2) { + std::string Ret; + raw_string_ostream OS(Ret); + + OS << "duplicate resource:"; + + if (Context.size() >= 1) { + OS << " type "; + printStringOrID(Context[0], OS, /* IsType */ true, /* IsID */ true); + } + + if (Context.size() >= 2) { + OS << "/name "; + printStringOrID(Context[1], OS, /* IsType */ false, /* IsID */ true); + } + + if (Context.size() >= 3) { + OS << "/language "; + printStringOrID(Context[2], OS, /* IsType */ false, /* IsID */ false); + } + OS << ", in " << File1 << " and in " << File2; + + return OS.str(); +} + +// MinGW specific. Remove default manifests (with language zero) if there are +// other manifests present, and report an error if there are more than one +// manifest with a non-zero language code. +// GCC has the concept of a default manifest resource object, which gets +// linked in implicitly if present. This default manifest has got language +// id zero, and should be dropped silently if there's another manifest present. +// If the user resources surprisignly had a manifest with language id zero, +// we should also ignore the duplicate default manifest. +void WindowsResourceParser::cleanUpManifests( + std::vector<std::string> &Duplicates) { + auto TypeIt = Root.IDChildren.find(/* RT_MANIFEST */ 24); + if (TypeIt == Root.IDChildren.end()) + return; + + TreeNode *TypeNode = TypeIt->second.get(); + auto NameIt = + TypeNode->IDChildren.find(/* CREATEPROCESS_MANIFEST_RESOURCE_ID */ 1); + if (NameIt == TypeNode->IDChildren.end()) + return; + + TreeNode *NameNode = NameIt->second.get(); + if (NameNode->IDChildren.size() <= 1) + return; // None or one manifest present, all good. 
+ + // If we have more than one manifest, drop the language zero one if present, + // and check again. + auto LangZeroIt = NameNode->IDChildren.find(0); + if (LangZeroIt != NameNode->IDChildren.end() && + LangZeroIt->second->IsDataNode) { + uint32_t RemovedIndex = LangZeroIt->second->DataIndex; + NameNode->IDChildren.erase(LangZeroIt); + Data.erase(Data.begin() + RemovedIndex); + Root.shiftDataIndexDown(RemovedIndex); + + // If we're now down to one manifest, all is good. + if (NameNode->IDChildren.size() <= 1) + return; + } + + // More than one non-language-zero manifest + auto FirstIt = NameNode->IDChildren.begin(); + uint32_t FirstLang = FirstIt->first; + TreeNode *FirstNode = FirstIt->second.get(); + auto LastIt = NameNode->IDChildren.rbegin(); + uint32_t LastLang = LastIt->first; + TreeNode *LastNode = LastIt->second.get(); + Duplicates.push_back( + ("duplicate non-default manifests with languages " + Twine(FirstLang) + + " in " + InputFilenames[FirstNode->Origin] + " and " + Twine(LastLang) + + " in " + InputFilenames[LastNode->Origin]) + .str()); +} + +// Ignore duplicates of manifests with language zero (the default manifest), +// in case the user has provided a manifest with that language id. See +// the function comment above for context. Only returns true if MinGW is set +// to true. 
+bool WindowsResourceParser::shouldIgnoreDuplicate( + const ResourceEntryRef &Entry) const { + return MinGW && !Entry.checkTypeString() && + Entry.getTypeID() == /* RT_MANIFEST */ 24 && + !Entry.checkNameString() && + Entry.getNameID() == /* CREATEPROCESS_MANIFEST_RESOURCE_ID */ 1 && + Entry.getLanguage() == 0; +} + +bool WindowsResourceParser::shouldIgnoreDuplicate( + const std::vector<StringOrID> &Context) const { + return MinGW && Context.size() == 3 && !Context[0].IsString && + Context[0].ID == /* RT_MANIFEST */ 24 && !Context[1].IsString && + Context[1].ID == /* CREATEPROCESS_MANIFEST_RESOURCE_ID */ 1 && + !Context[2].IsString && Context[2].ID == 0; +} + +Error WindowsResourceParser::parse(WindowsResource *WR, + std::vector<std::string> &Duplicates) { + auto EntryOrErr = WR->getHeadEntry(); + if (!EntryOrErr) { + auto E = EntryOrErr.takeError(); + if (E.isA<EmptyResError>()) { + // Check if the .res file contains no entries. In this case we don't have + // to throw an error but can rather just return without parsing anything. + // This applies for files which have a valid PE header magic and the + // mandatory empty null resource entry. Files which do not fit this + // criteria would have already been filtered out by + // WindowsResource::createWindowsResource(). 
+ consumeError(std::move(E)); + return Error::success(); + } + return E; + } + + ResourceEntryRef Entry = EntryOrErr.get(); + uint32_t Origin = InputFilenames.size(); + InputFilenames.push_back(std::string(WR->getFileName())); + bool End = false; + while (!End) { + + TreeNode *Node; + bool IsNewNode = Root.addEntry(Entry, Origin, Data, StringTable, Node); + if (!IsNewNode) { + if (!shouldIgnoreDuplicate(Entry)) + Duplicates.push_back(makeDuplicateResourceError( + Entry, InputFilenames[Node->Origin], WR->getFileName())); + } + + RETURN_IF_ERROR(Entry.moveNext(End)); + } + + return Error::success(); +} + +Error WindowsResourceParser::parse(ResourceSectionRef &RSR, StringRef Filename, + std::vector<std::string> &Duplicates) { + UNWRAP_REF_OR_RETURN(BaseTable, RSR.getBaseTable()); + uint32_t Origin = InputFilenames.size(); + InputFilenames.push_back(std::string(Filename)); + std::vector<StringOrID> Context; + return addChildren(Root, RSR, BaseTable, Origin, Context, Duplicates); +} + +void WindowsResourceParser::printTree(raw_ostream &OS) const { + ScopedPrinter Writer(OS); + Root.print(Writer, "Resource Tree"); +} + +bool WindowsResourceParser::TreeNode::addEntry( + const ResourceEntryRef &Entry, uint32_t Origin, + std::vector<std::vector<uint8_t>> &Data, + std::vector<std::vector<UTF16>> &StringTable, TreeNode *&Result) { + TreeNode &TypeNode = addTypeNode(Entry, StringTable); + TreeNode &NameNode = TypeNode.addNameNode(Entry, StringTable); + return NameNode.addLanguageNode(Entry, Origin, Data, Result); +} + +Error WindowsResourceParser::addChildren(TreeNode &Node, + ResourceSectionRef &RSR, + const coff_resource_dir_table &Table, + uint32_t Origin, + std::vector<StringOrID> &Context, + std::vector<std::string> &Duplicates) { + + for (int i = 0; i < Table.NumberOfNameEntries + Table.NumberOfIDEntries; + i++) { + UNWRAP_REF_OR_RETURN(Entry, RSR.getTableEntry(Table, i)); + TreeNode *Child; + + if (Entry.Offset.isSubDir()) { + + // Create a new subdirectory and recurse 
+ if (i < Table.NumberOfNameEntries) { + UNWRAP_OR_RETURN(NameString, RSR.getEntryNameString(Entry)); + Child = &Node.addNameChild(NameString, StringTable); + Context.push_back(StringOrID(NameString)); + } else { + Child = &Node.addIDChild(Entry.Identifier.ID); + Context.push_back(StringOrID(Entry.Identifier.ID)); + } + + UNWRAP_REF_OR_RETURN(NextTable, RSR.getEntrySubDir(Entry)); + Error E = + addChildren(*Child, RSR, NextTable, Origin, Context, Duplicates); + if (E) + return E; + Context.pop_back(); + + } else { + + // Data leaves are supposed to have a numeric ID as identifier (language). + if (Table.NumberOfNameEntries > 0) + return createStringError(object_error::parse_failed, + "unexpected string key for data object"); + + // Try adding a data leaf + UNWRAP_REF_OR_RETURN(DataEntry, RSR.getEntryData(Entry)); + TreeNode *Child; + Context.push_back(StringOrID(Entry.Identifier.ID)); + bool Added = Node.addDataChild(Entry.Identifier.ID, Table.MajorVersion, + Table.MinorVersion, Table.Characteristics, + Origin, Data.size(), Child); + if (Added) { + UNWRAP_OR_RETURN(Contents, RSR.getContents(DataEntry)); + Data.push_back(ArrayRef<uint8_t>( + reinterpret_cast<const uint8_t *>(Contents.data()), + Contents.size())); + } else { + if (!shouldIgnoreDuplicate(Context)) + Duplicates.push_back(makeDuplicateResourceError( + Context, InputFilenames[Child->Origin], InputFilenames.back())); + } + Context.pop_back(); + + } + } + return Error::success(); +} + +WindowsResourceParser::TreeNode::TreeNode(uint32_t StringIndex) + : StringIndex(StringIndex) {} + +WindowsResourceParser::TreeNode::TreeNode(uint16_t MajorVersion, + uint16_t MinorVersion, + uint32_t Characteristics, + uint32_t Origin, uint32_t DataIndex) + : IsDataNode(true), DataIndex(DataIndex), MajorVersion(MajorVersion), + MinorVersion(MinorVersion), Characteristics(Characteristics), + Origin(Origin) {} + +std::unique_ptr<WindowsResourceParser::TreeNode> +WindowsResourceParser::TreeNode::createStringNode(uint32_t Index) 
{ + return std::unique_ptr<TreeNode>(new TreeNode(Index)); +} + +std::unique_ptr<WindowsResourceParser::TreeNode> +WindowsResourceParser::TreeNode::createIDNode() { + return std::unique_ptr<TreeNode>(new TreeNode(0)); +} + +std::unique_ptr<WindowsResourceParser::TreeNode> +WindowsResourceParser::TreeNode::createDataNode(uint16_t MajorVersion, + uint16_t MinorVersion, + uint32_t Characteristics, + uint32_t Origin, + uint32_t DataIndex) { + return std::unique_ptr<TreeNode>(new TreeNode( + MajorVersion, MinorVersion, Characteristics, Origin, DataIndex)); +} + +WindowsResourceParser::TreeNode &WindowsResourceParser::TreeNode::addTypeNode( + const ResourceEntryRef &Entry, + std::vector<std::vector<UTF16>> &StringTable) { + if (Entry.checkTypeString()) + return addNameChild(Entry.getTypeString(), StringTable); + else + return addIDChild(Entry.getTypeID()); +} + +WindowsResourceParser::TreeNode &WindowsResourceParser::TreeNode::addNameNode( + const ResourceEntryRef &Entry, + std::vector<std::vector<UTF16>> &StringTable) { + if (Entry.checkNameString()) + return addNameChild(Entry.getNameString(), StringTable); + else + return addIDChild(Entry.getNameID()); +} + +bool WindowsResourceParser::TreeNode::addLanguageNode( + const ResourceEntryRef &Entry, uint32_t Origin, + std::vector<std::vector<uint8_t>> &Data, TreeNode *&Result) { + bool Added = addDataChild(Entry.getLanguage(), Entry.getMajorVersion(), + Entry.getMinorVersion(), Entry.getCharacteristics(), + Origin, Data.size(), Result); + if (Added) + Data.push_back(Entry.getData()); + return Added; +} + +bool WindowsResourceParser::TreeNode::addDataChild( + uint32_t ID, uint16_t MajorVersion, uint16_t MinorVersion, + uint32_t Characteristics, uint32_t Origin, uint32_t DataIndex, + TreeNode *&Result) { + auto NewChild = createDataNode(MajorVersion, MinorVersion, Characteristics, + Origin, DataIndex); + auto ElementInserted = IDChildren.emplace(ID, std::move(NewChild)); + Result = ElementInserted.first->second.get(); + 
return ElementInserted.second; +} + +WindowsResourceParser::TreeNode &WindowsResourceParser::TreeNode::addIDChild( + uint32_t ID) { + auto Child = IDChildren.find(ID); + if (Child == IDChildren.end()) { + auto NewChild = createIDNode(); + WindowsResourceParser::TreeNode &Node = *NewChild; + IDChildren.emplace(ID, std::move(NewChild)); + return Node; + } else + return *(Child->second); +} + +WindowsResourceParser::TreeNode &WindowsResourceParser::TreeNode::addNameChild( + ArrayRef<UTF16> NameRef, std::vector<std::vector<UTF16>> &StringTable) { + std::string NameString; + convertUTF16LEToUTF8String(NameRef, NameString); + + auto Child = StringChildren.find(NameString); + if (Child == StringChildren.end()) { + auto NewChild = createStringNode(StringTable.size()); + StringTable.push_back(NameRef); + WindowsResourceParser::TreeNode &Node = *NewChild; + StringChildren.emplace(NameString, std::move(NewChild)); + return Node; + } else + return *(Child->second); +} + +void WindowsResourceParser::TreeNode::print(ScopedPrinter &Writer, + StringRef Name) const { + ListScope NodeScope(Writer, Name); + for (auto const &Child : StringChildren) { + Child.second->print(Writer, Child.first); + } + for (auto const &Child : IDChildren) { + Child.second->print(Writer, to_string(Child.first)); + } +} + +// This function returns the size of the entire resource tree, including +// directory tables, directory entries, and data entries. It does not include +// the directory strings or the relocations of the .rsrc section. +uint32_t WindowsResourceParser::TreeNode::getTreeSize() const { + uint32_t Size = (IDChildren.size() + StringChildren.size()) * + sizeof(coff_resource_dir_entry); + + // Reached a node pointing to a data entry. + if (IsDataNode) { + Size += sizeof(coff_resource_data_entry); + return Size; + } + + // If the node does not point to data, it must have a directory table pointing + // to other nodes. 
+ Size += sizeof(coff_resource_dir_table); + + for (auto const &Child : StringChildren) { + Size += Child.second->getTreeSize(); + } + for (auto const &Child : IDChildren) { + Size += Child.second->getTreeSize(); + } + return Size; +} + +// Shift DataIndex of all data children with an Index greater or equal to the +// given one, to fill a gap from removing an entry from the Data vector. +void WindowsResourceParser::TreeNode::shiftDataIndexDown(uint32_t Index) { + if (IsDataNode && DataIndex >= Index) { + DataIndex--; + } else { + for (auto &Child : IDChildren) + Child.second->shiftDataIndexDown(Index); + for (auto &Child : StringChildren) + Child.second->shiftDataIndexDown(Index); + } +} + +class WindowsResourceCOFFWriter { +public: + WindowsResourceCOFFWriter(COFF::MachineTypes MachineType, + const WindowsResourceParser &Parser, Error &E); + std::unique_ptr<MemoryBuffer> write(uint32_t TimeDateStamp); + +private: + void performFileLayout(); + void performSectionOneLayout(); + void performSectionTwoLayout(); + void writeCOFFHeader(uint32_t TimeDateStamp); + void writeFirstSectionHeader(); + void writeSecondSectionHeader(); + void writeFirstSection(); + void writeSecondSection(); + void writeSymbolTable(); + void writeStringTable(); + void writeDirectoryTree(); + void writeDirectoryStringTable(); + void writeFirstSectionRelocations(); + std::unique_ptr<WritableMemoryBuffer> OutputBuffer; + char *BufferStart; + uint64_t CurrentOffset = 0; + COFF::MachineTypes MachineType; + const WindowsResourceParser::TreeNode &Resources; + const ArrayRef<std::vector<uint8_t>> Data; + uint64_t FileSize; + uint32_t SymbolTableOffset; + uint32_t SectionOneSize; + uint32_t SectionOneOffset; + uint32_t SectionOneRelocations; + uint32_t SectionTwoSize; + uint32_t SectionTwoOffset; + const ArrayRef<std::vector<UTF16>> StringTable; + std::vector<uint32_t> StringTableOffsets; + std::vector<uint32_t> DataOffsets; + std::vector<uint32_t> RelocationAddresses; +}; + 
+WindowsResourceCOFFWriter::WindowsResourceCOFFWriter( + COFF::MachineTypes MachineType, const WindowsResourceParser &Parser, + Error &E) + : MachineType(MachineType), Resources(Parser.getTree()), + Data(Parser.getData()), StringTable(Parser.getStringTable()) { + performFileLayout(); + + OutputBuffer = WritableMemoryBuffer::getNewMemBuffer( + FileSize, "internal .obj file created from .res files"); +} + +void WindowsResourceCOFFWriter::performFileLayout() { + // Add size of COFF header. + FileSize = COFF::Header16Size; + + // one .rsrc section header for directory tree, another for resource data. + FileSize += 2 * COFF::SectionSize; + + performSectionOneLayout(); + performSectionTwoLayout(); + + // We have reached the address of the symbol table. + SymbolTableOffset = FileSize; + + FileSize += COFF::Symbol16Size; // size of the @feat.00 symbol. + FileSize += 4 * COFF::Symbol16Size; // symbol + aux for each section. + FileSize += Data.size() * COFF::Symbol16Size; // 1 symbol per resource. + FileSize += 4; // four null bytes for the string table. +} + +void WindowsResourceCOFFWriter::performSectionOneLayout() { + SectionOneOffset = FileSize; + + SectionOneSize = Resources.getTreeSize(); + uint32_t CurrentStringOffset = SectionOneSize; + uint32_t TotalStringTableSize = 0; + for (auto const &String : StringTable) { + StringTableOffsets.push_back(CurrentStringOffset); + uint32_t StringSize = String.size() * sizeof(UTF16) + sizeof(uint16_t); + CurrentStringOffset += StringSize; + TotalStringTableSize += StringSize; + } + SectionOneSize += alignTo(TotalStringTableSize, sizeof(uint32_t)); + + // account for the relocations of section one. + SectionOneRelocations = FileSize + SectionOneSize; + FileSize += SectionOneSize; + FileSize += + Data.size() * COFF::RelocationSize; // one relocation for each resource. 
+ FileSize = alignTo(FileSize, SECTION_ALIGNMENT); +} + +void WindowsResourceCOFFWriter::performSectionTwoLayout() { + // add size of .rsrc$2 section, which contains all resource data on 8-byte + // alignment. + SectionTwoOffset = FileSize; + SectionTwoSize = 0; + for (auto const &Entry : Data) { + DataOffsets.push_back(SectionTwoSize); + SectionTwoSize += alignTo(Entry.size(), sizeof(uint64_t)); + } + FileSize += SectionTwoSize; + FileSize = alignTo(FileSize, SECTION_ALIGNMENT); +} + +std::unique_ptr<MemoryBuffer> +WindowsResourceCOFFWriter::write(uint32_t TimeDateStamp) { + BufferStart = OutputBuffer->getBufferStart(); + + writeCOFFHeader(TimeDateStamp); + writeFirstSectionHeader(); + writeSecondSectionHeader(); + writeFirstSection(); + writeSecondSection(); + writeSymbolTable(); + writeStringTable(); + + return std::move(OutputBuffer); +} + +// According to COFF specification, if the Src has a size equal to Dest, +// it's okay to *not* copy the trailing zero. +static void coffnamecpy(char (&Dest)[COFF::NameSize], StringRef Src) { + assert(Src.size() <= COFF::NameSize && + "Src is larger than COFF::NameSize"); + assert((Src.size() == COFF::NameSize || Dest[Src.size()] == '\0') && + "Dest not zeroed upon initialization"); + memcpy(Dest, Src.data(), Src.size()); +} + +void WindowsResourceCOFFWriter::writeCOFFHeader(uint32_t TimeDateStamp) { + // Write the COFF header. + auto *Header = reinterpret_cast<coff_file_header *>(BufferStart); + Header->Machine = MachineType; + Header->NumberOfSections = 2; + Header->TimeDateStamp = TimeDateStamp; + Header->PointerToSymbolTable = SymbolTableOffset; + // One symbol for every resource plus 2 for each section and 1 for @feat.00 + Header->NumberOfSymbols = Data.size() + 5; + Header->SizeOfOptionalHeader = 0; + // cvtres.exe sets 32BIT_MACHINE even for 64-bit machine types. Match it. 
+ Header->Characteristics = COFF::IMAGE_FILE_32BIT_MACHINE; +} + +void WindowsResourceCOFFWriter::writeFirstSectionHeader() { + // Write the first section header. + CurrentOffset += sizeof(coff_file_header); + auto *SectionOneHeader = + reinterpret_cast<coff_section *>(BufferStart + CurrentOffset); + coffnamecpy(SectionOneHeader->Name, ".rsrc$01"); + SectionOneHeader->VirtualSize = 0; + SectionOneHeader->VirtualAddress = 0; + SectionOneHeader->SizeOfRawData = SectionOneSize; + SectionOneHeader->PointerToRawData = SectionOneOffset; + SectionOneHeader->PointerToRelocations = SectionOneRelocations; + SectionOneHeader->PointerToLinenumbers = 0; + SectionOneHeader->NumberOfRelocations = Data.size(); + SectionOneHeader->NumberOfLinenumbers = 0; + SectionOneHeader->Characteristics += COFF::IMAGE_SCN_CNT_INITIALIZED_DATA; + SectionOneHeader->Characteristics += COFF::IMAGE_SCN_MEM_READ; +} + +void WindowsResourceCOFFWriter::writeSecondSectionHeader() { + // Write the second section header. + CurrentOffset += sizeof(coff_section); + auto *SectionTwoHeader = + reinterpret_cast<coff_section *>(BufferStart + CurrentOffset); + coffnamecpy(SectionTwoHeader->Name, ".rsrc$02"); + SectionTwoHeader->VirtualSize = 0; + SectionTwoHeader->VirtualAddress = 0; + SectionTwoHeader->SizeOfRawData = SectionTwoSize; + SectionTwoHeader->PointerToRawData = SectionTwoOffset; + SectionTwoHeader->PointerToRelocations = 0; + SectionTwoHeader->PointerToLinenumbers = 0; + SectionTwoHeader->NumberOfRelocations = 0; + SectionTwoHeader->NumberOfLinenumbers = 0; + SectionTwoHeader->Characteristics = COFF::IMAGE_SCN_CNT_INITIALIZED_DATA; + SectionTwoHeader->Characteristics += COFF::IMAGE_SCN_MEM_READ; +} + +void WindowsResourceCOFFWriter::writeFirstSection() { + // Write section one. 
+ CurrentOffset += sizeof(coff_section); + + writeDirectoryTree(); + writeDirectoryStringTable(); + writeFirstSectionRelocations(); + + CurrentOffset = alignTo(CurrentOffset, SECTION_ALIGNMENT); +} + +void WindowsResourceCOFFWriter::writeSecondSection() { + // Now write the .rsrc$02 section. + for (auto const &RawDataEntry : Data) { + llvm::copy(RawDataEntry, BufferStart + CurrentOffset); + CurrentOffset += alignTo(RawDataEntry.size(), sizeof(uint64_t)); + } + + CurrentOffset = alignTo(CurrentOffset, SECTION_ALIGNMENT); +} + +void WindowsResourceCOFFWriter::writeSymbolTable() { + // Now write the symbol table. + // First, the feat symbol. + auto *Symbol = reinterpret_cast<coff_symbol16 *>(BufferStart + CurrentOffset); + coffnamecpy(Symbol->Name.ShortName, "@feat.00"); + Symbol->Value = 0x11; + Symbol->SectionNumber = 0xffff; + Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL; + Symbol->StorageClass = COFF::IMAGE_SYM_CLASS_STATIC; + Symbol->NumberOfAuxSymbols = 0; + CurrentOffset += sizeof(coff_symbol16); + + // Now write the .rsrc1 symbol + aux. + Symbol = reinterpret_cast<coff_symbol16 *>(BufferStart + CurrentOffset); + coffnamecpy(Symbol->Name.ShortName, ".rsrc$01"); + Symbol->Value = 0; + Symbol->SectionNumber = 1; + Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL; + Symbol->StorageClass = COFF::IMAGE_SYM_CLASS_STATIC; + Symbol->NumberOfAuxSymbols = 1; + CurrentOffset += sizeof(coff_symbol16); + auto *Aux = reinterpret_cast<coff_aux_section_definition *>(BufferStart + + CurrentOffset); + Aux->Length = SectionOneSize; + Aux->NumberOfRelocations = Data.size(); + Aux->NumberOfLinenumbers = 0; + Aux->CheckSum = 0; + Aux->NumberLowPart = 0; + Aux->Selection = 0; + CurrentOffset += sizeof(coff_aux_section_definition); + + // Now write the .rsrc2 symbol + aux. 
+ Symbol = reinterpret_cast<coff_symbol16 *>(BufferStart + CurrentOffset); + coffnamecpy(Symbol->Name.ShortName, ".rsrc$02"); + Symbol->Value = 0; + Symbol->SectionNumber = 2; + Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL; + Symbol->StorageClass = COFF::IMAGE_SYM_CLASS_STATIC; + Symbol->NumberOfAuxSymbols = 1; + CurrentOffset += sizeof(coff_symbol16); + Aux = reinterpret_cast<coff_aux_section_definition *>(BufferStart + + CurrentOffset); + Aux->Length = SectionTwoSize; + Aux->NumberOfRelocations = 0; + Aux->NumberOfLinenumbers = 0; + Aux->CheckSum = 0; + Aux->NumberLowPart = 0; + Aux->Selection = 0; + CurrentOffset += sizeof(coff_aux_section_definition); + + // Now write a symbol for each relocation. + for (unsigned i = 0; i < Data.size(); i++) { + auto RelocationName = formatv("$R{0:X-6}", i & 0xffffff).sstr<COFF::NameSize>(); + Symbol = reinterpret_cast<coff_symbol16 *>(BufferStart + CurrentOffset); + coffnamecpy(Symbol->Name.ShortName, RelocationName); + Symbol->Value = DataOffsets[i]; + Symbol->SectionNumber = 2; + Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL; + Symbol->StorageClass = COFF::IMAGE_SYM_CLASS_STATIC; + Symbol->NumberOfAuxSymbols = 0; + CurrentOffset += sizeof(coff_symbol16); + } +} + +void WindowsResourceCOFFWriter::writeStringTable() { + // Just 4 null bytes for the string table. + auto COFFStringTable = reinterpret_cast<void *>(BufferStart + CurrentOffset); + memset(COFFStringTable, 0, 4); +} + +void WindowsResourceCOFFWriter::writeDirectoryTree() { + // Traverse parsed resource tree breadth-first and write the corresponding + // COFF objects. 
+ std::queue<const WindowsResourceParser::TreeNode *> Queue; + Queue.push(&Resources); + uint32_t NextLevelOffset = + sizeof(coff_resource_dir_table) + (Resources.getStringChildren().size() + + Resources.getIDChildren().size()) * + sizeof(coff_resource_dir_entry); + std::vector<const WindowsResourceParser::TreeNode *> DataEntriesTreeOrder; + uint32_t CurrentRelativeOffset = 0; + + while (!Queue.empty()) { + auto CurrentNode = Queue.front(); + Queue.pop(); + auto *Table = reinterpret_cast<coff_resource_dir_table *>(BufferStart + + CurrentOffset); + Table->Characteristics = CurrentNode->getCharacteristics(); + Table->TimeDateStamp = 0; + Table->MajorVersion = CurrentNode->getMajorVersion(); + Table->MinorVersion = CurrentNode->getMinorVersion(); + auto &IDChildren = CurrentNode->getIDChildren(); + auto &StringChildren = CurrentNode->getStringChildren(); + Table->NumberOfNameEntries = StringChildren.size(); + Table->NumberOfIDEntries = IDChildren.size(); + CurrentOffset += sizeof(coff_resource_dir_table); + CurrentRelativeOffset += sizeof(coff_resource_dir_table); + + // Write the directory entries immediately following each directory table. 
+ for (auto const &Child : StringChildren) { + auto *Entry = reinterpret_cast<coff_resource_dir_entry *>(BufferStart + + CurrentOffset); + Entry->Identifier.setNameOffset( + StringTableOffsets[Child.second->getStringIndex()]); + if (Child.second->checkIsDataNode()) { + Entry->Offset.DataEntryOffset = NextLevelOffset; + NextLevelOffset += sizeof(coff_resource_data_entry); + DataEntriesTreeOrder.push_back(Child.second.get()); + } else { + Entry->Offset.SubdirOffset = NextLevelOffset + (1 << 31); + NextLevelOffset += sizeof(coff_resource_dir_table) + + (Child.second->getStringChildren().size() + + Child.second->getIDChildren().size()) * + sizeof(coff_resource_dir_entry); + Queue.push(Child.second.get()); + } + CurrentOffset += sizeof(coff_resource_dir_entry); + CurrentRelativeOffset += sizeof(coff_resource_dir_entry); + } + for (auto const &Child : IDChildren) { + auto *Entry = reinterpret_cast<coff_resource_dir_entry *>(BufferStart + + CurrentOffset); + Entry->Identifier.ID = Child.first; + if (Child.second->checkIsDataNode()) { + Entry->Offset.DataEntryOffset = NextLevelOffset; + NextLevelOffset += sizeof(coff_resource_data_entry); + DataEntriesTreeOrder.push_back(Child.second.get()); + } else { + Entry->Offset.SubdirOffset = NextLevelOffset + (1 << 31); + NextLevelOffset += sizeof(coff_resource_dir_table) + + (Child.second->getStringChildren().size() + + Child.second->getIDChildren().size()) * + sizeof(coff_resource_dir_entry); + Queue.push(Child.second.get()); + } + CurrentOffset += sizeof(coff_resource_dir_entry); + CurrentRelativeOffset += sizeof(coff_resource_dir_entry); + } + } + + RelocationAddresses.resize(Data.size()); + // Now write all the resource data entries. + for (auto DataNodes : DataEntriesTreeOrder) { + auto *Entry = reinterpret_cast<coff_resource_data_entry *>(BufferStart + + CurrentOffset); + RelocationAddresses[DataNodes->getDataIndex()] = CurrentRelativeOffset; + Entry->DataRVA = 0; // Set to zero because it is a relocation. 
+ Entry->DataSize = Data[DataNodes->getDataIndex()].size(); + Entry->Codepage = 0; + Entry->Reserved = 0; + CurrentOffset += sizeof(coff_resource_data_entry); + CurrentRelativeOffset += sizeof(coff_resource_data_entry); + } +} + +void WindowsResourceCOFFWriter::writeDirectoryStringTable() { + // Now write the directory string table for .rsrc$01 + uint32_t TotalStringTableSize = 0; + for (auto &String : StringTable) { + uint16_t Length = String.size(); + support::endian::write16le(BufferStart + CurrentOffset, Length); + CurrentOffset += sizeof(uint16_t); + auto *Start = reinterpret_cast<UTF16 *>(BufferStart + CurrentOffset); + llvm::copy(String, Start); + CurrentOffset += Length * sizeof(UTF16); + TotalStringTableSize += Length * sizeof(UTF16) + sizeof(uint16_t); + } + CurrentOffset += + alignTo(TotalStringTableSize, sizeof(uint32_t)) - TotalStringTableSize; +} + +void WindowsResourceCOFFWriter::writeFirstSectionRelocations() { + + // Now write the relocations for .rsrc$01 + // Five symbols already in table before we start, @feat.00 and 2 for each + // .rsrc section. 
+ uint32_t NextSymbolIndex = 5; + for (unsigned i = 0; i < Data.size(); i++) { + auto *Reloc = + reinterpret_cast<coff_relocation *>(BufferStart + CurrentOffset); + Reloc->VirtualAddress = RelocationAddresses[i]; + Reloc->SymbolTableIndex = NextSymbolIndex++; + switch (MachineType) { + case COFF::IMAGE_FILE_MACHINE_ARMNT: + Reloc->Type = COFF::IMAGE_REL_ARM_ADDR32NB; + break; + case COFF::IMAGE_FILE_MACHINE_AMD64: + Reloc->Type = COFF::IMAGE_REL_AMD64_ADDR32NB; + break; + case COFF::IMAGE_FILE_MACHINE_I386: + Reloc->Type = COFF::IMAGE_REL_I386_DIR32NB; + break; + case COFF::IMAGE_FILE_MACHINE_ARM64: + Reloc->Type = COFF::IMAGE_REL_ARM64_ADDR32NB; + break; + default: + llvm_unreachable("unknown machine type"); + } + CurrentOffset += sizeof(coff_relocation); + } +} + +Expected<std::unique_ptr<MemoryBuffer>> +writeWindowsResourceCOFF(COFF::MachineTypes MachineType, + const WindowsResourceParser &Parser, + uint32_t TimeDateStamp) { + Error E = Error::success(); + WindowsResourceCOFFWriter Writer(MachineType, Parser, E); + if (E) + return std::move(E); + return Writer.write(TimeDateStamp); +} + +} // namespace object +} // namespace llvm diff --git a/contrib/libs/llvm14/lib/Object/XCOFFObjectFile.cpp b/contrib/libs/llvm14/lib/Object/XCOFFObjectFile.cpp new file mode 100644 index 0000000000..f2f6d700dd --- /dev/null +++ b/contrib/libs/llvm14/lib/Object/XCOFFObjectFile.cpp @@ -0,0 +1,1491 @@ +//===--- XCOFFObjectFile.cpp - XCOFF object file implementation -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the XCOFFObjectFile class. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/XCOFFObjectFile.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Support/DataExtractor.h" +#include <cstddef> +#include <cstring> + +namespace llvm { + +using namespace XCOFF; + +namespace object { + +static const uint8_t FunctionSym = 0x20; +static const uint16_t NoRelMask = 0x0001; +static const size_t SymbolAuxTypeOffset = 17; + +// Checks that [Ptr, Ptr + Size) bytes fall inside the memory buffer +// 'M'. Returns a pointer to the underlying object on success. +template <typename T> +static Expected<const T *> getObject(MemoryBufferRef M, const void *Ptr, + const uint64_t Size = sizeof(T)) { + uintptr_t Addr = reinterpret_cast<uintptr_t>(Ptr); + if (Error E = Binary::checkOffset(M, Addr, Size)) + return std::move(E); + return reinterpret_cast<const T *>(Addr); +} + +static uintptr_t getWithOffset(uintptr_t Base, ptrdiff_t Offset) { + return reinterpret_cast<uintptr_t>(reinterpret_cast<const char *>(Base) + + Offset); +} + +template <typename T> static const T *viewAs(uintptr_t in) { + return reinterpret_cast<const T *>(in); +} + +static StringRef generateXCOFFFixedNameStringRef(const char *Name) { + auto NulCharPtr = + static_cast<const char *>(memchr(Name, '\0', XCOFF::NameSize)); + return NulCharPtr ? 
StringRef(Name, NulCharPtr - Name) + : StringRef(Name, XCOFF::NameSize); +} + +template <typename T> StringRef XCOFFSectionHeader<T>::getName() const { + const T &DerivedXCOFFSectionHeader = static_cast<const T &>(*this); + return generateXCOFFFixedNameStringRef(DerivedXCOFFSectionHeader.Name); +} + +template <typename T> uint16_t XCOFFSectionHeader<T>::getSectionType() const { + const T &DerivedXCOFFSectionHeader = static_cast<const T &>(*this); + return DerivedXCOFFSectionHeader.Flags & SectionFlagsTypeMask; +} + +template <typename T> +bool XCOFFSectionHeader<T>::isReservedSectionType() const { + return getSectionType() & SectionFlagsReservedMask; +} + +template <typename AddressType> +bool XCOFFRelocation<AddressType>::isRelocationSigned() const { + return Info & XR_SIGN_INDICATOR_MASK; +} + +template <typename AddressType> +bool XCOFFRelocation<AddressType>::isFixupIndicated() const { + return Info & XR_FIXUP_INDICATOR_MASK; +} + +template <typename AddressType> +uint8_t XCOFFRelocation<AddressType>::getRelocatedLength() const { + // The relocation encodes the bit length being relocated minus 1. Add back + // the 1 to get the actual length being relocated. 
+ return (Info & XR_BIASED_LENGTH_MASK) + 1; +} + +uintptr_t +XCOFFObjectFile::getAdvancedSymbolEntryAddress(uintptr_t CurrentAddress, + uint32_t Distance) { + return getWithOffset(CurrentAddress, Distance * XCOFF::SymbolTableEntrySize); +} + +const XCOFF::SymbolAuxType * +XCOFFObjectFile::getSymbolAuxType(uintptr_t AuxEntryAddress) const { + assert(is64Bit() && "64-bit interface called on a 32-bit object file."); + return viewAs<XCOFF::SymbolAuxType>( + getWithOffset(AuxEntryAddress, SymbolAuxTypeOffset)); +} + +void XCOFFObjectFile::checkSectionAddress(uintptr_t Addr, + uintptr_t TableAddress) const { + if (Addr < TableAddress) + report_fatal_error("Section header outside of section header table."); + + uintptr_t Offset = Addr - TableAddress; + if (Offset >= getSectionHeaderSize() * getNumberOfSections()) + report_fatal_error("Section header outside of section header table."); + + if (Offset % getSectionHeaderSize() != 0) + report_fatal_error( + "Section header pointer does not point to a valid section header."); +} + +const XCOFFSectionHeader32 * +XCOFFObjectFile::toSection32(DataRefImpl Ref) const { + assert(!is64Bit() && "32-bit interface called on 64-bit object file."); +#ifndef NDEBUG + checkSectionAddress(Ref.p, getSectionHeaderTableAddress()); +#endif + return viewAs<XCOFFSectionHeader32>(Ref.p); +} + +const XCOFFSectionHeader64 * +XCOFFObjectFile::toSection64(DataRefImpl Ref) const { + assert(is64Bit() && "64-bit interface called on a 32-bit object file."); +#ifndef NDEBUG + checkSectionAddress(Ref.p, getSectionHeaderTableAddress()); +#endif + return viewAs<XCOFFSectionHeader64>(Ref.p); +} + +XCOFFSymbolRef XCOFFObjectFile::toSymbolRef(DataRefImpl Ref) const { + assert(Ref.p != 0 && "Symbol table pointer can not be nullptr!"); +#ifndef NDEBUG + checkSymbolEntryPointer(Ref.p); +#endif + return XCOFFSymbolRef(Ref, this); +} + +const XCOFFFileHeader32 *XCOFFObjectFile::fileHeader32() const { + assert(!is64Bit() && "32-bit interface called on 64-bit object 
file."); + return static_cast<const XCOFFFileHeader32 *>(FileHeader); +} + +const XCOFFFileHeader64 *XCOFFObjectFile::fileHeader64() const { + assert(is64Bit() && "64-bit interface called on a 32-bit object file."); + return static_cast<const XCOFFFileHeader64 *>(FileHeader); +} + +const XCOFFAuxiliaryHeader32 *XCOFFObjectFile::auxiliaryHeader32() const { + assert(!is64Bit() && "32-bit interface called on 64-bit object file."); + return static_cast<const XCOFFAuxiliaryHeader32 *>(AuxiliaryHeader); +} + +const XCOFFAuxiliaryHeader64 *XCOFFObjectFile::auxiliaryHeader64() const { + assert(is64Bit() && "64-bit interface called on a 32-bit object file."); + return static_cast<const XCOFFAuxiliaryHeader64 *>(AuxiliaryHeader); +} + +template <typename T> const T *XCOFFObjectFile::sectionHeaderTable() const { + return static_cast<const T *>(SectionHeaderTable); +} + +const XCOFFSectionHeader32 * +XCOFFObjectFile::sectionHeaderTable32() const { + assert(!is64Bit() && "32-bit interface called on 64-bit object file."); + return static_cast<const XCOFFSectionHeader32 *>(SectionHeaderTable); +} + +const XCOFFSectionHeader64 * +XCOFFObjectFile::sectionHeaderTable64() const { + assert(is64Bit() && "64-bit interface called on a 32-bit object file."); + return static_cast<const XCOFFSectionHeader64 *>(SectionHeaderTable); +} + +void XCOFFObjectFile::moveSymbolNext(DataRefImpl &Symb) const { + uintptr_t NextSymbolAddr = getAdvancedSymbolEntryAddress( + Symb.p, toSymbolRef(Symb).getNumberOfAuxEntries() + 1); +#ifndef NDEBUG + // This function is used by basic_symbol_iterator, which allows to + // point to the end-of-symbol-table address. + if (NextSymbolAddr != getEndOfSymbolTableAddress()) + checkSymbolEntryPointer(NextSymbolAddr); +#endif + Symb.p = NextSymbolAddr; +} + +Expected<StringRef> +XCOFFObjectFile::getStringTableEntry(uint32_t Offset) const { + // The byte offset is relative to the start of the string table. 
+ // A byte offset value of 0 is a null or zero-length symbol + // name. A byte offset in the range 1 to 3 (inclusive) points into the length + // field; as a soft-error recovery mechanism, we treat such cases as having an + // offset of 0. + if (Offset < 4) + return StringRef(nullptr, 0); + + if (StringTable.Data != nullptr && StringTable.Size > Offset) + return (StringTable.Data + Offset); + + return createError("entry with offset 0x" + Twine::utohexstr(Offset) + + " in a string table with size 0x" + + Twine::utohexstr(StringTable.Size) + " is invalid"); +} + +StringRef XCOFFObjectFile::getStringTable() const { + // If the size is less than or equal to 4, then the string table contains no + // string data. + return StringRef(StringTable.Data, + StringTable.Size <= 4 ? 0 : StringTable.Size); +} + +Expected<StringRef> +XCOFFObjectFile::getCFileName(const XCOFFFileAuxEnt *CFileEntPtr) const { + if (CFileEntPtr->NameInStrTbl.Magic != XCOFFSymbolRef::NAME_IN_STR_TBL_MAGIC) + return generateXCOFFFixedNameStringRef(CFileEntPtr->Name); + return getStringTableEntry(CFileEntPtr->NameInStrTbl.Offset); +} + +Expected<StringRef> XCOFFObjectFile::getSymbolName(DataRefImpl Symb) const { + return toSymbolRef(Symb).getName(); +} + +Expected<uint64_t> XCOFFObjectFile::getSymbolAddress(DataRefImpl Symb) const { + return toSymbolRef(Symb).getValue(); +} + +uint64_t XCOFFObjectFile::getSymbolValueImpl(DataRefImpl Symb) const { + return toSymbolRef(Symb).getValue(); +} + +uint32_t XCOFFObjectFile::getSymbolAlignment(DataRefImpl Symb) const { + uint64_t Result = 0; + XCOFFSymbolRef XCOFFSym = toSymbolRef(Symb); + if (XCOFFSym.isCsectSymbol()) { + Expected<XCOFFCsectAuxRef> CsectAuxRefOrError = + XCOFFSym.getXCOFFCsectAuxRef(); + if (!CsectAuxRefOrError) + // TODO: report the error up the stack. 
+ consumeError(CsectAuxRefOrError.takeError()); + else + Result = 1ULL << CsectAuxRefOrError.get().getAlignmentLog2(); + } + return Result; +} + +uint64_t XCOFFObjectFile::getCommonSymbolSizeImpl(DataRefImpl Symb) const { + uint64_t Result = 0; + XCOFFSymbolRef XCOFFSym = toSymbolRef(Symb); + if (XCOFFSym.isCsectSymbol()) { + Expected<XCOFFCsectAuxRef> CsectAuxRefOrError = + XCOFFSym.getXCOFFCsectAuxRef(); + if (!CsectAuxRefOrError) + // TODO: report the error up the stack. + consumeError(CsectAuxRefOrError.takeError()); + else { + XCOFFCsectAuxRef CsectAuxRef = CsectAuxRefOrError.get(); + assert(CsectAuxRef.getSymbolType() == XCOFF::XTY_CM); + Result = CsectAuxRef.getSectionOrLength(); + } + } + return Result; +} + +Expected<SymbolRef::Type> +XCOFFObjectFile::getSymbolType(DataRefImpl Symb) const { + XCOFFSymbolRef XCOFFSym = toSymbolRef(Symb); + + if (XCOFFSym.isFunction()) + return SymbolRef::ST_Function; + + if (XCOFF::C_FILE == XCOFFSym.getStorageClass()) + return SymbolRef::ST_File; + + int16_t SecNum = XCOFFSym.getSectionNumber(); + if (SecNum <= 0) + return SymbolRef::ST_Other; + + Expected<DataRefImpl> SecDRIOrErr = + getSectionByNum(XCOFFSym.getSectionNumber()); + + if (!SecDRIOrErr) + return SecDRIOrErr.takeError(); + + DataRefImpl SecDRI = SecDRIOrErr.get(); + + Expected<StringRef> SymNameOrError = XCOFFSym.getName(); + if (SymNameOrError) { + // The "TOC" symbol is treated as SymbolRef::ST_Other. + if (SymNameOrError.get() == "TOC") + return SymbolRef::ST_Other; + + // The symbol for a section name is treated as SymbolRef::ST_Other. 
+ StringRef SecName; + if (is64Bit()) + SecName = XCOFFObjectFile::toSection64(SecDRIOrErr.get())->getName(); + else + SecName = XCOFFObjectFile::toSection32(SecDRIOrErr.get())->getName(); + + if (SecName == SymNameOrError.get()) + return SymbolRef::ST_Other; + } else + return SymNameOrError.takeError(); + + if (isSectionData(SecDRI) || isSectionBSS(SecDRI)) + return SymbolRef::ST_Data; + + if (isDebugSection(SecDRI)) + return SymbolRef::ST_Debug; + + return SymbolRef::ST_Other; +} + +Expected<section_iterator> +XCOFFObjectFile::getSymbolSection(DataRefImpl Symb) const { + const int16_t SectNum = toSymbolRef(Symb).getSectionNumber(); + + if (isReservedSectionNumber(SectNum)) + return section_end(); + + Expected<DataRefImpl> ExpSec = getSectionByNum(SectNum); + if (!ExpSec) + return ExpSec.takeError(); + + return section_iterator(SectionRef(ExpSec.get(), this)); +} + +void XCOFFObjectFile::moveSectionNext(DataRefImpl &Sec) const { + const char *Ptr = reinterpret_cast<const char *>(Sec.p); + Sec.p = reinterpret_cast<uintptr_t>(Ptr + getSectionHeaderSize()); +} + +Expected<StringRef> XCOFFObjectFile::getSectionName(DataRefImpl Sec) const { + return generateXCOFFFixedNameStringRef(getSectionNameInternal(Sec)); +} + +uint64_t XCOFFObjectFile::getSectionAddress(DataRefImpl Sec) const { + // Avoid ternary due to failure to convert the ubig32_t value to a unit64_t + // with MSVC. + if (is64Bit()) + return toSection64(Sec)->VirtualAddress; + + return toSection32(Sec)->VirtualAddress; +} + +uint64_t XCOFFObjectFile::getSectionIndex(DataRefImpl Sec) const { + // Section numbers in XCOFF are numbered beginning at 1. A section number of + // zero is used to indicate that a symbol is being imported or is undefined. 
+ if (is64Bit()) + return toSection64(Sec) - sectionHeaderTable64() + 1; + else + return toSection32(Sec) - sectionHeaderTable32() + 1; +} + +uint64_t XCOFFObjectFile::getSectionSize(DataRefImpl Sec) const { + // Avoid ternary due to failure to convert the ubig32_t value to a unit64_t + // with MSVC. + if (is64Bit()) + return toSection64(Sec)->SectionSize; + + return toSection32(Sec)->SectionSize; +} + +Expected<ArrayRef<uint8_t>> +XCOFFObjectFile::getSectionContents(DataRefImpl Sec) const { + if (isSectionVirtual(Sec)) + return ArrayRef<uint8_t>(); + + uint64_t OffsetToRaw; + if (is64Bit()) + OffsetToRaw = toSection64(Sec)->FileOffsetToRawData; + else + OffsetToRaw = toSection32(Sec)->FileOffsetToRawData; + + const uint8_t * ContentStart = base() + OffsetToRaw; + uint64_t SectionSize = getSectionSize(Sec); + if (Error E = Binary::checkOffset( + Data, reinterpret_cast<uintptr_t>(ContentStart), SectionSize)) + return createError( + toString(std::move(E)) + ": section data with offset 0x" + + Twine::utohexstr(OffsetToRaw) + " and size 0x" + + Twine::utohexstr(SectionSize) + " goes past the end of the file"); + + return makeArrayRef(ContentStart,SectionSize); +} + +uint64_t XCOFFObjectFile::getSectionAlignment(DataRefImpl Sec) const { + uint64_t Result = 0; + llvm_unreachable("Not yet implemented!"); + return Result; +} + +Expected<uintptr_t> XCOFFObjectFile::getLoaderSectionAddress() const { + uint64_t OffsetToLoaderSection = 0; + uint64_t SizeOfLoaderSection = 0; + + if (is64Bit()) { + for (const auto &Sec64 : sections64()) + if (Sec64.getSectionType() == XCOFF::STYP_LOADER) { + OffsetToLoaderSection = Sec64.FileOffsetToRawData; + SizeOfLoaderSection = Sec64.SectionSize; + break; + } + } else { + for (const auto &Sec32 : sections32()) + if (Sec32.getSectionType() == XCOFF::STYP_LOADER) { + OffsetToLoaderSection = Sec32.FileOffsetToRawData; + SizeOfLoaderSection = Sec32.SectionSize; + break; + } + } + + // No loader section is not an error. 
+ if (!SizeOfLoaderSection) + return 0; + + uintptr_t LoderSectionStart = + reinterpret_cast<uintptr_t>(base() + OffsetToLoaderSection); + if (Error E = + Binary::checkOffset(Data, LoderSectionStart, SizeOfLoaderSection)) + return createError(toString(std::move(E)) + + ": loader section with offset 0x" + + Twine::utohexstr(OffsetToLoaderSection) + + " and size 0x" + Twine::utohexstr(SizeOfLoaderSection) + + " goes past the end of the file"); + + return LoderSectionStart; +} + +bool XCOFFObjectFile::isSectionCompressed(DataRefImpl Sec) const { + return false; +} + +bool XCOFFObjectFile::isSectionText(DataRefImpl Sec) const { + return getSectionFlags(Sec) & XCOFF::STYP_TEXT; +} + +bool XCOFFObjectFile::isSectionData(DataRefImpl Sec) const { + uint32_t Flags = getSectionFlags(Sec); + return Flags & (XCOFF::STYP_DATA | XCOFF::STYP_TDATA); +} + +bool XCOFFObjectFile::isSectionBSS(DataRefImpl Sec) const { + uint32_t Flags = getSectionFlags(Sec); + return Flags & (XCOFF::STYP_BSS | XCOFF::STYP_TBSS); +} + +bool XCOFFObjectFile::isDebugSection(DataRefImpl Sec) const { + uint32_t Flags = getSectionFlags(Sec); + return Flags & (XCOFF::STYP_DEBUG | XCOFF::STYP_DWARF); +} + +bool XCOFFObjectFile::isSectionVirtual(DataRefImpl Sec) const { + return is64Bit() ? toSection64(Sec)->FileOffsetToRawData == 0 + : toSection32(Sec)->FileOffsetToRawData == 0; +} + +relocation_iterator XCOFFObjectFile::section_rel_begin(DataRefImpl Sec) const { + DataRefImpl Ret; + if (is64Bit()) { + const XCOFFSectionHeader64 *SectionEntPtr = toSection64(Sec); + auto RelocationsOrErr = + relocations<XCOFFSectionHeader64, XCOFFRelocation64>(*SectionEntPtr); + if (Error E = RelocationsOrErr.takeError()) { + // TODO: report the error up the stack. 
+ consumeError(std::move(E)); + return relocation_iterator(RelocationRef()); + } + Ret.p = reinterpret_cast<uintptr_t>(&*RelocationsOrErr.get().begin()); + } else { + const XCOFFSectionHeader32 *SectionEntPtr = toSection32(Sec); + auto RelocationsOrErr = + relocations<XCOFFSectionHeader32, XCOFFRelocation32>(*SectionEntPtr); + if (Error E = RelocationsOrErr.takeError()) { + // TODO: report the error up the stack. + consumeError(std::move(E)); + return relocation_iterator(RelocationRef()); + } + Ret.p = reinterpret_cast<uintptr_t>(&*RelocationsOrErr.get().begin()); + } + return relocation_iterator(RelocationRef(Ret, this)); +} + +relocation_iterator XCOFFObjectFile::section_rel_end(DataRefImpl Sec) const { + DataRefImpl Ret; + if (is64Bit()) { + const XCOFFSectionHeader64 *SectionEntPtr = toSection64(Sec); + auto RelocationsOrErr = + relocations<XCOFFSectionHeader64, XCOFFRelocation64>(*SectionEntPtr); + if (Error E = RelocationsOrErr.takeError()) { + // TODO: report the error up the stack. + consumeError(std::move(E)); + return relocation_iterator(RelocationRef()); + } + Ret.p = reinterpret_cast<uintptr_t>(&*RelocationsOrErr.get().end()); + } else { + const XCOFFSectionHeader32 *SectionEntPtr = toSection32(Sec); + auto RelocationsOrErr = + relocations<XCOFFSectionHeader32, XCOFFRelocation32>(*SectionEntPtr); + if (Error E = RelocationsOrErr.takeError()) { + // TODO: report the error up the stack. 
+ consumeError(std::move(E)); + return relocation_iterator(RelocationRef()); + } + Ret.p = reinterpret_cast<uintptr_t>(&*RelocationsOrErr.get().end()); + } + return relocation_iterator(RelocationRef(Ret, this)); +} + +void XCOFFObjectFile::moveRelocationNext(DataRefImpl &Rel) const { + if (is64Bit()) + Rel.p = reinterpret_cast<uintptr_t>(viewAs<XCOFFRelocation64>(Rel.p) + 1); + else + Rel.p = reinterpret_cast<uintptr_t>(viewAs<XCOFFRelocation32>(Rel.p) + 1); +} + +uint64_t XCOFFObjectFile::getRelocationOffset(DataRefImpl Rel) const { + if (is64Bit()) { + const XCOFFRelocation64 *Reloc = viewAs<XCOFFRelocation64>(Rel.p); + const XCOFFSectionHeader64 *Sec64 = sectionHeaderTable64(); + const uint64_t RelocAddress = Reloc->VirtualAddress; + const uint16_t NumberOfSections = getNumberOfSections(); + for (uint16_t I = 0; I < NumberOfSections; ++I) { + // Find which section this relocation belongs to, and get the + // relocation offset relative to the start of the section. + if (Sec64->VirtualAddress <= RelocAddress && + RelocAddress < Sec64->VirtualAddress + Sec64->SectionSize) { + return RelocAddress - Sec64->VirtualAddress; + } + ++Sec64; + } + } else { + const XCOFFRelocation32 *Reloc = viewAs<XCOFFRelocation32>(Rel.p); + const XCOFFSectionHeader32 *Sec32 = sectionHeaderTable32(); + const uint32_t RelocAddress = Reloc->VirtualAddress; + const uint16_t NumberOfSections = getNumberOfSections(); + for (uint16_t I = 0; I < NumberOfSections; ++I) { + // Find which section this relocation belongs to, and get the + // relocation offset relative to the start of the section. 
+ if (Sec32->VirtualAddress <= RelocAddress && + RelocAddress < Sec32->VirtualAddress + Sec32->SectionSize) { + return RelocAddress - Sec32->VirtualAddress; + } + ++Sec32; + } + } + return InvalidRelocOffset; +} + +symbol_iterator XCOFFObjectFile::getRelocationSymbol(DataRefImpl Rel) const { + uint32_t Index; + if (is64Bit()) { + const XCOFFRelocation64 *Reloc = viewAs<XCOFFRelocation64>(Rel.p); + Index = Reloc->SymbolIndex; + + if (Index >= getNumberOfSymbolTableEntries64()) + return symbol_end(); + } else { + const XCOFFRelocation32 *Reloc = viewAs<XCOFFRelocation32>(Rel.p); + Index = Reloc->SymbolIndex; + + if (Index >= getLogicalNumberOfSymbolTableEntries32()) + return symbol_end(); + } + DataRefImpl SymDRI; + SymDRI.p = getSymbolEntryAddressByIndex(Index); + return symbol_iterator(SymbolRef(SymDRI, this)); +} + +uint64_t XCOFFObjectFile::getRelocationType(DataRefImpl Rel) const { + if (is64Bit()) + return viewAs<XCOFFRelocation64>(Rel.p)->Type; + return viewAs<XCOFFRelocation32>(Rel.p)->Type; +} + +void XCOFFObjectFile::getRelocationTypeName( + DataRefImpl Rel, SmallVectorImpl<char> &Result) const { + StringRef Res; + if (is64Bit()) { + const XCOFFRelocation64 *Reloc = viewAs<XCOFFRelocation64>(Rel.p); + Res = XCOFF::getRelocationTypeString(Reloc->Type); + } else { + const XCOFFRelocation32 *Reloc = viewAs<XCOFFRelocation32>(Rel.p); + Res = XCOFF::getRelocationTypeString(Reloc->Type); + } + Result.append(Res.begin(), Res.end()); +} + +Expected<uint32_t> XCOFFObjectFile::getSymbolFlags(DataRefImpl Symb) const { + XCOFFSymbolRef XCOFFSym = toSymbolRef(Symb); + uint32_t Result = SymbolRef::SF_None; + + if (XCOFFSym.getSectionNumber() == XCOFF::N_ABS) + Result |= SymbolRef::SF_Absolute; + + XCOFF::StorageClass SC = XCOFFSym.getStorageClass(); + if (XCOFF::C_EXT == SC || XCOFF::C_WEAKEXT == SC) + Result |= SymbolRef::SF_Global; + + if (XCOFF::C_WEAKEXT == SC) + Result |= SymbolRef::SF_Weak; + + if (XCOFFSym.isCsectSymbol()) { + Expected<XCOFFCsectAuxRef> 
CsectAuxEntOrErr = + XCOFFSym.getXCOFFCsectAuxRef(); + if (CsectAuxEntOrErr) { + if (CsectAuxEntOrErr.get().getSymbolType() == XCOFF::XTY_CM) + Result |= SymbolRef::SF_Common; + } else + return CsectAuxEntOrErr.takeError(); + } + + if (XCOFFSym.getSectionNumber() == XCOFF::N_UNDEF) + Result |= SymbolRef::SF_Undefined; + + return Result; +} + +basic_symbol_iterator XCOFFObjectFile::symbol_begin() const { + DataRefImpl SymDRI; + SymDRI.p = reinterpret_cast<uintptr_t>(SymbolTblPtr); + return basic_symbol_iterator(SymbolRef(SymDRI, this)); +} + +basic_symbol_iterator XCOFFObjectFile::symbol_end() const { + DataRefImpl SymDRI; + const uint32_t NumberOfSymbolTableEntries = getNumberOfSymbolTableEntries(); + SymDRI.p = getSymbolEntryAddressByIndex(NumberOfSymbolTableEntries); + return basic_symbol_iterator(SymbolRef(SymDRI, this)); +} + +section_iterator XCOFFObjectFile::section_begin() const { + DataRefImpl DRI; + DRI.p = getSectionHeaderTableAddress(); + return section_iterator(SectionRef(DRI, this)); +} + +section_iterator XCOFFObjectFile::section_end() const { + DataRefImpl DRI; + DRI.p = getWithOffset(getSectionHeaderTableAddress(), + getNumberOfSections() * getSectionHeaderSize()); + return section_iterator(SectionRef(DRI, this)); +} + +uint8_t XCOFFObjectFile::getBytesInAddress() const { return is64Bit() ? 8 : 4; } + +StringRef XCOFFObjectFile::getFileFormatName() const { + return is64Bit() ? "aix5coff64-rs6000" : "aixcoff-rs6000"; +} + +Triple::ArchType XCOFFObjectFile::getArch() const { + return is64Bit() ? 
Triple::ppc64 : Triple::ppc; +} + +SubtargetFeatures XCOFFObjectFile::getFeatures() const { + return SubtargetFeatures(); +} + +bool XCOFFObjectFile::isRelocatableObject() const { + if (is64Bit()) + return !(fileHeader64()->Flags & NoRelMask); + return !(fileHeader32()->Flags & NoRelMask); +} + +Expected<uint64_t> XCOFFObjectFile::getStartAddress() const { + // TODO FIXME Should get from auxiliary_header->o_entry when support for the + // auxiliary_header is added. + return 0; +} + +StringRef XCOFFObjectFile::mapDebugSectionName(StringRef Name) const { + return StringSwitch<StringRef>(Name) + .Case("dwinfo", "debug_info") + .Case("dwline", "debug_line") + .Case("dwpbnms", "debug_pubnames") + .Case("dwpbtyp", "debug_pubtypes") + .Case("dwarnge", "debug_aranges") + .Case("dwabrev", "debug_abbrev") + .Case("dwstr", "debug_str") + .Case("dwrnges", "debug_ranges") + .Case("dwloc", "debug_loc") + .Case("dwframe", "debug_frame") + .Case("dwmac", "debug_macinfo") + .Default(Name); +} + +size_t XCOFFObjectFile::getFileHeaderSize() const { + return is64Bit() ? sizeof(XCOFFFileHeader64) : sizeof(XCOFFFileHeader32); +} + +size_t XCOFFObjectFile::getSectionHeaderSize() const { + return is64Bit() ? sizeof(XCOFFSectionHeader64) : + sizeof(XCOFFSectionHeader32); +} + +bool XCOFFObjectFile::is64Bit() const { + return Binary::ID_XCOFF64 == getType(); +} + +uint16_t XCOFFObjectFile::getMagic() const { + return is64Bit() ? 
fileHeader64()->Magic : fileHeader32()->Magic; +} + +Expected<DataRefImpl> XCOFFObjectFile::getSectionByNum(int16_t Num) const { + if (Num <= 0 || Num > getNumberOfSections()) + return createStringError(object_error::invalid_section_index, + "the section index (" + Twine(Num) + + ") is invalid"); + + DataRefImpl DRI; + DRI.p = getWithOffset(getSectionHeaderTableAddress(), + getSectionHeaderSize() * (Num - 1)); + return DRI; +} + +Expected<StringRef> +XCOFFObjectFile::getSymbolSectionName(XCOFFSymbolRef SymEntPtr) const { + const int16_t SectionNum = SymEntPtr.getSectionNumber(); + + switch (SectionNum) { + case XCOFF::N_DEBUG: + return "N_DEBUG"; + case XCOFF::N_ABS: + return "N_ABS"; + case XCOFF::N_UNDEF: + return "N_UNDEF"; + default: + Expected<DataRefImpl> SecRef = getSectionByNum(SectionNum); + if (SecRef) + return generateXCOFFFixedNameStringRef( + getSectionNameInternal(SecRef.get())); + return SecRef.takeError(); + } +} + +unsigned XCOFFObjectFile::getSymbolSectionID(SymbolRef Sym) const { + XCOFFSymbolRef XCOFFSymRef(Sym.getRawDataRefImpl(), this); + return XCOFFSymRef.getSectionNumber(); +} + +bool XCOFFObjectFile::isReservedSectionNumber(int16_t SectionNumber) { + return (SectionNumber <= 0 && SectionNumber >= -2); +} + +uint16_t XCOFFObjectFile::getNumberOfSections() const { + return is64Bit() ? fileHeader64()->NumberOfSections + : fileHeader32()->NumberOfSections; +} + +int32_t XCOFFObjectFile::getTimeStamp() const { + return is64Bit() ? fileHeader64()->TimeStamp : fileHeader32()->TimeStamp; +} + +uint16_t XCOFFObjectFile::getOptionalHeaderSize() const { + return is64Bit() ? fileHeader64()->AuxHeaderSize + : fileHeader32()->AuxHeaderSize; +} + +uint32_t XCOFFObjectFile::getSymbolTableOffset32() const { + return fileHeader32()->SymbolTableOffset; +} + +int32_t XCOFFObjectFile::getRawNumberOfSymbolTableEntries32() const { + // As far as symbol table size is concerned, if this field is negative it is + // to be treated as a 0. 
However since this field is also used for printing we + // don't want to truncate any negative values. + return fileHeader32()->NumberOfSymTableEntries; +} + +uint32_t XCOFFObjectFile::getLogicalNumberOfSymbolTableEntries32() const { + return (fileHeader32()->NumberOfSymTableEntries >= 0 + ? fileHeader32()->NumberOfSymTableEntries + : 0); +} + +uint64_t XCOFFObjectFile::getSymbolTableOffset64() const { + return fileHeader64()->SymbolTableOffset; +} + +uint32_t XCOFFObjectFile::getNumberOfSymbolTableEntries64() const { + return fileHeader64()->NumberOfSymTableEntries; +} + +uint32_t XCOFFObjectFile::getNumberOfSymbolTableEntries() const { + return is64Bit() ? getNumberOfSymbolTableEntries64() + : getLogicalNumberOfSymbolTableEntries32(); +} + +uintptr_t XCOFFObjectFile::getEndOfSymbolTableAddress() const { + const uint32_t NumberOfSymTableEntries = getNumberOfSymbolTableEntries(); + return getWithOffset(reinterpret_cast<uintptr_t>(SymbolTblPtr), + XCOFF::SymbolTableEntrySize * NumberOfSymTableEntries); +} + +void XCOFFObjectFile::checkSymbolEntryPointer(uintptr_t SymbolEntPtr) const { + if (SymbolEntPtr < reinterpret_cast<uintptr_t>(SymbolTblPtr)) + report_fatal_error("Symbol table entry is outside of symbol table."); + + if (SymbolEntPtr >= getEndOfSymbolTableAddress()) + report_fatal_error("Symbol table entry is outside of symbol table."); + + ptrdiff_t Offset = reinterpret_cast<const char *>(SymbolEntPtr) - + reinterpret_cast<const char *>(SymbolTblPtr); + + if (Offset % XCOFF::SymbolTableEntrySize != 0) + report_fatal_error( + "Symbol table entry position is not valid inside of symbol table."); +} + +uint32_t XCOFFObjectFile::getSymbolIndex(uintptr_t SymbolEntPtr) const { + return (reinterpret_cast<const char *>(SymbolEntPtr) - + reinterpret_cast<const char *>(SymbolTblPtr)) / + XCOFF::SymbolTableEntrySize; +} + +uint64_t XCOFFObjectFile::getSymbolSize(DataRefImpl Symb) const { + uint64_t Result = 0; + XCOFFSymbolRef XCOFFSym = toSymbolRef(Symb); + if 
(XCOFFSym.isCsectSymbol()) { + Expected<XCOFFCsectAuxRef> CsectAuxRefOrError = + XCOFFSym.getXCOFFCsectAuxRef(); + if (!CsectAuxRefOrError) + // TODO: report the error up the stack. + consumeError(CsectAuxRefOrError.takeError()); + else { + XCOFFCsectAuxRef CsectAuxRef = CsectAuxRefOrError.get(); + uint8_t SymType = CsectAuxRef.getSymbolType(); + if (SymType == XCOFF::XTY_SD || SymType == XCOFF::XTY_CM) + Result = CsectAuxRef.getSectionOrLength(); + } + } + return Result; +} + +uintptr_t XCOFFObjectFile::getSymbolEntryAddressByIndex(uint32_t Index) const { + return getAdvancedSymbolEntryAddress( + reinterpret_cast<uintptr_t>(getPointerToSymbolTable()), Index); +} + +Expected<StringRef> +XCOFFObjectFile::getSymbolNameByIndex(uint32_t Index) const { + const uint32_t NumberOfSymTableEntries = getNumberOfSymbolTableEntries(); + + if (Index >= NumberOfSymTableEntries) + return createError("symbol index " + Twine(Index) + + " exceeds symbol count " + + Twine(NumberOfSymTableEntries)); + + DataRefImpl SymDRI; + SymDRI.p = getSymbolEntryAddressByIndex(Index); + return getSymbolName(SymDRI); +} + +uint16_t XCOFFObjectFile::getFlags() const { + return is64Bit() ? fileHeader64()->Flags : fileHeader32()->Flags; +} + +const char *XCOFFObjectFile::getSectionNameInternal(DataRefImpl Sec) const { + return is64Bit() ? toSection64(Sec)->Name : toSection32(Sec)->Name; +} + +uintptr_t XCOFFObjectFile::getSectionHeaderTableAddress() const { + return reinterpret_cast<uintptr_t>(SectionHeaderTable); +} + +int32_t XCOFFObjectFile::getSectionFlags(DataRefImpl Sec) const { + return is64Bit() ? 
toSection64(Sec)->Flags : toSection32(Sec)->Flags; +} + +XCOFFObjectFile::XCOFFObjectFile(unsigned int Type, MemoryBufferRef Object) + : ObjectFile(Type, Object) { + assert(Type == Binary::ID_XCOFF32 || Type == Binary::ID_XCOFF64); +} + +ArrayRef<XCOFFSectionHeader64> XCOFFObjectFile::sections64() const { + assert(is64Bit() && "64-bit interface called for non 64-bit file."); + const XCOFFSectionHeader64 *TablePtr = sectionHeaderTable64(); + return ArrayRef<XCOFFSectionHeader64>(TablePtr, + TablePtr + getNumberOfSections()); +} + +ArrayRef<XCOFFSectionHeader32> XCOFFObjectFile::sections32() const { + assert(!is64Bit() && "32-bit interface called for non 32-bit file."); + const XCOFFSectionHeader32 *TablePtr = sectionHeaderTable32(); + return ArrayRef<XCOFFSectionHeader32>(TablePtr, + TablePtr + getNumberOfSections()); +} + +// In an XCOFF32 file, when the field value is 65535, then an STYP_OVRFLO +// section header contains the actual count of relocation entries in the s_paddr +// field. STYP_OVRFLO headers contain the section index of their corresponding +// sections as their raw "NumberOfRelocations" field value. 
+template <typename T> +Expected<uint32_t> XCOFFObjectFile::getNumberOfRelocationEntries( + const XCOFFSectionHeader<T> &Sec) const { + const T &Section = static_cast<const T &>(Sec); + if (is64Bit()) + return Section.NumberOfRelocations; + + uint16_t SectionIndex = &Section - sectionHeaderTable<T>() + 1; + if (Section.NumberOfRelocations < XCOFF::RelocOverflow) + return Section.NumberOfRelocations; + for (const auto &Sec : sections32()) { + if (Sec.Flags == XCOFF::STYP_OVRFLO && + Sec.NumberOfRelocations == SectionIndex) + return Sec.PhysicalAddress; + } + return errorCodeToError(object_error::parse_failed); +} + +template <typename Shdr, typename Reloc> +Expected<ArrayRef<Reloc>> XCOFFObjectFile::relocations(const Shdr &Sec) const { + uintptr_t RelocAddr = getWithOffset(reinterpret_cast<uintptr_t>(FileHeader), + Sec.FileOffsetToRelocationInfo); + auto NumRelocEntriesOrErr = getNumberOfRelocationEntries(Sec); + if (Error E = NumRelocEntriesOrErr.takeError()) + return std::move(E); + + uint32_t NumRelocEntries = NumRelocEntriesOrErr.get(); + static_assert((sizeof(Reloc) == XCOFF::RelocationSerializationSize64 || + sizeof(Reloc) == XCOFF::RelocationSerializationSize32), + "Relocation structure is incorrect"); + auto RelocationOrErr = + getObject<Reloc>(Data, reinterpret_cast<void *>(RelocAddr), + NumRelocEntries * sizeof(Reloc)); + if (!RelocationOrErr) + return createError( + toString(RelocationOrErr.takeError()) + ": relocations with offset 0x" + + Twine::utohexstr(Sec.FileOffsetToRelocationInfo) + " and size 0x" + + Twine::utohexstr(NumRelocEntries * sizeof(Reloc)) + + " go past the end of the file"); + + const Reloc *StartReloc = RelocationOrErr.get(); + + return ArrayRef<Reloc>(StartReloc, StartReloc + NumRelocEntries); +} + +Expected<XCOFFStringTable> +XCOFFObjectFile::parseStringTable(const XCOFFObjectFile *Obj, uint64_t Offset) { + // If there is a string table, then the buffer must contain at least 4 bytes + // for the string table's size. 
Not having a string table is not an error. + if (Error E = Binary::checkOffset( + Obj->Data, reinterpret_cast<uintptr_t>(Obj->base() + Offset), 4)) { + consumeError(std::move(E)); + return XCOFFStringTable{0, nullptr}; + } + + // Read the size out of the buffer. + uint32_t Size = support::endian::read32be(Obj->base() + Offset); + + // If the size is less then 4, then the string table is just a size and no + // string data. + if (Size <= 4) + return XCOFFStringTable{4, nullptr}; + + auto StringTableOrErr = + getObject<char>(Obj->Data, Obj->base() + Offset, Size); + if (!StringTableOrErr) + return createError(toString(StringTableOrErr.takeError()) + + ": string table with offset 0x" + + Twine::utohexstr(Offset) + " and size 0x" + + Twine::utohexstr(Size) + + " goes past the end of the file"); + + const char *StringTablePtr = StringTableOrErr.get(); + if (StringTablePtr[Size - 1] != '\0') + return errorCodeToError(object_error::string_table_non_null_end); + + return XCOFFStringTable{Size, StringTablePtr}; +} + +// This function returns the import file table. Each entry in the import file +// table consists of: "path_name\0base_name\0archive_member_name\0". 
+Expected<StringRef> XCOFFObjectFile::getImportFileTable() const { + Expected<uintptr_t> LoaderSectionAddrOrError = getLoaderSectionAddress(); + if (!LoaderSectionAddrOrError) + return LoaderSectionAddrOrError.takeError(); + + uintptr_t LoaderSectionAddr = LoaderSectionAddrOrError.get(); + if (!LoaderSectionAddr) + return StringRef(); + + uint64_t OffsetToImportFileTable = 0; + uint64_t LengthOfImportFileTable = 0; + if (is64Bit()) { + const LoaderSectionHeader64 *LoaderSec64 = + viewAs<LoaderSectionHeader64>(LoaderSectionAddr); + OffsetToImportFileTable = LoaderSec64->OffsetToImpid; + LengthOfImportFileTable = LoaderSec64->LengthOfImpidStrTbl; + } else { + const LoaderSectionHeader32 *LoaderSec32 = + viewAs<LoaderSectionHeader32>(LoaderSectionAddr); + OffsetToImportFileTable = LoaderSec32->OffsetToImpid; + LengthOfImportFileTable = LoaderSec32->LengthOfImpidStrTbl; + } + + auto ImportTableOrErr = getObject<char>( + Data, + reinterpret_cast<void *>(LoaderSectionAddr + OffsetToImportFileTable), + LengthOfImportFileTable); + if (!ImportTableOrErr) + return createError( + toString(ImportTableOrErr.takeError()) + + ": import file table with offset 0x" + + Twine::utohexstr(LoaderSectionAddr + OffsetToImportFileTable) + + " and size 0x" + Twine::utohexstr(LengthOfImportFileTable) + + " goes past the end of the file"); + + const char *ImportTablePtr = ImportTableOrErr.get(); + if (ImportTablePtr[LengthOfImportFileTable - 1] != '\0') + return createError( + ": import file name table with offset 0x" + + Twine::utohexstr(LoaderSectionAddr + OffsetToImportFileTable) + + " and size 0x" + Twine::utohexstr(LengthOfImportFileTable) + + " must end with a null terminator"); + + return StringRef(ImportTablePtr, LengthOfImportFileTable); +} + +Expected<std::unique_ptr<XCOFFObjectFile>> +XCOFFObjectFile::create(unsigned Type, MemoryBufferRef MBR) { + // Can't use std::make_unique because of the private constructor. 
+ std::unique_ptr<XCOFFObjectFile> Obj; + Obj.reset(new XCOFFObjectFile(Type, MBR)); + + uint64_t CurOffset = 0; + const auto *Base = Obj->base(); + MemoryBufferRef Data = Obj->Data; + + // Parse file header. + auto FileHeaderOrErr = + getObject<void>(Data, Base + CurOffset, Obj->getFileHeaderSize()); + if (Error E = FileHeaderOrErr.takeError()) + return std::move(E); + Obj->FileHeader = FileHeaderOrErr.get(); + + CurOffset += Obj->getFileHeaderSize(); + + if (Obj->getOptionalHeaderSize()) { + auto AuxiliaryHeaderOrErr = + getObject<void>(Data, Base + CurOffset, Obj->getOptionalHeaderSize()); + if (Error E = AuxiliaryHeaderOrErr.takeError()) + return std::move(E); + Obj->AuxiliaryHeader = AuxiliaryHeaderOrErr.get(); + } + + CurOffset += Obj->getOptionalHeaderSize(); + + // Parse the section header table if it is present. + if (Obj->getNumberOfSections()) { + uint64_t SectionHeadersSize = + Obj->getNumberOfSections() * Obj->getSectionHeaderSize(); + auto SecHeadersOrErr = + getObject<void>(Data, Base + CurOffset, SectionHeadersSize); + if (!SecHeadersOrErr) + return createError(toString(SecHeadersOrErr.takeError()) + + ": section headers with offset 0x" + + Twine::utohexstr(CurOffset) + " and size 0x" + + Twine::utohexstr(SectionHeadersSize) + + " go past the end of the file"); + + Obj->SectionHeaderTable = SecHeadersOrErr.get(); + } + + const uint32_t NumberOfSymbolTableEntries = + Obj->getNumberOfSymbolTableEntries(); + + // If there is no symbol table we are done parsing the memory buffer. + if (NumberOfSymbolTableEntries == 0) + return std::move(Obj); + + // Parse symbol table. + CurOffset = Obj->is64Bit() ? 
Obj->getSymbolTableOffset64() + : Obj->getSymbolTableOffset32(); + const uint64_t SymbolTableSize = + static_cast<uint64_t>(XCOFF::SymbolTableEntrySize) * + NumberOfSymbolTableEntries; + auto SymTableOrErr = + getObject<void *>(Data, Base + CurOffset, SymbolTableSize); + if (!SymTableOrErr) + return createError( + toString(SymTableOrErr.takeError()) + ": symbol table with offset 0x" + + Twine::utohexstr(CurOffset) + " and size 0x" + + Twine::utohexstr(SymbolTableSize) + " goes past the end of the file"); + + Obj->SymbolTblPtr = SymTableOrErr.get(); + CurOffset += SymbolTableSize; + + // Parse String table. + Expected<XCOFFStringTable> StringTableOrErr = + parseStringTable(Obj.get(), CurOffset); + if (Error E = StringTableOrErr.takeError()) + return std::move(E); + Obj->StringTable = StringTableOrErr.get(); + + return std::move(Obj); +} + +Expected<std::unique_ptr<ObjectFile>> +ObjectFile::createXCOFFObjectFile(MemoryBufferRef MemBufRef, + unsigned FileType) { + return XCOFFObjectFile::create(FileType, MemBufRef); +} + +bool XCOFFSymbolRef::isFunction() const { + if (!isCsectSymbol()) + return false; + + if (getSymbolType() & FunctionSym) + return true; + + Expected<XCOFFCsectAuxRef> ExpCsectAuxEnt = getXCOFFCsectAuxRef(); + if (!ExpCsectAuxEnt) { + // If we could not get the CSECT auxiliary entry, then treat this symbol as + // if it isn't a function. Consume the error and return `false` to move on. + consumeError(ExpCsectAuxEnt.takeError()); + return false; + } + + const XCOFFCsectAuxRef CsectAuxRef = ExpCsectAuxEnt.get(); + + // A function definition should be a label definition. + // FIXME: This is not necessarily the case when -ffunction-sections is + // enabled. 
+ if (!CsectAuxRef.isLabel()) + return false; + + if (CsectAuxRef.getStorageMappingClass() != XCOFF::XMC_PR) + return false; + + const int16_t SectNum = getSectionNumber(); + Expected<DataRefImpl> SI = OwningObjectPtr->getSectionByNum(SectNum); + if (!SI) { + // If we could not get the section, then this symbol should not be + // a function. So consume the error and return `false` to move on. + consumeError(SI.takeError()); + return false; + } + + return (OwningObjectPtr->getSectionFlags(SI.get()) & XCOFF::STYP_TEXT); +} + +bool XCOFFSymbolRef::isCsectSymbol() const { + XCOFF::StorageClass SC = getStorageClass(); + return (SC == XCOFF::C_EXT || SC == XCOFF::C_WEAKEXT || + SC == XCOFF::C_HIDEXT); +} + +Expected<XCOFFCsectAuxRef> XCOFFSymbolRef::getXCOFFCsectAuxRef() const { + assert(isCsectSymbol() && + "Calling csect symbol interface with a non-csect symbol."); + + uint8_t NumberOfAuxEntries = getNumberOfAuxEntries(); + + Expected<StringRef> NameOrErr = getName(); + if (auto Err = NameOrErr.takeError()) + return std::move(Err); + + uint32_t SymbolIdx = OwningObjectPtr->getSymbolIndex(getEntryAddress()); + if (!NumberOfAuxEntries) { + return createError("csect symbol \"" + *NameOrErr + "\" with index " + + Twine(SymbolIdx) + " contains no auxiliary entry"); + } + + if (!OwningObjectPtr->is64Bit()) { + // In XCOFF32, the csect auxilliary entry is always the last auxiliary + // entry for the symbol. + uintptr_t AuxAddr = XCOFFObjectFile::getAdvancedSymbolEntryAddress( + getEntryAddress(), NumberOfAuxEntries); + return XCOFFCsectAuxRef(viewAs<XCOFFCsectAuxEnt32>(AuxAddr)); + } + + // XCOFF64 uses SymbolAuxType to identify the auxiliary entry type. + // We need to iterate through all the auxiliary entries to find it. 
+ for (uint8_t Index = NumberOfAuxEntries; Index > 0; --Index) { + uintptr_t AuxAddr = XCOFFObjectFile::getAdvancedSymbolEntryAddress( + getEntryAddress(), Index); + if (*OwningObjectPtr->getSymbolAuxType(AuxAddr) == + XCOFF::SymbolAuxType::AUX_CSECT) { +#ifndef NDEBUG + OwningObjectPtr->checkSymbolEntryPointer(AuxAddr); +#endif + return XCOFFCsectAuxRef(viewAs<XCOFFCsectAuxEnt64>(AuxAddr)); + } + } + + return createError( + "a csect auxiliary entry has not been found for symbol \"" + *NameOrErr + + "\" with index " + Twine(SymbolIdx)); +} + +Expected<StringRef> XCOFFSymbolRef::getName() const { + // A storage class value with the high-order bit on indicates that the name is + // a symbolic debugger stabstring. + if (getStorageClass() & 0x80) + return StringRef("Unimplemented Debug Name"); + + if (Entry32) { + if (Entry32->NameInStrTbl.Magic != XCOFFSymbolRef::NAME_IN_STR_TBL_MAGIC) + return generateXCOFFFixedNameStringRef(Entry32->SymbolName); + + return OwningObjectPtr->getStringTableEntry(Entry32->NameInStrTbl.Offset); + } + + return OwningObjectPtr->getStringTableEntry(Entry64->Offset); +} + +// Explictly instantiate template classes. 
+template struct XCOFFSectionHeader<XCOFFSectionHeader32>; +template struct XCOFFSectionHeader<XCOFFSectionHeader64>; + +template struct XCOFFRelocation<llvm::support::ubig32_t>; +template struct XCOFFRelocation<llvm::support::ubig64_t>; + +template llvm::Expected<llvm::ArrayRef<llvm::object::XCOFFRelocation64>> +llvm::object::XCOFFObjectFile::relocations<llvm::object::XCOFFSectionHeader64, + llvm::object::XCOFFRelocation64>( + llvm::object::XCOFFSectionHeader64 const &) const; +template llvm::Expected<llvm::ArrayRef<llvm::object::XCOFFRelocation32>> +llvm::object::XCOFFObjectFile::relocations<llvm::object::XCOFFSectionHeader32, + llvm::object::XCOFFRelocation32>( + llvm::object::XCOFFSectionHeader32 const &) const; + +bool doesXCOFFTracebackTableBegin(ArrayRef<uint8_t> Bytes) { + if (Bytes.size() < 4) + return false; + + return support::endian::read32be(Bytes.data()) == 0; +} + +#define GETVALUEWITHMASK(X) (Data & (TracebackTable::X)) +#define GETVALUEWITHMASKSHIFT(X, S) \ + ((Data & (TracebackTable::X)) >> (TracebackTable::S)) + +Expected<TBVectorExt> TBVectorExt::create(StringRef TBvectorStrRef) { + Error Err = Error::success(); + TBVectorExt TBTVecExt(TBvectorStrRef, Err); + if (Err) + return std::move(Err); + return TBTVecExt; +} + +TBVectorExt::TBVectorExt(StringRef TBvectorStrRef, Error &Err) { + const uint8_t *Ptr = reinterpret_cast<const uint8_t *>(TBvectorStrRef.data()); + Data = support::endian::read16be(Ptr); + uint32_t VecParmsTypeValue = support::endian::read32be(Ptr + 2); + unsigned ParmsNum = + GETVALUEWITHMASKSHIFT(NumberOfVectorParmsMask, NumberOfVectorParmsShift); + + ErrorAsOutParameter EAO(&Err); + Expected<SmallString<32>> VecParmsTypeOrError = + parseVectorParmsType(VecParmsTypeValue, ParmsNum); + if (!VecParmsTypeOrError) + Err = VecParmsTypeOrError.takeError(); + else + VecParmsInfo = VecParmsTypeOrError.get(); +} + +uint8_t TBVectorExt::getNumberOfVRSaved() const { + return GETVALUEWITHMASKSHIFT(NumberOfVRSavedMask, NumberOfVRSavedShift); 
+} + +bool TBVectorExt::isVRSavedOnStack() const { + return GETVALUEWITHMASK(IsVRSavedOnStackMask); +} + +bool TBVectorExt::hasVarArgs() const { + return GETVALUEWITHMASK(HasVarArgsMask); +} + +uint8_t TBVectorExt::getNumberOfVectorParms() const { + return GETVALUEWITHMASKSHIFT(NumberOfVectorParmsMask, + NumberOfVectorParmsShift); +} + +bool TBVectorExt::hasVMXInstruction() const { + return GETVALUEWITHMASK(HasVMXInstructionMask); +} +#undef GETVALUEWITHMASK +#undef GETVALUEWITHMASKSHIFT + +Expected<XCOFFTracebackTable> XCOFFTracebackTable::create(const uint8_t *Ptr, + uint64_t &Size) { + Error Err = Error::success(); + XCOFFTracebackTable TBT(Ptr, Size, Err); + if (Err) + return std::move(Err); + return TBT; +} + +XCOFFTracebackTable::XCOFFTracebackTable(const uint8_t *Ptr, uint64_t &Size, + Error &Err) + : TBPtr(Ptr) { + ErrorAsOutParameter EAO(&Err); + DataExtractor DE(ArrayRef<uint8_t>(Ptr, Size), /*IsLittleEndian=*/false, + /*AddressSize=*/0); + DataExtractor::Cursor Cur(/*Offset=*/0); + + // Skip 8 bytes of mandatory fields. + DE.getU64(Cur); + + unsigned FixedParmsNum = getNumberOfFixedParms(); + unsigned FloatingParmsNum = getNumberOfFPParms(); + uint32_t ParamsTypeValue = 0; + + // Begin to parse optional fields. 
+ if (Cur && (FixedParmsNum + FloatingParmsNum) > 0) + ParamsTypeValue = DE.getU32(Cur); + + if (Cur && hasTraceBackTableOffset()) + TraceBackTableOffset = DE.getU32(Cur); + + if (Cur && isInterruptHandler()) + HandlerMask = DE.getU32(Cur); + + if (Cur && hasControlledStorage()) { + NumOfCtlAnchors = DE.getU32(Cur); + if (Cur && NumOfCtlAnchors) { + SmallVector<uint32_t, 8> Disp; + Disp.reserve(NumOfCtlAnchors.getValue()); + for (uint32_t I = 0; I < NumOfCtlAnchors && Cur; ++I) + Disp.push_back(DE.getU32(Cur)); + if (Cur) + ControlledStorageInfoDisp = std::move(Disp); + } + } + + if (Cur && isFuncNamePresent()) { + uint16_t FunctionNameLen = DE.getU16(Cur); + if (Cur) + FunctionName = DE.getBytes(Cur, FunctionNameLen); + } + + if (Cur && isAllocaUsed()) + AllocaRegister = DE.getU8(Cur); + + unsigned VectorParmsNum = 0; + if (Cur && hasVectorInfo()) { + StringRef VectorExtRef = DE.getBytes(Cur, 6); + if (Cur) { + Expected<TBVectorExt> TBVecExtOrErr = TBVectorExt::create(VectorExtRef); + if (!TBVecExtOrErr) { + Err = TBVecExtOrErr.takeError(); + return; + } + VecExt = TBVecExtOrErr.get(); + VectorParmsNum = VecExt.getValue().getNumberOfVectorParms(); + } + } + + // As long as there is no fixed-point or floating-point parameter, this + // field remains not present even when hasVectorInfo gives true and + // indicates the presence of vector parameters. + if (Cur && (FixedParmsNum + FloatingParmsNum) > 0) { + Expected<SmallString<32>> ParmsTypeOrError = + hasVectorInfo() + ? 
parseParmsTypeWithVecInfo(ParamsTypeValue, FixedParmsNum, + FloatingParmsNum, VectorParmsNum) + : parseParmsType(ParamsTypeValue, FixedParmsNum, FloatingParmsNum); + + if (!ParmsTypeOrError) { + Err = ParmsTypeOrError.takeError(); + return; + } + ParmsType = ParmsTypeOrError.get(); + } + + if (Cur && hasExtensionTable()) + ExtensionTable = DE.getU8(Cur); + + if (!Cur) + Err = Cur.takeError(); + + Size = Cur.tell(); +} + +#define GETBITWITHMASK(P, X) \ + (support::endian::read32be(TBPtr + (P)) & (TracebackTable::X)) +#define GETBITWITHMASKSHIFT(P, X, S) \ + ((support::endian::read32be(TBPtr + (P)) & (TracebackTable::X)) >> \ + (TracebackTable::S)) + +uint8_t XCOFFTracebackTable::getVersion() const { + return GETBITWITHMASKSHIFT(0, VersionMask, VersionShift); +} + +uint8_t XCOFFTracebackTable::getLanguageID() const { + return GETBITWITHMASKSHIFT(0, LanguageIdMask, LanguageIdShift); +} + +bool XCOFFTracebackTable::isGlobalLinkage() const { + return GETBITWITHMASK(0, IsGlobaLinkageMask); +} + +bool XCOFFTracebackTable::isOutOfLineEpilogOrPrologue() const { + return GETBITWITHMASK(0, IsOutOfLineEpilogOrPrologueMask); +} + +bool XCOFFTracebackTable::hasTraceBackTableOffset() const { + return GETBITWITHMASK(0, HasTraceBackTableOffsetMask); +} + +bool XCOFFTracebackTable::isInternalProcedure() const { + return GETBITWITHMASK(0, IsInternalProcedureMask); +} + +bool XCOFFTracebackTable::hasControlledStorage() const { + return GETBITWITHMASK(0, HasControlledStorageMask); +} + +bool XCOFFTracebackTable::isTOCless() const { + return GETBITWITHMASK(0, IsTOClessMask); +} + +bool XCOFFTracebackTable::isFloatingPointPresent() const { + return GETBITWITHMASK(0, IsFloatingPointPresentMask); +} + +bool XCOFFTracebackTable::isFloatingPointOperationLogOrAbortEnabled() const { + return GETBITWITHMASK(0, IsFloatingPointOperationLogOrAbortEnabledMask); +} + +bool XCOFFTracebackTable::isInterruptHandler() const { + return GETBITWITHMASK(0, IsInterruptHandlerMask); +} + +bool 
XCOFFTracebackTable::isFuncNamePresent() const { + return GETBITWITHMASK(0, IsFunctionNamePresentMask); +} + +bool XCOFFTracebackTable::isAllocaUsed() const { + return GETBITWITHMASK(0, IsAllocaUsedMask); +} + +uint8_t XCOFFTracebackTable::getOnConditionDirective() const { + return GETBITWITHMASKSHIFT(0, OnConditionDirectiveMask, + OnConditionDirectiveShift); +} + +bool XCOFFTracebackTable::isCRSaved() const { + return GETBITWITHMASK(0, IsCRSavedMask); +} + +bool XCOFFTracebackTable::isLRSaved() const { + return GETBITWITHMASK(0, IsLRSavedMask); +} + +bool XCOFFTracebackTable::isBackChainStored() const { + return GETBITWITHMASK(4, IsBackChainStoredMask); +} + +bool XCOFFTracebackTable::isFixup() const { + return GETBITWITHMASK(4, IsFixupMask); +} + +uint8_t XCOFFTracebackTable::getNumOfFPRsSaved() const { + return GETBITWITHMASKSHIFT(4, FPRSavedMask, FPRSavedShift); +} + +bool XCOFFTracebackTable::hasExtensionTable() const { + return GETBITWITHMASK(4, HasExtensionTableMask); +} + +bool XCOFFTracebackTable::hasVectorInfo() const { + return GETBITWITHMASK(4, HasVectorInfoMask); +} + +uint8_t XCOFFTracebackTable::getNumOfGPRsSaved() const { + return GETBITWITHMASKSHIFT(4, GPRSavedMask, GPRSavedShift); +} + +uint8_t XCOFFTracebackTable::getNumberOfFixedParms() const { + return GETBITWITHMASKSHIFT(4, NumberOfFixedParmsMask, + NumberOfFixedParmsShift); +} + +uint8_t XCOFFTracebackTable::getNumberOfFPParms() const { + return GETBITWITHMASKSHIFT(4, NumberOfFloatingPointParmsMask, + NumberOfFloatingPointParmsShift); +} + +bool XCOFFTracebackTable::hasParmsOnStack() const { + return GETBITWITHMASK(4, HasParmsOnStackMask); +} + +#undef GETBITWITHMASK +#undef GETBITWITHMASKSHIFT +} // namespace object +} // namespace llvm diff --git a/contrib/libs/llvm14/lib/Object/ya.make b/contrib/libs/llvm14/lib/Object/ya.make new file mode 100644 index 0000000000..175bf9b41b --- /dev/null +++ b/contrib/libs/llvm14/lib/Object/ya.make @@ -0,0 +1,62 @@ +# Generated by devtools/yamaker. 
+ +LIBRARY() + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + +PEERDIR( + contrib/libs/llvm14 + contrib/libs/llvm14/include + contrib/libs/llvm14/lib/BinaryFormat + contrib/libs/llvm14/lib/Bitcode/Reader + contrib/libs/llvm14/lib/IR + contrib/libs/llvm14/lib/MC + contrib/libs/llvm14/lib/MC/MCParser + contrib/libs/llvm14/lib/Support + contrib/libs/llvm14/lib/TextAPI +) + +ADDINCL( + contrib/libs/llvm14/lib/Object +) + +NO_COMPILER_WARNINGS() + +NO_UTIL() + +SRCS( + Archive.cpp + ArchiveWriter.cpp + Binary.cpp + COFFImportFile.cpp + COFFModuleDefinition.cpp + COFFObjectFile.cpp + Decompressor.cpp + ELF.cpp + ELFObjectFile.cpp + Error.cpp + FaultMapParser.cpp + IRObjectFile.cpp + IRSymtab.cpp + MachOObjectFile.cpp + MachOUniversal.cpp + MachOUniversalWriter.cpp + Minidump.cpp + ModuleSymbolTable.cpp + Object.cpp + ObjectFile.cpp + RecordStreamer.cpp + RelocationResolver.cpp + SymbolSize.cpp + SymbolicFile.cpp + TapiFile.cpp + TapiUniversal.cpp + WasmObjectFile.cpp + WindowsMachineFlag.cpp + WindowsResource.cpp + XCOFFObjectFile.cpp +) + +END() |