diff options
| author | vvvv <[email protected]> | 2024-02-06 20:01:22 +0300 |
|---|---|---|
| committer | vvvv <[email protected]> | 2024-02-06 20:22:16 +0300 |
| commit | 0203b7a9a40828bb2bd4c32029b79ff0ea3d1f8f (patch) | |
| tree | e630d0d5bd0bd29fc8c2d2842ed2cfde781b993a /contrib/libs/llvm16/lib/ObjCopy | |
| parent | ba27db76d99d12a4f1c06960b5449423218614c4 (diff) | |
llvm16 targets
Diffstat (limited to 'contrib/libs/llvm16/lib/ObjCopy')
38 files changed, 10357 insertions, 0 deletions
diff --git a/contrib/libs/llvm16/lib/ObjCopy/Archive.cpp b/contrib/libs/llvm16/lib/ObjCopy/Archive.cpp new file mode 100644 index 00000000000..742ca0b890c --- /dev/null +++ b/contrib/libs/llvm16/lib/ObjCopy/Archive.cpp @@ -0,0 +1,110 @@ +//===- Archive.cpp --------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Archive.h" +#include "llvm/ObjCopy/CommonConfig.h" +#include "llvm/ObjCopy/MultiFormatConfig.h" +#include "llvm/ObjCopy/ObjCopy.h" +#include "llvm/Object/Error.h" +#include "llvm/Object/MachO.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/SmallVectorMemoryBuffer.h" + +namespace llvm { +namespace objcopy { + +using namespace llvm::object; + +Expected<std::vector<NewArchiveMember>> +createNewArchiveMembers(const MultiFormatConfig &Config, const Archive &Ar) { + std::vector<NewArchiveMember> NewArchiveMembers; + Error Err = Error::success(); + for (const Archive::Child &Child : Ar.children(Err)) { + Expected<StringRef> ChildNameOrErr = Child.getName(); + if (!ChildNameOrErr) + return createFileError(Ar.getFileName(), ChildNameOrErr.takeError()); + + Expected<std::unique_ptr<Binary>> ChildOrErr = Child.getAsBinary(); + if (!ChildOrErr) + return createFileError(Ar.getFileName() + "(" + *ChildNameOrErr + ")", + ChildOrErr.takeError()); + + SmallVector<char, 0> Buffer; + raw_svector_ostream MemStream(Buffer); + + if (Error E = executeObjcopyOnBinary(Config, *ChildOrErr->get(), MemStream)) + return std::move(E); + + Expected<NewArchiveMember> Member = NewArchiveMember::getOldMember( + Child, Config.getCommonConfig().DeterministicArchives); + if (!Member) + return createFileError(Ar.getFileName(), Member.takeError()); + + Member->Buf = 
std::make_unique<SmallVectorMemoryBuffer>( + std::move(Buffer), ChildNameOrErr.get()); + Member->MemberName = Member->Buf->getBufferIdentifier(); + NewArchiveMembers.push_back(std::move(*Member)); + } + if (Err) + return createFileError(Config.getCommonConfig().InputFilename, + std::move(Err)); + return std::move(NewArchiveMembers); +} + +// For regular archives this function simply calls llvm::writeArchive, +// For thin archives it writes the archive file itself as well as its members. +static Error deepWriteArchive(StringRef ArcName, + ArrayRef<NewArchiveMember> NewMembers, + bool WriteSymtab, object::Archive::Kind Kind, + bool Deterministic, bool Thin) { + if (Kind == object::Archive::K_BSD && !NewMembers.empty() && + NewMembers.front().detectKindFromObject() == object::Archive::K_DARWIN) + Kind = object::Archive::K_DARWIN; + + if (Error E = writeArchive(ArcName, NewMembers, WriteSymtab, Kind, + Deterministic, Thin)) + return createFileError(ArcName, std::move(E)); + + if (!Thin) + return Error::success(); + + for (const NewArchiveMember &Member : NewMembers) { + // For regular files (as is the case for deepWriteArchive), + // FileOutputBuffer::create will return OnDiskBuffer. + // OnDiskBuffer uses a temporary file and then renames it. So in reality + // there is no inefficiency / duplicated in-memory buffers in this case. For + // now in-memory buffers can not be completely avoided since + // NewArchiveMember still requires them even though writeArchive does not + // write them on disk. 
+ Expected<std::unique_ptr<FileOutputBuffer>> FB = + FileOutputBuffer::create(Member.MemberName, Member.Buf->getBufferSize(), + FileOutputBuffer::F_executable); + if (!FB) + return FB.takeError(); + std::copy(Member.Buf->getBufferStart(), Member.Buf->getBufferEnd(), + (*FB)->getBufferStart()); + if (Error E = (*FB)->commit()) + return E; + } + return Error::success(); +} + +Error executeObjcopyOnArchive(const MultiFormatConfig &Config, + const object::Archive &Ar) { + Expected<std::vector<NewArchiveMember>> NewArchiveMembersOrErr = + createNewArchiveMembers(Config, Ar); + if (!NewArchiveMembersOrErr) + return NewArchiveMembersOrErr.takeError(); + const CommonConfig &CommonConfig = Config.getCommonConfig(); + return deepWriteArchive(CommonConfig.OutputFilename, *NewArchiveMembersOrErr, + Ar.hasSymbolTable(), Ar.kind(), + CommonConfig.DeterministicArchives, Ar.isThin()); +} + +} // end namespace objcopy +} // end namespace llvm diff --git a/contrib/libs/llvm16/lib/ObjCopy/Archive.h b/contrib/libs/llvm16/lib/ObjCopy/Archive.h new file mode 100644 index 00000000000..08aae563505 --- /dev/null +++ b/contrib/libs/llvm16/lib/ObjCopy/Archive.h @@ -0,0 +1,31 @@ +//===- Archive.h ------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_OBJCOPY_ARCHIVE_H +#define LLVM_LIB_OBJCOPY_ARCHIVE_H + +#include "llvm/Object/ArchiveWriter.h" +#include "llvm/Support/Error.h" +#include <vector> + +namespace llvm { +namespace objcopy { + +class MultiFormatConfig; + +/// Applies the transformations described by \p Config to +/// each member in archive \p Ar. +/// \returns Vector of transformed archive members. 
+Expected<std::vector<NewArchiveMember>> +createNewArchiveMembers(const MultiFormatConfig &Config, + const object::Archive &Ar); + +} // end namespace objcopy +} // end namespace llvm + +#endif // LLVM_LIB_OBJCOPY_ARCHIVE_H diff --git a/contrib/libs/llvm16/lib/ObjCopy/COFF/COFFObjcopy.cpp b/contrib/libs/llvm16/lib/ObjCopy/COFF/COFFObjcopy.cpp new file mode 100644 index 00000000000..37fb22740dc --- /dev/null +++ b/contrib/libs/llvm16/lib/ObjCopy/COFF/COFFObjcopy.cpp @@ -0,0 +1,311 @@ +//===- COFFObjcopy.cpp ----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ObjCopy/COFF/COFFObjcopy.h" +#include "COFFObject.h" +#include "COFFReader.h" +#include "COFFWriter.h" +#include "llvm/ObjCopy/COFF/COFFConfig.h" +#include "llvm/ObjCopy/CommonConfig.h" + +#include "llvm/Object/Binary.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/CRC.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Path.h" +#include <cassert> + +namespace llvm { +namespace objcopy { +namespace coff { + +using namespace object; +using namespace COFF; + +static bool isDebugSection(const Section &Sec) { + return Sec.Name.startswith(".debug"); +} + +static uint64_t getNextRVA(const Object &Obj) { + if (Obj.getSections().empty()) + return 0; + const Section &Last = Obj.getSections().back(); + return alignTo(Last.Header.VirtualAddress + Last.Header.VirtualSize, + Obj.IsPE ? 
Obj.PeHeader.SectionAlignment : 1); +} + +static Expected<std::vector<uint8_t>> +createGnuDebugLinkSectionContents(StringRef File) { + ErrorOr<std::unique_ptr<MemoryBuffer>> LinkTargetOrErr = + MemoryBuffer::getFile(File); + if (!LinkTargetOrErr) + return createFileError(File, LinkTargetOrErr.getError()); + auto LinkTarget = std::move(*LinkTargetOrErr); + uint32_t CRC32 = llvm::crc32(arrayRefFromStringRef(LinkTarget->getBuffer())); + + StringRef FileName = sys::path::filename(File); + size_t CRCPos = alignTo(FileName.size() + 1, 4); + std::vector<uint8_t> Data(CRCPos + 4); + memcpy(Data.data(), FileName.data(), FileName.size()); + support::endian::write32le(Data.data() + CRCPos, CRC32); + return Data; +} + +// Adds named section with given contents to the object. +static void addSection(Object &Obj, StringRef Name, ArrayRef<uint8_t> Contents, + uint32_t Characteristics) { + bool NeedVA = Characteristics & (IMAGE_SCN_MEM_EXECUTE | IMAGE_SCN_MEM_READ | + IMAGE_SCN_MEM_WRITE); + + Section Sec; + Sec.setOwnedContents(Contents); + Sec.Name = Name; + Sec.Header.VirtualSize = NeedVA ? Sec.getContents().size() : 0u; + Sec.Header.VirtualAddress = NeedVA ? getNextRVA(Obj) : 0u; + Sec.Header.SizeOfRawData = + NeedVA ? alignTo(Sec.Header.VirtualSize, + Obj.IsPE ? Obj.PeHeader.FileAlignment : 1) + : Sec.getContents().size(); + // Sec.Header.PointerToRawData is filled in by the writer. + Sec.Header.PointerToRelocations = 0; + Sec.Header.PointerToLinenumbers = 0; + // Sec.Header.NumberOfRelocations is filled in by the writer. 
+ Sec.Header.NumberOfLinenumbers = 0; + Sec.Header.Characteristics = Characteristics; + + Obj.addSections(Sec); +} + +static Error addGnuDebugLink(Object &Obj, StringRef DebugLinkFile) { + Expected<std::vector<uint8_t>> Contents = + createGnuDebugLinkSectionContents(DebugLinkFile); + if (!Contents) + return Contents.takeError(); + + addSection(Obj, ".gnu_debuglink", *Contents, + IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ | + IMAGE_SCN_MEM_DISCARDABLE); + + return Error::success(); +} + +static uint32_t flagsToCharacteristics(SectionFlag AllFlags, uint32_t OldChar) { + // Need to preserve alignment flags. + const uint32_t PreserveMask = + IMAGE_SCN_ALIGN_1BYTES | IMAGE_SCN_ALIGN_2BYTES | IMAGE_SCN_ALIGN_4BYTES | + IMAGE_SCN_ALIGN_8BYTES | IMAGE_SCN_ALIGN_16BYTES | + IMAGE_SCN_ALIGN_32BYTES | IMAGE_SCN_ALIGN_64BYTES | + IMAGE_SCN_ALIGN_128BYTES | IMAGE_SCN_ALIGN_256BYTES | + IMAGE_SCN_ALIGN_512BYTES | IMAGE_SCN_ALIGN_1024BYTES | + IMAGE_SCN_ALIGN_2048BYTES | IMAGE_SCN_ALIGN_4096BYTES | + IMAGE_SCN_ALIGN_8192BYTES; + + // Setup new section characteristics based on the flags provided in command + // line. 
+ uint32_t NewCharacteristics = (OldChar & PreserveMask) | IMAGE_SCN_MEM_READ; + + if ((AllFlags & SectionFlag::SecAlloc) && !(AllFlags & SectionFlag::SecLoad)) + NewCharacteristics |= IMAGE_SCN_CNT_UNINITIALIZED_DATA; + if (AllFlags & SectionFlag::SecNoload) + NewCharacteristics |= IMAGE_SCN_LNK_REMOVE; + if (!(AllFlags & SectionFlag::SecReadonly)) + NewCharacteristics |= IMAGE_SCN_MEM_WRITE; + if (AllFlags & SectionFlag::SecDebug) + NewCharacteristics |= + IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_DISCARDABLE; + if (AllFlags & SectionFlag::SecCode) + NewCharacteristics |= IMAGE_SCN_CNT_CODE | IMAGE_SCN_MEM_EXECUTE; + if (AllFlags & SectionFlag::SecData) + NewCharacteristics |= IMAGE_SCN_CNT_INITIALIZED_DATA; + if (AllFlags & SectionFlag::SecShare) + NewCharacteristics |= IMAGE_SCN_MEM_SHARED; + if (AllFlags & SectionFlag::SecExclude) + NewCharacteristics |= IMAGE_SCN_LNK_REMOVE; + + return NewCharacteristics; +} + +static Error handleArgs(const CommonConfig &Config, + const COFFConfig &COFFConfig, Object &Obj) { + // Perform the actual section removals. + Obj.removeSections([&Config](const Section &Sec) { + // Contrary to --only-keep-debug, --only-section fully removes sections that + // aren't mentioned. + if (!Config.OnlySection.empty() && !Config.OnlySection.matches(Sec.Name)) + return true; + + if (Config.StripDebug || Config.StripAll || Config.StripAllGNU || + Config.DiscardMode == DiscardType::All || Config.StripUnneeded) { + if (isDebugSection(Sec) && + (Sec.Header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) != 0) + return true; + } + + if (Config.ToRemove.matches(Sec.Name)) + return true; + + return false; + }); + + if (Config.OnlyKeepDebug) { + // For --only-keep-debug, we keep all other sections, but remove their + // content. The VirtualSize field in the section header is kept intact. 
+ Obj.truncateSections([](const Section &Sec) { + return !isDebugSection(Sec) && Sec.Name != ".buildid" && + ((Sec.Header.Characteristics & + (IMAGE_SCN_CNT_CODE | IMAGE_SCN_CNT_INITIALIZED_DATA)) != 0); + }); + } + + // StripAll removes all symbols and thus also removes all relocations. + if (Config.StripAll || Config.StripAllGNU) + for (Section &Sec : Obj.getMutableSections()) + Sec.Relocs.clear(); + + // If we need to do per-symbol removals, initialize the Referenced field. + if (Config.StripUnneeded || Config.DiscardMode == DiscardType::All || + !Config.SymbolsToRemove.empty()) + if (Error E = Obj.markSymbols()) + return E; + + for (Symbol &Sym : Obj.getMutableSymbols()) { + auto I = Config.SymbolsToRename.find(Sym.Name); + if (I != Config.SymbolsToRename.end()) + Sym.Name = I->getValue(); + } + + auto ToRemove = [&](const Symbol &Sym) -> Expected<bool> { + // For StripAll, all relocations have been stripped and we remove all + // symbols. + if (Config.StripAll || Config.StripAllGNU) + return true; + + if (Config.SymbolsToRemove.matches(Sym.Name)) { + // Explicitly removing a referenced symbol is an error. + if (Sym.Referenced) + return createStringError( + llvm::errc::invalid_argument, + "'" + Config.OutputFilename + "': not stripping symbol '" + + Sym.Name.str() + "' because it is named in a relocation"); + return true; + } + + if (!Sym.Referenced) { + // With --strip-unneeded, GNU objcopy removes all unreferenced local + // symbols, and any unreferenced undefined external. + // With --strip-unneeded-symbol we strip only specific unreferenced + // local symbol instead of removing all of such. 
+ if (Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC || + Sym.Sym.SectionNumber == 0) + if (Config.StripUnneeded || + Config.UnneededSymbolsToRemove.matches(Sym.Name)) + return true; + + // GNU objcopy keeps referenced local symbols and external symbols + // if --discard-all is set, similar to what --strip-unneeded does, + // but undefined local symbols are kept when --discard-all is set. + if (Config.DiscardMode == DiscardType::All && + Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC && + Sym.Sym.SectionNumber != 0) + return true; + } + + return false; + }; + + // Actually do removals of symbols. + if (Error Err = Obj.removeSymbols(ToRemove)) + return Err; + + if (!Config.SetSectionFlags.empty()) + for (Section &Sec : Obj.getMutableSections()) { + const auto It = Config.SetSectionFlags.find(Sec.Name); + if (It != Config.SetSectionFlags.end()) + Sec.Header.Characteristics = flagsToCharacteristics( + It->second.NewFlags, Sec.Header.Characteristics); + } + + for (const NewSectionInfo &NewSection : Config.AddSection) { + uint32_t Characteristics; + const auto It = Config.SetSectionFlags.find(NewSection.SectionName); + if (It != Config.SetSectionFlags.end()) + Characteristics = flagsToCharacteristics(It->second.NewFlags, 0); + else + Characteristics = IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_ALIGN_1BYTES; + + addSection(Obj, NewSection.SectionName, + ArrayRef(reinterpret_cast<const uint8_t *>( + NewSection.SectionData->getBufferStart()), + NewSection.SectionData->getBufferSize()), + Characteristics); + } + + for (const NewSectionInfo &NewSection : Config.UpdateSection) { + auto It = llvm::find_if(Obj.getMutableSections(), [&](auto &Sec) { + return Sec.Name == NewSection.SectionName; + }); + if (It == Obj.getMutableSections().end()) + return createStringError(errc::invalid_argument, + "could not find section with name '%s'", + NewSection.SectionName.str().c_str()); + size_t ContentSize = It->getContents().size(); + if (!ContentSize) + return createStringError( + 
errc::invalid_argument, + "section '%s' cannot be updated because it does not have contents", + NewSection.SectionName.str().c_str()); + if (ContentSize < NewSection.SectionData->getBufferSize()) + return createStringError( + errc::invalid_argument, + "new section cannot be larger than previous section"); + It->setOwnedContents({NewSection.SectionData->getBufferStart(), + NewSection.SectionData->getBufferEnd()}); + } + + if (!Config.AddGnuDebugLink.empty()) + if (Error E = addGnuDebugLink(Obj, Config.AddGnuDebugLink)) + return E; + + if (COFFConfig.Subsystem || COFFConfig.MajorSubsystemVersion || + COFFConfig.MinorSubsystemVersion) { + if (!Obj.IsPE) + return createStringError( + errc::invalid_argument, + "'" + Config.OutputFilename + + "': unable to set subsystem on a relocatable object file"); + if (COFFConfig.Subsystem) + Obj.PeHeader.Subsystem = *COFFConfig.Subsystem; + if (COFFConfig.MajorSubsystemVersion) + Obj.PeHeader.MajorSubsystemVersion = *COFFConfig.MajorSubsystemVersion; + if (COFFConfig.MinorSubsystemVersion) + Obj.PeHeader.MinorSubsystemVersion = *COFFConfig.MinorSubsystemVersion; + } + + return Error::success(); +} + +Error executeObjcopyOnBinary(const CommonConfig &Config, + const COFFConfig &COFFConfig, COFFObjectFile &In, + raw_ostream &Out) { + COFFReader Reader(In); + Expected<std::unique_ptr<Object>> ObjOrErr = Reader.create(); + if (!ObjOrErr) + return createFileError(Config.InputFilename, ObjOrErr.takeError()); + Object *Obj = ObjOrErr->get(); + assert(Obj && "Unable to deserialize COFF object"); + if (Error E = handleArgs(Config, COFFConfig, *Obj)) + return createFileError(Config.InputFilename, std::move(E)); + COFFWriter Writer(*Obj, Out); + if (Error E = Writer.write()) + return createFileError(Config.OutputFilename, std::move(E)); + return Error::success(); +} + +} // end namespace coff +} // end namespace objcopy +} // end namespace llvm diff --git a/contrib/libs/llvm16/lib/ObjCopy/COFF/COFFObject.cpp 
b/contrib/libs/llvm16/lib/ObjCopy/COFF/COFFObject.cpp new file mode 100644 index 00000000000..1d27b7eaa89 --- /dev/null +++ b/contrib/libs/llvm16/lib/ObjCopy/COFF/COFFObject.cpp @@ -0,0 +1,132 @@ +//===- COFFObject.cpp -----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "COFFObject.h" +#include "llvm/ADT/DenseSet.h" +#include <algorithm> + +namespace llvm { +namespace objcopy { +namespace coff { + +using namespace object; + +void Object::addSymbols(ArrayRef<Symbol> NewSymbols) { + for (Symbol S : NewSymbols) { + S.UniqueId = NextSymbolUniqueId++; + Symbols.emplace_back(S); + } + updateSymbols(); +} + +void Object::updateSymbols() { + SymbolMap = DenseMap<size_t, Symbol *>(Symbols.size()); + for (Symbol &Sym : Symbols) + SymbolMap[Sym.UniqueId] = &Sym; +} + +const Symbol *Object::findSymbol(size_t UniqueId) const { + return SymbolMap.lookup(UniqueId); +} + +Error Object::removeSymbols( + function_ref<Expected<bool>(const Symbol &)> ToRemove) { + Error Errs = Error::success(); + llvm::erase_if(Symbols, [ToRemove, &Errs](const Symbol &Sym) { + Expected<bool> ShouldRemove = ToRemove(Sym); + if (!ShouldRemove) { + Errs = joinErrors(std::move(Errs), ShouldRemove.takeError()); + return false; + } + return *ShouldRemove; + }); + + updateSymbols(); + return Errs; +} + +Error Object::markSymbols() { + for (Symbol &Sym : Symbols) + Sym.Referenced = false; + for (const Section &Sec : Sections) { + for (const Relocation &R : Sec.Relocs) { + auto It = SymbolMap.find(R.Target); + if (It == SymbolMap.end()) + return createStringError(object_error::invalid_symbol_index, + "relocation target %zu not found", R.Target); + It->second->Referenced = true; + } + } + return 
Error::success(); +} + +void Object::addSections(ArrayRef<Section> NewSections) { + for (Section S : NewSections) { + S.UniqueId = NextSectionUniqueId++; + Sections.emplace_back(S); + } + updateSections(); +} + +void Object::updateSections() { + SectionMap = DenseMap<ssize_t, Section *>(Sections.size()); + size_t Index = 1; + for (Section &S : Sections) { + SectionMap[S.UniqueId] = &S; + S.Index = Index++; + } +} + +const Section *Object::findSection(ssize_t UniqueId) const { + return SectionMap.lookup(UniqueId); +} + +void Object::removeSections(function_ref<bool(const Section &)> ToRemove) { + DenseSet<ssize_t> AssociatedSections; + auto RemoveAssociated = [&AssociatedSections](const Section &Sec) { + return AssociatedSections.contains(Sec.UniqueId); + }; + do { + DenseSet<ssize_t> RemovedSections; + llvm::erase_if(Sections, [ToRemove, &RemovedSections](const Section &Sec) { + bool Remove = ToRemove(Sec); + if (Remove) + RemovedSections.insert(Sec.UniqueId); + return Remove; + }); + // Remove all symbols referring to the removed sections. + AssociatedSections.clear(); + llvm::erase_if( + Symbols, [&RemovedSections, &AssociatedSections](const Symbol &Sym) { + // If there are sections that are associative to a removed + // section, + // remove those as well as nothing will include them (and we can't + // leave them dangling). 
+ if (RemovedSections.contains(Sym.AssociativeComdatTargetSectionId)) + AssociatedSections.insert(Sym.TargetSectionId); + return RemovedSections.contains(Sym.TargetSectionId); + }); + ToRemove = RemoveAssociated; + } while (!AssociatedSections.empty()); + updateSections(); + updateSymbols(); +} + +void Object::truncateSections(function_ref<bool(const Section &)> ToTruncate) { + for (Section &Sec : Sections) { + if (ToTruncate(Sec)) { + Sec.clearContents(); + Sec.Relocs.clear(); + Sec.Header.SizeOfRawData = 0; + } + } +} + +} // end namespace coff +} // end namespace objcopy +} // end namespace llvm diff --git a/contrib/libs/llvm16/lib/ObjCopy/COFF/COFFObject.h b/contrib/libs/llvm16/lib/ObjCopy/COFF/COFFObject.h new file mode 100644 index 00000000000..cdd1f17fc60 --- /dev/null +++ b/contrib/libs/llvm16/lib/ObjCopy/COFF/COFFObject.h @@ -0,0 +1,211 @@ +//===- COFFObject.h ---------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_OBJCOPY_COFF_COFFOBJECT_H +#define LLVM_LIB_OBJCOPY_COFF_COFFOBJECT_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/Object/COFF.h" +#include <cstddef> +#include <cstdint> +#include <vector> + +namespace llvm { +namespace objcopy { +namespace coff { + +struct Relocation { + Relocation() = default; + Relocation(const object::coff_relocation &R) : Reloc(R) {} + + object::coff_relocation Reloc; + size_t Target = 0; + StringRef TargetName; // Used for diagnostics only +}; + +struct Section { + object::coff_section Header; + std::vector<Relocation> Relocs; + StringRef Name; + ssize_t UniqueId; + size_t Index; + + ArrayRef<uint8_t> getContents() const { + if (!OwnedContents.empty()) + return OwnedContents; + return ContentsRef; + } + + void setContentsRef(ArrayRef<uint8_t> Data) { + OwnedContents.clear(); + ContentsRef = Data; + } + + void setOwnedContents(std::vector<uint8_t> &&Data) { + ContentsRef = ArrayRef<uint8_t>(); + OwnedContents = std::move(Data); + Header.SizeOfRawData = OwnedContents.size(); + } + + void clearContents() { + ContentsRef = ArrayRef<uint8_t>(); + OwnedContents.clear(); + } + +private: + ArrayRef<uint8_t> ContentsRef; + std::vector<uint8_t> OwnedContents; +}; + +struct AuxSymbol { + AuxSymbol(ArrayRef<uint8_t> In) { + assert(In.size() == sizeof(Opaque)); + std::copy(In.begin(), In.end(), Opaque); + } + + ArrayRef<uint8_t> getRef() const { + return ArrayRef<uint8_t>(Opaque, sizeof(Opaque)); + } + + uint8_t Opaque[sizeof(object::coff_symbol16)]; +}; + +struct Symbol { + object::coff_symbol32 Sym; + StringRef Name; + std::vector<AuxSymbol> AuxData; + StringRef AuxFile; + ssize_t TargetSectionId; + ssize_t AssociativeComdatTargetSectionId = 0; + 
std::optional<size_t> WeakTargetSymbolId; + size_t UniqueId; + size_t RawIndex; + bool Referenced; +}; + +struct Object { + bool IsPE = false; + + object::dos_header DosHeader; + ArrayRef<uint8_t> DosStub; + + object::coff_file_header CoffFileHeader; + + bool Is64 = false; + object::pe32plus_header PeHeader; + uint32_t BaseOfData = 0; // pe32plus_header lacks this field. + + std::vector<object::data_directory> DataDirectories; + + ArrayRef<Symbol> getSymbols() const { return Symbols; } + // This allows mutating individual Symbols, but not mutating the list + // of symbols itself. + iterator_range<std::vector<Symbol>::iterator> getMutableSymbols() { + return make_range(Symbols.begin(), Symbols.end()); + } + + const Symbol *findSymbol(size_t UniqueId) const; + + void addSymbols(ArrayRef<Symbol> NewSymbols); + Error removeSymbols(function_ref<Expected<bool>(const Symbol &)> ToRemove); + + // Set the Referenced field on all Symbols, based on relocations in + // all sections. + Error markSymbols(); + + ArrayRef<Section> getSections() const { return Sections; } + // This allows mutating individual Sections, but not mutating the list + // of sections itself. + iterator_range<std::vector<Section>::iterator> getMutableSections() { + return make_range(Sections.begin(), Sections.end()); + } + + const Section *findSection(ssize_t UniqueId) const; + + void addSections(ArrayRef<Section> NewSections); + void removeSections(function_ref<bool(const Section &)> ToRemove); + void truncateSections(function_ref<bool(const Section &)> ToTruncate); + +private: + std::vector<Symbol> Symbols; + DenseMap<size_t, Symbol *> SymbolMap; + + size_t NextSymbolUniqueId = 0; + + std::vector<Section> Sections; + DenseMap<ssize_t, Section *> SectionMap; + + ssize_t NextSectionUniqueId = 1; // Allow a UniqueId 0 to mean undefined. + + // Update SymbolMap. + void updateSymbols(); + + // Update SectionMap and Index in each Section. 
+ void updateSections(); +}; + +// Copy between coff_symbol16 and coff_symbol32. +// The source and destination files can use either coff_symbol16 or +// coff_symbol32, while we always store them as coff_symbol32 in the +// intermediate data structure. +template <class Symbol1Ty, class Symbol2Ty> +void copySymbol(Symbol1Ty &Dest, const Symbol2Ty &Src) { + static_assert(sizeof(Dest.Name.ShortName) == sizeof(Src.Name.ShortName), + "Mismatched name sizes"); + memcpy(Dest.Name.ShortName, Src.Name.ShortName, sizeof(Dest.Name.ShortName)); + Dest.Value = Src.Value; + Dest.SectionNumber = Src.SectionNumber; + Dest.Type = Src.Type; + Dest.StorageClass = Src.StorageClass; + Dest.NumberOfAuxSymbols = Src.NumberOfAuxSymbols; +} + +// Copy between pe32_header and pe32plus_header. +// We store the intermediate state in a pe32plus_header. +template <class PeHeader1Ty, class PeHeader2Ty> +void copyPeHeader(PeHeader1Ty &Dest, const PeHeader2Ty &Src) { + Dest.Magic = Src.Magic; + Dest.MajorLinkerVersion = Src.MajorLinkerVersion; + Dest.MinorLinkerVersion = Src.MinorLinkerVersion; + Dest.SizeOfCode = Src.SizeOfCode; + Dest.SizeOfInitializedData = Src.SizeOfInitializedData; + Dest.SizeOfUninitializedData = Src.SizeOfUninitializedData; + Dest.AddressOfEntryPoint = Src.AddressOfEntryPoint; + Dest.BaseOfCode = Src.BaseOfCode; + Dest.ImageBase = Src.ImageBase; + Dest.SectionAlignment = Src.SectionAlignment; + Dest.FileAlignment = Src.FileAlignment; + Dest.MajorOperatingSystemVersion = Src.MajorOperatingSystemVersion; + Dest.MinorOperatingSystemVersion = Src.MinorOperatingSystemVersion; + Dest.MajorImageVersion = Src.MajorImageVersion; + Dest.MinorImageVersion = Src.MinorImageVersion; + Dest.MajorSubsystemVersion = Src.MajorSubsystemVersion; + Dest.MinorSubsystemVersion = Src.MinorSubsystemVersion; + Dest.Win32VersionValue = Src.Win32VersionValue; + Dest.SizeOfImage = Src.SizeOfImage; + Dest.SizeOfHeaders = Src.SizeOfHeaders; + Dest.CheckSum = Src.CheckSum; + Dest.Subsystem = 
Src.Subsystem; + Dest.DLLCharacteristics = Src.DLLCharacteristics; + Dest.SizeOfStackReserve = Src.SizeOfStackReserve; + Dest.SizeOfStackCommit = Src.SizeOfStackCommit; + Dest.SizeOfHeapReserve = Src.SizeOfHeapReserve; + Dest.SizeOfHeapCommit = Src.SizeOfHeapCommit; + Dest.LoaderFlags = Src.LoaderFlags; + Dest.NumberOfRvaAndSize = Src.NumberOfRvaAndSize; +} + +} // end namespace coff +} // end namespace objcopy +} // end namespace llvm + +#endif // LLVM_LIB_OBJCOPY_COFF_COFFOBJECT_H diff --git a/contrib/libs/llvm16/lib/ObjCopy/COFF/COFFReader.cpp b/contrib/libs/llvm16/lib/ObjCopy/COFF/COFFReader.cpp new file mode 100644 index 00000000000..32aceb805a2 --- /dev/null +++ b/contrib/libs/llvm16/lib/ObjCopy/COFF/COFFReader.cpp @@ -0,0 +1,226 @@ +//===- COFFReader.cpp -----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "COFFReader.h" +#include "COFFObject.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/ErrorHandling.h" +#include <cstddef> +#include <cstdint> + +namespace llvm { +namespace objcopy { +namespace coff { + +using namespace object; +using namespace COFF; + +Error COFFReader::readExecutableHeaders(Object &Obj) const { + const dos_header *DH = COFFObj.getDOSHeader(); + Obj.Is64 = COFFObj.is64(); + if (!DH) + return Error::success(); + + Obj.IsPE = true; + Obj.DosHeader = *DH; + if (DH->AddressOfNewExeHeader > sizeof(*DH)) + Obj.DosStub = ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(&DH[1]), + DH->AddressOfNewExeHeader - sizeof(*DH)); + + if (COFFObj.is64()) { + Obj.PeHeader = *COFFObj.getPE32PlusHeader(); + } else { + const pe32_header 
*PE32 = COFFObj.getPE32Header(); + copyPeHeader(Obj.PeHeader, *PE32); + // The pe32plus_header (stored in Object) lacks the BaseOfData field. + Obj.BaseOfData = PE32->BaseOfData; + } + + for (size_t I = 0; I < Obj.PeHeader.NumberOfRvaAndSize; I++) { + const data_directory *Dir = COFFObj.getDataDirectory(I); + if (!Dir) + return errorCodeToError(object_error::parse_failed); + Obj.DataDirectories.emplace_back(*Dir); + } + return Error::success(); +} + +Error COFFReader::readSections(Object &Obj) const { + std::vector<Section> Sections; + // Section indexing starts from 1. + for (size_t I = 1, E = COFFObj.getNumberOfSections(); I <= E; I++) { + Expected<const coff_section *> SecOrErr = COFFObj.getSection(I); + if (!SecOrErr) + return SecOrErr.takeError(); + const coff_section *Sec = *SecOrErr; + Sections.push_back(Section()); + Section &S = Sections.back(); + S.Header = *Sec; + S.Header.Characteristics &= ~COFF::IMAGE_SCN_LNK_NRELOC_OVFL; + ArrayRef<uint8_t> Contents; + if (Error E = COFFObj.getSectionContents(Sec, Contents)) + return E; + S.setContentsRef(Contents); + ArrayRef<coff_relocation> Relocs = COFFObj.getRelocations(Sec); + for (const coff_relocation &R : Relocs) + S.Relocs.push_back(R); + if (Expected<StringRef> NameOrErr = COFFObj.getSectionName(Sec)) + S.Name = *NameOrErr; + else + return NameOrErr.takeError(); + } + Obj.addSections(Sections); + return Error::success(); +} + +Error COFFReader::readSymbols(Object &Obj, bool IsBigObj) const { + std::vector<Symbol> Symbols; + Symbols.reserve(COFFObj.getNumberOfSymbols()); + ArrayRef<Section> Sections = Obj.getSections(); + for (uint32_t I = 0, E = COFFObj.getNumberOfSymbols(); I < E;) { + Expected<COFFSymbolRef> SymOrErr = COFFObj.getSymbol(I); + if (!SymOrErr) + return SymOrErr.takeError(); + COFFSymbolRef SymRef = *SymOrErr; + + Symbols.push_back(Symbol()); + Symbol &Sym = Symbols.back(); + // Copy symbols from the original form into an intermediate coff_symbol32. 
+ if (IsBigObj) + copySymbol(Sym.Sym, + *reinterpret_cast<const coff_symbol32 *>(SymRef.getRawPtr())); + else + copySymbol(Sym.Sym, + *reinterpret_cast<const coff_symbol16 *>(SymRef.getRawPtr())); + auto NameOrErr = COFFObj.getSymbolName(SymRef); + if (!NameOrErr) + return NameOrErr.takeError(); + Sym.Name = *NameOrErr; + + ArrayRef<uint8_t> AuxData = COFFObj.getSymbolAuxData(SymRef); + size_t SymSize = IsBigObj ? sizeof(coff_symbol32) : sizeof(coff_symbol16); + assert(AuxData.size() == SymSize * SymRef.getNumberOfAuxSymbols()); + // The auxillary symbols are structs of sizeof(coff_symbol16) each. + // In the big object format (where symbols are coff_symbol32), each + // auxillary symbol is padded with 2 bytes at the end. Copy each + // auxillary symbol to the Sym.AuxData vector. For file symbols, + // the whole range of aux symbols are interpreted as one null padded + // string instead. + if (SymRef.isFileRecord()) + Sym.AuxFile = StringRef(reinterpret_cast<const char *>(AuxData.data()), + AuxData.size()) + .rtrim('\0'); + else + for (size_t I = 0; I < SymRef.getNumberOfAuxSymbols(); I++) + Sym.AuxData.push_back(AuxData.slice(I * SymSize, sizeof(AuxSymbol))); + + // Find the unique id of the section + if (SymRef.getSectionNumber() <= + 0) // Special symbol (undefined/absolute/debug) + Sym.TargetSectionId = SymRef.getSectionNumber(); + else if (static_cast<uint32_t>(SymRef.getSectionNumber() - 1) < + Sections.size()) + Sym.TargetSectionId = Sections[SymRef.getSectionNumber() - 1].UniqueId; + else + return createStringError(object_error::parse_failed, + "section number out of range"); + // For section definitions, check if it is comdat associative, and if + // it is, find the target section unique id. 
+ const coff_aux_section_definition *SD = SymRef.getSectionDefinition(); + const coff_aux_weak_external *WE = SymRef.getWeakExternal(); + if (SD && SD->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE) { + int32_t Index = SD->getNumber(IsBigObj); + if (Index <= 0 || static_cast<uint32_t>(Index - 1) >= Sections.size()) + return createStringError(object_error::parse_failed, + "unexpected associative section index"); + Sym.AssociativeComdatTargetSectionId = Sections[Index - 1].UniqueId; + } else if (WE) { + // This is a raw symbol index for now, but store it in the Symbol + // until we've added them to the Object, which assigns the final + // unique ids. + Sym.WeakTargetSymbolId = WE->TagIndex; + } + I += 1 + SymRef.getNumberOfAuxSymbols(); + } + Obj.addSymbols(Symbols); + return Error::success(); +} + +Error COFFReader::setSymbolTargets(Object &Obj) const { + std::vector<const Symbol *> RawSymbolTable; + for (const Symbol &Sym : Obj.getSymbols()) { + RawSymbolTable.push_back(&Sym); + for (size_t I = 0; I < Sym.Sym.NumberOfAuxSymbols; I++) + RawSymbolTable.push_back(nullptr); + } + for (Symbol &Sym : Obj.getMutableSymbols()) { + // Convert WeakTargetSymbolId from the original raw symbol index to + // a proper unique id. 
+ if (Sym.WeakTargetSymbolId) { + if (*Sym.WeakTargetSymbolId >= RawSymbolTable.size()) + return createStringError(object_error::parse_failed, + "weak external reference out of range"); + const Symbol *Target = RawSymbolTable[*Sym.WeakTargetSymbolId]; + if (Target == nullptr) + return createStringError(object_error::parse_failed, + "invalid SymbolTableIndex"); + Sym.WeakTargetSymbolId = Target->UniqueId; + } + } + for (Section &Sec : Obj.getMutableSections()) { + for (Relocation &R : Sec.Relocs) { + if (R.Reloc.SymbolTableIndex >= RawSymbolTable.size()) + return createStringError(object_error::parse_failed, + "SymbolTableIndex out of range"); + const Symbol *Sym = RawSymbolTable[R.Reloc.SymbolTableIndex]; + if (Sym == nullptr) + return createStringError(object_error::parse_failed, + "invalid SymbolTableIndex"); + R.Target = Sym->UniqueId; + R.TargetName = Sym->Name; + } + } + return Error::success(); +} + +Expected<std::unique_ptr<Object>> COFFReader::create() const { + auto Obj = std::make_unique<Object>(); + + bool IsBigObj = false; + if (const coff_file_header *CFH = COFFObj.getCOFFHeader()) { + Obj->CoffFileHeader = *CFH; + } else { + const coff_bigobj_file_header *CBFH = COFFObj.getCOFFBigObjHeader(); + if (!CBFH) + return createStringError(object_error::parse_failed, + "no COFF file header returned"); + // Only copying the few fields from the bigobj header that we need + // and won't recreate in the end. 
+ Obj->CoffFileHeader.Machine = CBFH->Machine; + Obj->CoffFileHeader.TimeDateStamp = CBFH->TimeDateStamp; + IsBigObj = true; + } + + if (Error E = readExecutableHeaders(*Obj)) + return std::move(E); + if (Error E = readSections(*Obj)) + return std::move(E); + if (Error E = readSymbols(*Obj, IsBigObj)) + return std::move(E); + if (Error E = setSymbolTargets(*Obj)) + return std::move(E); + + return std::move(Obj); +} + +} // end namespace coff +} // end namespace objcopy +} // end namespace llvm diff --git a/contrib/libs/llvm16/lib/ObjCopy/COFF/COFFReader.h b/contrib/libs/llvm16/lib/ObjCopy/COFF/COFFReader.h new file mode 100644 index 00000000000..b4957f84439 --- /dev/null +++ b/contrib/libs/llvm16/lib/ObjCopy/COFF/COFFReader.h @@ -0,0 +1,41 @@ +//===- COFFReader.h ---------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_OBJCOPY_COFF_COFFREADER_H +#define LLVM_LIB_OBJCOPY_COFF_COFFREADER_H + +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/Error.h" + +namespace llvm { +namespace objcopy { +namespace coff { + +struct Object; + +using object::COFFObjectFile; + +class COFFReader { + const COFFObjectFile &COFFObj; + + Error readExecutableHeaders(Object &Obj) const; + Error readSections(Object &Obj) const; + Error readSymbols(Object &Obj, bool IsBigObj) const; + Error setSymbolTargets(Object &Obj) const; + +public: + explicit COFFReader(const COFFObjectFile &O) : COFFObj(O) {} + Expected<std::unique_ptr<Object>> create() const; +}; + +} // end namespace coff +} // end namespace objcopy +} // end namespace llvm + +#endif // LLVM_LIB_OBJCOPY_COFF_COFFREADER_H diff --git 
a/contrib/libs/llvm16/lib/ObjCopy/COFF/COFFWriter.cpp b/contrib/libs/llvm16/lib/ObjCopy/COFF/COFFWriter.cpp new file mode 100644 index 00000000000..1f489359291 --- /dev/null +++ b/contrib/libs/llvm16/lib/ObjCopy/COFF/COFFWriter.cpp @@ -0,0 +1,468 @@ +//===- COFFWriter.cpp -----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "COFFWriter.h" +#include "COFFObject.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/ErrorHandling.h" +#include <cstddef> +#include <cstdint> + +namespace llvm { +namespace objcopy { +namespace coff { + +using namespace object; +using namespace COFF; + +Error COFFWriter::finalizeRelocTargets() { + for (Section &Sec : Obj.getMutableSections()) { + for (Relocation &R : Sec.Relocs) { + const Symbol *Sym = Obj.findSymbol(R.Target); + if (Sym == nullptr) + return createStringError(object_error::invalid_symbol_index, + "relocation target '%s' (%zu) not found", + R.TargetName.str().c_str(), R.Target); + R.Reloc.SymbolTableIndex = Sym->RawIndex; + } + } + return Error::success(); +} + +Error COFFWriter::finalizeSymbolContents() { + for (Symbol &Sym : Obj.getMutableSymbols()) { + if (Sym.TargetSectionId <= 0) { + // Undefined, or a special kind of symbol. These negative values + // are stored in the SectionNumber field which is unsigned. 
+ Sym.Sym.SectionNumber = static_cast<uint32_t>(Sym.TargetSectionId); + } else { + const Section *Sec = Obj.findSection(Sym.TargetSectionId); + if (Sec == nullptr) + return createStringError(object_error::invalid_symbol_index, + "symbol '%s' points to a removed section", + Sym.Name.str().c_str()); + Sym.Sym.SectionNumber = Sec->Index; + + if (Sym.Sym.NumberOfAuxSymbols == 1 && + Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC) { + coff_aux_section_definition *SD = + reinterpret_cast<coff_aux_section_definition *>( + Sym.AuxData[0].Opaque); + uint32_t SDSectionNumber; + if (Sym.AssociativeComdatTargetSectionId == 0) { + // Not a comdat associative section; just set the Number field to + // the number of the section itself. + SDSectionNumber = Sec->Index; + } else { + Sec = Obj.findSection(Sym.AssociativeComdatTargetSectionId); + if (Sec == nullptr) + return createStringError( + object_error::invalid_symbol_index, + "symbol '%s' is associative to a removed section", + Sym.Name.str().c_str()); + SDSectionNumber = Sec->Index; + } + // Update the section definition with the new section number. + SD->NumberLowPart = static_cast<uint16_t>(SDSectionNumber); + SD->NumberHighPart = static_cast<uint16_t>(SDSectionNumber >> 16); + } + } + // Check that we actually have got AuxData to match the weak symbol target + // we want to set. Only >= 1 would be required, but only == 1 makes sense. 
+ if (Sym.WeakTargetSymbolId && Sym.Sym.NumberOfAuxSymbols == 1) { + coff_aux_weak_external *WE = + reinterpret_cast<coff_aux_weak_external *>(Sym.AuxData[0].Opaque); + const Symbol *Target = Obj.findSymbol(*Sym.WeakTargetSymbolId); + if (Target == nullptr) + return createStringError(object_error::invalid_symbol_index, + "symbol '%s' is missing its weak target", + Sym.Name.str().c_str()); + WE->TagIndex = Target->RawIndex; + } + } + return Error::success(); +} + +void COFFWriter::layoutSections() { + for (auto &S : Obj.getMutableSections()) { + if (S.Header.SizeOfRawData > 0) + S.Header.PointerToRawData = FileSize; + else + S.Header.PointerToRawData = 0; + FileSize += S.Header.SizeOfRawData; // For executables, this is already + // aligned to FileAlignment. + if (S.Relocs.size() >= 0xffff) { + S.Header.Characteristics |= COFF::IMAGE_SCN_LNK_NRELOC_OVFL; + S.Header.NumberOfRelocations = 0xffff; + S.Header.PointerToRelocations = FileSize; + FileSize += sizeof(coff_relocation); + } else { + S.Header.NumberOfRelocations = S.Relocs.size(); + S.Header.PointerToRelocations = S.Relocs.size() ? FileSize : 0; + } + + FileSize += S.Relocs.size() * sizeof(coff_relocation); + FileSize = alignTo(FileSize, FileAlignment); + + if (S.Header.Characteristics & IMAGE_SCN_CNT_INITIALIZED_DATA) + SizeOfInitializedData += S.Header.SizeOfRawData; + } +} + +Expected<size_t> COFFWriter::finalizeStringTable() { + for (const auto &S : Obj.getSections()) + if (S.Name.size() > COFF::NameSize) + StrTabBuilder.add(S.Name); + + for (const auto &S : Obj.getSymbols()) + if (S.Name.size() > COFF::NameSize) + StrTabBuilder.add(S.Name); + + StrTabBuilder.finalize(); + + for (auto &S : Obj.getMutableSections()) { + memset(S.Header.Name, 0, sizeof(S.Header.Name)); + if (S.Name.size() <= COFF::NameSize) { + // Short names can go in the field directly. + memcpy(S.Header.Name, S.Name.data(), S.Name.size()); + } else { + // Offset of the section name in the string table. 
+ size_t Offset = StrTabBuilder.getOffset(S.Name); + if (!COFF::encodeSectionName(S.Header.Name, Offset)) + return createStringError(object_error::invalid_section_index, + "COFF string table is greater than 64GB, " + "unable to encode section name offset"); + } + } + for (auto &S : Obj.getMutableSymbols()) { + if (S.Name.size() > COFF::NameSize) { + S.Sym.Name.Offset.Zeroes = 0; + S.Sym.Name.Offset.Offset = StrTabBuilder.getOffset(S.Name); + } else { + strncpy(S.Sym.Name.ShortName, S.Name.data(), COFF::NameSize); + } + } + return StrTabBuilder.getSize(); +} + +template <class SymbolTy> +std::pair<size_t, size_t> COFFWriter::finalizeSymbolTable() { + size_t RawSymIndex = 0; + for (auto &S : Obj.getMutableSymbols()) { + // Symbols normally have NumberOfAuxSymbols set correctly all the time. + // For file symbols, we need to know the output file's symbol size to be + // able to calculate the number of slots it occupies. + if (!S.AuxFile.empty()) + S.Sym.NumberOfAuxSymbols = + alignTo(S.AuxFile.size(), sizeof(SymbolTy)) / sizeof(SymbolTy); + S.RawIndex = RawSymIndex; + RawSymIndex += 1 + S.Sym.NumberOfAuxSymbols; + } + return std::make_pair(RawSymIndex * sizeof(SymbolTy), sizeof(SymbolTy)); +} + +Error COFFWriter::finalize(bool IsBigObj) { + size_t SymTabSize, SymbolSize; + std::tie(SymTabSize, SymbolSize) = IsBigObj + ? finalizeSymbolTable<coff_symbol32>() + : finalizeSymbolTable<coff_symbol16>(); + + if (Error E = finalizeRelocTargets()) + return E; + if (Error E = finalizeSymbolContents()) + return E; + + size_t SizeOfHeaders = 0; + FileAlignment = 1; + size_t PeHeaderSize = 0; + if (Obj.IsPE) { + Obj.DosHeader.AddressOfNewExeHeader = + sizeof(Obj.DosHeader) + Obj.DosStub.size(); + SizeOfHeaders += Obj.DosHeader.AddressOfNewExeHeader + sizeof(PEMagic); + + FileAlignment = Obj.PeHeader.FileAlignment; + Obj.PeHeader.NumberOfRvaAndSize = Obj.DataDirectories.size(); + + PeHeaderSize = Obj.Is64 ? 
sizeof(pe32plus_header) : sizeof(pe32_header); + SizeOfHeaders += + PeHeaderSize + sizeof(data_directory) * Obj.DataDirectories.size(); + } + Obj.CoffFileHeader.NumberOfSections = Obj.getSections().size(); + SizeOfHeaders += + IsBigObj ? sizeof(coff_bigobj_file_header) : sizeof(coff_file_header); + SizeOfHeaders += sizeof(coff_section) * Obj.getSections().size(); + SizeOfHeaders = alignTo(SizeOfHeaders, FileAlignment); + + Obj.CoffFileHeader.SizeOfOptionalHeader = + PeHeaderSize + sizeof(data_directory) * Obj.DataDirectories.size(); + + FileSize = SizeOfHeaders; + SizeOfInitializedData = 0; + + layoutSections(); + + if (Obj.IsPE) { + Obj.PeHeader.SizeOfHeaders = SizeOfHeaders; + Obj.PeHeader.SizeOfInitializedData = SizeOfInitializedData; + + if (!Obj.getSections().empty()) { + const Section &S = Obj.getSections().back(); + Obj.PeHeader.SizeOfImage = + alignTo(S.Header.VirtualAddress + S.Header.VirtualSize, + Obj.PeHeader.SectionAlignment); + } + + // If the PE header had a checksum, clear it, since it isn't valid + // any longer. (We don't calculate a new one.) + Obj.PeHeader.CheckSum = 0; + } + + Expected<size_t> StrTabSizeOrErr = finalizeStringTable(); + if (!StrTabSizeOrErr) + return StrTabSizeOrErr.takeError(); + + size_t StrTabSize = *StrTabSizeOrErr; + + size_t PointerToSymbolTable = FileSize; + // StrTabSize <= 4 is the size of an empty string table, only consisting + // of the length field. + if (SymTabSize == 0 && StrTabSize <= 4 && Obj.IsPE) { + // For executables, don't point to the symbol table and skip writing + // the length field, if both the symbol and string tables are empty. 
+ PointerToSymbolTable = 0; + StrTabSize = 0; + } + + size_t NumRawSymbols = SymTabSize / SymbolSize; + Obj.CoffFileHeader.PointerToSymbolTable = PointerToSymbolTable; + Obj.CoffFileHeader.NumberOfSymbols = NumRawSymbols; + FileSize += SymTabSize + StrTabSize; + FileSize = alignTo(FileSize, FileAlignment); + + return Error::success(); +} + +void COFFWriter::writeHeaders(bool IsBigObj) { + uint8_t *Ptr = reinterpret_cast<uint8_t *>(Buf->getBufferStart()); + if (Obj.IsPE) { + memcpy(Ptr, &Obj.DosHeader, sizeof(Obj.DosHeader)); + Ptr += sizeof(Obj.DosHeader); + memcpy(Ptr, Obj.DosStub.data(), Obj.DosStub.size()); + Ptr += Obj.DosStub.size(); + memcpy(Ptr, PEMagic, sizeof(PEMagic)); + Ptr += sizeof(PEMagic); + } + if (!IsBigObj) { + memcpy(Ptr, &Obj.CoffFileHeader, sizeof(Obj.CoffFileHeader)); + Ptr += sizeof(Obj.CoffFileHeader); + } else { + // Generate a coff_bigobj_file_header, filling it in with the values + // from Obj.CoffFileHeader. All extra fields that don't exist in + // coff_file_header can be set to hardcoded values. + coff_bigobj_file_header BigObjHeader; + BigObjHeader.Sig1 = IMAGE_FILE_MACHINE_UNKNOWN; + BigObjHeader.Sig2 = 0xffff; + BigObjHeader.Version = BigObjHeader::MinBigObjectVersion; + BigObjHeader.Machine = Obj.CoffFileHeader.Machine; + BigObjHeader.TimeDateStamp = Obj.CoffFileHeader.TimeDateStamp; + memcpy(BigObjHeader.UUID, BigObjMagic, sizeof(BigObjMagic)); + BigObjHeader.unused1 = 0; + BigObjHeader.unused2 = 0; + BigObjHeader.unused3 = 0; + BigObjHeader.unused4 = 0; + // The value in Obj.CoffFileHeader.NumberOfSections is truncated, thus + // get the original one instead. 
+ BigObjHeader.NumberOfSections = Obj.getSections().size(); + BigObjHeader.PointerToSymbolTable = Obj.CoffFileHeader.PointerToSymbolTable; + BigObjHeader.NumberOfSymbols = Obj.CoffFileHeader.NumberOfSymbols; + + memcpy(Ptr, &BigObjHeader, sizeof(BigObjHeader)); + Ptr += sizeof(BigObjHeader); + } + if (Obj.IsPE) { + if (Obj.Is64) { + memcpy(Ptr, &Obj.PeHeader, sizeof(Obj.PeHeader)); + Ptr += sizeof(Obj.PeHeader); + } else { + pe32_header PeHeader; + copyPeHeader(PeHeader, Obj.PeHeader); + // The pe32plus_header (stored in Object) lacks the BaseOfData field. + PeHeader.BaseOfData = Obj.BaseOfData; + + memcpy(Ptr, &PeHeader, sizeof(PeHeader)); + Ptr += sizeof(PeHeader); + } + for (const auto &DD : Obj.DataDirectories) { + memcpy(Ptr, &DD, sizeof(DD)); + Ptr += sizeof(DD); + } + } + for (const auto &S : Obj.getSections()) { + memcpy(Ptr, &S.Header, sizeof(S.Header)); + Ptr += sizeof(S.Header); + } +} + +void COFFWriter::writeSections() { + for (const auto &S : Obj.getSections()) { + uint8_t *Ptr = reinterpret_cast<uint8_t *>(Buf->getBufferStart()) + + S.Header.PointerToRawData; + ArrayRef<uint8_t> Contents = S.getContents(); + std::copy(Contents.begin(), Contents.end(), Ptr); + + // For executable sections, pad the remainder of the raw data size with + // 0xcc, which is int3 on x86. 
+ if ((S.Header.Characteristics & IMAGE_SCN_CNT_CODE) && + S.Header.SizeOfRawData > Contents.size()) + memset(Ptr + Contents.size(), 0xcc, + S.Header.SizeOfRawData - Contents.size()); + + Ptr += S.Header.SizeOfRawData; + + if (S.Relocs.size() >= 0xffff) { + object::coff_relocation R; + R.VirtualAddress = S.Relocs.size() + 1; + R.SymbolTableIndex = 0; + R.Type = 0; + memcpy(Ptr, &R, sizeof(R)); + Ptr += sizeof(R); + } + for (const auto &R : S.Relocs) { + memcpy(Ptr, &R.Reloc, sizeof(R.Reloc)); + Ptr += sizeof(R.Reloc); + } + } +} + +template <class SymbolTy> void COFFWriter::writeSymbolStringTables() { + uint8_t *Ptr = reinterpret_cast<uint8_t *>(Buf->getBufferStart()) + + Obj.CoffFileHeader.PointerToSymbolTable; + for (const auto &S : Obj.getSymbols()) { + // Convert symbols back to the right size, from coff_symbol32. + copySymbol<SymbolTy, coff_symbol32>(*reinterpret_cast<SymbolTy *>(Ptr), + S.Sym); + Ptr += sizeof(SymbolTy); + if (!S.AuxFile.empty()) { + // For file symbols, just write the string into the aux symbol slots, + // assuming that the unwritten parts are initialized to zero in the memory + // mapped file. + std::copy(S.AuxFile.begin(), S.AuxFile.end(), Ptr); + Ptr += S.Sym.NumberOfAuxSymbols * sizeof(SymbolTy); + } else { + // For other auxillary symbols, write their opaque payload into one symbol + // table slot each. For big object files, the symbols are larger than the + // opaque auxillary symbol struct and we leave padding at the end of each + // entry. + for (const AuxSymbol &AuxSym : S.AuxData) { + ArrayRef<uint8_t> Ref = AuxSym.getRef(); + std::copy(Ref.begin(), Ref.end(), Ptr); + Ptr += sizeof(SymbolTy); + } + } + } + if (StrTabBuilder.getSize() > 4 || !Obj.IsPE) { + // Always write a string table in object files, even an empty one. 
+ StrTabBuilder.write(Ptr); + Ptr += StrTabBuilder.getSize(); + } +} + +Error COFFWriter::write(bool IsBigObj) { + if (Error E = finalize(IsBigObj)) + return E; + + Buf = WritableMemoryBuffer::getNewMemBuffer(FileSize); + if (!Buf) + return createStringError(llvm::errc::not_enough_memory, + "failed to allocate memory buffer of " + + Twine::utohexstr(FileSize) + " bytes."); + + writeHeaders(IsBigObj); + writeSections(); + if (IsBigObj) + writeSymbolStringTables<coff_symbol32>(); + else + writeSymbolStringTables<coff_symbol16>(); + + if (Obj.IsPE) + if (Error E = patchDebugDirectory()) + return E; + + // TODO: Implement direct writing to the output stream (without intermediate + // memory buffer Buf). + Out.write(Buf->getBufferStart(), Buf->getBufferSize()); + return Error::success(); +} + +Expected<uint32_t> COFFWriter::virtualAddressToFileAddress(uint32_t RVA) { + for (const auto &S : Obj.getSections()) { + if (RVA >= S.Header.VirtualAddress && + RVA < S.Header.VirtualAddress + S.Header.SizeOfRawData) + return S.Header.PointerToRawData + RVA - S.Header.VirtualAddress; + } + return createStringError(object_error::parse_failed, + "debug directory payload not found"); +} + +// Locate which sections contain the debug directories, iterate over all +// the debug_directory structs in there, and set the PointerToRawData field +// in all of them, according to their new physical location in the file. 
+Error COFFWriter::patchDebugDirectory() { + if (Obj.DataDirectories.size() <= DEBUG_DIRECTORY) + return Error::success(); + const data_directory *Dir = &Obj.DataDirectories[DEBUG_DIRECTORY]; + if (Dir->Size <= 0) + return Error::success(); + for (const auto &S : Obj.getSections()) { + if (Dir->RelativeVirtualAddress >= S.Header.VirtualAddress && + Dir->RelativeVirtualAddress < + S.Header.VirtualAddress + S.Header.SizeOfRawData) { + if (Dir->RelativeVirtualAddress + Dir->Size > + S.Header.VirtualAddress + S.Header.SizeOfRawData) + return createStringError(object_error::parse_failed, + "debug directory extends past end of section"); + + size_t Offset = Dir->RelativeVirtualAddress - S.Header.VirtualAddress; + uint8_t *Ptr = reinterpret_cast<uint8_t *>(Buf->getBufferStart()) + + S.Header.PointerToRawData + Offset; + uint8_t *End = Ptr + Dir->Size; + while (Ptr < End) { + debug_directory *Debug = reinterpret_cast<debug_directory *>(Ptr); + if (Debug->PointerToRawData) { + if (Expected<uint32_t> FilePosOrErr = + virtualAddressToFileAddress(Debug->AddressOfRawData)) + Debug->PointerToRawData = *FilePosOrErr; + else + return FilePosOrErr.takeError(); + } + Ptr += sizeof(debug_directory); + Offset += sizeof(debug_directory); + } + // Debug directory found and patched, all done. 
+ return Error::success(); + } + } + return createStringError(object_error::parse_failed, + "debug directory not found"); +} + +Error COFFWriter::write() { + bool IsBigObj = Obj.getSections().size() > MaxNumberOfSections16; + if (IsBigObj && Obj.IsPE) + return createStringError(object_error::parse_failed, + "too many sections for executable"); + return write(IsBigObj); +} + +} // end namespace coff +} // end namespace objcopy +} // end namespace llvm diff --git a/contrib/libs/llvm16/lib/ObjCopy/COFF/COFFWriter.h b/contrib/libs/llvm16/lib/ObjCopy/COFF/COFFWriter.h new file mode 100644 index 00000000000..b7dca69e9a8 --- /dev/null +++ b/contrib/libs/llvm16/lib/ObjCopy/COFF/COFFWriter.h @@ -0,0 +1,63 @@ +//===- COFFWriter.h ---------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_OBJCOPY_COFF_COFFWRITER_H +#define LLVM_LIB_OBJCOPY_COFF_COFFWRITER_H + +#include "llvm/MC/StringTableBuilder.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/MemoryBuffer.h" +#include <cstddef> +#include <utility> + +namespace llvm { +namespace objcopy { +namespace coff { + +struct Object; + +class COFFWriter { + Object &Obj; + std::unique_ptr<WritableMemoryBuffer> Buf; + raw_ostream &Out; + + size_t FileSize; + size_t FileAlignment; + size_t SizeOfInitializedData; + StringTableBuilder StrTabBuilder; + + template <class SymbolTy> std::pair<size_t, size_t> finalizeSymbolTable(); + Error finalizeRelocTargets(); + Error finalizeSymbolContents(); + void layoutSections(); + Expected<size_t> finalizeStringTable(); + + Error finalize(bool IsBigObj); + + void writeHeaders(bool IsBigObj); + void writeSections(); + template <class SymbolTy> void writeSymbolStringTables(); + + 
Error write(bool IsBigObj); + + Error patchDebugDirectory(); + Expected<uint32_t> virtualAddressToFileAddress(uint32_t RVA); + +public: + virtual ~COFFWriter() {} + Error write(); + + COFFWriter(Object &Obj, raw_ostream &Out) + : Obj(Obj), Out(Out), StrTabBuilder(StringTableBuilder::WinCOFF) {} +}; + +} // end namespace coff +} // end namespace objcopy +} // end namespace llvm + +#endif // LLVM_LIB_OBJCOPY_COFF_COFFWRITER_H diff --git a/contrib/libs/llvm16/lib/ObjCopy/CommonConfig.cpp b/contrib/libs/llvm16/lib/ObjCopy/CommonConfig.cpp new file mode 100644 index 00000000000..e85715d0c44 --- /dev/null +++ b/contrib/libs/llvm16/lib/ObjCopy/CommonConfig.cpp @@ -0,0 +1,50 @@ +//===- CommonConfig.cpp ---------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ObjCopy/CommonConfig.h" + +namespace llvm { +namespace objcopy { + +Expected<NameOrPattern> +NameOrPattern::create(StringRef Pattern, MatchStyle MS, + function_ref<Error(Error)> ErrorCallback) { + switch (MS) { + case MatchStyle::Literal: + return NameOrPattern(Pattern); + case MatchStyle::Wildcard: { + SmallVector<char, 32> Data; + bool IsPositiveMatch = true; + if (Pattern[0] == '!') { + IsPositiveMatch = false; + Pattern = Pattern.drop_front(); + } + Expected<GlobPattern> GlobOrErr = GlobPattern::create(Pattern); + + // If we couldn't create it as a glob, report the error, but try again + // with a literal if the error reporting is non-fatal. 
+ if (!GlobOrErr) { + if (Error E = ErrorCallback(GlobOrErr.takeError())) + return std::move(E); + return create(Pattern, MatchStyle::Literal, ErrorCallback); + } + + return NameOrPattern(std::make_shared<GlobPattern>(*GlobOrErr), + IsPositiveMatch); + } + case MatchStyle::Regex: { + SmallVector<char, 32> Data; + return NameOrPattern(std::make_shared<Regex>( + ("^" + Pattern.ltrim('^').rtrim('$') + "$").toStringRef(Data))); + } + } + llvm_unreachable("Unhandled llvm.objcopy.MatchStyle enum"); +} + +} // end namespace objcopy +} // end namespace llvm diff --git a/contrib/libs/llvm16/lib/ObjCopy/ConfigManager.cpp b/contrib/libs/llvm16/lib/ObjCopy/ConfigManager.cpp new file mode 100644 index 00000000000..77321829e61 --- /dev/null +++ b/contrib/libs/llvm16/lib/ObjCopy/ConfigManager.cpp @@ -0,0 +1,99 @@ +//===- ConfigManager.cpp --------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ObjCopy/ConfigManager.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" + +namespace llvm { +namespace objcopy { + +Expected<const COFFConfig &> ConfigManager::getCOFFConfig() const { + if (!Common.SplitDWO.empty() || !Common.SymbolsPrefix.empty() || + !Common.AllocSectionsPrefix.empty() || !Common.DumpSection.empty() || + !Common.KeepSection.empty() || !Common.SymbolsToGlobalize.empty() || + !Common.SymbolsToKeep.empty() || !Common.SymbolsToLocalize.empty() || + !Common.SymbolsToWeaken.empty() || !Common.SymbolsToKeepGlobal.empty() || + !Common.SectionsToRename.empty() || !Common.SetSectionAlignment.empty() || + !Common.SetSectionType.empty() || Common.ExtractDWO || + Common.PreserveDates || Common.StripDWO || Common.StripNonAlloc || + Common.StripSections || Common.Weaken || Common.DecompressDebugSections || + Common.DiscardMode == DiscardType::Locals || !Common.SymbolsToAdd.empty()) + return createStringError(llvm::errc::invalid_argument, + "option is not supported for COFF"); + + return COFF; +} + +Expected<const MachOConfig &> ConfigManager::getMachOConfig() const { + if (!Common.SplitDWO.empty() || !Common.SymbolsPrefix.empty() || + !Common.AllocSectionsPrefix.empty() || !Common.KeepSection.empty() || + !Common.SymbolsToGlobalize.empty() || !Common.SymbolsToKeep.empty() || + !Common.SymbolsToLocalize.empty() || !Common.SymbolsToWeaken.empty() || + !Common.SymbolsToKeepGlobal.empty() || !Common.SectionsToRename.empty() || + !Common.UnneededSymbolsToRemove.empty() || + !Common.SetSectionAlignment.empty() || !Common.SetSectionFlags.empty() || + !Common.SetSectionType.empty() || Common.ExtractDWO || + Common.PreserveDates || Common.StripAllGNU || Common.StripDWO || + Common.StripNonAlloc || Common.StripSections || Common.Weaken || + Common.DecompressDebugSections || 
Common.StripUnneeded || + Common.DiscardMode == DiscardType::Locals || !Common.SymbolsToAdd.empty()) + return createStringError(llvm::errc::invalid_argument, + "option is not supported for MachO"); + + return MachO; +} + +Expected<const WasmConfig &> ConfigManager::getWasmConfig() const { + if (!Common.AddGnuDebugLink.empty() || Common.ExtractPartition || + !Common.SplitDWO.empty() || !Common.SymbolsPrefix.empty() || + !Common.AllocSectionsPrefix.empty() || + Common.DiscardMode != DiscardType::None || !Common.SymbolsToAdd.empty() || + !Common.SymbolsToGlobalize.empty() || !Common.SymbolsToLocalize.empty() || + !Common.SymbolsToKeep.empty() || !Common.SymbolsToRemove.empty() || + !Common.UnneededSymbolsToRemove.empty() || + !Common.SymbolsToWeaken.empty() || !Common.SymbolsToKeepGlobal.empty() || + !Common.SectionsToRename.empty() || !Common.SetSectionAlignment.empty() || + !Common.SetSectionFlags.empty() || !Common.SetSectionType.empty() || + !Common.SymbolsToRename.empty()) + return createStringError(llvm::errc::invalid_argument, + "only flags for section dumping, removal, and " + "addition are supported"); + + return Wasm; +} + +Expected<const XCOFFConfig &> ConfigManager::getXCOFFConfig() const { + if (!Common.AddGnuDebugLink.empty() || Common.ExtractPartition || + !Common.SplitDWO.empty() || !Common.SymbolsPrefix.empty() || + !Common.AllocSectionsPrefix.empty() || + Common.DiscardMode != DiscardType::None || !Common.AddSection.empty() || + !Common.DumpSection.empty() || !Common.SymbolsToAdd.empty() || + !Common.KeepSection.empty() || !Common.OnlySection.empty() || + !Common.ToRemove.empty() || !Common.SymbolsToGlobalize.empty() || + !Common.SymbolsToKeep.empty() || !Common.SymbolsToLocalize.empty() || + !Common.SymbolsToRemove.empty() || + !Common.UnneededSymbolsToRemove.empty() || + !Common.SymbolsToWeaken.empty() || !Common.SymbolsToKeepGlobal.empty() || + !Common.SectionsToRename.empty() || !Common.SetSectionAlignment.empty() || + 
!Common.SetSectionFlags.empty() || !Common.SetSectionType.empty() || + !Common.SymbolsToRename.empty() || Common.ExtractDWO || + Common.ExtractMainPartition || Common.OnlyKeepDebug || + Common.PreserveDates || Common.StripAllGNU || Common.StripDWO || + Common.StripDebug || Common.StripNonAlloc || Common.StripSections || + Common.Weaken || Common.StripUnneeded || Common.DecompressDebugSections) { + return createStringError( + llvm::errc::invalid_argument, + "no flags are supported yet, only basic copying is allowed"); + } + + return XCOFF; +} + +} // end namespace objcopy +} // end namespace llvm diff --git a/contrib/libs/llvm16/lib/ObjCopy/ELF/ELFObjcopy.cpp b/contrib/libs/llvm16/lib/ObjCopy/ELF/ELFObjcopy.cpp new file mode 100644 index 00000000000..689c9152c7d --- /dev/null +++ b/contrib/libs/llvm16/lib/ObjCopy/ELF/ELFObjcopy.cpp @@ -0,0 +1,823 @@ +//===- ELFObjcopy.cpp -----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/ObjCopy/ELF/ELFObjcopy.h"
#include "ELFObject.h"
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/ObjCopy/CommonConfig.h"
#include "llvm/ObjCopy/ELF/ELFConfig.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ELFTypes.h"
#include "llvm/Object/Error.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Memory.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <functional>
#include <iterator>
#include <memory>
#include <string>
#include <system_error>
#include <utility>

using namespace llvm;
using namespace llvm::ELF;
using namespace llvm::objcopy;
using namespace llvm::objcopy::elf;
using namespace llvm::object;

// Predicate over a section; in this file a `true` result means "remove it".
using SectionPred = std::function<bool(const SectionBase &Sec)>;

// Returns true if the section name starts with ".debug" or is exactly
// ".gdb_index".
static bool isDebugSection(const SectionBase &Sec) {
  return StringRef(Sec.Name).startswith(".debug") || Sec.Name == ".gdb_index";
}

// Returns true if the section name ends with ".dwo".
static bool isDWOSection(const SectionBase &Sec) {
  return StringRef(Sec.Name).endswith(".dwo");
}

// Removal predicate that keeps only DWO sections: returns true (remove) for
// every section that is not a DWO section, except Obj.SectionNames, which is
// always kept.
static bool onlyKeepDWOPred(const Object &Obj, const SectionBase &Sec) {
  // We can't remove the section header string table.
  if (&Sec == Obj.SectionNames)
    return false;
  // Short of keeping the string table we want to keep everything that is a DWO
  // section and remove everything else.
  return !isDWOSection(Sec);
}

// Translates a SectionFlag bitmask into the corresponding ELF SHF_* bits.
// Note the inverted case: SHF_WRITE is set whenever SecReadonly is absent.
// SectionFlag values that are not tested below contribute nothing.
static uint64_t getNewShfFlags(SectionFlag AllFlags) {
  uint64_t NewFlags = 0;
  if (AllFlags & SectionFlag::SecAlloc)
    NewFlags |= ELF::SHF_ALLOC;
  if (!(AllFlags & SectionFlag::SecReadonly))
    NewFlags |= ELF::SHF_WRITE;
  if (AllFlags & SectionFlag::SecCode)
    NewFlags |= ELF::SHF_EXECINSTR;
  if (AllFlags & SectionFlag::SecMerge)
    NewFlags |= ELF::SHF_MERGE;
  if (AllFlags & SectionFlag::SecStrings)
    NewFlags |= ELF::SHF_STRINGS;
  if (AllFlags & SectionFlag::SecExclude)
    NewFlags |= ELF::SHF_EXCLUDE;
  return NewFlags;
}

// Merges two flag words: bits covered by PreserveMask are taken from OldFlags,
// all remaining bits are taken from NewFlags.
static uint64_t getSectionFlagsPreserveMask(uint64_t OldFlags,
                                            uint64_t NewFlags) {
  // Preserve some flags which should not be dropped when setting flags.
  // Also, preserve anything OS/processor dependent.
  // SHF_EXCLUDE is explicitly cleared from the mask, so that bit always comes
  // from NewFlags.
  const uint64_t PreserveMask =
      (ELF::SHF_COMPRESSED | ELF::SHF_GROUP | ELF::SHF_LINK_ORDER |
       ELF::SHF_MASKOS | ELF::SHF_MASKPROC | ELF::SHF_TLS |
       ELF::SHF_INFO_LINK) &
      ~ELF::SHF_EXCLUDE;
  return (OldFlags & PreserveMask) | (NewFlags & ~PreserveMask);
}

// Replaces the flags of Sec with those derived from Flags (keeping the
// preserved bits, see getSectionFlagsPreserveMask) and, where the rule below
// applies, changes the section type from SHT_NOBITS to SHT_PROGBITS.
static void setSectionFlagsAndType(SectionBase &Sec, SectionFlag Flags) {
  Sec.Flags = getSectionFlagsPreserveMask(Sec.Flags, getNewShfFlags(Flags));

  // In GNU objcopy, certain flags promote SHT_NOBITS to SHT_PROGBITS. This rule
  // may promote more non-ALLOC sections than GNU objcopy, but it is fine as
  // non-ALLOC SHT_NOBITS sections do not make much sense.
  if (Sec.Type == SHT_NOBITS &&
      (!(Sec.Flags & ELF::SHF_ALLOC) ||
       Flags & (SectionFlag::SecContents | SectionFlag::SecLoad)))
    Sec.Type = SHT_PROGBITS;
}

// Maps the concrete ELFObjectFile instantiation of Bin to the matching ElfType.
// Reaching the end means Bin was none of the four ELF object file kinds.
static ElfType getOutputElfType(const Binary &Bin) {
  // Infer output ELF type from the input ELF object
  if (isa<ELFObjectFile<ELF32LE>>(Bin))
    return ELFT_ELF32LE;
  if (isa<ELFObjectFile<ELF64LE>>(Bin))
    return ELFT_ELF64LE;
  if (isa<ELFObjectFile<ELF32BE>>(Bin))
    return ELFT_ELF32BE;
  if (isa<ELFObjectFile<ELF64BE>>(Bin))
    return ELFT_ELF64BE;
  llvm_unreachable("Invalid ELFType");
}

// Picks the ElfType from the Is64Bit and IsLittleEndian fields of MI.
static ElfType getOutputElfType(const MachineInfo &MI) {
  // Infer output ELF type from the binary arch specified
  if (MI.Is64Bit)
    return MI.IsLittleEndian ? ELFT_ELF64LE : ELFT_ELF64BE;
  else
    return MI.IsLittleEndian ? ELFT_ELF32LE : ELFT_ELF32BE;
}

// Creates the ELFWriter instantiation matching OutputElfType. Every case
// forwards the same arguments: Obj, Out, !Config.StripSections and
// Config.OnlyKeepDebug.
static std::unique_ptr<Writer> createELFWriter(const CommonConfig &Config,
                                               Object &Obj, raw_ostream &Out,
                                               ElfType OutputElfType) {
  // Depending on the initial ELFT and OutputFormat we need a different Writer.
  switch (OutputElfType) {
  case ELFT_ELF32LE:
    return std::make_unique<ELFWriter<ELF32LE>>(Obj, Out, !Config.StripSections,
                                                Config.OnlyKeepDebug);
  case ELFT_ELF64LE:
    return std::make_unique<ELFWriter<ELF64LE>>(Obj, Out, !Config.StripSections,
                                                Config.OnlyKeepDebug);
  case ELFT_ELF32BE:
    return std::make_unique<ELFWriter<ELF32BE>>(Obj, Out, !Config.StripSections,
                                                Config.OnlyKeepDebug);
  case ELFT_ELF64BE:
    return std::make_unique<ELFWriter<ELF64BE>>(Obj, Out, !Config.StripSections,
                                                Config.OnlyKeepDebug);
  }
  llvm_unreachable("Invalid output format");
}

// Creates the writer selected by Config.OutputFormat: BinaryWriter for Binary,
// IHexWriter for IHex, and an ELF writer for every other format value.
static std::unique_ptr<Writer> createWriter(const CommonConfig &Config,
                                            Object &Obj, raw_ostream &Out,
                                            ElfType OutputElfType) {
  switch (Config.OutputFormat) {
  case FileFormat::Binary:
    return std::make_unique<BinaryWriter>(Obj, Out);
  case FileFormat::IHex:
    return std::make_unique<IHexWriter>(Obj, Out);
  default:
    return createELFWriter(Config, Obj, Out, OutputElfType);
  }
}

// Builds an Error whose text is "<EC.message()>: <Msg>". The combined text is
// passed to createStringError as the format string, with Args forwarded as the
// format arguments.
template <class... Ts>
static Error makeStringError(std::error_code EC, const Twine &Msg,
                             Ts &&...Args) {
  std::string FullMsg = (EC.message() + ": " + Msg).str();
  return createStringError(EC, FullMsg.c_str(), std::forward<Ts>(Args)...);
}

// Writes the OriginalData of the first section named SecName to Filename.
// Returns a parse_failed error if that section is SHT_NOBITS or if no section
// has that name; errors from creating or committing the output buffer are
// returned unchanged.
static Error dumpSectionToFile(StringRef SecName, StringRef Filename,
                               Object &Obj) {
  for (auto &Sec : Obj.sections()) {
    if (Sec.Name == SecName) {
      if (Sec.Type == SHT_NOBITS)
        return createStringError(object_error::parse_failed,
                                 "cannot dump section '%s': it has no contents",
                                 SecName.str().c_str());
      Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
          FileOutputBuffer::create(Filename, Sec.OriginalData.size());
      if (!BufferOrErr)
        return BufferOrErr.takeError();
      std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr);
      std::copy(Sec.OriginalData.begin(), Sec.OriginalData.end(),
                Buf->getBufferStart());
      if (Error E = Buf->commit())
        return E;
      // Only the first section with a matching name is dumped.
      return Error::success();
    }
  }
  return createStringError(object_error::parse_failed, "section '%s' not found",
                           SecName.str().c_str());
}

// Returns true for sections whose name starts with ".debug" and that do not
// already carry SHF_COMPRESSED.
static bool isCompressable(const SectionBase &Sec) {
  return !(Sec.Flags & ELF::SHF_COMPRESSED) &&
         StringRef(Sec.Name).startswith(".debug");
}

// For every section accepted by ShouldReplace, calls AddSection to obtain its
// replacement and then hands the old->new mapping to Obj.replaceSections.
// Stops at, and returns, the first error produced by AddSection.
static Error replaceDebugSections(
    Object &Obj, function_ref<bool(const SectionBase &)> ShouldReplace,
    function_ref<Expected<SectionBase *>(const SectionBase *)> AddSection) {
  // Build a list of the debug sections we are going to replace.
  // We can't call `AddSection` while iterating over sections,
  // because it would mutate the sections array.
  SmallVector<SectionBase *, 13> ToReplace;
  for (auto &Sec : Obj.sections())
    if (ShouldReplace(Sec))
      ToReplace.push_back(&Sec);

  // Build a mapping from original section to a new one.
  DenseMap<SectionBase *, SectionBase *> FromTo;
  for (SectionBase *S : ToReplace) {
    Expected<SectionBase *> NewSection = AddSection(S);
    if (!NewSection)
      return NewSection.takeError();

    FromTo[S] = *NewSection;
  }

  return Obj.replaceSections(FromTo);
}

// Returns true for a defined, local, STT_NOTYPE symbol named "$x" or "$d",
// optionally followed by a suffix that begins with '.'.
static bool isAArch64MappingSymbol(const Symbol &Sym) {
  if (Sym.Binding != STB_LOCAL || Sym.Type != STT_NOTYPE ||
      Sym.getShndx() == SHN_UNDEF)
    return false;
  StringRef Name = Sym.Name;
  if (!Name.consume_front("$x") && !Name.consume_front("$d"))
    return false;
  return Name.empty() || Name.startswith(".");
}

// Returns true for a defined, local, STT_NOTYPE symbol named "$a", "$d" or
// "$t", optionally followed by a suffix that begins with '.'.
static bool isArmMappingSymbol(const Symbol &Sym) {
  if (Sym.Binding != STB_LOCAL || Sym.Type != STT_NOTYPE ||
      Sym.getShndx() == SHN_UNDEF)
    return false;
  StringRef Name = Sym.Name;
  if (!Name.consume_front("$a") && !Name.consume_front("$d") &&
      !Name.consume_front("$t"))
    return false;
  return Name.empty() || Name.startswith(".");
}

// Check if the symbol should be preserved because it is required by ABI.
// Only mapping symbols of relocatable AArch64/ARM objects are treated as
// required; every other machine type yields false.
static bool isRequiredByABISymbol(const Object &Obj, const Symbol &Sym) {
  switch (Obj.Machine) {
  case EM_AARCH64:
    // Mapping symbols should be preserved for a relocatable object file.
    return Obj.isRelocatable() && isAArch64MappingSymbol(Sym);
  case EM_ARM:
    // Mapping symbols should be preserved for a relocatable object file.
    return Obj.isRelocatable() && isArmMappingSymbol(Sym);
  default:
    return false;
  }
}

// A symbol is unneeded when it is not marked Referenced, is either local or
// undefined, and is not a section symbol.
static bool isUnneededSymbol(const Symbol &Sym) {
  return !Sym.Referenced &&
         (Sym.Binding == STB_LOCAL || Sym.getShndx() == SHN_UNDEF) &&
         Sym.Type != STT_SECTION;
}

// Applies the symbol-related options to Obj in two passes: first every symbol
// has its binding/name updated in place, then symbols selected by the removal
// predicate are removed. Succeeds immediately when Obj has no symbol table;
// otherwise returns the result of Obj.removeSymbols.
static Error updateAndRemoveSymbols(const CommonConfig &Config,
                                    const ELFConfig &ELFConfig, Object &Obj) {
  // TODO: update or remove symbols only if there is an option that affects
  // them.
  if (!Obj.SymbolTable)
    return Error::success();

  // The statements in this callback run in order for each symbol, so a later
  // rule sees (and may override) the binding written by an earlier one.
  Obj.SymbolTable->updateSymbols([&](Symbol &Sym) {
    // Common and undefined symbols don't make sense as local symbols, and can
    // even cause crashes if we localize those, so skip them.
    if (!Sym.isCommon() && Sym.getShndx() != SHN_UNDEF &&
        ((ELFConfig.LocalizeHidden &&
          (Sym.Visibility == STV_HIDDEN || Sym.Visibility == STV_INTERNAL)) ||
         Config.SymbolsToLocalize.matches(Sym.Name)))
      Sym.Binding = STB_LOCAL;

    // Note: these two globalize flags have very similar names but different
    // meanings:
    //
    //     --globalize-symbol: promote a symbol to global
    //     --keep-global-symbol: all symbols except for these should be made local
    //
    // If --globalize-symbol is specified for a given symbol, it will be
    // global in the output file even if it is not included via
    // --keep-global-symbol. Because of that, make sure to check
    // --globalize-symbol second.
    if (!Config.SymbolsToKeepGlobal.empty() &&
        !Config.SymbolsToKeepGlobal.matches(Sym.Name) &&
        Sym.getShndx() != SHN_UNDEF)
      Sym.Binding = STB_LOCAL;

    if (Config.SymbolsToGlobalize.matches(Sym.Name) &&
        Sym.getShndx() != SHN_UNDEF)
      Sym.Binding = STB_GLOBAL;

    // SymbolsToWeaken applies to both STB_GLOBAL and STB_GNU_UNIQUE.
    if (Config.SymbolsToWeaken.matches(Sym.Name) && Sym.Binding != STB_LOCAL)
      Sym.Binding = STB_WEAK;

    if (Config.Weaken && Sym.Binding != STB_LOCAL &&
        Sym.getShndx() != SHN_UNDEF)
      Sym.Binding = STB_WEAK;

    // Renaming happens before prefixing, so a renamed symbol also receives
    // the prefix below.
    const auto I = Config.SymbolsToRename.find(Sym.Name);
    if (I != Config.SymbolsToRename.end())
      Sym.Name = std::string(I->getValue());

    if (!Config.SymbolsPrefix.empty() && Sym.Type != STT_SECTION)
      Sym.Name = (Config.SymbolsPrefix + Sym.Name).str();
  });

  // The purpose of this loop is to mark symbols referenced by sections
  // (like GroupSection or RelocationSection). This way, we know which
  // symbols are still 'needed' and which are not.
  if (Config.StripUnneeded || !Config.UnneededSymbolsToRemove.empty() ||
      !Config.OnlySection.empty()) {
    for (SectionBase &Sec : Obj.sections())
      Sec.markSymbols();
  }

  // Returns true when Sym must be removed. The checks are evaluated top to
  // bottom and the first one that matches decides the outcome.
  auto RemoveSymbolsPred = [&](const Symbol &Sym) {
    if (Config.SymbolsToKeep.matches(Sym.Name) ||
        (ELFConfig.KeepFileSymbols && Sym.Type == STT_FILE))
      return false;

    if (Config.SymbolsToRemove.matches(Sym.Name))
      return true;

    if (Config.StripAll || Config.StripAllGNU)
      return true;

    if (isRequiredByABISymbol(Obj, Sym))
      return false;

    if (Config.StripDebug && Sym.Type == STT_FILE)
      return true;

    if ((Config.DiscardMode == DiscardType::All ||
         (Config.DiscardMode == DiscardType::Locals &&
          StringRef(Sym.Name).startswith(".L"))) &&
        Sym.Binding == STB_LOCAL && Sym.getShndx() != SHN_UNDEF &&
        Sym.Type != STT_FILE && Sym.Type != STT_SECTION)
      return true;

    if ((Config.StripUnneeded ||
         Config.UnneededSymbolsToRemove.matches(Sym.Name)) &&
        (!Obj.isRelocatable() || isUnneededSymbol(Sym)))
      return true;

    // We want to remove undefined symbols if all references have been stripped.
    if (!Config.OnlySection.empty() && !Sym.Referenced &&
        Sym.getShndx() == SHN_UNDEF)
      return true;

    return false;
  };

  return Obj.removeSymbols(RemoveSymbolsPred);
}

// Builds the section removal predicate from the options, removes the matching
// sections, and then compresses or decompresses debug sections if requested.
//
// RemovePred starts as "remove nothing". Each enabled option replaces it with
// a lambda that captures the previous predicate by value and consults it, so
// the assignments below form a chain and their order is significant.
static Error replaceAndRemoveSections(const CommonConfig &Config,
                                      const ELFConfig &ELFConfig, Object &Obj) {
  SectionPred RemovePred = [](const SectionBase &) { return false; };

  // Removes:
  if (!Config.ToRemove.empty()) {
    RemovePred = [&Config](const SectionBase &Sec) {
      return Config.ToRemove.matches(Sec.Name);
    };
  }

  if (Config.StripDWO)
    RemovePred = [RemovePred](const SectionBase &Sec) {
      return isDWOSection(Sec) || RemovePred(Sec);
    };

  if (Config.ExtractDWO)
    RemovePred = [RemovePred, &Obj](const SectionBase &Sec) {
      return onlyKeepDWOPred(Obj, Sec) || RemovePred(Sec);
    };

  if (Config.StripAllGNU)
    RemovePred = [RemovePred, &Obj](const SectionBase &Sec) {
      if (RemovePred(Sec))
        return true;
      if ((Sec.Flags & SHF_ALLOC) != 0)
        return false;
      if (&Sec == Obj.SectionNames)
        return false;
      switch (Sec.Type) {
      case SHT_SYMTAB:
      case SHT_REL:
      case SHT_RELA:
      case SHT_STRTAB:
        return true;
      }
      return isDebugSection(Sec);
    };

  if (Config.StripSections) {
    RemovePred = [RemovePred](const SectionBase &Sec) {
      return RemovePred(Sec) || Sec.ParentSegment == nullptr;
    };
  }

  if (Config.StripDebug || Config.StripUnneeded) {
    RemovePred = [RemovePred](const SectionBase &Sec) {
      return RemovePred(Sec) || isDebugSection(Sec);
    };
  }

  if (Config.StripNonAlloc)
    RemovePred = [RemovePred, &Obj](const SectionBase &Sec) {
      if (RemovePred(Sec))
        return true;
      if (&Sec == Obj.SectionNames)
        return false;
      return (Sec.Flags & SHF_ALLOC) == 0 && Sec.ParentSegment == nullptr;
    };

  if (Config.StripAll)
    RemovePred = [RemovePred, &Obj](const SectionBase &Sec) {
      if (RemovePred(Sec))
        return true;
      if (&Sec == Obj.SectionNames)
        return false;
      if (StringRef(Sec.Name).startswith(".gnu.warning"))
        return false;
      // We keep the .ARM.attribute section to maintain compatibility
      // with Debian derived distributions. This is a bug in their
      // patchset as documented here:
      // https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=943798
      if (Sec.Type == SHT_ARM_ATTRIBUTES)
        return false;
      if (Sec.ParentSegment != nullptr)
        return false;
      return (Sec.Flags & SHF_ALLOC) == 0;
    };

  if (Config.ExtractPartition || Config.ExtractMainPartition) {
    RemovePred = [RemovePred](const SectionBase &Sec) {
      if (RemovePred(Sec))
        return true;
      if (Sec.Type == SHT_LLVM_PART_EHDR || Sec.Type == SHT_LLVM_PART_PHDR)
        return true;
      return (Sec.Flags & SHF_ALLOC) != 0 && !Sec.ParentSegment;
    };
  }

  // Explicit copies:
  if (!Config.OnlySection.empty()) {
    RemovePred = [&Config, RemovePred, &Obj](const SectionBase &Sec) {
      // Explicitly keep these sections regardless of previous removes.
      if (Config.OnlySection.matches(Sec.Name))
        return false;

      // Allow all implicit removes.
      if (RemovePred(Sec))
        return true;

      // Keep special sections.
      if (Obj.SectionNames == &Sec)
        return false;
      if (Obj.SymbolTable == &Sec ||
          (Obj.SymbolTable && Obj.SymbolTable->getStrTab() == &Sec))
        return false;

      // Remove everything else.
      return true;
    };
  }

  if (!Config.KeepSection.empty()) {
    RemovePred = [&Config, RemovePred](const SectionBase &Sec) {
      // Explicitly keep these sections regardless of previous removes.
      if (Config.KeepSection.matches(Sec.Name))
        return false;
      // Otherwise defer to RemovePred.
      return RemovePred(Sec);
    };
  }

  // This has to be the last predicate assignment.
  // If the option --keep-symbol has been specified
  // and at least one of those symbols is present
  // (equivalently, the updated symbol table is not empty)
  // the symbol table and the string table should not be removed.
  if ((!Config.SymbolsToKeep.empty() || ELFConfig.KeepFileSymbols) &&
      Obj.SymbolTable && !Obj.SymbolTable->empty()) {
    RemovePred = [&Obj, RemovePred](const SectionBase &Sec) {
      if (&Sec == Obj.SymbolTable || &Sec == Obj.SymbolTable->getStrTab())
        return false;
      return RemovePred(Sec);
    };
  }

  if (Error E = Obj.removeSections(ELFConfig.AllowBrokenLinks, RemovePred))
    return E;

  // Compression takes precedence: decompression is only considered when no
  // compression type was requested.
  if (Config.CompressionType != DebugCompressionType::None) {
    if (Error Err = replaceDebugSections(
            Obj, isCompressable,
            [&Config, &Obj](const SectionBase *S) -> Expected<SectionBase *> {
              return &Obj.addSection<CompressedSection>(
                  CompressedSection(*S, Config.CompressionType, Obj.Is64Bits));
            }))
      return Err;
  } else if (Config.DecompressDebugSections) {
    if (Error Err = replaceDebugSections(
            Obj,
            [](const SectionBase &S) { return isa<CompressedSection>(&S); },
            [&Obj](const SectionBase *S) {
              const CompressedSection *CS = cast<CompressedSection>(S);
              return &Obj.addSection<DecompressedSection>(*CS);
            }))
      return Err;
  }

  return Error::success();
}

// Add symbol to the Object symbol table with the specified properties.
//
// If SymInfo.SectionName names an existing section, the symbol value is that
// section's address plus SymInfo.Value; otherwise the value is used as given
// and the symbol is added with SHN_ABS. Defaults are global binding, no type
// and DefaultVisibility; flags are applied in order, so a later flag of the
// same category overrides an earlier one.
// Obj.SymbolTable is dereferenced without a check, so it must be non-null.
static void addSymbol(Object &Obj, const NewSymbolInfo &SymInfo,
                      uint8_t DefaultVisibility) {
  SectionBase *Sec = Obj.findSection(SymInfo.SectionName);
  uint64_t Value = Sec ? Sec->Addr + SymInfo.Value : SymInfo.Value;

  uint8_t Bind = ELF::STB_GLOBAL;
  uint8_t Type = ELF::STT_NOTYPE;
  uint8_t Visibility = DefaultVisibility;

  for (SymbolFlag FlagValue : SymInfo.Flags)
    switch (FlagValue) {
    case SymbolFlag::Global:
      Bind = ELF::STB_GLOBAL;
      break;
    case SymbolFlag::Local:
      Bind = ELF::STB_LOCAL;
      break;
    case SymbolFlag::Weak:
      Bind = ELF::STB_WEAK;
      break;
    case SymbolFlag::Default:
      Visibility = ELF::STV_DEFAULT;
      break;
    case SymbolFlag::Hidden:
      Visibility = ELF::STV_HIDDEN;
      break;
    case SymbolFlag::Protected:
      Visibility = ELF::STV_PROTECTED;
      break;
    case SymbolFlag::File:
      Type = ELF::STT_FILE;
      break;
    case SymbolFlag::Section:
      Type = ELF::STT_SECTION;
      break;
    case SymbolFlag::Object:
      Type = ELF::STT_OBJECT;
      break;
    case SymbolFlag::Function:
      Type = ELF::STT_FUNC;
      break;
    case SymbolFlag::IndirectFunction:
      Type = ELF::STT_GNU_IFUNC;
      break;
    default: /* Other flag values are ignored for ELF. */
      break;
    };

  Obj.SymbolTable->addSymbol(
      SymInfo.SymbolName, Bind, Type, Sec, Value, Visibility,
      Sec ? (uint16_t)SYMBOL_SIMPLE_INDEX : (uint16_t)SHN_ABS, 0);
}

// Exposes the contents of NewSection.SectionData as an ArrayRef<uint8_t> and
// calls F with the section name and that data, returning F's result.
static Error
handleUserSection(const NewSectionInfo &NewSection,
                  function_ref<Error(StringRef, ArrayRef<uint8_t>)> F) {
  ArrayRef<uint8_t> Data(reinterpret_cast<const uint8_t *>(
                             NewSection.SectionData->getBufferStart()),
                         NewSection.SectionData->getBufferSize());
  return F(NewSection.SectionName, Data);
}

// This function handles the high level operations of GNU objcopy including
// handling command line options. It's important to outline certain properties
// we expect to hold of the command line operations. Any operation that "keeps"
// should keep regardless of a remove. Additionally any removal should respect
// any previous removals. Lastly whether or not something is removed shouldn't
// depend a) on the order the options occur in or b) on some opaque priority
// system.
// The only priority is that keeps/copies overrule removes.
//
// Applies the options in Config/ELFConfig to Obj in a fixed sequence and
// returns the first error encountered. When both SplitDWO and ExtractDWO are
// set, only the non-DWO sections are removed and the function returns right
// away without running any of the later steps.
static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig,
                        Object &Obj) {
  if (Config.OutputArch) {
    Obj.Machine = Config.OutputArch->EMachine;
    Obj.OSABI = Config.OutputArch->OSABI;
  }

  if (!Config.SplitDWO.empty() && Config.ExtractDWO) {
    return Obj.removeSections(
        ELFConfig.AllowBrokenLinks,
        [&Obj](const SectionBase &Sec) { return onlyKeepDWOPred(Obj, Sec); });
  }

  // Dump sections before add/remove for compatibility with GNU objcopy.
  for (StringRef Flag : Config.DumpSection) {
    StringRef SectionName;
    StringRef FileName;
    // Each entry has the form "<section>=<file>", split at the first '='.
    std::tie(SectionName, FileName) = Flag.split('=');
    if (Error E = dumpSectionToFile(SectionName, FileName, Obj))
      return E;
  }

  // It is important to remove the sections first. For example, we want to
  // remove the relocation sections before removing the symbols. That allows
  // us to avoid reporting the inappropriate errors about removing symbols
  // named in relocations.
  if (Error E = replaceAndRemoveSections(Config, ELFConfig, Obj))
    return E;

  if (Error E = updateAndRemoveSymbols(Config, ELFConfig, Obj))
    return E;

  if (!Config.SetSectionAlignment.empty()) {
    for (SectionBase &Sec : Obj.sections()) {
      auto I = Config.SetSectionAlignment.find(Sec.Name);
      if (I != Config.SetSectionAlignment.end())
        Sec.Align = I->second;
    }
  }

  // With OnlyKeepDebug, every SHF_ALLOC section other than SHT_NOTE is turned
  // into SHT_NOBITS.
  if (Config.OnlyKeepDebug)
    for (auto &Sec : Obj.sections())
      if (Sec.Flags & SHF_ALLOC && Sec.Type != SHT_NOTE)
        Sec.Type = SHT_NOBITS;

  for (const NewSectionInfo &AddedSection : Config.AddSection) {
    auto AddSection = [&](StringRef Name, ArrayRef<uint8_t> Data) {
      OwnedDataSection &NewSection =
          Obj.addSection<OwnedDataSection>(Name, Data);
      // Added sections named ".note*" become SHT_NOTE, with the single
      // exception of ".note.GNU-stack".
      if (Name.startswith(".note") && Name != ".note.GNU-stack")
        NewSection.Type = SHT_NOTE;
      return Error::success();
    };
    if (Error E = handleUserSection(AddedSection, AddSection))
      return E;
  }

  for (const NewSectionInfo &NewSection : Config.UpdateSection) {
    auto UpdateSection = [&](StringRef Name, ArrayRef<uint8_t> Data) {
      return Obj.updateSection(Name, Data);
    };
    if (Error E = handleUserSection(NewSection, UpdateSection))
      return E;
  }

  if (!Config.AddGnuDebugLink.empty())
    Obj.addSection<GnuDebugLinkSection>(Config.AddGnuDebugLink,
                                        Config.GnuDebugLinkCRC32);

  // If the symbol table was previously removed, we need to create a new one
  // before adding new symbols.
  if (!Obj.SymbolTable && !Config.SymbolsToAdd.empty())
    if (Error E = Obj.addNewSymbolTable())
      return E;

  for (const NewSymbolInfo &SI : Config.SymbolsToAdd)
    addSymbol(Obj, SI, ELFConfig.NewSymbolVisibility);

  // --set-section-{flags,type} work with sections added by --add-section.
  if (!Config.SetSectionFlags.empty() || !Config.SetSectionType.empty()) {
    for (auto &Sec : Obj.sections()) {
      const auto Iter = Config.SetSectionFlags.find(Sec.Name);
      if (Iter != Config.SetSectionFlags.end()) {
        const SectionFlagsUpdate &SFU = Iter->second;
        setSectionFlagsAndType(Sec, SFU.NewFlags);
      }
      // An explicit type is applied after the flags, so it overrides any type
      // change made by setSectionFlagsAndType.
      auto It2 = Config.SetSectionType.find(Sec.Name);
      if (It2 != Config.SetSectionType.end())
        Sec.Type = It2->second;
    }
  }

  if (!Config.SectionsToRename.empty()) {
    std::vector<RelocationSectionBase *> RelocSections;
    DenseSet<SectionBase *> RenamedSections;
    for (SectionBase &Sec : Obj.sections()) {
      auto *RelocSec = dyn_cast<RelocationSectionBase>(&Sec);
      const auto Iter = Config.SectionsToRename.find(Sec.Name);
      if (Iter != Config.SectionsToRename.end()) {
        const SectionRename &SR = Iter->second;
        Sec.Name = std::string(SR.NewName);
        if (SR.NewFlags)
          setSectionFlagsAndType(Sec, *SR.NewFlags);
        RenamedSections.insert(&Sec);
      } else if (RelocSec && !(Sec.Flags & SHF_ALLOC))
        // Postpone processing relocation sections which are not specified in
        // their explicit '--rename-section' commands until after their target
        // sections are renamed.
        // Dynamic relocation sections (i.e. ones with SHF_ALLOC) should be
        // renamed only explicitly. Otherwise, renaming, for example, '.got.plt'
        // would affect '.rela.plt', which is not desirable.
        RelocSections.push_back(RelocSec);
    }

    // Rename relocation sections according to their target sections.
    for (RelocationSectionBase *RelocSec : RelocSections) {
      auto Iter = RenamedSections.find(RelocSec->getSection());
      if (Iter != RenamedSections.end())
        RelocSec->Name = (RelocSec->getNamePrefix() + (*Iter)->Name).str();
    }
  }

  // Add a prefix to allocated sections and their relocation sections. This
  // should be done after renaming the section by Config.SectionToRename to
  // imitate the GNU objcopy behavior.
  if (!Config.AllocSectionsPrefix.empty()) {
    DenseSet<SectionBase *> PrefixedSections;
    for (SectionBase &Sec : Obj.sections()) {
      if (Sec.Flags & SHF_ALLOC) {
        Sec.Name = (Config.AllocSectionsPrefix + Sec.Name).str();
        PrefixedSections.insert(&Sec);
      } else if (auto *RelocSec = dyn_cast<RelocationSectionBase>(&Sec)) {
        // Rename relocation sections associated to the allocated sections.
        // For example, if we rename .text to .prefix.text, we also rename
        // .rel.text to .rel.prefix.text.
        //
        // Dynamic relocation sections (SHT_REL[A] with SHF_ALLOC) are handled
        // above, e.g., .rela.plt is renamed to .prefix.rela.plt, not
        // .rela.prefix.plt since GNU objcopy does so.
        const SectionBase *TargetSec = RelocSec->getSection();
        if (TargetSec && (TargetSec->Flags & SHF_ALLOC)) {
          // If the relocation section comes *after* the target section, we
          // don't add Config.AllocSectionsPrefix because we've already added
          // the prefix to TargetSec->Name. Otherwise, if the relocation
          // section comes *before* the target section, we add the prefix.
          if (PrefixedSections.count(TargetSec))
            Sec.Name = (RelocSec->getNamePrefix() + TargetSec->Name).str();
          else
            Sec.Name = (RelocSec->getNamePrefix() + Config.AllocSectionsPrefix +
                        TargetSec->Name)
                           .str();
        }
      }
    }
  }

  // EntryExpr, when set, maps the current entry address to the new one.
  if (ELFConfig.EntryExpr)
    Obj.Entry = ELFConfig.EntryExpr(Obj.Entry);
  return Error::success();
}

// Creates the writer for Config.OutputFormat, finalizes it and writes Obj to
// Out. A finalize() error is returned without attempting the write.
static Error writeOutput(const CommonConfig &Config, Object &Obj,
                         raw_ostream &Out, ElfType OutputElfType) {
  std::unique_ptr<Writer> Writer =
      createWriter(Config, Obj, Out, OutputElfType);
  if (Error E = Writer->finalize())
    return E;
  return Writer->write();
}

// Reads In with IHexReader, applies the options and writes the result to Out.
// The output ELF type comes from Config.OutputArch, or from a
// default-constructed MachineInfo when OutputArch is not set.
Error objcopy::elf::executeObjcopyOnIHex(const CommonConfig &Config,
                                         const ELFConfig &ELFConfig,
                                         MemoryBuffer &In, raw_ostream &Out) {
  IHexReader Reader(&In);
  Expected<std::unique_ptr<Object>> Obj = Reader.create(true);
  if (!Obj)
    return Obj.takeError();

  const ElfType OutputElfType =
      getOutputElfType(Config.OutputArch.value_or(MachineInfo()));
  if (Error E = handleArgs(Config, ELFConfig, **Obj))
    return E;
  return writeOutput(Config, **Obj, Out, OutputElfType);
}

// Reads In with BinaryReader (constructed with ELFConfig.NewSymbolVisibility),
// applies the options and writes the result to Out.
Error objcopy::elf::executeObjcopyOnRawBinary(const CommonConfig &Config,
                                              const ELFConfig &ELFConfig,
                                              MemoryBuffer &In,
                                              raw_ostream &Out) {
  BinaryReader Reader(&In, ELFConfig.NewSymbolVisibility);
  Expected<std::unique_ptr<Object>> Obj = Reader.create(true);
  if (!Obj)
    return Obj.takeError();

  // Prefer OutputArch (-O<format>) if set, otherwise fallback to BinaryArch
  // (-B<arch>).
  // NOTE(review): no BinaryArch is referenced here; the fallback that this
  // code actually uses is a default-constructed MachineInfo. Confirm whether
  // the comment above is stale.
  const ElfType OutputElfType =
      getOutputElfType(Config.OutputArch.value_or(MachineInfo()));
  if (Error E = handleArgs(Config, ELFConfig, **Obj))
    return E;
  return writeOutput(Config, **Obj, Out, OutputElfType);
}

// Reads the ELF object In, applies the options and writes the result to Out.
// Unlike the IHex and raw-binary entry points, errors from handleArgs and
// writeOutput are wrapped with Config.InputFilename; a reader error is
// returned unwrapped.
Error objcopy::elf::executeObjcopyOnBinary(const CommonConfig &Config,
                                           const ELFConfig &ELFConfig,
                                           object::ELFObjectFileBase &In,
                                           raw_ostream &Out) {
  ELFReader Reader(&In, Config.ExtractPartition);
  Expected<std::unique_ptr<Object>> Obj =
      Reader.create(!Config.SymbolsToAdd.empty());
  if (!Obj)
    return Obj.takeError();
  // Prefer OutputArch (-O<format>) if set, otherwise infer it from the input.
  const ElfType OutputElfType = Config.OutputArch
                                    ? getOutputElfType(*Config.OutputArch)
                                    : getOutputElfType(In);

  if (Error E = handleArgs(Config, ELFConfig, **Obj))
    return createFileError(Config.InputFilename, std::move(E));

  if (Error E = writeOutput(Config, **Obj, Out, OutputElfType))
    return createFileError(Config.InputFilename, std::move(E));

  return Error::success();
}
diff --git a/contrib/libs/llvm16/lib/ObjCopy/ELF/ELFObject.cpp b/contrib/libs/llvm16/lib/ObjCopy/ELF/ELFObject.cpp
new file mode 100644
index 00000000000..ea6dadabace
--- /dev/null
+++ b/contrib/libs/llvm16/lib/ObjCopy/ELF/ELFObject.cpp
@@ -0,0 +1,2782 @@
//===- ELFObject.cpp ------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ELFObject.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Object/ELF.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/Support/Path.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <unordered_set>
#include <utility>
#include <vector>

using namespace llvm;
using namespace llvm::ELF;
using namespace llvm::objcopy::elf;
using namespace llvm::object;

// Writes the program header for Seg into the output buffer. The entry is
// located at the offset of Obj.ProgramHdrSegment plus Seg.Index entries of
// size sizeof(Elf_Phdr).
template <class ELFT> void ELFWriter<ELFT>::writePhdr(const Segment &Seg) {
  uint8_t *B = reinterpret_cast<uint8_t *>(Buf->getBufferStart()) +
               Obj.ProgramHdrSegment.Offset + Seg.Index * sizeof(Elf_Phdr);
  Elf_Phdr &Phdr = *reinterpret_cast<Elf_Phdr *>(B);
  Phdr.p_type = Seg.Type;
  Phdr.p_flags = Seg.Flags;
  Phdr.p_offset = Seg.Offset;
  Phdr.p_vaddr = Seg.VAddr;
  Phdr.p_paddr = Seg.PAddr;
  Phdr.p_filesz = Seg.FileSize;
  Phdr.p_memsz = Seg.MemSize;
  Phdr.p_align = Seg.Align;
}

// The SectionBase implementations below do nothing; those that return an
// Error report success. Presumably derived section classes override them
// where they need real behaviour - the declarations are not visible here.
Error SectionBase::removeSectionReferences(
    bool, function_ref<bool(const SectionBase *)>) {
  return Error::success();
}

Error SectionBase::removeSymbols(function_ref<bool(const Symbol &)>) {
  return Error::success();
}

Error SectionBase::initialize(SectionTableRef) { return Error::success(); }
void SectionBase::finalize() {}
void SectionBase::markSymbols() {}
void SectionBase::replaceSectionReferences(
    const DenseMap<SectionBase *, SectionBase *> &) {}
void SectionBase::onRemove() {}

// Writes the section header for Sec into the output buffer at
// Sec.HeaderOffset, copying each field from Sec.
template <class ELFT> void ELFWriter<ELFT>::writeShdr(const SectionBase &Sec) {
  uint8_t *B =
      reinterpret_cast<uint8_t *>(Buf->getBufferStart()) + Sec.HeaderOffset;
  Elf_Shdr &Shdr = *reinterpret_cast<Elf_Shdr *>(B);
  Shdr.sh_name = Sec.NameIndex;
  Shdr.sh_type = Sec.Type;
  Shdr.sh_flags = Sec.Flags;
  Shdr.sh_addr = Sec.Addr;
  Shdr.sh_offset = Sec.Offset;
  Shdr.sh_size = Sec.Size;
  Shdr.sh_link = Sec.Link;
  Shdr.sh_info = Sec.Info;
  Shdr.sh_addralign = Sec.Align;
  Shdr.sh_entsize = Sec.EntrySize;
}

// ELFSectionSizer visitors. Only the symbol table, relocation and group
// section overloads change anything; the other overloads leave the section
// untouched and report success.
template <class ELFT> Error ELFSectionSizer<ELFT>::visit(Section &) {
  return Error::success();
}

template <class ELFT> Error ELFSectionSizer<ELFT>::visit(OwnedDataSection &) {
  return Error::success();
}

template <class ELFT> Error ELFSectionSizer<ELFT>::visit(StringTableSection &) {
  return Error::success();
}

template <class ELFT>
Error ELFSectionSizer<ELFT>::visit(DynamicRelocationSection &) {
  return Error::success();
}

// Sets the entry size to sizeof(Elf_Sym) and the section size to one entry
// per symbol.
template <class ELFT>
Error ELFSectionSizer<ELFT>::visit(SymbolTableSection &Sec) {
  Sec.EntrySize = sizeof(Elf_Sym);
  Sec.Size = Sec.Symbols.size() * Sec.EntrySize;
  // Align to the largest field in Elf_Sym.
  Sec.Align = ELFT::Is64Bits ? sizeof(Elf_Xword) : sizeof(Elf_Word);
  return Error::success();
}

// Sets the entry size from the relocation kind (Elf_Rel for SHT_REL, Elf_Rela
// otherwise) and the section size to one entry per relocation.
template <class ELFT>
Error ELFSectionSizer<ELFT>::visit(RelocationSection &Sec) {
  Sec.EntrySize = Sec.Type == SHT_REL ? sizeof(Elf_Rel) : sizeof(Elf_Rela);
  Sec.Size = Sec.Relocations.size() * Sec.EntrySize;
  // Align to the largest field in Elf_Rel(a).
  Sec.Align = ELFT::Is64Bits ? sizeof(Elf_Xword) : sizeof(Elf_Word);
  return Error::success();
}

template <class ELFT>
Error ELFSectionSizer<ELFT>::visit(GnuDebugLinkSection &) {
  return Error::success();
}

// Size is one Elf_Word per group member plus one extra Elf_Word (presumably
// the leading group flag word - confirm against the group section writer).
template <class ELFT> Error ELFSectionSizer<ELFT>::visit(GroupSection &Sec) {
  Sec.Size = sizeof(Elf_Word) + Sec.GroupMembers.size() * sizeof(Elf_Word);
  return Error::success();
}

template <class ELFT>
Error ELFSectionSizer<ELFT>::visit(SectionIndexSection &) {
  return Error::success();
}

template <class ELFT> Error ELFSectionSizer<ELFT>::visit(CompressedSection &) {
  return Error::success();
}

template <class ELFT>
Error ELFSectionSizer<ELFT>::visit(DecompressedSection &) {
  return Error::success();
}

// The BinarySectionWriter overloads below reject section kinds that cannot be
// written to binary output; each returns an operation_not_permitted error
// naming the section.
// NOTE(review): unlike its siblings, this message ends with "' " and has no
// "out to binary" suffix - possibly a truncated message; left unchanged.
Error BinarySectionWriter::visit(const SectionIndexSection &Sec) {
  return createStringError(errc::operation_not_permitted,
                           "cannot write symbol section index table '" +
                               Sec.Name + "' ");
}

Error BinarySectionWriter::visit(const SymbolTableSection &Sec) {
  return createStringError(errc::operation_not_permitted,
                           "cannot write symbol table '" + Sec.Name +
                               "' out to binary");
}

Error BinarySectionWriter::visit(const RelocationSection &Sec) {
  return createStringError(errc::operation_not_permitted,
                           "cannot write relocation section '" + Sec.Name +
                               "' out to binary");
}

Error BinarySectionWriter::visit(const GnuDebugLinkSection &Sec) {
  return createStringError(errc::operation_not_permitted,
                           "cannot write '" + Sec.Name + "' out to binary");
}

Error BinarySectionWriter::visit(const GroupSection &Sec) {
  return createStringError(errc::operation_not_permitted,
                           "cannot write '" + Sec.Name + "' out to binary");
}

// Copies the section contents into the output buffer at Sec.Offset. Nothing
// is copied for SHT_NOBITS sections.
Error SectionWriter::visit(const Section &Sec) {
  if (Sec.Type != SHT_NOBITS)
    llvm::copy(Sec.Contents, Out.getBufferStart() + Sec.Offset);

  return Error::success();
}

// Returns true if Addr does not fit in 32 bits. The second test relies on
// unsigned 64-bit wrap-around: for a sign-extended negative 32-bit address,
// adding 0x80000000 wraps to a value no larger than UINT32_MAX.
static bool addressOverflows32bit(uint64_t Addr) {
  // Sign extended 32 bit addresses (e.g 0xFFFFFFFF80000000) are ok
  return Addr > UINT32_MAX && Addr + 0x80000000 > UINT32_MAX;
}

// Parses S as a base-16 integer of type T. A parse failure is only caught by
// the assert; when assertions are compiled out the failure flag is ignored,
// so callers must pass text that is already known to be valid hex.
template <class T> static T checkedGetHex(StringRef S) {
  T Value;
  bool Fail = S.getAsInteger(16, Value);
  assert(!Fail);
  (void)Fail;
  return Value;
}

// Fills exactly Len bytes of buffer with hexadecimal characters
// representing value 'X'
// Digits are written from the last position backwards, lowest nibble first.
// The trailing assert checks that X fits in Len hex digits. Returns the
// iterator just past the written range.
template <class T, class Iterator>
static Iterator toHexStr(T X, Iterator It, size_t Len) {
  // Fill range with '0'
  std::fill(It, It + Len, '0');

  for (long I = Len - 1; I >= 0; --I) {
    unsigned char Mod = static_cast<unsigned char>(X) & 15;
    *(It + I) = hexdigit(Mod, false);
    X >>= 4;
  }
  assert(X == 0);
  return It + Len;
}

// Computes the checksum of a string of hex digit pairs: the byte values are
// summed in a uint8_t (wrapping) and the negated sum is returned. S must have
// an even length.
uint8_t IHexRecord::getChecksum(StringRef S) {
  assert((S.size() & 1) == 0);
  uint8_t Checksum = 0;
  while (!S.empty()) {
    Checksum += checkedGetHex<uint8_t>(S.take_front(2));
    S = S.drop_front(2);
  }
  return -Checksum;
}

// Builds one record line: ':' followed by the data length (2 hex digits), the
// address (4), the record type (2), the data bytes (2 each), the checksum (2)
// and "\r\n". The checksum covers everything between the ':' and the checksum
// field itself.
IHexLineData IHexRecord::getLine(uint8_t Type, uint16_t Addr,
                                 ArrayRef<uint8_t> Data) {
  IHexLineData Line(getLineLength(Data.size()));
  assert(Line.size());
  auto Iter = Line.begin();
  *Iter++ = ':';
  Iter = toHexStr(Data.size(), Iter, 2);
  Iter = toHexStr(Addr, Iter, 4);
  Iter = toHexStr(Type, Iter, 2);
  for (uint8_t X : Data)
    Iter = toHexStr(X, Iter, 2);
  StringRef S(Line.data() + 1, std::distance(Line.begin() + 1, Iter));
  Iter = toHexStr(getChecksum(S), Iter, 2);
  *Iter++ = '\r';
  *Iter++ = '\n';
  assert(Iter == Line.end());
  return Line;
}

// Validates the payload of a record against its type and returns an
// invalid_argument error describing the first violation. HexData sizes are
// counted in hex characters, i.e. two per data byte.
static Error checkRecord(const IHexRecord &R) {
  switch (R.Type) {
  case IHexRecord::Data:
    if (R.HexData.size() == 0)
      return createStringError(
          errc::invalid_argument,
          "zero data length is not allowed for data records");
    break;
  case IHexRecord::EndOfFile:
    break;
  case IHexRecord::SegmentAddr:
    // 20-bit segment address. Data length must be 2 bytes
    // (4 bytes in hex)
    if (R.HexData.size() != 4)
      return createStringError(
          errc::invalid_argument,
          "segment address data should be 2 bytes in size");
    break;
  case IHexRecord::StartAddr80x86:
  case IHexRecord::StartAddr:
    if (R.HexData.size() != 8)
      return createStringError(errc::invalid_argument,
                               "start address data should be 4 bytes in size");
    // According to Intel HEX specification '03' record
    // only specifies the code address within the 20-bit
    // segmented address space of the 8086/80186. This
    // means 12 high order bits should be zeroes.
    if (R.Type == IHexRecord::StartAddr80x86 &&
        R.HexData.take_front(3) != "000")
      return createStringError(errc::invalid_argument,
                               "start address exceeds 20 bit for 80x86");
    break;
  case IHexRecord::ExtendedAddr:
    // 16-31 bits of linear base address
    if (R.HexData.size() != 4)
      return createStringError(
          errc::invalid_argument,
          "extended address data should be 2 bytes in size");
    break;
  default:
    // Unknown record type
    return createStringError(errc::invalid_argument, "unknown record type: %u",
                             static_cast<unsigned>(R.Type));
  }
  return Error::success();
}

// Checks that IHEX line contains valid characters.
// This allows converting hexadecimal data to integers
// without extra verification.
static Error checkChars(StringRef Line) {
  assert(!Line.empty());
  if (Line[0] != ':')
    return createStringError(errc::invalid_argument,
                             "missing ':' in the beginning of line.");

  // Everything after the leading ':' must be a hexadecimal digit. The reported
  // position is 1-based.
  for (size_t Pos = 1; Pos < Line.size(); ++Pos)
    if (hexDigitValue(Line[Pos]) == -1U)
      return createStringError(errc::invalid_argument,
                               "invalid character at position %zu.", Pos + 1);
  return Error::success();
}

// Parses a single record line. Checks, in order: minimal length, character
// set, length implied by the length field, checksum, and finally the
// per-type payload rules (checkRecord). Rec.HexData refers into Line.
Expected<IHexRecord> IHexRecord::parse(StringRef Line) {
  assert(!Line.empty());

  // ':' + Length + Address + Type + Checksum with empty data ':LLAAAATTCC'
  if (Line.size() < 11)
    return createStringError(errc::invalid_argument,
                             "line is too short: %zu chars.", Line.size());

  if (Error E = checkChars(Line))
    return std::move(E);

  IHexRecord Rec;
  size_t DataLen = checkedGetHex<uint8_t>(Line.substr(1, 2));
  if (Line.size() != getLength(DataLen))
    return createStringError(errc::invalid_argument,
                             "invalid line length %zu (should be %zu)",
                             Line.size(), getLength(DataLen));

  Rec.Addr = checkedGetHex<uint16_t>(Line.substr(3, 4));
  Rec.Type = checkedGetHex<uint8_t>(Line.substr(7, 2));
  Rec.HexData = Line.substr(9, DataLen * 2);

  // The sum of all bytes after ':' (including the stored checksum) must be
  // zero modulo 256.
  if (getChecksum(Line.drop_front(1)) != 0)
    return createStringError(errc::invalid_argument, "incorrect checksum.");
  if (Error E = checkRecord(Rec))
    return std::move(E);
  return Rec;
}

// Returns the address a section's data is written at. If the section's parent
// segment is a PT_LOAD segment, this is the segment's physical address plus
// the section's original offset within that segment; otherwise Sec->Addr.
static uint64_t sectionPhysicalAddr(const SectionBase *Sec) {
  Segment *Seg = Sec->ParentSegment;
  if (Seg && Seg->Type != ELF::PT_LOAD)
    Seg = nullptr;
  return Seg ? Seg->PAddr + Sec->OriginalOffset - Seg->OriginalOffset
             : Sec->Addr;
}

// Emits the section data as a series of records of at most ChunkSize bytes
// (via writeData with type 0). Whenever the current address leaves the 64K
// window addressed by SegmentAddr + BaseAddr, an address record is written
// first (writeSegmentAddr / writeBaseAddr) and the tracked base is updated.
void IHexSectionWriterBase::writeSection(const SectionBase *Sec,
                                         ArrayRef<uint8_t> Data) {
  assert(Data.size() == Sec->Size);
  const uint32_t ChunkSize = 16;
  // Only the low 32 bits of the address are used.
  uint32_t Addr = sectionPhysicalAddr(Sec) & 0xFFFFFFFFU;
  while (!Data.empty()) {
    uint64_t DataSize = std::min<uint64_t>(Data.size(), ChunkSize);
    if (Addr > SegmentAddr + BaseAddr + 0xFFFFU) {
      if (Addr > 0xFFFFFU) {
        // Write extended address record, zeroing segment address
        // if needed.
        if (SegmentAddr != 0)
          SegmentAddr = writeSegmentAddr(0U);
        BaseAddr = writeBaseAddr(Addr);
      } else {
        // We can still remain 16-bit
        SegmentAddr = writeSegmentAddr(Addr);
      }
    }
    uint64_t SegOffset = Addr - BaseAddr - SegmentAddr;
    assert(SegOffset <= 0xFFFFU);
    // Do not let a single record run past the end of the 64K window.
    DataSize = std::min(DataSize, 0x10000U - SegOffset);
    writeData(0, SegOffset, Data.take_front(DataSize));
    Addr += DataSize;
    Data = Data.drop_front(DataSize);
  }
}

// Writes a type 2 record carrying bits 16-19 of Addr and returns the segment
// base (Addr & 0xF0000) that subsequent data offsets are relative to.
uint64_t IHexSectionWriterBase::writeSegmentAddr(uint64_t Addr) {
  assert(Addr <= 0xFFFFFU);
  uint8_t Data[] = {static_cast<uint8_t>((Addr & 0xF0000U) >> 12), 0};
  writeData(2, 0, Data);
  return Addr & 0xF0000U;
}

// Writes a type 4 record carrying the upper 16 bits of Addr (most significant
// byte first) and returns the new base (Addr & 0xFFFF0000).
uint64_t IHexSectionWriterBase::writeBaseAddr(uint64_t Addr) {
  assert(Addr <= 0xFFFFFFFFU);
  uint64_t Base = Addr & 0xFFFF0000U;
  uint8_t Data[] = {static_cast<uint8_t>(Base >> 24),
                    static_cast<uint8_t>((Base >> 16) & 0xFF)};
  writeData(4, 0, Data);
  return Base;
}

// The base implementation writes nothing: it only advances Offset by the
// length the record line would occupy.
void IHexSectionWriterBase::writeData(uint8_t, uint16_t,
                                      ArrayRef<uint8_t> Data) {
  Offset += IHexRecord::getLineLength(Data.size());
}

Error IHexSectionWriterBase::visit(const Section &Sec) {
  writeSection(&Sec, Sec.Contents);
  return Error::success();
}

Error IHexSectionWriterBase::visit(const OwnedDataSection &Sec) {
  writeSection(&Sec, Sec.Data);
  return Error::success();
}

Error IHexSectionWriterBase::visit(const StringTableSection &Sec) {
  // Check that sizer has already done its work
  assert(Sec.Size == Sec.StrTabBuilder.getSize());
  // We are free to pass an invalid pointer to writeSection as long
  // as we don't actually write any data. The real writer class has
  // to override this method.
  writeSection(&Sec, {nullptr, static_cast<size_t>(Sec.Size)});
  return Error::success();
}

Error IHexSectionWriterBase::visit(const DynamicRelocationSection &Sec) {
  writeSection(&Sec, Sec.Contents);
  return Error::success();
}

// Formats the record with IHexRecord::getLine, copies it into the output
// buffer at the current Offset and advances Offset past it.
void IHexSectionWriter::writeData(uint8_t Type, uint16_t Addr,
                                  ArrayRef<uint8_t> Data) {
  IHexLineData HexData = IHexRecord::getLine(Type, Addr, Data);
  memcpy(Out.getBufferStart() + Offset, HexData.data(), HexData.size());
  Offset += HexData.size();
}

// Serializes the string table into a temporary buffer first, so that real
// bytes can be passed to writeSection.
Error IHexSectionWriter::visit(const StringTableSection &Sec) {
  assert(Sec.Size == Sec.StrTabBuilder.getSize());
  std::vector<uint8_t> Data(Sec.Size);
  Sec.StrTabBuilder.write(Data.data());
  writeSection(&Sec, Data);
  return Error::success();
}

Error Section::accept(SectionVisitor &Visitor) const {
  return Visitor.visit(*this);
}

Error Section::accept(MutableSectionVisitor &Visitor) {
  return Visitor.visit(*this);
}

// Copies the owned data into the output buffer at Sec.Offset.
Error SectionWriter::visit(const OwnedDataSection &Sec) {
  llvm::copy(Sec.Data, Out.getBufferStart() + Sec.Offset);
  return Error::success();
}

// Decompresses the section payload (the original data with the leading
// Elf_Chdr skipped) and writes the result to the output buffer at Sec.Offset.
// Fails with invalid_argument if ch_type is neither ZLIB nor ZSTD, if the
// format is reported as unsupported, or if decompression itself fails.
template <class ELFT>
Error ELFSectionWriter<ELFT>::visit(const DecompressedSection &Sec) {
  ArrayRef<uint8_t> Compressed =
      Sec.OriginalData.slice(sizeof(Elf_Chdr_Impl<ELFT>));
  SmallVector<uint8_t, 128> Decompressed;
  DebugCompressionType Type;
  switch (Sec.ChType) {
  case ELFCOMPRESS_ZLIB:
    Type = DebugCompressionType::Zlib;
    break;
  case ELFCOMPRESS_ZSTD:
    Type = DebugCompressionType::Zstd;
    break;
  default:
    return createStringError(errc::invalid_argument,
                             "--decompress-debug-sections: ch_type (" +
                                 Twine(Sec.ChType) + ") of section '" +
                                 Sec.Name + "' is unsupported");
  }
  if (auto *Reason =
          compression::getReasonIfUnsupported(compression::formatFor(Type)))
    return createStringError(errc::invalid_argument,
                             "failed to decompress section '" + Sec.Name +
                                 "': " + Reason);
  if (Error E = compression::decompress(Type, Compressed, Decompressed,
                                        static_cast<size_t>(Sec.Size)))
    return createStringError(errc::invalid_argument,
                             "failed to decompress section '" + Sec.Name +
                                 "': " + toString(std::move(E)));

  uint8_t *Buf = reinterpret_cast<uint8_t *>(Out.getBufferStart()) + Sec.Offset;
  std::copy(Decompressed.begin(), Decompressed.end(), Buf);

  return Error::success();
}

// Always fails with operation_not_permitted.
Error BinarySectionWriter::visit(const DecompressedSection &Sec) {
  return createStringError(errc::operation_not_permitted,
                           "cannot write compressed section '" + Sec.Name +
                               "' ");
}

Error DecompressedSection::accept(SectionVisitor &Visitor) const {
  return Visitor.visit(*this);
}

Error DecompressedSection::accept(MutableSectionVisitor &Visitor) {
  return Visitor.visit(*this);
}

Error OwnedDataSection::accept(SectionVisitor &Visitor) const {
  return Visitor.visit(*this);
}

Error OwnedDataSection::accept(MutableSectionVisitor &Visitor) {
  return Visitor.visit(*this);
}

// Decodes HexData (two hex digits per byte, so its length must be even),
// appends the bytes to Data and updates Size to the new data size.
void OwnedDataSection::appendHexData(StringRef HexData) {
  assert((HexData.size() & 1) == 0);
  while (!HexData.empty()) {
    Data.push_back(checkedGetHex<uint8_t>(HexData.take_front(2)));
    HexData = HexData.drop_front(2);
  }
  Size = Data.size();
}

// Always fails with operation_not_permitted.
Error BinarySectionWriter::visit(const CompressedSection &Sec) {
  return createStringError(errc::operation_not_permitted,
                           "cannot write compressed section '" + Sec.Name +
                               "' ");
}

// Writes a compressed section to the output buffer at Sec.Offset. With
// CompressionType None the original data is copied as is; otherwise an
// Elf_Chdr header (type, decompressed size and alignment) is written, followed
// by the compressed data.
template <class ELFT>
Error ELFSectionWriter<ELFT>::visit(const CompressedSection &Sec) {
  uint8_t *Buf = reinterpret_cast<uint8_t *>(Out.getBufferStart()) + Sec.Offset;
  Elf_Chdr_Impl<ELFT> Chdr = {};
  switch (Sec.CompressionType) {
  case DebugCompressionType::None:
    std::copy(Sec.OriginalData.begin(), Sec.OriginalData.end(), Buf);
    return Error::success();
  case DebugCompressionType::Zlib:
    Chdr.ch_type = ELF::ELFCOMPRESS_ZLIB;
    break;
  case DebugCompressionType::Zstd:
    Chdr.ch_type = ELF::ELFCOMPRESS_ZSTD;
    break;
  }
  Chdr.ch_size = Sec.DecompressedSize;
  Chdr.ch_addralign = Sec.DecompressedAlign;
  memcpy(Buf, &Chdr, sizeof(Chdr));
  Buf += sizeof(Chdr);

  std::copy(Sec.CompressedData.begin(), Sec.CompressedData.end(), Buf);
  return Error::success();
}

// Creates a compressed copy of Sec: compresses its original data, sets
// SHF_COMPRESSED, and sizes the section as the compression header (64- or
// 32-bit variant, selected by Is64Bits) plus the compressed data.
CompressedSection::CompressedSection(const SectionBase &Sec,
                                     DebugCompressionType CompressionType,
                                     bool Is64Bits)
    : SectionBase(Sec), CompressionType(CompressionType),
      DecompressedSize(Sec.OriginalData.size()), DecompressedAlign(Sec.Align) {
  compression::compress(compression::Params(CompressionType), OriginalData,
                        CompressedData);

  Flags |= ELF::SHF_COMPRESSED;
  size_t ChdrSize = Is64Bits ? sizeof(object::Elf_Chdr_Impl<object::ELF64LE>)
                             : sizeof(object::Elf_Chdr_Impl<object::ELF32LE>);
  Size = ChdrSize + CompressedData.size();
  Align = 8;
}

// Wraps data that is already compressed. CompressionType is set to None, so
// the ELF writer copies OriginalData through unchanged.
CompressedSection::CompressedSection(ArrayRef<uint8_t> CompressedData,
                                     uint32_t ChType, uint64_t DecompressedSize,
                                     uint64_t DecompressedAlign)
    : ChType(ChType), CompressionType(DebugCompressionType::None),
      DecompressedSize(DecompressedSize), DecompressedAlign(DecompressedAlign) {
  OriginalData = CompressedData;
}

Error CompressedSection::accept(SectionVisitor &Visitor) const {
  return Visitor.visit(*this);
}

Error CompressedSection::accept(MutableSectionVisitor &Visitor) {
  return Visitor.visit(*this);
}

void StringTableSection::addString(StringRef Name) { StrTabBuilder.add(Name); }

// Returns the offset of Name as reported by the string table builder.
uint32_t StringTableSection::findIndex(StringRef Name) const {
  return StrTabBuilder.getOffset(Name);
}

// Finalizes the string table builder and records the resulting size.
void StringTableSection::prepareForLayout() {
  StrTabBuilder.finalize();
  Size = StrTabBuilder.getSize();
}

// Serializes the string table into the output buffer at Sec.Offset.
Error SectionWriter::visit(const StringTableSection &Sec) {
  Sec.StrTabBuilder.write(reinterpret_cast<uint8_t *>(Out.getBufferStart()) +
                          Sec.Offset);
  return Error::success();
}

Error StringTableSection::accept(SectionVisitor &Visitor) const {
  return Visitor.visit(*this);
}

Error StringTableSection::accept(MutableSectionVisitor &Visitor) {
  return Visitor.visit(*this);
}

// Writes the section indexes as consecutive Elf_Word values at Sec.Offset.
template <class ELFT>
Error ELFSectionWriter<ELFT>::visit(const SectionIndexSection &Sec) {
  uint8_t *Buf = reinterpret_cast<uint8_t *>(Out.getBufferStart()) + Sec.Offset;
  llvm::copy(Sec.Indexes, reinterpret_cast<Elf_Word *>(Buf));
  return Error::success();
}

// Resolves the Link field to a symbol table and registers this section as
// that table's section index table. Returns the lookup error if Link is not a
// valid index or does not refer to a symbol table.
Error SectionIndexSection::initialize(SectionTableRef SecTable) {
  Size = 0;
  Expected<SymbolTableSection *> Sec =
      SecTable.getSectionOfType<SymbolTableSection>(
          Link,
          "Link field value " + Twine(Link) + " in section " + Name +
              " is invalid",
          "Link field value " + Twine(Link) + " in section " + Name +
              " is not a symbol table");
  if (!Sec)
    return Sec.takeError();

  setSymTab(*Sec);
  Symbols->setShndxTable(this);
  return Error::success();
}

void SectionIndexSection::finalize() { Link = Symbols->Index; }

Error SectionIndexSection::accept(SectionVisitor &Visitor) const {
  return Visitor.visit(*this);
}

Error SectionIndexSection::accept(MutableSectionVisitor &Visitor) {
  return Visitor.visit(*this);
}

// Returns true if Index is SHN_ABS or SHN_COMMON, or one of the
// machine-specific reserved indexes listed below for the given Machine.
static bool isValidReservedSectionIndex(uint16_t Index, uint16_t Machine) {
  switch (Index) {
  case SHN_ABS:
  case SHN_COMMON:
    return true;
  }

  if (Machine == EM_AMDGPU) {
    return Index == SHN_AMDGPU_LDS;
  }

  if (Machine == EM_MIPS) {
    switch (Index) {
    case SHN_MIPS_ACOMMON:
    case SHN_MIPS_SCOMMON:
    case SHN_MIPS_SUNDEFINED:
      return true;
    }
  }

  if (Machine == EM_HEXAGON) {
    switch (Index) {
    case SHN_HEXAGON_SCOMMON:
    case SHN_HEXAGON_SCOMMON_1:
    case SHN_HEXAGON_SCOMMON_2:
    case SHN_HEXAGON_SCOMMON_4:
    case SHN_HEXAGON_SCOMMON_8:
      return true;
    }
  }
  return false;
}

// Large indexes force us to clarify exactly what this function should do.
This +// function should return the value that will appear in st_shndx when written +// out. +uint16_t Symbol::getShndx() const { + if (DefinedIn != nullptr) { + if (DefinedIn->Index >= SHN_LORESERVE) + return SHN_XINDEX; + return DefinedIn->Index; + } + + if (ShndxType == SYMBOL_SIMPLE_INDEX) { + // This means that we don't have a defined section but we do need to + // output a legitimate section index. + return SHN_UNDEF; + } + + assert(ShndxType == SYMBOL_ABS || ShndxType == SYMBOL_COMMON || + (ShndxType >= SYMBOL_LOPROC && ShndxType <= SYMBOL_HIPROC) || + (ShndxType >= SYMBOL_LOOS && ShndxType <= SYMBOL_HIOS)); + return static_cast<uint16_t>(ShndxType); +} + +bool Symbol::isCommon() const { return getShndx() == SHN_COMMON; } + +void SymbolTableSection::assignIndices() { + uint32_t Index = 0; + for (auto &Sym : Symbols) + Sym->Index = Index++; +} + +void SymbolTableSection::addSymbol(Twine Name, uint8_t Bind, uint8_t Type, + SectionBase *DefinedIn, uint64_t Value, + uint8_t Visibility, uint16_t Shndx, + uint64_t SymbolSize) { + Symbol Sym; + Sym.Name = Name.str(); + Sym.Binding = Bind; + Sym.Type = Type; + Sym.DefinedIn = DefinedIn; + if (DefinedIn != nullptr) + DefinedIn->HasSymbol = true; + if (DefinedIn == nullptr) { + if (Shndx >= SHN_LORESERVE) + Sym.ShndxType = static_cast<SymbolShndxType>(Shndx); + else + Sym.ShndxType = SYMBOL_SIMPLE_INDEX; + } + Sym.Value = Value; + Sym.Visibility = Visibility; + Sym.Size = SymbolSize; + Sym.Index = Symbols.size(); + Symbols.emplace_back(std::make_unique<Symbol>(Sym)); + Size += this->EntrySize; +} + +Error SymbolTableSection::removeSectionReferences( + bool AllowBrokenLinks, function_ref<bool(const SectionBase *)> ToRemove) { + if (ToRemove(SectionIndexTable)) + SectionIndexTable = nullptr; + if (ToRemove(SymbolNames)) { + if (!AllowBrokenLinks) + return createStringError( + llvm::errc::invalid_argument, + "string table '%s' cannot be removed because it is " + "referenced by the symbol table '%s'", + 
SymbolNames->Name.data(), this->Name.data()); + SymbolNames = nullptr; + } + return removeSymbols( + [ToRemove](const Symbol &Sym) { return ToRemove(Sym.DefinedIn); }); +} + +void SymbolTableSection::updateSymbols(function_ref<void(Symbol &)> Callable) { + for (SymPtr &Sym : llvm::drop_begin(Symbols)) + Callable(*Sym); + std::stable_partition( + std::begin(Symbols), std::end(Symbols), + [](const SymPtr &Sym) { return Sym->Binding == STB_LOCAL; }); + assignIndices(); +} + +Error SymbolTableSection::removeSymbols( + function_ref<bool(const Symbol &)> ToRemove) { + Symbols.erase( + std::remove_if(std::begin(Symbols) + 1, std::end(Symbols), + [ToRemove](const SymPtr &Sym) { return ToRemove(*Sym); }), + std::end(Symbols)); + Size = Symbols.size() * EntrySize; + assignIndices(); + return Error::success(); +} + +void SymbolTableSection::replaceSectionReferences( + const DenseMap<SectionBase *, SectionBase *> &FromTo) { + for (std::unique_ptr<Symbol> &Sym : Symbols) + if (SectionBase *To = FromTo.lookup(Sym->DefinedIn)) + Sym->DefinedIn = To; +} + +Error SymbolTableSection::initialize(SectionTableRef SecTable) { + Size = 0; + Expected<StringTableSection *> Sec = + SecTable.getSectionOfType<StringTableSection>( + Link, + "Symbol table has link index of " + Twine(Link) + + " which is not a valid index", + "Symbol table has link index of " + Twine(Link) + + " which is not a string table"); + if (!Sec) + return Sec.takeError(); + + setStrTab(*Sec); + return Error::success(); +} + +void SymbolTableSection::finalize() { + uint32_t MaxLocalIndex = 0; + for (std::unique_ptr<Symbol> &Sym : Symbols) { + Sym->NameIndex = + SymbolNames == nullptr ? 0 : SymbolNames->findIndex(Sym->Name); + if (Sym->Binding == STB_LOCAL) + MaxLocalIndex = std::max(MaxLocalIndex, Sym->Index); + } + // Now we need to set the Link and Info fields. + Link = SymbolNames == nullptr ? 
0 : SymbolNames->Index; + Info = MaxLocalIndex + 1; +} + +void SymbolTableSection::prepareForLayout() { + // Reserve proper amount of space in section index table, so we can + // layout sections correctly. We will fill the table with correct + // indexes later in fillShdnxTable. + if (SectionIndexTable) + SectionIndexTable->reserve(Symbols.size()); + + // Add all of our strings to SymbolNames so that SymbolNames has the right + // size before layout is decided. + // If the symbol names section has been removed, don't try to add strings to + // the table. + if (SymbolNames != nullptr) + for (std::unique_ptr<Symbol> &Sym : Symbols) + SymbolNames->addString(Sym->Name); +} + +void SymbolTableSection::fillShndxTable() { + if (SectionIndexTable == nullptr) + return; + // Fill section index table with real section indexes. This function must + // be called after assignOffsets. + for (const std::unique_ptr<Symbol> &Sym : Symbols) { + if (Sym->DefinedIn != nullptr && Sym->DefinedIn->Index >= SHN_LORESERVE) + SectionIndexTable->addIndex(Sym->DefinedIn->Index); + else + SectionIndexTable->addIndex(SHN_UNDEF); + } +} + +Expected<const Symbol *> +SymbolTableSection::getSymbolByIndex(uint32_t Index) const { + if (Symbols.size() <= Index) + return createStringError(errc::invalid_argument, + "invalid symbol index: " + Twine(Index)); + return Symbols[Index].get(); +} + +Expected<Symbol *> SymbolTableSection::getSymbolByIndex(uint32_t Index) { + Expected<const Symbol *> Sym = + static_cast<const SymbolTableSection *>(this)->getSymbolByIndex(Index); + if (!Sym) + return Sym.takeError(); + + return const_cast<Symbol *>(*Sym); +} + +template <class ELFT> +Error ELFSectionWriter<ELFT>::visit(const SymbolTableSection &Sec) { + Elf_Sym *Sym = reinterpret_cast<Elf_Sym *>(Out.getBufferStart() + Sec.Offset); + // Loop though symbols setting each entry of the symbol table. 
+ for (const std::unique_ptr<Symbol> &Symbol : Sec.Symbols) { + Sym->st_name = Symbol->NameIndex; + Sym->st_value = Symbol->Value; + Sym->st_size = Symbol->Size; + Sym->st_other = Symbol->Visibility; + Sym->setBinding(Symbol->Binding); + Sym->setType(Symbol->Type); + Sym->st_shndx = Symbol->getShndx(); + ++Sym; + } + return Error::success(); +} + +Error SymbolTableSection::accept(SectionVisitor &Visitor) const { + return Visitor.visit(*this); +} + +Error SymbolTableSection::accept(MutableSectionVisitor &Visitor) { + return Visitor.visit(*this); +} + +StringRef RelocationSectionBase::getNamePrefix() const { + switch (Type) { + case SHT_REL: + return ".rel"; + case SHT_RELA: + return ".rela"; + default: + llvm_unreachable("not a relocation section"); + } +} + +Error RelocationSection::removeSectionReferences( + bool AllowBrokenLinks, function_ref<bool(const SectionBase *)> ToRemove) { + if (ToRemove(Symbols)) { + if (!AllowBrokenLinks) + return createStringError( + llvm::errc::invalid_argument, + "symbol table '%s' cannot be removed because it is " + "referenced by the relocation section '%s'", + Symbols->Name.data(), this->Name.data()); + Symbols = nullptr; + } + + for (const Relocation &R : Relocations) { + if (!R.RelocSymbol || !R.RelocSymbol->DefinedIn || + !ToRemove(R.RelocSymbol->DefinedIn)) + continue; + return createStringError(llvm::errc::invalid_argument, + "section '%s' cannot be removed: (%s+0x%" PRIx64 + ") has relocation against symbol '%s'", + R.RelocSymbol->DefinedIn->Name.data(), + SecToApplyRel->Name.data(), R.Offset, + R.RelocSymbol->Name.c_str()); + } + + return Error::success(); +} + +template <class SymTabType> +Error RelocSectionWithSymtabBase<SymTabType>::initialize( + SectionTableRef SecTable) { + if (Link != SHN_UNDEF) { + Expected<SymTabType *> Sec = SecTable.getSectionOfType<SymTabType>( + Link, + "Link field value " + Twine(Link) + " in section " + Name + + " is invalid", + "Link field value " + Twine(Link) + " in section " + Name + + " 
is not a symbol table"); + if (!Sec) + return Sec.takeError(); + + setSymTab(*Sec); + } + + if (Info != SHN_UNDEF) { + Expected<SectionBase *> Sec = + SecTable.getSection(Info, "Info field value " + Twine(Info) + + " in section " + Name + " is invalid"); + if (!Sec) + return Sec.takeError(); + + setSection(*Sec); + } else + setSection(nullptr); + + return Error::success(); +} + +template <class SymTabType> +void RelocSectionWithSymtabBase<SymTabType>::finalize() { + this->Link = Symbols ? Symbols->Index : 0; + + if (SecToApplyRel != nullptr) + this->Info = SecToApplyRel->Index; +} + +template <class ELFT> +static void setAddend(Elf_Rel_Impl<ELFT, false> &, uint64_t) {} + +template <class ELFT> +static void setAddend(Elf_Rel_Impl<ELFT, true> &Rela, uint64_t Addend) { + Rela.r_addend = Addend; +} + +template <class RelRange, class T> +static void writeRel(const RelRange &Relocations, T *Buf, bool IsMips64EL) { + for (const auto &Reloc : Relocations) { + Buf->r_offset = Reloc.Offset; + setAddend(*Buf, Reloc.Addend); + Buf->setSymbolAndType(Reloc.RelocSymbol ? 
Reloc.RelocSymbol->Index : 0, + Reloc.Type, IsMips64EL); + ++Buf; + } +} + +template <class ELFT> +Error ELFSectionWriter<ELFT>::visit(const RelocationSection &Sec) { + uint8_t *Buf = reinterpret_cast<uint8_t *>(Out.getBufferStart()) + Sec.Offset; + if (Sec.Type == SHT_REL) + writeRel(Sec.Relocations, reinterpret_cast<Elf_Rel *>(Buf), + Sec.getObject().IsMips64EL); + else + writeRel(Sec.Relocations, reinterpret_cast<Elf_Rela *>(Buf), + Sec.getObject().IsMips64EL); + return Error::success(); +} + +Error RelocationSection::accept(SectionVisitor &Visitor) const { + return Visitor.visit(*this); +} + +Error RelocationSection::accept(MutableSectionVisitor &Visitor) { + return Visitor.visit(*this); +} + +Error RelocationSection::removeSymbols( + function_ref<bool(const Symbol &)> ToRemove) { + for (const Relocation &Reloc : Relocations) + if (Reloc.RelocSymbol && ToRemove(*Reloc.RelocSymbol)) + return createStringError( + llvm::errc::invalid_argument, + "not stripping symbol '%s' because it is named in a relocation", + Reloc.RelocSymbol->Name.data()); + return Error::success(); +} + +void RelocationSection::markSymbols() { + for (const Relocation &Reloc : Relocations) + if (Reloc.RelocSymbol) + Reloc.RelocSymbol->Referenced = true; +} + +void RelocationSection::replaceSectionReferences( + const DenseMap<SectionBase *, SectionBase *> &FromTo) { + // Update the target section if it was replaced. 
+ if (SectionBase *To = FromTo.lookup(SecToApplyRel)) + SecToApplyRel = To; +} + +Error SectionWriter::visit(const DynamicRelocationSection &Sec) { + llvm::copy(Sec.Contents, Out.getBufferStart() + Sec.Offset); + return Error::success(); +} + +Error DynamicRelocationSection::accept(SectionVisitor &Visitor) const { + return Visitor.visit(*this); +} + +Error DynamicRelocationSection::accept(MutableSectionVisitor &Visitor) { + return Visitor.visit(*this); +} + +Error DynamicRelocationSection::removeSectionReferences( + bool AllowBrokenLinks, function_ref<bool(const SectionBase *)> ToRemove) { + if (ToRemove(Symbols)) { + if (!AllowBrokenLinks) + return createStringError( + llvm::errc::invalid_argument, + "symbol table '%s' cannot be removed because it is " + "referenced by the relocation section '%s'", + Symbols->Name.data(), this->Name.data()); + Symbols = nullptr; + } + + // SecToApplyRel contains a section referenced by sh_info field. It keeps + // a section to which the relocation section applies. When we remove any + // sections we also remove their relocation sections. Since we do that much + // earlier, this assert should never be triggered. + assert(!SecToApplyRel || !ToRemove(SecToApplyRel)); + return Error::success(); +} + +Error Section::removeSectionReferences( + bool AllowBrokenDependency, + function_ref<bool(const SectionBase *)> ToRemove) { + if (ToRemove(LinkSection)) { + if (!AllowBrokenDependency) + return createStringError(llvm::errc::invalid_argument, + "section '%s' cannot be removed because it is " + "referenced by the section '%s'", + LinkSection->Name.data(), this->Name.data()); + LinkSection = nullptr; + } + return Error::success(); +} + +void GroupSection::finalize() { + this->Info = Sym ? Sym->Index : 0; + this->Link = SymTab ? SymTab->Index : 0; + // Linker deduplication for GRP_COMDAT is based on Sym->Name. The local/global + // status is not part of the equation. 
If Sym is localized, the intention is + // likely to make the group fully localized. Drop GRP_COMDAT to suppress + // deduplication. See https://groups.google.com/g/generic-abi/c/2X6mR-s2zoc + if ((FlagWord & GRP_COMDAT) && Sym && Sym->Binding == STB_LOCAL) + this->FlagWord &= ~GRP_COMDAT; +} + +Error GroupSection::removeSectionReferences( + bool AllowBrokenLinks, function_ref<bool(const SectionBase *)> ToRemove) { + if (ToRemove(SymTab)) { + if (!AllowBrokenLinks) + return createStringError( + llvm::errc::invalid_argument, + "section '.symtab' cannot be removed because it is " + "referenced by the group section '%s'", + this->Name.data()); + SymTab = nullptr; + Sym = nullptr; + } + llvm::erase_if(GroupMembers, ToRemove); + return Error::success(); +} + +Error GroupSection::removeSymbols(function_ref<bool(const Symbol &)> ToRemove) { + if (ToRemove(*Sym)) + return createStringError(llvm::errc::invalid_argument, + "symbol '%s' cannot be removed because it is " + "referenced by the section '%s[%d]'", + Sym->Name.data(), this->Name.data(), this->Index); + return Error::success(); +} + +void GroupSection::markSymbols() { + if (Sym) + Sym->Referenced = true; +} + +void GroupSection::replaceSectionReferences( + const DenseMap<SectionBase *, SectionBase *> &FromTo) { + for (SectionBase *&Sec : GroupMembers) + if (SectionBase *To = FromTo.lookup(Sec)) + Sec = To; +} + +void GroupSection::onRemove() { + // As the header section of the group is removed, drop the Group flag in its + // former members. 
+ for (SectionBase *Sec : GroupMembers) + Sec->Flags &= ~SHF_GROUP; +} + +Error Section::initialize(SectionTableRef SecTable) { + if (Link == ELF::SHN_UNDEF) + return Error::success(); + + Expected<SectionBase *> Sec = + SecTable.getSection(Link, "Link field value " + Twine(Link) + + " in section " + Name + " is invalid"); + if (!Sec) + return Sec.takeError(); + + LinkSection = *Sec; + + if (LinkSection->Type == ELF::SHT_SYMTAB) + LinkSection = nullptr; + + return Error::success(); +} + +void Section::finalize() { this->Link = LinkSection ? LinkSection->Index : 0; } + +void GnuDebugLinkSection::init(StringRef File) { + FileName = sys::path::filename(File); + // The format for the .gnu_debuglink starts with the file name and is + // followed by a null terminator and then the CRC32 of the file. The CRC32 + // should be 4 byte aligned. So we add the FileName size, a 1 for the null + // byte, and then finally push the size to alignment and add 4. + Size = alignTo(FileName.size() + 1, 4) + 4; + // The CRC32 will only be aligned if we align the whole section. + Align = 4; + Type = OriginalType = ELF::SHT_PROGBITS; + Name = ".gnu_debuglink"; + // For sections not found in segments, OriginalOffset is only used to + // establish the order that sections should go in. By using the maximum + // possible offset we cause this section to wind up at the end. 
+ OriginalOffset = std::numeric_limits<uint64_t>::max(); +} + +GnuDebugLinkSection::GnuDebugLinkSection(StringRef File, + uint32_t PrecomputedCRC) + : FileName(File), CRC32(PrecomputedCRC) { + init(File); +} + +template <class ELFT> +Error ELFSectionWriter<ELFT>::visit(const GnuDebugLinkSection &Sec) { + unsigned char *Buf = + reinterpret_cast<uint8_t *>(Out.getBufferStart()) + Sec.Offset; + Elf_Word *CRC = + reinterpret_cast<Elf_Word *>(Buf + Sec.Size - sizeof(Elf_Word)); + *CRC = Sec.CRC32; + llvm::copy(Sec.FileName, Buf); + return Error::success(); +} + +Error GnuDebugLinkSection::accept(SectionVisitor &Visitor) const { + return Visitor.visit(*this); +} + +Error GnuDebugLinkSection::accept(MutableSectionVisitor &Visitor) { + return Visitor.visit(*this); +} + +template <class ELFT> +Error ELFSectionWriter<ELFT>::visit(const GroupSection &Sec) { + ELF::Elf32_Word *Buf = + reinterpret_cast<ELF::Elf32_Word *>(Out.getBufferStart() + Sec.Offset); + support::endian::write32<ELFT::TargetEndianness>(Buf++, Sec.FlagWord); + for (SectionBase *S : Sec.GroupMembers) + support::endian::write32<ELFT::TargetEndianness>(Buf++, S->Index); + return Error::success(); +} + +Error GroupSection::accept(SectionVisitor &Visitor) const { + return Visitor.visit(*this); +} + +Error GroupSection::accept(MutableSectionVisitor &Visitor) { + return Visitor.visit(*this); +} + +// Returns true IFF a section is wholly inside the range of a segment +static bool sectionWithinSegment(const SectionBase &Sec, const Segment &Seg) { + // If a section is empty it should be treated like it has a size of 1. This is + // to clarify the case when an empty section lies on a boundary between two + // segments and ensures that the section "belongs" to the second segment and + // not the first. + uint64_t SecSize = Sec.Size ? Sec.Size : 1; + + // Ignore just added sections. 
+ if (Sec.OriginalOffset == std::numeric_limits<uint64_t>::max()) + return false; + + if (Sec.Type == SHT_NOBITS) { + if (!(Sec.Flags & SHF_ALLOC)) + return false; + + bool SectionIsTLS = Sec.Flags & SHF_TLS; + bool SegmentIsTLS = Seg.Type == PT_TLS; + if (SectionIsTLS != SegmentIsTLS) + return false; + + return Seg.VAddr <= Sec.Addr && + Seg.VAddr + Seg.MemSize >= Sec.Addr + SecSize; + } + + return Seg.Offset <= Sec.OriginalOffset && + Seg.Offset + Seg.FileSize >= Sec.OriginalOffset + SecSize; +} + +// Returns true IFF a segment's original offset is inside of another segment's +// range. +static bool segmentOverlapsSegment(const Segment &Child, + const Segment &Parent) { + + return Parent.OriginalOffset <= Child.OriginalOffset && + Parent.OriginalOffset + Parent.FileSize > Child.OriginalOffset; +} + +static bool compareSegmentsByOffset(const Segment *A, const Segment *B) { + // Any segment without a parent segment should come before a segment + // that has a parent segment. + if (A->OriginalOffset < B->OriginalOffset) + return true; + if (A->OriginalOffset > B->OriginalOffset) + return false; + return A->Index < B->Index; +} + +void BasicELFBuilder::initFileHeader() { + Obj->Flags = 0x0; + Obj->Type = ET_REL; + Obj->OSABI = ELFOSABI_NONE; + Obj->ABIVersion = 0; + Obj->Entry = 0x0; + Obj->Machine = EM_NONE; + Obj->Version = 1; +} + +void BasicELFBuilder::initHeaderSegment() { Obj->ElfHdrSegment.Index = 0; } + +StringTableSection *BasicELFBuilder::addStrTab() { + auto &StrTab = Obj->addSection<StringTableSection>(); + StrTab.Name = ".strtab"; + + Obj->SectionNames = &StrTab; + return &StrTab; +} + +SymbolTableSection *BasicELFBuilder::addSymTab(StringTableSection *StrTab) { + auto &SymTab = Obj->addSection<SymbolTableSection>(); + + SymTab.Name = ".symtab"; + SymTab.Link = StrTab->Index; + + // The symbol table always needs a null symbol + SymTab.addSymbol("", 0, 0, nullptr, 0, 0, 0, 0); + + Obj->SymbolTable = &SymTab; + return &SymTab; +} + +Error 
BasicELFBuilder::initSections() { + for (SectionBase &Sec : Obj->sections()) + if (Error Err = Sec.initialize(Obj->sections())) + return Err; + + return Error::success(); +} + +void BinaryELFBuilder::addData(SymbolTableSection *SymTab) { + auto Data = ArrayRef<uint8_t>( + reinterpret_cast<const uint8_t *>(MemBuf->getBufferStart()), + MemBuf->getBufferSize()); + auto &DataSection = Obj->addSection<Section>(Data); + DataSection.Name = ".data"; + DataSection.Type = ELF::SHT_PROGBITS; + DataSection.Size = Data.size(); + DataSection.Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE; + + std::string SanitizedFilename = MemBuf->getBufferIdentifier().str(); + std::replace_if( + std::begin(SanitizedFilename), std::end(SanitizedFilename), + [](char C) { return !isAlnum(C); }, '_'); + Twine Prefix = Twine("_binary_") + SanitizedFilename; + + SymTab->addSymbol(Prefix + "_start", STB_GLOBAL, STT_NOTYPE, &DataSection, + /*Value=*/0, NewSymbolVisibility, 0, 0); + SymTab->addSymbol(Prefix + "_end", STB_GLOBAL, STT_NOTYPE, &DataSection, + /*Value=*/DataSection.Size, NewSymbolVisibility, 0, 0); + SymTab->addSymbol(Prefix + "_size", STB_GLOBAL, STT_NOTYPE, nullptr, + /*Value=*/DataSection.Size, NewSymbolVisibility, SHN_ABS, + 0); +} + +Expected<std::unique_ptr<Object>> BinaryELFBuilder::build() { + initFileHeader(); + initHeaderSegment(); + + SymbolTableSection *SymTab = addSymTab(addStrTab()); + if (Error Err = initSections()) + return std::move(Err); + addData(SymTab); + + return std::move(Obj); +} + +// Adds sections from IHEX data file. Data should have been +// fully validated by this time. 
+void IHexELFBuilder::addDataSections() { + OwnedDataSection *Section = nullptr; + uint64_t SegmentAddr = 0, BaseAddr = 0; + uint32_t SecNo = 1; + + for (const IHexRecord &R : Records) { + uint64_t RecAddr; + switch (R.Type) { + case IHexRecord::Data: + // Ignore empty data records + if (R.HexData.empty()) + continue; + RecAddr = R.Addr + SegmentAddr + BaseAddr; + if (!Section || Section->Addr + Section->Size != RecAddr) { + // OriginalOffset field is only used to sort sections before layout, so + // instead of keeping track of real offsets in IHEX file, and as + // layoutSections() and layoutSectionsForOnlyKeepDebug() use + // llvm::stable_sort(), we can just set it to a constant (zero). + Section = &Obj->addSection<OwnedDataSection>( + ".sec" + std::to_string(SecNo), RecAddr, + ELF::SHF_ALLOC | ELF::SHF_WRITE, 0); + SecNo++; + } + Section->appendHexData(R.HexData); + break; + case IHexRecord::EndOfFile: + break; + case IHexRecord::SegmentAddr: + // 20-bit segment address. + SegmentAddr = checkedGetHex<uint16_t>(R.HexData) << 4; + break; + case IHexRecord::StartAddr80x86: + case IHexRecord::StartAddr: + Obj->Entry = checkedGetHex<uint32_t>(R.HexData); + assert(Obj->Entry <= 0xFFFFFU); + break; + case IHexRecord::ExtendedAddr: + // 16-31 bits of linear base address + BaseAddr = checkedGetHex<uint16_t>(R.HexData) << 16; + break; + default: + llvm_unreachable("unknown record type"); + } + } +} + +Expected<std::unique_ptr<Object>> IHexELFBuilder::build() { + initFileHeader(); + initHeaderSegment(); + StringTableSection *StrTab = addStrTab(); + addSymTab(StrTab); + if (Error Err = initSections()) + return std::move(Err); + addDataSections(); + + return std::move(Obj); +} + +template <class ELFT> +ELFBuilder<ELFT>::ELFBuilder(const ELFObjectFile<ELFT> &ElfObj, Object &Obj, + std::optional<StringRef> ExtractPartition) + : ElfFile(ElfObj.getELFFile()), Obj(Obj), + ExtractPartition(ExtractPartition) { + Obj.IsMips64EL = ElfFile.isMips64EL(); +} + +template <class ELFT> 
void ELFBuilder<ELFT>::setParentSegment(Segment &Child) { + for (Segment &Parent : Obj.segments()) { + // Every segment will overlap with itself but we don't want a segment to + // be its own parent so we avoid that situation. + if (&Child != &Parent && segmentOverlapsSegment(Child, Parent)) { + // We want a canonical "most parental" segment but this requires + // inspecting the ParentSegment. + if (compareSegmentsByOffset(&Parent, &Child)) + if (Child.ParentSegment == nullptr || + compareSegmentsByOffset(&Parent, Child.ParentSegment)) { + Child.ParentSegment = &Parent; + } + } + } +} + +template <class ELFT> Error ELFBuilder<ELFT>::findEhdrOffset() { + if (!ExtractPartition) + return Error::success(); + + for (const SectionBase &Sec : Obj.sections()) { + if (Sec.Type == SHT_LLVM_PART_EHDR && Sec.Name == *ExtractPartition) { + EhdrOffset = Sec.Offset; + return Error::success(); + } + } + return createStringError(errc::invalid_argument, + "could not find partition named '" + + *ExtractPartition + "'"); +} + +template <class ELFT> +Error ELFBuilder<ELFT>::readProgramHeaders(const ELFFile<ELFT> &HeadersFile) { + uint32_t Index = 0; + + Expected<typename ELFFile<ELFT>::Elf_Phdr_Range> Headers = + HeadersFile.program_headers(); + if (!Headers) + return Headers.takeError(); + + for (const typename ELFFile<ELFT>::Elf_Phdr &Phdr : *Headers) { + if (Phdr.p_offset + Phdr.p_filesz > HeadersFile.getBufSize()) + return createStringError( + errc::invalid_argument, + "program header with offset 0x" + Twine::utohexstr(Phdr.p_offset) + + " and file size 0x" + Twine::utohexstr(Phdr.p_filesz) + + " goes past the end of the file"); + + ArrayRef<uint8_t> Data{HeadersFile.base() + Phdr.p_offset, + (size_t)Phdr.p_filesz}; + Segment &Seg = Obj.addSegment(Data); + Seg.Type = Phdr.p_type; + Seg.Flags = Phdr.p_flags; + Seg.OriginalOffset = Phdr.p_offset + EhdrOffset; + Seg.Offset = Phdr.p_offset + EhdrOffset; + Seg.VAddr = Phdr.p_vaddr; + Seg.PAddr = Phdr.p_paddr; + Seg.FileSize = 
Phdr.p_filesz; + Seg.MemSize = Phdr.p_memsz; + Seg.Align = Phdr.p_align; + Seg.Index = Index++; + for (SectionBase &Sec : Obj.sections()) + if (sectionWithinSegment(Sec, Seg)) { + Seg.addSection(&Sec); + if (!Sec.ParentSegment || Sec.ParentSegment->Offset > Seg.Offset) + Sec.ParentSegment = &Seg; + } + } + + auto &ElfHdr = Obj.ElfHdrSegment; + ElfHdr.Index = Index++; + ElfHdr.OriginalOffset = ElfHdr.Offset = EhdrOffset; + + const typename ELFT::Ehdr &Ehdr = HeadersFile.getHeader(); + auto &PrHdr = Obj.ProgramHdrSegment; + PrHdr.Type = PT_PHDR; + PrHdr.Flags = 0; + // The spec requires us to have p_vaddr % p_align == p_offset % p_align. + // Whereas this works automatically for ElfHdr, here OriginalOffset is + // always non-zero and to ensure the equation we assign the same value to + // VAddr as well. + PrHdr.OriginalOffset = PrHdr.Offset = PrHdr.VAddr = EhdrOffset + Ehdr.e_phoff; + PrHdr.PAddr = 0; + PrHdr.FileSize = PrHdr.MemSize = Ehdr.e_phentsize * Ehdr.e_phnum; + // The spec requires us to naturally align all the fields. + PrHdr.Align = sizeof(Elf_Addr); + PrHdr.Index = Index++; + + // Now we do an O(n^2) loop through the segments in order to match up + // segments. 
+ for (Segment &Child : Obj.segments()) + setParentSegment(Child); + setParentSegment(ElfHdr); + setParentSegment(PrHdr); + + return Error::success(); +} + +template <class ELFT> +Error ELFBuilder<ELFT>::initGroupSection(GroupSection *GroupSec) { + if (GroupSec->Align % sizeof(ELF::Elf32_Word) != 0) + return createStringError(errc::invalid_argument, + "invalid alignment " + Twine(GroupSec->Align) + + " of group section '" + GroupSec->Name + "'"); + SectionTableRef SecTable = Obj.sections(); + if (GroupSec->Link != SHN_UNDEF) { + auto SymTab = SecTable.template getSectionOfType<SymbolTableSection>( + GroupSec->Link, + "link field value '" + Twine(GroupSec->Link) + "' in section '" + + GroupSec->Name + "' is invalid", + "link field value '" + Twine(GroupSec->Link) + "' in section '" + + GroupSec->Name + "' is not a symbol table"); + if (!SymTab) + return SymTab.takeError(); + + Expected<Symbol *> Sym = (*SymTab)->getSymbolByIndex(GroupSec->Info); + if (!Sym) + return createStringError(errc::invalid_argument, + "info field value '" + Twine(GroupSec->Info) + + "' in section '" + GroupSec->Name + + "' is not a valid symbol index"); + GroupSec->setSymTab(*SymTab); + GroupSec->setSymbol(*Sym); + } + if (GroupSec->Contents.size() % sizeof(ELF::Elf32_Word) || + GroupSec->Contents.empty()) + return createStringError(errc::invalid_argument, + "the content of the section " + GroupSec->Name + + " is malformed"); + const ELF::Elf32_Word *Word = + reinterpret_cast<const ELF::Elf32_Word *>(GroupSec->Contents.data()); + const ELF::Elf32_Word *End = + Word + GroupSec->Contents.size() / sizeof(ELF::Elf32_Word); + GroupSec->setFlagWord( + support::endian::read32<ELFT::TargetEndianness>(Word++)); + for (; Word != End; ++Word) { + uint32_t Index = support::endian::read32<ELFT::TargetEndianness>(Word); + Expected<SectionBase *> Sec = SecTable.getSection( + Index, "group member index " + Twine(Index) + " in section '" + + GroupSec->Name + "' is invalid"); + if (!Sec) + return 
Sec.takeError(); + + GroupSec->addMember(*Sec); + } + + return Error::success(); +} + +template <class ELFT> +Error ELFBuilder<ELFT>::initSymbolTable(SymbolTableSection *SymTab) { + Expected<const Elf_Shdr *> Shdr = ElfFile.getSection(SymTab->Index); + if (!Shdr) + return Shdr.takeError(); + + Expected<StringRef> StrTabData = ElfFile.getStringTableForSymtab(**Shdr); + if (!StrTabData) + return StrTabData.takeError(); + + ArrayRef<Elf_Word> ShndxData; + + Expected<typename ELFFile<ELFT>::Elf_Sym_Range> Symbols = + ElfFile.symbols(*Shdr); + if (!Symbols) + return Symbols.takeError(); + + for (const typename ELFFile<ELFT>::Elf_Sym &Sym : *Symbols) { + SectionBase *DefSection = nullptr; + + Expected<StringRef> Name = Sym.getName(*StrTabData); + if (!Name) + return Name.takeError(); + + if (Sym.st_shndx == SHN_XINDEX) { + if (SymTab->getShndxTable() == nullptr) + return createStringError(errc::invalid_argument, + "symbol '" + *Name + + "' has index SHN_XINDEX but no " + "SHT_SYMTAB_SHNDX section exists"); + if (ShndxData.data() == nullptr) { + Expected<const Elf_Shdr *> ShndxSec = + ElfFile.getSection(SymTab->getShndxTable()->Index); + if (!ShndxSec) + return ShndxSec.takeError(); + + Expected<ArrayRef<Elf_Word>> Data = + ElfFile.template getSectionContentsAsArray<Elf_Word>(**ShndxSec); + if (!Data) + return Data.takeError(); + + ShndxData = *Data; + if (ShndxData.size() != Symbols->size()) + return createStringError( + errc::invalid_argument, + "symbol section index table does not have the same number of " + "entries as the symbol table"); + } + Elf_Word Index = ShndxData[&Sym - Symbols->begin()]; + Expected<SectionBase *> Sec = Obj.sections().getSection( + Index, + "symbol '" + *Name + "' has invalid section index " + Twine(Index)); + if (!Sec) + return Sec.takeError(); + + DefSection = *Sec; + } else if (Sym.st_shndx >= SHN_LORESERVE) { + if (!isValidReservedSectionIndex(Sym.st_shndx, Obj.Machine)) { + return createStringError( + errc::invalid_argument, + "symbol '" 
+ *Name + + "' has unsupported value greater than or equal " + "to SHN_LORESERVE: " + + Twine(Sym.st_shndx)); + } + } else if (Sym.st_shndx != SHN_UNDEF) { + Expected<SectionBase *> Sec = Obj.sections().getSection( + Sym.st_shndx, "symbol '" + *Name + + "' is defined has invalid section index " + + Twine(Sym.st_shndx)); + if (!Sec) + return Sec.takeError(); + + DefSection = *Sec; + } + + SymTab->addSymbol(*Name, Sym.getBinding(), Sym.getType(), DefSection, + Sym.getValue(), Sym.st_other, Sym.st_shndx, Sym.st_size); + } + + return Error::success(); +} + +template <class ELFT> +static void getAddend(uint64_t &, const Elf_Rel_Impl<ELFT, false> &) {} + +template <class ELFT> +static void getAddend(uint64_t &ToSet, const Elf_Rel_Impl<ELFT, true> &Rela) { + ToSet = Rela.r_addend; +} + +template <class T> +static Error initRelocations(RelocationSection *Relocs, T RelRange) { + for (const auto &Rel : RelRange) { + Relocation ToAdd; + ToAdd.Offset = Rel.r_offset; + getAddend(ToAdd.Addend, Rel); + ToAdd.Type = Rel.getType(Relocs->getObject().IsMips64EL); + + if (uint32_t Sym = Rel.getSymbol(Relocs->getObject().IsMips64EL)) { + if (!Relocs->getObject().SymbolTable) + return createStringError( + errc::invalid_argument, + "'" + Relocs->Name + "': relocation references symbol with index " + + Twine(Sym) + ", but there is no symbol table"); + Expected<Symbol *> SymByIndex = + Relocs->getObject().SymbolTable->getSymbolByIndex(Sym); + if (!SymByIndex) + return SymByIndex.takeError(); + + ToAdd.RelocSymbol = *SymByIndex; + } + + Relocs->addRelocation(ToAdd); + } + + return Error::success(); +} + +Expected<SectionBase *> SectionTableRef::getSection(uint32_t Index, + Twine ErrMsg) { + if (Index == SHN_UNDEF || Index > Sections.size()) + return createStringError(errc::invalid_argument, ErrMsg); + return Sections[Index - 1].get(); +} + +template <class T> +Expected<T *> SectionTableRef::getSectionOfType(uint32_t Index, + Twine IndexErrMsg, + Twine TypeErrMsg) { + Expected<SectionBase *> 
BaseSec = getSection(Index, IndexErrMsg); + if (!BaseSec) + return BaseSec.takeError(); + + if (T *Sec = dyn_cast<T>(*BaseSec)) + return Sec; + + return createStringError(errc::invalid_argument, TypeErrMsg); +} + +template <class ELFT> +Expected<SectionBase &> ELFBuilder<ELFT>::makeSection(const Elf_Shdr &Shdr) { + switch (Shdr.sh_type) { + case SHT_REL: + case SHT_RELA: + if (Shdr.sh_flags & SHF_ALLOC) { + if (Expected<ArrayRef<uint8_t>> Data = ElfFile.getSectionContents(Shdr)) + return Obj.addSection<DynamicRelocationSection>(*Data); + else + return Data.takeError(); + } + return Obj.addSection<RelocationSection>(Obj); + case SHT_STRTAB: + // If a string table is allocated we don't want to mess with it. That would + // mean altering the memory image. There are no special link types or + // anything so we can just use a Section. + if (Shdr.sh_flags & SHF_ALLOC) { + if (Expected<ArrayRef<uint8_t>> Data = ElfFile.getSectionContents(Shdr)) + return Obj.addSection<Section>(*Data); + else + return Data.takeError(); + } + return Obj.addSection<StringTableSection>(); + case SHT_HASH: + case SHT_GNU_HASH: + // Hash tables should refer to SHT_DYNSYM which we're not going to change. + // Because of this we don't need to mess with the hash tables either. 
+ if (Expected<ArrayRef<uint8_t>> Data = ElfFile.getSectionContents(Shdr)) + return Obj.addSection<Section>(*Data); + else + return Data.takeError(); + case SHT_GROUP: + if (Expected<ArrayRef<uint8_t>> Data = ElfFile.getSectionContents(Shdr)) + return Obj.addSection<GroupSection>(*Data); + else + return Data.takeError(); + case SHT_DYNSYM: + if (Expected<ArrayRef<uint8_t>> Data = ElfFile.getSectionContents(Shdr)) + return Obj.addSection<DynamicSymbolTableSection>(*Data); + else + return Data.takeError(); + case SHT_DYNAMIC: + if (Expected<ArrayRef<uint8_t>> Data = ElfFile.getSectionContents(Shdr)) + return Obj.addSection<DynamicSection>(*Data); + else + return Data.takeError(); + case SHT_SYMTAB: { + auto &SymTab = Obj.addSection<SymbolTableSection>(); + Obj.SymbolTable = &SymTab; + return SymTab; + } + case SHT_SYMTAB_SHNDX: { + auto &ShndxSection = Obj.addSection<SectionIndexSection>(); + Obj.SectionIndexTable = &ShndxSection; + return ShndxSection; + } + case SHT_NOBITS: + return Obj.addSection<Section>(ArrayRef<uint8_t>()); + default: { + Expected<ArrayRef<uint8_t>> Data = ElfFile.getSectionContents(Shdr); + if (!Data) + return Data.takeError(); + + Expected<StringRef> Name = ElfFile.getSectionName(Shdr); + if (!Name) + return Name.takeError(); + + if (!(Shdr.sh_flags & ELF::SHF_COMPRESSED)) + return Obj.addSection<Section>(*Data); + auto *Chdr = reinterpret_cast<const Elf_Chdr_Impl<ELFT> *>(Data->data()); + return Obj.addSection<CompressedSection>(CompressedSection( + *Data, Chdr->ch_type, Chdr->ch_size, Chdr->ch_addralign)); + } + } +} + +template <class ELFT> Error ELFBuilder<ELFT>::readSectionHeaders() { + uint32_t Index = 0; + Expected<typename ELFFile<ELFT>::Elf_Shdr_Range> Sections = + ElfFile.sections(); + if (!Sections) + return Sections.takeError(); + + for (const typename ELFFile<ELFT>::Elf_Shdr &Shdr : *Sections) { + if (Index == 0) { + ++Index; + continue; + } + Expected<SectionBase &> Sec = makeSection(Shdr); + if (!Sec) + return Sec.takeError(); 
+ + Expected<StringRef> SecName = ElfFile.getSectionName(Shdr); + if (!SecName) + return SecName.takeError(); + Sec->Name = SecName->str(); + Sec->Type = Sec->OriginalType = Shdr.sh_type; + Sec->Flags = Sec->OriginalFlags = Shdr.sh_flags; + Sec->Addr = Shdr.sh_addr; + Sec->Offset = Shdr.sh_offset; + Sec->OriginalOffset = Shdr.sh_offset; + Sec->Size = Shdr.sh_size; + Sec->Link = Shdr.sh_link; + Sec->Info = Shdr.sh_info; + Sec->Align = Shdr.sh_addralign; + Sec->EntrySize = Shdr.sh_entsize; + Sec->Index = Index++; + Sec->OriginalIndex = Sec->Index; + Sec->OriginalData = ArrayRef<uint8_t>( + ElfFile.base() + Shdr.sh_offset, + (Shdr.sh_type == SHT_NOBITS) ? (size_t)0 : Shdr.sh_size); + } + + return Error::success(); +} + +template <class ELFT> Error ELFBuilder<ELFT>::readSections(bool EnsureSymtab) { + uint32_t ShstrIndex = ElfFile.getHeader().e_shstrndx; + if (ShstrIndex == SHN_XINDEX) { + Expected<const Elf_Shdr *> Sec = ElfFile.getSection(0); + if (!Sec) + return Sec.takeError(); + + ShstrIndex = (*Sec)->sh_link; + } + + if (ShstrIndex == SHN_UNDEF) + Obj.HadShdrs = false; + else { + Expected<StringTableSection *> Sec = + Obj.sections().template getSectionOfType<StringTableSection>( + ShstrIndex, + "e_shstrndx field value " + Twine(ShstrIndex) + " in elf header " + + " is invalid", + "e_shstrndx field value " + Twine(ShstrIndex) + " in elf header " + + " does not reference a string table"); + if (!Sec) + return Sec.takeError(); + + Obj.SectionNames = *Sec; + } + + // If a section index table exists we'll need to initialize it before we + // initialize the symbol table because the symbol table might need to + // reference it. + if (Obj.SectionIndexTable) + if (Error Err = Obj.SectionIndexTable->initialize(Obj.sections())) + return Err; + + // Now that all of the sections have been added we can fill out some extra + // details about symbol tables. We need the symbol table filled out before + // any relocations. 
+ if (Obj.SymbolTable) { + if (Error Err = Obj.SymbolTable->initialize(Obj.sections())) + return Err; + if (Error Err = initSymbolTable(Obj.SymbolTable)) + return Err; + } else if (EnsureSymtab) { + if (Error Err = Obj.addNewSymbolTable()) + return Err; + } + + // Now that all sections and symbols have been added we can add + // relocations that reference symbols and set the link and info fields for + // relocation sections. + for (SectionBase &Sec : Obj.sections()) { + if (&Sec == Obj.SymbolTable) + continue; + if (Error Err = Sec.initialize(Obj.sections())) + return Err; + if (auto RelSec = dyn_cast<RelocationSection>(&Sec)) { + Expected<typename ELFFile<ELFT>::Elf_Shdr_Range> Sections = + ElfFile.sections(); + if (!Sections) + return Sections.takeError(); + + const typename ELFFile<ELFT>::Elf_Shdr *Shdr = + Sections->begin() + RelSec->Index; + if (RelSec->Type == SHT_REL) { + Expected<typename ELFFile<ELFT>::Elf_Rel_Range> Rels = + ElfFile.rels(*Shdr); + if (!Rels) + return Rels.takeError(); + + if (Error Err = initRelocations(RelSec, *Rels)) + return Err; + } else { + Expected<typename ELFFile<ELFT>::Elf_Rela_Range> Relas = + ElfFile.relas(*Shdr); + if (!Relas) + return Relas.takeError(); + + if (Error Err = initRelocations(RelSec, *Relas)) + return Err; + } + } else if (auto GroupSec = dyn_cast<GroupSection>(&Sec)) { + if (Error Err = initGroupSection(GroupSec)) + return Err; + } + } + + return Error::success(); +} + +template <class ELFT> Error ELFBuilder<ELFT>::build(bool EnsureSymtab) { + if (Error E = readSectionHeaders()) + return E; + if (Error E = findEhdrOffset()) + return E; + + // The ELFFile whose ELF headers and program headers are copied into the + // output file. Normally the same as ElfFile, but if we're extracting a + // loadable partition it will point to the partition's headers. 
+ Expected<ELFFile<ELFT>> HeadersFile = ELFFile<ELFT>::create(toStringRef( + {ElfFile.base() + EhdrOffset, ElfFile.getBufSize() - EhdrOffset})); + if (!HeadersFile) + return HeadersFile.takeError(); + + const typename ELFFile<ELFT>::Elf_Ehdr &Ehdr = HeadersFile->getHeader(); + Obj.Is64Bits = Ehdr.e_ident[EI_CLASS] == ELFCLASS64; + Obj.OSABI = Ehdr.e_ident[EI_OSABI]; + Obj.ABIVersion = Ehdr.e_ident[EI_ABIVERSION]; + Obj.Type = Ehdr.e_type; + Obj.Machine = Ehdr.e_machine; + Obj.Version = Ehdr.e_version; + Obj.Entry = Ehdr.e_entry; + Obj.Flags = Ehdr.e_flags; + + if (Error E = readSections(EnsureSymtab)) + return E; + return readProgramHeaders(*HeadersFile); +} + +Writer::~Writer() = default; + +Reader::~Reader() = default; + +Expected<std::unique_ptr<Object>> +BinaryReader::create(bool /*EnsureSymtab*/) const { + return BinaryELFBuilder(MemBuf, NewSymbolVisibility).build(); +} + +Expected<std::vector<IHexRecord>> IHexReader::parse() const { + SmallVector<StringRef, 16> Lines; + std::vector<IHexRecord> Records; + bool HasSections = false; + + MemBuf->getBuffer().split(Lines, '\n'); + Records.reserve(Lines.size()); + for (size_t LineNo = 1; LineNo <= Lines.size(); ++LineNo) { + StringRef Line = Lines[LineNo - 1].trim(); + if (Line.empty()) + continue; + + Expected<IHexRecord> R = IHexRecord::parse(Line); + if (!R) + return parseError(LineNo, R.takeError()); + if (R->Type == IHexRecord::EndOfFile) + break; + HasSections |= (R->Type == IHexRecord::Data); + Records.push_back(*R); + } + if (!HasSections) + return parseError(-1U, "no sections"); + + return std::move(Records); +} + +Expected<std::unique_ptr<Object>> +IHexReader::create(bool /*EnsureSymtab*/) const { + Expected<std::vector<IHexRecord>> Records = parse(); + if (!Records) + return Records.takeError(); + + return IHexELFBuilder(*Records).build(); +} + +Expected<std::unique_ptr<Object>> ELFReader::create(bool EnsureSymtab) const { + auto Obj = std::make_unique<Object>(); + if (auto *O = 
dyn_cast<ELFObjectFile<ELF32LE>>(Bin)) { + ELFBuilder<ELF32LE> Builder(*O, *Obj, ExtractPartition); + if (Error Err = Builder.build(EnsureSymtab)) + return std::move(Err); + return std::move(Obj); + } else if (auto *O = dyn_cast<ELFObjectFile<ELF64LE>>(Bin)) { + ELFBuilder<ELF64LE> Builder(*O, *Obj, ExtractPartition); + if (Error Err = Builder.build(EnsureSymtab)) + return std::move(Err); + return std::move(Obj); + } else if (auto *O = dyn_cast<ELFObjectFile<ELF32BE>>(Bin)) { + ELFBuilder<ELF32BE> Builder(*O, *Obj, ExtractPartition); + if (Error Err = Builder.build(EnsureSymtab)) + return std::move(Err); + return std::move(Obj); + } else if (auto *O = dyn_cast<ELFObjectFile<ELF64BE>>(Bin)) { + ELFBuilder<ELF64BE> Builder(*O, *Obj, ExtractPartition); + if (Error Err = Builder.build(EnsureSymtab)) + return std::move(Err); + return std::move(Obj); + } + return createStringError(errc::invalid_argument, "invalid file type"); +} + +template <class ELFT> void ELFWriter<ELFT>::writeEhdr() { + Elf_Ehdr &Ehdr = *reinterpret_cast<Elf_Ehdr *>(Buf->getBufferStart()); + std::fill(Ehdr.e_ident, Ehdr.e_ident + 16, 0); + Ehdr.e_ident[EI_MAG0] = 0x7f; + Ehdr.e_ident[EI_MAG1] = 'E'; + Ehdr.e_ident[EI_MAG2] = 'L'; + Ehdr.e_ident[EI_MAG3] = 'F'; + Ehdr.e_ident[EI_CLASS] = ELFT::Is64Bits ? ELFCLASS64 : ELFCLASS32; + Ehdr.e_ident[EI_DATA] = + ELFT::TargetEndianness == support::big ? ELFDATA2MSB : ELFDATA2LSB; + Ehdr.e_ident[EI_VERSION] = EV_CURRENT; + Ehdr.e_ident[EI_OSABI] = Obj.OSABI; + Ehdr.e_ident[EI_ABIVERSION] = Obj.ABIVersion; + + Ehdr.e_type = Obj.Type; + Ehdr.e_machine = Obj.Machine; + Ehdr.e_version = Obj.Version; + Ehdr.e_entry = Obj.Entry; + // We have to use the fully-qualified name llvm::size + // since some compilers complain on ambiguous resolution. + Ehdr.e_phnum = llvm::size(Obj.segments()); + Ehdr.e_phoff = (Ehdr.e_phnum != 0) ? Obj.ProgramHdrSegment.Offset : 0; + Ehdr.e_phentsize = (Ehdr.e_phnum != 0) ? 
sizeof(Elf_Phdr) : 0; + Ehdr.e_flags = Obj.Flags; + Ehdr.e_ehsize = sizeof(Elf_Ehdr); + if (WriteSectionHeaders && Obj.sections().size() != 0) { + Ehdr.e_shentsize = sizeof(Elf_Shdr); + Ehdr.e_shoff = Obj.SHOff; + // """ + // If the number of sections is greater than or equal to + // SHN_LORESERVE (0xff00), this member has the value zero and the actual + // number of section header table entries is contained in the sh_size field + // of the section header at index 0. + // """ + auto Shnum = Obj.sections().size() + 1; + if (Shnum >= SHN_LORESERVE) + Ehdr.e_shnum = 0; + else + Ehdr.e_shnum = Shnum; + // """ + // If the section name string table section index is greater than or equal + // to SHN_LORESERVE (0xff00), this member has the value SHN_XINDEX (0xffff) + // and the actual index of the section name string table section is + // contained in the sh_link field of the section header at index 0. + // """ + if (Obj.SectionNames->Index >= SHN_LORESERVE) + Ehdr.e_shstrndx = SHN_XINDEX; + else + Ehdr.e_shstrndx = Obj.SectionNames->Index; + } else { + Ehdr.e_shentsize = 0; + Ehdr.e_shoff = 0; + Ehdr.e_shnum = 0; + Ehdr.e_shstrndx = 0; + } +} + +template <class ELFT> void ELFWriter<ELFT>::writePhdrs() { + for (auto &Seg : Obj.segments()) + writePhdr(Seg); +} + +template <class ELFT> void ELFWriter<ELFT>::writeShdrs() { + // This reference serves to write the dummy section header at the begining + // of the file. It is not used for anything else + Elf_Shdr &Shdr = + *reinterpret_cast<Elf_Shdr *>(Buf->getBufferStart() + Obj.SHOff); + Shdr.sh_name = 0; + Shdr.sh_type = SHT_NULL; + Shdr.sh_flags = 0; + Shdr.sh_addr = 0; + Shdr.sh_offset = 0; + // See writeEhdr for why we do this. + uint64_t Shnum = Obj.sections().size() + 1; + if (Shnum >= SHN_LORESERVE) + Shdr.sh_size = Shnum; + else + Shdr.sh_size = 0; + // See writeEhdr for why we do this. 
+ if (Obj.SectionNames != nullptr && Obj.SectionNames->Index >= SHN_LORESERVE) + Shdr.sh_link = Obj.SectionNames->Index; + else + Shdr.sh_link = 0; + Shdr.sh_info = 0; + Shdr.sh_addralign = 0; + Shdr.sh_entsize = 0; + + for (SectionBase &Sec : Obj.sections()) + writeShdr(Sec); +} + +template <class ELFT> Error ELFWriter<ELFT>::writeSectionData() { + for (SectionBase &Sec : Obj.sections()) + // Segments are responsible for writing their contents, so only write the + // section data if the section is not in a segment. Note that this renders + // sections in segments effectively immutable. + if (Sec.ParentSegment == nullptr) + if (Error Err = Sec.accept(*SecWriter)) + return Err; + + return Error::success(); +} + +template <class ELFT> void ELFWriter<ELFT>::writeSegmentData() { + for (Segment &Seg : Obj.segments()) { + size_t Size = std::min<size_t>(Seg.FileSize, Seg.getContents().size()); + std::memcpy(Buf->getBufferStart() + Seg.Offset, Seg.getContents().data(), + Size); + } + + for (auto it : Obj.getUpdatedSections()) { + SectionBase *Sec = it.first; + ArrayRef<uint8_t> Data = it.second; + + auto *Parent = Sec->ParentSegment; + assert(Parent && "This section should've been part of a segment."); + uint64_t Offset = + Sec->OriginalOffset - Parent->OriginalOffset + Parent->Offset; + llvm::copy(Data, Buf->getBufferStart() + Offset); + } + + // Iterate over removed sections and overwrite their old data with zeroes. 
+ for (auto &Sec : Obj.removedSections()) { + Segment *Parent = Sec.ParentSegment; + if (Parent == nullptr || Sec.Type == SHT_NOBITS || Sec.Size == 0) + continue; + uint64_t Offset = + Sec.OriginalOffset - Parent->OriginalOffset + Parent->Offset; + std::memset(Buf->getBufferStart() + Offset, 0, Sec.Size); + } +} + +template <class ELFT> +ELFWriter<ELFT>::ELFWriter(Object &Obj, raw_ostream &Buf, bool WSH, + bool OnlyKeepDebug) + : Writer(Obj, Buf), WriteSectionHeaders(WSH && Obj.HadShdrs), + OnlyKeepDebug(OnlyKeepDebug) {} + +Error Object::updateSection(StringRef Name, ArrayRef<uint8_t> Data) { + auto It = llvm::find_if(Sections, + [&](const SecPtr &Sec) { return Sec->Name == Name; }); + if (It == Sections.end()) + return createStringError(errc::invalid_argument, "section '%s' not found", + Name.str().c_str()); + + auto *OldSec = It->get(); + if (!OldSec->hasContents()) + return createStringError( + errc::invalid_argument, + "section '%s' cannot be updated because it does not have contents", + Name.str().c_str()); + + if (Data.size() > OldSec->Size && OldSec->ParentSegment) + return createStringError(errc::invalid_argument, + "cannot fit data of size %zu into section '%s' " + "with size %" PRIu64 " that is part of a segment", + Data.size(), Name.str().c_str(), OldSec->Size); + + if (!OldSec->ParentSegment) { + *It = std::make_unique<OwnedDataSection>(*OldSec, Data); + } else { + // The segment writer will be in charge of updating these contents. 
+ OldSec->Size = Data.size(); + UpdatedSections[OldSec] = Data; + } + + return Error::success(); +} + +Error Object::removeSections( + bool AllowBrokenLinks, std::function<bool(const SectionBase &)> ToRemove) { + + auto Iter = std::stable_partition( + std::begin(Sections), std::end(Sections), [=](const SecPtr &Sec) { + if (ToRemove(*Sec)) + return false; + if (auto RelSec = dyn_cast<RelocationSectionBase>(Sec.get())) { + if (auto ToRelSec = RelSec->getSection()) + return !ToRemove(*ToRelSec); + } + return true; + }); + if (SymbolTable != nullptr && ToRemove(*SymbolTable)) + SymbolTable = nullptr; + if (SectionNames != nullptr && ToRemove(*SectionNames)) + SectionNames = nullptr; + if (SectionIndexTable != nullptr && ToRemove(*SectionIndexTable)) + SectionIndexTable = nullptr; + // Now make sure there are no remaining references to the sections that will + // be removed. Sometimes it is impossible to remove a reference so we emit + // an error here instead. + std::unordered_set<const SectionBase *> RemoveSections; + RemoveSections.reserve(std::distance(Iter, std::end(Sections))); + for (auto &RemoveSec : make_range(Iter, std::end(Sections))) { + for (auto &Segment : Segments) + Segment->removeSection(RemoveSec.get()); + RemoveSec->onRemove(); + RemoveSections.insert(RemoveSec.get()); + } + + // For each section that remains alive, we want to remove the dead references. + // This either might update the content of the section (e.g. remove symbols + // from symbol table that belongs to removed section) or trigger an error if + // a live section critically depends on a section being removed somehow + // (e.g. the removed section is referenced by a relocation). 
+ for (auto &KeepSec : make_range(std::begin(Sections), Iter)) { + if (Error E = KeepSec->removeSectionReferences( + AllowBrokenLinks, [&RemoveSections](const SectionBase *Sec) { + return RemoveSections.find(Sec) != RemoveSections.end(); + })) + return E; + } + + // Transfer removed sections into the Object RemovedSections container for use + // later. + std::move(Iter, Sections.end(), std::back_inserter(RemovedSections)); + // Now finally get rid of them all together. + Sections.erase(Iter, std::end(Sections)); + return Error::success(); +} + +Error Object::replaceSections( + const DenseMap<SectionBase *, SectionBase *> &FromTo) { + auto SectionIndexLess = [](const SecPtr &Lhs, const SecPtr &Rhs) { + return Lhs->Index < Rhs->Index; + }; + assert(llvm::is_sorted(Sections, SectionIndexLess) && + "Sections are expected to be sorted by Index"); + // Set indices of new sections so that they can be later sorted into positions + // of removed ones. + for (auto &I : FromTo) + I.second->Index = I.first->Index; + + // Notify all sections about the replacement. + for (auto &Sec : Sections) + Sec->replaceSectionReferences(FromTo); + + if (Error E = removeSections( + /*AllowBrokenLinks=*/false, + [=](const SectionBase &Sec) { return FromTo.count(&Sec) > 0; })) + return E; + llvm::sort(Sections, SectionIndexLess); + return Error::success(); +} + +Error Object::removeSymbols(function_ref<bool(const Symbol &)> ToRemove) { + if (SymbolTable) + for (const SecPtr &Sec : Sections) + if (Error E = Sec->removeSymbols(ToRemove)) + return E; + return Error::success(); +} + +Error Object::addNewSymbolTable() { + assert(!SymbolTable && "Object must not has a SymbolTable."); + + // Reuse an existing SHT_STRTAB section if it exists. 
+ StringTableSection *StrTab = nullptr; + for (SectionBase &Sec : sections()) { + if (Sec.Type == ELF::SHT_STRTAB && !(Sec.Flags & SHF_ALLOC)) { + StrTab = static_cast<StringTableSection *>(&Sec); + + // Prefer a string table that is not the section header string table, if + // such a table exists. + if (SectionNames != &Sec) + break; + } + } + if (!StrTab) + StrTab = &addSection<StringTableSection>(); + + SymbolTableSection &SymTab = addSection<SymbolTableSection>(); + SymTab.Name = ".symtab"; + SymTab.Link = StrTab->Index; + if (Error Err = SymTab.initialize(sections())) + return Err; + SymTab.addSymbol("", 0, 0, nullptr, 0, 0, 0, 0); + + SymbolTable = &SymTab; + + return Error::success(); +} + +// Orders segments such that if x = y->ParentSegment then y comes before x. +static void orderSegments(std::vector<Segment *> &Segments) { + llvm::stable_sort(Segments, compareSegmentsByOffset); +} + +// This function finds a consistent layout for a list of segments starting from +// an Offset. It assumes that Segments have been sorted by orderSegments and +// returns an Offset one past the end of the last segment. +static uint64_t layoutSegments(std::vector<Segment *> &Segments, + uint64_t Offset) { + assert(llvm::is_sorted(Segments, compareSegmentsByOffset)); + // The only way a segment should move is if a section was between two + // segments and that section was removed. If that section isn't in a segment + // then it's acceptable, but not ideal, to simply move it to after the + // segments. So we can simply layout segments one after the other accounting + // for alignment. + for (Segment *Seg : Segments) { + // We assume that segments have been ordered by OriginalOffset and Index + // such that a parent segment will always come before a child segment in + // OrderedSegments. This means that the Offset of the ParentSegment should + // already be set and we can set our offset relative to it. 
+ if (Seg->ParentSegment != nullptr) { + Segment *Parent = Seg->ParentSegment; + Seg->Offset = + Parent->Offset + Seg->OriginalOffset - Parent->OriginalOffset; + } else { + Seg->Offset = + alignTo(Offset, std::max<uint64_t>(Seg->Align, 1), Seg->VAddr); + } + Offset = std::max(Offset, Seg->Offset + Seg->FileSize); + } + return Offset; +} + +// This function finds a consistent layout for a list of sections. It assumes +// that the ->ParentSegment of each section has already been laid out. The +// supplied starting Offset is used for the starting offset of any section that +// does not have a ParentSegment. It returns either the offset given if all +// sections had a ParentSegment or an offset one past the last section if there +// was a section that didn't have a ParentSegment. +template <class Range> +static uint64_t layoutSections(Range Sections, uint64_t Offset) { + // Now the offset of every segment has been set we can assign the offsets + // of each section. For sections that are covered by a segment we should use + // the segment's original offset and the section's original offset to compute + // the offset from the start of the segment. Using the offset from the start + // of the segment we can assign a new offset to the section. For sections not + // covered by segments we can just bump Offset to the next valid location. + // While it is not necessary, layout the sections in the order based on their + // original offsets to resemble the input file as close as possible. 
+ std::vector<SectionBase *> OutOfSegmentSections; + uint32_t Index = 1; + for (auto &Sec : Sections) { + Sec.Index = Index++; + if (Sec.ParentSegment != nullptr) { + auto Segment = *Sec.ParentSegment; + Sec.Offset = + Segment.Offset + (Sec.OriginalOffset - Segment.OriginalOffset); + } else + OutOfSegmentSections.push_back(&Sec); + } + + llvm::stable_sort(OutOfSegmentSections, + [](const SectionBase *Lhs, const SectionBase *Rhs) { + return Lhs->OriginalOffset < Rhs->OriginalOffset; + }); + for (auto *Sec : OutOfSegmentSections) { + Offset = alignTo(Offset, Sec->Align == 0 ? 1 : Sec->Align); + Sec->Offset = Offset; + if (Sec->Type != SHT_NOBITS) + Offset += Sec->Size; + } + return Offset; +} + +// Rewrite sh_offset after some sections are changed to SHT_NOBITS and thus +// occupy no space in the file. +static uint64_t layoutSectionsForOnlyKeepDebug(Object &Obj, uint64_t Off) { + // The layout algorithm requires the sections to be handled in the order of + // their offsets in the input file, at least inside segments. + std::vector<SectionBase *> Sections; + Sections.reserve(Obj.sections().size()); + uint32_t Index = 1; + for (auto &Sec : Obj.sections()) { + Sec.Index = Index++; + Sections.push_back(&Sec); + } + llvm::stable_sort(Sections, + [](const SectionBase *Lhs, const SectionBase *Rhs) { + return Lhs->OriginalOffset < Rhs->OriginalOffset; + }); + + for (auto *Sec : Sections) { + auto *FirstSec = Sec->ParentSegment && Sec->ParentSegment->Type == PT_LOAD + ? Sec->ParentSegment->firstSection() + : nullptr; + + // The first section in a PT_LOAD has to have congruent offset and address + // modulo the alignment, which usually equals the maximum page size. + if (FirstSec && FirstSec == Sec) + Off = alignTo(Off, Sec->ParentSegment->Align, Sec->Addr); + + // sh_offset is not significant for SHT_NOBITS sections, but the congruence + // rule must be followed if it is the first section in a PT_LOAD. Do not + // advance Off. 
+ if (Sec->Type == SHT_NOBITS) { + Sec->Offset = Off; + continue; + } + + if (!FirstSec) { + // FirstSec being nullptr generally means that Sec does not have the + // SHF_ALLOC flag. + Off = Sec->Align ? alignTo(Off, Sec->Align) : Off; + } else if (FirstSec != Sec) { + // The offset is relative to the first section in the PT_LOAD segment. Use + // sh_offset for non-SHF_ALLOC sections. + Off = Sec->OriginalOffset - FirstSec->OriginalOffset + FirstSec->Offset; + } + Sec->Offset = Off; + Off += Sec->Size; + } + return Off; +} + +// Rewrite p_offset and p_filesz of non-PT_PHDR segments after sh_offset values +// have been updated. +static uint64_t layoutSegmentsForOnlyKeepDebug(std::vector<Segment *> &Segments, + uint64_t HdrEnd) { + uint64_t MaxOffset = 0; + for (Segment *Seg : Segments) { + if (Seg->Type == PT_PHDR) + continue; + + // The segment offset is generally the offset of the first section. + // + // For a segment containing no section (see sectionWithinSegment), if it has + // a parent segment, copy the parent segment's offset field. This works for + // empty PT_TLS. If no parent segment, use 0: the segment is not useful for + // debugging anyway. + const SectionBase *FirstSec = Seg->firstSection(); + uint64_t Offset = + FirstSec ? FirstSec->Offset + : (Seg->ParentSegment ? Seg->ParentSegment->Offset : 0); + uint64_t FileSize = 0; + for (const SectionBase *Sec : Seg->Sections) { + uint64_t Size = Sec->Type == SHT_NOBITS ? 0 : Sec->Size; + if (Sec->Offset + Size > Offset) + FileSize = std::max(FileSize, Sec->Offset + Size - Offset); + } + + // If the segment includes EHDR and program headers, don't make it smaller + // than the headers. 
+ if (Seg->Offset < HdrEnd && HdrEnd <= Seg->Offset + Seg->FileSize) { + FileSize += Offset - Seg->Offset; + Offset = Seg->Offset; + FileSize = std::max(FileSize, HdrEnd - Offset); + } + + Seg->Offset = Offset; + Seg->FileSize = FileSize; + MaxOffset = std::max(MaxOffset, Offset + FileSize); + } + return MaxOffset; +} + +template <class ELFT> void ELFWriter<ELFT>::initEhdrSegment() { + Segment &ElfHdr = Obj.ElfHdrSegment; + ElfHdr.Type = PT_PHDR; + ElfHdr.Flags = 0; + ElfHdr.VAddr = 0; + ElfHdr.PAddr = 0; + ElfHdr.FileSize = ElfHdr.MemSize = sizeof(Elf_Ehdr); + ElfHdr.Align = 0; +} + +template <class ELFT> void ELFWriter<ELFT>::assignOffsets() { + // We need a temporary list of segments that has a special order to it + // so that we know that anytime ->ParentSegment is set that segment has + // already had its offset properly set. + std::vector<Segment *> OrderedSegments; + for (Segment &Segment : Obj.segments()) + OrderedSegments.push_back(&Segment); + OrderedSegments.push_back(&Obj.ElfHdrSegment); + OrderedSegments.push_back(&Obj.ProgramHdrSegment); + orderSegments(OrderedSegments); + + uint64_t Offset; + if (OnlyKeepDebug) { + // For --only-keep-debug, the sections that did not preserve contents were + // changed to SHT_NOBITS. We now rewrite sh_offset fields of sections, and + // then rewrite p_offset/p_filesz of program headers. + uint64_t HdrEnd = + sizeof(Elf_Ehdr) + llvm::size(Obj.segments()) * sizeof(Elf_Phdr); + Offset = layoutSectionsForOnlyKeepDebug(Obj, HdrEnd); + Offset = std::max(Offset, + layoutSegmentsForOnlyKeepDebug(OrderedSegments, HdrEnd)); + } else { + // Offset is used as the start offset of the first segment to be laid out. + // Since the ELF Header (ElfHdrSegment) must be at the start of the file, + // we start at offset 0. + Offset = layoutSegments(OrderedSegments, 0); + Offset = layoutSections(Obj.sections(), Offset); + } + // If we need to write the section header table out then we need to align the + // Offset so that SHOffset is valid. 
+ if (WriteSectionHeaders) + Offset = alignTo(Offset, sizeof(Elf_Addr)); + Obj.SHOff = Offset; +} + +template <class ELFT> size_t ELFWriter<ELFT>::totalSize() const { + // We already have the section header offset so we can calculate the total + // size by just adding up the size of each section header. + if (!WriteSectionHeaders) + return Obj.SHOff; + size_t ShdrCount = Obj.sections().size() + 1; // Includes null shdr. + return Obj.SHOff + ShdrCount * sizeof(Elf_Shdr); +} + +template <class ELFT> Error ELFWriter<ELFT>::write() { + // Segment data must be written first, so that the ELF header and program + // header tables can overwrite it, if covered by a segment. + writeSegmentData(); + writeEhdr(); + writePhdrs(); + if (Error E = writeSectionData()) + return E; + if (WriteSectionHeaders) + writeShdrs(); + + // TODO: Implement direct writing to the output stream (without intermediate + // memory buffer Buf). + Out.write(Buf->getBufferStart(), Buf->getBufferSize()); + return Error::success(); +} + +static Error removeUnneededSections(Object &Obj) { + // We can remove an empty symbol table from non-relocatable objects. + // Relocatable objects typically have relocation sections whose + // sh_link field points to .symtab, so we can't remove .symtab + // even if it is empty. + if (Obj.isRelocatable() || Obj.SymbolTable == nullptr || + !Obj.SymbolTable->empty()) + return Error::success(); + + // .strtab can be used for section names. In such a case we shouldn't + // remove it. + auto *StrTab = Obj.SymbolTable->getStrTab() == Obj.SectionNames + ? nullptr + : Obj.SymbolTable->getStrTab(); + return Obj.removeSections(false, [&](const SectionBase &Sec) { + return &Sec == Obj.SymbolTable || &Sec == StrTab; + }); +} + +template <class ELFT> Error ELFWriter<ELFT>::finalize() { + // It could happen that SectionNames has been removed and yet the user wants + // a section header table output. We need to throw an error if a user tries + // to do that. 
+ if (Obj.SectionNames == nullptr && WriteSectionHeaders) + return createStringError(llvm::errc::invalid_argument, + "cannot write section header table because " + "section header string table was removed"); + + if (Error E = removeUnneededSections(Obj)) + return E; + + // We need to assign indexes before we perform layout because we need to know + // if we need large indexes or not. We can assign indexes first and check as + // we go to see if we will actully need large indexes. + bool NeedsLargeIndexes = false; + if (Obj.sections().size() >= SHN_LORESERVE) { + SectionTableRef Sections = Obj.sections(); + // Sections doesn't include the null section header, so account for this + // when skipping the first N sections. + NeedsLargeIndexes = + any_of(drop_begin(Sections, SHN_LORESERVE - 1), + [](const SectionBase &Sec) { return Sec.HasSymbol; }); + // TODO: handle case where only one section needs the large index table but + // only needs it because the large index table hasn't been removed yet. + } + + if (NeedsLargeIndexes) { + // This means we definitely need to have a section index table but if we + // already have one then we should use it instead of making a new one. + if (Obj.SymbolTable != nullptr && Obj.SectionIndexTable == nullptr) { + // Addition of a section to the end does not invalidate the indexes of + // other sections and assigns the correct index to the new section. + auto &Shndx = Obj.addSection<SectionIndexSection>(); + Obj.SymbolTable->setShndxTable(&Shndx); + Shndx.setSymTab(Obj.SymbolTable); + } + } else { + // Since we don't need SectionIndexTable we should remove it and all + // references to it. + if (Obj.SectionIndexTable != nullptr) { + // We do not support sections referring to the section index table. + if (Error E = Obj.removeSections(false /*AllowBrokenLinks*/, + [this](const SectionBase &Sec) { + return &Sec == Obj.SectionIndexTable; + })) + return E; + } + } + + // Make sure we add the names of all the sections. 
Importantly this must be + // done after we decide to add or remove SectionIndexes. + if (Obj.SectionNames != nullptr) + for (const SectionBase &Sec : Obj.sections()) + Obj.SectionNames->addString(Sec.Name); + + initEhdrSegment(); + + // Before we can prepare for layout the indexes need to be finalized. + // Also, the output arch may not be the same as the input arch, so fix up + // size-related fields before doing layout calculations. + uint64_t Index = 0; + auto SecSizer = std::make_unique<ELFSectionSizer<ELFT>>(); + for (SectionBase &Sec : Obj.sections()) { + Sec.Index = Index++; + if (Error Err = Sec.accept(*SecSizer)) + return Err; + } + + // The symbol table does not update all other sections on update. For + // instance, symbol names are not added as new symbols are added. This means + // that some sections, like .strtab, don't yet have their final size. + if (Obj.SymbolTable != nullptr) + Obj.SymbolTable->prepareForLayout(); + + // Now that all strings are added we want to finalize string table builders, + // because that affects section sizes which in turn affects section offsets. + for (SectionBase &Sec : Obj.sections()) + if (auto StrTab = dyn_cast<StringTableSection>(&Sec)) + StrTab->prepareForLayout(); + + assignOffsets(); + + // layoutSections could have modified section indexes, so we need + // to fill the index table after assignOffsets. + if (Obj.SymbolTable != nullptr) + Obj.SymbolTable->fillShndxTable(); + + // Finally now that all offsets and indexes have been set we can finalize any + // remaining issues. 
+ uint64_t Offset = Obj.SHOff + sizeof(Elf_Shdr); + for (SectionBase &Sec : Obj.sections()) { + Sec.HeaderOffset = Offset; + Offset += sizeof(Elf_Shdr); + if (WriteSectionHeaders) + Sec.NameIndex = Obj.SectionNames->findIndex(Sec.Name); + Sec.finalize(); + } + + size_t TotalSize = totalSize(); + Buf = WritableMemoryBuffer::getNewMemBuffer(TotalSize); + if (!Buf) + return createStringError(errc::not_enough_memory, + "failed to allocate memory buffer of " + + Twine::utohexstr(TotalSize) + " bytes"); + + SecWriter = std::make_unique<ELFSectionWriter<ELFT>>(*Buf); + return Error::success(); +} + +Error BinaryWriter::write() { + for (const SectionBase &Sec : Obj.allocSections()) + if (Error Err = Sec.accept(*SecWriter)) + return Err; + + // TODO: Implement direct writing to the output stream (without intermediate + // memory buffer Buf). + Out.write(Buf->getBufferStart(), Buf->getBufferSize()); + return Error::success(); +} + +Error BinaryWriter::finalize() { + // Compute the section LMA based on its sh_offset and the containing segment's + // p_offset and p_paddr. Also compute the minimum LMA of all non-empty + // sections as MinAddr. In the output, the contents between address 0 and + // MinAddr will be skipped. + uint64_t MinAddr = UINT64_MAX; + for (SectionBase &Sec : Obj.allocSections()) { + // If Sec's type is changed from SHT_NOBITS due to --set-section-flags, + // Offset may not be aligned. Align it to max(Align, 1). + if (Sec.ParentSegment != nullptr) + Sec.Addr = alignTo(Sec.Offset - Sec.ParentSegment->Offset + + Sec.ParentSegment->PAddr, + std::max(Sec.Align, uint64_t(1))); + if (Sec.Type != SHT_NOBITS && Sec.Size > 0) + MinAddr = std::min(MinAddr, Sec.Addr); + } + + // Now that every section has been laid out we just need to compute the total + // file size. This might not be the same as the offset returned by + // layoutSections, because we want to truncate the last segment to the end of + // its last non-empty section, to match GNU objcopy's behaviour. 
+ TotalSize = 0; + for (SectionBase &Sec : Obj.allocSections()) + if (Sec.Type != SHT_NOBITS && Sec.Size > 0) { + Sec.Offset = Sec.Addr - MinAddr; + TotalSize = std::max(TotalSize, Sec.Offset + Sec.Size); + } + + Buf = WritableMemoryBuffer::getNewMemBuffer(TotalSize); + if (!Buf) + return createStringError(errc::not_enough_memory, + "failed to allocate memory buffer of " + + Twine::utohexstr(TotalSize) + " bytes"); + SecWriter = std::make_unique<BinarySectionWriter>(*Buf); + return Error::success(); +} + +bool IHexWriter::SectionCompare::operator()(const SectionBase *Lhs, + const SectionBase *Rhs) const { + return (sectionPhysicalAddr(Lhs) & 0xFFFFFFFFU) < + (sectionPhysicalAddr(Rhs) & 0xFFFFFFFFU); +} + +uint64_t IHexWriter::writeEntryPointRecord(uint8_t *Buf) { + IHexLineData HexData; + uint8_t Data[4] = {}; + // We don't write entry point record if entry is zero. + if (Obj.Entry == 0) + return 0; + + if (Obj.Entry <= 0xFFFFFU) { + Data[0] = ((Obj.Entry & 0xF0000U) >> 12) & 0xFF; + support::endian::write(&Data[2], static_cast<uint16_t>(Obj.Entry), + support::big); + HexData = IHexRecord::getLine(IHexRecord::StartAddr80x86, 0, Data); + } else { + support::endian::write(Data, static_cast<uint32_t>(Obj.Entry), + support::big); + HexData = IHexRecord::getLine(IHexRecord::StartAddr, 0, Data); + } + memcpy(Buf, HexData.data(), HexData.size()); + return HexData.size(); +} + +uint64_t IHexWriter::writeEndOfFileRecord(uint8_t *Buf) { + IHexLineData HexData = IHexRecord::getLine(IHexRecord::EndOfFile, 0, {}); + memcpy(Buf, HexData.data(), HexData.size()); + return HexData.size(); +} + +Error IHexWriter::write() { + IHexSectionWriter Writer(*Buf); + // Write sections. + for (const SectionBase *Sec : Sections) + if (Error Err = Sec->accept(Writer)) + return Err; + + uint64_t Offset = Writer.getBufferOffset(); + // Write entry point address. + Offset += writeEntryPointRecord( + reinterpret_cast<uint8_t *>(Buf->getBufferStart()) + Offset); + // Write EOF. 
+ Offset += writeEndOfFileRecord( + reinterpret_cast<uint8_t *>(Buf->getBufferStart()) + Offset); + assert(Offset == TotalSize); + + // TODO: Implement direct writing to the output stream (without intermediate + // memory buffer Buf). + Out.write(Buf->getBufferStart(), Buf->getBufferSize()); + return Error::success(); +} + +Error IHexWriter::checkSection(const SectionBase &Sec) { + uint64_t Addr = sectionPhysicalAddr(&Sec); + if (addressOverflows32bit(Addr) || addressOverflows32bit(Addr + Sec.Size - 1)) + return createStringError( + errc::invalid_argument, + "Section '%s' address range [0x%llx, 0x%llx] is not 32 bit", + Sec.Name.c_str(), Addr, Addr + Sec.Size - 1); + return Error::success(); +} + +Error IHexWriter::finalize() { + // We can't write 64-bit addresses. + if (addressOverflows32bit(Obj.Entry)) + return createStringError(errc::invalid_argument, + "Entry point address 0x%llx overflows 32 bits", + Obj.Entry); + + for (const SectionBase &Sec : Obj.sections()) + if ((Sec.Flags & ELF::SHF_ALLOC) && Sec.Type != ELF::SHT_NOBITS && + Sec.Size > 0) { + if (Error E = checkSection(Sec)) + return E; + Sections.insert(&Sec); + } + + std::unique_ptr<WritableMemoryBuffer> EmptyBuffer = + WritableMemoryBuffer::getNewMemBuffer(0); + if (!EmptyBuffer) + return createStringError(errc::not_enough_memory, + "failed to allocate memory buffer of 0 bytes"); + + IHexSectionWriterBase LengthCalc(*EmptyBuffer); + for (const SectionBase *Sec : Sections) + if (Error Err = Sec->accept(LengthCalc)) + return Err; + + // We need space to write section records + StartAddress record + // (if start adress is not zero) + EndOfFile record. + TotalSize = LengthCalc.getBufferOffset() + + (Obj.Entry ? 
IHexRecord::getLineLength(4) : 0) + + IHexRecord::getLineLength(0); + + Buf = WritableMemoryBuffer::getNewMemBuffer(TotalSize); + if (!Buf) + return createStringError(errc::not_enough_memory, + "failed to allocate memory buffer of " + + Twine::utohexstr(TotalSize) + " bytes"); + + return Error::success(); +} + +namespace llvm { +namespace objcopy { +namespace elf { + +template class ELFBuilder<ELF64LE>; +template class ELFBuilder<ELF64BE>; +template class ELFBuilder<ELF32LE>; +template class ELFBuilder<ELF32BE>; + +template class ELFWriter<ELF64LE>; +template class ELFWriter<ELF64BE>; +template class ELFWriter<ELF32LE>; +template class ELFWriter<ELF32BE>; + +} // end namespace elf +} // end namespace objcopy +} // end namespace llvm diff --git a/contrib/libs/llvm16/lib/ObjCopy/ELF/ELFObject.h b/contrib/libs/llvm16/lib/ObjCopy/ELF/ELFObject.h new file mode 100644 index 00000000000..94b5afe7df8 --- /dev/null +++ b/contrib/libs/llvm16/lib/ObjCopy/ELF/ELFObject.h @@ -0,0 +1,1112 @@ +//===- ELFObject.h ----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_OBJCOPY_ELF_ELFOBJECT_H +#define LLVM_LIB_OBJCOPY_ELF_ELFOBJECT_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/StringTableBuilder.h" +#include "llvm/ObjCopy/CommonConfig.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/MemoryBuffer.h" +#include <cstddef> +#include <cstdint> +#include <functional> +#include <memory> +#include <set> +#include <vector> + +namespace llvm { +enum class DebugCompressionType; +namespace objcopy { +namespace elf { + +class SectionBase; +class Section; +class OwnedDataSection; +class StringTableSection; +class SymbolTableSection; +class RelocationSection; +class DynamicRelocationSection; +class GnuDebugLinkSection; +class GroupSection; +class SectionIndexSection; +class CompressedSection; +class DecompressedSection; +class Segment; +class Object; +struct Symbol; + +class SectionTableRef { + ArrayRef<std::unique_ptr<SectionBase>> Sections; + +public: + using iterator = pointee_iterator<const std::unique_ptr<SectionBase> *>; + + explicit SectionTableRef(ArrayRef<std::unique_ptr<SectionBase>> Secs) + : Sections(Secs) {} + SectionTableRef(const SectionTableRef &) = default; + + iterator begin() const { return iterator(Sections.data()); } + iterator end() const { return iterator(Sections.data() + Sections.size()); } + size_t size() const { return Sections.size(); } + + Expected<SectionBase *> getSection(uint32_t Index, Twine ErrMsg); + + template <class T> + Expected<T *> getSectionOfType(uint32_t Index, Twine IndexErrMsg, + Twine TypeErrMsg); +}; + +enum ElfType { ELFT_ELF32LE, ELFT_ELF64LE, ELFT_ELF32BE, ELFT_ELF64BE }; + +class SectionVisitor { +public: + virtual ~SectionVisitor() = 
default; + + virtual Error visit(const Section &Sec) = 0; + virtual Error visit(const OwnedDataSection &Sec) = 0; + virtual Error visit(const StringTableSection &Sec) = 0; + virtual Error visit(const SymbolTableSection &Sec) = 0; + virtual Error visit(const RelocationSection &Sec) = 0; + virtual Error visit(const DynamicRelocationSection &Sec) = 0; + virtual Error visit(const GnuDebugLinkSection &Sec) = 0; + virtual Error visit(const GroupSection &Sec) = 0; + virtual Error visit(const SectionIndexSection &Sec) = 0; + virtual Error visit(const CompressedSection &Sec) = 0; + virtual Error visit(const DecompressedSection &Sec) = 0; +}; + +class MutableSectionVisitor { +public: + virtual ~MutableSectionVisitor() = default; + + virtual Error visit(Section &Sec) = 0; + virtual Error visit(OwnedDataSection &Sec) = 0; + virtual Error visit(StringTableSection &Sec) = 0; + virtual Error visit(SymbolTableSection &Sec) = 0; + virtual Error visit(RelocationSection &Sec) = 0; + virtual Error visit(DynamicRelocationSection &Sec) = 0; + virtual Error visit(GnuDebugLinkSection &Sec) = 0; + virtual Error visit(GroupSection &Sec) = 0; + virtual Error visit(SectionIndexSection &Sec) = 0; + virtual Error visit(CompressedSection &Sec) = 0; + virtual Error visit(DecompressedSection &Sec) = 0; +}; + +class SectionWriter : public SectionVisitor { +protected: + WritableMemoryBuffer &Out; + +public: + virtual ~SectionWriter() = default; + + Error visit(const Section &Sec) override; + Error visit(const OwnedDataSection &Sec) override; + Error visit(const StringTableSection &Sec) override; + Error visit(const DynamicRelocationSection &Sec) override; + Error visit(const SymbolTableSection &Sec) override = 0; + Error visit(const RelocationSection &Sec) override = 0; + Error visit(const GnuDebugLinkSection &Sec) override = 0; + Error visit(const GroupSection &Sec) override = 0; + Error visit(const SectionIndexSection &Sec) override = 0; + Error visit(const CompressedSection &Sec) override = 0; + 
Error visit(const DecompressedSection &Sec) override = 0; + + explicit SectionWriter(WritableMemoryBuffer &Buf) : Out(Buf) {} +}; + +template <class ELFT> class ELFSectionWriter : public SectionWriter { +private: + using Elf_Word = typename ELFT::Word; + using Elf_Rel = typename ELFT::Rel; + using Elf_Rela = typename ELFT::Rela; + using Elf_Sym = typename ELFT::Sym; + +public: + virtual ~ELFSectionWriter() {} + Error visit(const SymbolTableSection &Sec) override; + Error visit(const RelocationSection &Sec) override; + Error visit(const GnuDebugLinkSection &Sec) override; + Error visit(const GroupSection &Sec) override; + Error visit(const SectionIndexSection &Sec) override; + Error visit(const CompressedSection &Sec) override; + Error visit(const DecompressedSection &Sec) override; + + explicit ELFSectionWriter(WritableMemoryBuffer &Buf) : SectionWriter(Buf) {} +}; + +template <class ELFT> class ELFSectionSizer : public MutableSectionVisitor { +private: + using Elf_Rel = typename ELFT::Rel; + using Elf_Rela = typename ELFT::Rela; + using Elf_Sym = typename ELFT::Sym; + using Elf_Word = typename ELFT::Word; + using Elf_Xword = typename ELFT::Xword; + +public: + Error visit(Section &Sec) override; + Error visit(OwnedDataSection &Sec) override; + Error visit(StringTableSection &Sec) override; + Error visit(DynamicRelocationSection &Sec) override; + Error visit(SymbolTableSection &Sec) override; + Error visit(RelocationSection &Sec) override; + Error visit(GnuDebugLinkSection &Sec) override; + Error visit(GroupSection &Sec) override; + Error visit(SectionIndexSection &Sec) override; + Error visit(CompressedSection &Sec) override; + Error visit(DecompressedSection &Sec) override; +}; + +#define MAKE_SEC_WRITER_FRIEND \ + friend class SectionWriter; \ + friend class IHexSectionWriterBase; \ + friend class IHexSectionWriter; \ + template <class ELFT> friend class ELFSectionWriter; \ + template <class ELFT> friend class ELFSectionSizer; + +class BinarySectionWriter : 
public SectionWriter { +public: + virtual ~BinarySectionWriter() {} + + Error visit(const SymbolTableSection &Sec) override; + Error visit(const RelocationSection &Sec) override; + Error visit(const GnuDebugLinkSection &Sec) override; + Error visit(const GroupSection &Sec) override; + Error visit(const SectionIndexSection &Sec) override; + Error visit(const CompressedSection &Sec) override; + Error visit(const DecompressedSection &Sec) override; + + explicit BinarySectionWriter(WritableMemoryBuffer &Buf) + : SectionWriter(Buf) {} +}; + +using IHexLineData = SmallVector<char, 64>; + +struct IHexRecord { + // Memory address of the record. + uint16_t Addr; + // Record type (see below). + uint16_t Type; + // Record data in hexadecimal form. + StringRef HexData; + + // Helper method to get file length of the record + // including newline character + static size_t getLength(size_t DataSize) { + // :LLAAAATT[DD...DD]CC' + return DataSize * 2 + 11; + } + + // Gets length of line in a file (getLength + CRLF). + static size_t getLineLength(size_t DataSize) { + return getLength(DataSize) + 2; + } + + // Given type, address and data returns line which can + // be written to output file. + static IHexLineData getLine(uint8_t Type, uint16_t Addr, + ArrayRef<uint8_t> Data); + + // Parses the line and returns record if possible. + // Line should be trimmed from whitespace characters. + static Expected<IHexRecord> parse(StringRef Line); + + // Calculates checksum of stringified record representation + // S must NOT contain leading ':' and trailing whitespace + // characters + static uint8_t getChecksum(StringRef S); + + enum Type { + // Contains data and a 16-bit starting address for the data. + // The byte count specifies number of data bytes in the record. + Data = 0, + // Must occur exactly once per file in the last line of the file. + // The data field is empty (thus byte count is 00) and the address + // field is typically 0000. 
+ EndOfFile = 1, + // The data field contains a 16-bit segment base address (thus byte + // count is always 02) compatible with 80x86 real mode addressing. + // The address field (typically 0000) is ignored. The segment address + // from the most recent 02 record is multiplied by 16 and added to each + // subsequent data record address to form the physical starting address + // for the data. This allows addressing up to one megabyte of address + // space. + SegmentAddr = 2, + // or 80x86 processors, specifies the initial content of the CS:IP + // registers. The address field is 0000, the byte count is always 04, + // the first two data bytes are the CS value, the latter two are the + // IP value. + StartAddr80x86 = 3, + // Allows for 32 bit addressing (up to 4GiB). The record's address field + // is ignored (typically 0000) and its byte count is always 02. The two + // data bytes (big endian) specify the upper 16 bits of the 32 bit + // absolute address for all subsequent type 00 records + ExtendedAddr = 4, + // The address field is 0000 (not used) and the byte count is always 04. + // The four data bytes represent a 32-bit address value. In the case of + // 80386 and higher CPUs, this address is loaded into the EIP register. + StartAddr = 5, + // We have no other valid types + InvalidType = 6 + }; +}; + +// Base class for IHexSectionWriter. This class implements writing algorithm, +// but doesn't actually write records. It is used for output buffer size +// calculation in IHexWriter::finalize. 
// Base class for the Intel HEX section writers. It extends
// BinarySectionWriter, tracks the current segment / extended linear address
// state, and keeps a running offset into the output buffer that is exposed
// through getBufferOffset().
// NOTE(review): the derived class below is described as the "real" writer,
// so this base presumably serves a dry-run (size calculation) pass as well --
// confirm against the implementation file.
class IHexSectionWriterBase : public BinarySectionWriter {
  // 20-bit segment address
  uint32_t SegmentAddr = 0;
  // Extended linear address
  uint32_t BaseAddr = 0;

  // Write segment address corresponding to 'Addr'
  uint64_t writeSegmentAddr(uint64_t Addr);
  // Write extended linear (base) address corresponding to 'Addr'
  uint64_t writeBaseAddr(uint64_t Addr);

protected:
  // Offset in the output buffer
  uint64_t Offset = 0;

  void writeSection(const SectionBase *Sec, ArrayRef<uint8_t> Data);
  // Customization point: IHexSectionWriter overrides this hook.
  virtual void writeData(uint8_t Type, uint16_t Addr, ArrayRef<uint8_t> Data);

public:
  explicit IHexSectionWriterBase(WritableMemoryBuffer &Buf)
      : BinarySectionWriter(Buf) {}

  // Returns the current value of Offset.
  uint64_t getBufferOffset() const { return Offset; }
  // Only the StringTableSection overload is left overridable; the other
  // overloads declared here are final.
  Error visit(const Section &Sec) final;
  Error visit(const OwnedDataSection &Sec) final;
  Error visit(const StringTableSection &Sec) override;
  Error visit(const DynamicRelocationSection &Sec) final;
  // Keep the remaining BinarySectionWriter::visit overloads visible.
  using BinarySectionWriter::visit;
};

// Real IHEX section writer
class IHexSectionWriter : public IHexSectionWriterBase {
public:
  // NOTE(review): unlike the base-class constructor this one is not marked
  // explicit.
  IHexSectionWriter(WritableMemoryBuffer &Buf) : IHexSectionWriterBase(Buf) {}

  void writeData(uint8_t Type, uint16_t Addr, ArrayRef<uint8_t> Data) override;
  Error visit(const StringTableSection &Sec) override;
};

// Abstract base of the output writers. It holds a reference to the Object
// being written, an owned WritableMemoryBuffer (null until a subclass
// assigns it) and a reference to the destination stream. Subclasses
// implement finalize() and write().
class Writer {
protected:
  Object &Obj;
  std::unique_ptr<WritableMemoryBuffer> Buf;
  raw_ostream &Out;

public:
  virtual ~Writer();
  virtual Error finalize() = 0;
  virtual Error write() = 0;

  // Binds the two reference members; both referents must outlive the writer.
  Writer(Object &O, raw_ostream &Out) : Obj(O), Out(Out) {}
};

// Writer for ELF output, parameterized on the ELF type (ELFT supplies the
// Addr/Shdr/Phdr/Ehdr types used below).
template <class ELFT> class ELFWriter : public Writer {
private:
  using Elf_Addr = typename ELFT::Addr;
  using Elf_Shdr = typename ELFT::Shdr;
  using Elf_Phdr = typename ELFT::Phdr;
  using Elf_Ehdr = typename ELFT::Ehdr;

  void initEhdrSegment();

  void writeEhdr();
  void writePhdr(const Segment &Seg);
  void writeShdr(const SectionBase &Sec);

  void writePhdrs();
  void writeShdrs();
  Error writeSectionData();
  void writeSegmentData();

  void assignOffsets();

  std::unique_ptr<ELFSectionWriter<ELFT>> SecWriter;

  size_t totalSize() const;

public:
  virtual ~ELFWriter() {}
  // No in-class initializer: presumably set from the constructor's WSH
  // argument (the constructor is defined out of line) -- TODO confirm.
  bool WriteSectionHeaders;

  // For --only-keep-debug, select an alternative section/segment layout
  // algorithm.
  bool OnlyKeepDebug;

  Error finalize() override;
  Error write() override;
  ELFWriter(Object &Obj, raw_ostream &Out, bool WSH, bool OnlyKeepDebug);
};

// Writer that uses a BinarySectionWriter; judging by the name it produces
// raw binary output (confirm in the implementation file).
class BinaryWriter : public Writer {
private:
  std::unique_ptr<BinarySectionWriter> SecWriter;

  uint64_t TotalSize = 0;

public:
  ~BinaryWriter() {}
  Error finalize() override;
  Error write() override;
  BinaryWriter(Object &Obj, raw_ostream &Out) : Writer(Obj, Out) {}
};

// Writer for Intel HEX output. Sections to be written are kept in a std::set
// ordered by SectionCompare (whose definition is out of line).
class IHexWriter : public Writer {
  struct SectionCompare {
    bool operator()(const SectionBase *Lhs, const SectionBase *Rhs) const;
  };

  std::set<const SectionBase *, SectionCompare> Sections;
  size_t TotalSize = 0;

  Error checkSection(const SectionBase &Sec);
  uint64_t writeEntryPointRecord(uint8_t *Buf);
  uint64_t writeEndOfFileRecord(uint8_t *Buf);

public:
  ~IHexWriter() {}
  Error finalize() override;
  Error write() override;
  IHexWriter(Object &Obj, raw_ostream &Out) : Writer(Obj, Out) {}
};

// Common base of every section representation. All data members are public
// and carry in-class defaults. The Original* members are the ones consulted
// by the classof() predicates and the Segment ordering in this header.
// NOTE(review): presumably the Original* members record the values read from
// the input file while the plain members hold the values to be written --
// confirm against the reader code.
class SectionBase {
public:
  std::string Name;
  Segment *ParentSegment = nullptr;
  uint64_t HeaderOffset = 0;
  uint32_t Index = 0;

  uint32_t OriginalIndex = 0;
  uint64_t OriginalFlags = 0;
  uint64_t OriginalType = ELF::SHT_NULL;
  // Defaults to the maximum uint64_t value.
  // NOTE(review): looks like a "no original offset" sentinel -- confirm.
  uint64_t OriginalOffset = std::numeric_limits<uint64_t>::max();

  uint64_t Addr = 0;
  uint64_t Align = 1;
  uint32_t EntrySize = 0;
  uint64_t Flags = 0;
  uint64_t Info = 0;
  uint64_t Link = ELF::SHN_UNDEF;
  uint64_t NameIndex = 0;
  uint64_t Offset = 0;
  uint64_t Size = 0;
  uint64_t Type = ELF::SHT_NULL;
  ArrayRef<uint8_t> OriginalData;
  bool HasSymbol = false;

  SectionBase() = default;
  // Copying is allowed; DecompressedSection and OwnedDataSection use it to
  // start from another section's header fields.
  SectionBase(const SectionBase &) = default;

  virtual ~SectionBase() = default;

  virtual Error initialize(SectionTableRef SecTable);
  virtual void finalize();
  // Remove references to these sections. The list of sections must be sorted.
  virtual Error
  removeSectionReferences(bool AllowBrokenLinks,
                          function_ref<bool(const SectionBase *)> ToRemove);
  virtual Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove);
  // Visitor entry points; every concrete section type must implement both.
  virtual Error accept(SectionVisitor &Visitor) const = 0;
  virtual Error accept(MutableSectionVisitor &Visitor) = 0;
  virtual void markSymbols();
  virtual void
  replaceSectionReferences(const DenseMap<SectionBase *, SectionBase *> &);
  // Base implementation reports no contents; subclasses override as needed.
  virtual bool hasContents() const { return false; }
  // Notify the section that it is subject to removal.
  virtual void onRemove();
};

// A program segment: header-like fields, a view of its contents, an optional
// parent segment and the (non-owned) sections that belong to it.
class Segment {
private:
  struct SectionCompare {
    bool operator()(const SectionBase *Lhs, const SectionBase *Rhs) const {
      // Sections are ordered lexicographically on
      // (OriginalOffset, OriginalIndex): the original index breaks ties
      // between sections that share an original offset (the original comment
      // gives an empty section as the case where that can happen). Note that
      // the comparison uses OriginalOffset, not Addr.
      if (Lhs->OriginalOffset == Rhs->OriginalOffset)
        return Lhs->OriginalIndex < Rhs->OriginalIndex;
      return Lhs->OriginalOffset < Rhs->OriginalOffset;
    }
  };

public:
  uint32_t Type = 0;
  uint32_t Flags = 0;
  uint64_t Offset = 0;
  uint64_t VAddr = 0;
  uint64_t PAddr = 0;
  uint64_t FileSize = 0;
  uint64_t MemSize = 0;
  uint64_t Align = 0;

  uint32_t Index = 0;
  uint64_t OriginalOffset = 0;
  Segment *ParentSegment = nullptr;
  // Non-owning view of the segment bytes.
  ArrayRef<uint8_t> Contents;
  // Non-owning set of member sections, ordered by SectionCompare.
  std::set<const SectionBase *, SectionCompare> Sections;

  explicit Segment(ArrayRef<uint8_t> Data) : Contents(Data) {}
  Segment() = default;

  // Returns the first section in SectionCompare order, or nullptr when the
  // segment has no sections.
  const SectionBase *firstSection() const {
    if (!Sections.empty())
      return *Sections.begin();
    return nullptr;
  }

  void removeSection(const SectionBase *Sec) { Sections.erase(Sec); }
  void addSection(const SectionBase *Sec) { Sections.insert(Sec); }

  ArrayRef<uint8_t> getContents() const { return Contents; }
};

// Generic section whose bytes are a non-owning view (ArrayRef) of data held
// elsewhere.
class Section : public SectionBase {
  MAKE_SEC_WRITER_FRIEND

  ArrayRef<uint8_t> Contents;
  SectionBase *LinkSection = nullptr;

public:
  explicit Section(ArrayRef<uint8_t> Data) : Contents(Data) {}

  Error accept(SectionVisitor &Visitor) const override;
  Error accept(MutableSectionVisitor &Visitor) override;
  Error removeSectionReferences(
      bool AllowBrokenLinks,
      function_ref<bool(const SectionBase *)> ToRemove) override;
  Error initialize(SectionTableRef SecTable) override;
  void finalize() override;
  // A section has contents unless its (current) type is SHT_NOBITS or
  // SHT_NULL.
  bool hasContents() const override {
    return Type != ELF::SHT_NOBITS && Type != ELF::SHT_NULL;
  }
};

// Section that owns a private copy of its bytes (std::vector) instead of
// referring to external storage.
class OwnedDataSection : public SectionBase {
  MAKE_SEC_WRITER_FRIEND

  std::vector<uint8_t> Data;

public:
  // Creates a SHT_PROGBITS section named SecName holding a copy of Data.
  // Inside the body the parameter 'Data' shadows the member of the same
  // name, so Data.size() is the size of the argument (equal to the size of
  // the copy just made).
  OwnedDataSection(StringRef SecName, ArrayRef<uint8_t> Data)
      : Data(std::begin(Data), std::end(Data)) {
    Name = SecName.str();
    Type = OriginalType = ELF::SHT_PROGBITS;
    Size = Data.size();
    OriginalOffset = std::numeric_limits<uint64_t>::max();
  }

  // Creates an initially empty SHT_PROGBITS section with the given address,
  // flags and original offset. Size keeps its default of 0.
  // NOTE(review): data is presumably added later via appendHexData --
  // confirm.
  OwnedDataSection(const Twine &SecName, uint64_t SecAddr, uint64_t SecFlags,
                   uint64_t SecOff) {
    Name = SecName.str();
    Type = OriginalType = ELF::SHT_PROGBITS;
    Addr = SecAddr;
    Flags = OriginalFlags = SecFlags;
    OriginalOffset = SecOff;
  }

  // Copies the header fields of S, then takes a copy of Data as the new
  // contents and updates Size to match.
  OwnedDataSection(SectionBase &S, ArrayRef<uint8_t> Data)
      : SectionBase(S), Data(std::begin(Data), std::end(Data)) {
    Size = Data.size();
  }

  void appendHexData(StringRef HexData);
  Error accept(SectionVisitor &Sec) const override;
  Error accept(MutableSectionVisitor &Visitor) override;
  bool hasContents() const override { return true; }
};

// Section holding compressed data together with the compression header type
// and the size/alignment of the decompressed form.
class CompressedSection : public SectionBase {
  MAKE_SEC_WRITER_FRIEND

  uint32_t ChType = 0;
  // The next three members have no in-class initializer; both constructors
  // are defined out of line and are expected to set them -- TODO confirm.
  DebugCompressionType CompressionType;
  uint64_t DecompressedSize;
  uint64_t DecompressedAlign;
  SmallVector<uint8_t, 128> CompressedData;

public:
  CompressedSection(const SectionBase &Sec,
                    DebugCompressionType CompressionType, bool Is64Bits);
  CompressedSection(ArrayRef<uint8_t> CompressedData, uint32_t ChType,
                    uint64_t DecompressedSize, uint64_t DecompressedAlign);

  uint64_t getDecompressedSize() const { return DecompressedSize; }
  uint64_t getDecompressedAlign() const { return DecompressedAlign; }
  // Returns the 32-bit ChType widened to uint64_t.
  uint64_t getChType() const { return ChType; }

  Error accept(SectionVisitor &Visitor) const override;
  Error accept(MutableSectionVisitor &Visitor) override;

  // A section is treated as compressed when its original flags include
  // SHF_COMPRESSED.
  static bool classof(const SectionBase *S) {
    return S->OriginalFlags & ELF::SHF_COMPRESSED;
  }
};

// Section describing the decompressed form of a CompressedSection.
class DecompressedSection : public SectionBase {
  MAKE_SEC_WRITER_FRIEND

public:
  uint32_t ChType;
  // Copies the header fields of Sec, then switches Size and Align to the
  // decompressed values and clears SHF_COMPRESSED in both Flags and
  // OriginalFlags. getChType() returns uint64_t, but the value comes from a
  // uint32_t member, so storing it back into a uint32_t loses nothing.
  explicit DecompressedSection(const CompressedSection &Sec)
      : SectionBase(Sec), ChType(Sec.getChType()) {
    Size = Sec.getDecompressedSize();
    Align = Sec.getDecompressedAlign();
    Flags = OriginalFlags = (Flags & ~ELF::SHF_COMPRESSED);
  }

  Error accept(SectionVisitor &Visitor) const override;
  Error accept(MutableSectionVisitor &Visitor) override;
};

// There are two types of string tables that can exist, dynamic and
not dynamic. +// In the dynamic case the string table is allocated. Changing a dynamic string +// table would mean altering virtual addresses and thus the memory image. So +// dynamic string tables should not have an interface to modify them or +// reconstruct them. This type lets us reconstruct a string table. To avoid +// this class being used for dynamic string tables (which has happened) the +// classof method checks that the particular instance is not allocated. This +// then agrees with the makeSection method used to construct most sections. +class StringTableSection : public SectionBase { + MAKE_SEC_WRITER_FRIEND + + StringTableBuilder StrTabBuilder; + +public: + StringTableSection() : StrTabBuilder(StringTableBuilder::ELF) { + Type = OriginalType = ELF::SHT_STRTAB; + } + + void addString(StringRef Name); + uint32_t findIndex(StringRef Name) const; + void prepareForLayout(); + Error accept(SectionVisitor &Visitor) const override; + Error accept(MutableSectionVisitor &Visitor) override; + + static bool classof(const SectionBase *S) { + if (S->OriginalFlags & ELF::SHF_ALLOC) + return false; + return S->OriginalType == ELF::SHT_STRTAB; + } +}; + +// Symbols have a st_shndx field that normally stores an index but occasionally +// stores a different special value. This enum keeps track of what the st_shndx +// field means. Most of the values are just copies of the special SHN_* values. +// SYMBOL_SIMPLE_INDEX means that the st_shndx is just an index of a section. 
+enum SymbolShndxType { + SYMBOL_SIMPLE_INDEX = 0, + SYMBOL_ABS = ELF::SHN_ABS, + SYMBOL_COMMON = ELF::SHN_COMMON, + SYMBOL_LOPROC = ELF::SHN_LOPROC, + SYMBOL_AMDGPU_LDS = ELF::SHN_AMDGPU_LDS, + SYMBOL_HEXAGON_SCOMMON = ELF::SHN_HEXAGON_SCOMMON, + SYMBOL_HEXAGON_SCOMMON_2 = ELF::SHN_HEXAGON_SCOMMON_2, + SYMBOL_HEXAGON_SCOMMON_4 = ELF::SHN_HEXAGON_SCOMMON_4, + SYMBOL_HEXAGON_SCOMMON_8 = ELF::SHN_HEXAGON_SCOMMON_8, + SYMBOL_MIPS_ACOMMON = ELF::SHN_MIPS_ACOMMON, + SYMBOL_MIPS_TEXT = ELF::SHN_MIPS_TEXT, + SYMBOL_MIPS_DATA = ELF::SHN_MIPS_DATA, + SYMBOL_MIPS_SCOMMON = ELF::SHN_MIPS_SCOMMON, + SYMBOL_MIPS_SUNDEFINED = ELF::SHN_MIPS_SUNDEFINED, + SYMBOL_HIPROC = ELF::SHN_HIPROC, + SYMBOL_LOOS = ELF::SHN_LOOS, + SYMBOL_HIOS = ELF::SHN_HIOS, + SYMBOL_XINDEX = ELF::SHN_XINDEX, +}; + +struct Symbol { + uint8_t Binding; + SectionBase *DefinedIn = nullptr; + SymbolShndxType ShndxType; + uint32_t Index; + std::string Name; + uint32_t NameIndex; + uint64_t Size; + uint8_t Type; + uint64_t Value; + uint8_t Visibility; + bool Referenced = false; + + uint16_t getShndx() const; + bool isCommon() const; +}; + +class SectionIndexSection : public SectionBase { + MAKE_SEC_WRITER_FRIEND + +private: + std::vector<uint32_t> Indexes; + SymbolTableSection *Symbols = nullptr; + +public: + virtual ~SectionIndexSection() {} + void addIndex(uint32_t Index) { + assert(Size > 0); + Indexes.push_back(Index); + } + + void reserve(size_t NumSymbols) { + Indexes.reserve(NumSymbols); + Size = NumSymbols * 4; + } + void setSymTab(SymbolTableSection *SymTab) { Symbols = SymTab; } + Error initialize(SectionTableRef SecTable) override; + void finalize() override; + Error accept(SectionVisitor &Visitor) const override; + Error accept(MutableSectionVisitor &Visitor) override; + + SectionIndexSection() { + Name = ".symtab_shndx"; + Align = 4; + EntrySize = 4; + Type = OriginalType = ELF::SHT_SYMTAB_SHNDX; + } +}; + +class SymbolTableSection : public SectionBase { + MAKE_SEC_WRITER_FRIEND + + void 
setStrTab(StringTableSection *StrTab) { SymbolNames = StrTab; } + void assignIndices(); + +protected: + std::vector<std::unique_ptr<Symbol>> Symbols; + StringTableSection *SymbolNames = nullptr; + SectionIndexSection *SectionIndexTable = nullptr; + + using SymPtr = std::unique_ptr<Symbol>; + +public: + SymbolTableSection() { Type = OriginalType = ELF::SHT_SYMTAB; } + + void addSymbol(Twine Name, uint8_t Bind, uint8_t Type, SectionBase *DefinedIn, + uint64_t Value, uint8_t Visibility, uint16_t Shndx, + uint64_t SymbolSize); + void prepareForLayout(); + // An 'empty' symbol table still contains a null symbol. + bool empty() const { return Symbols.size() == 1; } + void setShndxTable(SectionIndexSection *ShndxTable) { + SectionIndexTable = ShndxTable; + } + const SectionIndexSection *getShndxTable() const { return SectionIndexTable; } + void fillShndxTable(); + const SectionBase *getStrTab() const { return SymbolNames; } + Expected<const Symbol *> getSymbolByIndex(uint32_t Index) const; + Expected<Symbol *> getSymbolByIndex(uint32_t Index); + void updateSymbols(function_ref<void(Symbol &)> Callable); + + Error removeSectionReferences( + bool AllowBrokenLinks, + function_ref<bool(const SectionBase *)> ToRemove) override; + Error initialize(SectionTableRef SecTable) override; + void finalize() override; + Error accept(SectionVisitor &Visitor) const override; + Error accept(MutableSectionVisitor &Visitor) override; + Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override; + void replaceSectionReferences( + const DenseMap<SectionBase *, SectionBase *> &FromTo) override; + + static bool classof(const SectionBase *S) { + return S->OriginalType == ELF::SHT_SYMTAB; + } +}; + +struct Relocation { + Symbol *RelocSymbol = nullptr; + uint64_t Offset; + uint64_t Addend; + uint32_t Type; +}; + +// All relocation sections denote relocations to apply to another section. 
+// However, some relocation sections use a dynamic symbol table and others use +// a regular symbol table. Because the types of the two symbol tables differ in +// our system (because they should behave differently) we can't uniformly +// represent all relocations with the same base class if we expose an interface +// that mentions the symbol table type. So we split the two base types into two +// different classes, one which handles the section the relocation is applied to +// and another which handles the symbol table type. The symbol table type is +// taken as a type parameter to the class (see RelocSectionWithSymtabBase). +class RelocationSectionBase : public SectionBase { +protected: + SectionBase *SecToApplyRel = nullptr; + +public: + const SectionBase *getSection() const { return SecToApplyRel; } + void setSection(SectionBase *Sec) { SecToApplyRel = Sec; } + + StringRef getNamePrefix() const; + + static bool classof(const SectionBase *S) { + return S->OriginalType == ELF::SHT_REL || S->OriginalType == ELF::SHT_RELA; + } +}; + +// Takes the symbol table type to use as a parameter so that we can deduplicate +// that code between the two symbol table types. 
+template <class SymTabType> +class RelocSectionWithSymtabBase : public RelocationSectionBase { + void setSymTab(SymTabType *SymTab) { Symbols = SymTab; } + +protected: + RelocSectionWithSymtabBase() = default; + + SymTabType *Symbols = nullptr; + +public: + Error initialize(SectionTableRef SecTable) override; + void finalize() override; +}; + +class RelocationSection + : public RelocSectionWithSymtabBase<SymbolTableSection> { + MAKE_SEC_WRITER_FRIEND + + std::vector<Relocation> Relocations; + const Object &Obj; + +public: + RelocationSection(const Object &O) : Obj(O) {} + void addRelocation(Relocation Rel) { Relocations.push_back(Rel); } + Error accept(SectionVisitor &Visitor) const override; + Error accept(MutableSectionVisitor &Visitor) override; + Error removeSectionReferences( + bool AllowBrokenLinks, + function_ref<bool(const SectionBase *)> ToRemove) override; + Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override; + void markSymbols() override; + void replaceSectionReferences( + const DenseMap<SectionBase *, SectionBase *> &FromTo) override; + const Object &getObject() const { return Obj; } + + static bool classof(const SectionBase *S) { + if (S->OriginalFlags & ELF::SHF_ALLOC) + return false; + return S->OriginalType == ELF::SHT_REL || S->OriginalType == ELF::SHT_RELA; + } +}; + +// TODO: The way stripping and groups interact is complicated +// and still needs to be worked on. + +class GroupSection : public SectionBase { + MAKE_SEC_WRITER_FRIEND + const SymbolTableSection *SymTab = nullptr; + Symbol *Sym = nullptr; + ELF::Elf32_Word FlagWord; + SmallVector<SectionBase *, 3> GroupMembers; + +public: + // TODO: Contents is present in several classes of the hierarchy. + // This needs to be refactored to avoid duplication. 
+ ArrayRef<uint8_t> Contents; + + explicit GroupSection(ArrayRef<uint8_t> Data) : Contents(Data) {} + + void setSymTab(const SymbolTableSection *SymTabSec) { SymTab = SymTabSec; } + void setSymbol(Symbol *S) { Sym = S; } + void setFlagWord(ELF::Elf32_Word W) { FlagWord = W; } + void addMember(SectionBase *Sec) { GroupMembers.push_back(Sec); } + + Error accept(SectionVisitor &) const override; + Error accept(MutableSectionVisitor &Visitor) override; + void finalize() override; + Error removeSectionReferences( + bool AllowBrokenLinks, + function_ref<bool(const SectionBase *)> ToRemove) override; + Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override; + void markSymbols() override; + void replaceSectionReferences( + const DenseMap<SectionBase *, SectionBase *> &FromTo) override; + void onRemove() override; + + static bool classof(const SectionBase *S) { + return S->OriginalType == ELF::SHT_GROUP; + } +}; + +class DynamicSymbolTableSection : public Section { +public: + explicit DynamicSymbolTableSection(ArrayRef<uint8_t> Data) : Section(Data) {} + + static bool classof(const SectionBase *S) { + return S->OriginalType == ELF::SHT_DYNSYM; + } +}; + +class DynamicSection : public Section { +public: + explicit DynamicSection(ArrayRef<uint8_t> Data) : Section(Data) {} + + static bool classof(const SectionBase *S) { + return S->OriginalType == ELF::SHT_DYNAMIC; + } +}; + +class DynamicRelocationSection + : public RelocSectionWithSymtabBase<DynamicSymbolTableSection> { + MAKE_SEC_WRITER_FRIEND + +private: + ArrayRef<uint8_t> Contents; + +public: + explicit DynamicRelocationSection(ArrayRef<uint8_t> Data) : Contents(Data) {} + + Error accept(SectionVisitor &) const override; + Error accept(MutableSectionVisitor &Visitor) override; + Error removeSectionReferences( + bool AllowBrokenLinks, + function_ref<bool(const SectionBase *)> ToRemove) override; + + static bool classof(const SectionBase *S) { + if (!(S->OriginalFlags & ELF::SHF_ALLOC)) + return false; + 
return S->OriginalType == ELF::SHT_REL || S->OriginalType == ELF::SHT_RELA; + } +}; + +class GnuDebugLinkSection : public SectionBase { + MAKE_SEC_WRITER_FRIEND + +private: + StringRef FileName; + uint32_t CRC32; + + void init(StringRef File); + +public: + // If we add this section from an external source we can use this ctor. + explicit GnuDebugLinkSection(StringRef File, uint32_t PrecomputedCRC); + Error accept(SectionVisitor &Visitor) const override; + Error accept(MutableSectionVisitor &Visitor) override; +}; + +class Reader { +public: + virtual ~Reader(); + virtual Expected<std::unique_ptr<Object>> create(bool EnsureSymtab) const = 0; +}; + +using object::Binary; +using object::ELFFile; +using object::ELFObjectFile; +using object::OwningBinary; + +class BasicELFBuilder { +protected: + std::unique_ptr<Object> Obj; + + void initFileHeader(); + void initHeaderSegment(); + StringTableSection *addStrTab(); + SymbolTableSection *addSymTab(StringTableSection *StrTab); + Error initSections(); + +public: + BasicELFBuilder() : Obj(std::make_unique<Object>()) {} +}; + +class BinaryELFBuilder : public BasicELFBuilder { + MemoryBuffer *MemBuf; + uint8_t NewSymbolVisibility; + void addData(SymbolTableSection *SymTab); + +public: + BinaryELFBuilder(MemoryBuffer *MB, uint8_t NewSymbolVisibility) + : MemBuf(MB), NewSymbolVisibility(NewSymbolVisibility) {} + + Expected<std::unique_ptr<Object>> build(); +}; + +class IHexELFBuilder : public BasicELFBuilder { + const std::vector<IHexRecord> &Records; + + void addDataSections(); + +public: + IHexELFBuilder(const std::vector<IHexRecord> &Records) : Records(Records) {} + + Expected<std::unique_ptr<Object>> build(); +}; + +template <class ELFT> class ELFBuilder { +private: + using Elf_Addr = typename ELFT::Addr; + using Elf_Shdr = typename ELFT::Shdr; + using Elf_Word = typename ELFT::Word; + + const ELFFile<ELFT> &ElfFile; + Object &Obj; + size_t EhdrOffset = 0; + std::optional<StringRef> ExtractPartition; + + void 
setParentSegment(Segment &Child); + Error readProgramHeaders(const ELFFile<ELFT> &HeadersFile); + Error initGroupSection(GroupSection *GroupSec); + Error initSymbolTable(SymbolTableSection *SymTab); + Error readSectionHeaders(); + Error readSections(bool EnsureSymtab); + Error findEhdrOffset(); + Expected<SectionBase &> makeSection(const Elf_Shdr &Shdr); + +public: + ELFBuilder(const ELFObjectFile<ELFT> &ElfObj, Object &Obj, + std::optional<StringRef> ExtractPartition); + + Error build(bool EnsureSymtab); +}; + +class BinaryReader : public Reader { + MemoryBuffer *MemBuf; + uint8_t NewSymbolVisibility; + +public: + BinaryReader(MemoryBuffer *MB, const uint8_t NewSymbolVisibility) + : MemBuf(MB), NewSymbolVisibility(NewSymbolVisibility) {} + Expected<std::unique_ptr<Object>> create(bool EnsureSymtab) const override; +}; + +class IHexReader : public Reader { + MemoryBuffer *MemBuf; + + Expected<std::vector<IHexRecord>> parse() const; + Error parseError(size_t LineNo, Error E) const { + return LineNo == -1U + ? createFileError(MemBuf->getBufferIdentifier(), std::move(E)) + : createFileError(MemBuf->getBufferIdentifier(), LineNo, + std::move(E)); + } + template <typename... 
Ts> + Error parseError(size_t LineNo, char const *Fmt, const Ts &...Vals) const { + Error E = createStringError(errc::invalid_argument, Fmt, Vals...); + return parseError(LineNo, std::move(E)); + } + +public: + IHexReader(MemoryBuffer *MB) : MemBuf(MB) {} + + Expected<std::unique_ptr<Object>> create(bool EnsureSymtab) const override; +}; + +class ELFReader : public Reader { + Binary *Bin; + std::optional<StringRef> ExtractPartition; + +public: + Expected<std::unique_ptr<Object>> create(bool EnsureSymtab) const override; + explicit ELFReader(Binary *B, std::optional<StringRef> ExtractPartition) + : Bin(B), ExtractPartition(ExtractPartition) {} +}; + +class Object { +private: + using SecPtr = std::unique_ptr<SectionBase>; + using SegPtr = std::unique_ptr<Segment>; + + std::vector<SecPtr> Sections; + std::vector<SegPtr> Segments; + std::vector<SecPtr> RemovedSections; + DenseMap<SectionBase *, std::vector<uint8_t>> UpdatedSections; + + static bool sectionIsAlloc(const SectionBase &Sec) { + return Sec.Flags & ELF::SHF_ALLOC; + }; + +public: + template <class T> + using ConstRange = iterator_range<pointee_iterator< + typename std::vector<std::unique_ptr<T>>::const_iterator>>; + + // It is often the case that the ELF header and the program header table are + // not present in any segment. This could be a problem during file layout, + // because other segments may get assigned an offset where either of the + // two should reside, which will effectively corrupt the resulting binary. + // Other than that we use these segments to track program header offsets + // when they may not follow the ELF header. 
+ Segment ElfHdrSegment; + Segment ProgramHdrSegment; + + bool Is64Bits; + uint8_t OSABI; + uint8_t ABIVersion; + uint64_t Entry; + uint64_t SHOff; + uint32_t Type; + uint32_t Machine; + uint32_t Version; + uint32_t Flags; + + bool HadShdrs = true; + bool MustBeRelocatable = false; + StringTableSection *SectionNames = nullptr; + SymbolTableSection *SymbolTable = nullptr; + SectionIndexSection *SectionIndexTable = nullptr; + + bool IsMips64EL = false; + + SectionTableRef sections() const { return SectionTableRef(Sections); } + iterator_range< + filter_iterator<pointee_iterator<std::vector<SecPtr>::const_iterator>, + decltype(&sectionIsAlloc)>> + allocSections() const { + return make_filter_range(make_pointee_range(Sections), sectionIsAlloc); + } + + const auto &getUpdatedSections() const { return UpdatedSections; } + Error updateSection(StringRef Name, ArrayRef<uint8_t> Data); + + SectionBase *findSection(StringRef Name) { + auto SecIt = + find_if(Sections, [&](const SecPtr &Sec) { return Sec->Name == Name; }); + return SecIt == Sections.end() ? nullptr : SecIt->get(); + } + SectionTableRef removedSections() { return SectionTableRef(RemovedSections); } + + ConstRange<Segment> segments() const { return make_pointee_range(Segments); } + + Error removeSections(bool AllowBrokenLinks, + std::function<bool(const SectionBase &)> ToRemove); + Error replaceSections(const DenseMap<SectionBase *, SectionBase *> &FromTo); + Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove); + template <class T, class...
Ts> T &addSection(Ts &&...Args) { + auto Sec = std::make_unique<T>(std::forward<Ts>(Args)...); + auto Ptr = Sec.get(); + MustBeRelocatable |= isa<RelocationSection>(*Ptr); + Sections.emplace_back(std::move(Sec)); + Ptr->Index = Sections.size(); + return *Ptr; + } + Error addNewSymbolTable(); + Segment &addSegment(ArrayRef<uint8_t> Data) { + Segments.emplace_back(std::make_unique<Segment>(Data)); + return *Segments.back(); + } + bool isRelocatable() const { + return (Type != ELF::ET_DYN && Type != ELF::ET_EXEC) || MustBeRelocatable; + } +}; + +} // end namespace elf +} // end namespace objcopy +} // end namespace llvm + +#endif // LLVM_LIB_OBJCOPY_ELF_ELFOBJECT_H diff --git a/contrib/libs/llvm16/lib/ObjCopy/MachO/MachOLayoutBuilder.cpp b/contrib/libs/llvm16/lib/ObjCopy/MachO/MachOLayoutBuilder.cpp new file mode 100644 index 00000000000..067ef39d905 --- /dev/null +++ b/contrib/libs/llvm16/lib/ObjCopy/MachO/MachOLayoutBuilder.cpp @@ -0,0 +1,466 @@ +//===- MachOLayoutBuilder.cpp -----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "MachOLayoutBuilder.h" +#include "llvm/Support/Alignment.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; +using namespace llvm::objcopy::macho; + +StringTableBuilder::Kind +MachOLayoutBuilder::getStringTableBuilderKind(const Object &O, bool Is64Bit) { + if (O.Header.FileType == MachO::HeaderFileType::MH_OBJECT) + return Is64Bit ? StringTableBuilder::MachO64 : StringTableBuilder::MachO; + return Is64Bit ? 
StringTableBuilder::MachO64Linked + : StringTableBuilder::MachOLinked; +} + +uint32_t MachOLayoutBuilder::computeSizeOfCmds() const { + uint32_t Size = 0; + for (const LoadCommand &LC : O.LoadCommands) { + const MachO::macho_load_command &MLC = LC.MachOLoadCommand; + auto cmd = MLC.load_command_data.cmd; + switch (cmd) { + case MachO::LC_SEGMENT: + Size += sizeof(MachO::segment_command) + + sizeof(MachO::section) * LC.Sections.size(); + continue; + case MachO::LC_SEGMENT_64: + Size += sizeof(MachO::segment_command_64) + + sizeof(MachO::section_64) * LC.Sections.size(); + continue; + } + + switch (cmd) { +#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ + case MachO::LCName: \ + Size += sizeof(MachO::LCStruct) + LC.Payload.size(); \ + break; +#include "llvm/BinaryFormat/MachO.def" +#undef HANDLE_LOAD_COMMAND + } + } + + return Size; +} + +void MachOLayoutBuilder::constructStringTable() { + for (std::unique_ptr<SymbolEntry> &Sym : O.SymTable.Symbols) + StrTableBuilder.add(Sym->Name); + StrTableBuilder.finalize(); +} + +void MachOLayoutBuilder::updateSymbolIndexes() { + uint32_t Index = 0; + for (auto &Symbol : O.SymTable.Symbols) + Symbol->Index = Index++; +} + +// Updates the index and the number of local/external/undefined symbols. +void MachOLayoutBuilder::updateDySymTab(MachO::macho_load_command &MLC) { + assert(MLC.load_command_data.cmd == MachO::LC_DYSYMTAB); + // Make sure that nlist entries in the symbol table are sorted by the those + // types. The order is: local < defined external < undefined external. 
+ assert(llvm::is_sorted(O.SymTable.Symbols, + [](const std::unique_ptr<SymbolEntry> &A, + const std::unique_ptr<SymbolEntry> &B) { + bool AL = A->isLocalSymbol(), + BL = B->isLocalSymbol(); + if (AL != BL) + return AL; + return !AL && !A->isUndefinedSymbol() && + B->isUndefinedSymbol(); + }) && + "Symbols are not sorted by their types."); + + uint32_t NumLocalSymbols = 0; + auto Iter = O.SymTable.Symbols.begin(); + auto End = O.SymTable.Symbols.end(); + for (; Iter != End; ++Iter) { + if ((*Iter)->isExternalSymbol()) + break; + + ++NumLocalSymbols; + } + + uint32_t NumExtDefSymbols = 0; + for (; Iter != End; ++Iter) { + if ((*Iter)->isUndefinedSymbol()) + break; + + ++NumExtDefSymbols; + } + + MLC.dysymtab_command_data.ilocalsym = 0; + MLC.dysymtab_command_data.nlocalsym = NumLocalSymbols; + MLC.dysymtab_command_data.iextdefsym = NumLocalSymbols; + MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols; + MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols; + MLC.dysymtab_command_data.nundefsym = + O.SymTable.Symbols.size() - (NumLocalSymbols + NumExtDefSymbols); +} + +// Recomputes and updates offset and size fields in load commands and sections +// since they could be modified. +uint64_t MachOLayoutBuilder::layoutSegments() { + auto HeaderSize = + Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); + const bool IsObjectFile = + O.Header.FileType == MachO::HeaderFileType::MH_OBJECT; + uint64_t Offset = IsObjectFile ? 
(HeaderSize + O.Header.SizeOfCmds) : 0; + for (LoadCommand &LC : O.LoadCommands) { + auto &MLC = LC.MachOLoadCommand; + StringRef Segname; + uint64_t SegmentVmAddr; + uint64_t SegmentVmSize; + switch (MLC.load_command_data.cmd) { + case MachO::LC_SEGMENT: + SegmentVmAddr = MLC.segment_command_data.vmaddr; + SegmentVmSize = MLC.segment_command_data.vmsize; + Segname = StringRef(MLC.segment_command_data.segname, + strnlen(MLC.segment_command_data.segname, + sizeof(MLC.segment_command_data.segname))); + break; + case MachO::LC_SEGMENT_64: + SegmentVmAddr = MLC.segment_command_64_data.vmaddr; + SegmentVmSize = MLC.segment_command_64_data.vmsize; + Segname = StringRef(MLC.segment_command_64_data.segname, + strnlen(MLC.segment_command_64_data.segname, + sizeof(MLC.segment_command_64_data.segname))); + break; + default: + continue; + } + + if (Segname == "__LINKEDIT") { + // We update the __LINKEDIT segment later (in layoutTail). + assert(LC.Sections.empty() && "__LINKEDIT segment has sections"); + LinkEditLoadCommand = &MLC; + continue; + } + + // Update file offsets and sizes of sections. 
+ uint64_t SegOffset = Offset; + uint64_t SegFileSize = 0; + uint64_t VMSize = 0; + for (std::unique_ptr<Section> &Sec : LC.Sections) { + assert(SegmentVmAddr <= Sec->Addr && + "Section's address cannot be smaller than Segment's one"); + uint32_t SectOffset = Sec->Addr - SegmentVmAddr; + if (IsObjectFile) { + if (!Sec->hasValidOffset()) { + Sec->Offset = 0; + } else { + uint64_t PaddingSize = + offsetToAlignment(SegFileSize, Align(1ull << Sec->Align)); + Sec->Offset = SegOffset + SegFileSize + PaddingSize; + Sec->Size = Sec->Content.size(); + SegFileSize += PaddingSize + Sec->Size; + } + } else { + if (!Sec->hasValidOffset()) { + Sec->Offset = 0; + } else { + Sec->Offset = SegOffset + SectOffset; + Sec->Size = Sec->Content.size(); + SegFileSize = std::max(SegFileSize, SectOffset + Sec->Size); + } + } + VMSize = std::max(VMSize, SectOffset + Sec->Size); + } + + if (IsObjectFile) { + Offset += SegFileSize; + } else { + Offset = alignTo(Offset + SegFileSize, PageSize); + SegFileSize = alignTo(SegFileSize, PageSize); + // Use the original vmsize if the segment is __PAGEZERO. + VMSize = + Segname == "__PAGEZERO" ? 
SegmentVmSize : alignTo(VMSize, PageSize); + } + + switch (MLC.load_command_data.cmd) { + case MachO::LC_SEGMENT: + MLC.segment_command_data.cmdsize = + sizeof(MachO::segment_command) + + sizeof(MachO::section) * LC.Sections.size(); + MLC.segment_command_data.nsects = LC.Sections.size(); + MLC.segment_command_data.fileoff = SegOffset; + MLC.segment_command_data.vmsize = VMSize; + MLC.segment_command_data.filesize = SegFileSize; + break; + case MachO::LC_SEGMENT_64: + MLC.segment_command_64_data.cmdsize = + sizeof(MachO::segment_command_64) + + sizeof(MachO::section_64) * LC.Sections.size(); + MLC.segment_command_64_data.nsects = LC.Sections.size(); + MLC.segment_command_64_data.fileoff = SegOffset; + MLC.segment_command_64_data.vmsize = VMSize; + MLC.segment_command_64_data.filesize = SegFileSize; + break; + } + } + + return Offset; +} + +uint64_t MachOLayoutBuilder::layoutRelocations(uint64_t Offset) { + for (LoadCommand &LC : O.LoadCommands) + for (std::unique_ptr<Section> &Sec : LC.Sections) { + Sec->RelOff = Sec->Relocations.empty() ? 0 : Offset; + Sec->NReloc = Sec->Relocations.size(); + Offset += sizeof(MachO::any_relocation_info) * Sec->NReloc; + } + + return Offset; +} + +Error MachOLayoutBuilder::layoutTail(uint64_t Offset) { + // If we are building the layout of an executable or dynamic library + // which does not have any segments other than __LINKEDIT, + // the Offset can be equal to zero by this time. It happens because of the + // convention that in such cases the file offsets specified by LC_SEGMENT + // start with zero (unlike the case of a relocatable object file). + const uint64_t HeaderSize = + Is64Bit ? 
sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); + assert((!(O.Header.FileType == MachO::HeaderFileType::MH_OBJECT) || + Offset >= HeaderSize + O.Header.SizeOfCmds) && + "Incorrect tail offset"); + Offset = std::max(Offset, HeaderSize + O.Header.SizeOfCmds); + + // The exports trie can be in either LC_DYLD_INFO or in + // LC_DYLD_EXPORTS_TRIE, but not both. + size_t DyldInfoExportsTrieSize = 0; + size_t DyldExportsTrieSize = 0; + for (const auto &LC : O.LoadCommands) { + switch (LC.MachOLoadCommand.load_command_data.cmd) { + case MachO::LC_DYLD_INFO: + case MachO::LC_DYLD_INFO_ONLY: + DyldInfoExportsTrieSize = O.Exports.Trie.size(); + break; + case MachO::LC_DYLD_EXPORTS_TRIE: + DyldExportsTrieSize = O.Exports.Trie.size(); + break; + default: + break; + } + } + assert((DyldInfoExportsTrieSize == 0 || DyldExportsTrieSize == 0) && + "Export trie in both LCs"); + + uint64_t NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist); + uint64_t StartOfLinkEdit = Offset; + + // The order of LINKEDIT elements is as follows: + // rebase info, binding info, weak binding info, lazy binding info, export + // trie, chained fixups, dyld exports trie, function starts, data-in-code, + // symbol table, indirect symbol table, symbol table strings, + // dylib codesign drs, and code signature. 
+ auto updateOffset = [&Offset](size_t Size) { + uint64_t PreviousOffset = Offset; + Offset += Size; + return PreviousOffset; + }; + + uint64_t StartOfRebaseInfo = updateOffset(O.Rebases.Opcodes.size()); + uint64_t StartOfBindingInfo = updateOffset(O.Binds.Opcodes.size()); + uint64_t StartOfWeakBindingInfo = updateOffset(O.WeakBinds.Opcodes.size()); + uint64_t StartOfLazyBindingInfo = updateOffset(O.LazyBinds.Opcodes.size()); + uint64_t StartOfExportTrie = updateOffset(DyldInfoExportsTrieSize); + uint64_t StartOfChainedFixups = updateOffset(O.ChainedFixups.Data.size()); + uint64_t StartOfDyldExportsTrie = updateOffset(DyldExportsTrieSize); + uint64_t StartOfFunctionStarts = updateOffset(O.FunctionStarts.Data.size()); + uint64_t StartOfDataInCode = updateOffset(O.DataInCode.Data.size()); + uint64_t StartOfLinkerOptimizationHint = + updateOffset(O.LinkerOptimizationHint.Data.size()); + uint64_t StartOfSymbols = updateOffset(NListSize * O.SymTable.Symbols.size()); + uint64_t StartOfIndirectSymbols = + updateOffset(sizeof(uint32_t) * O.IndirectSymTable.Symbols.size()); + uint64_t StartOfSymbolStrings = updateOffset(StrTableBuilder.getSize()); + uint64_t StartOfDylibCodeSignDRs = updateOffset(O.DylibCodeSignDRs.Data.size()); + + uint64_t StartOfCodeSignature = Offset; + uint32_t CodeSignatureSize = 0; + if (O.CodeSignatureCommandIndex) { + StartOfCodeSignature = alignTo(StartOfCodeSignature, 16); + + // Note: These calculations are to be kept in sync with the same + // calculations performed in LLD's CodeSignatureSection. 
+ const uint32_t AllHeadersSize = + alignTo(CodeSignature.FixedHeadersSize + OutputFileName.size() + 1, + CodeSignature.Align); + const uint32_t BlockCount = + (StartOfCodeSignature + CodeSignature.BlockSize - 1) / + CodeSignature.BlockSize; + const uint32_t Size = + alignTo(AllHeadersSize + BlockCount * CodeSignature.HashSize, + CodeSignature.Align); + + CodeSignature.StartOffset = StartOfCodeSignature; + CodeSignature.AllHeadersSize = AllHeadersSize; + CodeSignature.BlockCount = BlockCount; + CodeSignature.OutputFileName = OutputFileName; + CodeSignature.Size = Size; + CodeSignatureSize = Size; + } + uint64_t LinkEditSize = + StartOfCodeSignature + CodeSignatureSize - StartOfLinkEdit; + + // Now we have determined the layout of the contents of the __LINKEDIT + // segment. Update its load command. + if (LinkEditLoadCommand) { + MachO::macho_load_command *MLC = LinkEditLoadCommand; + switch (LinkEditLoadCommand->load_command_data.cmd) { + case MachO::LC_SEGMENT: + MLC->segment_command_data.cmdsize = sizeof(MachO::segment_command); + MLC->segment_command_data.fileoff = StartOfLinkEdit; + MLC->segment_command_data.vmsize = alignTo(LinkEditSize, PageSize); + MLC->segment_command_data.filesize = LinkEditSize; + break; + case MachO::LC_SEGMENT_64: + MLC->segment_command_64_data.cmdsize = sizeof(MachO::segment_command_64); + MLC->segment_command_64_data.fileoff = StartOfLinkEdit; + MLC->segment_command_64_data.vmsize = alignTo(LinkEditSize, PageSize); + MLC->segment_command_64_data.filesize = LinkEditSize; + break; + } + } + + for (LoadCommand &LC : O.LoadCommands) { + auto &MLC = LC.MachOLoadCommand; + auto cmd = MLC.load_command_data.cmd; + switch (cmd) { + case MachO::LC_CODE_SIGNATURE: + MLC.linkedit_data_command_data.dataoff = StartOfCodeSignature; + MLC.linkedit_data_command_data.datasize = CodeSignatureSize; + break; + case MachO::LC_DYLIB_CODE_SIGN_DRS: + MLC.linkedit_data_command_data.dataoff = StartOfDylibCodeSignDRs; + MLC.linkedit_data_command_data.datasize = 
O.DylibCodeSignDRs.Data.size(); + break; + case MachO::LC_SYMTAB: + MLC.symtab_command_data.symoff = StartOfSymbols; + MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size(); + MLC.symtab_command_data.stroff = StartOfSymbolStrings; + MLC.symtab_command_data.strsize = StrTableBuilder.getSize(); + break; + case MachO::LC_DYSYMTAB: { + if (MLC.dysymtab_command_data.ntoc != 0 || + MLC.dysymtab_command_data.nmodtab != 0 || + MLC.dysymtab_command_data.nextrefsyms != 0 || + MLC.dysymtab_command_data.nlocrel != 0 || + MLC.dysymtab_command_data.nextrel != 0) + return createStringError(llvm::errc::not_supported, + "shared library is not yet supported"); + + if (!O.IndirectSymTable.Symbols.empty()) { + MLC.dysymtab_command_data.indirectsymoff = StartOfIndirectSymbols; + MLC.dysymtab_command_data.nindirectsyms = + O.IndirectSymTable.Symbols.size(); + } + + updateDySymTab(MLC); + break; + } + case MachO::LC_DATA_IN_CODE: + MLC.linkedit_data_command_data.dataoff = StartOfDataInCode; + MLC.linkedit_data_command_data.datasize = O.DataInCode.Data.size(); + break; + case MachO::LC_LINKER_OPTIMIZATION_HINT: + MLC.linkedit_data_command_data.dataoff = StartOfLinkerOptimizationHint; + MLC.linkedit_data_command_data.datasize = + O.LinkerOptimizationHint.Data.size(); + break; + case MachO::LC_FUNCTION_STARTS: + MLC.linkedit_data_command_data.dataoff = StartOfFunctionStarts; + MLC.linkedit_data_command_data.datasize = O.FunctionStarts.Data.size(); + break; + case MachO::LC_DYLD_CHAINED_FIXUPS: + MLC.linkedit_data_command_data.dataoff = StartOfChainedFixups; + MLC.linkedit_data_command_data.datasize = O.ChainedFixups.Data.size(); + break; + case MachO::LC_DYLD_EXPORTS_TRIE: + MLC.linkedit_data_command_data.dataoff = StartOfDyldExportsTrie; + MLC.linkedit_data_command_data.datasize = DyldExportsTrieSize; + break; + case MachO::LC_DYLD_INFO: + case MachO::LC_DYLD_INFO_ONLY: + MLC.dyld_info_command_data.rebase_off = + O.Rebases.Opcodes.empty() ? 
0 : StartOfRebaseInfo; + MLC.dyld_info_command_data.rebase_size = O.Rebases.Opcodes.size(); + MLC.dyld_info_command_data.bind_off = + O.Binds.Opcodes.empty() ? 0 : StartOfBindingInfo; + MLC.dyld_info_command_data.bind_size = O.Binds.Opcodes.size(); + MLC.dyld_info_command_data.weak_bind_off = + O.WeakBinds.Opcodes.empty() ? 0 : StartOfWeakBindingInfo; + MLC.dyld_info_command_data.weak_bind_size = O.WeakBinds.Opcodes.size(); + MLC.dyld_info_command_data.lazy_bind_off = + O.LazyBinds.Opcodes.empty() ? 0 : StartOfLazyBindingInfo; + MLC.dyld_info_command_data.lazy_bind_size = O.LazyBinds.Opcodes.size(); + MLC.dyld_info_command_data.export_off = + O.Exports.Trie.empty() ? 0 : StartOfExportTrie; + MLC.dyld_info_command_data.export_size = DyldInfoExportsTrieSize; + break; + // Note that LC_ENCRYPTION_INFO.cryptoff despite its name and the comment in + // <mach-o/loader.h> is not an offset in the binary file, instead, it is a + // relative virtual address. At the moment modification of the __TEXT + // segment of executables isn't supported anyway (e.g. data in code entries + // are not recalculated). Moreover, in general + // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 are nontrivial to update because + // without making additional assumptions (e.g. that the entire __TEXT + // segment should be encrypted) we do not know how to recalculate the + // boundaries of the encrypted part. For now just copy over these load + // commands until we encounter a real world usecase where + // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 need to be adjusted. 
+  case MachO::LC_ENCRYPTION_INFO:
+  case MachO::LC_ENCRYPTION_INFO_64:
+  case MachO::LC_LOAD_DYLINKER:
+  case MachO::LC_MAIN:
+  case MachO::LC_RPATH:
+  case MachO::LC_SEGMENT:
+  case MachO::LC_SEGMENT_64:
+  case MachO::LC_VERSION_MIN_MACOSX:
+  case MachO::LC_VERSION_MIN_IPHONEOS:
+  case MachO::LC_VERSION_MIN_TVOS:
+  case MachO::LC_VERSION_MIN_WATCHOS:
+  case MachO::LC_BUILD_VERSION:
+  case MachO::LC_ID_DYLIB:
+  case MachO::LC_LOAD_DYLIB:
+  case MachO::LC_LOAD_WEAK_DYLIB:
+  case MachO::LC_UUID:
+  case MachO::LC_SOURCE_VERSION:
+  case MachO::LC_THREAD:
+  case MachO::LC_UNIXTHREAD:
+  case MachO::LC_SUB_FRAMEWORK:
+  case MachO::LC_SUB_UMBRELLA:
+  case MachO::LC_SUB_CLIENT:
+  case MachO::LC_SUB_LIBRARY:
+  case MachO::LC_LINKER_OPTION:
+    // Nothing to update.
+    break;
+  default:
+    // Abort if it's unsupported in order to prevent corrupting the object.
+    return createStringError(llvm::errc::not_supported,
+                             "unsupported load command (cmd=0x%x)", cmd);
+  }
+  }
+
+  return Error::success();
+}
+
+/// Recomputes the layout of the whole object.
+///
+/// Refreshes the header's load command count and total command size, rebuilds
+/// the string table and the symbol indexes, then lays out segments,
+/// relocations and the tail in that order, passing the running file offset
+/// from each stage into the next.
+///
+/// \returns the Error produced by layoutTail().
+Error MachOLayoutBuilder::layout() {
+  O.Header.NCmds = O.LoadCommands.size();
+  O.Header.SizeOfCmds = computeSizeOfCmds();
+  constructStringTable();
+  updateSymbolIndexes();
+  uint64_t Offset = layoutSegments();
+  Offset = layoutRelocations(Offset);
+  return layoutTail(Offset);
+}
diff --git a/contrib/libs/llvm16/lib/ObjCopy/MachO/MachOLayoutBuilder.h b/contrib/libs/llvm16/lib/ObjCopy/MachO/MachOLayoutBuilder.h
new file mode 100644
index 00000000000..8d8716df22b
--- /dev/null
+++ b/contrib/libs/llvm16/lib/ObjCopy/MachO/MachOLayoutBuilder.h
@@ -0,0 +1,97 @@
+//===- MachOLayoutBuilder.h -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H
+#define LLVM_LIB_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H
+
+#include "MachOObject.h"
+#include "llvm/ObjCopy/MachO/MachOObjcopy.h"
+
+namespace llvm {
+namespace objcopy {
+namespace macho {
+
+/// When MachO binaries include a LC_CODE_SIGNATURE load command,
+/// the __LINKEDIT data segment will include a section corresponding
+/// to the LC_CODE_SIGNATURE load command. This section serves as a signature
+/// for the binary. Included in the CodeSignature section is a header followed
+/// by a hash of the binary. If present, the CodeSignature section is the
+/// last component of the binary.
+///
+/// The static constexpr members are fixed parameters of the signature format;
+/// the non-static members describe one concrete signature and have no default
+/// member initializers.
+struct CodeSignatureInfo {
+  // NOTE: These values are to be kept in sync with those in
+  // LLD's CodeSignatureSection class.
+
+  static constexpr uint32_t Align = 16;
+  static constexpr uint8_t BlockSizeShift = 12;
+  // The binary is read in blocks of the following size.
+  static constexpr size_t BlockSize = (1 << BlockSizeShift); // 4 KiB
+  // For each block, a SHA256 hash (256 bits, 32 bytes) is written to
+  // the CodeSignature section.
+  static constexpr size_t HashSize = 256 / 8;
+  // Combined size of the super-blob header and one blob index entry, rounded
+  // up to a multiple of 8.
+  static constexpr size_t BlobHeadersSize = llvm::alignTo<8>(
+      sizeof(llvm::MachO::CS_SuperBlob) + sizeof(llvm::MachO::CS_BlobIndex));
+  // The size of the entire header depends upon the filename the binary is being
+  // written to, but the rest of the header is fixed in size.
+  static constexpr uint32_t FixedHeadersSize =
+      BlobHeadersSize + sizeof(llvm::MachO::CS_CodeDirectory);
+
+  // The offset relative to the start of the binary where
+  // the CodeSignature section should begin.
+  uint32_t StartOffset;
+  // The size of the entire header, output file name size included.
+  uint32_t AllHeadersSize;
+  // The number of blocks required to hash the binary.
+  uint32_t BlockCount;
+  // Name of the output file. This is a non-owning StringRef.
+  StringRef OutputFileName;
+  // The size of the entire CodeSignature section, including both the header and
+  // hashes.
+  uint32_t Size;
+};
+
+/// Computes the on-disk layout of a Mach-O Object.
+///
+/// layout() is the public entry point; the private member functions declared
+/// below are its individual stages. The object and the output file name are
+/// held as a reference and a StringRef respectively, so the builder does not
+/// own them. NOTE(review): presumably the caller keeps both alive for the
+/// builder's lifetime -- confirm at the construction site.
+class MachOLayoutBuilder {
+  Object &O;
+  bool Is64Bit;
+  StringRef OutputFileName;
+  uint64_t PageSize;
+  // layoutTail() assigns the fields of this member when
+  // O.CodeSignatureCommandIndex is set.
+  CodeSignatureInfo CodeSignature;
+
+  // Points to the __LINKEDIT segment if it exists.
+  MachO::macho_load_command *LinkEditLoadCommand = nullptr;
+  StringTableBuilder StrTableBuilder;
+
+  uint32_t computeSizeOfCmds() const;
+  void constructStringTable();
+  void updateSymbolIndexes();
+  void updateDySymTab(MachO::macho_load_command &MLC);
+  // Each layout stage below returns (or takes) the running file offset.
+  uint64_t layoutSegments();
+  uint64_t layoutRelocations(uint64_t Offset);
+  Error layoutTail(uint64_t Offset);
+
+  static StringTableBuilder::Kind getStringTableBuilderKind(const Object &O,
+                                                            bool Is64Bit);
+
+public:
+  MachOLayoutBuilder(Object &O, bool Is64Bit, StringRef OutputFileName,
+                     uint64_t PageSize)
+      : O(O), Is64Bit(Is64Bit), OutputFileName(OutputFileName),
+        PageSize(PageSize),
+        StrTableBuilder(getStringTableBuilderKind(O, Is64Bit)) {}
+
+  // Recomputes and updates fields in the given object such as file offsets.
+  Error layout();
+
+  StringTableBuilder &getStringTableBuilder() { return StrTableBuilder; }
+
+  const CodeSignatureInfo &getCodeSignature() const { return CodeSignature; }
+};
+
+} // end namespace macho
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_LIB_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H
diff --git a/contrib/libs/llvm16/lib/ObjCopy/MachO/MachOObjcopy.cpp b/contrib/libs/llvm16/lib/ObjCopy/MachO/MachOObjcopy.cpp
new file mode 100644
index 00000000000..d37241682ef
--- /dev/null
+++ b/contrib/libs/llvm16/lib/ObjCopy/MachO/MachOObjcopy.cpp
@@ -0,0 +1,550 @@
+//===- MachOObjcopy.cpp -----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ObjCopy/MachO/MachOObjcopy.h"
+#include "Archive.h"
+#include "MachOReader.h"
+#include "MachOWriter.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ObjCopy/CommonConfig.h"
+#include "llvm/ObjCopy/MachO/MachOConfig.h"
+#include "llvm/ObjCopy/MultiFormatConfig.h"
+#include "llvm/ObjCopy/ObjCopy.h"
+#include "llvm/Object/ArchiveWriter.h"
+#include "llvm/Object/MachOUniversal.h"
+#include "llvm/Object/MachOUniversalWriter.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileOutputBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/SmallVectorMemoryBuffer.h"
+
+using namespace llvm;
+using namespace llvm::objcopy;
+using namespace llvm::objcopy::macho;
+using namespace llvm::object;
+
+using SectionPred = std::function<bool(const std::unique_ptr<Section> &Sec)>;
+using LoadCommandPred = std::function<bool(const LoadCommand &LC)>;
+
+#ifndef NDEBUG
+/// Returns true for the load command kinds this file treats as carrying a
+/// string payload: LC_RPATH, LC_ID_DYLIB, LC_LOAD_DYLIB and
+/// LC_LOAD_WEAK_DYLIB. Compiled only when NDEBUG is not defined; its callers
+/// in this file are assert()s.
+static bool isLoadCommandWithPayloadString(const LoadCommand &LC) {
+  // TODO: Add support for LC_REEXPORT_DYLIB, LC_LOAD_UPWARD_DYLIB and
+  // LC_LAZY_LOAD_DYLIB
+  return LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH ||
+         LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_ID_DYLIB ||
+         LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_LOAD_DYLIB ||
+         LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_LOAD_WEAK_DYLIB;
+}
+#endif
+
+/// Returns LC's payload bytes as a string with trailing NUL bytes stripped.
+/// The result points into LC.Payload; it is not a copy.
+static StringRef getPayloadString(const LoadCommand &LC) {
+  assert(isLoadCommandWithPayloadString(LC) &&
+         "unsupported load command encountered");
+
+  return StringRef(reinterpret_cast<const char *>(LC.Payload.data()),
+                   LC.Payload.size())
+      .rtrim('\0');
+}
+
+/// Builds the section-removal predicate from Config and applies it to Obj.
+///
+/// The predicate starts as "remove nothing" and is replaced or wrapped by each
+/// option that is set, in the order below; OnlySection is handled last and
+/// discards whatever was built before it.
+static Error removeSections(const CommonConfig &Config, Object &Obj) {
+  SectionPred RemovePred = [](const std::unique_ptr<Section> &) {
+    return false;
+  };
+
+  if (!Config.ToRemove.empty()) {
+    // NOTE(review): the previous predicate is captured here but is not
+    // consulted by the lambda body.
+    RemovePred = [&Config, RemovePred](const std::unique_ptr<Section> &Sec) {
+      return Config.ToRemove.matches(Sec->CanonicalName);
+    };
+  }
+
+  if (Config.StripAll || Config.StripDebug) {
+    // Remove all debug sections.
+    RemovePred = [RemovePred](const std::unique_ptr<Section> &Sec) {
+      if (Sec->Segname == "__DWARF")
+        return true;
+
+      return RemovePred(Sec);
+    };
+  }
+
+  if (!Config.OnlySection.empty()) {
+    // Overwrite RemovePred because --only-section takes priority.
+    RemovePred = [&Config](const std::unique_ptr<Section> &Sec) {
+      return !Config.OnlySection.matches(Sec->CanonicalName);
+    };
+  }
+
+  return Obj.removeSections(RemovePred);
+}
+
+/// Sets the Referenced flag on every symbol that an indirect symbol table
+/// entry points to.
+static void markSymbols(const CommonConfig &, Object &Obj) {
+  // Symbols referenced from the indirect symbol table must not be removed.
+  for (IndirectSymbolEntry &ISE : Obj.IndirectSymTable.Symbols)
+    if (ISE.Symbol)
+      (*ISE.Symbol)->Referenced = true;
+}
+
+/// Renames the symbols listed in Config.SymbolsToRename, then removes every
+/// symbol selected by the predicate below. The "keep" checks come first, so
+/// they win over the strip options.
+static void updateAndRemoveSymbols(const CommonConfig &Config,
+                                   const MachOConfig &MachOConfig,
+                                   Object &Obj) {
+  for (SymbolEntry &Sym : Obj.SymTable) {
+    auto I = Config.SymbolsToRename.find(Sym.Name);
+    if (I != Config.SymbolsToRename.end())
+      Sym.Name = std::string(I->getValue());
+  }
+
+  auto RemovePred = [&Config, &MachOConfig,
+                     &Obj](const std::unique_ptr<SymbolEntry> &N) {
+    if (N->Referenced)
+      return false;
+    if (MachOConfig.KeepUndefined && N->isUndefinedSymbol())
+      return false;
+    if (N->n_desc & MachO::REFERENCED_DYNAMICALLY)
+      return false;
+    if (Config.StripAll)
+      return true;
+    if (Config.DiscardMode == DiscardType::All && !(N->n_type & MachO::N_EXT))
+      return true;
+    // This behavior is consistent with cctools' strip.
+    if (MachOConfig.StripSwiftSymbols &&
+        (Obj.Header.Flags & MachO::MH_DYLDLINK) && Obj.SwiftVersion &&
+        *Obj.SwiftVersion && N->isSwiftSymbol())
+      return true;
+    return false;
+  };
+
+  Obj.SymTable.removeSymbols(RemovePred);
+}
+
+/// Replaces LC's payload with S and updates the command size to match.
+///
+/// The new command size is sizeof(LCType) + S.size() + 1 rounded up to a
+/// multiple of 8. The payload is zero-filled to the remaining length before S
+/// is copied in, so at least one NUL byte follows the string.
+///
+/// \tparam LCType the fixed-size load command struct that precedes the payload.
+template <typename LCType>
+static void updateLoadCommandPayloadString(LoadCommand &LC, StringRef S) {
+  assert(isLoadCommandWithPayloadString(LC) &&
+         "unsupported load command encountered");
+
+  uint32_t NewCmdsize = alignTo(sizeof(LCType) + S.size() + 1, 8);
+
+  LC.MachOLoadCommand.load_command_data.cmdsize = NewCmdsize;
+  LC.Payload.assign(NewCmdsize - sizeof(LCType), 0);
+  std::copy(S.begin(), S.end(), LC.Payload.begin());
+}
+
+/// Creates an LC_RPATH load command whose payload is Path, using the same
+/// size rounding and zero padding as updateLoadCommandPayloadString.
+static LoadCommand buildRPathLoadCommand(StringRef Path) {
+  LoadCommand LC;
+  MachO::rpath_command RPathLC;
+  RPathLC.cmd = MachO::LC_RPATH;
+  // The path field is set to the size of the fixed command struct.
+  RPathLC.path = sizeof(MachO::rpath_command);
+  RPathLC.cmdsize = alignTo(sizeof(MachO::rpath_command) + Path.size() + 1, 8);
+  LC.MachOLoadCommand.rpath_command_data = RPathLC;
+  LC.Payload.assign(RPathLC.cmdsize - sizeof(MachO::rpath_command), 0);
+  std::copy(Path.begin(), Path.end(), LC.Payload.begin());
+  return LC;
+}
+
+/// Applies the load-command edits requested in MachOConfig to Obj.
+///
+/// Order of operations: delete rpaths, validate the rpath updates, rewrite
+/// LC_ID_DYLIB / LC_RPATH / LC_LOAD_DYLIB / LC_LOAD_WEAK_DYLIB payloads,
+/// append new rpaths, prepend new rpaths, and finally drop the requested
+/// empty segments.
+///
+/// \returns an invalid_argument error when an rpath to delete or update is not
+/// present, or when an updated or added rpath would duplicate an existing one;
+/// otherwise success or the error from Obj.removeLoadCommands().
+static Error processLoadCommands(const MachOConfig &MachOConfig, Object &Obj) {
+  // Remove RPaths.
+  DenseSet<StringRef> RPathsToRemove(MachOConfig.RPathsToRemove.begin(),
+                                     MachOConfig.RPathsToRemove.end());
+
+  LoadCommandPred RemovePred = [&RPathsToRemove,
+                                &MachOConfig](const LoadCommand &LC) {
+    if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH) {
+      // When removing all RPaths we don't need to care
+      // about what it contains
+      if (MachOConfig.RemoveAllRpaths)
+        return true;
+
+      StringRef RPath = getPayloadString(LC);
+      if (RPathsToRemove.count(RPath)) {
+        // Erasing the entry records that this path was found; whatever is
+        // left in the set afterwards is reported as missing below.
+        RPathsToRemove.erase(RPath);
+        return true;
+      }
+    }
+    return false;
+  };
+
+  if (Error E = Obj.removeLoadCommands(RemovePred))
+    return E;
+
+  // Emit an error if the Mach-O binary does not contain an rpath path name
+  // specified in -delete_rpath.
+  for (StringRef RPath : MachOConfig.RPathsToRemove) {
+    if (RPathsToRemove.count(RPath))
+      return createStringError(errc::invalid_argument,
+                               "no LC_RPATH load command with path: %s",
+                               RPath.str().c_str());
+  }
+
+  DenseSet<StringRef> RPaths;
+
+  // Get all existing RPaths.
+  for (LoadCommand &LC : Obj.LoadCommands) {
+    if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH)
+      RPaths.insert(getPayloadString(LC));
+  }
+
+  // Throw errors for invalid RPaths.
+  for (const auto &OldNew : MachOConfig.RPathsToUpdate) {
+    StringRef Old = OldNew.getFirst();
+    StringRef New = OldNew.getSecond();
+    if (!RPaths.contains(Old))
+      return createStringError(errc::invalid_argument,
+                               "no LC_RPATH load command with path: " + Old);
+    if (RPaths.contains(New))
+      return createStringError(errc::invalid_argument,
+                               "rpath '" + New +
+                                   "' would create a duplicate load command");
+  }
+
+  // Update load commands.
+  for (LoadCommand &LC : Obj.LoadCommands) {
+    switch (LC.MachOLoadCommand.load_command_data.cmd) {
+    case MachO::LC_ID_DYLIB:
+      if (MachOConfig.SharedLibId)
+        updateLoadCommandPayloadString<MachO::dylib_command>(
+            LC, *MachOConfig.SharedLibId);
+      break;
+
+    case MachO::LC_RPATH: {
+      StringRef RPath = getPayloadString(LC);
+      // An empty lookup result is treated as "no update requested".
+      StringRef NewRPath = MachOConfig.RPathsToUpdate.lookup(RPath);
+      if (!NewRPath.empty())
+        updateLoadCommandPayloadString<MachO::rpath_command>(LC, NewRPath);
+      break;
+    }
+
+    // TODO: Add LC_REEXPORT_DYLIB, LC_LAZY_LOAD_DYLIB, and LC_LOAD_UPWARD_DYLIB
+    // here once llvm-objcopy supports them.
+    case MachO::LC_LOAD_DYLIB:
+    case MachO::LC_LOAD_WEAK_DYLIB:
+      StringRef InstallName = getPayloadString(LC);
+      StringRef NewInstallName =
+          MachOConfig.InstallNamesToUpdate.lookup(InstallName);
+      if (!NewInstallName.empty())
+        updateLoadCommandPayloadString<MachO::dylib_command>(LC,
+                                                             NewInstallName);
+      break;
+    }
+  }
+
+  // Add new RPaths.
+  for (StringRef RPath : MachOConfig.RPathToAdd) {
+    if (RPaths.contains(RPath))
+      return createStringError(errc::invalid_argument,
+                               "rpath '" + RPath +
+                                   "' would create a duplicate load command");
+    RPaths.insert(RPath);
+    Obj.LoadCommands.push_back(buildRPathLoadCommand(RPath));
+  }
+
+  for (StringRef RPath : MachOConfig.RPathToPrepend) {
+    if (RPaths.contains(RPath))
+      return createStringError(errc::invalid_argument,
+                               "rpath '" + RPath +
+                                   "' would create a duplicate load command");
+
+    RPaths.insert(RPath);
+    Obj.LoadCommands.insert(Obj.LoadCommands.begin(),
+                            buildRPathLoadCommand(RPath));
+  }
+
+  // Unlike appending rpaths, the indexes of subsequent load commands must
+  // be recalculated after prepending one.
+  if (!MachOConfig.RPathToPrepend.empty())
+    Obj.updateLoadCommandIndexes();
+
+  // Remove any empty segments if required.
+  if (!MachOConfig.EmptySegmentsToRemove.empty()) {
+    auto RemovePred = [&MachOConfig](const LoadCommand &LC) {
+      if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_SEGMENT_64 ||
+          LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_SEGMENT) {
+        return LC.Sections.empty() &&
+               MachOConfig.EmptySegmentsToRemove.contains(*LC.getSegmentName());
+      }
+      return false;
+    };
+    if (Error E = Obj.removeLoadCommands(RemovePred))
+      return E;
+  }
+
+  return Error::success();
+}
+
+/// Writes the contents of the first section whose canonical name equals
+/// SecName to the file Filename.
+///
+/// \returns the error from creating or committing the output buffer, a
+/// parse_failed error if no section has that name, or success.
+static Error dumpSectionToFile(StringRef SecName, StringRef Filename,
+                               Object &Obj) {
+  for (LoadCommand &LC : Obj.LoadCommands)
+    for (const std::unique_ptr<Section> &Sec : LC.Sections) {
+      if (Sec->CanonicalName == SecName) {
+        Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
+            FileOutputBuffer::create(Filename, Sec->Content.size());
+        if (!BufferOrErr)
+          return BufferOrErr.takeError();
+        std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr);
+        llvm::copy(Sec->Content, Buf->getBufferStart());
+
+        if (Error E = Buf->commit())
+          return E;
+        return Error::success();
+      }
+    }
+
+  return createStringError(object_error::parse_failed, "section '%s' not found",
+                           SecName.str().c_str());
+}
+
+/// Adds a new section named NewSection.SectionName ("<segment>,<section>")
+/// whose content is a copy of NewSection.SectionData saved in
+/// Obj.NewSectionsContents.
+///
+/// If a segment with the target name exists, the section is appended to it at
+/// the highest end address of its current sections (or at the segment's VM
+/// address when that is higher). Otherwise a new segment is created whose size
+/// is the content size rounded up to a multiple of 16384.
+static Error addSection(const NewSectionInfo &NewSection, Object &Obj) {
+  std::pair<StringRef, StringRef> Pair = NewSection.SectionName.split(',');
+  StringRef TargetSegName = Pair.first;
+  Section Sec(TargetSegName, Pair.second);
+  Sec.Content =
+      Obj.NewSectionsContents.save(NewSection.SectionData->getBuffer());
+  Sec.Size = Sec.Content.size();
+
+  // Add the section into an existing segment.
+  for (LoadCommand &LC : Obj.LoadCommands) {
+    std::optional<StringRef> SegName = LC.getSegmentName();
+    if (SegName && SegName == TargetSegName) {
+      uint64_t Addr = *LC.getSegmentVMAddr();
+      for (const std::unique_ptr<Section> &S : LC.Sections)
+        Addr = std::max(Addr, S->Addr + S->Size);
+      LC.Sections.push_back(std::make_unique<Section>(Sec));
+      LC.Sections.back()->Addr = Addr;
+      return Error::success();
+    }
+  }
+
+  // There's no segment named TargetSegName. Create a new load command and
+  // insert a new section into it.
+  LoadCommand &NewSegment =
+      Obj.addSegment(TargetSegName, alignTo(Sec.Size, 16384));
+  NewSegment.Sections.push_back(std::make_unique<Section>(Sec));
+  NewSegment.Sections.back()->Addr = *NewSegment.getSegmentVMAddr();
+  return Error::success();
+}
+
+/// Looks up the section named SecName ("<segment>,<section>") in O.
+///
+/// \returns a reference to the section, or an invalid_argument error naming
+/// the segment or the section that could not be found.
+static Expected<Section &> findSection(StringRef SecName, Object &O) {
+  StringRef SegName;
+  // From here on SecName holds only the part after the comma.
+  std::tie(SegName, SecName) = SecName.split(",");
+  auto FoundSeg =
+      llvm::find_if(O.LoadCommands, [SegName](const LoadCommand &LC) {
+        return LC.getSegmentName() == SegName;
+      });
+  if (FoundSeg == O.LoadCommands.end())
+    return createStringError(errc::invalid_argument,
+                             "could not find segment with name '%s'",
+                             SegName.str().c_str());
+  auto FoundSec = llvm::find_if(FoundSeg->Sections,
+                                [SecName](const std::unique_ptr<Section> &Sec) {
+                                  return Sec->Sectname == SecName;
+                                });
+  if (FoundSec == FoundSeg->Sections.end())
+    return createStringError(errc::invalid_argument,
+                             "could not find section with name '%s'",
+                             SecName.str().c_str());
+
+  assert(FoundSec->get()->CanonicalName == (SegName + "," + SecName).str());
+  return **FoundSec;
+}
+
+/// Replaces the content of an existing section with NewSection.SectionData.
+///
+/// \returns the lookup error from findSection(), an invalid_argument error if
+/// the new data is larger than the section's current Size, or success.
+static Error updateSection(const NewSectionInfo &NewSection, Object &O) {
+  Expected<Section &> SecToUpdateOrErr = findSection(NewSection.SectionName, O);
+
+  if (!SecToUpdateOrErr)
+    return SecToUpdateOrErr.takeError();
+  Section &Sec = *SecToUpdateOrErr;
+
+  if (NewSection.SectionData->getBufferSize() > Sec.Size)
+    return createStringError(
+        errc::invalid_argument,
+        "new section cannot be larger than previous section");
+  Sec.Content = O.NewSectionsContents.save(NewSection.SectionData->getBuffer());
+  Sec.Size = Sec.Content.size();
+  return Error::success();
+}
+
+// isValidMachOCannonicalName returns success if Name is a MachO canonical name
+// ("<segment>,<section>") and lengths of both segment and section names are
+// valid. A name is accepted when it contains exactly one comma and each part
+// is at most 16 characters long.
+static Error isValidMachOCannonicalName(StringRef Name) {
+  if (Name.count(',') != 1)
+    return createStringError(errc::invalid_argument,
+                             "invalid section name '%s' (should be formatted "
+                             "as '<segment name>,<section name>')",
+                             Name.str().c_str());
+
+  std::pair<StringRef, StringRef> Pair = Name.split(',');
+  if (Pair.first.size() > 16)
+    return createStringError(errc::invalid_argument,
+                             "too long segment name: '%s'",
+                             Pair.first.str().c_str());
+  if (Pair.second.size() > 16)
+    return createStringError(errc::invalid_argument,
+                             "too long section name: '%s'",
+                             Pair.second.str().c_str());
+  return Error::success();
+}
+
+/// Applies every edit requested by Config and MachOConfig to Obj and stops at
+/// the first error.
+///
+/// Order: dump sections, remove sections, mark referenced symbols (StripAll
+/// only), rename and remove symbols, clear relocations (StripAll only), add
+/// sections, update sections, process load commands.
+static Error handleArgs(const CommonConfig &Config,
+                        const MachOConfig &MachOConfig, Object &Obj) {
+  // Dump sections before add/remove for compatibility with GNU objcopy.
+  for (StringRef Flag : Config.DumpSection) {
+    StringRef SectionName;
+    StringRef FileName;
+    // Each entry has the form "<section>=<file>".
+    std::tie(SectionName, FileName) = Flag.split('=');
+    if (Error E = dumpSectionToFile(SectionName, FileName, Obj))
+      return E;
+  }
+
+  if (Error E = removeSections(Config, Obj))
+    return E;
+
+  // Mark symbols to determine which symbols are still needed.
+  if (Config.StripAll)
+    markSymbols(Config, Obj);
+
+  updateAndRemoveSymbols(Config, MachOConfig, Obj);
+
+  if (Config.StripAll)
+    for (LoadCommand &LC : Obj.LoadCommands)
+      for (std::unique_ptr<Section> &Sec : LC.Sections)
+        Sec->Relocations.clear();
+
+  for (const NewSectionInfo &NewSection : Config.AddSection) {
+    if (Error E = isValidMachOCannonicalName(NewSection.SectionName))
+      return E;
+    if (Error E = addSection(NewSection, Obj))
+      return E;
+  }
+
+  for (const NewSectionInfo &NewSection : Config.UpdateSection) {
+    if (Error E = isValidMachOCannonicalName(NewSection.SectionName))
+      return E;
+    if (Error E = updateSection(NewSection, Obj))
+      return E;
+  }
+
+  if (Error E = processLoadCommands(MachOConfig, Obj))
+    return E;
+
+  return Error::success();
+}
+
+/// Runs objcopy on a single Mach-O object file and writes the result to Out.
+///
+/// Reads In into an Object, rejects MH_PRELOAD files, applies the requested
+/// edits through handleArgs(), then lays out and writes the object with
+/// MachOWriter. Errors from reading and from handleArgs() are wrapped with
+/// Config.InputFilename.
+Error objcopy::macho::executeObjcopyOnBinary(const CommonConfig &Config,
+                                             const MachOConfig &MachOConfig,
+                                             object::MachOObjectFile &In,
+                                             raw_ostream &Out) {
+  MachOReader Reader(In);
+  Expected<std::unique_ptr<Object>> O = Reader.create();
+  if (!O)
+    return createFileError(Config.InputFilename, O.takeError());
+
+  if (O->get()->Header.FileType == MachO::HeaderFileType::MH_PRELOAD)
+    return createStringError(std::errc::not_supported,
+                             "%s: MH_PRELOAD files are not supported",
+                             Config.InputFilename.str().c_str());
+
+  if (Error E = handleArgs(Config, MachOConfig, **O))
+    return createFileError(Config.InputFilename, std::move(E));
+
+  // Page size used for alignment of segment sizes in Mach-O executables and
+  // dynamic libraries.
+  uint64_t PageSize;
+  switch (In.getArch()) {
+  case Triple::ArchType::arm:
+  case Triple::ArchType::aarch64:
+  case Triple::ArchType::aarch64_32:
+    PageSize = 16384;
+    break;
+  default:
+    PageSize = 4096;
+  }
+
+  // Only the file-name component of the output path is passed to the writer.
+  MachOWriter Writer(**O, In.is64Bit(), In.isLittleEndian(),
+                     sys::path::filename(Config.OutputFilename), PageSize, Out);
+  if (auto E = Writer.finalize())
+    return E;
+  return Writer.write();
+}
+
+/// Runs objcopy on every slice of a universal (fat) Mach-O binary and writes
+/// the reassembled universal binary to Out.
+///
+/// A slice that is an archive has its members processed by
+/// createNewArchiveMembers() and is rewritten into a memory buffer. A slice
+/// that is a Mach-O object is processed by executeObjcopyOnBinary() into a
+/// memory buffer. Any other slice yields an invalid_argument error.
+Error objcopy::macho::executeObjcopyOnMachOUniversalBinary(
+    const MultiFormatConfig &Config, const MachOUniversalBinary &In,
+    raw_ostream &Out) {
+  // Binaries holds the re-parsed outputs together with their buffers; each
+  // Slice is constructed from a reference to a binary stored here.
+  SmallVector<OwningBinary<Binary>, 2> Binaries;
+  SmallVector<Slice, 2> Slices;
+  for (const auto &O : In.objects()) {
+    Expected<std::unique_ptr<Archive>> ArOrErr = O.getAsArchive();
+    if (ArOrErr) {
+      Expected<std::vector<NewArchiveMember>> NewArchiveMembersOrErr =
+          createNewArchiveMembers(Config, **ArOrErr);
+      if (!NewArchiveMembersOrErr)
+        return NewArchiveMembersOrErr.takeError();
+      auto Kind = (*ArOrErr)->kind();
+      // A BSD-kind archive is written back out as a Darwin-kind archive.
+      if (Kind == object::Archive::K_BSD)
+        Kind = object::Archive::K_DARWIN;
+      Expected<std::unique_ptr<MemoryBuffer>> OutputBufferOrErr =
+          writeArchiveToBuffer(*NewArchiveMembersOrErr,
+                               (*ArOrErr)->hasSymbolTable(), Kind,
+                               Config.getCommonConfig().DeterministicArchives,
+                               (*ArOrErr)->isThin());
+      if (!OutputBufferOrErr)
+        return OutputBufferOrErr.takeError();
+      Expected<std::unique_ptr<Binary>> BinaryOrErr =
+          object::createBinary(**OutputBufferOrErr);
+      if (!BinaryOrErr)
+        return BinaryOrErr.takeError();
+      Binaries.emplace_back(std::move(*BinaryOrErr),
+                            std::move(*OutputBufferOrErr));
+      Slices.emplace_back(*cast<Archive>(Binaries.back().getBinary()),
+                          O.getCPUType(), O.getCPUSubType(),
+                          O.getArchFlagName(), O.getAlign());
+      continue;
+    }
+    // The methods getAsArchive, getAsObjectFile, getAsIRObject of the class
+    // ObjectForArch return an Error in case of the type mismatch. We need to
+    // check each in turn to see what kind of slice this is, so ignore errors
+    // produced along the way.
+    consumeError(ArOrErr.takeError());
+
+    Expected<std::unique_ptr<MachOObjectFile>> ObjOrErr = O.getAsObjectFile();
+    if (!ObjOrErr) {
+      consumeError(ObjOrErr.takeError());
+      return createStringError(
+          std::errc::invalid_argument,
+          "slice for '%s' of the universal Mach-O binary "
+          "'%s' is not a Mach-O object or an archive",
+          O.getArchFlagName().c_str(),
+          Config.getCommonConfig().InputFilename.str().c_str());
+    }
+    std::string ArchFlagName = O.getArchFlagName();
+
+    SmallVector<char, 0> Buffer;
+    raw_svector_ostream MemStream(Buffer);
+
+    Expected<const MachOConfig &> MachO = Config.getMachOConfig();
+    if (!MachO)
+      return MachO.takeError();
+
+    if (Error E = executeObjcopyOnBinary(Config.getCommonConfig(), *MachO,
+                                         **ObjOrErr, MemStream))
+      return E;
+
+    auto MB = std::make_unique<SmallVectorMemoryBuffer>(
+        std::move(Buffer), ArchFlagName, /*RequiresNullTerminator=*/false);
+    Expected<std::unique_ptr<Binary>> BinaryOrErr = object::createBinary(*MB);
+    if (!BinaryOrErr)
+      return BinaryOrErr.takeError();
+    Binaries.emplace_back(std::move(*BinaryOrErr), std::move(MB));
+    Slices.emplace_back(*cast<MachOObjectFile>(Binaries.back().getBinary()),
+                        O.getAlign());
+  }
+
+  if (Error Err = writeUniversalBinaryToStream(Slices, Out))
+    return Err;
+
+  return Error::success();
+}
diff --git a/contrib/libs/llvm16/lib/ObjCopy/MachO/MachOObject.cpp b/contrib/libs/llvm16/lib/ObjCopy/MachO/MachOObject.cpp
new file mode 100644
index 00000000000..9a4abadc871
--- /dev/null
+++ b/contrib/libs/llvm16/lib/ObjCopy/MachO/MachOObject.cpp
@@ -0,0 +1,226 @@
+//===- MachOObject.cpp - Mach-O object file model ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MachOObject.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <unordered_set>
+
+using namespace llvm;
+using namespace llvm::objcopy::macho;
+
+/// Creates a section with no content. CanonicalName is set to
+/// "<SegName>,<SectName>".
+Section::Section(StringRef SegName, StringRef SectName)
+    : Segname(SegName), Sectname(SectName),
+      CanonicalName((Twine(SegName) + Twine(',') + SectName).str()) {}
+
+/// Creates a section with the given content. CanonicalName is set to
+/// "<SegName>,<SectName>".
+Section::Section(StringRef SegName, StringRef SectName, StringRef Content)
+    : Segname(SegName), Sectname(SectName),
+      CanonicalName((Twine(SegName) + Twine(',') + SectName).str()),
+      Content(Content) {}
+
+/// Returns the symbol stored at position Index. The index is checked only by
+/// an assert.
+const SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) const {
+  assert(Index < Symbols.size() && "invalid symbol index");
+  return Symbols[Index].get();
+}
+
+/// Non-const overload; forwards to the const overload and casts the result.
+SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) {
+  return const_cast<SymbolEntry *>(
+      static_cast<const SymbolTable *>(this)->getSymbolByIndex(Index));
+}
+
+/// Erases every symbol for which ToRemove returns true.
+void SymbolTable::removeSymbols(
+    function_ref<bool(const std::unique_ptr<SymbolEntry> &)> ToRemove) {
+  llvm::erase_if(Symbols, ToRemove);
+}
+
+/// Rescans LoadCommands and records the position of each special load
+/// command in the matching *Index member. For the two segment kinds, only a
+/// segment named "__TEXT" is recorded.
+///
+/// NOTE(review): this function only assigns indexes for commands it finds; it
+/// does not clear the index of a command kind that is no longer present.
+void Object::updateLoadCommandIndexes() {
+  static constexpr char TextSegmentName[] = "__TEXT";
+  // Update indices of special load commands
+  for (size_t Index = 0, Size = LoadCommands.size(); Index < Size; ++Index) {
+    LoadCommand &LC = LoadCommands[Index];
+    switch (LC.MachOLoadCommand.load_command_data.cmd) {
+    case MachO::LC_CODE_SIGNATURE:
+      CodeSignatureCommandIndex = Index;
+      break;
+    case MachO::LC_SEGMENT:
+      if (StringRef(LC.MachOLoadCommand.segment_command_data.segname) ==
+          TextSegmentName)
+        TextSegmentCommandIndex = Index;
+      break;
+    case MachO::LC_SEGMENT_64:
+      if (StringRef(LC.MachOLoadCommand.segment_command_64_data.segname) ==
+          TextSegmentName)
+        TextSegmentCommandIndex = Index;
+      break;
+    case MachO::LC_SYMTAB:
+      SymTabCommandIndex = Index;
+      break;
+    case MachO::LC_DYSYMTAB:
+      DySymTabCommandIndex = Index;
+      break;
+    case MachO::LC_DYLD_INFO:
+    case MachO::LC_DYLD_INFO_ONLY:
+      DyLdInfoCommandIndex = Index;
+      break;
+    case MachO::LC_DATA_IN_CODE:
+      DataInCodeCommandIndex = Index;
+      break;
+    case MachO::LC_LINKER_OPTIMIZATION_HINT:
+      LinkerOptimizationHintCommandIndex = Index;
+      break;
+    case MachO::LC_FUNCTION_STARTS:
+      FunctionStartsCommandIndex = Index;
+      break;
+    case MachO::LC_DYLIB_CODE_SIGN_DRS:
+      DylibCodeSignDRsIndex = Index;
+      break;
+    case MachO::LC_DYLD_CHAINED_FIXUPS:
+      ChainedFixupsCommandIndex = Index;
+      break;
+    case MachO::LC_DYLD_EXPORTS_TRIE:
+      ExportsTrieCommandIndex = Index;
+      break;
+    }
+  }
+}
+
+/// Removes every load command for which ToRemove returns true, keeping the
+/// relative order of the remaining commands, then refreshes the special
+/// load command indexes. Always returns success.
+Error Object::removeLoadCommands(
+    function_ref<bool(const LoadCommand &)> ToRemove) {
+  auto It = std::stable_partition(
+      LoadCommands.begin(), LoadCommands.end(),
+      [&](const LoadCommand &LC) { return !ToRemove(LC); });
+  LoadCommands.erase(It, LoadCommands.end());
+
+  updateLoadCommandIndexes();
+  return Error::success();
+}
+
+/// Removes every section for which ToRemove returns true and renumbers the
+/// remaining sections consecutively from 1, across all load commands.
+///
+/// Symbols defined in a removed section are then removed as well, and the
+/// n_sect field of the surviving symbols is rewritten to the new numbering.
+///
+/// \returns an invalid_argument error if a relocation in a remaining section
+/// refers to a symbol defined in a removed section. The sections have already
+/// been erased and renumbered by the time this error is returned.
+Error Object::removeSections(
+    function_ref<bool(const std::unique_ptr<Section> &)> ToRemove) {
+  // Maps the old index of each kept section to the section itself.
+  DenseMap<uint32_t, const Section *> OldIndexToSection;
+  uint32_t NextSectionIndex = 1;
+  for (LoadCommand &LC : LoadCommands) {
+    auto It = std::stable_partition(
+        std::begin(LC.Sections), std::end(LC.Sections),
+        [&](const std::unique_ptr<Section> &Sec) { return !ToRemove(Sec); });
+    for (auto I = LC.Sections.begin(), End = It; I != End; ++I) {
+      OldIndexToSection[(*I)->Index] = I->get();
+      (*I)->Index = NextSectionIndex++;
+    }
+    LC.Sections.erase(It, LC.Sections.end());
+  }
+
+  // A symbol is dead when it names a section whose old index was not kept.
+  auto IsDead = [&](const std::unique_ptr<SymbolEntry> &S) -> bool {
+    std::optional<uint32_t> Section = S->section();
+    return (Section && !OldIndexToSection.count(*Section));
+  };
+
+  SmallPtrSet<const SymbolEntry *, 2> DeadSymbols;
+  for (const std::unique_ptr<SymbolEntry> &Sym : SymTable.Symbols)
+    if (IsDead(Sym))
+      DeadSymbols.insert(Sym.get());
+
+  for (const LoadCommand &LC : LoadCommands)
+    for (const std::unique_ptr<Section> &Sec : LC.Sections)
+      for (const RelocationInfo &R : Sec->Relocations)
+        if (R.Symbol && *R.Symbol && DeadSymbols.count(*R.Symbol))
+          return createStringError(std::errc::invalid_argument,
+                                   "symbol '%s' defined in section with index "
+                                   "'%u' cannot be removed because it is "
+                                   "referenced by a relocation in section '%s'",
+                                   (*R.Symbol)->Name.c_str(),
+                                   *((*R.Symbol)->section()),
+                                   Sec->CanonicalName.c_str());
+  SymTable.removeSymbols(IsDead);
+  // Translate each remaining symbol's old section index to the new one.
+  for (std::unique_ptr<SymbolEntry> &S : SymTable.Symbols)
+    if (S->section())
+      S->n_sect = OldIndexToSection[S->n_sect]->Index;
+  return Error::success();
+}
+
+/// Returns the larger of two values: the size of the Mach-O header plus
+/// Header.SizeOfCmds, and the highest vmaddr + vmsize among all LC_SEGMENT
+/// and LC_SEGMENT_64 load commands.
+uint64_t Object::nextAvailableSegmentAddress() const {
+  uint64_t HeaderSize =
+      is64Bit() ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
+  uint64_t Addr = HeaderSize + Header.SizeOfCmds;
+  for (const LoadCommand &LC : LoadCommands) {
+    const MachO::macho_load_command &MLC = LC.MachOLoadCommand;
+    switch (MLC.load_command_data.cmd) {
+    case MachO::LC_SEGMENT:
+      Addr = std::max(Addr,
+                      static_cast<uint64_t>(MLC.segment_command_data.vmaddr) +
+                          MLC.segment_command_data.vmsize);
+      break;
+    case MachO::LC_SEGMENT_64:
+      Addr = std::max(Addr, MLC.segment_command_64_data.vmaddr +
+                                MLC.segment_command_64_data.vmsize);
+      break;
+    default:
+      continue;
+    }
+  }
+  return Addr;
+}
+
+/// Initializes Seg as a segment command of kind CmdType.
+///
+/// The struct is zeroed first, then the command kind, segment name, VM
+/// address and VM size are filled in, and both maxprot and initprot are set to
+/// read | write | execute. The name length is checked only by an assert.
+template <typename SegmentType>
+static void
+constructSegment(SegmentType &Seg, llvm::MachO::LoadCommandType CmdType,
+                 StringRef SegName, uint64_t SegVMAddr, uint64_t SegVMSize) {
+  assert(SegName.size() <= sizeof(Seg.segname) && "too long segment name");
+  memset(&Seg, 0, sizeof(SegmentType));
+  Seg.cmd = CmdType;
+  // Copies exactly SegName.size() bytes; the memset above supplies the zero
+  // bytes for the rest of the field.
+  strncpy(Seg.segname, SegName.data(), SegName.size());
+  Seg.maxprot |=
+      (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
+  Seg.initprot |=
+      (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
+  Seg.vmaddr = SegVMAddr;
+  Seg.vmsize = SegVMSize;
+}
+
+LoadCommand &Object::addSegment(StringRef SegName, uint64_t SegVMSize) {
+  LoadCommand 
LC; + const uint64_t SegVMAddr = nextAvailableSegmentAddress(); + if (is64Bit()) + constructSegment(LC.MachOLoadCommand.segment_command_64_data, + MachO::LC_SEGMENT_64, SegName, SegVMAddr, SegVMSize); + else + constructSegment(LC.MachOLoadCommand.segment_command_data, + MachO::LC_SEGMENT, SegName, SegVMAddr, SegVMSize); + + LoadCommands.push_back(std::move(LC)); + return LoadCommands.back(); +} + +/// Extracts a segment name from a string which is possibly non-null-terminated. +static StringRef extractSegmentName(const char *SegName) { + return StringRef(SegName, + strnlen(SegName, sizeof(MachO::segment_command::segname))); +} + +std::optional<StringRef> LoadCommand::getSegmentName() const { + const MachO::macho_load_command &MLC = MachOLoadCommand; + switch (MLC.load_command_data.cmd) { + case MachO::LC_SEGMENT: + return extractSegmentName(MLC.segment_command_data.segname); + case MachO::LC_SEGMENT_64: + return extractSegmentName(MLC.segment_command_64_data.segname); + default: + return std::nullopt; + } +} + +std::optional<uint64_t> LoadCommand::getSegmentVMAddr() const { + const MachO::macho_load_command &MLC = MachOLoadCommand; + switch (MLC.load_command_data.cmd) { + case MachO::LC_SEGMENT: + return MLC.segment_command_data.vmaddr; + case MachO::LC_SEGMENT_64: + return MLC.segment_command_64_data.vmaddr; + default: + return std::nullopt; + } +} diff --git a/contrib/libs/llvm16/lib/ObjCopy/MachO/MachOObject.h b/contrib/libs/llvm16/lib/ObjCopy/MachO/MachOObject.h new file mode 100644 index 00000000000..1cbd2eb5f32 --- /dev/null +++ b/contrib/libs/llvm16/lib/ObjCopy/MachO/MachOObject.h @@ -0,0 +1,373 @@ +//===- MachOObject.h - Mach-O object file model -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_OBJCOPY_MACHO_MACHOOBJECT_H +#define LLVM_LIB_OBJCOPY_MACHO_MACHOOBJECT_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/MC/StringTableBuilder.h" +#include "llvm/ObjectYAML/DWARFYAML.h" +#include "llvm/Support/StringSaver.h" +#include "llvm/Support/YAMLTraits.h" +#include <cstdint> +#include <string> +#include <vector> + +namespace llvm { +namespace objcopy { +namespace macho { + +struct MachHeader { + uint32_t Magic; + uint32_t CPUType; + uint32_t CPUSubType; + uint32_t FileType; + uint32_t NCmds; + uint32_t SizeOfCmds; + uint32_t Flags; + uint32_t Reserved = 0; +}; + +struct RelocationInfo; +struct Section { + uint32_t Index; + std::string Segname; + std::string Sectname; + // CanonicalName is a string formatted as “<Segname>,<Sectname>". + std::string CanonicalName; + uint64_t Addr = 0; + uint64_t Size = 0; + // Offset in the input file. 
+ std::optional<uint32_t> OriginalOffset; + uint32_t Offset = 0; + uint32_t Align = 0; + uint32_t RelOff = 0; + uint32_t NReloc = 0; + uint32_t Flags = 0; + uint32_t Reserved1 = 0; + uint32_t Reserved2 = 0; + uint32_t Reserved3 = 0; + StringRef Content; + std::vector<RelocationInfo> Relocations; + + Section(StringRef SegName, StringRef SectName); + + Section(StringRef SegName, StringRef SectName, StringRef Content); + + MachO::SectionType getType() const { + return static_cast<MachO::SectionType>(Flags & MachO::SECTION_TYPE); + } + + bool isVirtualSection() const { + return (getType() == MachO::S_ZEROFILL || + getType() == MachO::S_GB_ZEROFILL || + getType() == MachO::S_THREAD_LOCAL_ZEROFILL); + } + + bool hasValidOffset() const { + return !(isVirtualSection() || (OriginalOffset && *OriginalOffset == 0)); + } +}; + +struct LoadCommand { + // The type MachO::macho_load_command is defined in llvm/BinaryFormat/MachO.h + // and it is a union of all the structs corresponding to various load + // commands. + MachO::macho_load_command MachOLoadCommand; + + // The raw content of the payload of the load command (located right after the + // corresponding struct). In some cases it is either empty or can be + // copied-over without digging into its structure. + std::vector<uint8_t> Payload; + + // Some load commands can contain (inside the payload) an array of sections, + // though the contents of the sections are stored separately. The struct + // Section describes only sections' metadata and where to find the + // corresponding content inside the binary. + std::vector<std::unique_ptr<Section>> Sections; + + // Returns the segment name if the load command is a segment command. + std::optional<StringRef> getSegmentName() const; + + // Returns the segment vm address if the load command is a segment command. + std::optional<uint64_t> getSegmentVMAddr() const; +}; + +// A symbol information. Fields which starts with "n_" are same as them in the +// nlist. 
+struct SymbolEntry { + std::string Name; + bool Referenced = false; + uint32_t Index; + uint8_t n_type; + uint8_t n_sect; + uint16_t n_desc; + uint64_t n_value; + + bool isExternalSymbol() const { return n_type & MachO::N_EXT; } + + bool isLocalSymbol() const { return !isExternalSymbol(); } + + bool isUndefinedSymbol() const { + return (n_type & MachO::N_TYPE) == MachO::N_UNDF; + } + + bool isSwiftSymbol() const { + return StringRef(Name).startswith("_$s") || + StringRef(Name).startswith("_$S"); + } + + std::optional<uint32_t> section() const { + return n_sect == MachO::NO_SECT ? std::nullopt + : std::optional<uint32_t>(n_sect); + } +}; + +/// The location of the symbol table inside the binary is described by LC_SYMTAB +/// load command. +struct SymbolTable { + std::vector<std::unique_ptr<SymbolEntry>> Symbols; + + using iterator = pointee_iterator< + std::vector<std::unique_ptr<SymbolEntry>>::const_iterator>; + + iterator begin() const { return iterator(Symbols.begin()); } + iterator end() const { return iterator(Symbols.end()); } + + const SymbolEntry *getSymbolByIndex(uint32_t Index) const; + SymbolEntry *getSymbolByIndex(uint32_t Index); + void removeSymbols( + function_ref<bool(const std::unique_ptr<SymbolEntry> &)> ToRemove); +}; + +struct IndirectSymbolEntry { + // The original value in an indirect symbol table. Higher bits encode extra + // information (INDIRECT_SYMBOL_LOCAL and INDIRECT_SYMBOL_ABS). + uint32_t OriginalIndex; + /// The Symbol referenced by this entry. It's std::nullopt if the index is + /// INDIRECT_SYMBOL_LOCAL or INDIRECT_SYMBOL_ABS. + std::optional<SymbolEntry *> Symbol; + + IndirectSymbolEntry(uint32_t OriginalIndex, + std::optional<SymbolEntry *> Symbol) + : OriginalIndex(OriginalIndex), Symbol(Symbol) {} +}; + +struct IndirectSymbolTable { + std::vector<IndirectSymbolEntry> Symbols; +}; + +/// The location of the string table inside the binary is described by LC_SYMTAB +/// load command. 
+struct StringTable { + std::vector<std::string> Strings; +}; + +struct RelocationInfo { + // The referenced symbol entry. Set if !Scattered && Extern. + std::optional<const SymbolEntry *> Symbol; + // The referenced section. Set if !Scattered && !Extern. + std::optional<const Section *> Sec; + // True if Info is a scattered_relocation_info. + bool Scattered; + // True if the type is an ADDEND. r_symbolnum holds the addend instead of a + // symbol index. + bool IsAddend; + // True if the r_symbolnum points to a section number (i.e. r_extern=0). + bool Extern; + MachO::any_relocation_info Info; + + unsigned getPlainRelocationSymbolNum(bool IsLittleEndian) { + if (IsLittleEndian) + return Info.r_word1 & 0xffffff; + return Info.r_word1 >> 8; + } + + void setPlainRelocationSymbolNum(unsigned SymbolNum, bool IsLittleEndian) { + assert(SymbolNum < (1 << 24) && "SymbolNum out of range"); + if (IsLittleEndian) + Info.r_word1 = (Info.r_word1 & ~0x00ffffff) | SymbolNum; + else + Info.r_word1 = (Info.r_word1 & ~0xffffff00) | (SymbolNum << 8); + } +}; + +/// The location of the rebase info inside the binary is described by +/// LC_DYLD_INFO load command. Dyld rebases an image whenever dyld loads it at +/// an address different from its preferred address. The rebase information is +/// a stream of byte sized opcodes whose symbolic names start with +/// REBASE_OPCODE_. Conceptually the rebase information is a table of tuples: +/// <seg-index, seg-offset, type> +/// The opcodes are a compressed way to encode the table by only +/// encoding when a column changes. In addition simple patterns +/// like "every n'th offset for m times" can be encoded in a few +/// bytes. +struct RebaseInfo { + // At the moment we do not parse this info (and it is simply copied over), + // but the proper support will be added later. + ArrayRef<uint8_t> Opcodes; +}; + +/// The location of the bind info inside the binary is described by +/// LC_DYLD_INFO load command. 
Dyld binds an image during the loading process, +/// if the image requires any pointers to be initialized to symbols in other +/// images. The bind information is a stream of byte sized opcodes whose +/// symbolic names start with BIND_OPCODE_. Conceptually the bind information is +/// a table of tuples: <seg-index, seg-offset, type, symbol-library-ordinal, +/// symbol-name, addend> The opcodes are a compressed way to encode the table by +/// only encoding when a column changes. In addition simple patterns like for +/// runs of pointers initialized to the same value can be encoded in a few +/// bytes. +struct BindInfo { + // At the moment we do not parse this info (and it is simply copied over), + // but the proper support will be added later. + ArrayRef<uint8_t> Opcodes; +}; + +/// The location of the weak bind info inside the binary is described by +/// LC_DYLD_INFO load command. Some C++ programs require dyld to unique symbols +/// so that all images in the process use the same copy of some code/data. This +/// step is done after binding. The content of the weak_bind info is an opcode +/// stream like the bind_info. But it is sorted alphabetically by symbol name. +/// This enable dyld to walk all images with weak binding information in order +/// and look for collisions. If there are no collisions, dyld does no updating. +/// That means that some fixups are also encoded in the bind_info. For +/// instance, all calls to "operator new" are first bound to libstdc++.dylib +/// using the information in bind_info. Then if some image overrides operator +/// new that is detected when the weak_bind information is processed and the +/// call to operator new is then rebound. +struct WeakBindInfo { + // At the moment we do not parse this info (and it is simply copied over), + // but the proper support will be added later. + ArrayRef<uint8_t> Opcodes; +}; + +/// The location of the lazy bind info inside the binary is described by +/// LC_DYLD_INFO load command. 
Some uses of external symbols do not need to be +/// bound immediately. Instead they can be lazily bound on first use. The +/// lazy_bind contains a stream of BIND opcodes to bind all lazy symbols. Normal +/// use is that dyld ignores the lazy_bind section when loading an image. +/// Instead the static linker arranged for the lazy pointer to initially point +/// to a helper function which pushes the offset into the lazy_bind area for the +/// symbol needing to be bound, then jumps to dyld which simply adds the offset +/// to lazy_bind_off to get the information on what to bind. +struct LazyBindInfo { + ArrayRef<uint8_t> Opcodes; +}; + +/// The location of the export info inside the binary is described by +/// LC_DYLD_INFO load command. The symbols exported by a dylib are encoded in a +/// trie. This is a compact representation that factors out common prefixes. It +/// also reduces LINKEDIT pages in RAM because it encodes all information (name, +/// address, flags) in one small, contiguous range. The export area is a stream +/// of nodes. The first node sequentially is the start node for the trie. Nodes +/// for a symbol start with a uleb128 that is the length of the exported symbol +/// information for the string so far. If there is no exported symbol, the node +/// starts with a zero byte. If there is exported info, it follows the length. +/// First is a uleb128 containing flags. Normally, it is followed by +/// a uleb128 encoded offset which is location of the content named +/// by the symbol from the mach_header for the image. If the flags +/// is EXPORT_SYMBOL_FLAGS_REEXPORT, then following the flags is +/// a uleb128 encoded library ordinal, then a zero terminated +/// UTF8 string. If the string is zero length, then the symbol +/// is re-export from the specified dylib with the same name. +/// If the flags is EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER, then following +/// the flags is two uleb128s: the stub offset and the resolver offset. 
/// The stub is used by non-lazy pointers. The resolver is used
/// by lazy pointers and must be called to get the actual address to use.
/// After the optional exported symbol information is a byte of
/// how many edges (0-255) that this node has leaving it,
/// followed by each edge.
/// Each edge is a zero terminated UTF8 of the addition chars
/// in the symbol, followed by a uleb128 offset for the node that
/// edge points to.
struct ExportInfo {
  ArrayRef<uint8_t> Trie;
};

/// An uninterpreted range of bytes. Object keeps one LinkData per kind of
/// link-edit payload (data-in-code, function starts, ...).
struct LinkData {
  ArrayRef<uint8_t> Data;
};

/// In-memory model of a Mach-O object: the header, the load commands, the
/// symbol/string tables and the link-edit payloads.
struct Object {
  MachHeader Header;
  std::vector<LoadCommand> LoadCommands;

  SymbolTable SymTable;
  StringTable StrTable;

  RebaseInfo Rebases;
  BindInfo Binds;
  WeakBindInfo WeakBinds;
  LazyBindInfo LazyBinds;
  ExportInfo Exports;
  IndirectSymbolTable IndirectSymTable;
  LinkData DataInCode;
  LinkData LinkerOptimizationHint;
  LinkData FunctionStarts;
  LinkData ExportsTrie;
  LinkData ChainedFixups;
  LinkData DylibCodeSignDRs;

  std::optional<uint32_t> SwiftVersion;

  /// The index of LC_CODE_SIGNATURE load command if present.
  std::optional<size_t> CodeSignatureCommandIndex;
  /// The index of LC_DYLIB_CODE_SIGN_DRS load command if present.
  std::optional<size_t> DylibCodeSignDRsIndex;
  /// The index of LC_SYMTAB load command if present.
  std::optional<size_t> SymTabCommandIndex;
  /// The index of LC_DYLD_INFO or LC_DYLD_INFO_ONLY load command if present.
  std::optional<size_t> DyLdInfoCommandIndex;
  /// The index of the LC_DYSYMTAB load command if present.
  std::optional<size_t> DySymTabCommandIndex;
  /// The index of the LC_DATA_IN_CODE load command if present.
  std::optional<size_t> DataInCodeCommandIndex;
  /// The index of LC_LINKER_OPTIMIZATION_HINT load command if present.
  std::optional<size_t> LinkerOptimizationHintCommandIndex;
  /// The index of the LC_FUNCTION_STARTS load command if present.
  std::optional<size_t> FunctionStartsCommandIndex;
  /// The index of the LC_DYLD_CHAINED_FIXUPS load command if present.
  std::optional<size_t> ChainedFixupsCommandIndex;
  /// The index of the LC_DYLD_EXPORTS_TRIE load command if present.
  std::optional<size_t> ExportsTrieCommandIndex;
  /// The index of the LC_SEGMENT or LC_SEGMENT_64 load command
  /// corresponding to the __TEXT segment.
  std::optional<size_t> TextSegmentCommandIndex;

  // NOTE: Alloc must stay declared before NewSectionsContents, which is
  // constructed from it in the constructor below.
  BumpPtrAllocator Alloc;
  StringSaver NewSectionsContents;

  Object() : NewSectionsContents(Alloc) {}

  Error
  removeSections(function_ref<bool(const std::unique_ptr<Section> &)> ToRemove);

  Error removeLoadCommands(function_ref<bool(const LoadCommand &)> ToRemove);

  /// Rescans LoadCommands and updates the *CommandIndex members above.
  void updateLoadCommandIndexes();

  /// Creates a new segment load command in the object and returns a reference
  /// to the newly created load command. The caller should verify that SegName
  /// is not too long (SegName.size() should be less than or equal to 16).
  LoadCommand &addSegment(StringRef SegName, uint64_t SegVMSize);

  /// True if the header magic is one of the two 64-bit magics.
  bool is64Bit() const {
    return Header.Magic == MachO::MH_MAGIC_64 ||
           Header.Magic == MachO::MH_CIGAM_64;
  }

  uint64_t nextAvailableSegmentAddress() const;
};

} // end namespace macho
} // end namespace objcopy
} // end namespace llvm

#endif // LLVM_LIB_OBJCOPY_MACHO_MACHOOBJECT_H
diff --git a/contrib/libs/llvm16/lib/ObjCopy/MachO/MachOReader.cpp b/contrib/libs/llvm16/lib/ObjCopy/MachO/MachOReader.cpp
new file mode 100644
index 00000000000..2cbffc12adb
--- /dev/null
+++ b/contrib/libs/llvm16/lib/ObjCopy/MachO/MachOReader.cpp
@@ -0,0 +1,386 @@
//===- MachOReader.cpp ------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "MachOReader.h" +#include "MachOObject.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Object/MachO.h" +#include "llvm/Support/Errc.h" +#include <memory> + +using namespace llvm; +using namespace llvm::objcopy; +using namespace llvm::objcopy::macho; + +void MachOReader::readHeader(Object &O) const { + O.Header.Magic = MachOObj.getHeader().magic; + O.Header.CPUType = MachOObj.getHeader().cputype; + O.Header.CPUSubType = MachOObj.getHeader().cpusubtype; + O.Header.FileType = MachOObj.getHeader().filetype; + O.Header.NCmds = MachOObj.getHeader().ncmds; + O.Header.SizeOfCmds = MachOObj.getHeader().sizeofcmds; + O.Header.Flags = MachOObj.getHeader().flags; +} + +template <typename SectionType> +static Section constructSectionCommon(const SectionType &Sec, uint32_t Index) { + StringRef SegName(Sec.segname, strnlen(Sec.segname, sizeof(Sec.segname))); + StringRef SectName(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname))); + Section S(SegName, SectName); + S.Index = Index; + S.Addr = Sec.addr; + S.Size = Sec.size; + S.OriginalOffset = Sec.offset; + S.Align = Sec.align; + S.RelOff = Sec.reloff; + S.NReloc = Sec.nreloc; + S.Flags = Sec.flags; + S.Reserved1 = Sec.reserved1; + S.Reserved2 = Sec.reserved2; + S.Reserved3 = 0; + return S; +} + +Section constructSection(const MachO::section &Sec, uint32_t Index) { + return constructSectionCommon(Sec, Index); +} + +Section constructSection(const MachO::section_64 &Sec, uint32_t Index) { + Section S = constructSectionCommon(Sec, Index); + S.Reserved3 = Sec.reserved3; + return S; +} + +template <typename SectionType, typename SegmentType> +Expected<std::vector<std::unique_ptr<Section>>> static extractSections( + const object::MachOObjectFile::LoadCommandInfo &LoadCmd, + const object::MachOObjectFile &MachOObj, uint32_t &NextSectionIndex) { + 
std::vector<std::unique_ptr<Section>> Sections; + for (auto Curr = reinterpret_cast<const SectionType *>(LoadCmd.Ptr + + sizeof(SegmentType)), + End = reinterpret_cast<const SectionType *>(LoadCmd.Ptr + + LoadCmd.C.cmdsize); + Curr < End; ++Curr) { + SectionType Sec; + memcpy((void *)&Sec, Curr, sizeof(SectionType)); + + if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) + MachO::swapStruct(Sec); + + Sections.push_back( + std::make_unique<Section>(constructSection(Sec, NextSectionIndex))); + + Section &S = *Sections.back(); + + Expected<object::SectionRef> SecRef = + MachOObj.getSection(NextSectionIndex++); + if (!SecRef) + return SecRef.takeError(); + + Expected<ArrayRef<uint8_t>> Data = + MachOObj.getSectionContents(SecRef->getRawDataRefImpl()); + if (!Data) + return Data.takeError(); + + S.Content = + StringRef(reinterpret_cast<const char *>(Data->data()), Data->size()); + + const uint32_t CPUType = MachOObj.getHeader().cputype; + S.Relocations.reserve(S.NReloc); + for (auto RI = MachOObj.section_rel_begin(SecRef->getRawDataRefImpl()), + RE = MachOObj.section_rel_end(SecRef->getRawDataRefImpl()); + RI != RE; ++RI) { + RelocationInfo R; + R.Symbol = nullptr; // We'll fill this field later. + R.Info = MachOObj.getRelocation(RI->getRawDataRefImpl()); + R.Scattered = MachOObj.isRelocationScattered(R.Info); + unsigned Type = MachOObj.getAnyRelocationType(R.Info); + // TODO Support CPU_TYPE_ARM. + R.IsAddend = !R.Scattered && (CPUType == MachO::CPU_TYPE_ARM64 && + Type == MachO::ARM64_RELOC_ADDEND); + R.Extern = !R.Scattered && MachOObj.getPlainRelocationExternal(R.Info); + S.Relocations.push_back(R); + } + + assert(S.NReloc == S.Relocations.size() && + "Incorrect number of relocations"); + } + return std::move(Sections); +} + +Error MachOReader::readLoadCommands(Object &O) const { + // For MachO sections indices start from 1. 
+ uint32_t NextSectionIndex = 1; + static constexpr char TextSegmentName[] = "__TEXT"; + for (auto LoadCmd : MachOObj.load_commands()) { + LoadCommand LC; + switch (LoadCmd.C.cmd) { + case MachO::LC_CODE_SIGNATURE: + O.CodeSignatureCommandIndex = O.LoadCommands.size(); + break; + case MachO::LC_SEGMENT: + // LoadCmd.Ptr might not be aligned temporarily as + // MachO::segment_command requires, but the segname char pointer do not + // have alignment restrictions. + if (StringRef(reinterpret_cast<const char *>( + LoadCmd.Ptr + offsetof(MachO::segment_command, segname))) == + TextSegmentName) + O.TextSegmentCommandIndex = O.LoadCommands.size(); + + if (Expected<std::vector<std::unique_ptr<Section>>> Sections = + extractSections<MachO::section, MachO::segment_command>( + LoadCmd, MachOObj, NextSectionIndex)) + LC.Sections = std::move(*Sections); + else + return Sections.takeError(); + break; + case MachO::LC_SEGMENT_64: + // LoadCmd.Ptr might not be aligned temporarily as + // MachO::segment_command_64 requires, but the segname char pointer do + // not have alignment restrictions. 
+ if (StringRef(reinterpret_cast<const char *>( + LoadCmd.Ptr + offsetof(MachO::segment_command_64, segname))) == + TextSegmentName) + O.TextSegmentCommandIndex = O.LoadCommands.size(); + + if (Expected<std::vector<std::unique_ptr<Section>>> Sections = + extractSections<MachO::section_64, MachO::segment_command_64>( + LoadCmd, MachOObj, NextSectionIndex)) + LC.Sections = std::move(*Sections); + else + return Sections.takeError(); + break; + case MachO::LC_SYMTAB: + O.SymTabCommandIndex = O.LoadCommands.size(); + break; + case MachO::LC_DYSYMTAB: + O.DySymTabCommandIndex = O.LoadCommands.size(); + break; + case MachO::LC_DYLD_INFO: + case MachO::LC_DYLD_INFO_ONLY: + O.DyLdInfoCommandIndex = O.LoadCommands.size(); + break; + case MachO::LC_DATA_IN_CODE: + O.DataInCodeCommandIndex = O.LoadCommands.size(); + break; + case MachO::LC_LINKER_OPTIMIZATION_HINT: + O.LinkerOptimizationHintCommandIndex = O.LoadCommands.size(); + break; + case MachO::LC_FUNCTION_STARTS: + O.FunctionStartsCommandIndex = O.LoadCommands.size(); + break; + case MachO::LC_DYLIB_CODE_SIGN_DRS: + O.DylibCodeSignDRsIndex = O.LoadCommands.size(); + break; + case MachO::LC_DYLD_EXPORTS_TRIE: + O.ExportsTrieCommandIndex = O.LoadCommands.size(); + break; + case MachO::LC_DYLD_CHAINED_FIXUPS: + O.ChainedFixupsCommandIndex = O.LoadCommands.size(); + break; + } +#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ + case MachO::LCName: \ + memcpy((void *)&(LC.MachOLoadCommand.LCStruct##_data), LoadCmd.Ptr, \ + sizeof(MachO::LCStruct)); \ + if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) \ + MachO::swapStruct(LC.MachOLoadCommand.LCStruct##_data); \ + if (LoadCmd.C.cmdsize > sizeof(MachO::LCStruct)) \ + LC.Payload = ArrayRef<uint8_t>( \ + reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) + \ + sizeof(MachO::LCStruct), \ + LoadCmd.C.cmdsize - sizeof(MachO::LCStruct)); \ + break; + + switch (LoadCmd.C.cmd) { + default: + memcpy((void *)&(LC.MachOLoadCommand.load_command_data), 
LoadCmd.Ptr, + sizeof(MachO::load_command)); + if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) + MachO::swapStruct(LC.MachOLoadCommand.load_command_data); + if (LoadCmd.C.cmdsize > sizeof(MachO::load_command)) + LC.Payload = ArrayRef<uint8_t>( + reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) + + sizeof(MachO::load_command), + LoadCmd.C.cmdsize - sizeof(MachO::load_command)); + break; +#include "llvm/BinaryFormat/MachO.def" + } + O.LoadCommands.push_back(std::move(LC)); + } + return Error::success(); +} + +template <typename nlist_t> +SymbolEntry constructSymbolEntry(StringRef StrTable, const nlist_t &nlist) { + assert(nlist.n_strx < StrTable.size() && + "n_strx exceeds the size of the string table"); + SymbolEntry SE; + SE.Name = StringRef(StrTable.data() + nlist.n_strx).str(); + SE.n_type = nlist.n_type; + SE.n_sect = nlist.n_sect; + SE.n_desc = nlist.n_desc; + SE.n_value = nlist.n_value; + return SE; +} + +void MachOReader::readSymbolTable(Object &O) const { + StringRef StrTable = MachOObj.getStringTableData(); + for (auto Symbol : MachOObj.symbols()) { + SymbolEntry SE = + (MachOObj.is64Bit() + ? 
constructSymbolEntry(StrTable, MachOObj.getSymbol64TableEntry( + Symbol.getRawDataRefImpl())) + : constructSymbolEntry(StrTable, MachOObj.getSymbolTableEntry( + Symbol.getRawDataRefImpl()))); + + O.SymTable.Symbols.push_back(std::make_unique<SymbolEntry>(SE)); + } +} + +void MachOReader::setSymbolInRelocationInfo(Object &O) const { + std::vector<const Section *> Sections; + for (auto &LC : O.LoadCommands) + for (std::unique_ptr<Section> &Sec : LC.Sections) + Sections.push_back(Sec.get()); + + for (LoadCommand &LC : O.LoadCommands) + for (std::unique_ptr<Section> &Sec : LC.Sections) + for (auto &Reloc : Sec->Relocations) + if (!Reloc.Scattered && !Reloc.IsAddend) { + const uint32_t SymbolNum = + Reloc.getPlainRelocationSymbolNum(MachOObj.isLittleEndian()); + if (Reloc.Extern) { + Reloc.Symbol = O.SymTable.getSymbolByIndex(SymbolNum); + } else { + // FIXME: Refactor error handling in MachOReader and report an error + // if we encounter an invalid relocation. + assert(SymbolNum >= 1 && SymbolNum <= Sections.size() && + "Invalid section index."); + Reloc.Sec = Sections[SymbolNum - 1]; + } + } +} + +void MachOReader::readRebaseInfo(Object &O) const { + O.Rebases.Opcodes = MachOObj.getDyldInfoRebaseOpcodes(); +} + +void MachOReader::readBindInfo(Object &O) const { + O.Binds.Opcodes = MachOObj.getDyldInfoBindOpcodes(); +} + +void MachOReader::readWeakBindInfo(Object &O) const { + O.WeakBinds.Opcodes = MachOObj.getDyldInfoWeakBindOpcodes(); +} + +void MachOReader::readLazyBindInfo(Object &O) const { + O.LazyBinds.Opcodes = MachOObj.getDyldInfoLazyBindOpcodes(); +} + +void MachOReader::readExportInfo(Object &O) const { + // This information can be in LC_DYLD_INFO or in LC_DYLD_EXPORTS_TRIE + ArrayRef<uint8_t> Trie = MachOObj.getDyldInfoExportsTrie(); + if (Trie.empty()) + Trie = MachOObj.getDyldExportsTrie(); + O.Exports.Trie = Trie; +} + +void MachOReader::readLinkData(Object &O, std::optional<size_t> LCIndex, + LinkData &LD) const { + if (!LCIndex) + return; + const 
MachO::linkedit_data_command &LC = + O.LoadCommands[*LCIndex].MachOLoadCommand.linkedit_data_command_data; + LD.Data = + arrayRefFromStringRef(MachOObj.getData().substr(LC.dataoff, LC.datasize)); +} + +void MachOReader::readDataInCodeData(Object &O) const { + return readLinkData(O, O.DataInCodeCommandIndex, O.DataInCode); +} + +void MachOReader::readLinkerOptimizationHint(Object &O) const { + return readLinkData(O, O.LinkerOptimizationHintCommandIndex, + O.LinkerOptimizationHint); +} + +void MachOReader::readFunctionStartsData(Object &O) const { + return readLinkData(O, O.FunctionStartsCommandIndex, O.FunctionStarts); +} + +void MachOReader::readDylibCodeSignDRs(Object &O) const { + return readLinkData(O, O.DylibCodeSignDRsIndex, O.DylibCodeSignDRs); +} + +void MachOReader::readExportsTrie(Object &O) const { + return readLinkData(O, O.ExportsTrieCommandIndex, O.ExportsTrie); +} + +void MachOReader::readChainedFixups(Object &O) const { + return readLinkData(O, O.ChainedFixupsCommandIndex, O.ChainedFixups); +} + +void MachOReader::readIndirectSymbolTable(Object &O) const { + MachO::dysymtab_command DySymTab = MachOObj.getDysymtabLoadCommand(); + constexpr uint32_t AbsOrLocalMask = + MachO::INDIRECT_SYMBOL_LOCAL | MachO::INDIRECT_SYMBOL_ABS; + for (uint32_t i = 0; i < DySymTab.nindirectsyms; ++i) { + uint32_t Index = MachOObj.getIndirectSymbolTableEntry(DySymTab, i); + if ((Index & AbsOrLocalMask) != 0) + O.IndirectSymTable.Symbols.emplace_back(Index, std::nullopt); + else + O.IndirectSymTable.Symbols.emplace_back( + Index, O.SymTable.getSymbolByIndex(Index)); + } +} + +void MachOReader::readSwiftVersion(Object &O) const { + struct ObjCImageInfo { + uint32_t Version; + uint32_t Flags; + } ImageInfo; + + for (const LoadCommand &LC : O.LoadCommands) + for (const std::unique_ptr<Section> &Sec : LC.Sections) + if (Sec->Sectname == "__objc_imageinfo" && + (Sec->Segname == "__DATA" || Sec->Segname == "__DATA_CONST" || + Sec->Segname == "__DATA_DIRTY") && + 
Sec->Content.size() >= sizeof(ObjCImageInfo)) { + memcpy(&ImageInfo, Sec->Content.data(), sizeof(ObjCImageInfo)); + if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) { + sys::swapByteOrder(ImageInfo.Version); + sys::swapByteOrder(ImageInfo.Flags); + } + O.SwiftVersion = (ImageInfo.Flags >> 8) & 0xff; + return; + } +} + +Expected<std::unique_ptr<Object>> MachOReader::create() const { + auto Obj = std::make_unique<Object>(); + readHeader(*Obj); + if (Error E = readLoadCommands(*Obj)) + return std::move(E); + readSymbolTable(*Obj); + setSymbolInRelocationInfo(*Obj); + readRebaseInfo(*Obj); + readBindInfo(*Obj); + readWeakBindInfo(*Obj); + readLazyBindInfo(*Obj); + readExportInfo(*Obj); + readDataInCodeData(*Obj); + readLinkerOptimizationHint(*Obj); + readFunctionStartsData(*Obj); + readDylibCodeSignDRs(*Obj); + readExportsTrie(*Obj); + readChainedFixups(*Obj); + readIndirectSymbolTable(*Obj); + readSwiftVersion(*Obj); + return std::move(Obj); +} diff --git a/contrib/libs/llvm16/lib/ObjCopy/MachO/MachOReader.h b/contrib/libs/llvm16/lib/ObjCopy/MachO/MachOReader.h new file mode 100644 index 00000000000..e315e6fd9b1 --- /dev/null +++ b/contrib/libs/llvm16/lib/ObjCopy/MachO/MachOReader.h @@ -0,0 +1,64 @@ +//===- MachOReader.h --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_OBJCOPY_MACHO_MACHOREADER_H +#define LLVM_LIB_OBJCOPY_MACHO_MACHOREADER_H + +#include "MachOObject.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/ObjCopy/MachO/MachOObjcopy.h" +#include "llvm/Object/MachO.h" +#include <memory> + +namespace llvm { +namespace objcopy { +namespace macho { + +// The hierarchy of readers is responsible for parsing different inputs: +// raw binaries and regular MachO object files. +class Reader { +public: + virtual ~Reader(){}; + virtual Expected<std::unique_ptr<Object>> create() const = 0; +}; + +class MachOReader : public Reader { + const object::MachOObjectFile &MachOObj; + + void readHeader(Object &O) const; + Error readLoadCommands(Object &O) const; + void readSymbolTable(Object &O) const; + void setSymbolInRelocationInfo(Object &O) const; + void readRebaseInfo(Object &O) const; + void readBindInfo(Object &O) const; + void readWeakBindInfo(Object &O) const; + void readLazyBindInfo(Object &O) const; + void readExportInfo(Object &O) const; + void readLinkData(Object &O, std::optional<size_t> LCIndex, + LinkData &LD) const; + void readCodeSignature(Object &O) const; + void readDataInCodeData(Object &O) const; + void readLinkerOptimizationHint(Object &O) const; + void readFunctionStartsData(Object &O) const; + void readDylibCodeSignDRs(Object &O) const; + void readExportsTrie(Object &O) const; + void readChainedFixups(Object &O) const; + void readIndirectSymbolTable(Object &O) const; + void readSwiftVersion(Object &O) const; + +public: + explicit MachOReader(const object::MachOObjectFile &Obj) : MachOObj(Obj) {} + + Expected<std::unique_ptr<Object>> create() const override; +}; + +} // end namespace macho +} // end namespace objcopy +} // end namespace llvm + +#endif // LLVM_LIB_OBJCOPY_MACHO_MACHOREADER_H diff --git 
a/contrib/libs/llvm16/lib/ObjCopy/MachO/MachOWriter.cpp b/contrib/libs/llvm16/lib/ObjCopy/MachO/MachOWriter.cpp new file mode 100644 index 00000000000..f416796496e --- /dev/null +++ b/contrib/libs/llvm16/lib/ObjCopy/MachO/MachOWriter.cpp @@ -0,0 +1,676 @@ +//===- MachOWriter.cpp ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "MachOWriter.h" +#include "MachOLayoutBuilder.h" +#include "MachOObject.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Object/MachO.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/SHA256.h" +#include <memory> + +#if defined(__APPLE__) +#include <sys/mman.h> +#endif + +using namespace llvm; +using namespace llvm::objcopy::macho; +using namespace llvm::support::endian; + +size_t MachOWriter::headerSize() const { + return Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); +} + +size_t MachOWriter::loadCommandsSize() const { return O.Header.SizeOfCmds; } + +size_t MachOWriter::symTableSize() const { + return O.SymTable.Symbols.size() * + (Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist)); +} + +size_t MachOWriter::totalSize() const { + // Going from tail to head and looking for an appropriate "anchor" to + // calculate the total size assuming that all the offsets are either valid + // ("true") or 0 (0 indicates that the corresponding part is missing). 
+ + SmallVector<size_t, 7> Ends; + if (O.SymTabCommandIndex) { + const MachO::symtab_command &SymTabCommand = + O.LoadCommands[*O.SymTabCommandIndex] + .MachOLoadCommand.symtab_command_data; + if (SymTabCommand.symoff) + Ends.push_back(SymTabCommand.symoff + symTableSize()); + if (SymTabCommand.stroff) + Ends.push_back(SymTabCommand.stroff + SymTabCommand.strsize); + } + if (O.DyLdInfoCommandIndex) { + const MachO::dyld_info_command &DyLdInfoCommand = + O.LoadCommands[*O.DyLdInfoCommandIndex] + .MachOLoadCommand.dyld_info_command_data; + if (DyLdInfoCommand.rebase_off) { + assert((DyLdInfoCommand.rebase_size == O.Rebases.Opcodes.size()) && + "Incorrect rebase opcodes size"); + Ends.push_back(DyLdInfoCommand.rebase_off + DyLdInfoCommand.rebase_size); + } + if (DyLdInfoCommand.bind_off) { + assert((DyLdInfoCommand.bind_size == O.Binds.Opcodes.size()) && + "Incorrect bind opcodes size"); + Ends.push_back(DyLdInfoCommand.bind_off + DyLdInfoCommand.bind_size); + } + if (DyLdInfoCommand.weak_bind_off) { + assert((DyLdInfoCommand.weak_bind_size == O.WeakBinds.Opcodes.size()) && + "Incorrect weak bind opcodes size"); + Ends.push_back(DyLdInfoCommand.weak_bind_off + + DyLdInfoCommand.weak_bind_size); + } + if (DyLdInfoCommand.lazy_bind_off) { + assert((DyLdInfoCommand.lazy_bind_size == O.LazyBinds.Opcodes.size()) && + "Incorrect lazy bind opcodes size"); + Ends.push_back(DyLdInfoCommand.lazy_bind_off + + DyLdInfoCommand.lazy_bind_size); + } + if (DyLdInfoCommand.export_off) { + assert((DyLdInfoCommand.export_size == O.Exports.Trie.size()) && + "Incorrect trie size"); + Ends.push_back(DyLdInfoCommand.export_off + DyLdInfoCommand.export_size); + } + } + + if (O.DySymTabCommandIndex) { + const MachO::dysymtab_command &DySymTabCommand = + O.LoadCommands[*O.DySymTabCommandIndex] + .MachOLoadCommand.dysymtab_command_data; + + if (DySymTabCommand.indirectsymoff) + Ends.push_back(DySymTabCommand.indirectsymoff + + sizeof(uint32_t) * O.IndirectSymTable.Symbols.size()); + } + + for 
(std::optional<size_t> LinkEditDataCommandIndex : + {O.CodeSignatureCommandIndex, O.DylibCodeSignDRsIndex, + O.DataInCodeCommandIndex, O.LinkerOptimizationHintCommandIndex, + O.FunctionStartsCommandIndex, O.ChainedFixupsCommandIndex, + O.ExportsTrieCommandIndex}) + if (LinkEditDataCommandIndex) { + const MachO::linkedit_data_command &LinkEditDataCommand = + O.LoadCommands[*LinkEditDataCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + if (LinkEditDataCommand.dataoff) + Ends.push_back(LinkEditDataCommand.dataoff + + LinkEditDataCommand.datasize); + } + + // Otherwise, use the last section / reloction. + for (const LoadCommand &LC : O.LoadCommands) + for (const std::unique_ptr<Section> &S : LC.Sections) { + if (!S->hasValidOffset()) { + assert((S->Offset == 0) && "Skipped section's offset must be zero"); + assert((S->isVirtualSection() || S->Size == 0) && + "Non-zero-fill sections with zero offset must have zero size"); + continue; + } + assert((S->Offset != 0) && + "Non-zero-fill section's offset cannot be zero"); + Ends.push_back(S->Offset + S->Size); + if (S->RelOff) + Ends.push_back(S->RelOff + + S->NReloc * sizeof(MachO::any_relocation_info)); + } + + if (!Ends.empty()) + return *std::max_element(Ends.begin(), Ends.end()); + + // Otherwise, we have only Mach header and load commands. + return headerSize() + loadCommandsSize(); +} + +void MachOWriter::writeHeader() { + MachO::mach_header_64 Header; + + Header.magic = O.Header.Magic; + Header.cputype = O.Header.CPUType; + Header.cpusubtype = O.Header.CPUSubType; + Header.filetype = O.Header.FileType; + Header.ncmds = O.Header.NCmds; + Header.sizeofcmds = O.Header.SizeOfCmds; + Header.flags = O.Header.Flags; + Header.reserved = O.Header.Reserved; + + if (IsLittleEndian != sys::IsLittleEndianHost) + MachO::swapStruct(Header); + + auto HeaderSize = + Is64Bit ? 
sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); + memcpy(Buf->getBufferStart(), &Header, HeaderSize); +} + +void MachOWriter::writeLoadCommands() { + uint8_t *Begin = + reinterpret_cast<uint8_t *>(Buf->getBufferStart()) + headerSize(); + for (const LoadCommand &LC : O.LoadCommands) { + // Construct a load command. + MachO::macho_load_command MLC = LC.MachOLoadCommand; + switch (MLC.load_command_data.cmd) { + case MachO::LC_SEGMENT: + if (IsLittleEndian != sys::IsLittleEndianHost) + MachO::swapStruct(MLC.segment_command_data); + memcpy(Begin, &MLC.segment_command_data, sizeof(MachO::segment_command)); + Begin += sizeof(MachO::segment_command); + + for (const std::unique_ptr<Section> &Sec : LC.Sections) + writeSectionInLoadCommand<MachO::section>(*Sec, Begin); + continue; + case MachO::LC_SEGMENT_64: + if (IsLittleEndian != sys::IsLittleEndianHost) + MachO::swapStruct(MLC.segment_command_64_data); + memcpy(Begin, &MLC.segment_command_64_data, + sizeof(MachO::segment_command_64)); + Begin += sizeof(MachO::segment_command_64); + + for (const std::unique_ptr<Section> &Sec : LC.Sections) + writeSectionInLoadCommand<MachO::section_64>(*Sec, Begin); + continue; + } + +#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ + case MachO::LCName: \ + assert(sizeof(MachO::LCStruct) + LC.Payload.size() == \ + MLC.load_command_data.cmdsize); \ + if (IsLittleEndian != sys::IsLittleEndianHost) \ + MachO::swapStruct(MLC.LCStruct##_data); \ + memcpy(Begin, &MLC.LCStruct##_data, sizeof(MachO::LCStruct)); \ + Begin += sizeof(MachO::LCStruct); \ + if (!LC.Payload.empty()) \ + memcpy(Begin, LC.Payload.data(), LC.Payload.size()); \ + Begin += LC.Payload.size(); \ + break; + + // Copy the load command as it is. 
+ switch (MLC.load_command_data.cmd) { + default: + assert(sizeof(MachO::load_command) + LC.Payload.size() == + MLC.load_command_data.cmdsize); + if (IsLittleEndian != sys::IsLittleEndianHost) + MachO::swapStruct(MLC.load_command_data); + memcpy(Begin, &MLC.load_command_data, sizeof(MachO::load_command)); + Begin += sizeof(MachO::load_command); + if (!LC.Payload.empty()) + memcpy(Begin, LC.Payload.data(), LC.Payload.size()); + Begin += LC.Payload.size(); + break; +#include "llvm/BinaryFormat/MachO.def" + } + } +} + +template <typename StructType> +void MachOWriter::writeSectionInLoadCommand(const Section &Sec, uint8_t *&Out) { + StructType Temp; + assert(Sec.Segname.size() <= sizeof(Temp.segname) && "too long segment name"); + assert(Sec.Sectname.size() <= sizeof(Temp.sectname) && + "too long section name"); + memset(&Temp, 0, sizeof(StructType)); + memcpy(Temp.segname, Sec.Segname.data(), Sec.Segname.size()); + memcpy(Temp.sectname, Sec.Sectname.data(), Sec.Sectname.size()); + Temp.addr = Sec.Addr; + Temp.size = Sec.Size; + Temp.offset = Sec.Offset; + Temp.align = Sec.Align; + Temp.reloff = Sec.RelOff; + Temp.nreloc = Sec.NReloc; + Temp.flags = Sec.Flags; + Temp.reserved1 = Sec.Reserved1; + Temp.reserved2 = Sec.Reserved2; + + if (IsLittleEndian != sys::IsLittleEndianHost) + MachO::swapStruct(Temp); + memcpy(Out, &Temp, sizeof(StructType)); + Out += sizeof(StructType); +} + +void MachOWriter::writeSections() { + for (const LoadCommand &LC : O.LoadCommands) + for (const std::unique_ptr<Section> &Sec : LC.Sections) { + if (!Sec->hasValidOffset()) { + assert((Sec->Offset == 0) && "Skipped section's offset must be zero"); + assert((Sec->isVirtualSection() || Sec->Size == 0) && + "Non-zero-fill sections with zero offset must have zero size"); + continue; + } + + assert(Sec->Offset && "Section offset can not be zero"); + assert((Sec->Size == Sec->Content.size()) && "Incorrect section size"); + memcpy(Buf->getBufferStart() + Sec->Offset, Sec->Content.data(), + 
Sec->Content.size()); + for (size_t Index = 0; Index < Sec->Relocations.size(); ++Index) { + RelocationInfo RelocInfo = Sec->Relocations[Index]; + if (!RelocInfo.Scattered && !RelocInfo.IsAddend) { + const uint32_t SymbolNum = RelocInfo.Extern + ? (*RelocInfo.Symbol)->Index + : (*RelocInfo.Sec)->Index; + RelocInfo.setPlainRelocationSymbolNum(SymbolNum, IsLittleEndian); + } + if (IsLittleEndian != sys::IsLittleEndianHost) + MachO::swapStruct( + reinterpret_cast<MachO::any_relocation_info &>(RelocInfo.Info)); + memcpy(Buf->getBufferStart() + Sec->RelOff + + Index * sizeof(MachO::any_relocation_info), + &RelocInfo.Info, sizeof(RelocInfo.Info)); + } + } +} + +template <typename NListType> +void writeNListEntry(const SymbolEntry &SE, bool IsLittleEndian, char *&Out, + uint32_t Nstrx) { + NListType ListEntry; + ListEntry.n_strx = Nstrx; + ListEntry.n_type = SE.n_type; + ListEntry.n_sect = SE.n_sect; + ListEntry.n_desc = SE.n_desc; + ListEntry.n_value = SE.n_value; + + if (IsLittleEndian != sys::IsLittleEndianHost) + MachO::swapStruct(ListEntry); + memcpy(Out, reinterpret_cast<const char *>(&ListEntry), sizeof(NListType)); + Out += sizeof(NListType); +} + +void MachOWriter::writeStringTable() { + if (!O.SymTabCommandIndex) + return; + const MachO::symtab_command &SymTabCommand = + O.LoadCommands[*O.SymTabCommandIndex] + .MachOLoadCommand.symtab_command_data; + + uint8_t *StrTable = (uint8_t *)Buf->getBufferStart() + SymTabCommand.stroff; + LayoutBuilder.getStringTableBuilder().write(StrTable); +} + +void MachOWriter::writeSymbolTable() { + if (!O.SymTabCommandIndex) + return; + const MachO::symtab_command &SymTabCommand = + O.LoadCommands[*O.SymTabCommandIndex] + .MachOLoadCommand.symtab_command_data; + + char *SymTable = (char *)Buf->getBufferStart() + SymTabCommand.symoff; + for (auto &Symbol : O.SymTable.Symbols) { + SymbolEntry *Sym = Symbol.get(); + uint32_t Nstrx = LayoutBuilder.getStringTableBuilder().getOffset(Sym->Name); + + if (Is64Bit) + 
writeNListEntry<MachO::nlist_64>(*Sym, IsLittleEndian, SymTable, Nstrx); + else + writeNListEntry<MachO::nlist>(*Sym, IsLittleEndian, SymTable, Nstrx); + } +} + +void MachOWriter::writeRebaseInfo() { + if (!O.DyLdInfoCommandIndex) + return; + const MachO::dyld_info_command &DyLdInfoCommand = + O.LoadCommands[*O.DyLdInfoCommandIndex] + .MachOLoadCommand.dyld_info_command_data; + char *Out = (char *)Buf->getBufferStart() + DyLdInfoCommand.rebase_off; + assert((DyLdInfoCommand.rebase_size == O.Rebases.Opcodes.size()) && + "Incorrect rebase opcodes size"); + memcpy(Out, O.Rebases.Opcodes.data(), O.Rebases.Opcodes.size()); +} + +void MachOWriter::writeBindInfo() { + if (!O.DyLdInfoCommandIndex) + return; + const MachO::dyld_info_command &DyLdInfoCommand = + O.LoadCommands[*O.DyLdInfoCommandIndex] + .MachOLoadCommand.dyld_info_command_data; + char *Out = (char *)Buf->getBufferStart() + DyLdInfoCommand.bind_off; + assert((DyLdInfoCommand.bind_size == O.Binds.Opcodes.size()) && + "Incorrect bind opcodes size"); + memcpy(Out, O.Binds.Opcodes.data(), O.Binds.Opcodes.size()); +} + +void MachOWriter::writeWeakBindInfo() { + if (!O.DyLdInfoCommandIndex) + return; + const MachO::dyld_info_command &DyLdInfoCommand = + O.LoadCommands[*O.DyLdInfoCommandIndex] + .MachOLoadCommand.dyld_info_command_data; + char *Out = (char *)Buf->getBufferStart() + DyLdInfoCommand.weak_bind_off; + assert((DyLdInfoCommand.weak_bind_size == O.WeakBinds.Opcodes.size()) && + "Incorrect weak bind opcodes size"); + memcpy(Out, O.WeakBinds.Opcodes.data(), O.WeakBinds.Opcodes.size()); +} + +void MachOWriter::writeLazyBindInfo() { + if (!O.DyLdInfoCommandIndex) + return; + const MachO::dyld_info_command &DyLdInfoCommand = + O.LoadCommands[*O.DyLdInfoCommandIndex] + .MachOLoadCommand.dyld_info_command_data; + char *Out = (char *)Buf->getBufferStart() + DyLdInfoCommand.lazy_bind_off; + assert((DyLdInfoCommand.lazy_bind_size == O.LazyBinds.Opcodes.size()) && + "Incorrect lazy bind opcodes size"); + memcpy(Out, 
O.LazyBinds.Opcodes.data(), O.LazyBinds.Opcodes.size()); +} + +void MachOWriter::writeExportInfo() { + if (!O.DyLdInfoCommandIndex) + return; + const MachO::dyld_info_command &DyLdInfoCommand = + O.LoadCommands[*O.DyLdInfoCommandIndex] + .MachOLoadCommand.dyld_info_command_data; + char *Out = (char *)Buf->getBufferStart() + DyLdInfoCommand.export_off; + assert((DyLdInfoCommand.export_size == O.Exports.Trie.size()) && + "Incorrect export trie size"); + memcpy(Out, O.Exports.Trie.data(), O.Exports.Trie.size()); +} + +void MachOWriter::writeIndirectSymbolTable() { + if (!O.DySymTabCommandIndex) + return; + + const MachO::dysymtab_command &DySymTabCommand = + O.LoadCommands[*O.DySymTabCommandIndex] + .MachOLoadCommand.dysymtab_command_data; + + uint32_t *Out = + (uint32_t *)(Buf->getBufferStart() + DySymTabCommand.indirectsymoff); + for (const IndirectSymbolEntry &Sym : O.IndirectSymTable.Symbols) { + uint32_t Entry = (Sym.Symbol) ? (*Sym.Symbol)->Index : Sym.OriginalIndex; + if (IsLittleEndian != sys::IsLittleEndianHost) + sys::swapByteOrder(Entry); + *Out++ = Entry; + } +} + +void MachOWriter::writeLinkData(std::optional<size_t> LCIndex, + const LinkData &LD) { + if (!LCIndex) + return; + const MachO::linkedit_data_command &LinkEditDataCommand = + O.LoadCommands[*LCIndex].MachOLoadCommand.linkedit_data_command_data; + char *Out = (char *)Buf->getBufferStart() + LinkEditDataCommand.dataoff; + assert((LinkEditDataCommand.datasize == LD.Data.size()) && + "Incorrect data size"); + memcpy(Out, LD.Data.data(), LD.Data.size()); +} + +static uint64_t +getSegmentFileOffset(const LoadCommand &TextSegmentLoadCommand) { + const MachO::macho_load_command &MLC = + TextSegmentLoadCommand.MachOLoadCommand; + switch (MLC.load_command_data.cmd) { + case MachO::LC_SEGMENT: + return MLC.segment_command_data.fileoff; + case MachO::LC_SEGMENT_64: + return MLC.segment_command_64_data.fileoff; + default: + return 0; + } +} + +static uint64_t getSegmentFileSize(const LoadCommand 
&TextSegmentLoadCommand) { + const MachO::macho_load_command &MLC = + TextSegmentLoadCommand.MachOLoadCommand; + switch (MLC.load_command_data.cmd) { + case MachO::LC_SEGMENT: + return MLC.segment_command_data.filesize; + case MachO::LC_SEGMENT_64: + return MLC.segment_command_64_data.filesize; + default: + return 0; + } +} + +void MachOWriter::writeCodeSignatureData() { + // NOTE: This CodeSignature section behaviour must be kept in sync with that + // performed in LLD's CodeSignatureSection::write / + // CodeSignatureSection::writeHashes. Furthermore, this call must occur only + // after the rest of the binary has already been written to the buffer. This + // is because the buffer is read from to perform the necessary hashing. + + // The CodeSignature section is the last section in the MachO binary and + // contains a hash of all content in the binary before it. Since llvm-objcopy + // has likely modified the target binary, the hash must be regenerated + // entirely. To generate this hash, we must read from the start of the binary + // (HashReadStart) to just before the start of the CodeSignature section + // (HashReadEnd). + + const CodeSignatureInfo &CodeSignature = LayoutBuilder.getCodeSignature(); + + uint8_t *BufferStart = reinterpret_cast<uint8_t *>(Buf->getBufferStart()); + uint8_t *HashReadStart = BufferStart; + uint8_t *HashReadEnd = BufferStart + CodeSignature.StartOffset; + + // The CodeSignature section begins with a header, after which the hashes + // of each page of the binary are written. 
+ uint8_t *HashWriteStart = HashReadEnd + CodeSignature.AllHeadersSize; + + uint32_t TextSegmentFileOff = 0; + uint32_t TextSegmentFileSize = 0; + if (O.TextSegmentCommandIndex) { + const LoadCommand &TextSegmentLoadCommand = + O.LoadCommands[*O.TextSegmentCommandIndex]; + assert(TextSegmentLoadCommand.MachOLoadCommand.load_command_data.cmd == + MachO::LC_SEGMENT || + TextSegmentLoadCommand.MachOLoadCommand.load_command_data.cmd == + MachO::LC_SEGMENT_64); + assert(StringRef(TextSegmentLoadCommand.MachOLoadCommand + .segment_command_data.segname) == "__TEXT"); + TextSegmentFileOff = getSegmentFileOffset(TextSegmentLoadCommand); + TextSegmentFileSize = getSegmentFileSize(TextSegmentLoadCommand); + } + + const uint32_t FileNamePad = CodeSignature.AllHeadersSize - + CodeSignature.FixedHeadersSize - + CodeSignature.OutputFileName.size(); + + // Write code section header. + auto *SuperBlob = reinterpret_cast<MachO::CS_SuperBlob *>(HashReadEnd); + write32be(&SuperBlob->magic, MachO::CSMAGIC_EMBEDDED_SIGNATURE); + write32be(&SuperBlob->length, CodeSignature.Size); + write32be(&SuperBlob->count, 1); + auto *BlobIndex = reinterpret_cast<MachO::CS_BlobIndex *>(&SuperBlob[1]); + write32be(&BlobIndex->type, MachO::CSSLOT_CODEDIRECTORY); + write32be(&BlobIndex->offset, CodeSignature.BlobHeadersSize); + auto *CodeDirectory = reinterpret_cast<MachO::CS_CodeDirectory *>( + HashReadEnd + CodeSignature.BlobHeadersSize); + write32be(&CodeDirectory->magic, MachO::CSMAGIC_CODEDIRECTORY); + write32be(&CodeDirectory->length, + CodeSignature.Size - CodeSignature.BlobHeadersSize); + write32be(&CodeDirectory->version, MachO::CS_SUPPORTSEXECSEG); + write32be(&CodeDirectory->flags, MachO::CS_ADHOC | MachO::CS_LINKER_SIGNED); + write32be(&CodeDirectory->hashOffset, + sizeof(MachO::CS_CodeDirectory) + + CodeSignature.OutputFileName.size() + FileNamePad); + write32be(&CodeDirectory->identOffset, sizeof(MachO::CS_CodeDirectory)); + CodeDirectory->nSpecialSlots = 0; + 
write32be(&CodeDirectory->nCodeSlots, CodeSignature.BlockCount); + write32be(&CodeDirectory->codeLimit, CodeSignature.StartOffset); + CodeDirectory->hashSize = static_cast<uint8_t>(CodeSignature.HashSize); + CodeDirectory->hashType = MachO::kSecCodeSignatureHashSHA256; + CodeDirectory->platform = 0; + CodeDirectory->pageSize = CodeSignature.BlockSizeShift; + CodeDirectory->spare2 = 0; + CodeDirectory->scatterOffset = 0; + CodeDirectory->teamOffset = 0; + CodeDirectory->spare3 = 0; + CodeDirectory->codeLimit64 = 0; + write64be(&CodeDirectory->execSegBase, TextSegmentFileOff); + write64be(&CodeDirectory->execSegLimit, TextSegmentFileSize); + write64be(&CodeDirectory->execSegFlags, O.Header.FileType == MachO::MH_EXECUTE + ? MachO::CS_EXECSEG_MAIN_BINARY + : 0); + + auto *Id = reinterpret_cast<char *>(&CodeDirectory[1]); + memcpy(Id, CodeSignature.OutputFileName.begin(), + CodeSignature.OutputFileName.size()); + memset(Id + CodeSignature.OutputFileName.size(), 0, FileNamePad); + + // Write the hashes. + uint8_t *CurrHashReadPosition = HashReadStart; + uint8_t *CurrHashWritePosition = HashWriteStart; + while (CurrHashReadPosition < HashReadEnd) { + StringRef Block(reinterpret_cast<char *>(CurrHashReadPosition), + std::min(static_cast<size_t>(HashReadEnd + - CurrHashReadPosition), + static_cast<size_t>(CodeSignature.BlockSize))); + SHA256 Hasher; + Hasher.update(Block); + std::array<uint8_t, 32> Hash = Hasher.final(); + assert(Hash.size() == CodeSignature.HashSize); + memcpy(CurrHashWritePosition, Hash.data(), CodeSignature.HashSize); + CurrHashReadPosition += CodeSignature.BlockSize; + CurrHashWritePosition += CodeSignature.HashSize; + } +#if defined(__APPLE__) + // This is macOS-specific work-around and makes no sense for any + // other host OS. See https://openradar.appspot.com/FB8914231 + // + // The macOS kernel maintains a signature-verification cache to + // quickly validate applications at time of execve(2). 
The trouble + // is that for the kernel creates the cache entry at the time of the + // mmap(2) call, before we have a chance to write either the code to + // sign or the signature header+hashes. The fix is to invalidate + // all cached data associated with the output file, thus discarding + // the bogus prematurely-cached signature. + msync(BufferStart, CodeSignature.StartOffset + CodeSignature.Size, + MS_INVALIDATE); +#endif +} + +void MachOWriter::writeDataInCodeData() { + return writeLinkData(O.DataInCodeCommandIndex, O.DataInCode); +} + +void MachOWriter::writeLinkerOptimizationHint() { + return writeLinkData(O.LinkerOptimizationHintCommandIndex, + O.LinkerOptimizationHint); +} + +void MachOWriter::writeFunctionStartsData() { + return writeLinkData(O.FunctionStartsCommandIndex, O.FunctionStarts); +} + +void MachOWriter::writeDylibCodeSignDRsData() { + return writeLinkData(O.DylibCodeSignDRsIndex, O.DylibCodeSignDRs); +} + +void MachOWriter::writeChainedFixupsData() { + return writeLinkData(O.ChainedFixupsCommandIndex, O.ChainedFixups); +} + +void MachOWriter::writeExportsTrieData() { + if (!O.ExportsTrieCommandIndex) + return; + const MachO::linkedit_data_command &ExportsTrieCmd = + O.LoadCommands[*O.ExportsTrieCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + char *Out = (char *)Buf->getBufferStart() + ExportsTrieCmd.dataoff; + assert((ExportsTrieCmd.datasize == O.Exports.Trie.size()) && + "Incorrect export trie size"); + memcpy(Out, O.Exports.Trie.data(), O.Exports.Trie.size()); +} + +void MachOWriter::writeTail() { + typedef void (MachOWriter::*WriteHandlerType)(); + typedef std::pair<uint64_t, WriteHandlerType> WriteOperation; + SmallVector<WriteOperation, 7> Queue; + + if (O.SymTabCommandIndex) { + const MachO::symtab_command &SymTabCommand = + O.LoadCommands[*O.SymTabCommandIndex] + .MachOLoadCommand.symtab_command_data; + if (SymTabCommand.symoff) + Queue.push_back({SymTabCommand.symoff, &MachOWriter::writeSymbolTable}); + if 
(SymTabCommand.stroff) + Queue.push_back({SymTabCommand.stroff, &MachOWriter::writeStringTable}); + } + + if (O.DyLdInfoCommandIndex) { + const MachO::dyld_info_command &DyLdInfoCommand = + O.LoadCommands[*O.DyLdInfoCommandIndex] + .MachOLoadCommand.dyld_info_command_data; + if (DyLdInfoCommand.rebase_off) + Queue.push_back( + {DyLdInfoCommand.rebase_off, &MachOWriter::writeRebaseInfo}); + if (DyLdInfoCommand.bind_off) + Queue.push_back({DyLdInfoCommand.bind_off, &MachOWriter::writeBindInfo}); + if (DyLdInfoCommand.weak_bind_off) + Queue.push_back( + {DyLdInfoCommand.weak_bind_off, &MachOWriter::writeWeakBindInfo}); + if (DyLdInfoCommand.lazy_bind_off) + Queue.push_back( + {DyLdInfoCommand.lazy_bind_off, &MachOWriter::writeLazyBindInfo}); + if (DyLdInfoCommand.export_off) + Queue.push_back( + {DyLdInfoCommand.export_off, &MachOWriter::writeExportInfo}); + } + + if (O.DySymTabCommandIndex) { + const MachO::dysymtab_command &DySymTabCommand = + O.LoadCommands[*O.DySymTabCommandIndex] + .MachOLoadCommand.dysymtab_command_data; + + if (DySymTabCommand.indirectsymoff) + Queue.emplace_back(DySymTabCommand.indirectsymoff, + &MachOWriter::writeIndirectSymbolTable); + } + + std::initializer_list<std::pair<std::optional<size_t>, WriteHandlerType>> + LinkEditDataCommandWriters = { + {O.CodeSignatureCommandIndex, &MachOWriter::writeCodeSignatureData}, + {O.DylibCodeSignDRsIndex, &MachOWriter::writeDylibCodeSignDRsData}, + {O.DataInCodeCommandIndex, &MachOWriter::writeDataInCodeData}, + {O.LinkerOptimizationHintCommandIndex, + &MachOWriter::writeLinkerOptimizationHint}, + {O.FunctionStartsCommandIndex, &MachOWriter::writeFunctionStartsData}, + {O.ChainedFixupsCommandIndex, &MachOWriter::writeChainedFixupsData}, + {O.ExportsTrieCommandIndex, &MachOWriter::writeExportsTrieData}}; + for (const auto &W : LinkEditDataCommandWriters) { + std::optional<size_t> LinkEditDataCommandIndex; + WriteHandlerType WriteHandler; + std::tie(LinkEditDataCommandIndex, WriteHandler) = W; + if 
(LinkEditDataCommandIndex) { + const MachO::linkedit_data_command &LinkEditDataCommand = + O.LoadCommands[*LinkEditDataCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + if (LinkEditDataCommand.dataoff) + Queue.emplace_back(LinkEditDataCommand.dataoff, WriteHandler); + } + } + + llvm::sort(Queue, llvm::less_first()); + + for (auto WriteOp : Queue) + (this->*WriteOp.second)(); +} + +Error MachOWriter::finalize() { return LayoutBuilder.layout(); } + +Error MachOWriter::write() { + size_t TotalSize = totalSize(); + Buf = WritableMemoryBuffer::getNewMemBuffer(TotalSize); + if (!Buf) + return createStringError(errc::not_enough_memory, + "failed to allocate memory buffer of " + + Twine::utohexstr(TotalSize) + " bytes"); + writeHeader(); + writeLoadCommands(); + writeSections(); + writeTail(); + + // TODO: Implement direct writing to the output stream (without intermediate + // memory buffer Buf). + Out.write(Buf->getBufferStart(), Buf->getBufferSize()); + return Error::success(); +} diff --git a/contrib/libs/llvm16/lib/ObjCopy/MachO/MachOWriter.h b/contrib/libs/llvm16/lib/ObjCopy/MachO/MachOWriter.h new file mode 100644 index 00000000000..5da29dff656 --- /dev/null +++ b/contrib/libs/llvm16/lib/ObjCopy/MachO/MachOWriter.h @@ -0,0 +1,77 @@ +//===- MachOWriter.h --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_OBJCOPY_MACHO_MACHOWRITER_H +#define LLVM_LIB_OBJCOPY_MACHO_MACHOWRITER_H + +#include "MachOLayoutBuilder.h" +#include "MachOObject.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/ObjCopy/MachO/MachOObjcopy.h" +#include "llvm/Object/MachO.h" + +namespace llvm { +class Error; + +namespace objcopy { +namespace macho { + +class MachOWriter { + Object &O; + bool Is64Bit; + bool IsLittleEndian; + uint64_t PageSize; + std::unique_ptr<WritableMemoryBuffer> Buf; + raw_ostream &Out; + MachOLayoutBuilder LayoutBuilder; + + size_t headerSize() const; + size_t loadCommandsSize() const; + size_t symTableSize() const; + size_t strTableSize() const; + + void writeHeader(); + void writeLoadCommands(); + template <typename StructType> + void writeSectionInLoadCommand(const Section &Sec, uint8_t *&Out); + void writeSections(); + void writeSymbolTable(); + void writeStringTable(); + void writeRebaseInfo(); + void writeBindInfo(); + void writeWeakBindInfo(); + void writeLazyBindInfo(); + void writeExportInfo(); + void writeIndirectSymbolTable(); + void writeLinkData(std::optional<size_t> LCIndex, const LinkData &LD); + void writeCodeSignatureData(); + void writeDataInCodeData(); + void writeLinkerOptimizationHint(); + void writeFunctionStartsData(); + void writeDylibCodeSignDRsData(); + void writeChainedFixupsData(); + void writeExportsTrieData(); + void writeTail(); + +public: + MachOWriter(Object &O, bool Is64Bit, bool IsLittleEndian, + StringRef OutputFileName, uint64_t PageSize, raw_ostream &Out) + : O(O), Is64Bit(Is64Bit), IsLittleEndian(IsLittleEndian), + PageSize(PageSize), Out(Out), + LayoutBuilder(O, Is64Bit, OutputFileName, PageSize) {} + + size_t totalSize() const; + Error finalize(); + Error write(); +}; + +} // end namespace macho +} // end namespace objcopy +} // end namespace llvm + +#endif // 
LLVM_LIB_OBJCOPY_MACHO_MACHOWRITER_H diff --git a/contrib/libs/llvm16/lib/ObjCopy/ObjCopy.cpp b/contrib/libs/llvm16/lib/ObjCopy/ObjCopy.cpp new file mode 100644 index 00000000000..16968d20226 --- /dev/null +++ b/contrib/libs/llvm16/lib/ObjCopy/ObjCopy.cpp @@ -0,0 +1,90 @@ +//===- Objcopy.cpp --------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ObjCopy/ObjCopy.h" +#include "llvm/ObjCopy/COFF/COFFConfig.h" +#include "llvm/ObjCopy/COFF/COFFObjcopy.h" +#include "llvm/ObjCopy/CommonConfig.h" +#include "llvm/ObjCopy/ELF/ELFConfig.h" +#include "llvm/ObjCopy/ELF/ELFObjcopy.h" +#include "llvm/ObjCopy/MachO/MachOConfig.h" +#include "llvm/ObjCopy/MachO/MachOObjcopy.h" +#include "llvm/ObjCopy/MultiFormatConfig.h" +#include "llvm/ObjCopy/wasm/WasmConfig.h" +#include "llvm/ObjCopy/wasm/WasmObjcopy.h" +#include "llvm/ObjCopy/XCOFF/XCOFFConfig.h" +#include "llvm/ObjCopy/XCOFF/XCOFFObjcopy.h" +#include "llvm/Object/COFF.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/Error.h" +#include "llvm/Object/MachO.h" +#include "llvm/Object/MachOUniversal.h" +#include "llvm/Object/Wasm.h" +#include "llvm/Object/XCOFFObjectFile.h" +#include "llvm/Support/SmallVectorMemoryBuffer.h" + +namespace llvm { +namespace objcopy { + +using namespace llvm::object; + +/// The function executeObjcopyOnBinary does the dispatch based on the format +/// of the input binary (ELF, COFF, MachO, MachO universal, Wasm or XCOFF). 
+Error executeObjcopyOnBinary(const MultiFormatConfig &Config, + object::Binary &In, raw_ostream &Out) { + if (auto *ELFBinary = dyn_cast<object::ELFObjectFileBase>(&In)) { + Expected<const ELFConfig &> ELFConfig = Config.getELFConfig(); + if (!ELFConfig) + return ELFConfig.takeError(); + + return elf::executeObjcopyOnBinary(Config.getCommonConfig(), *ELFConfig, + *ELFBinary, Out); + } + if (auto *COFFBinary = dyn_cast<object::COFFObjectFile>(&In)) { + Expected<const COFFConfig &> COFFConfig = Config.getCOFFConfig(); + if (!COFFConfig) + return COFFConfig.takeError(); + + return coff::executeObjcopyOnBinary(Config.getCommonConfig(), *COFFConfig, + *COFFBinary, Out); + } + if (auto *MachOBinary = dyn_cast<object::MachOObjectFile>(&In)) { + Expected<const MachOConfig &> MachOConfig = Config.getMachOConfig(); + if (!MachOConfig) + return MachOConfig.takeError(); + + return macho::executeObjcopyOnBinary(Config.getCommonConfig(), *MachOConfig, + *MachOBinary, Out); + } + if (auto *MachOUniversalBinary = + dyn_cast<object::MachOUniversalBinary>(&In)) { + return macho::executeObjcopyOnMachOUniversalBinary( + Config, *MachOUniversalBinary, Out); + } + if (auto *WasmBinary = dyn_cast<object::WasmObjectFile>(&In)) { + Expected<const WasmConfig &> WasmConfig = Config.getWasmConfig(); + if (!WasmConfig) + return WasmConfig.takeError(); + + return objcopy::wasm::executeObjcopyOnBinary(Config.getCommonConfig(), + *WasmConfig, *WasmBinary, Out); + } + if (auto *XCOFFBinary = dyn_cast<object::XCOFFObjectFile>(&In)) { + Expected<const XCOFFConfig &> XCOFFConfig = Config.getXCOFFConfig(); + if (!XCOFFConfig) + return XCOFFConfig.takeError(); + + return xcoff::executeObjcopyOnBinary(Config.getCommonConfig(), *XCOFFConfig, + *XCOFFBinary, Out); + } + return createStringError(object_error::invalid_file_type, + "unsupported object file format"); +} + +} // end namespace objcopy +} // end namespace llvm diff --git a/contrib/libs/llvm16/lib/ObjCopy/XCOFF/XCOFFObjcopy.cpp 
b/contrib/libs/llvm16/lib/ObjCopy/XCOFF/XCOFFObjcopy.cpp new file mode 100644 index 00000000000..f6e29bd315c --- /dev/null +++ b/contrib/libs/llvm16/lib/ObjCopy/XCOFF/XCOFFObjcopy.cpp @@ -0,0 +1,45 @@ +//===- XCOFFObjcopy.cpp ---------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ObjCopy/CommonConfig.h" +#include "llvm/ObjCopy/XCOFF/XCOFFConfig.h" +#include "llvm/ObjCopy/XCOFF/XCOFFObjcopy.h" +#include "llvm/Support/Errc.h" +#include "XCOFFObject.h" +#include "XCOFFReader.h" +#include "XCOFFWriter.h" + +namespace llvm { +namespace objcopy { +namespace xcoff { + +using namespace object; + +static Error handleArgs(const CommonConfig &Config, Object &Obj) { + return Error::success(); +} + +Error executeObjcopyOnBinary(const CommonConfig &Config, const XCOFFConfig &, + XCOFFObjectFile &In, raw_ostream &Out) { + XCOFFReader Reader(In); + Expected<std::unique_ptr<Object>> ObjOrErr = Reader.create(); + if (!ObjOrErr) + return createFileError(Config.InputFilename, ObjOrErr.takeError()); + Object *Obj = ObjOrErr->get(); + assert(Obj && "Unable to deserialize XCOFF object"); + if (Error E = handleArgs(Config, *Obj)) + return createFileError(Config.InputFilename, std::move(E)); + XCOFFWriter Writer(*Obj, Out); + if (Error E = Writer.write()) + return createFileError(Config.OutputFilename, std::move(E)); + return Error::success(); +} + +} // end namespace xcoff +} // end namespace objcopy +} // end namespace llvm diff --git a/contrib/libs/llvm16/lib/ObjCopy/XCOFF/XCOFFObject.h b/contrib/libs/llvm16/lib/ObjCopy/XCOFF/XCOFFObject.h new file mode 100644 index 00000000000..3c68b6d3878 --- /dev/null +++ b/contrib/libs/llvm16/lib/ObjCopy/XCOFF/XCOFFObject.h @@ -0,0 +1,48 @@ 
+//===- XCOFFObject.h --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_OBJCOPY_XCOFF_XCOFFOBJECT_H +#define LLVM_LIB_OBJCOPY_XCOFF_XCOFFOBJECT_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/XCOFFObjectFile.h" +#include <vector> + +namespace llvm { +namespace objcopy { +namespace xcoff { + +using namespace object; + +struct Section { + XCOFFSectionHeader32 SectionHeader; + ArrayRef<uint8_t> Contents; + std::vector<XCOFFRelocation32> Relocations; +}; + +struct Symbol { + XCOFFSymbolEntry32 Sym; + // For now, each auxiliary symbol is only an opaque binary blob with no + // distinction. + StringRef AuxSymbolEntries; +}; + +struct Object { + XCOFFFileHeader32 FileHeader; + XCOFFAuxiliaryHeader32 OptionalFileHeader; + std::vector<Section> Sections; + std::vector<Symbol> Symbols; + StringRef StringTable; +}; + +} // end namespace xcoff +} // end namespace objcopy +} // end namespace llvm + +#endif // LLVM_LIB_OBJCOPY_XCOFF_XCOFFOBJECT_H diff --git a/contrib/libs/llvm16/lib/ObjCopy/XCOFF/XCOFFReader.cpp b/contrib/libs/llvm16/lib/ObjCopy/XCOFF/XCOFFReader.cpp new file mode 100644 index 00000000000..8ad3021a034 --- /dev/null +++ b/contrib/libs/llvm16/lib/ObjCopy/XCOFF/XCOFFReader.cpp @@ -0,0 +1,101 @@ +//===- XCOFFReader.cpp ----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "XCOFFReader.h" + +namespace llvm { +namespace objcopy { +namespace xcoff { + +using namespace object; + +Error XCOFFReader::readSections(Object &Obj) const { + ArrayRef<XCOFFSectionHeader32> Sections = XCOFFObj.sections32(); + for (const XCOFFSectionHeader32 &Sec : Sections) { + Section ReadSec; + // Section header. + ReadSec.SectionHeader = Sec; + DataRefImpl SectionDRI; + SectionDRI.p = reinterpret_cast<uintptr_t>(&Sec); + + // Section data. + if (Sec.SectionSize) { + Expected<ArrayRef<uint8_t>> ContentsRef = + XCOFFObj.getSectionContents(SectionDRI); + if (!ContentsRef) + return ContentsRef.takeError(); + ReadSec.Contents = ContentsRef.get(); + } + + // Relocations. + if (Sec.NumberOfRelocations) { + auto Relocations = + XCOFFObj.relocations<XCOFFSectionHeader32, XCOFFRelocation32>(Sec); + if (!Relocations) + return Relocations.takeError(); + for (const XCOFFRelocation32 &Rel : Relocations.get()) + ReadSec.Relocations.push_back(Rel); + } + + Obj.Sections.push_back(std::move(ReadSec)); + } + return Error::success(); +} + +Error XCOFFReader::readSymbols(Object &Obj) const { + std::vector<Symbol> Symbols; + Symbols.reserve(XCOFFObj.getNumberOfSymbolTableEntries()); + for (SymbolRef Sym : XCOFFObj.symbols()) { + Symbol ReadSym; + DataRefImpl SymbolDRI = Sym.getRawDataRefImpl(); + XCOFFSymbolRef SymbolEntRef = XCOFFObj.toSymbolRef(SymbolDRI); + ReadSym.Sym = *SymbolEntRef.getSymbol32(); + // Auxiliary entries. 
+ if (SymbolEntRef.getNumberOfAuxEntries()) { + const char *Start = reinterpret_cast<const char *>( + SymbolDRI.p + XCOFF::SymbolTableEntrySize); + Expected<StringRef> RawAuxEntriesOrError = XCOFFObj.getRawData( + Start, + XCOFF::SymbolTableEntrySize * SymbolEntRef.getNumberOfAuxEntries(), + StringRef("symbol")); + if (!RawAuxEntriesOrError) + return RawAuxEntriesOrError.takeError(); + ReadSym.AuxSymbolEntries = RawAuxEntriesOrError.get(); + } + Obj.Symbols.push_back(std::move(ReadSym)); + } + return Error::success(); +} + +Expected<std::unique_ptr<Object>> XCOFFReader::create() const { + auto Obj = std::make_unique<Object>(); + // Only 32-bit supported now. + if (XCOFFObj.is64Bit()) + return createStringError(object_error::invalid_file_type, + "64-bit XCOFF is not supported yet"); + // Read the file header. + Obj->FileHeader = *XCOFFObj.fileHeader32(); + // Read the optional header. + if (XCOFFObj.getOptionalHeaderSize()) + Obj->OptionalFileHeader = *XCOFFObj.auxiliaryHeader32(); + // Read each section. + Obj->Sections.reserve(XCOFFObj.getNumberOfSections()); + if (Error E = readSections(*Obj)) + return std::move(E); + // Read each symbol. + Obj->Symbols.reserve(XCOFFObj.getRawNumberOfSymbolTableEntries32()); + if (Error E = readSymbols(*Obj)) + return std::move(E); + // String table. + Obj->StringTable = XCOFFObj.getStringTable(); + return std::move(Obj); +} + +} // end namespace xcoff +} // end namespace objcopy +} // end namespace llvm diff --git a/contrib/libs/llvm16/lib/ObjCopy/XCOFF/XCOFFReader.h b/contrib/libs/llvm16/lib/ObjCopy/XCOFF/XCOFFReader.h new file mode 100644 index 00000000000..63a8d8579d3 --- /dev/null +++ b/contrib/libs/llvm16/lib/ObjCopy/XCOFF/XCOFFReader.h @@ -0,0 +1,35 @@ +//===- XCOFFReader.h --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_OBJCOPY_XCOFF_XCOFFREADER_H +#define LLVM_LIB_OBJCOPY_XCOFF_XCOFFREADER_H + +#include "XCOFFObject.h" + +namespace llvm { +namespace objcopy { +namespace xcoff { + +using namespace object; + +class XCOFFReader { +public: + explicit XCOFFReader(const XCOFFObjectFile &O) : XCOFFObj(O) {} + Expected<std::unique_ptr<Object>> create() const; + +private: + const XCOFFObjectFile &XCOFFObj; + Error readSections(Object &Obj) const; + Error readSymbols(Object &Obj) const; +}; + +} // end namespace xcoff +} // end namespace objcopy +} // end namespace llvm + +#endif // LLVM_LIB_OBJCOPY_XCOFF_XCOFFREADER_H diff --git a/contrib/libs/llvm16/lib/ObjCopy/XCOFF/XCOFFWriter.cpp b/contrib/libs/llvm16/lib/ObjCopy/XCOFF/XCOFFWriter.cpp new file mode 100644 index 00000000000..bae3128822e --- /dev/null +++ b/contrib/libs/llvm16/lib/ObjCopy/XCOFF/XCOFFWriter.cpp @@ -0,0 +1,125 @@ +//===- XCOFFWriter.cpp ----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Errc.h" +#include "XCOFFWriter.h" + +namespace llvm { +namespace objcopy { +namespace xcoff { + +using namespace object; + +void XCOFFWriter::finalizeHeaders() { + // File header. + FileSize += sizeof(XCOFFFileHeader32); + // Optional file header. + FileSize += Obj.FileHeader.AuxHeaderSize; + // Section headers. + FileSize += sizeof(XCOFFSectionHeader32) * Obj.Sections.size(); +} + +void XCOFFWriter::finalizeSections() { + for (const Section &Sec : Obj.Sections) { + // Section data. + FileSize += Sec.Contents.size(); + // Relocations. 
+ FileSize += + Sec.SectionHeader.NumberOfRelocations * sizeof(XCOFFRelocation32); + } +} + +void XCOFFWriter::finalizeSymbolStringTable() { + assert(Obj.FileHeader.SymbolTableOffset >= FileSize); + FileSize = Obj.FileHeader.SymbolTableOffset; + // Symbols and auxiliary entries. + FileSize += + Obj.FileHeader.NumberOfSymTableEntries * XCOFF::SymbolTableEntrySize; + // String table. + FileSize += Obj.StringTable.size(); +} + +void XCOFFWriter::finalize() { + FileSize = 0; + finalizeHeaders(); + finalizeSections(); + finalizeSymbolStringTable(); +} + +void XCOFFWriter::writeHeaders() { + // Write the file header. + uint8_t *Ptr = reinterpret_cast<uint8_t *>(Buf->getBufferStart()); + memcpy(Ptr, &Obj.FileHeader, sizeof(XCOFFFileHeader32)); + Ptr += sizeof(XCOFFFileHeader32); + + // Write the optional header. + if (Obj.FileHeader.AuxHeaderSize) { + memcpy(Ptr, &Obj.OptionalFileHeader, Obj.FileHeader.AuxHeaderSize); + Ptr += Obj.FileHeader.AuxHeaderSize; + } + + // Write section headers. + for (const Section &Sec : Obj.Sections) { + memcpy(Ptr, &Sec.SectionHeader, sizeof(XCOFFSectionHeader32)); + Ptr += sizeof(XCOFFSectionHeader32); + } +} + +void XCOFFWriter::writeSections() { + // Write section data. + for (const Section &Sec : Obj.Sections) { + uint8_t *Ptr = reinterpret_cast<uint8_t *>(Buf->getBufferStart()) + + Sec.SectionHeader.FileOffsetToRawData; + Ptr = std::copy(Sec.Contents.begin(), Sec.Contents.end(), Ptr); + } + + // Write relocations. + for (const Section &Sec : Obj.Sections) { + uint8_t *Ptr = reinterpret_cast<uint8_t *>(Buf->getBufferStart()) + + Sec.SectionHeader.FileOffsetToRelocationInfo; + for (const XCOFFRelocation32 &Rel : Sec.Relocations) { + memcpy(Ptr, &Rel, sizeof(XCOFFRelocation32)); + Ptr += sizeof(XCOFFRelocation32); + } + } +} + +void XCOFFWriter::writeSymbolStringTable() { + // Write symbols. 
+ uint8_t *Ptr = reinterpret_cast<uint8_t *>(Buf->getBufferStart()) + + Obj.FileHeader.SymbolTableOffset; + for (const Symbol &Sym : Obj.Symbols) { + memcpy(Ptr, &Sym.Sym, XCOFF::SymbolTableEntrySize); + Ptr += XCOFF::SymbolTableEntrySize; + // Auxiliary symbols. + memcpy(Ptr, Sym.AuxSymbolEntries.data(), Sym.AuxSymbolEntries.size()); + Ptr += Sym.AuxSymbolEntries.size(); + } + // Write the string table. + memcpy(Ptr, Obj.StringTable.data(), Obj.StringTable.size()); + Ptr += Obj.StringTable.size(); +} + +Error XCOFFWriter::write() { + finalize(); + Buf = WritableMemoryBuffer::getNewMemBuffer(FileSize); + if (!Buf) + return createStringError(errc::not_enough_memory, + "failed to allocate memory buffer of " + + Twine::utohexstr(FileSize) + " bytes"); + + writeHeaders(); + writeSections(); + writeSymbolStringTable(); + Out.write(Buf->getBufferStart(), Buf->getBufferSize()); + return Error::success(); +} + +} // end namespace xcoff +} // end namespace objcopy +} // end namespace llvm diff --git a/contrib/libs/llvm16/lib/ObjCopy/XCOFF/XCOFFWriter.h b/contrib/libs/llvm16/lib/ObjCopy/XCOFF/XCOFFWriter.h new file mode 100644 index 00000000000..54c7b5f3ccb --- /dev/null +++ b/contrib/libs/llvm16/lib/ObjCopy/XCOFF/XCOFFWriter.h @@ -0,0 +1,48 @@ +//===- XCOFFWriter.h --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_OBJCOPY_XCOFF_XCOFFWRITER_H +#define LLVM_LIB_OBJCOPY_XCOFF_XCOFFWRITER_H + +#include "llvm/Support/MemoryBuffer.h" +#include "XCOFFObject.h" + +#include <cstdint> +#include <vector> + +namespace llvm { +namespace objcopy { +namespace xcoff { + +class XCOFFWriter { +public: + virtual ~XCOFFWriter() {} + XCOFFWriter(Object &Obj, raw_ostream &Out) : Obj(Obj), Out(Out) {} + Error write(); + +private: + Object &Obj; + raw_ostream &Out; + std::unique_ptr<WritableMemoryBuffer> Buf; + size_t FileSize; + + void finalizeHeaders(); + void finalizeSections(); + void finalizeSymbolStringTable(); + void finalize(); + + void writeHeaders(); + void writeSections(); + void writeSymbolStringTable(); +}; + +} // end namespace xcoff +} // end namespace objcopy +} // end namespace llvm + +#endif // LLVM_LIB_OBJCOPY_XCOFF_XCOFFWRITER_H diff --git a/contrib/libs/llvm16/lib/ObjCopy/wasm/WasmObjcopy.cpp b/contrib/libs/llvm16/lib/ObjCopy/wasm/WasmObjcopy.cpp new file mode 100644 index 00000000000..e5af59f9328 --- /dev/null +++ b/contrib/libs/llvm16/lib/ObjCopy/wasm/WasmObjcopy.cpp @@ -0,0 +1,162 @@ +//===- WasmObjcopy.cpp ----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ObjCopy/wasm/WasmObjcopy.h" +#include "WasmObject.h" +#include "WasmReader.h" +#include "WasmWriter.h" +#include "llvm/ObjCopy/CommonConfig.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/FileOutputBuffer.h" + +namespace llvm { +namespace objcopy { +namespace wasm { + +using namespace object; +using SectionPred = std::function<bool(const Section &Sec)>; + +static bool isDebugSection(const Section &Sec) { + return Sec.Name.startswith(".debug"); +} + +static bool isLinkerSection(const Section &Sec) { + return Sec.Name.startswith("reloc.") || Sec.Name == "linking"; +} + +static bool isNameSection(const Section &Sec) { return Sec.Name == "name"; } + +// Sections which are known to be "comments" or informational and do not affect +// program semantics. +static bool isCommentSection(const Section &Sec) { + return Sec.Name == "producers"; +} + +static Error dumpSectionToFile(StringRef SecName, StringRef Filename, + Object &Obj) { + for (const Section &Sec : Obj.Sections) { + if (Sec.Name == SecName) { + ArrayRef<uint8_t> Contents = Sec.Contents; + Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr = + FileOutputBuffer::create(Filename, Contents.size()); + if (!BufferOrErr) + return BufferOrErr.takeError(); + std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr); + std::copy(Contents.begin(), Contents.end(), Buf->getBufferStart()); + if (Error E = Buf->commit()) + return E; + return Error::success(); + } + } + return createStringError(errc::invalid_argument, "section '%s' not found", + SecName.str().c_str()); +} + +static void removeSections(const CommonConfig &Config, Object &Obj) { + SectionPred RemovePred = [](const Section &) { return false; }; + + // Explicitly-requested sections. 
+ if (!Config.ToRemove.empty()) { + RemovePred = [&Config](const Section &Sec) { + return Config.ToRemove.matches(Sec.Name); + }; + } + + if (Config.StripDebug) { + RemovePred = [RemovePred](const Section &Sec) { + return RemovePred(Sec) || isDebugSection(Sec); + }; + } + + if (Config.StripAll) { + RemovePred = [RemovePred](const Section &Sec) { + return RemovePred(Sec) || isDebugSection(Sec) || isLinkerSection(Sec) || + isNameSection(Sec) || isCommentSection(Sec); + }; + } + + if (Config.OnlyKeepDebug) { + RemovePred = [&Config](const Section &Sec) { + // Keep debug sections, unless explicitly requested to remove. + // Remove everything else, including known sections. + return Config.ToRemove.matches(Sec.Name) || !isDebugSection(Sec); + }; + } + + if (!Config.OnlySection.empty()) { + RemovePred = [&Config](const Section &Sec) { + // Explicitly keep these sections regardless of previous removes. + // Remove everything else, inluding known sections. + return !Config.OnlySection.matches(Sec.Name); + }; + } + + if (!Config.KeepSection.empty()) { + RemovePred = [&Config, RemovePred](const Section &Sec) { + // Explicitly keep these sections regardless of previous removes. + if (Config.KeepSection.matches(Sec.Name)) + return false; + // Otherwise defer to RemovePred. + return RemovePred(Sec); + }; + } + + Obj.removeSections(RemovePred); +} + +static Error handleArgs(const CommonConfig &Config, Object &Obj) { + // Only support AddSection, DumpSection, RemoveSection for now. 
+ for (StringRef Flag : Config.DumpSection) { + StringRef SecName; + StringRef FileName; + std::tie(SecName, FileName) = Flag.split("="); + if (Error E = dumpSectionToFile(SecName, FileName, Obj)) + return createFileError(FileName, std::move(E)); + } + + removeSections(Config, Obj); + + for (const NewSectionInfo &NewSection : Config.AddSection) { + Section Sec; + Sec.SectionType = llvm::wasm::WASM_SEC_CUSTOM; + Sec.Name = NewSection.SectionName; + + llvm::StringRef InputData = + llvm::StringRef(NewSection.SectionData->getBufferStart(), + NewSection.SectionData->getBufferSize()); + std::unique_ptr<MemoryBuffer> BufferCopy = MemoryBuffer::getMemBufferCopy( + InputData, NewSection.SectionData->getBufferIdentifier()); + Sec.Contents = ArrayRef<uint8_t>( + reinterpret_cast<const uint8_t *>(BufferCopy->getBufferStart()), + BufferCopy->getBufferSize()); + + Obj.addSectionWithOwnedContents(Sec, std::move(BufferCopy)); + } + + return Error::success(); +} + +Error executeObjcopyOnBinary(const CommonConfig &Config, const WasmConfig &, + object::WasmObjectFile &In, raw_ostream &Out) { + Reader TheReader(In); + Expected<std::unique_ptr<Object>> ObjOrErr = TheReader.create(); + if (!ObjOrErr) + return createFileError(Config.InputFilename, ObjOrErr.takeError()); + Object *Obj = ObjOrErr->get(); + assert(Obj && "Unable to deserialize Wasm object"); + if (Error E = handleArgs(Config, *Obj)) + return E; + Writer TheWriter(*Obj, Out); + if (Error E = TheWriter.write()) + return createFileError(Config.OutputFilename, std::move(E)); + return Error::success(); +} + +} // end namespace wasm +} // end namespace objcopy +} // end namespace llvm diff --git a/contrib/libs/llvm16/lib/ObjCopy/wasm/WasmObject.cpp b/contrib/libs/llvm16/lib/ObjCopy/wasm/WasmObject.cpp new file mode 100644 index 00000000000..28a2de6e6e4 --- /dev/null +++ b/contrib/libs/llvm16/lib/ObjCopy/wasm/WasmObject.cpp @@ -0,0 +1,34 @@ +//===- WasmObject.cpp -----------------------------------------------------===// +// +// 
Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "WasmObject.h" + +#include "llvm/Support/LEB128.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace objcopy { +namespace wasm { + +using namespace object; +using namespace llvm::wasm; + +void Object::addSectionWithOwnedContents( + Section NewSection, std::unique_ptr<MemoryBuffer> &&Content) { + Sections.push_back(NewSection); + OwnedContents.emplace_back(std::move(Content)); +} + +void Object::removeSections(function_ref<bool(const Section &)> ToRemove) { + // TODO: remove reloc sections for the removed section, handle symbols, etc. + llvm::erase_if(Sections, ToRemove); +} + +} // end namespace wasm +} // end namespace objcopy +} // end namespace llvm diff --git a/contrib/libs/llvm16/lib/ObjCopy/wasm/WasmObject.h b/contrib/libs/llvm16/lib/ObjCopy/wasm/WasmObject.h new file mode 100644 index 00000000000..9bc5831926c --- /dev/null +++ b/contrib/libs/llvm16/lib/ObjCopy/wasm/WasmObject.h @@ -0,0 +1,47 @@ +//===- WasmObject.h ---------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_OBJCOPY_WASM_WASMOBJECT_H +#define LLVM_LIB_OBJCOPY_WASM_WASMOBJECT_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/Wasm.h" +#include "llvm/Support/MemoryBuffer.h" +#include <vector> + +namespace llvm { +namespace objcopy { +namespace wasm { + +struct Section { + // For now, each section is only an opaque binary blob with no distinction + // between custom and known sections. + uint8_t SectionType; + StringRef Name; + ArrayRef<uint8_t> Contents; +}; + +struct Object { + llvm::wasm::WasmObjectHeader Header; + // For now don't discriminate between kinds of sections. + std::vector<Section> Sections; + + void addSectionWithOwnedContents(Section NewSection, + std::unique_ptr<MemoryBuffer> &&Content); + void removeSections(function_ref<bool(const Section &)> ToRemove); + +private: + std::vector<std::unique_ptr<MemoryBuffer>> OwnedContents; +}; + +} // end namespace wasm +} // end namespace objcopy +} // end namespace llvm + +#endif // LLVM_LIB_OBJCOPY_WASM_WASMOBJECT_H diff --git a/contrib/libs/llvm16/lib/ObjCopy/wasm/WasmReader.cpp b/contrib/libs/llvm16/lib/ObjCopy/wasm/WasmReader.cpp new file mode 100644 index 00000000000..6e7d8b5591c --- /dev/null +++ b/contrib/libs/llvm16/lib/ObjCopy/wasm/WasmReader.cpp @@ -0,0 +1,39 @@ +//===- WasmReader.cpp -----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "WasmReader.h" + +namespace llvm { +namespace objcopy { +namespace wasm { + +using namespace object; +using namespace llvm::wasm; + +Expected<std::unique_ptr<Object>> Reader::create() const { + auto Obj = std::make_unique<Object>(); + Obj->Header = WasmObj.getHeader(); + std::vector<Section> Sections; + Obj->Sections.reserve(WasmObj.getNumSections()); + for (const SectionRef &Sec : WasmObj.sections()) { + const WasmSection &WS = WasmObj.getWasmSection(Sec); + Obj->Sections.push_back( + {static_cast<uint8_t>(WS.Type), WS.Name, WS.Content}); + // Give known sections standard names to allow them to be selected. (Custom + // sections already have their names filled in by the parser). + Section &ReaderSec = Obj->Sections.back(); + if (ReaderSec.SectionType > WASM_SEC_CUSTOM && + ReaderSec.SectionType <= WASM_SEC_LAST_KNOWN) + ReaderSec.Name = sectionTypeToString(ReaderSec.SectionType); + } + return std::move(Obj); +} + +} // end namespace wasm +} // end namespace objcopy +} // end namespace llvm diff --git a/contrib/libs/llvm16/lib/ObjCopy/wasm/WasmReader.h b/contrib/libs/llvm16/lib/ObjCopy/wasm/WasmReader.h new file mode 100644 index 00000000000..d71660fa2b6 --- /dev/null +++ b/contrib/libs/llvm16/lib/ObjCopy/wasm/WasmReader.h @@ -0,0 +1,31 @@ +//===- WasmReader.h ---------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_OBJCOPY_WASM_WASMREADER_H +#define LLVM_LIB_OBJCOPY_WASM_WASMREADER_H + +#include "WasmObject.h" + +namespace llvm { +namespace objcopy { +namespace wasm { + +class Reader { +public: + explicit Reader(const object::WasmObjectFile &O) : WasmObj(O) {} + Expected<std::unique_ptr<Object>> create() const; + +private: + const object::WasmObjectFile &WasmObj; +}; + +} // end namespace wasm +} // end namespace objcopy +} // end namespace llvm + +#endif // LLVM_LIB_OBJCOPY_WASM_WASMREADER_H diff --git a/contrib/libs/llvm16/lib/ObjCopy/wasm/WasmWriter.cpp b/contrib/libs/llvm16/lib/ObjCopy/wasm/WasmWriter.cpp new file mode 100644 index 00000000000..fdcd441cc79 --- /dev/null +++ b/contrib/libs/llvm16/lib/ObjCopy/wasm/WasmWriter.cpp @@ -0,0 +1,79 @@ +//===- WasmWriter.cpp -----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "WasmWriter.h" +#include "llvm/BinaryFormat/Wasm.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace objcopy { +namespace wasm { + +using namespace object; +using namespace llvm::wasm; + +Writer::SectionHeader Writer::createSectionHeader(const Section &S, + size_t &SectionSize) { + SectionHeader Header; + raw_svector_ostream OS(Header); + OS << S.SectionType; + bool HasName = S.SectionType == WASM_SEC_CUSTOM; + SectionSize = S.Contents.size(); + if (HasName) + SectionSize += getULEB128Size(S.Name.size()) + S.Name.size(); + // Pad the LEB value out to 5 bytes to make it a predictable size, and + // match the behavior of clang. + encodeULEB128(SectionSize, OS, 5); + if (HasName) { + encodeULEB128(S.Name.size(), OS); + OS << S.Name; + } + // Total section size is the content size plus 1 for the section type and + // 5 for the LEB-encoded size. + SectionSize = SectionSize + 1 + 5; + return Header; +} + +size_t Writer::finalize() { + size_t ObjectSize = sizeof(WasmMagic) + sizeof(WasmVersion); + SectionHeaders.reserve(Obj.Sections.size()); + // Finalize the headers of each section so we know the total size. + for (const Section &S : Obj.Sections) { + size_t SectionSize; + SectionHeaders.push_back(createSectionHeader(S, SectionSize)); + ObjectSize += SectionSize; + } + return ObjectSize; +} + +Error Writer::write() { + size_t TotalSize = finalize(); + Out.reserveExtraSpace(TotalSize); + + // Write the header. + Out.write(Obj.Header.Magic.data(), Obj.Header.Magic.size()); + uint32_t Version; + support::endian::write32le(&Version, Obj.Header.Version); + Out.write(reinterpret_cast<const char *>(&Version), sizeof(Version)); + + // Write each section. 
+ for (size_t I = 0, S = SectionHeaders.size(); I < S; ++I) { + Out.write(SectionHeaders[I].data(), SectionHeaders[I].size()); + Out.write(reinterpret_cast<const char *>(Obj.Sections[I].Contents.data()), + Obj.Sections[I].Contents.size()); + } + + return Error::success(); +} + +} // end namespace wasm +} // end namespace objcopy +} // end namespace llvm diff --git a/contrib/libs/llvm16/lib/ObjCopy/wasm/WasmWriter.h b/contrib/libs/llvm16/lib/ObjCopy/wasm/WasmWriter.h new file mode 100644 index 00000000000..14bbcf88875 --- /dev/null +++ b/contrib/libs/llvm16/lib/ObjCopy/wasm/WasmWriter.h @@ -0,0 +1,49 @@ +//===- WasmWriter.h ---------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_OBJCOPY_WASM_WASMWRITER_H +#define LLVM_LIB_OBJCOPY_WASM_WASMWRITER_H + +#include "WasmObject.h" +#include <cstdint> +#include <vector> + +namespace llvm { +namespace objcopy { +namespace wasm { + +class Writer { +public: + Writer(Object &Obj, raw_ostream &Out) : Obj(Obj), Out(Out) {} + Error write(); + +private: + using SectionHeader = SmallVector<char, 8>; + Object &Obj; + raw_ostream &Out; + std::vector<SectionHeader> SectionHeaders; + + /// Generate a wasm section section header for S. + /// The header consists of + /// * A one-byte section ID (aka the section type). + /// * The size of the section contents, encoded as ULEB128. + /// * If the section is a custom section (type 0) it also has a name, which is + /// encoded as a length-prefixed string. The encoded section size *includes* + /// this string. + /// See https://webassembly.github.io/spec/core/binary/modules.html#sections + /// Return the header and store the total size in SectionSize. 
+ static SectionHeader createSectionHeader(const Section &S, + size_t &SectionSize); + size_t finalize(); +}; + +} // end namespace wasm +} // end namespace objcopy +} // end namespace llvm + +#endif // LLVM_LIB_OBJCOPY_WASM_WASMWRITER_H diff --git a/contrib/libs/llvm16/lib/ObjCopy/ya.make b/contrib/libs/llvm16/lib/ObjCopy/ya.make new file mode 100644 index 00000000000..6bb70942976 --- /dev/null +++ b/contrib/libs/llvm16/lib/ObjCopy/ya.make @@ -0,0 +1,50 @@ +# Generated by devtools/yamaker. + +LIBRARY() + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + +PEERDIR( + contrib/libs/llvm16 + contrib/libs/llvm16/lib/BinaryFormat + contrib/libs/llvm16/lib/MC + contrib/libs/llvm16/lib/Object + contrib/libs/llvm16/lib/Support +) + +ADDINCL( + contrib/libs/llvm16/lib/ObjCopy +) + +NO_COMPILER_WARNINGS() + +NO_UTIL() + +SRCS( + Archive.cpp + COFF/COFFObjcopy.cpp + COFF/COFFObject.cpp + COFF/COFFReader.cpp + COFF/COFFWriter.cpp + CommonConfig.cpp + ConfigManager.cpp + ELF/ELFObjcopy.cpp + ELF/ELFObject.cpp + MachO/MachOLayoutBuilder.cpp + MachO/MachOObjcopy.cpp + MachO/MachOObject.cpp + MachO/MachOReader.cpp + MachO/MachOWriter.cpp + ObjCopy.cpp + XCOFF/XCOFFObjcopy.cpp + XCOFF/XCOFFReader.cpp + XCOFF/XCOFFWriter.cpp + wasm/WasmObjcopy.cpp + wasm/WasmObject.cpp + wasm/WasmReader.cpp + wasm/WasmWriter.cpp +) + +END() |
