//===- BitstreamReader.cpp - BitstreamReader implementation ---------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "llvm/Bitstream/BitstreamReader.h" #include "llvm/ADT/StringRef.h" #include <cassert> #include <string> using namespace llvm; //===----------------------------------------------------------------------===// // BitstreamCursor implementation //===----------------------------------------------------------------------===// /// Having read the ENTER_SUBBLOCK abbrevid, enter the block. Error BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) { // Save the current block's state on BlockScope. BlockScope.push_back(Block(CurCodeSize)); BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); // Add the abbrevs specific to this block to the CurAbbrevs list. if (BlockInfo) { if (const BitstreamBlockInfo::BlockInfo *Info = BlockInfo->getBlockInfo(BlockID)) { llvm::append_range(CurAbbrevs, Info->Abbrevs); } } // Get the codesize of this block. Expected<uint32_t> MaybeVBR = ReadVBR(bitc::CodeLenWidth); if (!MaybeVBR) return MaybeVBR.takeError(); CurCodeSize = MaybeVBR.get(); if (CurCodeSize > MaxChunkSize) return llvm::createStringError( std::errc::illegal_byte_sequence, "can't read more than %zu at a time, trying to read %u", +MaxChunkSize, CurCodeSize); SkipToFourByteBoundary(); Expected<word_t> MaybeNum = Read(bitc::BlockSizeWidth); if (!MaybeNum) return MaybeNum.takeError(); word_t NumWords = MaybeNum.get(); if (NumWordsP) *NumWordsP = NumWords; if (CurCodeSize == 0) return llvm::createStringError( std::errc::illegal_byte_sequence, "can't enter sub-block: current code size is 0"); if (AtEndOfStream()) return llvm::createStringError( std::errc::illegal_byte_sequence, "can't enter sub block: already at end of stream"); return Error::success(); } static Expected<uint64_t> readAbbreviatedField(BitstreamCursor &Cursor, const BitCodeAbbrevOp &Op) { assert(!Op.isLiteral() && "Not to be used with literals!"); // Decode the value as we are commanded. switch (Op.getEncoding()) { case BitCodeAbbrevOp::Array: case BitCodeAbbrevOp::Blob: llvm_unreachable("Should not reach here"); case BitCodeAbbrevOp::Fixed: assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize); return Cursor.Read((unsigned)Op.getEncodingData()); case BitCodeAbbrevOp::VBR: assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize); return Cursor.ReadVBR64((unsigned)Op.getEncodingData()); case BitCodeAbbrevOp::Char6: if (Expected<unsigned> Res = Cursor.Read(6)) return BitCodeAbbrevOp::DecodeChar6(Res.get()); else return Res.takeError(); } llvm_unreachable("invalid abbreviation encoding"); } /// skipRecord - Read the current record and discard it. Expected<unsigned> BitstreamCursor::skipRecord(unsigned AbbrevID) { // Skip unabbreviated records by reading past their entries. if (AbbrevID == bitc::UNABBREV_RECORD) { Expected<uint32_t> MaybeCode = ReadVBR(6); if (!MaybeCode) return MaybeCode.takeError(); unsigned Code = MaybeCode.get(); Expected<uint32_t> MaybeVBR = ReadVBR(6); if (!MaybeVBR) return MaybeVBR.get(); unsigned NumElts = MaybeVBR.get(); for (unsigned i = 0; i != NumElts; ++i) if (Expected<uint64_t> Res = ReadVBR64(6)) ; // Skip! else return Res.takeError(); return Code; } const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID); const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0); unsigned Code; if (CodeOp.isLiteral()) Code = CodeOp.getLiteralValue(); else { if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array || CodeOp.getEncoding() == BitCodeAbbrevOp::Blob) return llvm::createStringError( std::errc::illegal_byte_sequence, "Abbreviation starts with an Array or a Blob"); Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp); if (!MaybeCode) return MaybeCode.takeError(); Code = MaybeCode.get(); } for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i < e; ++i) { const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); if (Op.isLiteral()) continue; if (Op.getEncoding() != BitCodeAbbrevOp::Array && Op.getEncoding() != BitCodeAbbrevOp::Blob) { if (Expected<uint64_t> MaybeField = readAbbreviatedField(*this, Op)) continue; else return MaybeField.takeError(); } if (Op.getEncoding() == BitCodeAbbrevOp::Array) { // Array case. Read the number of elements as a vbr6. Expected<uint32_t> MaybeNum = ReadVBR(6); if (!MaybeNum) return MaybeNum.takeError(); unsigned NumElts = MaybeNum.get(); // Get the element encoding. assert(i+2 == e && "array op not second to last?"); const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); // Read all the elements. // Decode the value as we are commanded. switch (EltEnc.getEncoding()) { default: report_fatal_error("Array element type can't be an Array or a Blob"); case BitCodeAbbrevOp::Fixed: assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize); if (Error Err = JumpToBit(GetCurrentBitNo() + static_cast<uint64_t>(NumElts) * EltEnc.getEncodingData())) return std::move(Err); break; case BitCodeAbbrevOp::VBR: assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize); for (; NumElts; --NumElts) if (Expected<uint64_t> Res = ReadVBR64((unsigned)EltEnc.getEncodingData())) ; // Skip! else return Res.takeError(); break; case BitCodeAbbrevOp::Char6: if (Error Err = JumpToBit(GetCurrentBitNo() + NumElts * 6)) return std::move(Err); break; } continue; } assert(Op.getEncoding() == BitCodeAbbrevOp::Blob); // Blob case. Read the number of bytes as a vbr6. Expected<uint32_t> MaybeNum = ReadVBR(6); if (!MaybeNum) return MaybeNum.takeError(); unsigned NumElts = MaybeNum.get(); SkipToFourByteBoundary(); // 32-bit alignment // Figure out where the end of this blob will be including tail padding. const size_t NewEnd = GetCurrentBitNo() + alignTo(NumElts, 4) * 8; // If this would read off the end of the bitcode file, just set the // record to empty and return. if (!canSkipToPos(NewEnd/8)) { skipToEnd(); break; } // Skip over the blob. if (Error Err = JumpToBit(NewEnd)) return std::move(Err); } return Code; } Expected<unsigned> BitstreamCursor::readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals, StringRef *Blob) { if (AbbrevID == bitc::UNABBREV_RECORD) { Expected<uint32_t> MaybeCode = ReadVBR(6); if (!MaybeCode) return MaybeCode.takeError(); uint32_t Code = MaybeCode.get(); Expected<uint32_t> MaybeNumElts = ReadVBR(6); if (!MaybeNumElts) return MaybeNumElts.takeError(); uint32_t NumElts = MaybeNumElts.get(); Vals.reserve(Vals.size() + NumElts); for (unsigned i = 0; i != NumElts; ++i) if (Expected<uint64_t> MaybeVal = ReadVBR64(6)) Vals.push_back(MaybeVal.get()); else return MaybeVal.takeError(); return Code; } const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID); // Read the record code first. assert(Abbv->getNumOperandInfos() != 0 && "no record code in abbreviation?"); const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0); unsigned Code; if (CodeOp.isLiteral()) Code = CodeOp.getLiteralValue(); else { if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array || CodeOp.getEncoding() == BitCodeAbbrevOp::Blob) report_fatal_error("Abbreviation starts with an Array or a Blob"); if (Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp)) Code = MaybeCode.get(); else return MaybeCode.takeError(); } for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) { const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); if (Op.isLiteral()) { Vals.push_back(Op.getLiteralValue()); continue; } if (Op.getEncoding() != BitCodeAbbrevOp::Array && Op.getEncoding() != BitCodeAbbrevOp::Blob) { if (Expected<uint64_t> MaybeVal = readAbbreviatedField(*this, Op)) Vals.push_back(MaybeVal.get()); else return MaybeVal.takeError(); continue; } if (Op.getEncoding() == BitCodeAbbrevOp::Array) { // Array case. Read the number of elements as a vbr6. Expected<uint32_t> MaybeNumElts = ReadVBR(6); if (!MaybeNumElts) return MaybeNumElts.takeError(); uint32_t NumElts = MaybeNumElts.get(); Vals.reserve(Vals.size() + NumElts); // Get the element encoding. if (i + 2 != e) report_fatal_error("Array op not second to last"); const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); if (!EltEnc.isEncoding()) report_fatal_error( "Array element type has to be an encoding of a type"); // Read all the elements. switch (EltEnc.getEncoding()) { default: report_fatal_error("Array element type can't be an Array or a Blob"); case BitCodeAbbrevOp::Fixed: for (; NumElts; --NumElts) if (Expected<SimpleBitstreamCursor::word_t> MaybeVal = Read((unsigned)EltEnc.getEncodingData())) Vals.push_back(MaybeVal.get()); else return MaybeVal.takeError(); break; case BitCodeAbbrevOp::VBR: for (; NumElts; --NumElts) if (Expected<uint64_t> MaybeVal = ReadVBR64((unsigned)EltEnc.getEncodingData())) Vals.push_back(MaybeVal.get()); else return MaybeVal.takeError(); break; case BitCodeAbbrevOp::Char6: for (; NumElts; --NumElts) if (Expected<SimpleBitstreamCursor::word_t> MaybeVal = Read(6)) Vals.push_back(BitCodeAbbrevOp::DecodeChar6(MaybeVal.get())); else return MaybeVal.takeError(); } continue; } assert(Op.getEncoding() == BitCodeAbbrevOp::Blob); // Blob case. Read the number of bytes as a vbr6. Expected<uint32_t> MaybeNumElts = ReadVBR(6); if (!MaybeNumElts) return MaybeNumElts.takeError(); uint32_t NumElts = MaybeNumElts.get(); SkipToFourByteBoundary(); // 32-bit alignment // Figure out where the end of this blob will be including tail padding. size_t CurBitPos = GetCurrentBitNo(); const size_t NewEnd = CurBitPos + alignTo(NumElts, 4) * 8; // If this would read off the end of the bitcode file, just set the // record to empty and return. if (!canSkipToPos(NewEnd/8)) { Vals.append(NumElts, 0); skipToEnd(); break; } // Otherwise, inform the streamer that we need these bytes in memory. Skip // over tail padding first, in case jumping to NewEnd invalidates the Blob // pointer. if (Error Err = JumpToBit(NewEnd)) return std::move(Err); const char *Ptr = (const char *)getPointerToBit(CurBitPos, NumElts); // If we can return a reference to the data, do so to avoid copying it. if (Blob) { *Blob = StringRef(Ptr, NumElts); } else { // Otherwise, unpack into Vals with zero extension. auto *UPtr = reinterpret_cast<const unsigned char *>(Ptr); Vals.append(UPtr, UPtr + NumElts); } } return Code; } Error BitstreamCursor::ReadAbbrevRecord() { auto Abbv = std::make_shared<BitCodeAbbrev>(); Expected<uint32_t> MaybeNumOpInfo = ReadVBR(5); if (!MaybeNumOpInfo) return MaybeNumOpInfo.takeError(); unsigned NumOpInfo = MaybeNumOpInfo.get(); for (unsigned i = 0; i != NumOpInfo; ++i) { Expected<word_t> MaybeIsLiteral = Read(1); if (!MaybeIsLiteral) return MaybeIsLiteral.takeError(); bool IsLiteral = MaybeIsLiteral.get(); if (IsLiteral) { Expected<uint64_t> MaybeOp = ReadVBR64(8); if (!MaybeOp) return MaybeOp.takeError(); Abbv->Add(BitCodeAbbrevOp(MaybeOp.get())); continue; } Expected<word_t> MaybeEncoding = Read(3); if (!MaybeEncoding) return MaybeEncoding.takeError(); BitCodeAbbrevOp::Encoding E = (BitCodeAbbrevOp::Encoding)MaybeEncoding.get(); if (BitCodeAbbrevOp::hasEncodingData(E)) { Expected<uint64_t> MaybeData = ReadVBR64(5); if (!MaybeData) return MaybeData.takeError(); uint64_t Data = MaybeData.get(); // As a special case, handle fixed(0) (i.e., a fixed field with zero bits) // and vbr(0) as a literal zero. This is decoded the same way, and avoids // a slow path in Read() to have to handle reading zero bits. if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) && Data == 0) { Abbv->Add(BitCodeAbbrevOp(0)); continue; } if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) && Data > MaxChunkSize) report_fatal_error( "Fixed or VBR abbrev record with size > MaxChunkData"); Abbv->Add(BitCodeAbbrevOp(E, Data)); } else Abbv->Add(BitCodeAbbrevOp(E)); } if (Abbv->getNumOperandInfos() == 0) report_fatal_error("Abbrev record with no operands"); CurAbbrevs.push_back(std::move(Abbv)); return Error::success(); } Expected<Optional<BitstreamBlockInfo>> BitstreamCursor::ReadBlockInfoBlock(bool ReadBlockInfoNames) { if (llvm::Error Err = EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID)) return std::move(Err); BitstreamBlockInfo NewBlockInfo; SmallVector<uint64_t, 64> Record; BitstreamBlockInfo::BlockInfo *CurBlockInfo = nullptr; // Read all the records for this module. while (true) { Expected<BitstreamEntry> MaybeEntry = advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs); if (!MaybeEntry) return MaybeEntry.takeError(); BitstreamEntry Entry = MaybeEntry.get(); switch (Entry.Kind) { case llvm::BitstreamEntry::SubBlock: // Handled for us already. case llvm::BitstreamEntry::Error: return None; case llvm::BitstreamEntry::EndBlock: return std::move(NewBlockInfo); case llvm::BitstreamEntry::Record: // The interesting case. break; } // Read abbrev records, associate them with CurBID. if (Entry.ID == bitc::DEFINE_ABBREV) { if (!CurBlockInfo) return None; if (Error Err = ReadAbbrevRecord()) return std::move(Err); // ReadAbbrevRecord installs the abbrev in CurAbbrevs. Move it to the // appropriate BlockInfo. CurBlockInfo->Abbrevs.push_back(std::move(CurAbbrevs.back())); CurAbbrevs.pop_back(); continue; } // Read a record. Record.clear(); Expected<unsigned> MaybeBlockInfo = readRecord(Entry.ID, Record); if (!MaybeBlockInfo) return MaybeBlockInfo.takeError(); switch (MaybeBlockInfo.get()) { default: break; // Default behavior, ignore unknown content. case bitc::BLOCKINFO_CODE_SETBID: if (Record.size() < 1) return None; CurBlockInfo = &NewBlockInfo.getOrCreateBlockInfo((unsigned)Record[0]); break; case bitc::BLOCKINFO_CODE_BLOCKNAME: { if (!CurBlockInfo) return None; if (!ReadBlockInfoNames) break; // Ignore name. CurBlockInfo->Name = std::string(Record.begin(), Record.end()); break; } case bitc::BLOCKINFO_CODE_SETRECORDNAME: { if (!CurBlockInfo) return None; if (!ReadBlockInfoNames) break; // Ignore name. CurBlockInfo->RecordNames.emplace_back( (unsigned)Record[0], std::string(Record.begin() + 1, Record.end())); break; } } } }