diff options
Diffstat (limited to 'llvm/lib/DebugInfo')
46 files changed, 6874 insertions, 693 deletions
diff --git a/llvm/lib/DebugInfo/BTF/BTFContext.cpp b/llvm/lib/DebugInfo/BTF/BTFContext.cpp new file mode 100644 index 000000000000..24898739b824 --- /dev/null +++ b/llvm/lib/DebugInfo/BTF/BTFContext.cpp @@ -0,0 +1,69 @@ +//===- BTFContext.cpp ---------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implementation of the BTFContext interface, this is used by +// llvm-objdump tool to print source code alongside disassembly. +// In fact, currently it is a simple wrapper for BTFParser instance. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/BTF/BTFContext.h" + +#define DEBUG_TYPE "debug-info-btf-context" + +using namespace llvm; +using object::ObjectFile; +using object::SectionedAddress; + +DILineInfo BTFContext::getLineInfoForAddress(SectionedAddress Address, + DILineInfoSpecifier Specifier) { + const BTF::BPFLineInfo *LineInfo = BTF.findLineInfo(Address); + DILineInfo Result; + if (!LineInfo) + return Result; + + Result.LineSource = BTF.findString(LineInfo->LineOff); + Result.FileName = BTF.findString(LineInfo->FileNameOff); + Result.Line = LineInfo->getLine(); + Result.Column = LineInfo->getCol(); + return Result; +} + +DILineInfo BTFContext::getLineInfoForDataAddress(SectionedAddress Address) { + // BTF does not convey such information. + return {}; +} + +DILineInfoTable +BTFContext::getLineInfoForAddressRange(SectionedAddress Address, uint64_t Size, + DILineInfoSpecifier Specifier) { + // This function is used only from llvm-rtdyld utility and a few + // JITEventListener implementations. Ignore it for now. 
+ return {}; +} + +DIInliningInfo +BTFContext::getInliningInfoForAddress(SectionedAddress Address, + DILineInfoSpecifier Specifier) { + // BTF does not convey such information + return {}; +} + +std::vector<DILocal> BTFContext::getLocalsForAddress(SectionedAddress Address) { + // BTF does not convey such information + return {}; +} + +std::unique_ptr<BTFContext> +BTFContext::create(const ObjectFile &Obj, + std::function<void(Error)> ErrorHandler) { + auto Ctx = std::make_unique<BTFContext>(); + if (Error E = Ctx->BTF.parse(Obj)) + ErrorHandler(std::move(E)); + return Ctx; +} diff --git a/llvm/lib/DebugInfo/BTF/BTFParser.cpp b/llvm/lib/DebugInfo/BTF/BTFParser.cpp new file mode 100644 index 000000000000..6151e1b15cbb --- /dev/null +++ b/llvm/lib/DebugInfo/BTF/BTFParser.cpp @@ -0,0 +1,283 @@ +//===- BTFParser.cpp ------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// BTFParser reads/interprets .BTF and .BTF.ext ELF sections. +// Refer to BTFParser.h for API description. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/BTF/BTFParser.h" +#include "llvm/Support/Errc.h" + +#define DEBUG_TYPE "debug-info-btf-parser" + +using namespace llvm; +using object::ObjectFile; +using object::SectionedAddress; +using object::SectionRef; + +const char BTFSectionName[] = ".BTF"; +const char BTFExtSectionName[] = ".BTF.ext"; + +// Utility class with API similar to raw_ostream but can be cast +// to Error, e.g.: +// +// Error foo(...) { +// ... +// if (Error E = bar(...)) +// return Err("error while foo(): ") << E; +// ... 
+// } +// +namespace { +class Err { + std::string Buffer; + raw_string_ostream Stream; + +public: + Err(const char *InitialMsg) : Buffer(InitialMsg), Stream(Buffer) {} + Err(const char *SectionName, DataExtractor::Cursor &C) + : Buffer(), Stream(Buffer) { + *this << "error while reading " << SectionName + << " section: " << C.takeError(); + }; + + template <typename T> Err &operator<<(T Val) { + Stream << Val; + return *this; + } + + Err &write_hex(unsigned long long Val) { + Stream.write_hex(Val); + return *this; + } + + Err &operator<<(Error Val) { + handleAllErrors(std::move(Val), + [=](ErrorInfoBase &Info) { Stream << Info.message(); }); + return *this; + } + + operator Error() const { + return make_error<StringError>(Buffer, errc::invalid_argument); + } +}; +} // anonymous namespace + +// ParseContext wraps information that is only necessary while parsing +// ObjectFile and can be discarded once parsing is done. +// Used by BTFParser::parse* auxiliary functions. +struct BTFParser::ParseContext { + const ObjectFile &Obj; + // Map from ELF section name to SectionRef + DenseMap<StringRef, SectionRef> Sections; + +public: + ParseContext(const ObjectFile &Obj) : Obj(Obj) {} + + Expected<DataExtractor> makeExtractor(SectionRef Sec) { + Expected<StringRef> Contents = Sec.getContents(); + if (!Contents) + return Contents.takeError(); + return DataExtractor(Contents.get(), Obj.isLittleEndian(), + Obj.getBytesInAddress()); + } + + std::optional<SectionRef> findSection(StringRef Name) const { + auto It = Sections.find(Name); + if (It != Sections.end()) + return It->second; + return std::nullopt; + } +}; + +Error BTFParser::parseBTF(ParseContext &Ctx, SectionRef BTF) { + Expected<DataExtractor> MaybeExtractor = Ctx.makeExtractor(BTF); + if (!MaybeExtractor) + return MaybeExtractor.takeError(); + + DataExtractor &Extractor = MaybeExtractor.get(); + DataExtractor::Cursor C = DataExtractor::Cursor(0); + uint16_t Magic = Extractor.getU16(C); + if (!C) + return Err(".BTF", C); 
+  if (Magic != BTF::MAGIC)
+    return Err("invalid .BTF magic: ").write_hex(Magic);
+  uint8_t Version = Extractor.getU8(C);
+  if (!C)
+    return Err(".BTF", C);
+  if (Version != 1)
+    return Err("unsupported .BTF version: ") << (unsigned)Version;
+  (void)Extractor.getU8(C); // flags
+  uint32_t HdrLen = Extractor.getU32(C);
+  if (!C)
+    return Err(".BTF", C);
+  if (HdrLen < 8)
+    return Err("unexpected .BTF header length: ") << HdrLen;
+  (void)Extractor.getU32(C); // type_off
+  (void)Extractor.getU32(C); // type_len
+  uint32_t StrOff = Extractor.getU32(C);
+  uint32_t StrLen = Extractor.getU32(C);
+  uint64_t StrStart = uint64_t(HdrLen) + StrOff; // 64-bit sum: cannot wrap
+  uint64_t StrEnd = StrStart + StrLen; // so the size check below is reliable
+  if (!C)
+    return Err(".BTF", C);
+  if (Extractor.getData().size() < StrEnd)
+    return Err("invalid .BTF section size, expecting at-least ")
+           << StrEnd << " bytes";
+
+  StringsTable = Extractor.getData().substr(StrStart, StrLen);
+  return Error::success();
+}
+
+Error BTFParser::parseBTFExt(ParseContext &Ctx, SectionRef BTFExt) {
+  Expected<DataExtractor> MaybeExtractor = Ctx.makeExtractor(BTFExt);
+  if (!MaybeExtractor)
+    return MaybeExtractor.takeError();
+
+  DataExtractor &Extractor = MaybeExtractor.get();
+  DataExtractor::Cursor C = DataExtractor::Cursor(0);
+  uint16_t Magic = Extractor.getU16(C);
+  if (!C)
+    return Err(".BTF.ext", C);
+  if (Magic != BTF::MAGIC)
+    return Err("invalid .BTF.ext magic: ").write_hex(Magic);
+  uint8_t Version = Extractor.getU8(C);
+  if (!C)
+    return Err(".BTF.ext", C); // name the section that is being read here
+  if (Version != 1)
+    return Err("unsupported .BTF.ext version: ") << (unsigned)Version;
+  (void)Extractor.getU8(C); // flags
+  uint32_t HdrLen = Extractor.getU32(C);
+  if (!C)
+    return Err(".BTF.ext", C);
+  if (HdrLen < 8)
+    return Err("unexpected .BTF.ext header length: ") << HdrLen;
+  (void)Extractor.getU32(C); // func_info_off
+  (void)Extractor.getU32(C); // func_info_len
+  uint32_t LineInfoOff = Extractor.getU32(C);
+  uint32_t LineInfoLen = Extractor.getU32(C);
+  if (!C)
+    return Err(".BTF.ext", C);
+  uint64_t LineInfoStart = uint64_t(HdrLen) + LineInfoOff; // 64-bit: no wrap
+  uint64_t LineInfoEnd = LineInfoStart + LineInfoLen;
+  if (Error E = parseLineInfo(Ctx, Extractor, LineInfoStart, LineInfoEnd))
+    return E;
+
+  return Error::success();
+}
+
+Error BTFParser::parseLineInfo(ParseContext &Ctx, DataExtractor &Extractor,
+                               uint64_t LineInfoStart, uint64_t LineInfoEnd) {
+  DataExtractor::Cursor C = DataExtractor::Cursor(LineInfoStart);
+  uint32_t RecSize = Extractor.getU32(C);
+  if (!C)
+    return Err(".BTF.ext", C);
+  if (RecSize < 16)
+    return Err("unexpected .BTF.ext line info record length: ") << RecSize;
+
+  while (C && C.tell() < LineInfoEnd) {
+    uint32_t SecNameOff = Extractor.getU32(C);
+    uint32_t NumInfo = Extractor.getU32(C);
+    StringRef SecName = findString(SecNameOff);
+    std::optional<SectionRef> Sec = Ctx.findSection(SecName);
+    if (!C)
+      return Err(".BTF.ext", C);
+    if (!Sec)
+      return Err("") << "can't find section '" << SecName
+                     << "' while parsing .BTF.ext line info";
+    BTFLinesVector &Lines = SectionLines[Sec->getIndex()];
+    for (uint32_t I = 0; C && I < NumInfo; ++I) {
+      uint64_t RecStart = C.tell();
+      uint32_t InsnOff = Extractor.getU32(C);
+      uint32_t FileNameOff = Extractor.getU32(C);
+      uint32_t LineOff = Extractor.getU32(C);
+      uint32_t LineCol = Extractor.getU32(C);
+      if (!C)
+        return Err(".BTF.ext", C);
+      Lines.push_back({InsnOff, FileNameOff, LineOff, LineCol});
+      C.seek(RecStart + RecSize);
+    }
+    llvm::stable_sort(Lines,
+                      [](const BTF::BPFLineInfo &L, const BTF::BPFLineInfo &R) {
+                        return L.InsnOffset < R.InsnOffset;
+                      });
+  }
+  if (!C)
+    return Err(".BTF.ext", C);
+
+  return Error::success();
+}
+
+Error BTFParser::parse(const ObjectFile &Obj) {
+  StringsTable = StringRef();
+  SectionLines.clear();
+
+  ParseContext Ctx(Obj);
+  std::optional<SectionRef> BTF;
+  std::optional<SectionRef> BTFExt;
+  for (SectionRef Sec : Obj.sections()) {
+    Expected<StringRef> MaybeName = Sec.getName();
+    if (!MaybeName)
+      return Err("error while reading section name: ") <<
MaybeName.takeError(); + Ctx.Sections[*MaybeName] = Sec; + if (*MaybeName == BTFSectionName) + BTF = Sec; + if (*MaybeName == BTFExtSectionName) + BTFExt = Sec; + } + if (!BTF) + return Err("can't find .BTF section"); + if (!BTFExt) + return Err("can't find .BTF.ext section"); + if (Error E = parseBTF(Ctx, *BTF)) + return E; + if (Error E = parseBTFExt(Ctx, *BTFExt)) + return E; + + return Error::success(); +} + +bool BTFParser::hasBTFSections(const ObjectFile &Obj) { + bool HasBTF = false; + bool HasBTFExt = false; + for (SectionRef Sec : Obj.sections()) { + Expected<StringRef> Name = Sec.getName(); + if (Error E = Name.takeError()) { + logAllUnhandledErrors(std::move(E), errs()); + continue; + } + HasBTF |= *Name == BTFSectionName; + HasBTFExt |= *Name == BTFExtSectionName; + if (HasBTF && HasBTFExt) + return true; + } + return false; +} + +StringRef BTFParser::findString(uint32_t Offset) const { + return StringsTable.slice(Offset, StringsTable.find(0, Offset)); +} + +const BTF::BPFLineInfo * +BTFParser::findLineInfo(SectionedAddress Address) const { + auto MaybeSecInfo = SectionLines.find(Address.SectionIndex); + if (MaybeSecInfo == SectionLines.end()) + return nullptr; + + const BTFLinesVector &SecInfo = MaybeSecInfo->second; + const uint64_t TargetOffset = Address.Address; + BTFLinesVector::const_iterator LineInfo = + llvm::partition_point(SecInfo, [=](const BTF::BPFLineInfo &Line) { + return Line.InsnOffset < TargetOffset; + }); + if (LineInfo == SecInfo.end() || LineInfo->InsnOffset != Address.Address) + return nullptr; + + return LineInfo; +} diff --git a/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp b/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp index aea672976017..0d0a357dce68 100644 --- a/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp +++ b/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/CodeView/CodeViewRecordIO.h" 
+#include "llvm/ADT/StringExtras.h" #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/GUID.h" #include "llvm/DebugInfo/CodeView/RecordSerialization.h" diff --git a/llvm/lib/DebugInfo/CodeView/EnumTables.cpp b/llvm/lib/DebugInfo/CodeView/EnumTables.cpp index 78a258600696..b2f0099bd01c 100644 --- a/llvm/lib/DebugInfo/CodeView/EnumTables.cpp +++ b/llvm/lib/DebugInfo/CodeView/EnumTables.cpp @@ -105,6 +105,7 @@ static const EnumEntry<codeview::SourceLanguage> SourceLanguages[] = { CV_ENUM_ENT(SourceLanguage, JScript), CV_ENUM_ENT(SourceLanguage, MSIL), CV_ENUM_ENT(SourceLanguage, HLSL), CV_ENUM_ENT(SourceLanguage, D), CV_ENUM_ENT(SourceLanguage, Swift), CV_ENUM_ENT(SourceLanguage, Rust), + CV_ENUM_ENT(SourceLanguage, ObjC), CV_ENUM_ENT(SourceLanguage, ObjCpp), }; static const EnumEntry<uint32_t> CompileSym2FlagNames[] = { diff --git a/llvm/lib/DebugInfo/CodeView/TypeRecordHelpers.cpp b/llvm/lib/DebugInfo/CodeView/TypeRecordHelpers.cpp index e44dec6d6396..046b660abfab 100644 --- a/llvm/lib/DebugInfo/CodeView/TypeRecordHelpers.cpp +++ b/llvm/lib/DebugInfo/CodeView/TypeRecordHelpers.cpp @@ -144,15 +144,15 @@ uint64_t llvm::codeview::getSizeInBytesForTypeIndex(TypeIndex TI) { // Complex float. 
case SimpleTypeKind::Complex16: - return 2; - case SimpleTypeKind::Complex32: return 4; - case SimpleTypeKind::Complex64: + case SimpleTypeKind::Complex32: return 8; + case SimpleTypeKind::Complex64: + return 16; case SimpleTypeKind::Complex80: - return 10; + return 20; case SimpleTypeKind::Complex128: - return 16; + return 32; default: return 0; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp b/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp index 5b5b887e2a50..ecdbd004efad 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp @@ -34,22 +34,32 @@ DWARFAbbreviationDeclaration::DWARFAbbreviationDeclaration() { clear(); } -bool -DWARFAbbreviationDeclaration::extract(DataExtractor Data, - uint64_t* OffsetPtr) { +llvm::Expected<DWARFAbbreviationDeclaration::ExtractState> +DWARFAbbreviationDeclaration::extract(DataExtractor Data, uint64_t *OffsetPtr) { clear(); const uint64_t Offset = *OffsetPtr; - Code = Data.getULEB128(OffsetPtr); - if (Code == 0) { - return false; - } + Error Err = Error::success(); + Code = Data.getULEB128(OffsetPtr, &Err); + if (Err) + return std::move(Err); + + if (Code == 0) + return ExtractState::Complete; + CodeByteSize = *OffsetPtr - Offset; - Tag = static_cast<llvm::dwarf::Tag>(Data.getULEB128(OffsetPtr)); + Tag = static_cast<llvm::dwarf::Tag>(Data.getULEB128(OffsetPtr, &Err)); + if (Err) + return std::move(Err); + if (Tag == DW_TAG_null) { clear(); - return false; + return make_error<llvm::object::GenericBinaryError>( + "abbreviation declaration requires a non-null tag"); } - uint8_t ChildrenByte = Data.getU8(OffsetPtr); + uint8_t ChildrenByte = Data.getU8(OffsetPtr, &Err); + if (Err) + return std::move(Err); + HasChildren = (ChildrenByte == DW_CHILDREN_yes); // Assign a value to our optional FixedAttributeSize member variable. 
If // this member variable still has a value after the while loop below, then @@ -57,70 +67,82 @@ DWARFAbbreviationDeclaration::extract(DataExtractor Data, FixedAttributeSize = FixedSizeInfo(); // Read all of the abbreviation attributes and forms. - while (true) { - auto A = static_cast<Attribute>(Data.getULEB128(OffsetPtr)); - auto F = static_cast<Form>(Data.getULEB128(OffsetPtr)); - if (A && F) { - bool IsImplicitConst = (F == DW_FORM_implicit_const); - if (IsImplicitConst) { - int64_t V = Data.getSLEB128(OffsetPtr); - AttributeSpecs.push_back(AttributeSpec(A, F, V)); - continue; - } - std::optional<uint8_t> ByteSize; - // If this abbrevation still has a fixed byte size, then update the - // FixedAttributeSize as needed. - switch (F) { - case DW_FORM_addr: - if (FixedAttributeSize) - ++FixedAttributeSize->NumAddrs; - break; + while (Data.isValidOffset(*OffsetPtr)) { + auto A = static_cast<Attribute>(Data.getULEB128(OffsetPtr, &Err)); + if (Err) + return std::move(Err); + + auto F = static_cast<Form>(Data.getULEB128(OffsetPtr, &Err)); + if (Err) + return std::move(Err); + + // We successfully reached the end of this abbreviation declaration + // since both attribute and form are zero. There may be more abbreviation + // declarations afterwards. + if (!A && !F) + return ExtractState::MoreItems; + + if (!A || !F) { + // Attribute and form pairs must either both be non-zero, in which case + // they are added to the abbreviation declaration, or both be zero to + // terminate the abbrevation declaration. In this case only one was + // zero which is an error. + clear(); + return make_error<llvm::object::GenericBinaryError>( + "malformed abbreviation declaration attribute. 
Either the attribute " + "or the form is zero while the other is not"); + } - case DW_FORM_ref_addr: - if (FixedAttributeSize) - ++FixedAttributeSize->NumRefAddrs; - break; + bool IsImplicitConst = (F == DW_FORM_implicit_const); + if (IsImplicitConst) { + int64_t V = Data.getSLEB128(OffsetPtr); + AttributeSpecs.push_back(AttributeSpec(A, F, V)); + continue; + } + std::optional<uint8_t> ByteSize; + // If this abbrevation still has a fixed byte size, then update the + // FixedAttributeSize as needed. + switch (F) { + case DW_FORM_addr: + if (FixedAttributeSize) + ++FixedAttributeSize->NumAddrs; + break; - case DW_FORM_strp: - case DW_FORM_GNU_ref_alt: - case DW_FORM_GNU_strp_alt: - case DW_FORM_line_strp: - case DW_FORM_sec_offset: - case DW_FORM_strp_sup: - if (FixedAttributeSize) - ++FixedAttributeSize->NumDwarfOffsets; - break; + case DW_FORM_ref_addr: + if (FixedAttributeSize) + ++FixedAttributeSize->NumRefAddrs; + break; + + case DW_FORM_strp: + case DW_FORM_GNU_ref_alt: + case DW_FORM_GNU_strp_alt: + case DW_FORM_line_strp: + case DW_FORM_sec_offset: + case DW_FORM_strp_sup: + if (FixedAttributeSize) + ++FixedAttributeSize->NumDwarfOffsets; + break; - default: - // The form has a byte size that doesn't depend on Params. - // If it's a fixed size, keep track of it. - if ((ByteSize = dwarf::getFixedFormByteSize(F, dwarf::FormParams()))) { - if (FixedAttributeSize) - FixedAttributeSize->NumBytes += *ByteSize; - break; - } - // Indicate we no longer have a fixed byte size for this - // abbreviation by clearing the FixedAttributeSize optional value - // so it doesn't have a value. - FixedAttributeSize.reset(); + default: + // The form has a byte size that doesn't depend on Params. + // If it's a fixed size, keep track of it. + if ((ByteSize = dwarf::getFixedFormByteSize(F, dwarf::FormParams()))) { + if (FixedAttributeSize) + FixedAttributeSize->NumBytes += *ByteSize; break; } - // Record this attribute and its fixed size if it has one. 
- AttributeSpecs.push_back(AttributeSpec(A, F, ByteSize)); - } else if (A == 0 && F == 0) { - // We successfully reached the end of this abbreviation declaration - // since both attribute and form are zero. + // Indicate we no longer have a fixed byte size for this + // abbreviation by clearing the FixedAttributeSize optional value + // so it doesn't have a value. + FixedAttributeSize.reset(); break; - } else { - // Attribute and form pairs must either both be non-zero, in which case - // they are added to the abbreviation declaration, or both be zero to - // terminate the abbrevation declaration. In this case only one was - // zero which is an error. - clear(); - return false; } + // Record this attribute and its fixed size if it has one. + AttributeSpecs.push_back(AttributeSpec(A, F, ByteSize)); } - return true; + return make_error<llvm::object::GenericBinaryError>( + "abbreviation declaration attribute list was not terminated with a null " + "entry"); } void DWARFAbbreviationDeclaration::dump(raw_ostream &OS) const { diff --git a/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp index 889d3f0915b0..14962cd36c23 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp @@ -54,13 +54,11 @@ Error AppleAcceleratorTable::extract() { Hdr.BucketCount = AccelSection.getU32(&Offset); Hdr.HashCount = AccelSection.getU32(&Offset); Hdr.HeaderDataLength = AccelSection.getU32(&Offset); + FormParams = {Hdr.Version, 0, dwarf::DwarfFormat::DWARF32}; // Check that we can read all the hashes and offsets from the // section (see SourceLevelDebugging.rst for the structure of the index). - // We need to substract one because we're checking for an *offset* which is - // equal to the size for an empty table and hence pointer after the section. 
- if (!AccelSection.isValidOffset(sizeof(Hdr) + Hdr.HeaderDataLength + - Hdr.BucketCount * 4 + Hdr.HashCount * 8 - 1)) + if (!AccelSection.isValidOffset(getIthBucketBase(Hdr.BucketCount - 1))) return createStringError( errc::illegal_byte_sequence, "Section too small: cannot read buckets and hashes."); @@ -68,20 +66,35 @@ Error AppleAcceleratorTable::extract() { HdrData.DIEOffsetBase = AccelSection.getU32(&Offset); uint32_t NumAtoms = AccelSection.getU32(&Offset); + HashDataEntryLength = 0; + auto MakeUnsupportedFormError = [](dwarf::Form Form) { + return createStringError(errc::not_supported, + "Unsupported form:" + + dwarf::FormEncodingString(Form)); + }; + for (unsigned i = 0; i < NumAtoms; ++i) { uint16_t AtomType = AccelSection.getU16(&Offset); auto AtomForm = static_cast<dwarf::Form>(AccelSection.getU16(&Offset)); HdrData.Atoms.push_back(std::make_pair(AtomType, AtomForm)); + + std::optional<uint8_t> FormSize = + dwarf::getFixedFormByteSize(AtomForm, FormParams); + if (!FormSize) + return MakeUnsupportedFormError(AtomForm); + HashDataEntryLength += *FormSize; } IsValid = true; return Error::success(); } -uint32_t AppleAcceleratorTable::getNumBuckets() { return Hdr.BucketCount; } -uint32_t AppleAcceleratorTable::getNumHashes() { return Hdr.HashCount; } -uint32_t AppleAcceleratorTable::getSizeHdr() { return sizeof(Hdr); } -uint32_t AppleAcceleratorTable::getHeaderDataLength() { +uint32_t AppleAcceleratorTable::getNumBuckets() const { + return Hdr.BucketCount; +} +uint32_t AppleAcceleratorTable::getNumHashes() const { return Hdr.HashCount; } +uint32_t AppleAcceleratorTable::getSizeHdr() const { return sizeof(Hdr); } +uint32_t AppleAcceleratorTable::getHeaderDataLength() const { return Hdr.HeaderDataLength; } @@ -114,7 +127,6 @@ std::pair<uint64_t, dwarf::Tag> AppleAcceleratorTable::readAtoms(uint64_t *HashDataOffset) { uint64_t DieOffset = dwarf::DW_INVALID_OFFSET; dwarf::Tag DieTag = dwarf::DW_TAG_null; - dwarf::FormParams FormParams = {Hdr.Version, 0, 
dwarf::DwarfFormat::DWARF32}; for (auto Atom : getAtomsDesc()) { DWARFFormValue FormValue(Atom.second); @@ -163,7 +175,6 @@ std::optional<uint64_t> AppleAcceleratorTable::HeaderData::extractOffset( bool AppleAcceleratorTable::dumpName(ScopedPrinter &W, SmallVectorImpl<DWARFFormValue> &AtomForms, uint64_t *DataOffset) const { - dwarf::FormParams FormParams = {Hdr.Version, 0, dwarf::DwarfFormat::DWARF32}; uint64_t NameOffset = *DataOffset; if (!AccelSection.isValidOffsetForDataOfSize(*DataOffset, 4)) { W.printString("Incorrectly terminated list."); @@ -209,6 +220,7 @@ LLVM_DUMP_METHOD void AppleAcceleratorTable::dump(raw_ostream &OS) const { W.printNumber("DIE offset base", HdrData.DIEOffsetBase); W.printNumber("Number of atoms", uint64_t(HdrData.Atoms.size())); + W.printNumber("Size of each hash data entry", getHashDataEntryLength()); SmallVector<DWARFFormValue, 3> AtomForms; { ListScope AtomsScope(W, "Atoms"); @@ -255,41 +267,33 @@ LLVM_DUMP_METHOD void AppleAcceleratorTable::dump(raw_ostream &OS) const { } } -AppleAcceleratorTable::Entry::Entry( - const AppleAcceleratorTable::HeaderData &HdrData) - : HdrData(&HdrData) { - Values.reserve(HdrData.Atoms.size()); - for (const auto &Atom : HdrData.Atoms) +AppleAcceleratorTable::Entry::Entry(const AppleAcceleratorTable &Table) + : Table(Table) { + Values.reserve(Table.HdrData.Atoms.size()); + for (const auto &Atom : Table.HdrData.Atoms) Values.push_back(DWARFFormValue(Atom.second)); } -void AppleAcceleratorTable::Entry::extract( - const AppleAcceleratorTable &AccelTable, uint64_t *Offset) { - - dwarf::FormParams FormParams = {AccelTable.Hdr.Version, 0, - dwarf::DwarfFormat::DWARF32}; - for (auto &Atom : Values) - Atom.extractValue(AccelTable.AccelSection, Offset, FormParams); +void AppleAcceleratorTable::Entry::extract(uint64_t *Offset) { + for (auto &FormValue : Values) + FormValue.extractValue(Table.AccelSection, Offset, Table.FormParams); } std::optional<DWARFFormValue> 
-AppleAcceleratorTable::Entry::lookup(HeaderData::AtomType Atom) const { - assert(HdrData && "Dereferencing end iterator?"); - assert(HdrData->Atoms.size() == Values.size()); - for (auto Tuple : zip_first(HdrData->Atoms, Values)) { - if (std::get<0>(Tuple).first == Atom) - return std::get<1>(Tuple); - } +AppleAcceleratorTable::Entry::lookup(HeaderData::AtomType AtomToFind) const { + for (auto [Atom, FormValue] : zip_equal(Table.HdrData.Atoms, Values)) + if (Atom.first == AtomToFind) + return FormValue; return std::nullopt; } std::optional<uint64_t> AppleAcceleratorTable::Entry::getDIESectionOffset() const { - return HdrData->extractOffset(lookup(dwarf::DW_ATOM_die_offset)); + return Table.HdrData.extractOffset(lookup(dwarf::DW_ATOM_die_offset)); } std::optional<uint64_t> AppleAcceleratorTable::Entry::getCUOffset() const { - return HdrData->extractOffset(lookup(dwarf::DW_ATOM_cu_offset)); + return Table.HdrData.extractOffset(lookup(dwarf::DW_ATOM_cu_offset)); } std::optional<dwarf::Tag> AppleAcceleratorTable::Entry::getTag() const { @@ -301,65 +305,127 @@ std::optional<dwarf::Tag> AppleAcceleratorTable::Entry::getTag() const { return std::nullopt; } -AppleAcceleratorTable::ValueIterator::ValueIterator( - const AppleAcceleratorTable &AccelTable, uint64_t Offset) - : AccelTable(&AccelTable), Current(AccelTable.HdrData), DataOffset(Offset) { - if (!AccelTable.AccelSection.isValidOffsetForDataOfSize(DataOffset, 4)) +AppleAcceleratorTable::SameNameIterator::SameNameIterator( + const AppleAcceleratorTable &AccelTable, uint64_t DataOffset) + : Current(AccelTable), Offset(DataOffset) {} + +void AppleAcceleratorTable::Iterator::prepareNextEntryOrEnd() { + if (NumEntriesToCome == 0) + prepareNextStringOrEnd(); + if (isEnd()) return; + uint64_t OffsetCopy = Offset; + Current.BaseEntry.extract(&OffsetCopy); + NumEntriesToCome--; + Offset += getTable().getHashDataEntryLength(); +} + +void AppleAcceleratorTable::Iterator::prepareNextStringOrEnd() { + std::optional<uint32_t> 
StrOffset = getTable().readStringOffsetAt(Offset); + if (!StrOffset) + return setToEnd(); + + // A zero denotes the end of the collision list. Read the next string + // again. + if (*StrOffset == 0) + return prepareNextStringOrEnd(); + Current.StrOffset = *StrOffset; - // Read the first entry. - NumData = AccelTable.AccelSection.getU32(&DataOffset); - Next(); + std::optional<uint32_t> MaybeNumEntries = getTable().readU32FromAccel(Offset); + if (!MaybeNumEntries || *MaybeNumEntries == 0) + return setToEnd(); + NumEntriesToCome = *MaybeNumEntries; } -void AppleAcceleratorTable::ValueIterator::Next() { - assert(NumData > 0 && "attempted to increment iterator past the end"); - auto &AccelSection = AccelTable->AccelSection; - if (Data >= NumData || - !AccelSection.isValidOffsetForDataOfSize(DataOffset, 4)) { - NumData = 0; - DataOffset = 0; - return; - } - Current.extract(*AccelTable, &DataOffset); - ++Data; +AppleAcceleratorTable::Iterator::Iterator(const AppleAcceleratorTable &Table, + bool SetEnd) + : Current(Table), Offset(Table.getEntriesBase()), NumEntriesToCome(0) { + if (SetEnd) + setToEnd(); + else + prepareNextEntryOrEnd(); } -iterator_range<AppleAcceleratorTable::ValueIterator> +iterator_range<AppleAcceleratorTable::SameNameIterator> AppleAcceleratorTable::equal_range(StringRef Key) const { + const auto EmptyRange = + make_range(SameNameIterator(*this, 0), SameNameIterator(*this, 0)); if (!IsValid) - return make_range(ValueIterator(), ValueIterator()); + return EmptyRange; // Find the bucket. 
- unsigned HashValue = djbHash(Key); - unsigned Bucket = HashValue % Hdr.BucketCount; - uint64_t BucketBase = sizeof(Hdr) + Hdr.HeaderDataLength; - uint64_t HashesBase = BucketBase + Hdr.BucketCount * 4; - uint64_t OffsetsBase = HashesBase + Hdr.HashCount * 4; - - uint64_t BucketOffset = BucketBase + Bucket * 4; - unsigned Index = AccelSection.getU32(&BucketOffset); + uint32_t SearchHash = djbHash(Key); + uint32_t BucketIdx = hashToBucketIdx(SearchHash); + std::optional<uint32_t> HashIdx = idxOfHashInBucket(SearchHash, BucketIdx); + if (!HashIdx) + return EmptyRange; + + std::optional<uint64_t> MaybeDataOffset = readIthOffset(*HashIdx); + if (!MaybeDataOffset) + return EmptyRange; + + uint64_t DataOffset = *MaybeDataOffset; + if (DataOffset >= AccelSection.size()) + return EmptyRange; + + std::optional<uint32_t> StrOffset = readStringOffsetAt(DataOffset); + // Valid input and still have strings in this hash. + while (StrOffset && *StrOffset) { + std::optional<StringRef> MaybeStr = readStringFromStrSection(*StrOffset); + std::optional<uint32_t> NumEntries = this->readU32FromAccel(DataOffset); + if (!MaybeStr || !NumEntries) + return EmptyRange; + uint64_t EndOffset = DataOffset + *NumEntries * getHashDataEntryLength(); + if (Key == *MaybeStr) + return make_range({*this, DataOffset}, + SameNameIterator{*this, EndOffset}); + DataOffset = EndOffset; + StrOffset = readStringOffsetAt(DataOffset); + } - // Search through all hashes in the bucket. - for (unsigned HashIdx = Index; HashIdx < Hdr.HashCount; ++HashIdx) { - uint64_t HashOffset = HashesBase + HashIdx * 4; - uint64_t OffsetsOffset = OffsetsBase + HashIdx * 4; - uint32_t Hash = AccelSection.getU32(&HashOffset); + return EmptyRange; +} - if (Hash % Hdr.BucketCount != Bucket) - // We are already in the next bucket. 
- break; +std::optional<uint32_t> +AppleAcceleratorTable::idxOfHashInBucket(uint32_t HashToFind, + uint32_t BucketIdx) const { + std::optional<uint32_t> HashStartIdx = readIthBucket(BucketIdx); + if (!HashStartIdx) + return std::nullopt; - uint64_t DataOffset = AccelSection.getU32(&OffsetsOffset); - uint64_t StringOffset = AccelSection.getRelocatedValue(4, &DataOffset); - if (!StringOffset) + for (uint32_t HashIdx = *HashStartIdx; HashIdx < getNumHashes(); HashIdx++) { + std::optional<uint32_t> MaybeHash = readIthHash(HashIdx); + if (!MaybeHash || !wouldHashBeInBucket(*MaybeHash, BucketIdx)) break; + if (*MaybeHash == HashToFind) + return HashIdx; + } + return std::nullopt; +} - // Finally, compare the key. - if (Key == StringSection.getCStr(&StringOffset)) - return make_range({*this, DataOffset}, ValueIterator()); +std::optional<StringRef> AppleAcceleratorTable::readStringFromStrSection( + uint64_t StringSectionOffset) const { + Error E = Error::success(); + StringRef Str = StringSection.getCStrRef(&StringSectionOffset, &E); + if (E) { + consumeError(std::move(E)); + return std::nullopt; + } + return Str; +} + +std::optional<uint32_t> +AppleAcceleratorTable::readU32FromAccel(uint64_t &Offset, + bool UseRelocation) const { + Error E = Error::success(); + uint32_t Data = UseRelocation + ? 
AccelSection.getRelocatedValue(4, &Offset, nullptr, &E) + : AccelSection.getU32(&Offset, &E); + if (E) { + consumeError(std::move(E)); + return std::nullopt; } - return make_range(ValueIterator(), ValueIterator()); + return Data; } void DWARFDebugNames::Header::dump(ScopedPrinter &W) const { diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp index dd86144d16e0..33168abbdc38 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -48,6 +48,7 @@ #include "llvm/Support/Error.h" #include "llvm/Support/Format.h" #include "llvm/Support/LEB128.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" @@ -775,11 +776,13 @@ bool DWARFContext::verify(raw_ostream &OS, DIDumpOptions DumpOpts) { Success &= verifier.handleDebugInfo(); if (DumpOpts.DumpType & DIDT_DebugLine) Success &= verifier.handleDebugLine(); + if (DumpOpts.DumpType & DIDT_DebugStrOffsets) + Success &= verifier.handleDebugStrOffsets(); Success &= verifier.handleAccelTables(); return Success; } -void fixupIndex(const DWARFObject &DObj, DWARFContext &C, +void fixupIndexV4(const DWARFObject &DObj, DWARFContext &C, DWARFUnitIndex &Index) { using EntryType = DWARFUnitIndex::Entry::SectionContribution; using EntryMap = DenseMap<uint32_t, EntryType>; @@ -843,8 +846,55 @@ void fixupIndex(const DWARFObject &DObj, DWARFContext &C, Twine::utohexstr(CUOff.getOffset())), errs()); } +} + +void fixupIndexV5(const DWARFObject &DObj, DWARFContext &C, + DWARFUnitIndex &Index) { + DenseMap<uint64_t, uint64_t> Map; - return; + DObj.forEachInfoDWOSections([&](const DWARFSection &S) { + if (!(C.getParseCUTUIndexManually() || + S.Data.size() >= std::numeric_limits<uint32_t>::max())) + return; + DWARFDataExtractor Data(DObj, S, C.isLittleEndian(), 0); + uint64_t Offset = 0; + while (Data.isValidOffset(Offset)) { + DWARFUnitHeader Header; + if 
(!Header.extract(C, Data, &Offset, DWARFSectionKind::DW_SECT_INFO)) { + logAllUnhandledErrors( + createError("Failed to parse unit header in DWP file"), errs()); + break; + } + bool CU = Header.getUnitType() == DW_UT_split_compile; + uint64_t Sig = CU ? *Header.getDWOId() : Header.getTypeHash(); + Map[Sig] = Header.getOffset(); + Offset = Header.getNextUnitOffset(); + } + }); + if (Map.empty()) + return; + for (DWARFUnitIndex::Entry &E : Index.getMutableRows()) { + if (!E.isValid()) + continue; + DWARFUnitIndex::Entry::SectionContribution &CUOff = E.getContribution(); + auto Iter = Map.find(E.getSignature()); + if (Iter == Map.end()) { + logAllUnhandledErrors( + createError("Could not find unit with signature 0x" + + Twine::utohexstr(E.getSignature()) + " in the Map"), + errs()); + break; + } + CUOff.setOffset(Iter->second); + } +} + +void fixupIndex(const DWARFObject &DObj, DWARFContext &C, + DWARFUnitIndex &Index) { + if (Index.getVersion() < 5) + fixupIndexV4(DObj, C, Index); + else + fixupIndexV5(DObj, C, Index); } const DWARFUnitIndex &DWARFContext::getCUIndex() { @@ -853,8 +903,9 @@ const DWARFUnitIndex &DWARFContext::getCUIndex() { DataExtractor CUIndexData(DObj->getCUIndexSection(), isLittleEndian(), 0); CUIndex = std::make_unique<DWARFUnitIndex>(DW_SECT_INFO); - CUIndex->parse(CUIndexData); - fixupIndex(*DObj, *this, *CUIndex.get()); + bool IsParseSuccessful = CUIndex->parse(CUIndexData); + if (IsParseSuccessful) + fixupIndex(*DObj, *this, *CUIndex); return *CUIndex; } @@ -868,7 +919,7 @@ const DWARFUnitIndex &DWARFContext::getTUIndex() { // If we are parsing TU-index and for .debug_types section we don't need // to do anything. 
if (isParseSuccessful && TUIndex->getVersion() != 2) - fixupIndex(*DObj, *this, *TUIndex.get()); + fixupIndex(*DObj, *this, *TUIndex); return *TUIndex; } @@ -887,9 +938,7 @@ const DWARFDebugAbbrev *DWARFContext::getDebugAbbrev() { return Abbrev.get(); DataExtractor abbrData(DObj->getAbbrevSection(), isLittleEndian(), 0); - - Abbrev.reset(new DWARFDebugAbbrev()); - Abbrev->extract(abbrData); + Abbrev = std::make_unique<DWARFDebugAbbrev>(abbrData); return Abbrev.get(); } @@ -898,8 +947,7 @@ const DWARFDebugAbbrev *DWARFContext::getDebugAbbrevDWO() { return AbbrevDWO.get(); DataExtractor abbrData(DObj->getAbbrevDWOSection(), isLittleEndian(), 0); - AbbrevDWO.reset(new DWARFDebugAbbrev()); - AbbrevDWO->extract(abbrData); + AbbrevDWO = std::make_unique<DWARFDebugAbbrev>(abbrData); return AbbrevDWO.get(); } @@ -1118,14 +1166,17 @@ DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint64_t Offset) { NormalUnits.getUnitForOffset(Offset)); } -DWARFCompileUnit *DWARFContext::getCompileUnitForAddress(uint64_t Address) { - // First, get the offset of the compile unit. +DWARFCompileUnit *DWARFContext::getCompileUnitForCodeAddress(uint64_t Address) { + uint64_t CUOffset = getDebugAranges()->findAddress(Address); + return getCompileUnitForOffset(CUOffset); +} + +DWARFCompileUnit *DWARFContext::getCompileUnitForDataAddress(uint64_t Address) { uint64_t CUOffset = getDebugAranges()->findAddress(Address); - // Retrieve the compile unit. if (DWARFCompileUnit *OffsetCU = getCompileUnitForOffset(CUOffset)) return OffsetCU; - // Global variables are often not found by the above search, for one of two + // Global variables are often missed by the above search, for one of two // reasons: // 1. .debug_aranges may not include global variables. On clang, it seems we // put the globals in the aranges, but this isn't true for gcc. 
@@ -1146,7 +1197,7 @@ DWARFCompileUnit *DWARFContext::getCompileUnitForAddress(uint64_t Address) { DWARFContext::DIEsForAddress DWARFContext::getDIEsForAddress(uint64_t Address) { DIEsForAddress Result; - DWARFCompileUnit *CU = getCompileUnitForAddress(Address); + DWARFCompileUnit *CU = getCompileUnitForCodeAddress(Address); if (!CU) return Result; @@ -1297,7 +1348,7 @@ void DWARFContext::addLocalsForDie(DWARFCompileUnit *CU, DWARFDie Subprogram, std::vector<DILocal> DWARFContext::getLocalsForAddress(object::SectionedAddress Address) { std::vector<DILocal> Result; - DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address); + DWARFCompileUnit *CU = getCompileUnitForCodeAddress(Address.Address); if (!CU) return Result; @@ -1310,7 +1361,7 @@ DWARFContext::getLocalsForAddress(object::SectionedAddress Address) { DILineInfo DWARFContext::getLineInfoForAddress(object::SectionedAddress Address, DILineInfoSpecifier Spec) { DILineInfo Result; - DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address); + DWARFCompileUnit *CU = getCompileUnitForCodeAddress(Address.Address); if (!CU) return Result; @@ -1331,7 +1382,7 @@ DILineInfo DWARFContext::getLineInfoForAddress(object::SectionedAddress Address, DILineInfo DWARFContext::getLineInfoForDataAddress(object::SectionedAddress Address) { DILineInfo Result; - DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address); + DWARFCompileUnit *CU = getCompileUnitForDataAddress(Address.Address); if (!CU) return Result; @@ -1346,7 +1397,7 @@ DWARFContext::getLineInfoForDataAddress(object::SectionedAddress Address) { DILineInfoTable DWARFContext::getLineInfoForAddressRange( object::SectionedAddress Address, uint64_t Size, DILineInfoSpecifier Spec) { DILineInfoTable Lines; - DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address); + DWARFCompileUnit *CU = getCompileUnitForCodeAddress(Address.Address); if (!CU) return Lines; @@ -1402,7 +1453,7 @@ DWARFContext::getInliningInfoForAddress(object::SectionedAddress 
Address, DILineInfoSpecifier Spec) { DIInliningInfo InliningInfo; - DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address); + DWARFCompileUnit *CU = getCompileUnitForCodeAddress(Address.Address); if (!CU) return InliningInfo; @@ -1805,13 +1856,9 @@ public: continue; } - // Compressed sections names in GNU style starts from ".z", - // at this point section is decompressed and we drop compression prefix. - Name = Name.substr( - Name.find_first_not_of("._z")); // Skip ".", "z" and "_" prefixes. - // Map platform specific debug section names to DWARF standard section // names. + Name = Name.substr(Name.find_first_not_of("._")); Name = Obj.mapDebugSectionName(Name); if (StringRef *SectionData = mapSectionToMember(Name)) { @@ -1836,10 +1883,6 @@ public: S.Data = Data; } - if (RelocatedSection != Obj.section_end() && Name.contains(".dwo")) - HandleWarning( - createError("Unexpected relocations for dwo section " + Name)); - if (RelocatedSection == Obj.section_end() || (RelocAction == DWARFContext::ProcessDebugRelocations::Ignore)) continue; @@ -1865,11 +1908,15 @@ public: if (!L && isa<MachOObjectFile>(&Obj)) continue; - RelSecName = RelSecName.substr( - RelSecName.find_first_not_of("._z")); // Skip . and _ prefixes. + if (!Section.relocations().empty() && Name.ends_with(".dwo") && + RelSecName.startswith(".debug")) { + HandleWarning(createError("unexpected relocations for dwo section '" + + RelSecName + "'")); + } // TODO: Add support for relocations in other sections as needed. // Record relocations for the debug_info and debug_line sections. + RelSecName = RelSecName.substr(RelSecName.find_first_not_of("._")); DWARFSectionMap *Sec = mapNameToDWARFSection(RelSecName); RelocAddrMap *Map = Sec ? 
&Sec->Relocs : nullptr; if (!Map) { diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp index 3ea3818e7cc3..3014e61f566a 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp @@ -25,26 +25,32 @@ void DWARFAbbreviationDeclarationSet::clear() { Decls.clear(); } -bool DWARFAbbreviationDeclarationSet::extract(DataExtractor Data, - uint64_t *OffsetPtr) { +Error DWARFAbbreviationDeclarationSet::extract(DataExtractor Data, + uint64_t *OffsetPtr) { clear(); const uint64_t BeginOffset = *OffsetPtr; Offset = BeginOffset; DWARFAbbreviationDeclaration AbbrDecl; uint32_t PrevAbbrCode = 0; - while (AbbrDecl.extract(Data, OffsetPtr)) { + while (true) { + Expected<DWARFAbbreviationDeclaration::ExtractState> ES = + AbbrDecl.extract(Data, OffsetPtr); + if (!ES) + return ES.takeError(); + + if (*ES == DWARFAbbreviationDeclaration::ExtractState::Complete) + break; + if (FirstAbbrCode == 0) { FirstAbbrCode = AbbrDecl.getCode(); - } else { - if (PrevAbbrCode + 1 != AbbrDecl.getCode()) { - // Codes are not consecutive, can't do O(1) lookups. - FirstAbbrCode = UINT32_MAX; - } + } else if (PrevAbbrCode + 1 != AbbrDecl.getCode()) { + // Codes are not consecutive, can't do O(1) lookups. 
+ FirstAbbrCode = UINT32_MAX; } PrevAbbrCode = AbbrDecl.getCode(); Decls.push_back(std::move(AbbrDecl)); } - return BeginOffset != *OffsetPtr; + return Error::success(); } void DWARFAbbreviationDeclarationSet::dump(raw_ostream &OS) const { @@ -96,17 +102,8 @@ std::string DWARFAbbreviationDeclarationSet::getCodeRange() const { return Buffer; } -DWARFDebugAbbrev::DWARFDebugAbbrev() { clear(); } - -void DWARFDebugAbbrev::clear() { - AbbrDeclSets.clear(); - PrevAbbrOffsetPos = AbbrDeclSets.end(); -} - -void DWARFDebugAbbrev::extract(DataExtractor Data) { - clear(); - this->Data = Data; -} +DWARFDebugAbbrev::DWARFDebugAbbrev(DataExtractor Data) + : AbbrDeclSets(), PrevAbbrOffsetPos(AbbrDeclSets.end()), Data(Data) {} void DWARFDebugAbbrev::parse() const { if (!Data) @@ -118,8 +115,11 @@ void DWARFDebugAbbrev::parse() const { ++I; uint64_t CUAbbrOffset = Offset; DWARFAbbreviationDeclarationSet AbbrDecls; - if (!AbbrDecls.extract(*Data, &Offset)) + if (Error Err = AbbrDecls.extract(*Data, &Offset)) { + // FIXME: We should propagate the error upwards. 
+ consumeError(std::move(Err)); break; + } AbbrDeclSets.insert(I, std::make_pair(CUAbbrOffset, std::move(AbbrDecls))); } Data = std::nullopt; @@ -139,29 +139,30 @@ void DWARFDebugAbbrev::dump(raw_ostream &OS) const { } } -const DWARFAbbreviationDeclarationSet* +Expected<const DWARFAbbreviationDeclarationSet *> DWARFDebugAbbrev::getAbbreviationDeclarationSet(uint64_t CUAbbrOffset) const { const auto End = AbbrDeclSets.end(); if (PrevAbbrOffsetPos != End && PrevAbbrOffsetPos->first == CUAbbrOffset) { - return &(PrevAbbrOffsetPos->second); + return &PrevAbbrOffsetPos->second; } const auto Pos = AbbrDeclSets.find(CUAbbrOffset); if (Pos != End) { PrevAbbrOffsetPos = Pos; - return &(Pos->second); + return &Pos->second; } - if (Data && CUAbbrOffset < Data->getData().size()) { - uint64_t Offset = CUAbbrOffset; - DWARFAbbreviationDeclarationSet AbbrDecls; - if (!AbbrDecls.extract(*Data, &Offset)) - return nullptr; - PrevAbbrOffsetPos = - AbbrDeclSets.insert(std::make_pair(CUAbbrOffset, std::move(AbbrDecls))) - .first; - return &PrevAbbrOffsetPos->second; - } + if (!Data || CUAbbrOffset >= Data->getData().size()) + return make_error<llvm::object::GenericBinaryError>( + "the abbreviation offset into the .debug_abbrev section is not valid"); + + uint64_t Offset = CUAbbrOffset; + DWARFAbbreviationDeclarationSet AbbrDecls; + if (Error Err = AbbrDecls.extract(*Data, &Offset)) + return std::move(Err); - return nullptr; + PrevAbbrOffsetPos = + AbbrDeclSets.insert(std::make_pair(CUAbbrOffset, std::move(AbbrDecls))) + .first; + return &PrevAbbrOffsetPos->second; } diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index 0725bd7744ae..6f2afe5d50e9 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -477,6 +477,7 @@ void DWARFDebugLine::Row::reset(bool DefaultIsStmt) { Isa = 0; Discriminator = 0; IsStmt = DefaultIsStmt; + OpIndex = 0; BasicBlock = false; EndSequence = false; 
PrologueEnd = false; @@ -485,15 +486,16 @@ void DWARFDebugLine::Row::reset(bool DefaultIsStmt) { void DWARFDebugLine::Row::dumpTableHeader(raw_ostream &OS, unsigned Indent) { OS.indent(Indent) - << "Address Line Column File ISA Discriminator Flags\n"; + << "Address Line Column File ISA Discriminator OpIndex " + "Flags\n"; OS.indent(Indent) - << "------------------ ------ ------ ------ --- ------------- " + << "------------------ ------ ------ ------ --- ------------- ------- " "-------------\n"; } void DWARFDebugLine::Row::dump(raw_ostream &OS) const { OS << format("0x%16.16" PRIx64 " %6u %6u", Address.Address, Line, Column) - << format(" %6u %3u %13u ", File, Isa, Discriminator) + << format(" %6u %3u %13u %7u ", File, Isa, Discriminator, OpIndex) << (IsStmt ? " is_stmt" : "") << (BasicBlock ? " basic_block" : "") << (PrologueEnd ? " prologue_end" : "") << (EpilogueBegin ? " epilogue_begin" : "") @@ -608,21 +610,36 @@ static StringRef getOpcodeName(uint8_t Opcode, uint8_t OpcodeBase) { return "special"; } -uint64_t DWARFDebugLine::ParsingState::advanceAddr(uint64_t OperationAdvance, - uint8_t Opcode, - uint64_t OpcodeOffset) { +DWARFDebugLine::ParsingState::AddrOpIndexDelta +DWARFDebugLine::ParsingState::advanceAddrOpIndex(uint64_t OperationAdvance, + uint8_t Opcode, + uint64_t OpcodeOffset) { StringRef OpcodeName = getOpcodeName(Opcode, LineTable->Prologue.OpcodeBase); // For versions less than 4, the MaxOpsPerInst member is set to 0, as the // maximum_operations_per_instruction field wasn't introduced until DWARFv4. // Don't warn about bad values in this situation. if (ReportAdvanceAddrProblem && LineTable->Prologue.getVersion() >= 4 && - LineTable->Prologue.MaxOpsPerInst != 1) + LineTable->Prologue.MaxOpsPerInst == 0) + ErrorHandler(createStringError( + errc::invalid_argument, + "line table program at offset 0x%8.8" PRIx64 + " contains a %s opcode at offset 0x%8.8" PRIx64 + ", but the prologue maximum_operations_per_instruction value is 0" + ", which is invalid. 
Assuming a value of 1 instead", + LineTableOffset, OpcodeName.data(), OpcodeOffset)); + // Although we are able to correctly parse line number programs with + // MaxOpsPerInst > 1, the rest of DWARFDebugLine and its + // users have not been updated to handle line information for all operations + // in a multi-operation instruction, so warn about potentially incorrect + // results. + if (ReportAdvanceAddrProblem && LineTable->Prologue.MaxOpsPerInst > 1) ErrorHandler(createStringError( errc::not_supported, "line table program at offset 0x%8.8" PRIx64 " contains a %s opcode at offset 0x%8.8" PRIx64 ", but the prologue maximum_operations_per_instruction value is %" PRId8 - ", which is unsupported. Assuming a value of 1 instead", + ", which is experimentally supported, so line number information " + "may be incorrect", LineTableOffset, OpcodeName.data(), OpcodeOffset, LineTable->Prologue.MaxOpsPerInst)); if (ReportAdvanceAddrProblem && LineTable->Prologue.MinInstLength == 0) @@ -634,14 +651,35 @@ uint64_t DWARFDebugLine::ParsingState::advanceAddr(uint64_t OperationAdvance, "is 0, which prevents any address advancing", LineTableOffset, OpcodeName.data(), OpcodeOffset)); ReportAdvanceAddrProblem = false; - uint64_t AddrOffset = OperationAdvance * LineTable->Prologue.MinInstLength; + + // Advances the address and op_index according to DWARFv5, section 6.2.5.1: + // + // new address = address + + // minimum_instruction_length * + // ((op_index + operation advance) / maximum_operations_per_instruction) + // + // new op_index = + // (op_index + operation advance) % maximum_operations_per_instruction + + // For versions less than 4, the MaxOpsPerInst member is set to 0, as the + // maximum_operations_per_instruction field wasn't introduced until DWARFv4. 
+ uint8_t MaxOpsPerInst = + std::max(LineTable->Prologue.MaxOpsPerInst, uint8_t{1}); + + uint64_t AddrOffset = ((Row.OpIndex + OperationAdvance) / MaxOpsPerInst) * + LineTable->Prologue.MinInstLength; Row.Address.Address += AddrOffset; - return AddrOffset; + + uint8_t PrevOpIndex = Row.OpIndex; + Row.OpIndex = (Row.OpIndex + OperationAdvance) % MaxOpsPerInst; + int16_t OpIndexDelta = static_cast<int16_t>(Row.OpIndex) - PrevOpIndex; + + return {AddrOffset, OpIndexDelta}; } -DWARFDebugLine::ParsingState::AddrAndAdjustedOpcode -DWARFDebugLine::ParsingState::advanceAddrForOpcode(uint8_t Opcode, - uint64_t OpcodeOffset) { +DWARFDebugLine::ParsingState::OpcodeAdvanceResults +DWARFDebugLine::ParsingState::advanceForOpcode(uint8_t Opcode, + uint64_t OpcodeOffset) { assert(Opcode == DW_LNS_const_add_pc || Opcode >= LineTable->Prologue.OpcodeBase); if (ReportBadLineRange && LineTable->Prologue.LineRange == 0) { @@ -665,11 +703,12 @@ DWARFDebugLine::ParsingState::advanceAddrForOpcode(uint8_t Opcode, LineTable->Prologue.LineRange != 0 ? 
AdjustedOpcode / LineTable->Prologue.LineRange : 0; - uint64_t AddrOffset = advanceAddr(OperationAdvance, Opcode, OpcodeOffset); - return {AddrOffset, AdjustedOpcode}; + AddrOpIndexDelta Advance = + advanceAddrOpIndex(OperationAdvance, Opcode, OpcodeOffset); + return {Advance.AddrOffset, Advance.OpIndexDelta, AdjustedOpcode}; } -DWARFDebugLine::ParsingState::AddrAndLineDelta +DWARFDebugLine::ParsingState::SpecialOpcodeDelta DWARFDebugLine::ParsingState::handleSpecialOpcode(uint8_t Opcode, uint64_t OpcodeOffset) { // A special opcode value is chosen based on the amount that needs @@ -703,15 +742,16 @@ DWARFDebugLine::ParsingState::handleSpecialOpcode(uint8_t Opcode, // // line increment = line_base + (adjusted opcode % line_range) - DWARFDebugLine::ParsingState::AddrAndAdjustedOpcode AddrAdvanceResult = - advanceAddrForOpcode(Opcode, OpcodeOffset); + DWARFDebugLine::ParsingState::OpcodeAdvanceResults AddrAdvanceResult = + advanceForOpcode(Opcode, OpcodeOffset); int32_t LineOffset = 0; if (LineTable->Prologue.LineRange != 0) LineOffset = LineTable->Prologue.LineBase + (AddrAdvanceResult.AdjustedOpcode % LineTable->Prologue.LineRange); Row.Line += LineOffset; - return {AddrAdvanceResult.AddrDelta, LineOffset}; + return {AddrAdvanceResult.AddrDelta, LineOffset, + AddrAdvanceResult.OpIndexDelta}; } /// Parse a ULEB128 using the specified \p Cursor. \returns the parsed value on @@ -858,9 +898,10 @@ Error DWARFDebugLine::LineTable::parse( // Takes a single relocatable address as an operand. The size of the // operand is the size appropriate to hold an address on the target // machine. Set the address register to the value given by the - // relocatable address. All of the other statement program opcodes - // that affect the address register add a delta to it. This instruction - // stores a relocatable value into it instead. + // relocatable address and set the op_index register to 0. 
All of the + // other statement program opcodes that affect the address register + // add a delta to it. This instruction stores a relocatable value into + // it instead. // // Make sure the extractor knows the address size. If not, infer it // from the size of the operand. @@ -891,6 +932,7 @@ Error DWARFDebugLine::LineTable::parse( TableData.setAddressSize(OpcodeAddressSize); State.Row.Address.Address = TableData.getRelocatedAddress( Cursor, &State.Row.Address.SectionIndex); + State.Row.OpIndex = 0; uint64_t Tombstone = dwarf::computeTombstoneAddress(OpcodeAddressSize); @@ -1002,15 +1044,16 @@ Error DWARFDebugLine::LineTable::parse( break; case DW_LNS_advance_pc: - // Takes a single unsigned LEB128 operand, multiplies it by the - // min_inst_length field of the prologue, and adds the - // result to the address register of the state machine. + // Takes a single unsigned LEB128 operand as the operation advance + // and modifies the address and op_index registers of the state machine + // according to that. if (std::optional<uint64_t> Operand = parseULEB128<uint64_t>(TableData, Cursor)) { - uint64_t AddrOffset = - State.advanceAddr(*Operand, Opcode, OpcodeOffset); + ParsingState::AddrOpIndexDelta Advance = + State.advanceAddrOpIndex(*Operand, Opcode, OpcodeOffset); if (Verbose) - *OS << " (" << AddrOffset << ")"; + *OS << " (addr += " << Advance.AddrOffset + << ", op-index += " << Advance.OpIndexDelta << ")"; } break; @@ -1062,8 +1105,8 @@ Error DWARFDebugLine::LineTable::parse( break; case DW_LNS_const_add_pc: - // Takes no arguments. Add to the address register of the state - // machine the address increment value corresponding to special + // Takes no arguments. Advance the address and op_index registers of + // the state machine by the increments corresponding to special // opcode 255. 
The motivation for DW_LNS_const_add_pc is this: // when the statement program needs to advance the address by a // small amount, it can use a single special opcode, which occupies @@ -1074,30 +1117,35 @@ Error DWARFDebugLine::LineTable::parse( // than twice that range will it need to use both DW_LNS_advance_pc // and a special opcode, requiring three or more bytes. { - uint64_t AddrOffset = - State.advanceAddrForOpcode(Opcode, OpcodeOffset).AddrDelta; + ParsingState::OpcodeAdvanceResults Advance = + State.advanceForOpcode(Opcode, OpcodeOffset); if (Verbose) - *OS << format(" (0x%16.16" PRIx64 ")", AddrOffset); + *OS << format(" (addr += 0x%16.16" PRIx64 ", op-index += %" PRIu8 + ")", + Advance.AddrDelta, Advance.OpIndexDelta); } break; case DW_LNS_fixed_advance_pc: // Takes a single uhalf operand. Add to the address register of - // the state machine the value of the (unencoded) operand. This - // is the only extended opcode that takes an argument that is not - // a variable length number. The motivation for DW_LNS_fixed_advance_pc - // is this: existing assemblers cannot emit DW_LNS_advance_pc or - // special opcodes because they cannot encode LEB128 numbers or - // judge when the computation of a special opcode overflows and - // requires the use of DW_LNS_advance_pc. Such assemblers, however, - // can use DW_LNS_fixed_advance_pc instead, sacrificing compression. + // the state machine the value of the (unencoded) operand and set + // the op_index register to 0. This is the only extended opcode that + // takes an argument that is not a variable length number. + // The motivation for DW_LNS_fixed_advance_pc is this: existing + // assemblers cannot emit DW_LNS_advance_pc or special opcodes because + // they cannot encode LEB128 numbers or judge when the computation + // of a special opcode overflows and requires the use of + // DW_LNS_advance_pc. Such assemblers, however, can use + // DW_LNS_fixed_advance_pc instead, sacrificing compression. 
{ uint16_t PCOffset = TableData.getRelocatedValue(Cursor, 2); if (Cursor) { State.Row.Address.Address += PCOffset; + State.Row.OpIndex = 0; if (Verbose) - *OS << format(" (0x%4.4" PRIx16 ")", PCOffset); + *OS << format(" (addr += 0x%4.4" PRIx16 ", op-index = 0)", + PCOffset); } } break; @@ -1161,11 +1209,12 @@ Error DWARFDebugLine::LineTable::parse( *OffsetPtr = Cursor.tell(); } else { // Special Opcodes. - ParsingState::AddrAndLineDelta Delta = + ParsingState::SpecialOpcodeDelta Delta = State.handleSpecialOpcode(Opcode, OpcodeOffset); if (Verbose) - *OS << "address += " << Delta.Address << ", line += " << Delta.Line; + *OS << "address += " << Delta.Address << ", line += " << Delta.Line + << ", op-index += " << Delta.OpIndex; EmitRow(); *OffsetPtr = Cursor.tell(); } @@ -1226,6 +1275,9 @@ uint32_t DWARFDebugLine::LineTable::findRowInSeq( // // In general we want a non-empty range: the last row whose address is less // than or equal to Address. This can be computed as upper_bound - 1. + // + // TODO: This function, and its users, need to be updated to return multiple + // rows for bundles with multiple op-indexes. DWARFDebugLine::Row Row; Row.Address = Address; RowIter FirstRow = Rows.begin() + Seq.FirstRowIndex; @@ -1505,6 +1557,21 @@ DWARFUnit *DWARFDebugLine::SectionParser::prepareToParse(uint64_t Offset) { return U; } +bool DWARFDebugLine::SectionParser::hasValidVersion(uint64_t Offset) { + DataExtractor::Cursor Cursor(Offset); + auto [TotalLength, _] = DebugLineData.getInitialLength(Cursor); + DWARFDataExtractor HeaderData(DebugLineData, Cursor.tell() + TotalLength); + uint16_t Version = HeaderData.getU16(Cursor); + if (!Cursor) { + // Ignore any error here. + // If this is not the end of the section parseNext() will still be + // attempted, where this error will occur again (and can be handled).
+ consumeError(Cursor.takeError()); + return false; + } + return versionIsSupported(Version); +} + void DWARFDebugLine::SectionParser::moveToNextTable(uint64_t OldOffset, const Prologue &P) { // If the length field is not valid, we don't know where the next table is, so @@ -1518,5 +1585,29 @@ void DWARFDebugLine::SectionParser::moveToNextTable(uint64_t OldOffset, Offset = OldOffset + P.TotalLength + P.sizeofTotalLength(); if (!DebugLineData.isValidOffset(Offset)) { Done = true; + return; + } + + // Heuristic: If the version is valid, then this is probably a line table. + // Otherwise, the offset might need alignment (to a 4 or 8 byte boundary). + if (hasValidVersion(Offset)) + return; + + // ARM C/C++ Compiler aligns each line table to word boundaries and pads out + // the .debug_line section to a word multiple. Note that in the specification + // this does not seem forbidden since each unit has a DW_AT_stmt_list. + for (unsigned Align : {4, 8}) { + uint64_t AlignedOffset = alignTo(Offset, Align); + if (!DebugLineData.isValidOffset(AlignedOffset)) { + // This is almost certainly not another line table but some alignment + // padding. This assumes the alignments tested are ordered, and are + // smaller than the header size (which is true for 4 and 8). 
+ Done = true; + return; + } + if (hasValidVersion(AlignedOffset)) { + Offset = AlignedOffset; + break; + } } } diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp index 26cef8713df1..7af7ed8be7b4 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -39,7 +39,7 @@ using namespace object; static void dumpApplePropertyAttribute(raw_ostream &OS, uint64_t Val) { OS << " ("; do { - uint64_t Shift = countTrailingZeros(Val); + uint64_t Shift = llvm::countr_zero(Val); assert(Shift < 64 && "undefined behavior"); uint64_t Bit = 1ULL << Shift; auto PropName = ApplePropertyString(Bit); diff --git a/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp b/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp index 523dee486d2d..87a4fc78ceb1 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/DWARF/DWARFExpression.h" +#include "llvm/ADT/SmallString.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/Support/Format.h" #include <cassert> @@ -18,13 +19,11 @@ using namespace dwarf; namespace llvm { -typedef std::vector<DWARFExpression::Operation::Description> DescVector; - -static DescVector getDescriptions() { - DescVector Descriptions; - typedef DWARFExpression::Operation Op; - typedef Op::Description Desc; +typedef DWARFExpression::Operation Op; +typedef Op::Description Desc; +static std::vector<Desc> getOpDescriptions() { + std::vector<Desc> Descriptions; Descriptions.resize(0xff); Descriptions[DW_OP_addr] = Desc(Op::Dwarf2, Op::SizeAddr); Descriptions[DW_OP_deref] = Desc(Op::Dwarf2); @@ -94,26 +93,49 @@ static DescVector getDescriptions() { Descriptions[DW_OP_WASM_location] = Desc(Op::Dwarf4, Op::SizeLEB, Op::WasmLocationArg); Descriptions[DW_OP_GNU_push_tls_address] = Desc(Op::Dwarf3); - Descriptions[DW_OP_addrx] = 
Desc(Op::Dwarf4, Op::SizeLEB); Descriptions[DW_OP_GNU_addr_index] = Desc(Op::Dwarf4, Op::SizeLEB); Descriptions[DW_OP_GNU_const_index] = Desc(Op::Dwarf4, Op::SizeLEB); Descriptions[DW_OP_GNU_entry_value] = Desc(Op::Dwarf4, Op::SizeLEB); - + Descriptions[DW_OP_addrx] = Desc(Op::Dwarf5, Op::SizeLEB); + Descriptions[DW_OP_constx] = Desc(Op::Dwarf5, Op::SizeLEB); Descriptions[DW_OP_convert] = Desc(Op::Dwarf5, Op::BaseTypeRef); Descriptions[DW_OP_entry_value] = Desc(Op::Dwarf5, Op::SizeLEB); Descriptions[DW_OP_regval_type] = Desc(Op::Dwarf5, Op::SizeLEB, Op::BaseTypeRef); - + // This Description acts as a marker that getSubOpDesc must be called + // to fetch the final Description for the operation. Each such final + // Description must share the same first SizeSubOpLEB operand. + Descriptions[DW_OP_LLVM_user] = Desc(Op::Dwarf5, Op::SizeSubOpLEB); return Descriptions; } -static DWARFExpression::Operation::Description getOpDesc(unsigned OpCode) { - // FIXME: Make this constexpr once all compilers are smart enough to do it. - static DescVector Descriptions = getDescriptions(); +static Desc getDescImpl(ArrayRef<Desc> Descriptions, unsigned Opcode) { // Handle possible corrupted or unsupported operation. 
- if (OpCode >= Descriptions.size()) + if (Opcode >= Descriptions.size()) return {}; - return Descriptions[OpCode]; + return Descriptions[Opcode]; +} + +static Desc getOpDesc(unsigned Opcode) { + static std::vector<Desc> Descriptions = getOpDescriptions(); + return getDescImpl(Descriptions, Opcode); +} + +static std::vector<Desc> getSubOpDescriptions() { + static constexpr unsigned LlvmUserDescriptionsSize = 1 +#define HANDLE_DW_OP_LLVM_USEROP(ID, NAME) +1 +#include "llvm/BinaryFormat/Dwarf.def" + ; + std::vector<Desc> Descriptions; + Descriptions.resize(LlvmUserDescriptionsSize); + Descriptions[DW_OP_LLVM_nop] = Desc(Op::Dwarf5, Op::SizeSubOpLEB); + return Descriptions; +} + +static Desc getSubOpDesc(unsigned Opcode, unsigned SubOpcode) { + assert(Opcode == DW_OP_LLVM_user); + static std::vector<Desc> Descriptions = getSubOpDescriptions(); + return getDescImpl(Descriptions, SubOpcode); } bool DWARFExpression::Operation::extract(DataExtractor Data, @@ -126,14 +148,22 @@ bool DWARFExpression::Operation::extract(DataExtractor Data, if (Desc.Version == Operation::DwarfNA) return false; - for (unsigned Operand = 0; Operand < 2; ++Operand) { + Operands.resize(Desc.Op.size()); + OperandEndOffsets.resize(Desc.Op.size()); + for (unsigned Operand = 0; Operand < Desc.Op.size(); ++Operand) { unsigned Size = Desc.Op[Operand]; unsigned Signed = Size & Operation::SignBit; - if (Size == Operation::SizeNA) - break; - switch (Size & ~Operation::SignBit) { + case Operation::SizeSubOpLEB: + assert(Operand == 0 && "SubOp operand must be the first operand"); + Operands[Operand] = Data.getULEB128(&Offset); + Desc = getSubOpDesc(Opcode, Operands[Operand]); + if (Desc.Version == Operation::DwarfNA) + return false; + assert(Desc.Op[Operand] == Operation::SizeSubOpLEB && + "SizeSubOpLEB Description must begin with SizeSubOpLEB operand"); + break; case Operation::Size1: Operands[Operand] = Data.getU8(&Offset); if (Signed) @@ -207,9 +237,9 @@ bool 
DWARFExpression::Operation::extract(DataExtractor Data, static void prettyPrintBaseTypeRef(DWARFUnit *U, raw_ostream &OS, DIDumpOptions DumpOpts, - const uint64_t Operands[2], + ArrayRef<uint64_t> Operands, unsigned Operand) { - assert(Operand < 2 && "operand out of bounds"); + assert(Operand < Operands.size() && "operand out of bounds"); auto Die = U->getDIEForOffset(U->getOffset() + Operands[Operand]); if (Die && Die.getTag() == dwarf::DW_TAG_base_type) { OS << " ("; @@ -227,7 +257,7 @@ static void prettyPrintBaseTypeRef(DWARFUnit *U, raw_ostream &OS, bool DWARFExpression::prettyPrintRegisterOp(DWARFUnit *U, raw_ostream &OS, DIDumpOptions DumpOpts, uint8_t Opcode, - const uint64_t Operands[2]) { + ArrayRef<uint64_t> Operands) { if (!DumpOpts.GetNameForDWARFReg) return false; @@ -258,6 +288,12 @@ bool DWARFExpression::prettyPrintRegisterOp(DWARFUnit *U, raw_ostream &OS, return false; } +std::optional<unsigned> DWARFExpression::Operation::getSubCode() const { + if (!Desc.Op.size() || Desc.Op[0] != Operation::SizeSubOpLEB) + return std::nullopt; + return Operands[0]; +} + bool DWARFExpression::Operation::print(raw_ostream &OS, DIDumpOptions DumpOpts, const DWARFExpression *Expr, DWARFUnit *U) const { @@ -277,14 +313,15 @@ bool DWARFExpression::Operation::print(raw_ostream &OS, DIDumpOptions DumpOpts, if (prettyPrintRegisterOp(U, OS, DumpOpts, Opcode, Operands)) return true; - for (unsigned Operand = 0; Operand < 2; ++Operand) { + for (unsigned Operand = 0; Operand < Desc.Op.size(); ++Operand) { unsigned Size = Desc.Op[Operand]; unsigned Signed = Size & Operation::SignBit; - if (Size == Operation::SizeNA) - break; - - if (Size == Operation::BaseTypeRef && U) { + if (Size == Operation::SizeSubOpLEB) { + StringRef SubName = SubOperationEncodingString(Opcode, Operands[Operand]); + assert(!SubName.empty() && "DW_OP SubOp has no name!"); + OS << " " << SubName; + } else if (Size == Operation::BaseTypeRef && U) { // For DW_OP_convert the operand may be 0 to indicate that 
conversion to // the generic type should be done. The same holds for DW_OP_reinterpret, // which is currently not supported. @@ -355,12 +392,9 @@ void DWARFExpression::print(raw_ostream &OS, DIDumpOptions DumpOpts, } bool DWARFExpression::Operation::verify(const Operation &Op, DWARFUnit *U) { - for (unsigned Operand = 0; Operand < 2; ++Operand) { + for (unsigned Operand = 0; Operand < Op.Desc.Op.size(); ++Operand) { unsigned Size = Op.Desc.Op[Operand]; - if (Size == Operation::SizeNA) - break; - if (Size == Operation::BaseTypeRef) { // For DW_OP_convert the operand may be 0 to indicate that conversion to // the generic type should be done, so don't look up a base type in that @@ -454,6 +488,13 @@ static bool printCompactDWARFExpr( Stack.back().Kind = PrintedExpr::Value; break; } + case dwarf::DW_OP_nop: { + break; + } + case dwarf::DW_OP_LLVM_user: { + assert(Op.getSubCode() && *Op.getSubCode() == dwarf::DW_OP_LLVM_nop); + break; + } default: if (Opcode >= dwarf::DW_OP_reg0 && Opcode <= dwarf::DW_OP_reg31) { // DW_OP_reg<N>: A register, with the register num implied by the @@ -487,7 +528,10 @@ static bool printCompactDWARFExpr( ++I; } - assert(Stack.size() == 1 && "expected one value on stack"); + if (Stack.size() != 1) { + OS << "<stack of size " << Stack.size() << ", expected 1>"; + return false; + } if (Stack.front().Kind == PrintedExpr::Address) OS << "[" << Stack.front().String << "]"; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp index 5dd9515aafdb..29949ee02145 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp @@ -160,9 +160,11 @@ bool DWARFFormValue::skipValue(dwarf::Form Form, DataExtractor DebugInfoData, case DW_FORM_ref_sup8: case DW_FORM_strx1: case DW_FORM_strx2: + case DW_FORM_strx3: case DW_FORM_strx4: case DW_FORM_addrx1: case DW_FORM_addrx2: + case DW_FORM_addrx3: case DW_FORM_addrx4: case DW_FORM_sec_offset: case DW_FORM_strp: @@ 
-212,35 +214,7 @@ bool DWARFFormValue::skipValue(dwarf::Form Form, DataExtractor DebugInfoData, } bool DWARFFormValue::isFormClass(DWARFFormValue::FormClass FC) const { - // First, check DWARF5 form classes. - if (Form < ArrayRef(DWARF5FormClasses).size() && - DWARF5FormClasses[Form] == FC) - return true; - // Check more forms from extensions and proposals. - switch (Form) { - case DW_FORM_GNU_ref_alt: - return (FC == FC_Reference); - case DW_FORM_GNU_addr_index: - return (FC == FC_Address); - case DW_FORM_GNU_str_index: - case DW_FORM_GNU_strp_alt: - return (FC == FC_String); - case DW_FORM_LLVM_addrx_offset: - return (FC == FC_Address); - default: - break; - } - - if (FC == FC_SectionOffset) { - if (Form == DW_FORM_strp || Form == DW_FORM_line_strp) - return true; - // In DWARF3 DW_FORM_data4 and DW_FORM_data8 served also as a section - // offset. If we don't have a DWARFUnit, default to the old behavior. - if (Form == DW_FORM_data4 || Form == DW_FORM_data8) - return !U || U->getVersion() <= 3; - } - - return false; + return doesFormBelongToClass(Form, FC, U ? 
U->getVersion() : 3); } bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data, @@ -300,6 +274,7 @@ bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data, Value.uval = Data.getU16(OffsetPtr, &Err); break; case DW_FORM_strx3: + case DW_FORM_addrx3: Value.uval = Data.getU24(OffsetPtr, &Err); break; case DW_FORM_data4: @@ -420,39 +395,27 @@ void DWARFFormValue::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const { case DW_FORM_addrx2: case DW_FORM_addrx3: case DW_FORM_addrx4: - case DW_FORM_GNU_addr_index: { + case DW_FORM_GNU_addr_index: + case DW_FORM_LLVM_addrx_offset: { if (U == nullptr) { OS << "<invalid dwarf unit>"; break; } - std::optional<object::SectionedAddress> A = - U->getAddrOffsetSectionItem(UValue); - if (!A || DumpOpts.Verbose) - AddrOS << format("indexed (%8.8x) address = ", (uint32_t)UValue); + std::optional<object::SectionedAddress> A = getAsSectionedAddress(); + if (!A || DumpOpts.Verbose) { + if (Form == DW_FORM_LLVM_addrx_offset) { + uint32_t Index = UValue >> 32; + uint32_t Offset = UValue & 0xffffffff; + AddrOS << format("indexed (%8.8x) + 0x%x address = ", Index, Offset); + } else + AddrOS << format("indexed (%8.8x) address = ", (uint32_t)UValue); + } if (A) dumpSectionedAddress(AddrOS, DumpOpts, *A); else OS << "<unresolved>"; break; } - case DW_FORM_LLVM_addrx_offset: { - if (U == nullptr) { - OS << "<invalid dwarf unit>"; - break; - } - uint32_t Index = UValue >> 32; - uint32_t Offset = UValue & 0xffffffff; - std::optional<object::SectionedAddress> A = - U->getAddrOffsetSectionItem(Index); - if (!A || DumpOpts.Verbose) - AddrOS << format("indexed (%8.8x) + 0x%x address = ", Index, Offset); - if (A) { - A->Address += Offset; - dumpSectionedAddress(AddrOS, DumpOpts, *A); - } else - OS << "<unresolved>"; - break; - } case DW_FORM_flag_present: OS << "true"; break; @@ -652,16 +615,18 @@ Expected<const char *> DWARFFormValue::getAsCString() const { } // Prefer the Unit's string extractor, because for .dwo it will point to 
// .debug_str.dwo, while the Context's extractor always uses .debug_str. - DataExtractor StrData = Form == DW_FORM_line_strp - ? C->getLineStringExtractor() - : U ? U->getStringExtractor() - : C->getStringExtractor(); + bool IsDebugLineString = Form == DW_FORM_line_strp; + DataExtractor StrData = + IsDebugLineString ? C->getLineStringExtractor() + : U ? U->getStringExtractor() : C->getStringExtractor(); if (const char *Str = StrData.getCStr(&Offset)) return Str; std::string Msg = FormEncodingString(Form).str(); if (Index) Msg += (" uses index " + Twine(*Index) + ", but the referenced string").str(); - Msg += (" offset " + Twine(Offset) + " is beyond .debug_str bounds").str(); + Msg += (" offset " + Twine(Offset) + " is beyond " + + (IsDebugLineString ? ".debug_line_str" : ".debug_str") + " bounds") + .str(); return make_error<StringError>(Msg, inconvertibleErrorCode()); } @@ -672,12 +637,14 @@ std::optional<uint64_t> DWARFFormValue::getAsAddress() const { return std::nullopt; } -std::optional<object::SectionedAddress> -DWARFFormValue::getAsSectionedAddress() const { - if (!isFormClass(FC_Address)) +std::optional<object::SectionedAddress> DWARFFormValue::getAsSectionedAddress( + const ValueType &Value, const dwarf::Form Form, const DWARFUnit *U) { + if (!doesFormBelongToClass(Form, FC_Address, U ? U->getVersion() : 3)) return std::nullopt; bool AddrOffset = Form == dwarf::DW_FORM_LLVM_addrx_offset; - if (Form == DW_FORM_GNU_addr_index || Form == DW_FORM_addrx || AddrOffset) { + if (Form == DW_FORM_GNU_addr_index || Form == DW_FORM_addrx || + Form == DW_FORM_addrx1 || Form == DW_FORM_addrx2 || + Form == DW_FORM_addrx3 || Form == DW_FORM_addrx4 || AddrOffset) { uint32_t Index = AddrOffset ? 
(Value.uval >> 32) : Value.uval; if (!U) @@ -693,6 +660,11 @@ DWARFFormValue::getAsSectionedAddress() const { return {{Value.uval, Value.SectionIndex}}; } +std::optional<object::SectionedAddress> +DWARFFormValue::getAsSectionedAddress() const { + return getAsSectionedAddress(Value, Form, U); +} + std::optional<uint64_t> DWARFFormValue::getAsReference() const { if (auto R = getAsRelativeReference()) return R->Unit ? R->Unit->getOffset() + R->Offset : R->Offset; @@ -785,3 +757,33 @@ DWARFFormValue::getAsFile(DILineInfoSpecifier::FileLineInfoKind Kind) const { } return std::nullopt; } + +bool llvm::dwarf::doesFormBelongToClass(dwarf::Form Form, DWARFFormValue::FormClass FC, + uint16_t DwarfVersion) { + // First, check DWARF5 form classes. + if (Form < ArrayRef(DWARF5FormClasses).size() && + DWARF5FormClasses[Form] == FC) + return true; + // Check more forms from extensions and proposals. + switch (Form) { + case DW_FORM_GNU_ref_alt: + return (FC == DWARFFormValue::FC_Reference); + case DW_FORM_GNU_addr_index: + return (FC == DWARFFormValue::FC_Address); + case DW_FORM_GNU_str_index: + case DW_FORM_GNU_strp_alt: + return (FC == DWARFFormValue::FC_String); + case DW_FORM_LLVM_addrx_offset: + return (FC == DWARFFormValue::FC_Address); + case DW_FORM_strp: + case DW_FORM_line_strp: + return (FC == DWARFFormValue::FC_SectionOffset); + case DW_FORM_data4: + case DW_FORM_data8: + // In DWARF3 DW_FORM_data4 and DW_FORM_data8 served also as a section + // offset. 
+ return (FC == DWARFFormValue::FC_SectionOffset) && (DwarfVersion <= 3); + default: + return false; + } +} diff --git a/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp b/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp index 3f140d21c53c..987e63963a06 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp @@ -16,6 +16,7 @@ #include <cassert> #include <cinttypes> #include <cstdint> +#include <set> #include <utility> using namespace llvm; @@ -114,9 +115,9 @@ void DWARFGdbIndex::dump(raw_ostream &OS) { bool DWARFGdbIndex::parseImpl(DataExtractor Data) { uint64_t Offset = 0; - // Only version 7 is supported at this moment. + // Only version 7 and 8 are supported at this moment. Version = Data.getU32(&Offset); - if (Version != 7) + if (Version != 7 && Version != 8) return false; CuListOffset = Data.getU32(&Offset); @@ -166,25 +167,26 @@ bool DWARFGdbIndex::parseImpl(DataExtractor Data) { // for both a string and a CU vector. uint32_t SymTableSize = (ConstantPoolOffset - SymbolTableOffset) / 8; SymbolTable.reserve(SymTableSize); - uint32_t CuVectorsTotal = 0; + std::set<uint32_t> CUOffsets; for (uint32_t i = 0; i < SymTableSize; ++i) { uint32_t NameOffset = Data.getU32(&Offset); uint32_t CuVecOffset = Data.getU32(&Offset); SymbolTable.push_back({NameOffset, CuVecOffset}); if (NameOffset || CuVecOffset) - ++CuVectorsTotal; + CUOffsets.insert(CuVecOffset); } // The constant pool. CU vectors are stored first, followed by strings. // The first value is the number of CU indices in the vector. Each subsequent // value is the index and symbol attributes of a CU in the CU list. 
- for (uint32_t i = 0; i < CuVectorsTotal; ++i) { + for (auto CUOffset : CUOffsets) { + Offset = ConstantPoolOffset + CUOffset; ConstantPoolVectors.emplace_back(0, SmallVector<uint32_t, 0>()); auto &Vec = ConstantPoolVectors.back(); Vec.first = Offset - ConstantPoolOffset; uint32_t Num = Data.getU32(&Offset); - for (uint32_t j = 0; j < Num; ++j) + for (uint32_t J = 0; J < Num; ++J) Vec.second.push_back(Data.getU32(&Offset)); } diff --git a/llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp b/llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp index 6a1423d37d9f..c474de607626 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp @@ -424,11 +424,11 @@ bool DWARFTypePrinter::appendTemplateParameters(DWARFDie D, OS << (char)Val; OS << "'"; } else if (Val < 256) - OS << to_string(llvm::format("'\\x%02x'", Val)); + OS << llvm::format("'\\x%02" PRIx64 "'", Val); else if (Val <= 0xFFFF) - OS << to_string(llvm::format("'\\u%04x'", Val)); + OS << llvm::format("'\\u%04" PRIx64 "'", Val); else - OS << to_string(llvm::format("'\\U%08x'", Val)); + OS << llvm::format("'\\U%08" PRIx64 "'", Val); } } continue; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp index c199e0118a6f..19678f121982 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp @@ -176,7 +176,7 @@ DWARFUnitVector::getUnitForIndexEntry(const DWARFUnitIndex::Entry &E) { auto U = Parser(Offset, DW_SECT_INFO, nullptr, &E); if (!U) - U = nullptr; + return nullptr; auto *NewCU = U.get(); this->insert(CU, std::move(U)); @@ -1040,8 +1040,16 @@ DWARFUnit::getLastChildEntry(const DWARFDebugInfoEntry *Die) const { } const DWARFAbbreviationDeclarationSet *DWARFUnit::getAbbreviations() const { - if (!Abbrevs) - Abbrevs = Abbrev->getAbbreviationDeclarationSet(getAbbreviationsOffset()); + if (!Abbrevs) { + Expected<const DWARFAbbreviationDeclarationSet *> AbbrevsOrError = + 
Abbrev->getAbbreviationDeclarationSet(getAbbreviationsOffset()); + if (!AbbrevsOrError) { + // FIXME: We should propagate this error upwards. + consumeError(AbbrevsOrError.takeError()); + return nullptr; + } + Abbrevs = *AbbrevsOrError; + } return Abbrevs; } @@ -1049,7 +1057,7 @@ std::optional<object::SectionedAddress> DWARFUnit::getBaseAddress() { if (BaseAddr) return BaseAddr; - DWARFDie UnitDie = getUnitDIE(); + DWARFDie UnitDie = (SU ? SU : this)->getUnitDIE(); std::optional<DWARFFormValue> PC = UnitDie.find({DW_AT_low_pc, DW_AT_entry_pc}); BaseAddr = toSectionedAddress(PC); diff --git a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp index c90237d4cb77..58900e1e80cb 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp @@ -150,8 +150,15 @@ bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData, AddrSize = DebugInfoData.getU8(Offset); } - if (!DCtx.getDebugAbbrev()->getAbbreviationDeclarationSet(AbbrOffset)) + Expected<const DWARFAbbreviationDeclarationSet *> AbbrevSetOrErr = + DCtx.getDebugAbbrev()->getAbbreviationDeclarationSet(AbbrOffset); + if (!AbbrevSetOrErr) { ValidAbbrevOffset = false; + // FIXME: A problematic debug_abbrev section is reported below in the form + // of a `note:`. We should propagate this error there (or elsewhere) to + // avoid losing the specific problem with the debug_abbrev section. 
+ consumeError(AbbrevSetOrErr.takeError()); + } ValidLength = DebugInfoData.isValidOffset(OffsetStart + Length + 3); ValidVersion = DWARFContext::isSupportedVersion(Version); @@ -299,20 +306,27 @@ unsigned DWARFVerifier::verifyDebugInfoCallSite(const DWARFDie &Die) { } unsigned DWARFVerifier::verifyAbbrevSection(const DWARFDebugAbbrev *Abbrev) { + if (!Abbrev) + return 0; + + Expected<const DWARFAbbreviationDeclarationSet *> AbbrDeclsOrErr = + Abbrev->getAbbreviationDeclarationSet(0); + if (!AbbrDeclsOrErr) { + error() << toString(AbbrDeclsOrErr.takeError()) << "\n"; + return 1; + } + + const auto *AbbrDecls = *AbbrDeclsOrErr; unsigned NumErrors = 0; - if (Abbrev) { - const DWARFAbbreviationDeclarationSet *AbbrDecls = - Abbrev->getAbbreviationDeclarationSet(0); - for (auto AbbrDecl : *AbbrDecls) { - SmallDenseSet<uint16_t> AttributeSet; - for (auto Attribute : AbbrDecl.attributes()) { - auto Result = AttributeSet.insert(Attribute.Attr); - if (!Result.second) { - error() << "Abbreviation declaration contains multiple " - << AttributeString(Attribute.Attr) << " attributes.\n"; - AbbrDecl.dump(OS); - ++NumErrors; - } + for (auto AbbrDecl : *AbbrDecls) { + SmallDenseSet<uint16_t> AttributeSet; + for (auto Attribute : AbbrDecl.attributes()) { + auto Result = AttributeSet.insert(Attribute.Attr); + if (!Result.second) { + error() << "Abbreviation declaration contains multiple " + << AttributeString(Attribute.Attr) << " attributes.\n"; + AbbrDecl.dump(OS); + ++NumErrors; } } } @@ -777,7 +791,8 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die, case DW_FORM_strx1: case DW_FORM_strx2: case DW_FORM_strx3: - case DW_FORM_strx4: { + case DW_FORM_strx4: + case DW_FORM_line_strp: { if (Error E = AttrValue.Value.getAsCString().takeError()) { ++NumErrors; error() << toString(std::move(E)) << ":\n"; @@ -867,8 +882,10 @@ void DWARFVerifier::verifyDebugLineRows() { continue; // Verify prologue. 
+ bool isDWARF5 = LineTable->Prologue.getVersion() >= 5; uint32_t MaxDirIndex = LineTable->Prologue.IncludeDirectories.size(); - uint32_t FileIndex = 1; + uint32_t MinFileIndex = isDWARF5 ? 0 : 1; + uint32_t FileIndex = MinFileIndex; StringMap<uint16_t> FullPathMap; for (const auto &FileName : LineTable->Prologue.FileNames) { // Verify directory index. @@ -926,12 +943,11 @@ void DWARFVerifier::verifyDebugLineRows() { // Verify file index. if (!LineTable->hasFileAtIndex(Row.File)) { ++NumDebugLineErrors; - bool isDWARF5 = LineTable->Prologue.getVersion() >= 5; error() << ".debug_line[" << format("0x%08" PRIx64, *toSectionOffset(Die.find(DW_AT_stmt_list))) << "][" << RowIndex << "] has invalid file index " << Row.File - << " (valid values are [" << (isDWARF5 ? "0," : "1,") + << " (valid values are [" << MinFileIndex << ',' << LineTable->Prologue.FileNames.size() << (isDWARF5 ? ")" : "]") << "):\n"; DWARFDebugLine::Row::dumpTableHeader(OS, 0); @@ -1627,6 +1643,116 @@ bool DWARFVerifier::handleAccelTables() { return NumErrors == 0; } +bool DWARFVerifier::handleDebugStrOffsets() { + OS << "Verifying .debug_str_offsets...\n"; + const DWARFObject &DObj = DCtx.getDWARFObj(); + bool Success = true; + Success &= verifyDebugStrOffsets( + ".debug_str_offsets.dwo", DObj.getStrOffsetsDWOSection(), + DObj.getStrDWOSection(), &DWARFObject::forEachInfoDWOSections); + Success &= verifyDebugStrOffsets( + ".debug_str_offsets", DObj.getStrOffsetsSection(), DObj.getStrSection(), + &DWARFObject::forEachInfoSections); + return Success; +} + +bool DWARFVerifier::verifyDebugStrOffsets( + StringRef SectionName, const DWARFSection &Section, StringRef StrData, + void (DWARFObject::*VisitInfoSections)( + function_ref<void(const DWARFSection &)>) const) { + const DWARFObject &DObj = DCtx.getDWARFObj(); + uint16_t InfoVersion = 0; + DwarfFormat InfoFormat = DwarfFormat::DWARF32; + (DObj.*VisitInfoSections)([&](const DWARFSection &S) { + if (InfoVersion) + return; + DWARFDataExtractor 
DebugInfoData(DObj, S, DCtx.isLittleEndian(), 0); + uint64_t Offset = 0; + InfoFormat = DebugInfoData.getInitialLength(&Offset).second; + InfoVersion = DebugInfoData.getU16(&Offset); + }); + + DWARFDataExtractor DA(DObj, Section, DCtx.isLittleEndian(), 0); + + DataExtractor::Cursor C(0); + uint64_t NextUnit = 0; + bool Success = true; + while (C.seek(NextUnit), C.tell() < DA.getData().size()) { + DwarfFormat Format; + uint64_t Length; + uint64_t StartOffset = C.tell(); + if (InfoVersion == 4) { + Format = InfoFormat; + Length = DA.getData().size(); + NextUnit = C.tell() + Length; + } else { + std::tie(Length, Format) = DA.getInitialLength(C); + if (!C) + break; + if (C.tell() + Length > DA.getData().size()) { + error() << formatv( + "{0}: contribution {1:X}: length exceeds available space " + "(contribution " + "offset ({1:X}) + length field space ({2:X}) + length ({3:X}) == " + "{4:X} > section size {5:X})\n", + SectionName, StartOffset, C.tell() - StartOffset, Length, + C.tell() + Length, DA.getData().size()); + Success = false; + // Nothing more to do - no other contributions to try. + break; + } + NextUnit = C.tell() + Length; + uint8_t Version = DA.getU16(C); + if (C && Version != 5) { + error() << formatv("{0}: contribution {1:X}: invalid version {2}\n", + SectionName, StartOffset, Version); + Success = false; + // Can't parse the rest of this contribution, since we don't know the + // version, but we can pick up with the next contribution. 
+ continue; + } + (void)DA.getU16(C); // padding + } + uint64_t OffsetByteSize = getDwarfOffsetByteSize(Format); + DA.setAddressSize(OffsetByteSize); + uint64_t Remainder = (Length - 4) % OffsetByteSize; + if (Remainder != 0) { + error() << formatv( + "{0}: contribution {1:X}: invalid length ((length ({2:X}) " + "- header (0x4)) % offset size {3:X} == {4:X} != 0)\n", + SectionName, StartOffset, Length, OffsetByteSize, Remainder); + Success = false; + } + for (uint64_t Index = 0; C && C.tell() + OffsetByteSize <= NextUnit; ++Index) { + uint64_t OffOff = C.tell(); + uint64_t StrOff = DA.getAddress(C); + // check StrOff refers to the start of a string + if (StrOff == 0) + continue; + if (StrData.size() <= StrOff) { + error() << formatv( + "{0}: contribution {1:X}: index {2:X}: invalid string " + "offset *{3:X} == {4:X}, is beyond the bounds of the string section of length {5:X}\n", + SectionName, StartOffset, Index, OffOff, StrOff, StrData.size()); + continue; + } + if (StrData[StrOff - 1] == '\0') + continue; + error() << formatv("{0}: contribution {1:X}: index {2:X}: invalid string " + "offset *{3:X} == {4:X}, is neither zero nor " + "immediately following a null character\n", + SectionName, StartOffset, Index, OffOff, StrOff); + Success = false; + } + } + + if (Error E = C.takeError()) { + error() << SectionName << ": " << toString(std::move(E)) << '\n'; + return false; + } + return Success; +} + raw_ostream &DWARFVerifier::error() const { return WithColor::error(OS); } raw_ostream &DWARFVerifier::warn() const { return WithColor::warning(OS); } diff --git a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp index 51058fc09cf1..145a43d3b381 100644 --- a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp +++ b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp @@ -96,57 +96,83 @@ llvm::Expected<FunctionInfo> FunctionInfo::decode(DataExtractor &Data, return std::move(FI); } -llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &O) const { +uint64_t 
FunctionInfo::cacheEncoding() { + EncodingCache.clear(); + if (!isValid()) + return 0; + raw_svector_ostream OutStrm(EncodingCache); + FileWriter FW(OutStrm, support::endian::system_endianness()); + llvm::Expected<uint64_t> Result = encode(FW); + if (!Result) { + EncodingCache.clear(); + consumeError(Result.takeError()); + return 0; + } + return EncodingCache.size(); +} + +llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &Out) const { if (!isValid()) return createStringError(std::errc::invalid_argument, "attempted to encode invalid FunctionInfo object"); // Align FunctionInfo data to a 4 byte alignment. - O.alignTo(4); - const uint64_t FuncInfoOffset = O.tell(); + Out.alignTo(4); + const uint64_t FuncInfoOffset = Out.tell(); + // Check if we have already encoded this function info into EncodingCache. + // This will be non empty when creating segmented GSYM files as we need to + // precompute exactly how big FunctionInfo objects encode into so we can + // accurately make segments of a specific size. + if (!EncodingCache.empty() && + support::endian::system_endianness() == Out.getByteOrder()) { + // We already encoded this object, just write out the bytes. + Out.writeData(llvm::ArrayRef<uint8_t>((const uint8_t *)EncodingCache.data(), + EncodingCache.size())); + return FuncInfoOffset; + } // Write the size in bytes of this function as a uint32_t. This can be zero // if we just have a symbol from a symbol table and that symbol has no size. - O.writeU32(size()); + Out.writeU32(size()); // Write the name of this function as a uint32_t string table offset. - O.writeU32(Name); + Out.writeU32(Name); if (OptLineTable) { - O.writeU32(InfoType::LineTableInfo); + Out.writeU32(InfoType::LineTableInfo); // Write a uint32_t length as zero for now, we will fix this up after // writing the LineTable out with the number of bytes that were written. 
- O.writeU32(0); - const auto StartOffset = O.tell(); - llvm::Error err = OptLineTable->encode(O, Range.start()); + Out.writeU32(0); + const auto StartOffset = Out.tell(); + llvm::Error err = OptLineTable->encode(Out, Range.start()); if (err) return std::move(err); - const auto Length = O.tell() - StartOffset; + const auto Length = Out.tell() - StartOffset; if (Length > UINT32_MAX) return createStringError(std::errc::invalid_argument, "LineTable length is greater than UINT32_MAX"); // Fixup the size of the LineTable data with the correct size. - O.fixup32(static_cast<uint32_t>(Length), StartOffset - 4); + Out.fixup32(static_cast<uint32_t>(Length), StartOffset - 4); } // Write out the inline function info if we have any and if it is valid. if (Inline) { - O.writeU32(InfoType::InlineInfo); + Out.writeU32(InfoType::InlineInfo); // Write a uint32_t length as zero for now, we will fix this up after // writing the LineTable out with the number of bytes that were written. - O.writeU32(0); - const auto StartOffset = O.tell(); - llvm::Error err = Inline->encode(O, Range.start()); + Out.writeU32(0); + const auto StartOffset = Out.tell(); + llvm::Error err = Inline->encode(Out, Range.start()); if (err) return std::move(err); - const auto Length = O.tell() - StartOffset; + const auto Length = Out.tell() - StartOffset; if (Length > UINT32_MAX) return createStringError(std::errc::invalid_argument, "InlineInfo length is greater than UINT32_MAX"); // Fixup the size of the InlineInfo data with the correct size. 
- O.fixup32(static_cast<uint32_t>(Length), StartOffset - 4); + Out.fixup32(static_cast<uint32_t>(Length), StartOffset - 4); } // Terminate the data chunks with and end of list with zero size - O.writeU32(InfoType::EndOfList); - O.writeU32(0); + Out.writeU32(InfoType::EndOfList); + Out.writeU32(0); return FuncInfoOffset; } diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp index 8281938770cf..60b6dbc6a12d 100644 --- a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp +++ b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp @@ -34,8 +34,10 @@ uint32_t GsymCreator::insertFile(StringRef Path, llvm::sys::path::Style Style) { // requirements. const uint32_t Dir = insertString(directory); const uint32_t Base = insertString(filename); - FileEntry FE(Dir, Base); + return insertFileEntry(FileEntry(Dir, Base)); +} +uint32_t GsymCreator::insertFileEntry(FileEntry FE) { std::lock_guard<std::mutex> Guard(Mutex); const auto NextIndex = Files.size(); // Find FE in hash map and insert if not present. @@ -45,8 +47,26 @@ uint32_t GsymCreator::insertFile(StringRef Path, llvm::sys::path::Style Style) { return R.first->second; } +uint32_t GsymCreator::copyFile(const GsymCreator &SrcGC, uint32_t FileIdx) { + // File index zero is reserved for a FileEntry with no directory and no + // filename. Any other file and we need to copy the strings for the directory + // and filename. + if (FileIdx == 0) + return 0; + const FileEntry SrcFE = SrcGC.Files[FileIdx]; + // Copy the strings for the file and then add the newly converted file entry. 
+ uint32_t Dir = StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Dir)->second); + uint32_t Base = StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Base)->second); + FileEntry DstFE(Dir, Base); + return insertFileEntry(DstFE); +} + + llvm::Error GsymCreator::save(StringRef Path, - llvm::support::endianness ByteOrder) const { + llvm::support::endianness ByteOrder, + std::optional<uint64_t> SegmentSize) const { + if (SegmentSize) + return saveSegments(Path, ByteOrder, *SegmentSize); std::error_code EC; raw_fd_ostream OutStrm(Path, EC); if (EC) @@ -68,16 +88,17 @@ llvm::Error GsymCreator::encode(FileWriter &O) const { return createStringError(std::errc::invalid_argument, "too many FunctionInfos"); - const uint64_t MinAddr = - BaseAddress ? *BaseAddress : Funcs.front().startAddress(); - const uint64_t MaxAddr = Funcs.back().startAddress(); - const uint64_t AddrDelta = MaxAddr - MinAddr; + std::optional<uint64_t> BaseAddress = getBaseAddress(); + // Base address should be valid if we have any functions. + if (!BaseAddress) + return createStringError(std::errc::invalid_argument, + "invalid base address"); Header Hdr; Hdr.Magic = GSYM_MAGIC; Hdr.Version = GSYM_VERSION; - Hdr.AddrOffSize = 0; + Hdr.AddrOffSize = getAddressOffsetSize(); Hdr.UUIDSize = static_cast<uint8_t>(UUID.size()); - Hdr.BaseAddress = MinAddr; + Hdr.BaseAddress = *BaseAddress; Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size()); Hdr.StrtabOffset = 0; // We will fix this up later. Hdr.StrtabSize = 0; // We will fix this up later. @@ -85,15 +106,6 @@ llvm::Error GsymCreator::encode(FileWriter &O) const { if (UUID.size() > sizeof(Hdr.UUID)) return createStringError(std::errc::invalid_argument, "invalid UUID size %u", (uint32_t)UUID.size()); - // Set the address offset size correctly in the GSYM header. 
- if (AddrDelta <= UINT8_MAX) - Hdr.AddrOffSize = 1; - else if (AddrDelta <= UINT16_MAX) - Hdr.AddrOffSize = 2; - else if (AddrDelta <= UINT32_MAX) - Hdr.AddrOffSize = 4; - else - Hdr.AddrOffSize = 8; // Copy the UUID value if we have one. if (UUID.size() > 0) memcpy(Hdr.UUID, UUID.data(), UUID.size()); @@ -102,10 +114,17 @@ llvm::Error GsymCreator::encode(FileWriter &O) const { if (Err) return Err; + const uint64_t MaxAddressOffset = getMaxAddressOffset(); // Write out the address offsets. O.alignTo(Hdr.AddrOffSize); for (const auto &FuncInfo : Funcs) { uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress; + // Make sure we calculated the address offsets byte size correctly by + // verifying the current address offset is within ranges. We have seen bugs + // introduced when the code changes that can cause problems here so it is + // good to catch this during testing. + assert(AddrOffset <= MaxAddressOffset); + (void)MaxAddressOffset; switch (Hdr.AddrOffSize) { case 1: O.writeU8(static_cast<uint8_t>(AddrOffset)); @@ -142,7 +161,7 @@ llvm::Error GsymCreator::encode(FileWriter &O) const { O.writeU32(File.Base); } - // Write out the sting table. + // Write out the string table. const off_t StrtabOffset = O.tell(); StrTab.write(O.get_stream()); const off_t StrtabSize = O.tell() - StrtabOffset; @@ -300,6 +319,13 @@ llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) { return Error::success(); } +uint32_t GsymCreator::copyString(const GsymCreator &SrcGC, uint32_t StrOff) { + // String offset at zero is always the empty string, no copying needed. 
+ if (StrOff == 0) + return 0; + return StrTab.add(SrcGC.StringOffsetMap.find(StrOff)->second); +} + uint32_t GsymCreator::insertString(StringRef S, bool Copy) { if (S.empty()) return 0; @@ -318,7 +344,13 @@ uint32_t GsymCreator::insertString(StringRef S, bool Copy) { CHStr = CachedHashStringRef{StringStorage.insert(S).first->getKey(), CHStr.hash()}; } - return StrTab.add(CHStr); + const uint32_t StrOff = StrTab.add(CHStr); + // Save a mapping of string offsets to the cached string reference in case + // we need to segment the GSYM file and copy string from one string table to + // another. + if (StringOffsetMap.count(StrOff) == 0) + StringOffsetMap.insert(std::make_pair(StrOff, CHStr)); + return StrOff; } void GsymCreator::addFunctionInfo(FunctionInfo &&FI) { @@ -360,3 +392,187 @@ bool GsymCreator::hasFunctionInfoForAddress(uint64_t Addr) const { std::lock_guard<std::mutex> Guard(Mutex); return Ranges.contains(Addr); } + +std::optional<uint64_t> GsymCreator::getFirstFunctionAddress() const { + if (Finalized && !Funcs.empty()) + return std::optional<uint64_t>(Funcs.front().startAddress()); + // This code gets used by the segmentation of GSYM files to help determine the + // size of the GSYM header while continually adding new FunctionInfo objects + // to this object, so we haven't finalized this object yet. + if (Ranges.empty()) + return std::nullopt; + return std::optional<uint64_t>(Ranges.begin()->start()); +} + +std::optional<uint64_t> GsymCreator::getLastFunctionAddress() const { + if (Finalized && !Funcs.empty()) + return std::optional<uint64_t>(Funcs.back().startAddress()); + // This code gets used by the segmentation of GSYM files to help determine the + // size of the GSYM header while continually adding new FunctionInfo objects + // to this object, so we haven't finalized this object yet. 
+ if (Ranges.empty()) + return std::nullopt; + return std::optional<uint64_t>((Ranges.end() - 1)->end()); +} + +std::optional<uint64_t> GsymCreator::getBaseAddress() const { + if (BaseAddress) + return BaseAddress; + return getFirstFunctionAddress(); +} + +uint64_t GsymCreator::getMaxAddressOffset() const { + switch (getAddressOffsetSize()) { + case 1: return UINT8_MAX; + case 2: return UINT16_MAX; + case 4: return UINT32_MAX; + case 8: return UINT64_MAX; + } + llvm_unreachable("invalid address offset"); +} + +uint8_t GsymCreator::getAddressOffsetSize() const { + const std::optional<uint64_t> BaseAddress = getBaseAddress(); + const std::optional<uint64_t> LastFuncAddr = getLastFunctionAddress(); + if (BaseAddress && LastFuncAddr) { + const uint64_t AddrDelta = *LastFuncAddr - *BaseAddress; + if (AddrDelta <= UINT8_MAX) + return 1; + else if (AddrDelta <= UINT16_MAX) + return 2; + else if (AddrDelta <= UINT32_MAX) + return 4; + return 8; + } + return 1; +} + +uint64_t GsymCreator::calculateHeaderAndTableSize() const { + uint64_t Size = sizeof(Header); + const size_t NumFuncs = Funcs.size(); + // Add size of address offset table + Size += NumFuncs * getAddressOffsetSize(); + // Add size of address info offsets which are 32 bit integers in version 1. + Size += NumFuncs * sizeof(uint32_t); + // Add file table size + Size += Files.size() * sizeof(FileEntry); + // Add string table size + Size += StrTab.getSize(); + + return Size; +} + +// This function takes a InlineInfo class that was copy constructed from an +// InlineInfo from the \a SrcGC and updates all members that point to strings +// and files to point to strings and files from this GsymCreator. 
+void GsymCreator::fixupInlineInfo(const GsymCreator &SrcGC, InlineInfo &II) { + II.Name = copyString(SrcGC, II.Name); + II.CallFile = copyFile(SrcGC, II.CallFile); + for (auto &ChildII: II.Children) + fixupInlineInfo(SrcGC, ChildII); +} + +uint64_t GsymCreator::copyFunctionInfo(const GsymCreator &SrcGC, size_t FuncIdx) { + // To copy a function info we need to copy any files and strings over into + // this GsymCreator and then copy the function info and update the string + // table offsets to match the new offsets. + const FunctionInfo &SrcFI = SrcGC.Funcs[FuncIdx]; + Ranges.insert(SrcFI.Range); + + FunctionInfo DstFI; + DstFI.Range = SrcFI.Range; + DstFI.Name = copyString(SrcGC, SrcFI.Name); + // Copy the line table if there is one. + if (SrcFI.OptLineTable) { + // Copy the entire line table. + DstFI.OptLineTable = LineTable(SrcFI.OptLineTable.value()); + // Fixup all LineEntry::File entries which are indexes in the file table + // from SrcGC and must be converted to file indexes from this GsymCreator. + LineTable &DstLT = DstFI.OptLineTable.value(); + const size_t NumLines = DstLT.size(); + for (size_t I=0; I<NumLines; ++I) { + LineEntry &LE = DstLT.get(I); + LE.File = copyFile(SrcGC, LE.File); + } + } + // Copy the inline information if needed. + if (SrcFI.Inline) { + // Make a copy of the source inline information. + DstFI.Inline = SrcFI.Inline.value(); + // Fixup all strings and files in the copied inline information. 
+ fixupInlineInfo(SrcGC, *DstFI.Inline); + } + std::lock_guard<std::mutex> Guard(Mutex); + Funcs.push_back(DstFI); + return Funcs.back().cacheEncoding(); +} + +llvm::Error GsymCreator::saveSegments(StringRef Path, + llvm::support::endianness ByteOrder, + uint64_t SegmentSize) const { + if (SegmentSize == 0) + return createStringError(std::errc::invalid_argument, + "invalid segment size zero"); + + size_t FuncIdx = 0; + const size_t NumFuncs = Funcs.size(); + while (FuncIdx < NumFuncs) { + llvm::Expected<std::unique_ptr<GsymCreator>> ExpectedGC = + createSegment(SegmentSize, FuncIdx); + if (ExpectedGC) { + GsymCreator *GC = ExpectedGC->get(); + if (GC == NULL) + break; // We have no more functions to encode. + raw_null_ostream ErrorStrm; + llvm::Error Err = GC->finalize(ErrorStrm); + if (Err) + return Err; + std::string SegmentedGsymPath; + raw_string_ostream SGP(SegmentedGsymPath); + std::optional<uint64_t> FirstFuncAddr = GC->getFirstFunctionAddress(); + if (FirstFuncAddr) { + SGP << Path << "-" << llvm::format_hex(*FirstFuncAddr, 1); + SGP.flush(); + Err = GC->save(SegmentedGsymPath, ByteOrder, std::nullopt); + if (Err) + return Err; + } + } else { + return ExpectedGC.takeError(); + } + } + return Error::success(); +} + +llvm::Expected<std::unique_ptr<GsymCreator>> +GsymCreator::createSegment(uint64_t SegmentSize, size_t &FuncIdx) const { + // No function entries, return empty unique pointer + if (FuncIdx >= Funcs.size()) + return std::unique_ptr<GsymCreator>(); + + std::unique_ptr<GsymCreator> GC(new GsymCreator(/*Quiet=*/true)); + // Set the base address if there is one. + if (BaseAddress) + GC->setBaseAddress(*BaseAddress); + // Copy the UUID value from this object into the new creator. + GC->setUUID(UUID); + const size_t NumFuncs = Funcs.size(); + // Track how big the function infos are for the current segment so we can + // emit segments that are close to the requested size. 
It is quick math to + // determine the current header and tables sizes, so we can do that each loop. + uint64_t SegmentFuncInfosSize = 0; + for (; FuncIdx < NumFuncs; ++FuncIdx) { + const uint64_t HeaderAndTableSize = GC->calculateHeaderAndTableSize(); + if (HeaderAndTableSize + SegmentFuncInfosSize >= SegmentSize) { + if (SegmentFuncInfosSize == 0) + return createStringError(std::errc::invalid_argument, + "a segment size of %" PRIu64 " is to small to " + "fit any function infos, specify a larger value", + SegmentSize); + + break; + } + SegmentFuncInfosSize += alignTo(GC->copyFunctionInfo(*this, FuncIdx), 4); + } + return std::move(GC); +} diff --git a/llvm/lib/DebugInfo/LogicalView/Core/LVElement.cpp b/llvm/lib/DebugInfo/LogicalView/Core/LVElement.cpp index a320752befc4..cfe304eead51 100644 --- a/llvm/lib/DebugInfo/LogicalView/Core/LVElement.cpp +++ b/llvm/lib/DebugInfo/LogicalView/Core/LVElement.cpp @@ -17,6 +17,7 @@ #include "llvm/DebugInfo/LogicalView/Core/LVType.h" using namespace llvm; +using namespace llvm::codeview; using namespace llvm::logicalview; #define DEBUG_TYPE "Element" @@ -103,6 +104,14 @@ void LVElement::setFilename(StringRef Filename) { FilenameIndex = getStringIndex(Filename); } +void LVElement::setInnerComponent(StringRef Name) { + if (Name.size()) { + StringRef InnerComponent; + std::tie(std::ignore, InnerComponent) = getInnerComponent(Name); + setName(InnerComponent); + } +} + // Return the string representation of a DIE offset. 
std::string LVElement::typeOffsetAsString() const { if (options().getAttributeOffset()) { @@ -126,6 +135,19 @@ StringRef LVElement::accessibilityString(uint32_t Access) const { } } +std::optional<uint32_t> LVElement::getAccessibilityCode(MemberAccess Access) { + switch (Access) { + case MemberAccess::Private: + return dwarf::DW_ACCESS_private; + case MemberAccess::Protected: + return dwarf::DW_ACCESS_protected; + case MemberAccess::Public: + return dwarf::DW_ACCESS_public; + default: + return std::nullopt; + } +} + StringRef LVElement::externalString() const { return getIsExternal() ? "extern" : StringRef(); } @@ -160,6 +182,21 @@ StringRef LVElement::virtualityString(uint32_t Virtuality) const { } } +std::optional<uint32_t> LVElement::getVirtualityCode(MethodKind Virtuality) { + switch (Virtuality) { + case MethodKind::Virtual: + return dwarf::DW_VIRTUALITY_virtual; + case MethodKind::PureVirtual: + return dwarf::DW_VIRTUALITY_pure_virtual; + case MethodKind::IntroducingVirtual: + case MethodKind::PureIntroducingVirtual: + // No direct equivalents in DWARF. Assume Virtual. + return dwarf::DW_VIRTUALITY_virtual; + default: + return std::nullopt; + } +} + void LVElement::resolve() { if (getIsResolved()) return; diff --git a/llvm/lib/DebugInfo/LogicalView/Core/LVLocation.cpp b/llvm/lib/DebugInfo/LogicalView/Core/LVLocation.cpp index 115b903c6c7f..17b32a5f67b4 100644 --- a/llvm/lib/DebugInfo/LogicalView/Core/LVLocation.cpp +++ b/llvm/lib/DebugInfo/LogicalView/Core/LVLocation.cpp @@ -352,7 +352,7 @@ std::string LVOperation::getOperandsCodeViewInfo() { uint16_t OperationCode = getCodeViewOperationCode(Opcode); switch (OperationCode) { - // Operands: [Offset, 0]. + // Operands: [Offset]. case codeview::SymbolKind::S_DEFRANGE_FRAMEPOINTER_REL: Stream << "frame_pointer_rel " << int(Operands[0]); break; @@ -360,7 +360,7 @@ std::string LVOperation::getOperandsCodeViewInfo() { Stream << "frame_pointer_rel_full_scope " << int(Operands[0]); break; - // Operands: [Register, 0]. 
+ // Operands: [Register]. case codeview::SymbolKind::S_DEFRANGE_REGISTER: Stream << "register " << getReader().getRegisterName(Opcode, Operands); break; @@ -375,7 +375,7 @@ std::string LVOperation::getOperandsCodeViewInfo() { << " offset " << int(Operands[1]); break; - // Operands: [Program, 0]. + // Operands: [Program]. case codeview::SymbolKind::S_DEFRANGE: Stream << "frame " << int(Operands[0]); break; @@ -576,11 +576,11 @@ void LVLocationSymbol::addObject(LVAddress LowPC, LVAddress HighPC, } // Add a Location Record. -void LVLocationSymbol::addObject(LVSmall Opcode, LVUnsigned Operand1, - LVUnsigned Operand2) { +void LVLocationSymbol::addObject(LVSmall Opcode, + ArrayRef<LVUnsigned> Operands) { if (!Entries) - Entries = new LVAutoOperations(); - Entries->emplace_back(new LVOperation(Opcode, Operand1, Operand2)); + Entries = std::make_unique<LVOperations>(); + Entries->push_back(getReader().createOperation(Opcode, Operands)); } // Based on the DWARF attribute, define the location kind. @@ -606,8 +606,7 @@ void LVLocation::setKind() { void LVLocationSymbol::updateKind() { // Update the location type for simple ones. if (Entries && Entries->size() == 1) { - LVOperation *Operation = Entries->front(); - if (dwarf::DW_OP_fbreg == Operation->getOpcode()) + if (dwarf::DW_OP_fbreg == Entries->front()->getOpcode()) setIsStackOffset(); } } @@ -660,7 +659,7 @@ void LVLocationSymbol::printExtra(raw_ostream &OS, bool Full) const { if (Full && Entries) { bool CodeViewLocation = getParentSymbol()->getHasCodeViewLocation(); std::stringstream Stream; - std::string Leading = ""; + std::string Leading; for (LVOperation *Operation : *Entries) { Stream << Leading << (CodeViewLocation ? 
Operation->getOperandsCodeViewInfo() diff --git a/llvm/lib/DebugInfo/LogicalView/Core/LVReader.cpp b/llvm/lib/DebugInfo/LogicalView/Core/LVReader.cpp index 88f66cf2093b..613452c0b501 100644 --- a/llvm/lib/DebugInfo/LogicalView/Core/LVReader.cpp +++ b/llvm/lib/DebugInfo/LogicalView/Core/LVReader.cpp @@ -182,6 +182,9 @@ Error LVReader::createSplitFolder() { // Get the filename for given object. StringRef LVReader::getFilename(LVObject *Object, size_t Index) const { + // TODO: The current CodeView Reader implementation does not have support + // for multiple compile units. Until we have a proper offset calculation, + // check only in the current compile unit. if (CompileUnits.size()) { // Get Compile Unit for the given object. LVCompileUnits::const_iterator Iter = diff --git a/llvm/lib/DebugInfo/LogicalView/Core/LVScope.cpp b/llvm/lib/DebugInfo/LogicalView/Core/LVScope.cpp index fb503f3d3e7e..2f26025d01ec 100644 --- a/llvm/lib/DebugInfo/LogicalView/Core/LVScope.cpp +++ b/llvm/lib/DebugInfo/LogicalView/Core/LVScope.cpp @@ -45,15 +45,6 @@ const char *const KindUnion = "Union"; //===----------------------------------------------------------------------===// // DWARF lexical block, such as: namespace, function, compile unit, module, etc. //===----------------------------------------------------------------------===// -LVScope::~LVScope() { - delete Types; - delete Symbols; - delete Scopes; - delete Lines; - delete Ranges; - delete Children; -} - // Return a string representation for the scope kind. 
const char *LVScope::kind() const { const char *Kind = KindUndefined; @@ -114,7 +105,7 @@ LVScopeDispatch LVScope::Dispatch = { void LVScope::addToChildren(LVElement *Element) { if (!Children) - Children = new LVElements(); + Children = std::make_unique<LVElements>(); Children->push_back(Element); } @@ -137,7 +128,7 @@ void LVScope::addElement(LVLine *Line) { assert(Line && "Invalid line."); assert(!Line->getParent() && "Line already inserted"); if (!Lines) - Lines = new LVAutoLines(); + Lines = std::make_unique<LVLines>(); // Add it to parent. Lines->push_back(Line); @@ -161,7 +152,7 @@ void LVScope::addObject(LVLocation *Location) { assert(Location && "Invalid location."); assert(!Location->getParent() && "Location already inserted"); if (!Ranges) - Ranges = new LVAutoLocations(); + Ranges = std::make_unique<LVLocations>(); // Add it to parent. Location->setParent(this); @@ -176,7 +167,7 @@ void LVScope::addElement(LVScope *Scope) { assert(Scope && "Invalid scope."); assert(!Scope->getParent() && "Scope already inserted"); if (!Scopes) - Scopes = new LVAutoScopes(); + Scopes = std::make_unique<LVScopes>(); // Add it to parent. Scopes->push_back(Scope); @@ -203,7 +194,7 @@ void LVScope::addElement(LVSymbol *Symbol) { assert(Symbol && "Invalid symbol."); assert(!Symbol->getParent() && "Symbol already inserted"); if (!Symbols) - Symbols = new LVAutoSymbols(); + Symbols = std::make_unique<LVSymbols>(); // Add it to parent. Symbols->push_back(Symbol); @@ -230,7 +221,7 @@ void LVScope::addElement(LVType *Type) { assert(Type && "Invalid type."); assert(!Type->getParent() && "Type already inserted"); if (!Types) - Types = new LVAutoTypes(); + Types = std::make_unique<LVTypes>(); // Add it to parent. Types->push_back(Type); @@ -255,7 +246,7 @@ void LVScope::addElement(LVType *Type) { // Add a pair of ranges. void LVScope::addObject(LVAddress LowerAddress, LVAddress UpperAddress) { // Pack the ranges into a Location object. 
- LVLocation *Location = new LVLocation(); + LVLocation *Location = getReader().createLocation(); Location->setLowerAddress(LowerAddress); Location->setUpperAddress(UpperAddress); Location->setIsAddressRange(); @@ -341,7 +332,7 @@ void LVScope::addMissingElements(LVScope *Reference) { // information that is incorrect for the element to be inserted. // As the symbol being added does not exist in the debug section, // use its parent scope offset, to indicate its DIE location. - LVSymbol *Symbol = new LVSymbol(); + LVSymbol *Symbol = getReader().createSymbol(); addElement(Symbol); Symbol->setOffset(getOffset()); Symbol->setIsOptimized(); @@ -598,6 +589,10 @@ Error LVScope::doPrint(bool Split, bool Match, bool Print, raw_ostream &OS, // split context, then switch to the reader output stream. raw_ostream *StreamSplit = &OS; + // Ignore the CU generated by the VS toolchain, when compiling to PDB. + if (getIsSystem() && !options().getAttributeSystem()) + return Error::success(); + // If 'Split', we use the scope name (CU name) as the ouput file; the // delimiters in the pathname, must be replaced by a normal character. if (getIsCompileUnit()) { @@ -690,7 +685,7 @@ void LVScope::sort() { if (SortFunction) { std::function<void(LVScope * Parent, LVSortFunction SortFunction)> Sort = [&](LVScope *Parent, LVSortFunction SortFunction) { - auto Traverse = [&](auto *Set, LVSortFunction SortFunction) { + auto Traverse = [&](auto &Set, LVSortFunction SortFunction) { if (Set) std::stable_sort(Set->begin(), Set->end(), SortFunction); }; @@ -877,7 +872,7 @@ bool LVScope::equalNumberOfChildren(const LVScope *Scope) const { } void LVScope::markMissingParents(const LVScope *Target, bool TraverseChildren) { - auto SetCompareState = [&](auto *Container) { + auto SetCompareState = [&](auto &Container) { if (Container) for (auto *Entry : *Container) Entry->setIsInCompare(); @@ -1356,8 +1351,7 @@ void LVScopeCompileUnit::addedElement(LVType *Type) { // Record unsuported DWARF tags. 
void LVScopeCompileUnit::addDebugTag(dwarf::Tag Target, LVOffset Offset) { - addItem<LVTagOffsetsMap, LVOffsetList, dwarf::Tag, LVOffset>(&DebugTags, - Target, Offset); + addItem<LVTagOffsetsMap, dwarf::Tag, LVOffset>(&DebugTags, Target, Offset); } // Record elements with invalid offsets. @@ -1390,8 +1384,7 @@ void LVScopeCompileUnit::addLineZero(LVLine *Line) { LVScope *Scope = Line->getParentScope(); LVOffset Offset = Scope->getOffset(); addInvalidOffset(Offset, Scope); - addItem<LVOffsetLinesMap, LVLines, LVOffset, LVLine *>(&LinesZero, Offset, - Line); + addItem<LVOffsetLinesMap, LVOffset, LVLine *>(&LinesZero, Offset, Line); } void LVScopeCompileUnit::printLocalNames(raw_ostream &OS, bool Full) const { @@ -1481,7 +1474,7 @@ void LVScopeCompileUnit::printWarnings(raw_ostream &OS, bool Full) const { PrintHeader(Header); for (LVOffsetLocationsMap::const_reference Entry : Map) { PrintElement(WarningOffsets, Entry.first); - for (const LVLocation *Location : *Entry.second) + for (const LVLocation *Location : Entry.second) OS << hexSquareString(Location->getOffset()) << " " << Location->getIntervalInfo() << "\n"; } @@ -1494,7 +1487,7 @@ void LVScopeCompileUnit::printWarnings(raw_ostream &OS, bool Full) const { OS << format("\n0x%02x", (unsigned)Entry.first) << ", " << dwarf::TagString(Entry.first) << "\n"; unsigned Count = 0; - for (const LVOffset &Offset : *Entry.second) + for (const LVOffset &Offset : Entry.second) PrintOffset(Count, Offset); OS << "\n"; } @@ -1519,7 +1512,7 @@ void LVScopeCompileUnit::printWarnings(raw_ostream &OS, bool Full) const { for (LVOffsetLinesMap::const_reference Entry : LinesZero) { PrintElement(WarningOffsets, Entry.first); unsigned Count = 0; - for (const LVLine *Line : *Entry.second) + for (const LVLine *Line : Entry.second) PrintOffset(Count, Line->getOffset()); OS << "\n"; } @@ -1795,6 +1788,8 @@ void LVScopeFunction::resolveReferences() { // DW_AT_external DW_FORM_flag_present // 00000070 DW_TAG_subprogram "bar" // 
DW_AT_specification DW_FORM_ref4 0x00000048 + // CodeView does not include any information at the class level to + // mark the member function as external. // If there is a reference linking the declaration and definition, mark // the definition as extern, to facilitate the logical view comparison. if (getHasReferenceSpecification()) { @@ -2030,6 +2025,28 @@ void LVScopeRoot::processRangeInformation() { } } +void LVScopeRoot::transformScopedName() { + // Recursively transform all names. + std::function<void(LVScope * Parent)> TraverseScope = [&](LVScope *Parent) { + auto Traverse = [&](const auto *Set) { + if (Set) + for (const auto &Entry : *Set) + Entry->setInnerComponent(); + }; + if (const LVScopes *Scopes = Parent->getScopes()) + for (LVScope *Scope : *Scopes) { + Scope->setInnerComponent(); + TraverseScope(Scope); + } + Traverse(Parent->getSymbols()); + Traverse(Parent->getTypes()); + Traverse(Parent->getLines()); + }; + + // Start traversing the scopes root and transform the element name. + TraverseScope(this); +} + bool LVScopeRoot::equals(const LVScope *Scope) const { return LVScope::equals(Scope); } diff --git a/llvm/lib/DebugInfo/LogicalView/Core/LVSupport.cpp b/llvm/lib/DebugInfo/LogicalView/Core/LVSupport.cpp index 9fa1f28eb089..42fb1142eb44 100644 --- a/llvm/lib/DebugInfo/LogicalView/Core/LVSupport.cpp +++ b/llvm/lib/DebugInfo/LogicalView/Core/LVSupport.cpp @@ -20,6 +20,12 @@ using namespace llvm::logicalview; #define DEBUG_TYPE "Support" +namespace { +// Unique string pool instance used by all logical readers. +LVStringPool StringPool; +} // namespace +LVStringPool &llvm::logicalview::getStringPool() { return StringPool; } + // Perform the following transformations to the given 'Path': // - all characters to lowercase. // - '\\' into '/' (Platform independent). 
@@ -54,3 +60,106 @@ std::string llvm::logicalview::flattenedFilePath(StringRef Path) { }; return Name; } + +using LexicalEntry = std::pair<size_t, size_t>; +using LexicalIndexes = SmallVector<LexicalEntry, 10>; + +static LexicalIndexes getAllLexicalIndexes(StringRef Name) { + if (Name.empty()) + return {}; + + size_t AngleCount = 0; + size_t ColonSeen = 0; + size_t Current = 0; + + LexicalIndexes Indexes; + +#ifndef NDEBUG + auto PrintLexicalEntry = [&]() { + LexicalEntry Entry = Indexes.back(); + llvm::dbgs() << formatv( + "'{0}:{1}', '{2}'\n", Entry.first, Entry.second, + Name.substr(Entry.first, Entry.second - Entry.first + 1)); + }; +#endif + + size_t Length = Name.size(); + for (size_t Index = 0; Index < Length; ++Index) { + LLVM_DEBUG({ + llvm::dbgs() << formatv("Index: '{0}', Char: '{1}'\n", Index, + Name[Index]); + }); + switch (Name[Index]) { + case '<': + ++AngleCount; + break; + case '>': + --AngleCount; + break; + case ':': + ++ColonSeen; + break; + } + if (ColonSeen == 2) { + if (!AngleCount) { + Indexes.push_back(LexicalEntry(Current, Index - 2)); + Current = Index + 1; + LLVM_DEBUG({ PrintLexicalEntry(); }); + } + ColonSeen = 0; + continue; + } + } + + // Store last component. 
+ Indexes.push_back(LexicalEntry(Current, Length - 1)); + LLVM_DEBUG({ PrintLexicalEntry(); }); + return Indexes; +} + +LVLexicalComponent llvm::logicalview::getInnerComponent(StringRef Name) { + if (Name.empty()) + return {}; + + LexicalIndexes Indexes = getAllLexicalIndexes(Name); + if (Indexes.size() == 1) + return std::make_tuple(StringRef(), Name); + + LexicalEntry BeginEntry = Indexes.front(); + LexicalEntry EndEntry = Indexes[Indexes.size() - 2]; + StringRef Outer = + Name.substr(BeginEntry.first, EndEntry.second - BeginEntry.first + 1); + + LexicalEntry LastEntry = Indexes.back(); + StringRef Inner = + Name.substr(LastEntry.first, LastEntry.second - LastEntry.first + 1); + + return std::make_tuple(Outer, Inner); +} + +LVStringRefs llvm::logicalview::getAllLexicalComponents(StringRef Name) { + if (Name.empty()) + return {}; + + LexicalIndexes Indexes = getAllLexicalIndexes(Name); + LVStringRefs Components; + for (const LexicalEntry &Entry : Indexes) + Components.push_back( + Name.substr(Entry.first, Entry.second - Entry.first + 1)); + + return Components; +} + +std::string llvm::logicalview::getScopedName(const LVStringRefs &Components, + StringRef BaseName) { + if (Components.empty()) + return {}; + std::string Name(BaseName); + raw_string_ostream Stream(Name); + if (BaseName.size()) + Stream << "::"; + Stream << Components[0]; + for (LVStringRefs::size_type Index = 1; Index < Components.size(); ++Index) + Stream << "::" << Components[Index]; + return Name; +} diff --git a/llvm/lib/DebugInfo/LogicalView/Core/LVSymbol.cpp b/llvm/lib/DebugInfo/LogicalView/Core/LVSymbol.cpp index 82633fbc6b2e..4608fe20cb6d 100644 --- a/llvm/lib/DebugInfo/LogicalView/Core/LVSymbol.cpp +++ b/llvm/lib/DebugInfo/LogicalView/Core/LVSymbol.cpp @@ -66,10 +66,10 @@ void LVSymbol::addLocation(dwarf::Attribute Attr, LVAddress LowPC, LVAddress HighPC, LVUnsigned SectionOffset, uint64_t LocDescOffset, bool CallSiteLocation) { if (!Locations) - Locations = new LVAutoLocations(); + 
Locations = std::make_unique<LVLocations>(); // Create the location entry. - CurrentLocation = new LVLocationSymbol(); + CurrentLocation = getReader().createLocationSymbol(); CurrentLocation->setParent(this); CurrentLocation->setAttr(Attr); if (CallSiteLocation) @@ -82,10 +82,10 @@ void LVSymbol::addLocation(dwarf::Attribute Attr, LVAddress LowPC, } // Add a Location Record. -void LVSymbol::addLocationOperands(LVSmall Opcode, uint64_t Operand1, - uint64_t Operand2) { +void LVSymbol::addLocationOperands(LVSmall Opcode, + ArrayRef<uint64_t> Operands) { if (CurrentLocation) - CurrentLocation->addObject(Opcode, Operand1, Operand2); + CurrentLocation->addObject(Opcode, Operands); } // Add a Location Entry. @@ -97,15 +97,14 @@ void LVSymbol::addLocationConstant(dwarf::Attribute Attr, LVUnsigned Constant, /*SectionOffset=*/0, LocDescOffset); // Add records to Location Entry. - addLocationOperands(/*Opcode=*/LVLocationMemberOffset, - /*Operand1=*/Constant, /*Operand2=*/0); + addLocationOperands(/*Opcode=*/LVLocationMemberOffset, {Constant}); } LVLocations::iterator LVSymbol::addLocationGap(LVLocations::iterator Pos, LVAddress LowPC, LVAddress HighPC) { // Create a location entry for the gap. - LVLocation *Gap = new LVLocationSymbol(); + LVLocation *Gap = getReader().createLocationSymbol(); Gap->setParent(this); Gap->setAttr(dwarf::DW_AT_location); Gap->addObject(LowPC, HighPC, @@ -115,8 +114,7 @@ LVLocations::iterator LVSymbol::addLocationGap(LVLocations::iterator Pos, LVLocations::iterator Iter = Locations->insert(Pos, Gap); // Add gap to Location Entry. - Gap->addObject(/*op=*/dwarf::DW_OP_hi_user, - /*opd1=*/0, /*opd2=*/0); + Gap->addObject(dwarf::DW_OP_hi_user, {}); // Mark the entry as a gap. Gap->setIsGapEntry(); @@ -190,7 +188,7 @@ void LVSymbol::getLocations(LVLocations &LocationList) const { // Calculate coverage factor. 
void LVSymbol::calculateCoverage() { - if (!LVLocation::calculateCoverage(Locations, CoverageFactor, + if (!LVLocation::calculateCoverage(Locations.get(), CoverageFactor, CoveragePercentage)) { LVScope *Parent = getParentScope(); if (Parent->getIsInlinedFunction()) { @@ -444,6 +442,6 @@ void LVSymbol::printExtra(raw_ostream &OS, bool Full) const { Reference->printReference(OS, Full, const_cast<LVSymbol *>(this)); // Print location information. - LVLocation::print(Locations, OS, Full); + LVLocation::print(Locations.get(), OS, Full); } } diff --git a/llvm/lib/DebugInfo/LogicalView/Core/LVType.cpp b/llvm/lib/DebugInfo/LogicalView/Core/LVType.cpp index 3d32c34ee02a..28bccadce598 100644 --- a/llvm/lib/DebugInfo/LogicalView/Core/LVType.cpp +++ b/llvm/lib/DebugInfo/LogicalView/Core/LVType.cpp @@ -253,16 +253,10 @@ void LVType::getParameters(const LVTypes *Types, LVTypes *TypesParam, if (!Type->getIsTemplateParam()) continue; if (options().getAttributeArgument()) { - LVScope *Scope = nullptr; if (Type->getIsKindType()) - Type = Type->getTypeAsType(); - else { - if (Type->getIsKindScope()) { - Scope = Type->getTypeAsScope(); - Type = nullptr; - } - } - Type ? TypesParam->push_back(Type) : ScopesParam->push_back(Scope); + TypesParam->push_back(Type->getTypeAsType()); + else if (Type->getIsKindScope()) + ScopesParam->push_back(Type->getTypeAsScope()); } else TypesParam->push_back(Type); } @@ -330,6 +324,13 @@ LVElement *LVTypeDefinition::getUnderlyingType() { } void LVTypeDefinition::resolveExtra() { + // In the case of CodeView, the MSVC toolset generates a series of typedefs + // that refer to internal runtime structures, that we do not process. Those + // typedefs are marked as 'system'. They have an associated logical type, + // but the underlying type always is null. + if (getIsSystem()) + return; + // Set the reference to the typedef type. 
if (options().getAttributeUnderlying()) { setUnderlyingType(getUnderlyingType()); diff --git a/llvm/lib/DebugInfo/LogicalView/LVReaderHandler.cpp b/llvm/lib/DebugInfo/LogicalView/LVReaderHandler.cpp index 35dc30fd601f..5f82f816dc19 100644 --- a/llvm/lib/DebugInfo/LogicalView/LVReaderHandler.cpp +++ b/llvm/lib/DebugInfo/LogicalView/LVReaderHandler.cpp @@ -11,8 +11,13 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/LogicalView/LVReaderHandler.h" +#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h" #include "llvm/DebugInfo/LogicalView/Core/LVCompare.h" +#include "llvm/DebugInfo/LogicalView/Readers/LVCodeViewReader.h" #include "llvm/DebugInfo/LogicalView/Readers/LVELFReader.h" +#include "llvm/DebugInfo/PDB/Native/NativeSession.h" +#include "llvm/DebugInfo/PDB/PDB.h" +#include "llvm/Object/COFF.h" using namespace llvm; using namespace llvm::object; @@ -32,31 +37,36 @@ Error LVReaderHandler::process() { return Error::success(); } -void LVReaderHandler::destroyReaders() { - LLVM_DEBUG(dbgs() << "destroyReaders\n"); - for (const LVReader *Reader : TheReaders) - delete Reader; -} - Error LVReaderHandler::createReader(StringRef Filename, LVReaders &Readers, PdbOrObj &Input, StringRef FileFormatName, StringRef ExePath) { - auto CreateOneReader = [&]() -> LVReader * { - if (Input.is<ObjectFile *>()) { - ObjectFile &Obj = *Input.get<ObjectFile *>(); + auto CreateOneReader = [&]() -> std::unique_ptr<LVReader> { + if (isa<ObjectFile *>(Input)) { + ObjectFile &Obj = *cast<ObjectFile *>(Input); + if (Obj.isCOFF()) { + COFFObjectFile *COFF = cast<COFFObjectFile>(&Obj); + return std::make_unique<LVCodeViewReader>(Filename, FileFormatName, + *COFF, W, ExePath); + } if (Obj.isELF() || Obj.isMachO()) - return new LVELFReader(Filename, FileFormatName, Obj, W); + return std::make_unique<LVELFReader>(Filename, FileFormatName, Obj, W); + } + if (isa<PDBFile *>(Input)) { + PDBFile &Pdb = *cast<PDBFile *>(Input); + return 
std::make_unique<LVCodeViewReader>(Filename, FileFormatName, Pdb, + W, ExePath); } return nullptr; }; - LVReader *Reader = CreateOneReader(); - if (!Reader) + std::unique_ptr<LVReader> ReaderObj = CreateOneReader(); + if (!ReaderObj) return createStringError(errc::invalid_argument, "unable to create reader for: '%s'", Filename.str().c_str()); - Readers.push_back(Reader); + LVReader *Reader = ReaderObj.get(); + Readers.emplace_back(std::move(ReaderObj)); return Reader->doLoad(); } @@ -81,8 +91,102 @@ Error LVReaderHandler::handleArchive(LVReaders &Readers, StringRef Filename, return Error::success(); } +// Search for a matching executable image for the given PDB path. +static std::string searchForExe(const StringRef Path, + const StringRef Extension) { + SmallString<128> ExePath(Path); + llvm::sys::path::replace_extension(ExePath, Extension); + + std::unique_ptr<IPDBSession> Session; + if (Error Err = loadDataForEXE(PDB_ReaderType::Native, ExePath, Session)) { + consumeError(std::move(Err)); + return {}; + } + // We have a candidate for the executable image. + Expected<std::string> PdbPathOrErr = NativeSession::searchForPdb({ExePath}); + if (!PdbPathOrErr) { + consumeError(PdbPathOrErr.takeError()); + return {}; + } + // Convert any Windows backslashes into forward slashes to get the path. + std::string ConvertedPath = sys::path::convert_to_slash( + PdbPathOrErr.get(), sys::path::Style::windows); + if (ConvertedPath == Path) + return std::string(ExePath); + + return {}; +} + +// Search for a matching object image for the given PDB path. 
+static std::string searchForObj(const StringRef Path, + const StringRef Extension) { + SmallString<128> ObjPath(Path); + llvm::sys::path::replace_extension(ObjPath, Extension); + if (llvm::sys::fs::exists(ObjPath)) { + ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr = + MemoryBuffer::getFileOrSTDIN(ObjPath); + if (!BuffOrErr) + return {}; + return std::string(ObjPath); + } + + return {}; +} + Error LVReaderHandler::handleBuffer(LVReaders &Readers, StringRef Filename, MemoryBufferRef Buffer, StringRef ExePath) { + // As PDB does not support the Binary interface, at this point we can check + // if the buffer corresponds to a PDB or PE file. + file_magic FileMagic = identify_magic(Buffer.getBuffer()); + if (FileMagic == file_magic::pdb) { + if (!ExePath.empty()) + return handleObject(Readers, Filename, Buffer.getBuffer(), ExePath); + + // Search in the directory derived from the given 'Filename' for a + // matching object file (.o, .obj, .lib) or a matching executable file + // (.exe/.dll) and try to create the reader based on the matched file. + // If no matching file is found then we load the original PDB file. + std::vector<StringRef> ExecutableExtensions = {"exe", "dll"}; + for (StringRef Extension : ExecutableExtensions) { + std::string ExecutableImage = searchForExe(Filename, Extension); + if (ExecutableImage.empty()) + continue; + if (Error Err = handleObject(Readers, Filename, Buffer.getBuffer(), + ExecutableImage)) { + consumeError(std::move(Err)); + continue; + } + return Error::success(); + } + + std::vector<StringRef> ObjectExtensions = {"o", "obj", "lib"}; + for (StringRef Extension : ObjectExtensions) { + std::string ObjectImage = searchForObj(Filename, Extension); + if (ObjectImage.empty()) + continue; + if (Error Err = handleFile(Readers, ObjectImage)) { + consumeError(std::move(Err)); + continue; + } + return Error::success(); + } + + // No matching executable/object image was found. Load the given PDB. 
+ return handleObject(Readers, Filename, Buffer.getBuffer(), ExePath); + } + if (FileMagic == file_magic::pecoff_executable) { + // If we have a valid executable, try to find a matching PDB file. + Expected<std::string> PdbPath = NativeSession::searchForPdb({Filename}); + if (errorToErrorCode(PdbPath.takeError())) { + return createStringError( + errc::not_supported, + "Binary object format in '%s' does not have debug info.", + Filename.str().c_str()); + } + // Process the matching PDB file and pass the executable filename. + return handleFile(Readers, PdbPath.get(), Filename); + } + Expected<std::unique_ptr<Binary>> BinOrErr = createBinary(Buffer); if (errorToErrorCode(BinOrErr.takeError())) { return createStringError(errc::not_supported, @@ -139,7 +243,7 @@ Error LVReaderHandler::handleObject(LVReaders &Readers, StringRef Filename, Binary &Binary) { if (PdbOrObj Input = dyn_cast<ObjectFile>(&Binary)) return createReader(Filename, Readers, Input, - Input.get<ObjectFile *>()->getFileFormatName()); + cast<ObjectFile *>(Input)->getFileFormatName()); if (MachOUniversalBinary *Fat = dyn_cast<MachOUniversalBinary>(&Binary)) return handleMach(Readers, Filename, *Fat); @@ -152,13 +256,32 @@ Error LVReaderHandler::handleObject(LVReaders &Readers, StringRef Filename, Filename.str().c_str()); } +Error LVReaderHandler::handleObject(LVReaders &Readers, StringRef Filename, + StringRef Buffer, StringRef ExePath) { + std::unique_ptr<IPDBSession> Session; + if (Error Err = loadDataForPDB(PDB_ReaderType::Native, Filename, Session)) + return createStringError(errorToErrorCode(std::move(Err)), "%s", + Filename.str().c_str()); + + std::unique_ptr<NativeSession> PdbSession; + PdbSession.reset(static_cast<NativeSession *>(Session.release())); + PdbOrObj Input = &PdbSession->getPDBFile(); + StringRef FileFormatName; + size_t Pos = Buffer.find_first_of("\r\n"); + if (Pos) + FileFormatName = Buffer.substr(0, Pos - 1); + return createReader(Filename, Readers, Input, FileFormatName, ExePath); 
+} + Error LVReaderHandler::createReaders() { LLVM_DEBUG(dbgs() << "createReaders\n"); for (std::string &Object : Objects) { LVReaders Readers; if (Error Err = createReader(Object, Readers)) return Err; - TheReaders.insert(TheReaders.end(), Readers.begin(), Readers.end()); + TheReaders.insert(TheReaders.end(), + std::make_move_iterator(Readers.begin()), + std::make_move_iterator(Readers.end())); } return Error::success(); @@ -167,7 +290,7 @@ Error LVReaderHandler::createReaders() { Error LVReaderHandler::printReaders() { LLVM_DEBUG(dbgs() << "printReaders\n"); if (options().getPrintExecute()) - for (LVReader *Reader : TheReaders) + for (const std::unique_ptr<LVReader> &Reader : TheReaders) if (Error Err = Reader->doPrint()) return Err; @@ -182,7 +305,8 @@ Error LVReaderHandler::compareReaders() { size_t ViewPairs = ReadersCount / 2; LVCompare Compare(OS); for (size_t Pair = 0, Index = 0; Pair < ViewPairs; ++Pair) { - if (Error Err = Compare.execute(TheReaders[Index], TheReaders[Index + 1])) + if (Error Err = Compare.execute(TheReaders[Index].get(), + TheReaders[Index + 1].get())) return Err; Index += 2; } diff --git a/llvm/lib/DebugInfo/LogicalView/Readers/LVBinaryReader.cpp b/llvm/lib/DebugInfo/LogicalView/Readers/LVBinaryReader.cpp index b654c624f57c..a0cd8b7839cf 100644 --- a/llvm/lib/DebugInfo/LogicalView/Readers/LVBinaryReader.cpp +++ b/llvm/lib/DebugInfo/LogicalView/Readers/LVBinaryReader.cpp @@ -190,6 +190,61 @@ void LVBinaryReader::mapVirtualAddress(const object::ObjectFile &Obj) { }); } +void LVBinaryReader::mapVirtualAddress(const object::COFFObjectFile &COFFObj) { + ErrorOr<uint64_t> ImageBase = COFFObj.getImageBase(); + if (ImageBase) + ImageBaseAddress = ImageBase.get(); + + LLVM_DEBUG({ + dbgs() << "ImageBaseAddress: " << hexValue(ImageBaseAddress) << "\n"; + }); + + uint32_t Flags = COFF::IMAGE_SCN_CNT_CODE | COFF::IMAGE_SCN_LNK_COMDAT; + + for (const object::SectionRef &Section : COFFObj.sections()) { + if (!Section.isText() || Section.isVirtual() 
|| !Section.getSize()) + continue; + + const object::coff_section *COFFSection = COFFObj.getCOFFSection(Section); + VirtualAddress = COFFSection->VirtualAddress; + bool IsComdat = (COFFSection->Characteristics & Flags) == Flags; + + // Record section information required for symbol resolution. + // Note: The section index returned by 'getIndex()' is zero based. + Sections.emplace(Section.getIndex() + 1, Section); + addSectionAddress(Section); + + // Additional initialization on the specific object format. + mapRangeAddress(COFFObj, Section, IsComdat); + } + + LLVM_DEBUG({ + dbgs() << "\nSections Information:\n"; + for (LVSections::reference Entry : Sections) { + LVSectionIndex SectionIndex = Entry.first; + const object::SectionRef Section = Entry.second; + const object::coff_section *COFFSection = COFFObj.getCOFFSection(Section); + Expected<StringRef> SectionNameOrErr = Section.getName(); + if (!SectionNameOrErr) + consumeError(SectionNameOrErr.takeError()); + dbgs() << "\nIndex: " << format_decimal(SectionIndex, 3) + << " Name: " << *SectionNameOrErr << "\n" + << "Size: " << hexValue(Section.getSize()) << "\n" + << "VirtualAddress: " << hexValue(VirtualAddress) << "\n" + << "SectionAddress: " << hexValue(Section.getAddress()) << "\n" + << "PointerToRawData: " << hexValue(COFFSection->PointerToRawData) + << "\n" + << "SizeOfRawData: " << hexValue(COFFSection->SizeOfRawData) + << "\n"; + } + dbgs() << "\nObject Section Information:\n"; + for (LVSectionAddresses::const_reference Entry : SectionAddresses) + dbgs() << "[" << hexValue(Entry.first) << ":" + << hexValue(Entry.first + Entry.second.getSize()) + << "] Size: " << hexValue(Entry.second.getSize()) << "\n"; + }); +} + Error LVBinaryReader::loadGenericTargetInfo(StringRef TheTriple, StringRef TheFeatures) { std::string TargetLookupError; @@ -297,29 +352,16 @@ void LVBinaryReader::addSectionRange(LVSectionIndex SectionIndex, } LVRange *LVBinaryReader::getSectionRanges(LVSectionIndex SectionIndex) { - LVRange 
*Range = nullptr; // Check if we already have a mapping for this section index. LVSectionRanges::iterator IterSection = SectionRanges.find(SectionIndex); - if (IterSection == SectionRanges.end()) { - Range = new LVRange(); - SectionRanges.emplace(SectionIndex, Range); - } else { - Range = IterSection->second; - } + if (IterSection == SectionRanges.end()) + IterSection = + SectionRanges.emplace(SectionIndex, std::make_unique<LVRange>()).first; + LVRange *Range = IterSection->second.get(); assert(Range && "Range is null."); return Range; } -LVBinaryReader::~LVBinaryReader() { - // Delete the lines created by 'createInstructions'. - std::vector<LVLines *> AllInstructionLines = ScopeInstructions.find(); - for (LVLines *Entry : AllInstructionLines) - delete Entry; - // Delete the ranges created by 'getSectionRanges'. - for (LVSectionRanges::reference Entry : SectionRanges) - delete Entry.second; -} - Error LVBinaryReader::createInstructions(LVScope *Scope, LVSectionIndex SectionIndex, const LVNameInfo &NameInfo) { @@ -380,7 +422,9 @@ Error LVBinaryReader::createInstructions(LVScope *Scope, // Address for first instruction line. LVAddress FirstAddress = Address; - LVLines *Instructions = new LVLines(); + auto InstructionsSP = std::make_unique<LVLines>(); + LVLines &Instructions = *InstructionsSP; + DiscoveredLines.emplace_back(std::move(InstructionsSP)); while (Begin < End) { MCInst Instruction; @@ -399,7 +443,7 @@ Error LVBinaryReader::createInstructions(LVScope *Scope, break; case MCDisassembler::SoftFail: LLVM_DEBUG({ dbgs() << "Potentially undefined instruction:"; }); - LLVM_FALLTHROUGH; + [[fallthrough]]; case MCDisassembler::Success: { std::string Buffer; raw_string_ostream Stream(Buffer); @@ -422,10 +466,10 @@ Error LVBinaryReader::createInstructions(LVScope *Scope, // the 'processLines()' function will move each created logical line // to its enclosing logical scope, using the debug ranges information // and they will be released when its scope parent is deleted. 
- LVLineAssembler *Line = new LVLineAssembler(); + LVLineAssembler *Line = createLineAssembler(); Line->setAddress(Address); Line->setName(StringRef(Stream.str()).trim()); - Instructions->push_back(Line); + Instructions.push_back(Line); break; } } @@ -439,15 +483,15 @@ Error LVBinaryReader::createInstructions(LVScope *Scope, << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n" << "Address: " << hexValue(FirstAddress) << format(" - Collected instructions lines: %d\n", - Instructions->size()); - for (const LVLine *Line : *Instructions) + Instructions.size()); + for (const LVLine *Line : Instructions) dbgs() << format_decimal(++Index, 5) << ": " << hexValue(Line->getOffset()) << ", (" << Line->getName() << ")\n"; }); // The scope in the assembler names is linked to its own instructions. - ScopeInstructions.add(SectionIndex, Scope, Instructions); + ScopeInstructions.add(SectionIndex, Scope, &Instructions); AssemblerMappings.add(SectionIndex, FirstAddress, Scope); return Error::success(); @@ -815,6 +859,80 @@ void LVBinaryReader::processLines(LVLines *DebugLines, } } +// Traverse the scopes for the given 'Function' looking for any inlined +// scopes with inlined lines, which are found in 'CUInlineeLines'. +void LVBinaryReader::includeInlineeLines(LVSectionIndex SectionIndex, + LVScope *Function) { + SmallVector<LVInlineeLine::iterator> InlineeIters; + std::function<void(LVScope * Parent)> FindInlinedScopes = + [&](LVScope *Parent) { + if (const LVScopes *Scopes = Parent->getScopes()) + for (LVScope *Scope : *Scopes) { + LVInlineeLine::iterator Iter = CUInlineeLines.find(Scope); + if (Iter != CUInlineeLines.end()) + InlineeIters.push_back(Iter); + FindInlinedScopes(Scope); + } + }; + + // Find all inlined scopes for the given 'Function'. 
+ FindInlinedScopes(Function); + for (LVInlineeLine::iterator InlineeIter : InlineeIters) { + LVScope *Scope = InlineeIter->first; + addToSymbolTable(Scope->getLinkageName(), Scope, SectionIndex); + + // TODO: Convert this into a reference. + LVLines *InlineeLines = InlineeIter->second.get(); + LLVM_DEBUG({ + dbgs() << "Inlined lines for: " << Scope->getName() << "\n"; + for (const LVLine *Line : *InlineeLines) + dbgs() << "[" << hexValue(Line->getAddress()) << "] " + << Line->getLineNumber() << "\n"; + dbgs() << format("Debug lines: %d\n", CULines.size()); + for (const LVLine *Line : CULines) + dbgs() << "Line address: " << hexValue(Line->getOffset()) << ", (" + << Line->getLineNumber() << ")\n"; + ; + }); + + // The inlined lines must be merged using its address, in order to keep + // the real order of the instructions. The inlined lines are mixed with + // the other non-inlined lines. + if (InlineeLines->size()) { + // First address of inlinee code. + uint64_t InlineeStart = (InlineeLines->front())->getAddress(); + LVLines::iterator Iter = std::find_if( + CULines.begin(), CULines.end(), [&](LVLine *Item) -> bool { + return Item->getAddress() == InlineeStart; + }); + if (Iter != CULines.end()) { + // 'Iter' points to the line where the inlined function is called. + // Emulate the DW_AT_call_line attribute. + Scope->setCallLineNumber((*Iter)->getLineNumber()); + // Mark the referenced line as the start of the inlined function. + // Skip the first line during the insertion, as the address and + // line number as the same. Otherwise we have to erase and insert. + (*Iter)->setLineNumber((*InlineeLines->begin())->getLineNumber()); + ++Iter; + CULines.insert(Iter, InlineeLines->begin() + 1, InlineeLines->end()); + } + } + + // Remove this set of lines from the container; each inlined function + // creates an unique set of lines. Remove only the created container. 
+ CUInlineeLines.erase(InlineeIter); + InlineeLines->clear(); + } + LLVM_DEBUG({ + dbgs() << "Merged Inlined lines for: " << Function->getName() << "\n"; + dbgs() << format("Debug lines: %d\n", CULines.size()); + for (const LVLine *Line : CULines) + dbgs() << "Line address: " << hexValue(Line->getOffset()) << ", (" + << Line->getLineNumber() << ")\n"; + ; + }); +} + void LVBinaryReader::print(raw_ostream &OS) const { OS << "LVBinaryReader\n"; LLVM_DEBUG(dbgs() << "PrintReader\n"); diff --git a/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewReader.cpp b/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewReader.cpp new file mode 100644 index 000000000000..d72fe2683f92 --- /dev/null +++ b/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewReader.cpp @@ -0,0 +1,1221 @@ +//===-- LVCodeViewReader.cpp ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This implements the LVCodeViewReader class. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/LogicalView/Readers/LVCodeViewReader.h" +#include "llvm/DebugInfo/CodeView/CVSymbolVisitor.h" +#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" +#include "llvm/DebugInfo/CodeView/EnumTables.h" +#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h" +#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h" +#include "llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h" +#include "llvm/DebugInfo/LogicalView/Core/LVLine.h" +#include "llvm/DebugInfo/LogicalView/Core/LVScope.h" +#include "llvm/DebugInfo/LogicalView/Core/LVSymbol.h" +#include "llvm/DebugInfo/LogicalView/Core/LVType.h" +#include "llvm/DebugInfo/PDB/GenericError.h" +#include "llvm/DebugInfo/PDB/Native/DbiStream.h" +#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h" +#include "llvm/DebugInfo/PDB/Native/InfoStream.h" +#include "llvm/DebugInfo/PDB/Native/LinePrinter.h" +#include "llvm/DebugInfo/PDB/Native/PDBFile.h" +#include "llvm/DebugInfo/PDB/Native/RawConstants.h" +#include "llvm/DebugInfo/PDB/Native/SymbolStream.h" +#include "llvm/DebugInfo/PDB/Native/TpiStream.h" +#include "llvm/Demangle/Demangle.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FormatAdapters.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/WithColor.h" + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::logicalview; +using namespace llvm::msf; +using namespace llvm::object; +using namespace llvm::pdb; + +#define DEBUG_TYPE "CodeViewReader" + +StringRef LVCodeViewReader::getSymbolKindName(SymbolKind Kind) { + switch (Kind) { +#define SYMBOL_RECORD(EnumName, EnumVal, Name) \ + case EnumName: \ + return #EnumName; +#include "llvm/DebugInfo/CodeView/CodeViewSymbols.def" + default: + return "UnknownSym"; + } + llvm_unreachable("Unknown SymbolKind::Kind"); +} + +std::string 
LVCodeViewReader::formatRegisterId(RegisterId Register, + CPUType CPU) { +#define RETURN_CASE(Enum, X, Ret) \ + case Enum::X: \ + return Ret; + + if (CPU == CPUType::ARMNT) { + switch (Register) { +#define CV_REGISTERS_ARM +#define CV_REGISTER(name, val) RETURN_CASE(RegisterId, name, #name) +#include "llvm/DebugInfo/CodeView/CodeViewRegisters.def" +#undef CV_REGISTER +#undef CV_REGISTERS_ARM + + default: + break; + } + } else if (CPU == CPUType::ARM64) { + switch (Register) { +#define CV_REGISTERS_ARM64 +#define CV_REGISTER(name, val) RETURN_CASE(RegisterId, name, #name) +#include "llvm/DebugInfo/CodeView/CodeViewRegisters.def" +#undef CV_REGISTER +#undef CV_REGISTERS_ARM64 + + default: + break; + } + } else { + switch (Register) { +#define CV_REGISTERS_X86 +#define CV_REGISTER(name, val) RETURN_CASE(RegisterId, name, #name) +#include "llvm/DebugInfo/CodeView/CodeViewRegisters.def" +#undef CV_REGISTER +#undef CV_REGISTERS_X86 + + default: + break; + } + } + return "formatUnknownEnum(Id)"; +} + +void LVCodeViewReader::printRelocatedField(StringRef Label, + const coff_section *CoffSection, + uint32_t RelocOffset, + uint32_t Offset, + StringRef *RelocSym) { + StringRef SymStorage; + StringRef &Symbol = RelocSym ? *RelocSym : SymStorage; + if (!resolveSymbolName(CoffSection, RelocOffset, Symbol)) + W.printSymbolOffset(Label, Symbol, Offset); + else + W.printHex(Label, RelocOffset); +} + +void LVCodeViewReader::getLinkageName(const coff_section *CoffSection, + uint32_t RelocOffset, uint32_t Offset, + StringRef *RelocSym) { + StringRef SymStorage; + StringRef &Symbol = RelocSym ? 
*RelocSym : SymStorage; + if (resolveSymbolName(CoffSection, RelocOffset, Symbol)) + Symbol = ""; +} + +Expected<StringRef> +LVCodeViewReader::getFileNameForFileOffset(uint32_t FileOffset, + const SymbolGroup *SG) { + if (SG) { + Expected<StringRef> Filename = SG->getNameFromChecksums(FileOffset); + if (!Filename) { + consumeError(Filename.takeError()); + return StringRef(""); + } + return *Filename; + } + + // The file checksum subsection should precede all references to it. + if (!CVFileChecksumTable.valid() || !CVStringTable.valid()) + return createStringError(object_error::parse_failed, getFileName()); + + VarStreamArray<FileChecksumEntry>::Iterator Iter = + CVFileChecksumTable.getArray().at(FileOffset); + + // Check if the file checksum table offset is valid. + if (Iter == CVFileChecksumTable.end()) + return createStringError(object_error::parse_failed, getFileName()); + + Expected<StringRef> NameOrErr = CVStringTable.getString(Iter->FileNameOffset); + if (!NameOrErr) + return createStringError(object_error::parse_failed, getFileName()); + return *NameOrErr; +} + +Error LVCodeViewReader::printFileNameForOffset(StringRef Label, + uint32_t FileOffset, + const SymbolGroup *SG) { + Expected<StringRef> NameOrErr = getFileNameForFileOffset(FileOffset, SG); + if (!NameOrErr) + return NameOrErr.takeError(); + W.printHex(Label, *NameOrErr, FileOffset); + return Error::success(); +} + +void LVCodeViewReader::cacheRelocations() { + for (const SectionRef &Section : getObj().sections()) { + const coff_section *CoffSection = getObj().getCOFFSection(Section); + + for (const RelocationRef &Relocacion : Section.relocations()) + RelocMap[CoffSection].push_back(Relocacion); + + // Sort relocations by address. + llvm::sort(RelocMap[CoffSection], [](RelocationRef L, RelocationRef R) { + return L.getOffset() < R.getOffset(); + }); + } +} + +// Given a section and an offset into this section the function returns the +// symbol used for the relocation at the offset. 
+Error LVCodeViewReader::resolveSymbol(const coff_section *CoffSection, + uint64_t Offset, SymbolRef &Sym) { + const auto &Relocations = RelocMap[CoffSection]; + basic_symbol_iterator SymI = getObj().symbol_end(); + for (const RelocationRef &Relocation : Relocations) { + uint64_t RelocationOffset = Relocation.getOffset(); + + if (RelocationOffset == Offset) { + SymI = Relocation.getSymbol(); + break; + } + } + if (SymI == getObj().symbol_end()) + return make_error<StringError>("Unknown Symbol", inconvertibleErrorCode()); + Sym = *SymI; + return ErrorSuccess(); +} + +// Given a section and an offset into this section the function returns the +// name of the symbol used for the relocation at the offset. +Error LVCodeViewReader::resolveSymbolName(const coff_section *CoffSection, + uint64_t Offset, StringRef &Name) { + SymbolRef Symbol; + if (Error E = resolveSymbol(CoffSection, Offset, Symbol)) + return E; + Expected<StringRef> NameOrErr = Symbol.getName(); + if (!NameOrErr) + return NameOrErr.takeError(); + Name = *NameOrErr; + return ErrorSuccess(); +} + +// CodeView and DWARF can have references to compiler generated elements, +// used for initialization. The MSVC includes in the PDBs, internal compile +// units, associated with the MS runtime support. We mark them as 'system' +// and they are printed only if the command line option 'internal=system'. +bool LVCodeViewReader::isSystemEntry(LVElement *Element, StringRef Name) const { + Name = Name.empty() ? 
Element->getName() : Name; + auto Find = [=](const char *String) -> bool { + return StringRef::npos != Name.find(String); + }; + auto Starts = [=](const char *Pattern) -> bool { + return Name.startswith(Pattern); + }; + auto CheckExclude = [&]() -> bool { + if (Starts("__") || Starts("_PMD") || Starts("_PMFN")) + return true; + if (Find("_s__")) + return true; + if (Find("_CatchableType") || Find("_TypeDescriptor")) + return true; + if (Find("Intermediate\\vctools")) + return true; + if (Find("$initializer$") || Find("dynamic initializer")) + return true; + if (Find("`vftable'") || Find("_GLOBAL__sub")) + return true; + return false; + }; + bool Excluded = CheckExclude(); + if (Excluded) + Element->setIsSystem(); + + return Excluded; +} + +Error LVCodeViewReader::collectInlineeInfo( + DebugInlineeLinesSubsectionRef &Lines, const llvm::pdb::SymbolGroup *SG) { + for (const InlineeSourceLine &Line : Lines) { + TypeIndex TIInlinee = Line.Header->Inlinee; + uint32_t LineNumber = Line.Header->SourceLineNum; + uint32_t FileOffset = Line.Header->FileID; + LLVM_DEBUG({ + DictScope S(W, "InlineeSourceLine"); + LogicalVisitor.printTypeIndex("Inlinee", TIInlinee, StreamTPI); + if (Error Err = printFileNameForOffset("FileID", FileOffset, SG)) + return Err; + W.printNumber("SourceLineNum", LineNumber); + + if (Lines.hasExtraFiles()) { + W.printNumber("ExtraFileCount", Line.ExtraFiles.size()); + ListScope ExtraFiles(W, "ExtraFiles"); + for (const ulittle32_t &FID : Line.ExtraFiles) + if (Error Err = printFileNameForOffset("FileID", FID, SG)) + return Err; + } + }); + Expected<StringRef> NameOrErr = getFileNameForFileOffset(FileOffset, SG); + if (!NameOrErr) + return NameOrErr.takeError(); + LogicalVisitor.addInlineeInfo(TIInlinee, LineNumber, *NameOrErr); + } + + return Error::success(); +} + +Error LVCodeViewReader::traverseInlineeLines(StringRef Subsection) { + BinaryStreamReader SR(Subsection, llvm::support::little); + DebugInlineeLinesSubsectionRef Lines; + if (Error E = 
Lines.initialize(SR)) + return createStringError(errorToErrorCode(std::move(E)), getFileName()); + + return collectInlineeInfo(Lines); +} + +Error LVCodeViewReader::createLines( + const FixedStreamArray<LineNumberEntry> &LineNumbers, LVAddress Addendum, + uint32_t Segment, uint32_t Begin, uint32_t Size, uint32_t NameIndex, + const SymbolGroup *SG) { + LLVM_DEBUG({ + uint32_t End = Begin + Size; + W.getOStream() << formatv("{0:x-4}:{1:x-8}-{2:x-8}\n", Segment, Begin, End); + }); + + for (const LineNumberEntry &Line : LineNumbers) { + if (Line.Offset >= Size) + return createStringError(object_error::parse_failed, getFileName()); + + LineInfo LI(Line.Flags); + + LLVM_DEBUG({ + W.getOStream() << formatv( + "{0} {1:x-8}\n", utostr(LI.getStartLine()), + fmt_align(Begin + Line.Offset, AlignStyle::Right, 8, '0')); + }); + + // The 'processLines()' function will move each created logical line + // to its enclosing logical scope, using the debug ranges information + // and they will be released when its scope parent is deleted. 
+ LVLineDebug *LineDebug = createLineDebug(); + CULines.push_back(LineDebug); + LVAddress Address = linearAddress(Segment, Begin + Line.Offset); + LineDebug->setAddress(Address + Addendum); + + if (LI.isAlwaysStepInto()) + LineDebug->setIsAlwaysStepInto(); + else if (LI.isNeverStepInto()) + LineDebug->setIsNeverStepInto(); + else + LineDebug->setLineNumber(LI.getStartLine()); + + if (LI.isStatement()) + LineDebug->setIsNewStatement(); + + Expected<StringRef> NameOrErr = getFileNameForFileOffset(NameIndex, SG); + if (!NameOrErr) + return NameOrErr.takeError(); + LineDebug->setFilename(*NameOrErr); + } + + return Error::success(); +} + +Error LVCodeViewReader::initializeFileAndStringTables( + BinaryStreamReader &Reader) { + while (Reader.bytesRemaining() > 0 && + (!CVFileChecksumTable.valid() || !CVStringTable.valid())) { + // The section consists of a number of subsection in the following format: + // |SubSectionType|SubSectionSize|Contents...| + uint32_t SubType, SubSectionSize; + + if (Error E = Reader.readInteger(SubType)) + return createStringError(errorToErrorCode(std::move(E)), getFileName()); + if (Error E = Reader.readInteger(SubSectionSize)) + return createStringError(errorToErrorCode(std::move(E)), getFileName()); + + StringRef Contents; + if (Error E = Reader.readFixedString(Contents, SubSectionSize)) + return createStringError(errorToErrorCode(std::move(E)), getFileName()); + + BinaryStreamRef ST(Contents, support::little); + switch (DebugSubsectionKind(SubType)) { + case DebugSubsectionKind::FileChecksums: + if (Error E = CVFileChecksumTable.initialize(ST)) + return createStringError(errorToErrorCode(std::move(E)), getFileName()); + break; + case DebugSubsectionKind::StringTable: + if (Error E = CVStringTable.initialize(ST)) + return createStringError(errorToErrorCode(std::move(E)), getFileName()); + break; + default: + break; + } + + uint32_t PaddedSize = alignTo(SubSectionSize, 4); + if (Error E = Reader.skip(PaddedSize - SubSectionSize)) + return 
createStringError(errorToErrorCode(std::move(E)), getFileName()); + } + + return Error::success(); +} + +Error LVCodeViewReader::loadTypeServer(TypeServer2Record &TS) { + LLVM_DEBUG({ + W.printString("Guid", formatv("{0}", TS.getGuid()).str()); + W.printNumber("Age", TS.getAge()); + W.printString("Name", TS.getName()); + }); + + SmallString<128> ServerName(TS.getName()); + BuffOrErr = MemoryBuffer::getFile(ServerName); + if (BuffOrErr.getError()) { + // The server name does not exist. Try in the same directory as the + // input file. + ServerName = createAlternativePath(ServerName); + BuffOrErr = MemoryBuffer::getFile(ServerName); + if (BuffOrErr.getError()) { + // For the error message, use the original type server name. + return createStringError(errc::bad_file_descriptor, + "File '%s' does not exist.", + TS.getName().str().c_str()); + } + } + MemBuffer = std::move(BuffOrErr.get()); + + // Check if the buffer corresponds to a PDB file. + assert(identify_magic((*MemBuffer).getBuffer()) == file_magic::pdb && + "Invalid PDB file."); + + if (Error Err = loadDataForPDB(PDB_ReaderType::Native, ServerName, Session)) + return createStringError(errorToErrorCode(std::move(Err)), "%s", + ServerName.c_str()); + + PdbSession.reset(static_cast<NativeSession *>(Session.release())); + PDBFile &Pdb = PdbSession->getPDBFile(); + + // Just because a file with a matching name was found and it was an actual + // PDB file doesn't mean it matches. For it to match the InfoStream's GUID + // must match the GUID specified in the TypeServer2 record. + Expected<InfoStream &> expectedInfo = Pdb.getPDBInfoStream(); + if (!expectedInfo || expectedInfo->getGuid() != TS.getGuid()) + return createStringError(errc::invalid_argument, "signature_out_of_date"); + + // The reader needs to switch to a type server, to process the types from + // the server. We need to keep the original input source, as reading other + // sections will require the input associated with the loaded object file. 
+ TypeServer = std::make_shared<InputFile>(&Pdb); + LogicalVisitor.setInput(TypeServer); + + LazyRandomTypeCollection &Types = types(); + LazyRandomTypeCollection &Ids = ids(); + if (Error Err = traverseTypes(Pdb, Types, Ids)) + return Err; + + return Error::success(); +} + +Error LVCodeViewReader::loadPrecompiledObject(PrecompRecord &Precomp, + CVTypeArray &CVTypesObj) { + LLVM_DEBUG({ + W.printHex("Count", Precomp.getTypesCount()); + W.printHex("Signature", Precomp.getSignature()); + W.printString("PrecompFile", Precomp.getPrecompFilePath()); + }); + + SmallString<128> ServerName(Precomp.getPrecompFilePath()); + BuffOrErr = MemoryBuffer::getFile(ServerName); + if (BuffOrErr.getError()) { + // The server name does not exist. Try in the directory as the input file. + ServerName = createAlternativePath(ServerName); + if (BuffOrErr.getError()) { + // For the error message, use the original type server name. + return createStringError(errc::bad_file_descriptor, + "File '%s' does not exist.", + Precomp.getPrecompFilePath().str().c_str()); + } + } + MemBuffer = std::move(BuffOrErr.get()); + + Expected<std::unique_ptr<Binary>> BinOrErr = createBinary(*MemBuffer); + if (errorToErrorCode(BinOrErr.takeError())) + return createStringError(errc::not_supported, + "Binary object format in '%s' is not supported.", + ServerName.c_str()); + + Binary &BinaryObj = *BinOrErr.get(); + if (!BinaryObj.isCOFF()) + return createStringError(errc::not_supported, "'%s' is not a COFF object.", + ServerName.c_str()); + + Builder = std::make_unique<AppendingTypeTableBuilder>(BuilderAllocator); + + // The MSVC precompiled header object file, should contain just a single + // ".debug$P" section. 
+ COFFObjectFile &Obj = *cast<COFFObjectFile>(&BinaryObj); + for (const SectionRef &Section : Obj.sections()) { + Expected<StringRef> SectionNameOrErr = Section.getName(); + if (!SectionNameOrErr) + return SectionNameOrErr.takeError(); + if (*SectionNameOrErr == ".debug$P") { + Expected<StringRef> DataOrErr = Section.getContents(); + if (!DataOrErr) + return DataOrErr.takeError(); + uint32_t Magic; + if (Error Err = consume(*DataOrErr, Magic)) + return Err; + if (Magic != COFF::DEBUG_SECTION_MAGIC) + return errorCodeToError(object_error::parse_failed); + + ReaderPrecomp = + std::make_unique<BinaryStreamReader>(*DataOrErr, support::little); + cantFail( + ReaderPrecomp->readArray(CVTypesPrecomp, ReaderPrecomp->getLength())); + + // Append all the type records up to the LF_ENDPRECOMP marker and + // check if the signatures match. + for (const CVType &Type : CVTypesPrecomp) { + ArrayRef<uint8_t> TypeData = Type.data(); + if (Type.kind() == LF_ENDPRECOMP) { + EndPrecompRecord EndPrecomp = cantFail( + TypeDeserializer::deserializeAs<EndPrecompRecord>(TypeData)); + if (Precomp.getSignature() != EndPrecomp.getSignature()) + return createStringError(errc::invalid_argument, "no matching pch"); + break; + } + Builder->insertRecordBytes(TypeData); + } + // Done processing .debug$P, break out of section loop. + break; + } + } + + // Append all the type records, skipping the first record which is the + // reference to the precompiled header object information. + for (const CVType &Type : CVTypesObj) { + ArrayRef<uint8_t> TypeData = Type.data(); + if (Type.kind() != LF_PRECOMP) + Builder->insertRecordBytes(TypeData); + } + + // Set up a type stream that refers to the added type records. 
+ Builder->ForEachRecord( + [&](TypeIndex TI, const CVType &Type) { TypeArray.push_back(Type); }); + + ItemStream = + std::make_unique<BinaryItemStream<CVType>>(llvm::support::little); + ItemStream->setItems(TypeArray); + TypeStream.setUnderlyingStream(*ItemStream); + + PrecompHeader = + std::make_shared<LazyRandomTypeCollection>(TypeStream, TypeArray.size()); + + // Change the original input source to use the collected type records. + LogicalVisitor.setInput(PrecompHeader); + + LazyRandomTypeCollection &Types = types(); + LazyRandomTypeCollection &Ids = ids(); + LVTypeVisitor TDV(W, &LogicalVisitor, Types, Ids, StreamTPI, + LogicalVisitor.getShared()); + return visitTypeStream(Types, TDV); +} + +Error LVCodeViewReader::traverseTypeSection(StringRef SectionName, + const SectionRef &Section) { + LLVM_DEBUG({ + ListScope D(W, "CodeViewTypes"); + W.printNumber("Section", SectionName, getObj().getSectionID(Section)); + }); + + Expected<StringRef> DataOrErr = Section.getContents(); + if (!DataOrErr) + return DataOrErr.takeError(); + uint32_t Magic; + if (Error Err = consume(*DataOrErr, Magic)) + return Err; + if (Magic != COFF::DEBUG_SECTION_MAGIC) + return errorCodeToError(object_error::parse_failed); + + // Get the first type record. It will indicate if this object uses a type + // server (/Zi) or a PCH file (/Yu). + CVTypeArray CVTypes; + BinaryStreamReader Reader(*DataOrErr, support::little); + cantFail(Reader.readArray(CVTypes, Reader.getLength())); + CVTypeArray::Iterator FirstType = CVTypes.begin(); + + // The object was compiled with /Zi. It uses types from a type server PDB. + if (FirstType->kind() == LF_TYPESERVER2) { + TypeServer2Record TS = cantFail( + TypeDeserializer::deserializeAs<TypeServer2Record>(FirstType->data())); + return loadTypeServer(TS); + } + + // The object was compiled with /Yc or /Yu. It uses types from another + // object file with a matching signature. 
+ if (FirstType->kind() == LF_PRECOMP) { + PrecompRecord Precomp = cantFail( + TypeDeserializer::deserializeAs<PrecompRecord>(FirstType->data())); + return loadPrecompiledObject(Precomp, CVTypes); + } + + LazyRandomTypeCollection &Types = types(); + LazyRandomTypeCollection &Ids = ids(); + Types.reset(*DataOrErr, 100); + LVTypeVisitor TDV(W, &LogicalVisitor, Types, Ids, StreamTPI, + LogicalVisitor.getShared()); + return visitTypeStream(Types, TDV); +} + +Error LVCodeViewReader::traverseTypes(PDBFile &Pdb, + LazyRandomTypeCollection &Types, + LazyRandomTypeCollection &Ids) { + // Traverse types (TPI and IPI). + auto VisitTypes = [&](LazyRandomTypeCollection &Types, + LazyRandomTypeCollection &Ids, + SpecialStream StreamIdx) -> Error { + LVTypeVisitor TDV(W, &LogicalVisitor, Types, Ids, StreamIdx, + LogicalVisitor.getShared()); + return visitTypeStream(Types, TDV); + }; + + Expected<TpiStream &> StreamTpiOrErr = Pdb.getPDBTpiStream(); + if (!StreamTpiOrErr) + return StreamTpiOrErr.takeError(); + TpiStream &StreamTpi = *StreamTpiOrErr; + StreamTpi.buildHashMap(); + LLVM_DEBUG({ + W.getOStream() << formatv("Showing {0:N} TPI records\n", + StreamTpi.getNumTypeRecords()); + }); + if (Error Err = VisitTypes(Types, Ids, StreamTPI)) + return Err; + + Expected<TpiStream &> StreamIpiOrErr = Pdb.getPDBIpiStream(); + if (!StreamIpiOrErr) + return StreamIpiOrErr.takeError(); + TpiStream &StreamIpi = *StreamIpiOrErr; + StreamIpi.buildHashMap(); + LLVM_DEBUG({ + W.getOStream() << formatv("Showing {0:N} IPI records\n", + StreamIpi.getNumTypeRecords()); + }); + return VisitTypes(Ids, Ids, StreamIPI); +} + +Error LVCodeViewReader::traverseSymbolsSubsection(StringRef Subsection, + const SectionRef &Section, + StringRef SectionContents) { + ArrayRef<uint8_t> BinaryData(Subsection.bytes_begin(), + Subsection.bytes_end()); + LVSymbolVisitorDelegate VisitorDelegate(this, Section, &getObj(), + SectionContents); + CVSymbolArray Symbols; + BinaryStreamReader Reader(BinaryData, 
llvm::support::little); + if (Error E = Reader.readArray(Symbols, Reader.getLength())) + return createStringError(errorToErrorCode(std::move(E)), getFileName()); + + LazyRandomTypeCollection &Types = types(); + LazyRandomTypeCollection &Ids = ids(); + SymbolVisitorCallbackPipeline Pipeline; + SymbolDeserializer Deserializer(&VisitorDelegate, + CodeViewContainer::ObjectFile); + // As we are processing a COFF format, use TPI as IPI, so the generic code + // to process the CodeView format does not contain any additional checks. + LVSymbolVisitor Traverser(this, W, &LogicalVisitor, Types, Ids, + &VisitorDelegate, LogicalVisitor.getShared()); + + Pipeline.addCallbackToPipeline(Deserializer); + Pipeline.addCallbackToPipeline(Traverser); + CVSymbolVisitor Visitor(Pipeline); + return Visitor.visitSymbolStream(Symbols); +} + +Error LVCodeViewReader::traverseSymbolSection(StringRef SectionName, + const SectionRef &Section) { + LLVM_DEBUG({ + ListScope D(W, "CodeViewDebugInfo"); + W.printNumber("Section", SectionName, getObj().getSectionID(Section)); + }); + + Expected<StringRef> SectionOrErr = Section.getContents(); + if (!SectionOrErr) + return SectionOrErr.takeError(); + StringRef SectionContents = *SectionOrErr; + StringRef Data = SectionContents; + + SmallVector<StringRef, 10> SymbolNames; + StringMap<StringRef> FunctionLineTables; + + uint32_t Magic; + if (Error E = consume(Data, Magic)) + return createStringError(errorToErrorCode(std::move(E)), getFileName()); + + if (Magic != COFF::DEBUG_SECTION_MAGIC) + return createStringError(object_error::parse_failed, getFileName()); + + BinaryStreamReader FSReader(Data, support::little); + if (Error Err = initializeFileAndStringTables(FSReader)) + return Err; + + while (!Data.empty()) { + // The section consists of a number of subsection in the following format: + // |SubSectionType|SubSectionSize|Contents...| + uint32_t SubType, SubSectionSize; + if (Error E = consume(Data, SubType)) + return 
createStringError(errorToErrorCode(std::move(E)), getFileName()); + if (Error E = consume(Data, SubSectionSize)) + return createStringError(errorToErrorCode(std::move(E)), getFileName()); + + // Process the subsection as normal even if the ignore bit is set. + SubType &= ~SubsectionIgnoreFlag; + + // Get the contents of the subsection. + if (SubSectionSize > Data.size()) + return createStringError(object_error::parse_failed, getFileName()); + StringRef Contents = Data.substr(0, SubSectionSize); + + // Add SubSectionSize to the current offset and align that offset + // to find the next subsection. + size_t SectionOffset = Data.data() - SectionContents.data(); + size_t NextOffset = SectionOffset + SubSectionSize; + NextOffset = alignTo(NextOffset, 4); + if (NextOffset > SectionContents.size()) + return createStringError(object_error::parse_failed, getFileName()); + Data = SectionContents.drop_front(NextOffset); + + switch (DebugSubsectionKind(SubType)) { + case DebugSubsectionKind::Symbols: + if (Error Err = + traverseSymbolsSubsection(Contents, Section, SectionContents)) + return Err; + break; + + case DebugSubsectionKind::InlineeLines: + if (Error Err = traverseInlineeLines(Contents)) + return Err; + break; + + case DebugSubsectionKind::Lines: + // Holds a PC to file:line table. Some data to parse this subsection + // is stored in the other subsections, so just check sanity and store + // the pointers for deferred processing. + + // Collect function and ranges only if we need to print logical lines. + if (options().getGeneralCollectRanges()) { + + if (SubSectionSize < 12) { + // There should be at least three words to store two function + // relocations and size of the code. 
+ return createStringError(object_error::parse_failed, getFileName()); + } + + StringRef SymbolName; + if (Error Err = resolveSymbolName(getObj().getCOFFSection(Section), + SectionOffset, SymbolName)) + return createStringError(errorToErrorCode(std::move(Err)), + getFileName()); + + LLVM_DEBUG({ W.printString("Symbol Name", SymbolName); }); + if (FunctionLineTables.count(SymbolName) != 0) { + // Saw debug info for this function already? + return createStringError(object_error::parse_failed, getFileName()); + } + + FunctionLineTables[SymbolName] = Contents; + SymbolNames.push_back(SymbolName); + } + break; + + // Do nothing for unrecognized subsections. + default: + break; + } + W.flush(); + } + + // Traverse the line tables now that we've read all the subsections and + // know all the required information. + for (StringRef SymbolName : SymbolNames) { + LLVM_DEBUG({ + ListScope S(W, "FunctionLineTable"); + W.printString("Symbol Name", SymbolName); + }); + + BinaryStreamReader Reader(FunctionLineTables[SymbolName], support::little); + + DebugLinesSubsectionRef Lines; + if (Error E = Lines.initialize(Reader)) + return createStringError(errorToErrorCode(std::move(E)), getFileName()); + + // Find the associated symbol table information. + LVSymbolTableEntry SymbolTableEntry = getSymbolTableEntry(SymbolName); + LVScope *Function = SymbolTableEntry.Scope; + if (!Function) + continue; + + LVAddress Addendum = SymbolTableEntry.Address; + LVSectionIndex SectionIndex = SymbolTableEntry.SectionIndex; + + // The given scope represents the function that contains the line numbers. + // Collect all generated debug lines associated with the function. + CULines.clear(); + + // For the given scope, collect all scopes ranges. 
+ LVRange *ScopesWithRanges = getSectionRanges(SectionIndex); + ScopesWithRanges->clear(); + Function->getRanges(*ScopesWithRanges); + ScopesWithRanges->sort(); + + uint16_t Segment = Lines.header()->RelocSegment; + uint32_t Begin = Lines.header()->RelocOffset; + uint32_t Size = Lines.header()->CodeSize; + for (const LineColumnEntry &Block : Lines) + if (Error Err = createLines(Block.LineNumbers, Addendum, Segment, Begin, + Size, Block.NameIndex)) + return Err; + + // Include lines from any inlined functions within the current function. + includeInlineeLines(SectionIndex, Function); + + if (Error Err = createInstructions(Function, SectionIndex)) + return Err; + + processLines(&CULines, SectionIndex, Function); + } + + return Error::success(); +} + +void LVCodeViewReader::sortScopes() { Root->sort(); } + +void LVCodeViewReader::print(raw_ostream &OS) const { + LLVM_DEBUG(dbgs() << "CreateReaders\n"); +} + +void LVCodeViewReader::mapRangeAddress(const ObjectFile &Obj, + const SectionRef &Section, + bool IsComdat) { + if (!Obj.isCOFF()) + return; + + const COFFObjectFile *Object = cast<COFFObjectFile>(&Obj); + + for (const SymbolRef &Sym : Object->symbols()) { + if (!Section.containsSymbol(Sym)) + continue; + + COFFSymbolRef Symbol = Object->getCOFFSymbol(Sym); + if (Symbol.getComplexType() != llvm::COFF::IMAGE_SYM_DTYPE_FUNCTION) + continue; + + StringRef SymbolName; + Expected<StringRef> SymNameOrErr = Object->getSymbolName(Symbol); + if (!SymNameOrErr) { + W.startLine() << "Invalid symbol name: " << Symbol.getSectionNumber() + << "\n"; + consumeError(SymNameOrErr.takeError()); + continue; + } + SymbolName = *SymNameOrErr; + + LLVM_DEBUG({ + Expected<const coff_section *> SectionOrErr = + Object->getSection(Symbol.getSectionNumber()); + if (!SectionOrErr) { + W.startLine() << "Invalid section number: " << Symbol.getSectionNumber() + << "\n"; + consumeError(SectionOrErr.takeError()); + return; + } + W.printNumber("Section #", Symbol.getSectionNumber()); + 
W.printString("Name", SymbolName); + W.printHex("Value", Symbol.getValue()); + }); + + // Record the symbol name (linkage) and its loading address. + addToSymbolTable(SymbolName, Symbol.getValue(), Symbol.getSectionNumber(), + IsComdat); + } +} + +Error LVCodeViewReader::createScopes(COFFObjectFile &Obj) { + if (Error Err = loadTargetInfo(Obj)) + return Err; + + // Initialization required when processing a COFF file: + // Cache the symbols relocations. + // Create a mapping for virtual addresses. + // Get the functions entry points. + cacheRelocations(); + mapVirtualAddress(Obj); + + for (const SectionRef &Section : Obj.sections()) { + Expected<StringRef> SectionNameOrErr = Section.getName(); + if (!SectionNameOrErr) + return SectionNameOrErr.takeError(); + // .debug$T is a standard CodeView type section, while .debug$P is the + // same format but used for MSVC precompiled header object files. + if (*SectionNameOrErr == ".debug$T" || *SectionNameOrErr == ".debug$P") + if (Error Err = traverseTypeSection(*SectionNameOrErr, Section)) + return Err; + } + + // Process collected namespaces. + LogicalVisitor.processNamespaces(); + + for (const SectionRef &Section : Obj.sections()) { + Expected<StringRef> SectionNameOrErr = Section.getName(); + if (!SectionNameOrErr) + return SectionNameOrErr.takeError(); + if (*SectionNameOrErr == ".debug$S") + if (Error Err = traverseSymbolSection(*SectionNameOrErr, Section)) + return Err; + } + + // Check if we have to close the Compile Unit scope. + LogicalVisitor.closeScope(); + + // Traverse the strings recorded and transform them into filenames. + LogicalVisitor.processFiles(); + + // Process collected element lines. + LogicalVisitor.processLines(); + + // Translate composite names into a single component. 
+ Root->transformScopedName(); + return Error::success(); +} + +Error LVCodeViewReader::createScopes(PDBFile &Pdb) { + if (Error Err = loadTargetInfo(Pdb)) + return Err; + + if (!Pdb.hasPDBTpiStream() || !Pdb.hasPDBDbiStream()) + return Error::success(); + + // Open the executable associated with the PDB file and get the section + // addresses used to calculate linear addresses for CodeView Symbols. + if (!ExePath.empty()) { + ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr = + MemoryBuffer::getFileOrSTDIN(ExePath); + if (BuffOrErr.getError()) { + return createStringError(errc::bad_file_descriptor, + "File '%s' does not exist.", ExePath.c_str()); + } + BinaryBuffer = std::move(BuffOrErr.get()); + + // Check if the buffer corresponds to a PECOFF executable. + assert(identify_magic(BinaryBuffer->getBuffer()) == + file_magic::pecoff_executable && + "Invalid PECOFF executable file."); + + Expected<std::unique_ptr<Binary>> BinOrErr = + createBinary(BinaryBuffer->getMemBufferRef()); + if (errorToErrorCode(BinOrErr.takeError())) { + return createStringError(errc::not_supported, + "Binary object format in '%s' is not supported.", + ExePath.c_str()); + } + BinaryExecutable = std::move(*BinOrErr); + if (COFFObjectFile *COFFObject = + dyn_cast<COFFObjectFile>(BinaryExecutable.get())) + mapVirtualAddress(*COFFObject); + } + + // In order to generate a full logical view, we have to traverse both + // streams TPI and IPI if they are present. The following table gives + // the stream where a specified type is located. If the IPI stream is + // not present, all the types are located in the TPI stream. 
+ // + // TPI Stream: + // LF_POINTER LF_MODIFIER LF_PROCEDURE LF_MFUNCTION + // LF_LABEL LF_ARGLIST LF_FIELDLIST LF_ARRAY + // LF_CLASS LF_STRUCTURE LF_INTERFACE LF_UNION + // LF_ENUM LF_TYPESERVER2 LF_VFTABLE LF_VTSHAPE + // LF_BITFIELD LF_METHODLIST LF_PRECOMP LF_ENDPRECOMP + // + // IPI stream: + // LF_FUNC_ID LF_MFUNC_ID LF_BUILDINFO + // LF_SUBSTR_LIST LF_STRING_ID LF_UDT_SRC_LINE + // LF_UDT_MOD_SRC_LINE + + LazyRandomTypeCollection &Types = types(); + LazyRandomTypeCollection &Ids = ids(); + if (Error Err = traverseTypes(Pdb, Types, Ids)) + return Err; + + // Process collected namespaces. + LogicalVisitor.processNamespaces(); + + LLVM_DEBUG({ W.getOStream() << "Traversing inlined lines\n"; }); + + auto VisitInlineeLines = [&](int32_t Modi, const SymbolGroup &SG, + DebugInlineeLinesSubsectionRef &Lines) -> Error { + return collectInlineeInfo(Lines, &SG); + }; + + FilterOptions Filters = {}; + LinePrinter Printer(/*Indent=*/2, false, nulls(), Filters); + const PrintScope HeaderScope(Printer, /*IndentLevel=*/2); + if (Error Err = iterateModuleSubsections<DebugInlineeLinesSubsectionRef>( + Input, HeaderScope, VisitInlineeLines)) + return Err; + + // Traverse global symbols. + LLVM_DEBUG({ W.getOStream() << "Traversing global symbols\n"; }); + if (Pdb.hasPDBGlobalsStream()) { + Expected<GlobalsStream &> GlobalsOrErr = Pdb.getPDBGlobalsStream(); + if (!GlobalsOrErr) + return GlobalsOrErr.takeError(); + GlobalsStream &Globals = *GlobalsOrErr; + const GSIHashTable &Table = Globals.getGlobalsTable(); + Expected<SymbolStream &> ExpectedSyms = Pdb.getPDBSymbolStream(); + if (ExpectedSyms) { + + SymbolVisitorCallbackPipeline Pipeline; + SymbolDeserializer Deserializer(nullptr, CodeViewContainer::Pdb); + LVSymbolVisitor Traverser(this, W, &LogicalVisitor, Types, Ids, nullptr, + LogicalVisitor.getShared()); + + // As the global symbols do not have an associated Compile Unit, create + // one, as the container for all global symbols. 
+ RecordPrefix Prefix(SymbolKind::S_COMPILE3); + CVSymbol Symbol(&Prefix, sizeof(Prefix)); + uint32_t Offset = 0; + if (Error Err = Traverser.visitSymbolBegin(Symbol, Offset)) + consumeError(std::move(Err)); + else { + // The CodeView compile unit containing the global symbols does not + // have a name; generate one using its parent name (object filename) + // follow by the '_global' string. + std::string Name(CompileUnit->getParentScope()->getName()); + CompileUnit->setName(Name.append("_global")); + + Pipeline.addCallbackToPipeline(Deserializer); + Pipeline.addCallbackToPipeline(Traverser); + CVSymbolVisitor Visitor(Pipeline); + + BinaryStreamRef SymStream = + ExpectedSyms->getSymbolArray().getUnderlyingStream(); + for (uint32_t PubSymOff : Table) { + Expected<CVSymbol> Sym = readSymbolFromStream(SymStream, PubSymOff); + if (Sym) { + if (Error Err = Visitor.visitSymbolRecord(*Sym, PubSymOff)) + return createStringError(errorToErrorCode(std::move(Err)), + getFileName()); + } else { + consumeError(Sym.takeError()); + } + } + } + + LogicalVisitor.closeScope(); + } else { + consumeError(ExpectedSyms.takeError()); + } + } + + // Traverse symbols (DBI). 
+ LLVM_DEBUG({ W.getOStream() << "Traversing symbol groups\n"; }); + + auto VisitSymbolGroup = [&](uint32_t Modi, const SymbolGroup &SG) -> Error { + Expected<ModuleDebugStreamRef> ExpectedModS = + getModuleDebugStream(Pdb, Modi); + if (ExpectedModS) { + ModuleDebugStreamRef &ModS = *ExpectedModS; + + LLVM_DEBUG({ + W.getOStream() << formatv("Traversing Group: Mod {0:4}\n", Modi); + }); + + SymbolVisitorCallbackPipeline Pipeline; + SymbolDeserializer Deserializer(nullptr, CodeViewContainer::Pdb); + LVSymbolVisitor Traverser(this, W, &LogicalVisitor, Types, Ids, nullptr, + LogicalVisitor.getShared()); + + Pipeline.addCallbackToPipeline(Deserializer); + Pipeline.addCallbackToPipeline(Traverser); + CVSymbolVisitor Visitor(Pipeline); + BinarySubstreamRef SS = ModS.getSymbolsSubstream(); + if (Error Err = + Visitor.visitSymbolStream(ModS.getSymbolArray(), SS.Offset)) + return createStringError(errorToErrorCode(std::move(Err)), + getFileName()); + } else { + // If the module stream does not exist, it is not an error condition. + consumeError(ExpectedModS.takeError()); + } + + return Error::success(); + }; + + if (Error Err = iterateSymbolGroups(Input, HeaderScope, VisitSymbolGroup)) + return Err; + + // At this stage, the logical view contains all scopes, symbols and types. + // For PDBs we can use the module id, to access its specific compile unit. + // The line record addresses has been already resolved, so we can apply the + // flow as when processing DWARF. + + LLVM_DEBUG({ W.getOStream() << "Traversing lines\n"; }); + + // Record all line records for a Compile Unit. 
+ CULines.clear(); + + auto VisitDebugLines = [this](int32_t Modi, const SymbolGroup &SG, + DebugLinesSubsectionRef &Lines) -> Error { + if (!options().getPrintLines()) + return Error::success(); + + uint16_t Segment = Lines.header()->RelocSegment; + uint32_t Begin = Lines.header()->RelocOffset; + uint32_t Size = Lines.header()->CodeSize; + + LLVM_DEBUG({ W.getOStream() << formatv("Modi = {0}\n", Modi); }); + + // We have line information for a new module; finish processing the + // collected information for the current module. Once it is done, start + // recording the line information for the new module. + if (CurrentModule != Modi) { + if (Error Err = processModule()) + return Err; + CULines.clear(); + CurrentModule = Modi; + } + + for (const LineColumnEntry &Block : Lines) + if (Error Err = createLines(Block.LineNumbers, /*Addendum=*/0, Segment, + Begin, Size, Block.NameIndex, &SG)) + return Err; + + return Error::success(); + }; + + if (Error Err = iterateModuleSubsections<DebugLinesSubsectionRef>( + Input, HeaderScope, VisitDebugLines)) + return Err; + + // Check if we have to close the Compile Unit scope. + LogicalVisitor.closeScope(); + + // Process collected element lines. + LogicalVisitor.processLines(); + + // Translate composite names into a single component. + Root->transformScopedName(); + return Error::success(); +} + +Error LVCodeViewReader::processModule() { + if (LVScope *Scope = getScopeForModule(CurrentModule)) { + CompileUnit = static_cast<LVScopeCompileUnit *>(Scope); + + LLVM_DEBUG({ dbgs() << "Processing Scope: " << Scope->getName() << "\n"; }); + + // For the given compile unit, collect all scopes ranges. + // For a complete ranges and lines mapping, the logical view support + // needs for the compile unit to have a low and high pc values. We + // can traverse the 'Modules' section and get the information for the + // specific module. Another option, is from all the ranges collected + // to take the first and last values. 
+ LVSectionIndex SectionIndex = DotTextSectionIndex; + LVRange *ScopesWithRanges = getSectionRanges(SectionIndex); + ScopesWithRanges->clear(); + CompileUnit->getRanges(*ScopesWithRanges); + if (!ScopesWithRanges->empty()) + CompileUnit->addObject(ScopesWithRanges->getLower(), + ScopesWithRanges->getUpper()); + ScopesWithRanges->sort(); + + if (Error Err = createInstructions()) + return Err; + + // Include lines from any inlined functions within the current function. + includeInlineeLines(SectionIndex, Scope); + + processLines(&CULines, SectionIndex, nullptr); + } + + return Error::success(); +} + +// In order to create the scopes, the CodeView Reader will: +// = Traverse the TPI/IPI stream (Type visitor): +// Collect forward references, scoped names, type indexes that will represent +// a logical element, strings, line records, linkage names. +// = Traverse the symbols section (Symbol visitor): +// Create the scopes tree and creates the required logical elements, by +// using the collected indexes from the type visitor. +Error LVCodeViewReader::createScopes() { + LLVM_DEBUG({ + W.startLine() << "\n"; + W.printString("File", getFileName().str()); + W.printString("Exe", ExePath); + W.printString("Format", FileFormatName); + }); + + if (Error Err = LVReader::createScopes()) + return Err; + + LogicalVisitor.setRoot(Root); + + if (isObj()) { + if (Error Err = createScopes(getObj())) + return Err; + } else { + if (Error Err = createScopes(getPdb())) + return Err; + } + + return Error::success(); +} + +Error LVCodeViewReader::loadTargetInfo(const ObjectFile &Obj) { + // Detect the architecture from the object file. We usually don't need OS + // info to lookup a target and create register info. 
+  Triple TT;
+  TT.setArch(Triple::ArchType(Obj.getArch()));
+  TT.setVendor(Triple::UnknownVendor);
+  TT.setOS(Triple::UnknownOS);
+
+  // Features to be passed to target/subtarget
+  Expected<SubtargetFeatures> Features = Obj.getFeatures();
+  SubtargetFeatures FeaturesValue;
+  // Only read the value when the query succeeded; an Expected holding an
+  // error must not be dereferenced. On failure the error is discarded and
+  // the default-constructed (empty) feature set is used.
+  if (Features)
+    FeaturesValue = *Features;
+  else
+    consumeError(Features.takeError());
+  return loadGenericTargetInfo(TT.str(), FeaturesValue.getString());
+}
+
+Error LVCodeViewReader::loadTargetInfo(const PDBFile &Pdb) {
+  Triple TT;
+  TT.setArch(Triple::ArchType::x86_64);
+  TT.setVendor(Triple::UnknownVendor);
+  TT.setOS(Triple::Win32);
+
+  StringRef TheFeature = "";
+
+  return loadGenericTargetInfo(TT.str(), TheFeature);
+}
+
+std::string LVCodeViewReader::getRegisterName(LVSmall Opcode,
+                                              ArrayRef<uint64_t> Operands) {
+  // Get Compilation Unit CPU Type.
+  CPUType CPU = getCompileUnitCPUType();
+  // For CodeView the register always is in Operands[0];
+  RegisterId Register = (RegisterId(Operands[0]));
+  return formatRegisterId(Register, CPU);
+}
diff --git a/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewVisitor.cpp b/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewVisitor.cpp
new file mode 100644
index 000000000000..e4f5f533262b
--- /dev/null
+++ b/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewVisitor.cpp
@@ -0,0 +1,3525 @@
+//===-- LVCodeViewVisitor.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the LVCodeViewVisitor class.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/LogicalView/Readers/LVCodeViewVisitor.h" +#include "llvm/BinaryFormat/Magic.h" +#include "llvm/DebugInfo/CodeView/EnumTables.h" +#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h" +#include "llvm/DebugInfo/CodeView/SymbolRecordHelpers.h" +#include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h" +#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h" +#include "llvm/DebugInfo/LogicalView/Core/LVScope.h" +#include "llvm/DebugInfo/LogicalView/Core/LVSymbol.h" +#include "llvm/DebugInfo/LogicalView/Core/LVType.h" +#include "llvm/DebugInfo/LogicalView/Readers/LVCodeViewReader.h" +#include "llvm/DebugInfo/PDB/Native/DbiStream.h" +#include "llvm/DebugInfo/PDB/Native/InputFile.h" +#include "llvm/DebugInfo/PDB/Native/NativeSession.h" +#include "llvm/DebugInfo/PDB/Native/PDBFile.h" +#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" +#include "llvm/DebugInfo/PDB/Native/RawError.h" +#include "llvm/DebugInfo/PDB/Native/TpiStream.h" +#include "llvm/DebugInfo/PDB/PDB.h" +#include "llvm/Demangle/Demangle.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/FormatAdapters.h" +#include "llvm/Support/FormatVariadic.h" + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::object; +using namespace llvm::pdb; +using namespace llvm::logicalview; + +#define DEBUG_TYPE "CodeViewUtilities" + +namespace llvm { +namespace logicalview { + +static TypeIndex getTrueType(TypeIndex &TI) { + // Dealing with a MSVC generated PDB, we encountered a type index with the + // value of: 0x0280xxxx where xxxx=0000. 
+ // + // There is some documentation about type indices: + // https://llvm.org/docs/PDB/TpiStream.html + // + // A type index is a 32-bit integer that uniquely identifies a type inside + // of an object file’s .debug$T section or a PDB file’s TPI or IPI stream. + // The value of the type index for the first type record from the TPI stream + // is given by the TypeIndexBegin member of the TPI Stream Header although + // in practice this value is always equal to 0x1000 (4096). + // + // Any type index with a high bit set is considered to come from the IPI + // stream, although this appears to be more of a hack, and LLVM does not + // generate type indices of this nature. They can, however, be observed in + // Microsoft PDBs occasionally, so one should be prepared to handle them. + // Note that having the high bit set is not a necessary condition to + // determine whether a type index comes from the IPI stream, it is only + // sufficient. + LLVM_DEBUG( + { dbgs() << "Index before: " << HexNumber(TI.getIndex()) << "\n"; }); + TI.setIndex(TI.getIndex() & 0x0000ffff); + LLVM_DEBUG( + { dbgs() << "Index after: " << HexNumber(TI.getIndex()) << "\n"; }); + return TI; +} + +static const EnumEntry<TypeLeafKind> LeafTypeNames[] = { +#define CV_TYPE(enum, val) {#enum, enum}, +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" +}; + +// Return the type name pointed by the type index. It uses the kind to query +// the associated name for the record type. 
+static StringRef getRecordName(LazyRandomTypeCollection &Types, TypeIndex TI) {
+  // Simple type indices are not looked up in the collection; they yield an
+  // empty name.
+  if (TI.isSimple())
+    return {};
+
+  CVType Reference = Types.getType(TI);
+  StringRef Name;
+  // Deserialize the record into the given record object and, on success,
+  // keep its name. A record that fails to deserialize leaves the name empty.
+  auto ExtractName = [&](auto Record) {
+    Error Err = TypeDeserializer::deserializeAs(Reference, Record);
+    if (Err) {
+      consumeError(std::move(Err));
+      return;
+    }
+    Name = Record.getName();
+  };
+
+  const TypeRecordKind RK = static_cast<TypeRecordKind>(Reference.kind());
+  switch (RK) {
+  case TypeRecordKind::Class:
+  case TypeRecordKind::Struct:
+    ExtractName(ClassRecord(RK));
+    break;
+  case TypeRecordKind::Union:
+    ExtractName(UnionRecord(RK));
+    break;
+  case TypeRecordKind::Enum:
+    ExtractName(EnumRecord(RK));
+    break;
+  default:
+    // Any other record kind reports an empty name.
+    break;
+  }
+
+  return Name;
+}
+
+} // namespace logicalview
+} // namespace llvm
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "CodeViewDataVisitor"
+
+namespace llvm {
+namespace logicalview {
+
+// Keeps the type indexes with line information.
+using LVLineRecords = std::vector<TypeIndex>;
+
+namespace {
+
+class LVTypeRecords {
+  LVShared *Shared = nullptr;
+
+  // Logical elements associated to their CodeView Type Index.
+  using RecordEntry = std::pair<TypeLeafKind, LVElement *>;
+  using RecordTable = std::map<TypeIndex, RecordEntry>;
+  RecordTable RecordFromTypes;
+  RecordTable RecordFromIds;
+
+  using NameTable = std::map<StringRef, TypeIndex>;
+  NameTable NameFromTypes;
+  NameTable NameFromIds;
+
+public:
+  LVTypeRecords(LVShared *Shared) : Shared(Shared) {}
+
+  void add(uint32_t StreamIdx, TypeIndex TI, TypeLeafKind Kind,
+           LVElement *Element = nullptr);
+  void add(uint32_t StreamIdx, TypeIndex TI, StringRef Name);
+  LVElement *find(uint32_t StreamIdx, TypeIndex TI, bool Create = true);
+  TypeIndex find(uint32_t StreamIdx, StringRef Name);
+};
+
+class LVForwardReferences {
+  // Forward reference and its definitions (Name as key).
+ using ForwardEntry = std::pair<TypeIndex, TypeIndex>; + using ForwardTypeNames = std::map<StringRef, ForwardEntry>; + ForwardTypeNames ForwardTypesNames; + + // Forward reference and its definition (TypeIndex as key). + using ForwardType = std::map<TypeIndex, TypeIndex>; + ForwardType ForwardTypes; + + // Forward types and its references. + void add(TypeIndex TIForward, TypeIndex TIReference) { + ForwardTypes.emplace(TIForward, TIReference); + } + + void add(StringRef Name, TypeIndex TIForward) { + if (ForwardTypesNames.find(Name) == ForwardTypesNames.end()) { + ForwardTypesNames.emplace( + std::piecewise_construct, std::forward_as_tuple(Name), + std::forward_as_tuple(TIForward, TypeIndex::None())); + } else { + // Update a recorded definition with its reference. + ForwardTypesNames[Name].first = TIForward; + add(TIForward, ForwardTypesNames[Name].second); + } + } + + // Update a previously recorded forward reference with its definition. + void update(StringRef Name, TypeIndex TIReference) { + if (ForwardTypesNames.find(Name) != ForwardTypesNames.end()) { + // Update the recorded forward reference with its definition. + ForwardTypesNames[Name].second = TIReference; + add(ForwardTypesNames[Name].first, TIReference); + } else { + // We have not seen the forward reference. Insert the definition. + ForwardTypesNames.emplace( + std::piecewise_construct, std::forward_as_tuple(Name), + std::forward_as_tuple(TypeIndex::None(), TIReference)); + } + } + +public: + LVForwardReferences() = default; + + void record(bool IsForwardRef, StringRef Name, TypeIndex TI) { + // We are expecting for the forward references to be first. But that + // is not always the case. A name must be recorded regardless of the + // order in which the forward reference appears. + (IsForwardRef) ? add(Name, TI) : update(Name, TI); + } + + TypeIndex find(TypeIndex TIForward) { + return (ForwardTypes.find(TIForward) != ForwardTypes.end()) + ? 
ForwardTypes[TIForward] + : TypeIndex::None(); + } + + TypeIndex find(StringRef Name) { + return (ForwardTypesNames.find(Name) != ForwardTypesNames.end()) + ? ForwardTypesNames[Name].second + : TypeIndex::None(); + } + + // If the given TI corresponds to a reference, return the reference. + // Otherwise return the given TI. + TypeIndex remap(TypeIndex TI) { + TypeIndex Forward = find(TI); + return Forward.isNoneType() ? TI : Forward; + } +}; + +// Namespace deduction. +class LVNamespaceDeduction { + LVShared *Shared = nullptr; + + using Names = std::map<StringRef, LVScope *>; + Names NamespaceNames; + + using LookupSet = std::set<StringRef>; + LookupSet DeducedScopes; + LookupSet UnresolvedScopes; + LookupSet IdentifiedNamespaces; + + void add(StringRef Name, LVScope *Namespace) { + if (NamespaceNames.find(Name) == NamespaceNames.end()) + NamespaceNames.emplace(Name, Namespace); + } + +public: + LVNamespaceDeduction(LVShared *Shared) : Shared(Shared) {} + + void init(); + void add(StringRef String); + LVScope *get(LVStringRefs Components); + LVScope *get(StringRef Name, bool CheckScope = true); + + // Find the logical namespace for the 'Name' component. + LVScope *find(StringRef Name) { + LVScope *Namespace = (NamespaceNames.find(Name) != NamespaceNames.end()) + ? NamespaceNames[Name] + : nullptr; + return Namespace; + } + + // For the given lexical components, return a tuple with the first entry + // being the outermost namespace and the second entry being the first + // non-namespace. + LVLexicalIndex find(LVStringRefs Components) { + if (Components.empty()) + return {}; + + LVStringRefs::size_type FirstNamespace = 0; + LVStringRefs::size_type FirstNonNamespace; + for (LVStringRefs::size_type Index = 0; Index < Components.size(); + ++Index) { + FirstNonNamespace = Index; + LookupSet::iterator Iter = IdentifiedNamespaces.find(Components[Index]); + if (Iter == IdentifiedNamespaces.end()) + // The component is not a namespace name. 
+ break; + } + return std::make_tuple(FirstNamespace, FirstNonNamespace); + } +}; + +// Strings. +class LVStringRecords { + using StringEntry = std::tuple<uint32_t, std::string, LVScopeCompileUnit *>; + using StringIds = std::map<TypeIndex, StringEntry>; + StringIds Strings; + +public: + LVStringRecords() = default; + + void add(TypeIndex TI, StringRef String) { + static uint32_t Index = 0; + if (Strings.find(TI) == Strings.end()) + Strings.emplace( + std::piecewise_construct, std::forward_as_tuple(TI), + std::forward_as_tuple(++Index, std::string(String), nullptr)); + } + + StringRef find(TypeIndex TI) { + StringIds::iterator Iter = Strings.find(TI); + return Iter != Strings.end() ? std::get<1>(Iter->second) : StringRef{}; + } + + uint32_t findIndex(TypeIndex TI) { + StringIds::iterator Iter = Strings.find(TI); + return Iter != Strings.end() ? std::get<0>(Iter->second) : 0; + } + + // Move strings representing the filenames to the compile unit. + void addFilenames(); + void addFilenames(LVScopeCompileUnit *Scope); +}; +} // namespace + +using LVTypeKinds = std::set<TypeLeafKind>; +using LVSymbolKinds = std::set<SymbolKind>; + +// The following data keeps forward information, type records, names for +// namespace deduction, strings records, line records. +// It is shared by the type visitor, symbol visitor and logical visitor and +// it is independent from the CodeViewReader. +struct LVShared { + LVCodeViewReader *Reader; + LVLogicalVisitor *Visitor; + LVForwardReferences ForwardReferences; + LVLineRecords LineRecords; + LVNamespaceDeduction NamespaceDeduction; + LVStringRecords StringRecords; + LVTypeRecords TypeRecords; + + // In order to determine which types and/or symbols records should be handled + // by the reader, we record record kinds seen by the type and symbol visitors. + // At the end of the scopes creation, the '--internal=tag' option will allow + // to print the unique record ids collected. 
+ LVTypeKinds TypeKinds; + LVSymbolKinds SymbolKinds; + + LVShared(LVCodeViewReader *Reader, LVLogicalVisitor *Visitor) + : Reader(Reader), Visitor(Visitor), NamespaceDeduction(this), + TypeRecords(this) {} + ~LVShared() = default; +}; +} // namespace logicalview +} // namespace llvm + +void LVTypeRecords::add(uint32_t StreamIdx, TypeIndex TI, TypeLeafKind Kind, + LVElement *Element) { + RecordTable &Target = + (StreamIdx == StreamTPI) ? RecordFromTypes : RecordFromIds; + Target.emplace(std::piecewise_construct, std::forward_as_tuple(TI), + std::forward_as_tuple(Kind, Element)); +} + +void LVTypeRecords::add(uint32_t StreamIdx, TypeIndex TI, StringRef Name) { + NameTable &Target = (StreamIdx == StreamTPI) ? NameFromTypes : NameFromIds; + Target.emplace(Name, TI); +} + +LVElement *LVTypeRecords::find(uint32_t StreamIdx, TypeIndex TI, bool Create) { + RecordTable &Target = + (StreamIdx == StreamTPI) ? RecordFromTypes : RecordFromIds; + + LVElement *Element = nullptr; + RecordTable::iterator Iter = Target.find(TI); + if (Iter != Target.end()) { + Element = Iter->second.second; + if (Element || !Create) + return Element; + + // Create the logical element if not found. + Element = Shared->Visitor->createElement(Iter->second.first); + if (Element) { + Element->setOffset(TI.getIndex()); + Element->setOffsetFromTypeIndex(); + Target[TI].second = Element; + } + } + return Element; +} + +TypeIndex LVTypeRecords::find(uint32_t StreamIdx, StringRef Name) { + NameTable &Target = (StreamIdx == StreamTPI) ? NameFromTypes : NameFromIds; + NameTable::iterator Iter = Target.find(Name); + return Iter != Target.end() ? 
Iter->second : TypeIndex::None(); +} + +void LVStringRecords::addFilenames() { + for (StringIds::const_reference Entry : Strings) { + StringRef Name = std::get<1>(Entry.second); + LVScopeCompileUnit *Scope = std::get<2>(Entry.second); + Scope->addFilename(transformPath(Name)); + } + Strings.clear(); +} + +void LVStringRecords::addFilenames(LVScopeCompileUnit *Scope) { + for (StringIds::reference Entry : Strings) + if (!std::get<2>(Entry.second)) + std::get<2>(Entry.second) = Scope; +} + +void LVNamespaceDeduction::add(StringRef String) { + StringRef InnerComponent; + StringRef OuterComponent; + std::tie(OuterComponent, InnerComponent) = getInnerComponent(String); + DeducedScopes.insert(InnerComponent); + if (OuterComponent.size()) + UnresolvedScopes.insert(OuterComponent); +} + +void LVNamespaceDeduction::init() { + // We have 2 sets of names: + // - deduced scopes (class, structure, union and enum) and + // - unresolved scopes, that can represent namespaces or any deduced. + // Before creating the namespaces, we have to traverse the unresolved + // and remove any references to already deduced scopes. 
LVStringRefs Components;
  for (const StringRef &Unresolved : UnresolvedScopes) {
    Components = getAllLexicalComponents(Unresolved);
    for (const StringRef &Component : Components) {
      // Any component that is not a deduced scope is treated as a namespace.
      LookupSet::iterator Iter = DeducedScopes.find(Component);
      if (Iter == DeducedScopes.end())
        IdentifiedNamespaces.insert(Component);
    }
  }

  // Debug aid: dump the three sets, each one under a banner with its title.
  LLVM_DEBUG({
    auto Print = [&](LookupSet &Container, const char *Title) {
      auto Header = [&]() {
        dbgs() << formatv("\n{0}\n", fmt_repeat('=', 72));
        dbgs() << formatv("{0}\n", Title);
        dbgs() << formatv("{0}\n", fmt_repeat('=', 72));
      };
      Header();
      for (const StringRef &Item : Container)
        dbgs() << formatv("'{0}'\n", Item.str().c_str());
    };

    Print(DeducedScopes, "Deducted Scopes");
    Print(UnresolvedScopes, "Unresolved Scopes");
    Print(IdentifiedNamespaces, "Namespaces");
  });
}

// Return the innermost namespace scope for the given chain of components,
// creating any namespace scope that has not been seen before. The chain is
// rooted at the reader's compile unit. Returns null for an empty chain.
LVScope *LVNamespaceDeduction::get(LVStringRefs Components) {
  LLVM_DEBUG({
    for (const StringRef &Component : Components)
      dbgs() << formatv("'{0}'\n", Component.str().c_str());
  });

  if (Components.empty())
    return nullptr;

  // Update the namespaces relationship.
  LVScope *Namespace = nullptr;
  LVScope *Parent = Shared->Reader->getCompileUnit();
  for (const StringRef &Component : Components) {
    // Check if we have seen the namespace.
    Namespace = find(Component);
    if (!Namespace) {
      // We have identified namespaces that are generated by MSVC. Mark them
      // as 'system' so they will be excluded from the logical view.
Namespace = Shared->Reader->createScopeNamespace();
      Namespace->setTag(dwarf::DW_TAG_namespace);
      Namespace->setName(Component);
      Parent->addElement(Namespace);
      getReader().isSystemEntry(Namespace);
      add(Component, Namespace);
    }
    // The namespace just found/created is the parent of the next component.
    Parent = Namespace;
  }
  return Parent;
}

// Split 'ScopedName' into its lexical components and return the namespace
// scope for them (see the overload taking the components). When 'CheckScope'
// is true, only the components that were identified as namespaces are kept.
LVScope *LVNamespaceDeduction::get(StringRef ScopedName, bool CheckScope) {
  LVStringRefs Components = getAllLexicalComponents(ScopedName);
  if (CheckScope)
    // Erase-remove: drop every component that is not an identified namespace.
    Components.erase(std::remove_if(Components.begin(), Components.end(),
                                    [&](StringRef Component) {
                                      LookupSet::iterator Iter =
                                          IdentifiedNamespaces.find(Component);
                                      return Iter == IdentifiedNamespaces.end();
                                    }),
                     Components.end());

  LLVM_DEBUG(
      { dbgs() << formatv("ScopedName: '{0}'\n", ScopedName.str().c_str()); });

  return get(Components);
}

#undef DEBUG_TYPE
#define DEBUG_TYPE "CodeViewTypeVisitor"

//===----------------------------------------------------------------------===//
// TypeRecord traversal.
//===----------------------------------------------------------------------===//
// Print a type index field, resolving it against the types collection when
// 'StreamIdx' is StreamTPI and against the ids collection otherwise.
void LVTypeVisitor::printTypeIndex(StringRef FieldName, TypeIndex TI,
                                   uint32_t StreamIdx) const {
  codeview::printTypeIndex(W, FieldName, TI,
                           StreamIdx == StreamTPI ? Types : Ids);
}

// Begin visiting a record whose type index is derived from the current size
// of the types collection.
Error LVTypeVisitor::visitTypeBegin(CVType &Record) {
  return visitTypeBegin(Record, TypeIndex::fromArrayIndex(Types.size()));
}

// Begin visiting the record with type index 'TI': remember it as the current
// type index and register its leaf kind in the shared type records.
Error LVTypeVisitor::visitTypeBegin(CVType &Record, TypeIndex TI) {
  LLVM_DEBUG({
    W.getOStream() << formatTypeLeafKind(Record.kind());
    W.getOStream() << " (" << HexNumber(TI.getIndex()) << ")\n";
  });

  if (options().getInternalTag())
    Shared->TypeKinds.insert(Record.kind());

  // The collected type records will be used to create the logical elements
  // during the symbols traversal when a type is referenced.
+ CurrentTypeIndex = TI; + Shared->TypeRecords.add(StreamIdx, TI, Record.kind()); + return Error::success(); +} + +Error LVTypeVisitor::visitUnknownType(CVType &Record) { + LLVM_DEBUG({ W.printNumber("Length", uint32_t(Record.content().size())); }); + return Error::success(); +} + +Error LVTypeVisitor::visitMemberBegin(CVMemberRecord &Record) { + LLVM_DEBUG({ + W.startLine() << formatTypeLeafKind(Record.Kind); + W.getOStream() << " {\n"; + W.indent(); + }); + return Error::success(); +} + +Error LVTypeVisitor::visitMemberEnd(CVMemberRecord &Record) { + LLVM_DEBUG({ + W.unindent(); + W.startLine() << "}\n"; + }); + return Error::success(); +} + +Error LVTypeVisitor::visitUnknownMember(CVMemberRecord &Record) { + LLVM_DEBUG({ W.printHex("UnknownMember", unsigned(Record.Kind)); }); + return Error::success(); +} + +// LF_BUILDINFO (TPI)/(IPI) +Error LVTypeVisitor::visitKnownRecord(CVType &Record, BuildInfoRecord &Args) { + // All the args are references into the TPI/IPI stream. + LLVM_DEBUG({ + W.printNumber("NumArgs", static_cast<uint32_t>(Args.getArgs().size())); + ListScope Arguments(W, "Arguments"); + for (TypeIndex Arg : Args.getArgs()) + printTypeIndex("ArgType", Arg, StreamIPI); + }); + + // Only add the strings that hold information about filenames. They will be + // used to complete the line/file information for the logical elements. + // There are other strings holding information about namespaces. + TypeIndex TI; + StringRef String; + + // Absolute CWD path + TI = Args.getArgs()[BuildInfoRecord::BuildInfoArg::CurrentDirectory]; + String = Ids.getTypeName(TI); + if (!String.empty()) + Shared->StringRecords.add(TI, String); + + // Get the compile unit name. 
+ TI = Args.getArgs()[BuildInfoRecord::BuildInfoArg::SourceFile]; + String = Ids.getTypeName(TI); + if (!String.empty()) + Shared->StringRecords.add(TI, String); + LogicalVisitor->setCompileUnitName(std::string(String)); + + return Error::success(); +} + +// LF_CLASS, LF_STRUCTURE, LF_INTERFACE (TPI) +Error LVTypeVisitor::visitKnownRecord(CVType &Record, ClassRecord &Class) { + LLVM_DEBUG({ + printTypeIndex("TypeIndex", CurrentTypeIndex, StreamTPI); + printTypeIndex("FieldListType", Class.getFieldList(), StreamTPI); + W.printString("Name", Class.getName()); + }); + + // Collect class name for scope deduction. + Shared->NamespaceDeduction.add(Class.getName()); + Shared->ForwardReferences.record(Class.isForwardRef(), Class.getName(), + CurrentTypeIndex); + + // Collect class name for contained scopes deduction. + Shared->TypeRecords.add(StreamIdx, CurrentTypeIndex, Class.getName()); + return Error::success(); +} + +// LF_ENUM (TPI) +Error LVTypeVisitor::visitKnownRecord(CVType &Record, EnumRecord &Enum) { + LLVM_DEBUG({ + printTypeIndex("TypeIndex", CurrentTypeIndex, StreamTPI); + printTypeIndex("FieldListType", Enum.getFieldList(), StreamTPI); + W.printString("Name", Enum.getName()); + }); + + // Collect enum name for scope deduction. + Shared->NamespaceDeduction.add(Enum.getName()); + return Error::success(); +} + +// LF_FUNC_ID (TPI)/(IPI) +Error LVTypeVisitor::visitKnownRecord(CVType &Record, FuncIdRecord &Func) { + LLVM_DEBUG({ + printTypeIndex("TypeIndex", CurrentTypeIndex, StreamTPI); + printTypeIndex("Type", Func.getFunctionType(), StreamTPI); + printTypeIndex("Parent", Func.getParentScope(), StreamTPI); + W.printString("Name", Func.getName()); + }); + + // Collect function name for scope deduction. 
Shared->NamespaceDeduction.add(Func.getName());
  return Error::success();
}

// LF_PROCEDURE (TPI)
// Register the procedure's type index in the shared type records.
Error LVTypeVisitor::visitKnownRecord(CVType &Record, ProcedureRecord &Proc) {
  LLVM_DEBUG({
    printTypeIndex("TypeIndex", CurrentTypeIndex, StreamTPI);
    printTypeIndex("ReturnType", Proc.getReturnType(), StreamTPI);
    W.printNumber("NumParameters", Proc.getParameterCount());
    printTypeIndex("ArgListType", Proc.getArgumentList(), StreamTPI);
  });

  // Collect procedure information as they can be referenced by typedefs.
  Shared->TypeRecords.add(StreamTPI, CurrentTypeIndex, {});
  return Error::success();
}

// LF_STRING_ID (TPI)/(IPI)
// Nothing is collected here; the record is only reported in the debug output.
Error LVTypeVisitor::visitKnownRecord(CVType &Record, StringIdRecord &String) {
  // No additional references are needed.
  LLVM_DEBUG({
    printTypeIndex("Id", String.getId(), StreamIPI);
    W.printString("StringData", String.getString());
  });
  return Error::success();
}

// LF_UDT_SRC_LINE (TPI)/(IPI)
// Remember the type index of the line record in the shared line records.
Error LVTypeVisitor::visitKnownRecord(CVType &Record,
                                      UdtSourceLineRecord &Line) {
  // UDT and SourceFile are references into the TPI/IPI stream.
  LLVM_DEBUG({
    printTypeIndex("UDT", Line.getUDT(), StreamIPI);
    printTypeIndex("SourceFile", Line.getSourceFile(), StreamIPI);
    W.printNumber("LineNumber", Line.getLineNumber());
  });

  Shared->LineRecords.push_back(CurrentTypeIndex);
  return Error::success();
}

// LF_UNION (TPI)
Error LVTypeVisitor::visitKnownRecord(CVType &Record, UnionRecord &Union) {
  LLVM_DEBUG({
    W.printNumber("MemberCount", Union.getMemberCount());
    printTypeIndex("FieldList", Union.getFieldList(), StreamTPI);
    W.printNumber("SizeOf", Union.getSize());
    W.printString("Name", Union.getName());
    if (Union.hasUniqueName())
      W.printString("UniqueName", Union.getUniqueName());
  });

  // Collect union name for scope deduction.
Shared->NamespaceDeduction.add(Union.getName());
  Shared->ForwardReferences.record(Union.isForwardRef(), Union.getName(),
                                   CurrentTypeIndex);

  // Collect union name for contained scopes deduction.
  Shared->TypeRecords.add(StreamIdx, CurrentTypeIndex, Union.getName());
  return Error::success();
}

#undef DEBUG_TYPE
#define DEBUG_TYPE "CodeViewSymbolVisitor"

//===----------------------------------------------------------------------===//
// SymbolRecord traversal.
//===----------------------------------------------------------------------===//
// Forward to the reader, adding the COFF section held by this delegate.
void LVSymbolVisitorDelegate::printRelocatedField(StringRef Label,
                                                  uint32_t RelocOffset,
                                                  uint32_t Offset,
                                                  StringRef *RelocSym) {
  Reader->printRelocatedField(Label, CoffSection, RelocOffset, Offset,
                              RelocSym);
}

// Forward to the reader, adding the COFF section held by this delegate.
void LVSymbolVisitorDelegate::getLinkageName(uint32_t RelocOffset,
                                             uint32_t Offset,
                                             StringRef *RelocSym) {
  Reader->getLinkageName(CoffSection, RelocOffset, Offset, RelocSym);
}

// Return the file name the reader resolves for 'FileOffset'. A lookup
// failure is consumed and reported as an empty name.
StringRef
LVSymbolVisitorDelegate::getFileNameForFileOffset(uint32_t FileOffset) {
  Expected<StringRef> Name = Reader->getFileNameForFileOffset(FileOffset);
  if (!Name) {
    consumeError(Name.takeError());
    return {};
  }
  return *Name;
}

// Return the reader's CodeView string table.
DebugStringTableSubsectionRef LVSymbolVisitorDelegate::getStringTable() {
  return Reader->CVStringTable;
}

// Print an address range. The relocated start offset is printed only when an
// object delegate is available.
void LVSymbolVisitor::printLocalVariableAddrRange(
    const LocalVariableAddrRange &Range, uint32_t RelocationOffset) {
  DictScope S(W, "LocalVariableAddrRange");
  if (ObjDelegate)
    ObjDelegate->printRelocatedField("OffsetStart", RelocationOffset,
                                     Range.OffsetStart);
  W.printHex("ISectStart", Range.ISectStart);
  W.printHex("Range", Range.Range);
}

// Print every gap, each one in its own "LocalVariableAddrGap" scope.
void LVSymbolVisitor::printLocalVariableAddrGap(
    ArrayRef<LocalVariableAddrGap> Gaps) {
  for (const LocalVariableAddrGap &Gap : Gaps) {
    ListScope S(W, "LocalVariableAddrGap");
    W.printHex("GapStartOffset", Gap.GapStartOffset);
    W.printHex("Range", Gap.Range);
  }
}

void
LVSymbolVisitor::printTypeIndex(StringRef FieldName, TypeIndex TI) const {
  // Type indexes printed by the symbol visitor are always resolved against
  // the types collection.
  codeview::printTypeIndex(W, FieldName, TI, Types);
}

// Begin visiting a symbol record using offset 0.
Error LVSymbolVisitor::visitSymbolBegin(CVSymbol &Record) {
  return visitSymbolBegin(Record, 0);
}

// Begin visiting a symbol record: create the logical element for its kind
// and add it to the logical visitor. Record kinds for which no element is
// created are silently ignored.
Error LVSymbolVisitor::visitSymbolBegin(CVSymbol &Record, uint32_t Offset) {
  SymbolKind Kind = Record.kind();
  LLVM_DEBUG({
    W.printNumber("Offset", Offset);
    W.printEnum("Begin Kind", unsigned(Kind), getSymbolTypeNames());
  });

  if (options().getInternalTag())
    Shared->SymbolKinds.insert(Kind);

  LogicalVisitor->CurrentElement = LogicalVisitor->createElement(Kind);
  if (!LogicalVisitor->CurrentElement) {
    LLVM_DEBUG({
      // We have an unsupported Symbol or Type Record.
      // W.printEnum("Kind ignored", unsigned(Kind), getSymbolTypeNames());
    });
    return Error::success();
  }

  // Offset carried by the traversal routines when dealing with streams.
  CurrentOffset = Offset;
  IsCompileUnit = false;
  // Keep the offset of elements whose offset comes from a type index.
  if (!LogicalVisitor->CurrentElement->getOffsetFromTypeIndex())
    LogicalVisitor->CurrentElement->setOffset(Offset);
  // Note the assignment in the condition: 'IsCompileUnit' is only updated
  // when the symbol does not open a scope (short-circuit evaluation).
  if (symbolOpensScope(Kind) || (IsCompileUnit = symbolIsCompileUnit(Kind))) {
    assert(LogicalVisitor->CurrentScope && "Invalid scope!");
    LogicalVisitor->addElement(LogicalVisitor->CurrentScope, IsCompileUnit);
  } else {
    if (LogicalVisitor->CurrentSymbol)
      LogicalVisitor->addElement(LogicalVisitor->CurrentSymbol);
    if (LogicalVisitor->CurrentType)
      LogicalVisitor->addElement(LogicalVisitor->CurrentType);
  }

  return Error::success();
}

// End visiting a symbol record: pop the current scope when the record kind
// ends a scope.
Error LVSymbolVisitor::visitSymbolEnd(CVSymbol &Record) {
  SymbolKind Kind = Record.kind();
  LLVM_DEBUG(
      { W.printEnum("End Kind", unsigned(Kind), getSymbolTypeNames()); });

  if (symbolEndsScope(Kind)) {
    LogicalVisitor->popScope();
  }

  return Error::success();
}

// Unknown symbol records are only reported in the debug output.
Error LVSymbolVisitor::visitUnknownSymbol(CVSymbol &Record) {
  LLVM_DEBUG({ W.printNumber("Length", Record.length()); });
  return Error::success();
}

// S_BLOCK32
Error
LVSymbolVisitor::visitKnownRecord(CVSymbol &Record, BlockSym &Block) {
  // Set the linkage name of the current scope and, when range collection is
  // enabled, record the address range covered by the block.
  LLVM_DEBUG({
    W.printHex("CodeSize", Block.CodeSize);
    W.printHex("Segment", Block.Segment);
    W.printString("BlockName", Block.Name);
  });

  if (LVScope *Scope = LogicalVisitor->CurrentScope) {
    // The linkage name stays empty when there is no object delegate.
    StringRef LinkageName;
    if (ObjDelegate)
      ObjDelegate->getLinkageName(Block.getRelocationOffset(), Block.CodeOffset,
                                  &LinkageName);
    Scope->setLinkageName(LinkageName);

    if (options().getGeneralCollectRanges()) {
      // Record converted segment::offset addressing for this scope.
      LVAddress Addendum = Reader->getSymbolTableAddress(LinkageName);
      LVAddress LowPC =
          Reader->linearAddress(Block.Segment, Block.CodeOffset, Addendum);
      // The high address is inclusive.
      LVAddress HighPC = LowPC + Block.CodeSize - 1;
      Scope->addObject(LowPC, HighPC);
    }
  }

  return Error::success();
}

// S_BPREL32
// Name the current symbol, classify it as parameter or variable and set its
// type.
Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record,
                                        BPRelativeSym &Local) {
  LLVM_DEBUG({
    printTypeIndex("Type", Local.Type);
    W.printNumber("Offset", Local.Offset);
    W.printString("VarName", Local.Name);
  });

  if (LVSymbol *Symbol = LogicalVisitor->CurrentSymbol) {
    Symbol->setName(Local.Name);
    // From the MS_Symbol_Type.pdf documentation (S_BPREL32):
    // This symbol specifies symbols that are allocated on the stack for a
    // procedure. For C and C++, these include the actual function parameters
    // and the local non-static variables of functions.
    // However, the offset for 'this' comes as a negative value.

    // Symbol was created as 'variable'; determine its real kind.
    Symbol->resetIsVariable();

    if (Local.Name.equals("this")) {
      Symbol->setIsParameter();
      Symbol->setIsArtificial();
    } else {
      // Determine symbol kind: a positive offset means a parameter, any
      // other offset means a variable.
      bool(Local.Offset > 0) ? Symbol->setIsParameter()
                             : Symbol->setIsVariable();
    }

    // Update correct debug information tag.
if (Symbol->getIsParameter())
      Symbol->setTag(dwarf::DW_TAG_formal_parameter);

    LVElement *Element = LogicalVisitor->getElement(StreamTPI, Local.Type);
    if (Element && Element->getIsScoped()) {
      // We have a local type. Find its parent function.
      LVScope *Parent = Symbol->getFunctionParent();
      // The element representing the type has been already finalized. If
      // the type is an aggregate type, its members have been already added.
      // As the type is local, its level will be changed.

      // FIXME: Currently the algorithm used to scope lambda functions is
      // incorrect. Before we allocate the type at this scope, check if is
      // already allocated in other scope.
      // NOTE(review): 'Parent' is used without a null check; presumably the
      // symbol always has a function parent here -- confirm.
      if (!Element->getParentScope()) {
        Parent->addElement(Element);
        Element->updateLevel(Parent);
      }
    }
    Symbol->setType(Element);
  }

  return Error::success();
}

// S_REGREL32
// Name the current symbol, classify it as parameter or variable (based on
// the register it is relative to) and set its type.
Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record,
                                        RegRelativeSym &Local) {
  LLVM_DEBUG({
    printTypeIndex("Type", Local.Type);
    W.printNumber("Offset", Local.Offset);
    W.printString("VarName", Local.Name);
  });

  if (LVSymbol *Symbol = LogicalVisitor->CurrentSymbol) {
    Symbol->setName(Local.Name);

    // Symbol was created as 'variable'; determine its real kind.
    Symbol->resetIsVariable();

    // Check for the 'this' symbol.
    if (Local.Name.equals("this")) {
      Symbol->setIsArtificial();
      Symbol->setIsParameter();
    } else {
      // Determine symbol kind.
      determineSymbolKind(Symbol, Local.Register);
    }

    // Update correct debug information tag.
    if (Symbol->getIsParameter())
      Symbol->setTag(dwarf::DW_TAG_formal_parameter);

    LVElement *Element = LogicalVisitor->getElement(StreamTPI, Local.Type);
    if (Element && Element->getIsScoped()) {
      // We have a local type. Find its parent function.
      LVScope *Parent = Symbol->getFunctionParent();
      // The element representing the type has been already finalized. If
      // the type is an aggregate type, its members have been already added.
+ // As the type is local, its level will be changed. + + // FIXME: Currently the algorithm used to scope lambda functions is + // incorrect. Before we allocate the type at this scope, check if is + // already allocated in other scope. + if (!Element->getParentScope()) { + Parent->addElement(Element); + Element->updateLevel(Parent); + } + } + Symbol->setType(Element); + } + + return Error::success(); +} + +// S_BUILDINFO +Error LVSymbolVisitor::visitKnownRecord(CVSymbol &CVR, + BuildInfoSym &BuildInfo) { + LLVM_DEBUG({ printTypeIndex("BuildId", BuildInfo.BuildId); }); + + CVType CVBuildType = Ids.getType(BuildInfo.BuildId); + if (Error Err = LogicalVisitor->finishVisitation( + CVBuildType, BuildInfo.BuildId, Reader->getCompileUnit())) + return Err; + + return Error::success(); +} + +// S_COMPILE2 +Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record, + Compile2Sym &Compile2) { + LLVM_DEBUG({ + W.printEnum("Language", uint8_t(Compile2.getLanguage()), + getSourceLanguageNames()); + W.printFlags("Flags", uint32_t(Compile2.getFlags()), + getCompileSym3FlagNames()); + W.printEnum("Machine", unsigned(Compile2.Machine), getCPUTypeNames()); + W.printString("VersionName", Compile2.Version); + }); + + // MSVC generates the following sequence for a CodeView module: + // S_OBJNAME --> Set 'CurrentObjectName'. + // S_COMPILE2 --> Set the compile unit name using 'CurrentObjectName'. + // ... + // S_BUILDINFO --> Extract the source name. + // + // Clang generates the following sequence for a CodeView module: + // S_COMPILE2 --> Set the compile unit name to empty string. + // ... + // S_BUILDINFO --> Extract the source name. + // + // For both toolchains, update the compile unit name from S_BUILDINFO. + if (LVScope *Scope = LogicalVisitor->CurrentScope) { + // The name of the CU, was extracted from the 'BuildInfo' subsection. 
Reader->setCompileUnitCPUType(Compile2.Machine);
    Scope->setName(CurrentObjectName);
    // The producer is recorded only when that attribute was requested.
    if (options().getAttributeProducer())
      Scope->setProducer(Compile2.Version);
    getReader().isSystemEntry(Scope, CurrentObjectName);

    // The line records in CodeView are recorded per Module ID. Update
    // the relationship between the current CU and the Module ID.
    Reader->addModule(Scope);

    // Update the collected strings with their associated compile unit.
    Shared->StringRecords.addFilenames(Reader->getCompileUnit());
  }

  // Clear any previous ObjectName.
  CurrentObjectName = "";
  return Error::success();
}

// S_COMPILE3
// Same handling as S_COMPILE2, using the S_COMPILE3 record fields.
Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record,
                                        Compile3Sym &Compile3) {
  LLVM_DEBUG({
    W.printEnum("Language", uint8_t(Compile3.getLanguage()),
                getSourceLanguageNames());
    W.printFlags("Flags", uint32_t(Compile3.getFlags()),
                 getCompileSym3FlagNames());
    W.printEnum("Machine", unsigned(Compile3.Machine), getCPUTypeNames());
    W.printString("VersionName", Compile3.Version);
  });

  // MSVC generates the following sequence for a CodeView module:
  //   S_OBJNAME    --> Set 'CurrentObjectName'.
  //   S_COMPILE3   --> Set the compile unit name using 'CurrentObjectName'.
  //   ...
  //   S_BUILDINFO  --> Extract the source name.
  //
  // Clang generates the following sequence for a CodeView module:
  //   S_COMPILE3   --> Set the compile unit name to empty string.
  //   ...
  //   S_BUILDINFO  --> Extract the source name.
  //
  // For both toolchains, update the compile unit name from S_BUILDINFO.
  if (LVScope *Scope = LogicalVisitor->CurrentScope) {
    // The name of the CU, was extracted from the 'BuildInfo' subsection.
    Reader->setCompileUnitCPUType(Compile3.Machine);
    Scope->setName(CurrentObjectName);
    // The producer is recorded only when that attribute was requested.
    if (options().getAttributeProducer())
      Scope->setProducer(Compile3.Version);
    getReader().isSystemEntry(Scope, CurrentObjectName);

    // The line records in CodeView are recorded per Module ID.
// Update
    // the relationship between the current CU and the Module ID.
    Reader->addModule(Scope);

    // Update the collected strings with their associated compile unit.
    Shared->StringRecords.addFilenames(Reader->getCompileUnit());
  }

  // Clear any previous ObjectName.
  CurrentObjectName = "";
  return Error::success();
}

// S_CONSTANT, S_MANCONSTANT
// Set the name and type of the current symbol and exclude it from printing.
Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record,
                                        ConstantSym &Constant) {
  LLVM_DEBUG({
    printTypeIndex("Type", Constant.Type);
    W.printNumber("Value", Constant.Value);
    W.printString("Name", Constant.Name);
  });

  if (LVSymbol *Symbol = LogicalVisitor->CurrentSymbol) {
    Symbol->setName(Constant.Name);
    Symbol->setType(LogicalVisitor->getElement(StreamTPI, Constant.Type));
    Symbol->resetIncludeInPrint();
  }

  return Error::success();
}

// S_DEFRANGE_FRAMEPOINTER_REL_FULL_SCOPE
// Attach a location to the pending local symbol ('LocalSymbol'), which is
// consumed (reset to null) by this record. No address range is given, so the
// location is added with zero addresses.
Error LVSymbolVisitor::visitKnownRecord(
    CVSymbol &Record,
    DefRangeFramePointerRelFullScopeSym &DefRangeFramePointerRelFullScope) {
  // DefRanges don't have types, just registers and code offsets.
  LLVM_DEBUG({
    if (LocalSymbol)
      W.getOStream() << formatv("Symbol: {0}, ", LocalSymbol->getName());

    W.printNumber("Offset", DefRangeFramePointerRelFullScope.Offset);
  });

  if (LVSymbol *Symbol = LocalSymbol) {
    Symbol->setHasCodeViewLocation();
    LocalSymbol = nullptr;

    // Add the debug location. Operands: [Offset, 0].
    // The CodeView symbol kind is carried in the DWARF attribute slot.
    dwarf::Attribute Attr =
        dwarf::Attribute(SymbolKind::S_DEFRANGE_FRAMEPOINTER_REL_FULL_SCOPE);

    uint64_t Operand1 = DefRangeFramePointerRelFullScope.Offset;
    Symbol->addLocation(Attr, 0, 0, 0, 0);
    Symbol->addLocationOperands(LVSmall(Attr), {Operand1});
  }

  return Error::success();
}

// S_DEFRANGE_FRAMEPOINTER_REL
Error LVSymbolVisitor::visitKnownRecord(
    CVSymbol &Record, DefRangeFramePointerRelSym &DefRangeFramePointerRel) {
  // DefRanges don't have types, just registers and code offsets.
LLVM_DEBUG({
    if (LocalSymbol)
      W.getOStream() << formatv("Symbol: {0}, ", LocalSymbol->getName());

    W.printNumber("Offset", DefRangeFramePointerRel.Hdr.Offset);
    printLocalVariableAddrRange(DefRangeFramePointerRel.Range,
                                DefRangeFramePointerRel.getRelocationOffset());
    printLocalVariableAddrGap(DefRangeFramePointerRel.Gaps);
  });

  // We are expecting the following sequence:
  //   128 | S_LOCAL [size = 20] `ParamBar`
  //   ...
  //   148 | S_DEFRANGE_FRAMEPOINTER_REL [size = 16]
  if (LVSymbol *Symbol = LocalSymbol) {
    // The pending local symbol is consumed by this record.
    Symbol->setHasCodeViewLocation();
    LocalSymbol = nullptr;

    // Add the debug location. Operands: [Offset, 0].
    dwarf::Attribute Attr =
        dwarf::Attribute(SymbolKind::S_DEFRANGE_FRAMEPOINTER_REL);
    uint64_t Operand1 = DefRangeFramePointerRel.Hdr.Offset;

    // The location covers [Address, Address + Range.Range].
    LocalVariableAddrRange Range = DefRangeFramePointerRel.Range;
    LVAddress Address =
        Reader->linearAddress(Range.ISectStart, Range.OffsetStart);

    Symbol->addLocation(Attr, Address, Address + Range.Range, 0, 0);
    Symbol->addLocationOperands(LVSmall(Attr), {Operand1});
  }

  return Error::success();
}

// S_DEFRANGE_REGISTER_REL
// Attach a register-relative location to the pending local symbol.
Error LVSymbolVisitor::visitKnownRecord(
    CVSymbol &Record, DefRangeRegisterRelSym &DefRangeRegisterRel) {
  // DefRanges don't have types, just registers and code offsets.
  LLVM_DEBUG({
    if (LocalSymbol)
      W.getOStream() << formatv("Symbol: {0}, ", LocalSymbol->getName());

    W.printBoolean("HasSpilledUDTMember",
                   DefRangeRegisterRel.hasSpilledUDTMember());
    W.printNumber("OffsetInParent", DefRangeRegisterRel.offsetInParent());
    W.printNumber("BasePointerOffset",
                  DefRangeRegisterRel.Hdr.BasePointerOffset);
    printLocalVariableAddrRange(DefRangeRegisterRel.Range,
                                DefRangeRegisterRel.getRelocationOffset());
    printLocalVariableAddrGap(DefRangeRegisterRel.Gaps);
  });

  if (LVSymbol *Symbol = LocalSymbol) {
    // The pending local symbol is consumed by this record.
    Symbol->setHasCodeViewLocation();
    LocalSymbol = nullptr;

    // Add the debug location. Operands: [Register, Offset].
dwarf::Attribute Attr =
        dwarf::Attribute(SymbolKind::S_DEFRANGE_REGISTER_REL);
    uint64_t Operand1 = DefRangeRegisterRel.Hdr.Register;
    uint64_t Operand2 = DefRangeRegisterRel.Hdr.BasePointerOffset;

    // The location covers [Address, Address + Range.Range].
    LocalVariableAddrRange Range = DefRangeRegisterRel.Range;
    LVAddress Address =
        Reader->linearAddress(Range.ISectStart, Range.OffsetStart);

    Symbol->addLocation(Attr, Address, Address + Range.Range, 0, 0);
    Symbol->addLocationOperands(LVSmall(Attr), {Operand1, Operand2});
  }

  return Error::success();
}

// S_DEFRANGE_REGISTER
// Attach a register location to the pending local symbol.
Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record,
                                        DefRangeRegisterSym &DefRangeRegister) {
  // DefRanges don't have types, just registers and code offsets.
  LLVM_DEBUG({
    if (LocalSymbol)
      W.getOStream() << formatv("Symbol: {0}, ", LocalSymbol->getName());

    W.printEnum("Register", uint16_t(DefRangeRegister.Hdr.Register),
                getRegisterNames(Reader->getCompileUnitCPUType()));
    W.printNumber("MayHaveNoName", DefRangeRegister.Hdr.MayHaveNoName);
    printLocalVariableAddrRange(DefRangeRegister.Range,
                                DefRangeRegister.getRelocationOffset());
    printLocalVariableAddrGap(DefRangeRegister.Gaps);
  });

  if (LVSymbol *Symbol = LocalSymbol) {
    // The pending local symbol is consumed by this record.
    Symbol->setHasCodeViewLocation();
    LocalSymbol = nullptr;

    // Add the debug location. Operands: [Register, 0].
    dwarf::Attribute Attr = dwarf::Attribute(SymbolKind::S_DEFRANGE_REGISTER);
    uint64_t Operand1 = DefRangeRegister.Hdr.Register;

    // The location covers [Address, Address + Range.Range].
    LocalVariableAddrRange Range = DefRangeRegister.Range;
    LVAddress Address =
        Reader->linearAddress(Range.ISectStart, Range.OffsetStart);

    Symbol->addLocation(Attr, Address, Address + Range.Range, 0, 0);
    Symbol->addLocationOperands(LVSmall(Attr), {Operand1});
  }

  return Error::success();
}

// S_DEFRANGE_SUBFIELD_REGISTER
Error LVSymbolVisitor::visitKnownRecord(
    CVSymbol &Record, DefRangeSubfieldRegisterSym &DefRangeSubfieldRegister) {
  // DefRanges don't have types, just registers and code offsets.
LLVM_DEBUG({
    if (LocalSymbol)
      W.getOStream() << formatv("Symbol: {0}, ", LocalSymbol->getName());

    W.printEnum("Register", uint16_t(DefRangeSubfieldRegister.Hdr.Register),
                getRegisterNames(Reader->getCompileUnitCPUType()));
    W.printNumber("MayHaveNoName", DefRangeSubfieldRegister.Hdr.MayHaveNoName);
    W.printNumber("OffsetInParent",
                  DefRangeSubfieldRegister.Hdr.OffsetInParent);
    printLocalVariableAddrRange(DefRangeSubfieldRegister.Range,
                                DefRangeSubfieldRegister.getRelocationOffset());
    printLocalVariableAddrGap(DefRangeSubfieldRegister.Gaps);
  });

  if (LVSymbol *Symbol = LocalSymbol) {
    // The pending local symbol is consumed by this record.
    Symbol->setHasCodeViewLocation();
    LocalSymbol = nullptr;

    // Add the debug location. Operands: [Register, 0].
    dwarf::Attribute Attr =
        dwarf::Attribute(SymbolKind::S_DEFRANGE_SUBFIELD_REGISTER);
    uint64_t Operand1 = DefRangeSubfieldRegister.Hdr.Register;

    // The location covers [Address, Address + Range.Range].
    LocalVariableAddrRange Range = DefRangeSubfieldRegister.Range;
    LVAddress Address =
        Reader->linearAddress(Range.ISectStart, Range.OffsetStart);

    Symbol->addLocation(Attr, Address, Address + Range.Range, 0, 0);
    Symbol->addLocationOperands(LVSmall(Attr), {Operand1});
  }

  return Error::success();
}

// S_DEFRANGE_SUBFIELD
Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record,
                                        DefRangeSubfieldSym &DefRangeSubfield) {
  // DefRanges don't have types, just registers and code offsets.
LLVM_DEBUG({
    if (LocalSymbol)
      W.getOStream() << formatv("Symbol: {0}, ", LocalSymbol->getName());

    if (ObjDelegate) {
      DebugStringTableSubsectionRef Strings = ObjDelegate->getStringTable();
      auto ExpectedProgram = Strings.getString(DefRangeSubfield.Program);
      if (!ExpectedProgram) {
        consumeError(ExpectedProgram.takeError());
        // NOTE(review): this early return sits inside LLVM_DEBUG, so the
        // error is only reported when the debug block is executed.
        return llvm::make_error<CodeViewError>(
            "String table offset outside of bounds of String Table!");
      }
      W.printString("Program", *ExpectedProgram);
    }
    W.printNumber("OffsetInParent", DefRangeSubfield.OffsetInParent);
    printLocalVariableAddrRange(DefRangeSubfield.Range,
                                DefRangeSubfield.getRelocationOffset());
    printLocalVariableAddrGap(DefRangeSubfield.Gaps);
  });

  if (LVSymbol *Symbol = LocalSymbol) {
    // The pending local symbol is consumed by this record.
    Symbol->setHasCodeViewLocation();
    LocalSymbol = nullptr;

    // Add the debug location. Operands: [Program, 0].
    dwarf::Attribute Attr = dwarf::Attribute(SymbolKind::S_DEFRANGE_SUBFIELD);
    uint64_t Operand1 = DefRangeSubfield.Program;

    // The location covers [Address, Address + Range.Range].
    LocalVariableAddrRange Range = DefRangeSubfield.Range;
    LVAddress Address =
        Reader->linearAddress(Range.ISectStart, Range.OffsetStart);

    Symbol->addLocation(Attr, Address, Address + Range.Range, 0, 0);
    Symbol->addLocationOperands(LVSmall(Attr), {Operand1, /*Operand2=*/0});
  }

  return Error::success();
}

// S_DEFRANGE
Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record,
                                        DefRangeSym &DefRange) {
  // DefRanges don't have types, just registers and code offsets.
LLVM_DEBUG({
    if (LocalSymbol)
      W.getOStream() << formatv("Symbol: {0}, ", LocalSymbol->getName());

    if (ObjDelegate) {
      DebugStringTableSubsectionRef Strings = ObjDelegate->getStringTable();
      auto ExpectedProgram = Strings.getString(DefRange.Program);
      if (!ExpectedProgram) {
        consumeError(ExpectedProgram.takeError());
        // NOTE(review): this early return sits inside LLVM_DEBUG, so the
        // error is only reported when the debug block is executed.
        return llvm::make_error<CodeViewError>(
            "String table offset outside of bounds of String Table!");
      }
      W.printString("Program", *ExpectedProgram);
    }
    printLocalVariableAddrRange(DefRange.Range, DefRange.getRelocationOffset());
    printLocalVariableAddrGap(DefRange.Gaps);
  });

  if (LVSymbol *Symbol = LocalSymbol) {
    // The pending local symbol is consumed by this record.
    Symbol->setHasCodeViewLocation();
    LocalSymbol = nullptr;

    // Add the debug location. Operands: [Program, 0].
    dwarf::Attribute Attr = dwarf::Attribute(SymbolKind::S_DEFRANGE);
    uint64_t Operand1 = DefRange.Program;

    // The location covers [Address, Address + Range.Range].
    LocalVariableAddrRange Range = DefRange.Range;
    LVAddress Address =
        Reader->linearAddress(Range.ISectStart, Range.OffsetStart);

    Symbol->addLocation(Attr, Address, Address + Range.Range, 0, 0);
    Symbol->addLocationOperands(LVSmall(Attr), {Operand1, /*Operand2=*/0});
  }

  return Error::success();
}

// S_FRAMEPROC
Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record,
                                        FrameProcSym &FrameProc) {
  if (LVScope *Function = LogicalVisitor->getReaderScope()) {
    // S_FRAMEPROC contains extra information for the function described
    // by any of the previous generated records:
    // S_GPROC32, S_LPROC32, S_LPROC32_ID, S_GPROC32_ID.

    // The generated sequence is:
    //   S_GPROC32_ID ...
    //   S_FRAMEPROC ...

    // Collect additional inline flags for the current scope function.
FrameProcedureOptions Flags = FrameProc.Flags;
    // When both flags are set, the second assignment (DW_INL_inlined)
    // replaces the first one.
    if (FrameProcedureOptions::MarkedInline ==
        (Flags & FrameProcedureOptions::MarkedInline))
      Function->setInlineCode(dwarf::DW_INL_declared_inlined);
    if (FrameProcedureOptions::Inlined ==
        (Flags & FrameProcedureOptions::Inlined))
      Function->setInlineCode(dwarf::DW_INL_inlined);

    // To determine the symbol kind for any symbol declared in that function,
    // we can access the S_FRAMEPROC for the parent scope function. It contains
    // information about the local fp and param fp registers and compare with
    // the register in the S_REGREL32 to get a match.
    codeview::CPUType CPU = Reader->getCompileUnitCPUType();
    LocalFrameRegister = FrameProc.getLocalFramePtrReg(CPU);
    ParamFrameRegister = FrameProc.getParamFramePtrReg(CPU);
  }

  return Error::success();
}

// S_GDATA32, S_LDATA32, S_LMANDATA, S_GMANDATA
// Set name, linkage name and type of the current symbol, moving it to its
// deduced namespace if there is one.
Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record, DataSym &Data) {
  LLVM_DEBUG({
    printTypeIndex("Type", Data.Type);
    W.printString("DisplayName", Data.Name);
  });

  if (LVSymbol *Symbol = LogicalVisitor->CurrentSymbol) {
    // The linkage name stays empty when there is no object delegate.
    StringRef LinkageName;
    if (ObjDelegate)
      ObjDelegate->getLinkageName(Data.getRelocationOffset(), Data.DataOffset,
                                  &LinkageName);

    Symbol->setName(Data.Name);
    Symbol->setLinkageName(LinkageName);

    // The MSVC generates local data as initialization for aggregates. It
    // contains the address for an initialization function.
    // The symbols contains the '$initializer$' pattern. Allow them only if
    // the '--internal=system' option is given.
    //   0 | S_LDATA32 `Struct$initializer$`
    //       type = 0x1040 (void ()*)
    if (getReader().isSystemEntry(Symbol) && !options().getAttributeSystem()) {
      Symbol->resetIncludeInPrint();
      return Error::success();
    }

    if (LVScope *Namespace = Shared->NamespaceDeduction.get(Data.Name)) {
      // The variable is already at different scope. In order to reflect
      // the correct parent, move it to the namespace.
+ if (Symbol->getParentScope()->removeElement(Symbol)) + Namespace->addElement(Symbol); + } + + Symbol->setType(LogicalVisitor->getElement(StreamTPI, Data.Type)); + if (Record.kind() == SymbolKind::S_GDATA32) + Symbol->setIsExternal(); + } + + return Error::success(); +} + +// S_INLINESITE +Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record, + InlineSiteSym &InlineSite) { + LLVM_DEBUG({ printTypeIndex("Inlinee", InlineSite.Inlinee); }); + + if (LVScope *InlinedFunction = LogicalVisitor->CurrentScope) { + LVScope *AbstractFunction = Reader->createScopeFunction(); + AbstractFunction->setIsSubprogram(); + AbstractFunction->setTag(dwarf::DW_TAG_subprogram); + AbstractFunction->setInlineCode(dwarf::DW_INL_inlined); + AbstractFunction->setIsInlinedAbstract(); + InlinedFunction->setReference(AbstractFunction); + + LogicalVisitor->startProcessArgumentList(); + // 'Inlinee' is a Type ID. + CVType CVFunctionType = Ids.getType(InlineSite.Inlinee); + if (Error Err = LogicalVisitor->finishVisitation( + CVFunctionType, InlineSite.Inlinee, AbstractFunction)) + return Err; + LogicalVisitor->stopProcessArgumentList(); + + // For inlined functions set the linkage name to be the same as + // the name. It used to find their lines and ranges. + StringRef Name = AbstractFunction->getName(); + InlinedFunction->setName(Name); + InlinedFunction->setLinkageName(Name); + + // Process annotation bytes to calculate code and line offsets. 
+ if (Error Err = LogicalVisitor->inlineSiteAnnotation( + AbstractFunction, InlinedFunction, InlineSite)) + return Err; + } + + return Error::success(); +} + +// S_LOCAL +Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record, LocalSym &Local) { + LLVM_DEBUG({ + printTypeIndex("Type", Local.Type); + W.printFlags("Flags", uint16_t(Local.Flags), getLocalFlagNames()); + W.printString("VarName", Local.Name); + }); + + if (LVSymbol *Symbol = LogicalVisitor->CurrentSymbol) { + Symbol->setName(Local.Name); + + // Symbol was created as 'variable'; determine its real kind. + Symbol->resetIsVariable(); + + // Be sure the 'this' symbol is marked as 'compiler generated'. + if (bool(Local.Flags & LocalSymFlags::IsCompilerGenerated) || + Local.Name.equals("this")) { + Symbol->setIsArtificial(); + Symbol->setIsParameter(); + } else { + bool(Local.Flags & LocalSymFlags::IsParameter) ? Symbol->setIsParameter() + : Symbol->setIsVariable(); + } + + // Update correct debug information tag. + if (Symbol->getIsParameter()) + Symbol->setTag(dwarf::DW_TAG_formal_parameter); + + LVElement *Element = LogicalVisitor->getElement(StreamTPI, Local.Type); + if (Element && Element->getIsScoped()) { + // We have a local type. Find its parent function. + LVScope *Parent = Symbol->getFunctionParent(); + // The element representing the type has been already finalized. If + // the type is an aggregate type, its members have been already added. + // As the type is local, its level will be changed. + Parent->addElement(Element); + Element->updateLevel(Parent); + } + Symbol->setType(Element); + + // The CodeView records (S_DEFFRAME_*) describing debug location for + // this symbol, do not have any direct reference to it. Those records + // are emitted after this symbol. Record the current symbol. 
+ LocalSymbol = Symbol; + } + + return Error::success(); +} + +// S_OBJNAME +Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record, ObjNameSym &ObjName) { + LLVM_DEBUG({ + W.printHex("Signature", ObjName.Signature); + W.printString("ObjectName", ObjName.Name); + }); + + CurrentObjectName = ObjName.Name; + return Error::success(); +} + +// S_GPROC32, S_LPROC32, S_LPROC32_ID, S_GPROC32_ID +Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record, ProcSym &Proc) { + if (InFunctionScope) + return llvm::make_error<CodeViewError>("Visiting a ProcSym while inside " + "function scope!"); + + InFunctionScope = true; + + LLVM_DEBUG({ + printTypeIndex("FunctionType", Proc.FunctionType); + W.printHex("Segment", Proc.Segment); + W.printFlags("Flags", static_cast<uint8_t>(Proc.Flags), + getProcSymFlagNames()); + W.printString("DisplayName", Proc.Name); + }); + + // Clang and Microsoft generated different debug information records: + // For functions definitions: + // Clang: S_GPROC32 -> LF_FUNC_ID -> LF_PROCEDURE + // Microsoft: S_GPROC32 -> LF_PROCEDURE + + // For member function definition: + // Clang: S_GPROC32 -> LF_MFUNC_ID -> LF_MFUNCTION + // Microsoft: S_GPROC32 -> LF_MFUNCTION + // In order to support both sequences, if we found LF_FUNCTION_ID, just + // get the TypeIndex for LF_PROCEDURE. + + // For the given test case, we have the sequence: + // namespace NSP_local { + // void foo_local() { + // } + // } + // + // 0x1000 | LF_STRING_ID String: NSP_local + // 0x1002 | LF_PROCEDURE + // return type = 0x0003 (void), # args = 0, param list = 0x1001 + // calling conv = cdecl, options = None + // 0x1003 | LF_FUNC_ID + // name = foo_local, type = 0x1002, parent scope = 0x1000 + // 0 | S_GPROC32_ID `NSP_local::foo_local` + // type = `0x1003 (foo_local)` + // 0x1004 | LF_STRING_ID String: suite + // 0x1005 | LF_STRING_ID String: suite_local.cpp + // + // The LF_STRING_ID can hold different information: + // 0x1000 - The enclosing namespace. 
+ // 0x1004 - The compile unit directory name. + // 0x1005 - The compile unit name. + // + // Before deducting its scope, we need to evaluate its type and create any + // associated namespaces. + if (LVScope *Function = LogicalVisitor->CurrentScope) { + StringRef LinkageName; + if (ObjDelegate) + ObjDelegate->getLinkageName(Proc.getRelocationOffset(), Proc.CodeOffset, + &LinkageName); + + // The line table can be accessed using the linkage name. + Reader->addToSymbolTable(LinkageName, Function); + Function->setName(Proc.Name); + Function->setLinkageName(LinkageName); + + if (options().getGeneralCollectRanges()) { + // Record converted segment::offset addressing for this scope. + LVAddress Addendum = Reader->getSymbolTableAddress(LinkageName); + LVAddress LowPC = + Reader->linearAddress(Proc.Segment, Proc.CodeOffset, Addendum); + LVAddress HighPC = LowPC + Proc.CodeSize - 1; + Function->addObject(LowPC, HighPC); + + // If the scope is a function, add it to the public names. + if ((options().getAttributePublics() || options().getPrintAnyLine()) && + !Function->getIsInlinedFunction()) + Reader->getCompileUnit()->addPublicName(Function, LowPC, HighPC); + } + + if (Function->getIsSystem() && !options().getAttributeSystem()) { + Function->resetIncludeInPrint(); + return Error::success(); + } + + TypeIndex TIFunctionType = Proc.FunctionType; + if (TIFunctionType.isSimple()) + Function->setType(LogicalVisitor->getElement(StreamTPI, TIFunctionType)); + else { + // We have to detect the correct stream, using the lexical parent + // name, as there is not other obvious way to get the stream. 
+ // Normal function: LF_FUNC_ID (TPI)/(IPI) + // LF_PROCEDURE (TPI) + // Lambda function: LF_MFUNCTION (TPI) + // Member function: LF_MFUNC_ID (TPI)/(IPI) + + StringRef OuterComponent; + std::tie(OuterComponent, std::ignore) = getInnerComponent(Proc.Name); + TypeIndex TI = Shared->ForwardReferences.find(OuterComponent); + + std::optional<CVType> CVFunctionType; + auto GetRecordType = [&]() -> bool { + CVFunctionType = Ids.tryGetType(TIFunctionType); + if (!CVFunctionType) + return false; + + if (TI.isNoneType()) + // Normal function. + if (CVFunctionType->kind() == LF_FUNC_ID) + return true; + + // Member function. + return (CVFunctionType->kind() == LF_MFUNC_ID); + }; + + // We can have a LF_FUNC_ID, LF_PROCEDURE or LF_MFUNCTION. + if (!GetRecordType()) { + CVFunctionType = Types.tryGetType(TIFunctionType); + if (!CVFunctionType) + return llvm::make_error<CodeViewError>("Invalid type index"); + } + + if (Error Err = LogicalVisitor->finishVisitation( + *CVFunctionType, TIFunctionType, Function)) + return Err; + } + + if (Record.kind() == SymbolKind::S_GPROC32 || + Record.kind() == SymbolKind::S_GPROC32_ID) + Function->setIsExternal(); + + // We don't have a way to see if the symbol is compiler generated. Use + // the linkage name, to detect `scalar deleting destructor' functions. + std::string DemangledSymbol = demangle(LinkageName); + if (DemangledSymbol.find("scalar deleting dtor") != std::string::npos) { + Function->setIsArtificial(); + } else { + // Clang generates global ctor and dtor names containing the substrings: + // 'dynamic initializer for' and 'dynamic atexit destructor for'. 
+ if (DemangledSymbol.find("dynamic atexit destructor for") != + std::string::npos) + Function->setIsArtificial(); + } + } + + return Error::success(); +} + +// S_END +Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record, + ScopeEndSym &ScopeEnd) { + InFunctionScope = false; + return Error::success(); +} + +// S_THUNK32 +Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record, Thunk32Sym &Thunk) { + if (InFunctionScope) + return llvm::make_error<CodeViewError>("Visiting a Thunk32Sym while inside " + "function scope!"); + + InFunctionScope = true; + + LLVM_DEBUG({ + W.printHex("Segment", Thunk.Segment); + W.printString("Name", Thunk.Name); + }); + + if (LVScope *Function = LogicalVisitor->CurrentScope) + Function->setName(Thunk.Name); + + return Error::success(); +} + +// S_UDT, S_COBOLUDT +Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record, UDTSym &UDT) { + LLVM_DEBUG({ + printTypeIndex("Type", UDT.Type); + W.printString("UDTName", UDT.Name); + }); + + if (LVType *Type = LogicalVisitor->CurrentType) { + if (LVScope *Namespace = Shared->NamespaceDeduction.get(UDT.Name)) { + if (Type->getParentScope()->removeElement(Type)) + Namespace->addElement(Type); + } + + Type->setName(UDT.Name); + + // We have to determine if the typedef is a real C/C++ definition or is + // the S_UDT record that describe all the user defined types. + // 0 | S_UDT `Name` original type = 0x1009 + // 0x1009 | LF_STRUCTURE `Name` + // Ignore type definitions for RTTI types: + // _s__RTTIBaseClassArray, _s__RTTIBaseClassDescriptor, + // _s__RTTICompleteObjectLocator, _s__RTTIClassHierarchyDescriptor. 
+ if (getReader().isSystemEntry(Type)) + Type->resetIncludeInPrint(); + else { + StringRef RecordName = getRecordName(Types, UDT.Type); + if (UDT.Name.equals(RecordName)) + Type->resetIncludeInPrint(); + Type->setType(LogicalVisitor->getElement(StreamTPI, UDT.Type)); + } + } + + return Error::success(); +} + +// S_UNAMESPACE +Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record, + UsingNamespaceSym &UN) { + LLVM_DEBUG({ W.printString("Namespace", UN.Name); }); + return Error::success(); +} + +#undef DEBUG_TYPE +#define DEBUG_TYPE "CodeViewLogicalVisitor" + +//===----------------------------------------------------------------------===// +// Logical visitor. +//===----------------------------------------------------------------------===// +LVLogicalVisitor::LVLogicalVisitor(LVCodeViewReader *Reader, ScopedPrinter &W, + InputFile &Input) + : Reader(Reader), W(W), Input(Input) { + // The LogicalVisitor connects the CodeViewReader with the visitors that + // traverse the types, symbols, etc. Do any initialization that is needed. + Shared = std::make_shared<LVShared>(Reader, this); +} + +void LVLogicalVisitor::printTypeIndex(StringRef FieldName, TypeIndex TI, + uint32_t StreamIdx) { + codeview::printTypeIndex(W, FieldName, TI, + StreamIdx == StreamTPI ? 
types() : ids()); +} + +void LVLogicalVisitor::printTypeBegin(CVType &Record, TypeIndex TI, + LVElement *Element, uint32_t StreamIdx) { + W.getOStream() << "\n"; + W.startLine() << formatTypeLeafKind(Record.kind()); + W.getOStream() << " (" << HexNumber(TI.getIndex()) << ")"; + W.getOStream() << " {\n"; + W.indent(); + W.printEnum("TypeLeafKind", unsigned(Record.kind()), ArrayRef(LeafTypeNames)); + printTypeIndex("TI", TI, StreamIdx); + W.startLine() << "Element: " << HexNumber(Element->getOffset()) << " " + << Element->getName() << "\n"; +} + +void LVLogicalVisitor::printTypeEnd(CVType &Record) { + W.unindent(); + W.startLine() << "}\n"; +} + +void LVLogicalVisitor::printMemberBegin(CVMemberRecord &Record, TypeIndex TI, + LVElement *Element, + uint32_t StreamIdx) { + W.getOStream() << "\n"; + W.startLine() << formatTypeLeafKind(Record.Kind); + W.getOStream() << " (" << HexNumber(TI.getIndex()) << ")"; + W.getOStream() << " {\n"; + W.indent(); + W.printEnum("TypeLeafKind", unsigned(Record.Kind), ArrayRef(LeafTypeNames)); + printTypeIndex("TI", TI, StreamIdx); + W.startLine() << "Element: " << HexNumber(Element->getOffset()) << " " + << Element->getName() << "\n"; +} + +void LVLogicalVisitor::printMemberEnd(CVMemberRecord &Record) { + W.unindent(); + W.startLine() << "}\n"; +} + +Error LVLogicalVisitor::visitUnknownType(CVType &Record, TypeIndex TI) { + LLVM_DEBUG({ + printTypeIndex("\nTI", TI, StreamTPI); + W.printNumber("Length", uint32_t(Record.content().size())); + }); + return Error::success(); +} + +// LF_ARGLIST (TPI) +Error LVLogicalVisitor::visitKnownRecord(CVType &Record, ArgListRecord &Args, + TypeIndex TI, LVElement *Element) { + ArrayRef<TypeIndex> Indices = Args.getIndices(); + uint32_t Size = Indices.size(); + LLVM_DEBUG({ + printTypeBegin(Record, TI, Element, StreamTPI); + W.printNumber("NumArgs", Size); + ListScope Arguments(W, "Arguments"); + for (uint32_t I = 0; I < Size; ++I) + printTypeIndex("ArgType", Indices[I], StreamTPI); + 
printTypeEnd(Record); + }); + + LVScope *Function = static_cast<LVScope *>(Element); + for (uint32_t Index = 0; Index < Size; ++Index) { + TypeIndex ParameterType = Indices[Index]; + createParameter(ParameterType, StringRef(), Function); + } + + return Error::success(); +} + +// LF_ARRAY (TPI) +Error LVLogicalVisitor::visitKnownRecord(CVType &Record, ArrayRecord &AT, + TypeIndex TI, LVElement *Element) { + LLVM_DEBUG({ + printTypeBegin(Record, TI, Element, StreamTPI); + printTypeIndex("ElementType", AT.getElementType(), StreamTPI); + printTypeIndex("IndexType", AT.getIndexType(), StreamTPI); + W.printNumber("SizeOf", AT.getSize()); + W.printString("Name", AT.getName()); + printTypeEnd(Record); + }); + + if (Element->getIsFinalized()) + return Error::success(); + Element->setIsFinalized(); + + LVScopeArray *Array = static_cast<LVScopeArray *>(Element); + if (!Array) + return Error::success(); + + Reader->getCompileUnit()->addElement(Array); + TypeIndex TIElementType = AT.getElementType(); + + LVType *PrevSubrange = nullptr; + LazyRandomTypeCollection &Types = types(); + + // As the logical view is modeled on DWARF, for each dimension we have to + // create a DW_TAG_subrange_type, with dimension size. + // The subrange type can be: unsigned __int32 or unsigned __int64. + auto AddSubrangeType = [&](ArrayRecord &AR) { + LVType *Subrange = Reader->createTypeSubrange(); + Subrange->setTag(dwarf::DW_TAG_subrange_type); + Subrange->setType(getElement(StreamTPI, AR.getIndexType())); + Subrange->setCount(AR.getSize()); + Subrange->setOffset( + TIElementType.isSimple() + ? (uint32_t)(TypeLeafKind)TIElementType.getSimpleKind() + : TIElementType.getIndex()); + Array->addElement(Subrange); + + if (PrevSubrange) + if (int64_t Count = Subrange->getCount()) + PrevSubrange->setCount(PrevSubrange->getCount() / Count); + PrevSubrange = Subrange; + }; + + // Preserve the original TypeIndex; it would be updated in the case of: + // - The array type contains qualifiers. 
+ // - In multidimensional arrays, the last LF_ARRAY entry contains the type. + TypeIndex TIArrayType; + + // For each dimension in the array, there is a LF_ARRAY entry. The last + // entry contains the array type, which can be a LF_MODIFIER in the case + // of the type being modified by a qualifier (const, etc). + ArrayRecord AR(AT); + CVType CVEntry = Record; + while (CVEntry.kind() == LF_ARRAY) { + // Create the subrange information, required by the logical view. Once + // the array has been processed, the dimension sizes will updated, as + // the sizes are a progression. For instance: + // sizeof(int) = 4 + // int Array[2]; Sizes: 8 Dim: 8 / 4 -> [2] + // int Array[2][3]; Sizes: 24, 12 Dim: 24 / 12 -> [2] + // Dim: 12 / 4 -> [3] + // int Array[2][3][4]; sizes: 96, 48, 16 Dim: 96 / 48 -> [2] + // Dim: 48 / 16 -> [3] + // Dim: 16 / 4 -> [4] + AddSubrangeType(AR); + TIArrayType = TIElementType; + + // The current ElementType can be a modifier, in which case we need to + // get the type being modified. + // If TypeIndex is not a simple type, check if we have a qualified type. + if (!TIElementType.isSimple()) { + CVType CVElementType = Types.getType(TIElementType); + if (CVElementType.kind() == LF_MODIFIER) { + LVElement *QualifiedType = + Shared->TypeRecords.find(StreamTPI, TIElementType); + if (Error Err = + finishVisitation(CVElementType, TIElementType, QualifiedType)) + return Err; + // Get the TypeIndex of the type that the LF_MODIFIER modifies. + TIElementType = getModifiedType(CVElementType); + } + } + // Ends the traversal, as we have reached a simple type (int, char, etc). + if (TIElementType.isSimple()) + break; + + // Read next dimension linked entry, if any. 
+ CVEntry = Types.getType(TIElementType); + if (Error Err = TypeDeserializer::deserializeAs( + const_cast<CVType &>(CVEntry), AR)) { + consumeError(std::move(Err)); + break; + } + TIElementType = AR.getElementType(); + // NOTE: The typeindex has a value of: 0x0280.0000 + getTrueType(TIElementType); + } + + Array->setName(AT.getName()); + TIArrayType = Shared->ForwardReferences.remap(TIArrayType); + Array->setType(getElement(StreamTPI, TIArrayType)); + + if (PrevSubrange) + // In the case of an aggregate type (class, struct, union, interface), + // get the aggregate size. As the original record is pointing to its + // reference, we have to update it. + if (uint64_t Size = + isAggregate(CVEntry) + ? getSizeInBytesForTypeRecord(Types.getType(TIArrayType)) + : getSizeInBytesForTypeIndex(TIElementType)) + PrevSubrange->setCount(PrevSubrange->getCount() / Size); + + return Error::success(); +} + +// LF_BITFIELD (TPI) +Error LVLogicalVisitor::visitKnownRecord(CVType &Record, BitFieldRecord &BF, + TypeIndex TI, LVElement *Element) { + LLVM_DEBUG({ + printTypeBegin(Record, TI, Element, StreamTPI); + printTypeIndex("Type", TI, StreamTPI); + W.printNumber("BitSize", BF.getBitSize()); + W.printNumber("BitOffset", BF.getBitOffset()); + printTypeEnd(Record); + }); + + Element->setType(getElement(StreamTPI, BF.getType())); + Element->setBitSize(BF.getBitSize()); + return Error::success(); +} + +// LF_BUILDINFO (TPI)/(IPI) +Error LVLogicalVisitor::visitKnownRecord(CVType &Record, BuildInfoRecord &BI, + TypeIndex TI, LVElement *Element) { + LLVM_DEBUG({ + printTypeBegin(Record, TI, Element, StreamIPI); + W.printNumber("NumArgs", static_cast<uint32_t>(BI.getArgs().size())); + ListScope Arguments(W, "Arguments"); + for (TypeIndex Arg : BI.getArgs()) + printTypeIndex("ArgType", Arg, StreamIPI); + printTypeEnd(Record); + }); + + // The given 'Element' refers to the current compilation unit. + // All the args are references into the TPI/IPI stream. 
+ TypeIndex TIName = BI.getArgs()[BuildInfoRecord::BuildInfoArg::SourceFile]; + std::string Name = std::string(ids().getTypeName(TIName)); + + // There are cases where LF_BUILDINFO fields are empty. + if (!Name.empty()) + Element->setName(Name); + + return Error::success(); +} + +// LF_CLASS, LF_STRUCTURE, LF_INTERFACE (TPI) +Error LVLogicalVisitor::visitKnownRecord(CVType &Record, ClassRecord &Class, + TypeIndex TI, LVElement *Element) { + LLVM_DEBUG({ + printTypeBegin(Record, TI, Element, StreamTPI); + W.printNumber("MemberCount", Class.getMemberCount()); + printTypeIndex("FieldList", Class.getFieldList(), StreamTPI); + printTypeIndex("DerivedFrom", Class.getDerivationList(), StreamTPI); + printTypeIndex("VShape", Class.getVTableShape(), StreamTPI); + W.printNumber("SizeOf", Class.getSize()); + W.printString("Name", Class.getName()); + if (Class.hasUniqueName()) + W.printString("UniqueName", Class.getUniqueName()); + printTypeEnd(Record); + }); + + if (Element->getIsFinalized()) + return Error::success(); + Element->setIsFinalized(); + + LVScopeAggregate *Scope = static_cast<LVScopeAggregate *>(Element); + if (!Scope) + return Error::success(); + + Scope->setName(Class.getName()); + if (Class.hasUniqueName()) + Scope->setLinkageName(Class.getUniqueName()); + + if (Class.isNested()) { + Scope->setIsNested(); + createParents(Class.getName(), Scope); + } + + if (Class.isScoped()) + Scope->setIsScoped(); + + // Nested types will be added to their parents at creation. The forward + // references are only processed to finish the referenced element creation. 
+ if (!(Class.isNested() || Class.isScoped())) { + if (LVScope *Namespace = Shared->NamespaceDeduction.get(Class.getName())) + Namespace->addElement(Scope); + else + Reader->getCompileUnit()->addElement(Scope); + } + + LazyRandomTypeCollection &Types = types(); + TypeIndex TIFieldList = Class.getFieldList(); + if (TIFieldList.isNoneType()) { + TypeIndex ForwardType = Shared->ForwardReferences.find(Class.getName()); + if (!ForwardType.isNoneType()) { + CVType CVReference = Types.getType(ForwardType); + TypeRecordKind RK = static_cast<TypeRecordKind>(CVReference.kind()); + ClassRecord ReferenceRecord(RK); + if (Error Err = TypeDeserializer::deserializeAs( + const_cast<CVType &>(CVReference), ReferenceRecord)) + return Err; + TIFieldList = ReferenceRecord.getFieldList(); + } + } + + if (!TIFieldList.isNoneType()) { + // Pass down the TypeIndex 'TI' for the aggregate containing the field list. + CVType CVFieldList = Types.getType(TIFieldList); + if (Error Err = finishVisitation(CVFieldList, TI, Scope)) + return Err; + } + + return Error::success(); +} + +// LF_ENUM (TPI) +Error LVLogicalVisitor::visitKnownRecord(CVType &Record, EnumRecord &Enum, + TypeIndex TI, LVElement *Element) { + LLVM_DEBUG({ + printTypeBegin(Record, TI, Element, StreamTPI); + W.printNumber("NumEnumerators", Enum.getMemberCount()); + printTypeIndex("UnderlyingType", Enum.getUnderlyingType(), StreamTPI); + printTypeIndex("FieldListType", Enum.getFieldList(), StreamTPI); + W.printString("Name", Enum.getName()); + printTypeEnd(Record); + }); + + LVScopeEnumeration *Scope = static_cast<LVScopeEnumeration *>(Element); + if (!Scope) + return Error::success(); + + if (Scope->getIsFinalized()) + return Error::success(); + Scope->setIsFinalized(); + + // Set the name, as in the case of nested, it would determine the relation + // to any potential parent, via the LF_NESTTYPE record. 
+ Scope->setName(Enum.getName()); + if (Enum.hasUniqueName()) + Scope->setLinkageName(Enum.getUniqueName()); + + Scope->setType(getElement(StreamTPI, Enum.getUnderlyingType())); + + if (Enum.isNested()) { + Scope->setIsNested(); + createParents(Enum.getName(), Scope); + } + + if (Enum.isScoped()) { + Scope->setIsScoped(); + Scope->setIsEnumClass(); + } + + // Nested types will be added to their parents at creation. + if (!(Enum.isNested() || Enum.isScoped())) { + if (LVScope *Namespace = Shared->NamespaceDeduction.get(Enum.getName())) + Namespace->addElement(Scope); + else + Reader->getCompileUnit()->addElement(Scope); + } + + TypeIndex TIFieldList = Enum.getFieldList(); + if (!TIFieldList.isNoneType()) { + LazyRandomTypeCollection &Types = types(); + CVType CVFieldList = Types.getType(TIFieldList); + if (Error Err = finishVisitation(CVFieldList, TIFieldList, Scope)) + return Err; + } + + return Error::success(); +} + +// LF_FIELDLIST (TPI) +Error LVLogicalVisitor::visitKnownRecord(CVType &Record, + FieldListRecord &FieldList, + TypeIndex TI, LVElement *Element) { + LLVM_DEBUG({ + printTypeBegin(Record, TI, Element, StreamTPI); + printTypeEnd(Record); + }); + + if (Error Err = visitFieldListMemberStream(TI, Element, FieldList.Data)) + return Err; + + return Error::success(); +} + +// LF_FUNC_ID (TPI)/(IPI) +Error LVLogicalVisitor::visitKnownRecord(CVType &Record, FuncIdRecord &Func, + TypeIndex TI, LVElement *Element) { + // ParentScope and FunctionType are references into the TPI stream. + LLVM_DEBUG({ + printTypeBegin(Record, TI, Element, StreamIPI); + printTypeIndex("ParentScope", Func.getParentScope(), StreamTPI); + printTypeIndex("FunctionType", Func.getFunctionType(), StreamTPI); + W.printString("Name", Func.getName()); + printTypeEnd(Record); + }); + + // The TypeIndex (LF_PROCEDURE) returned by 'getFunctionType' is the + // function propotype, we need to use the function definition. 
+ if (LVScope *FunctionDcl = static_cast<LVScope *>(Element)) { + // For inlined functions, the inlined instance has been already processed + // (all its information is contained in the Symbols section). + // 'Element' points to the created 'abstract' (out-of-line) function. + // Use the parent scope information to allocate it to the correct scope. + LazyRandomTypeCollection &Types = types(); + TypeIndex TIParent = Func.getParentScope(); + if (FunctionDcl->getIsInlinedAbstract()) { + FunctionDcl->setName(Func.getName()); + if (TIParent.isNoneType()) + Reader->getCompileUnit()->addElement(FunctionDcl); + } + + if (!TIParent.isNoneType()) { + CVType CVParentScope = ids().getType(TIParent); + if (Error Err = finishVisitation(CVParentScope, TIParent, FunctionDcl)) + return Err; + } + + TypeIndex TIFunctionType = Func.getFunctionType(); + CVType CVFunctionType = Types.getType(TIFunctionType); + if (Error Err = + finishVisitation(CVFunctionType, TIFunctionType, FunctionDcl)) + return Err; + + FunctionDcl->setIsFinalized(); + } + + return Error::success(); +} + +// LF_LABEL (TPI) +Error LVLogicalVisitor::visitKnownRecord(CVType &Record, LabelRecord &LR, + TypeIndex TI, LVElement *Element) { + LLVM_DEBUG({ + printTypeBegin(Record, TI, Element, StreamTPI); + printTypeEnd(Record); + }); + return Error::success(); +} + +// LF_MFUNC_ID (TPI)/(IPI) +Error LVLogicalVisitor::visitKnownRecord(CVType &Record, MemberFuncIdRecord &Id, + TypeIndex TI, LVElement *Element) { + // ClassType and FunctionType are references into the TPI stream. 
+ LLVM_DEBUG({ + printTypeBegin(Record, TI, Element, StreamIPI); + printTypeIndex("ClassType", Id.getClassType(), StreamTPI); + printTypeIndex("FunctionType", Id.getFunctionType(), StreamTPI); + W.printString("Name", Id.getName()); + printTypeEnd(Record); + }); + + LVScope *FunctionDcl = static_cast<LVScope *>(Element); + if (FunctionDcl->getIsInlinedAbstract()) { + // For inlined functions, the inlined instance has been already processed + // (all its information is contained in the Symbols section). + // 'Element' points to the created 'abstract' (out-of-line) function. + // Use the parent scope information to allocate it to the correct scope. + if (LVScope *Class = static_cast<LVScope *>( + Shared->TypeRecords.find(StreamTPI, Id.getClassType()))) + Class->addElement(FunctionDcl); + } + + TypeIndex TIFunctionType = Id.getFunctionType(); + CVType CVFunction = types().getType(TIFunctionType); + if (Error Err = finishVisitation(CVFunction, TIFunctionType, Element)) + return Err; + + return Error::success(); +} + +// LF_MFUNCTION (TPI) +Error LVLogicalVisitor::visitKnownRecord(CVType &Record, + MemberFunctionRecord &MF, TypeIndex TI, + LVElement *Element) { + LLVM_DEBUG({ + printTypeBegin(Record, TI, Element, StreamTPI); + printTypeIndex("ReturnType", MF.getReturnType(), StreamTPI); + printTypeIndex("ClassType", MF.getClassType(), StreamTPI); + printTypeIndex("ThisType", MF.getThisType(), StreamTPI); + W.printNumber("NumParameters", MF.getParameterCount()); + printTypeIndex("ArgListType", MF.getArgumentList(), StreamTPI); + W.printNumber("ThisAdjustment", MF.getThisPointerAdjustment()); + printTypeEnd(Record); + }); + + if (LVScope *MemberFunction = static_cast<LVScope *>(Element)) { + LVElement *Class = getElement(StreamTPI, MF.getClassType()); + + MemberFunction->setIsFinalized(); + MemberFunction->setType(getElement(StreamTPI, MF.getReturnType())); + MemberFunction->setOffset(TI.getIndex()); + MemberFunction->setOffsetFromTypeIndex(); + + if (ProcessArgumentList) 
{ + ProcessArgumentList = false; + + if (!MemberFunction->getIsStatic()) { + LVElement *ThisPointer = getElement(StreamTPI, MF.getThisType()); + // When creating the 'this' pointer, check if it points to a reference. + ThisPointer->setType(Class); + LVSymbol *This = + createParameter(ThisPointer, StringRef(), MemberFunction); + This->setIsArtificial(); + } + + // Create formal parameters. + LazyRandomTypeCollection &Types = types(); + CVType CVArguments = Types.getType(MF.getArgumentList()); + if (Error Err = finishVisitation(CVArguments, MF.getArgumentList(), + MemberFunction)) + return Err; + } + } + + return Error::success(); +} + +// LF_METHODLIST (TPI) +Error LVLogicalVisitor::visitKnownRecord(CVType &Record, + MethodOverloadListRecord &Overloads, + TypeIndex TI, LVElement *Element) { + LLVM_DEBUG({ + printTypeBegin(Record, TI, Element, StreamTPI); + printTypeEnd(Record); + }); + + for (OneMethodRecord &Method : Overloads.Methods) { + CVMemberRecord Record; + Record.Kind = LF_METHOD; + Method.Name = OverloadedMethodName; + if (Error Err = visitKnownMember(Record, Method, TI, Element)) + return Err; + } + + return Error::success(); +} + +// LF_MODIFIER (TPI) +Error LVLogicalVisitor::visitKnownRecord(CVType &Record, ModifierRecord &Mod, + TypeIndex TI, LVElement *Element) { + LLVM_DEBUG({ + printTypeBegin(Record, TI, Element, StreamTPI); + printTypeIndex("ModifiedType", Mod.getModifiedType(), StreamTPI); + printTypeEnd(Record); + }); + + // Create the modified type, which will be attached to the type(s) that + // contains the modifiers. + LVElement *ModifiedType = getElement(StreamTPI, Mod.getModifiedType()); + + // At this point the types recording the qualifiers do not have a + // scope parent. They must be assigned to the current compile unit. + LVScopeCompileUnit *CompileUnit = Reader->getCompileUnit(); + + // The incoming element does not have a defined kind. Use the given + // modifiers to complete its type. 
A type can have more than one modifier; + // in that case, we have to create an extra type to have the other modifier. + LVType *LastLink = static_cast<LVType *>(Element); + if (!LastLink->getParentScope()) + CompileUnit->addElement(LastLink); + + bool SeenModifier = false; + uint16_t Mods = static_cast<uint16_t>(Mod.getModifiers()); + if (Mods & uint16_t(ModifierOptions::Const)) { + SeenModifier = true; + LastLink->setTag(dwarf::DW_TAG_const_type); + LastLink->setIsConst(); + LastLink->setName("const"); + } + if (Mods & uint16_t(ModifierOptions::Volatile)) { + if (SeenModifier) { + LVType *Volatile = Reader->createType(); + Volatile->setIsModifier(); + LastLink->setType(Volatile); + LastLink = Volatile; + CompileUnit->addElement(LastLink); + } + LastLink->setTag(dwarf::DW_TAG_volatile_type); + LastLink->setIsVolatile(); + LastLink->setName("volatile"); + } + if (Mods & uint16_t(ModifierOptions::Unaligned)) { + if (SeenModifier) { + LVType *Unaligned = Reader->createType(); + Unaligned->setIsModifier(); + LastLink->setType(Unaligned); + LastLink = Unaligned; + CompileUnit->addElement(LastLink); + } + LastLink->setTag(dwarf::DW_TAG_unaligned); + LastLink->setIsUnaligned(); + LastLink->setName("unaligned"); + } + + LastLink->setType(ModifiedType); + return Error::success(); +} + +// LF_POINTER (TPI) +Error LVLogicalVisitor::visitKnownRecord(CVType &Record, PointerRecord &Ptr, + TypeIndex TI, LVElement *Element) { + LLVM_DEBUG({ + printTypeBegin(Record, TI, Element, StreamTPI); + printTypeIndex("PointeeType", Ptr.getReferentType(), StreamTPI); + W.printNumber("IsFlat", Ptr.isFlat()); + W.printNumber("IsConst", Ptr.isConst()); + W.printNumber("IsVolatile", Ptr.isVolatile()); + W.printNumber("IsUnaligned", Ptr.isUnaligned()); + W.printNumber("IsRestrict", Ptr.isRestrict()); + W.printNumber("IsThisPtr&", Ptr.isLValueReferenceThisPtr()); + W.printNumber("IsThisPtr&&", Ptr.isRValueReferenceThisPtr()); + W.printNumber("SizeOf", Ptr.getSize()); + + if 
(Ptr.isPointerToMember()) { + const MemberPointerInfo &MI = Ptr.getMemberInfo(); + printTypeIndex("ClassType", MI.getContainingType(), StreamTPI); + } + printTypeEnd(Record); + }); + + // Find the pointed-to type. + LVType *Pointer = static_cast<LVType *>(Element); + LVElement *Pointee = nullptr; + + PointerMode Mode = Ptr.getMode(); + Pointee = Ptr.isPointerToMember() + ? Shared->TypeRecords.find(StreamTPI, Ptr.getReferentType()) + : getElement(StreamTPI, Ptr.getReferentType()); + + // At this point the types recording the qualifiers do not have a + // scope parent. They must be assigned to the current compile unit. + LVScopeCompileUnit *CompileUnit = Reader->getCompileUnit(); + + // Order for the different modifiers: + // <restrict> <pointer, Reference, ValueReference> <const, volatile> + // Const and volatile already processed. + bool SeenModifier = false; + LVType *LastLink = Pointer; + if (!LastLink->getParentScope()) + CompileUnit->addElement(LastLink); + + if (Ptr.isRestrict()) { + SeenModifier = true; + LVType *Restrict = Reader->createType(); + Restrict->setTag(dwarf::DW_TAG_restrict_type); + Restrict->setIsRestrict(); + Restrict->setName("restrict"); + LastLink->setType(Restrict); + LastLink = Restrict; + CompileUnit->addElement(LastLink); + } + if (Mode == PointerMode::LValueReference) { + if (SeenModifier) { + LVType *LReference = Reader->createType(); + LReference->setIsModifier(); + LastLink->setType(LReference); + LastLink = LReference; + CompileUnit->addElement(LastLink); + } + LastLink->setTag(dwarf::DW_TAG_reference_type); + LastLink->setIsReference(); + LastLink->setName("&"); + } + if (Mode == PointerMode::RValueReference) { + if (SeenModifier) { + LVType *RReference = Reader->createType(); + RReference->setIsModifier(); + LastLink->setType(RReference); + LastLink = RReference; + CompileUnit->addElement(LastLink); + } + LastLink->setTag(dwarf::DW_TAG_rvalue_reference_type); + LastLink->setIsRvalueReference(); + LastLink->setName("&&"); + } + + 
// When creating the pointer, check if it points to a reference. + LastLink->setType(Pointee); + return Error::success(); +} + +// LF_PROCEDURE (TPI) +Error LVLogicalVisitor::visitKnownRecord(CVType &Record, ProcedureRecord &Proc, + TypeIndex TI, LVElement *Element) { + LLVM_DEBUG({ + printTypeBegin(Record, TI, Element, StreamTPI); + printTypeIndex("ReturnType", Proc.getReturnType(), StreamTPI); + W.printNumber("NumParameters", Proc.getParameterCount()); + printTypeIndex("ArgListType", Proc.getArgumentList(), StreamTPI); + printTypeEnd(Record); + }); + + // There is no need to traverse the argument list, as the CodeView format + // declares the parameters as a 'S_LOCAL' symbol tagged as parameter. + // Only process parameters when dealing with inline functions. + if (LVScope *FunctionDcl = static_cast<LVScope *>(Element)) { + FunctionDcl->setType(getElement(StreamTPI, Proc.getReturnType())); + + if (ProcessArgumentList) { + ProcessArgumentList = false; + // Create formal parameters. + LazyRandomTypeCollection &Types = types(); + CVType CVArguments = Types.getType(Proc.getArgumentList()); + if (Error Err = finishVisitation(CVArguments, Proc.getArgumentList(), + FunctionDcl)) + return Err; + } + } + + return Error::success(); +} + +// LF_UNION (TPI) +Error LVLogicalVisitor::visitKnownRecord(CVType &Record, UnionRecord &Union, + TypeIndex TI, LVElement *Element) { + LLVM_DEBUG({ + printTypeBegin(Record, TI, Element, StreamTPI); + W.printNumber("MemberCount", Union.getMemberCount()); + printTypeIndex("FieldList", Union.getFieldList(), StreamTPI); + W.printNumber("SizeOf", Union.getSize()); + W.printString("Name", Union.getName()); + if (Union.hasUniqueName()) + W.printString("UniqueName", Union.getUniqueName()); + printTypeEnd(Record); + }); + + LVScopeAggregate *Scope = static_cast<LVScopeAggregate *>(Element); + if (!Scope) + return Error::success(); + + if (Scope->getIsFinalized()) + return Error::success(); + Scope->setIsFinalized(); + + 
Scope->setName(Union.getName()); + if (Union.hasUniqueName()) + Scope->setLinkageName(Union.getUniqueName()); + + if (Union.isNested()) { + Scope->setIsNested(); + createParents(Union.getName(), Scope); + } else { + if (LVScope *Namespace = Shared->NamespaceDeduction.get(Union.getName())) + Namespace->addElement(Scope); + else + Reader->getCompileUnit()->addElement(Scope); + } + + if (!Union.getFieldList().isNoneType()) { + LazyRandomTypeCollection &Types = types(); + // Pass down the TypeIndex 'TI' for the aggregate containing the field list. + CVType CVFieldList = Types.getType(Union.getFieldList()); + if (Error Err = finishVisitation(CVFieldList, TI, Scope)) + return Err; + } + + return Error::success(); +} + +// LF_TYPESERVER2 (TPI) +Error LVLogicalVisitor::visitKnownRecord(CVType &Record, TypeServer2Record &TS, + TypeIndex TI, LVElement *Element) { + LLVM_DEBUG({ + printTypeBegin(Record, TI, Element, StreamTPI); + W.printString("Guid", formatv("{0}", TS.getGuid()).str()); + W.printNumber("Age", TS.getAge()); + W.printString("Name", TS.getName()); + printTypeEnd(Record); + }); + return Error::success(); +} + +// LF_VFTABLE (TPI) +Error LVLogicalVisitor::visitKnownRecord(CVType &Record, VFTableRecord &VFT, + TypeIndex TI, LVElement *Element) { + LLVM_DEBUG({ + printTypeBegin(Record, TI, Element, StreamTPI); + printTypeIndex("CompleteClass", VFT.getCompleteClass(), StreamTPI); + printTypeIndex("OverriddenVFTable", VFT.getOverriddenVTable(), StreamTPI); + W.printHex("VFPtrOffset", VFT.getVFPtrOffset()); + W.printString("VFTableName", VFT.getName()); + for (const StringRef &N : VFT.getMethodNames()) + W.printString("MethodName", N); + printTypeEnd(Record); + }); + return Error::success(); +} + +// LF_VTSHAPE (TPI) +Error LVLogicalVisitor::visitKnownRecord(CVType &Record, + VFTableShapeRecord &Shape, + TypeIndex TI, LVElement *Element) { + LLVM_DEBUG({ + printTypeBegin(Record, TI, Element, StreamTPI); + W.printNumber("VFEntryCount", Shape.getEntryCount()); + 
printTypeEnd(Record); + }); + return Error::success(); +} + +// LF_SUBSTR_LIST (TPI)/(IPI) +Error LVLogicalVisitor::visitKnownRecord(CVType &Record, + StringListRecord &Strings, + TypeIndex TI, LVElement *Element) { + // All the indices are references into the TPI/IPI stream. + LLVM_DEBUG({ + printTypeBegin(Record, TI, Element, StreamIPI); + ArrayRef<TypeIndex> Indices = Strings.getIndices(); + uint32_t Size = Indices.size(); + W.printNumber("NumStrings", Size); + ListScope Arguments(W, "Strings"); + for (uint32_t I = 0; I < Size; ++I) + printTypeIndex("String", Indices[I], StreamIPI); + printTypeEnd(Record); + }); + return Error::success(); +} + +// LF_STRING_ID (TPI)/(IPI) +Error LVLogicalVisitor::visitKnownRecord(CVType &Record, StringIdRecord &String, + TypeIndex TI, LVElement *Element) { + // All args are references into the TPI/IPI stream. + LLVM_DEBUG({ + printTypeIndex("\nTI", TI, StreamIPI); + printTypeIndex("Id", String.getId(), StreamIPI); + W.printString("StringData", String.getString()); + }); + + if (LVScope *Namespace = Shared->NamespaceDeduction.get( + String.getString(), /*CheckScope=*/false)) { + // The function is already at different scope. In order to reflect + // the correct parent, move it to the namespace. + if (LVScope *Scope = Element->getParentScope()) + Scope->removeElement(Element); + Namespace->addElement(Element); + } + + return Error::success(); +} + +// LF_UDT_SRC_LINE (TPI)/(IPI) +Error LVLogicalVisitor::visitKnownRecord(CVType &Record, + UdtSourceLineRecord &SourceLine, + TypeIndex TI, LVElement *Element) { + // All args are references into the TPI/IPI stream. 
+ LLVM_DEBUG({ + printTypeIndex("\nTI", TI, StreamIPI); + printTypeIndex("UDT", SourceLine.getUDT(), StreamIPI); + printTypeIndex("SourceFile", SourceLine.getSourceFile(), StreamIPI); + W.printNumber("LineNumber", SourceLine.getLineNumber()); + }); + return Error::success(); +} + +// LF_UDT_MOD_SRC_LINE (TPI)/(IPI) +Error LVLogicalVisitor::visitKnownRecord(CVType &Record, + UdtModSourceLineRecord &ModSourceLine, + TypeIndex TI, LVElement *Element) { + // All args are references into the TPI/IPI stream. + LLVM_DEBUG({ + printTypeBegin(Record, TI, Element, StreamIPI); + printTypeIndex("\nTI", TI, StreamIPI); + printTypeIndex("UDT", ModSourceLine.getUDT(), StreamIPI); + printTypeIndex("SourceFile", ModSourceLine.getSourceFile(), StreamIPI); + W.printNumber("LineNumber", ModSourceLine.getLineNumber()); + W.printNumber("Module", ModSourceLine.getModule()); + printTypeEnd(Record); + }); + return Error::success(); +} + +// LF_PRECOMP (TPI) +Error LVLogicalVisitor::visitKnownRecord(CVType &Record, PrecompRecord &Precomp, + TypeIndex TI, LVElement *Element) { + LLVM_DEBUG({ + printTypeBegin(Record, TI, Element, StreamTPI); + W.printHex("StartIndex", Precomp.getStartTypeIndex()); + W.printHex("Count", Precomp.getTypesCount()); + W.printHex("Signature", Precomp.getSignature()); + W.printString("PrecompFile", Precomp.getPrecompFilePath()); + printTypeEnd(Record); + }); + return Error::success(); +} + +// LF_ENDPRECOMP (TPI) +Error LVLogicalVisitor::visitKnownRecord(CVType &Record, + EndPrecompRecord &EndPrecomp, + TypeIndex TI, LVElement *Element) { + LLVM_DEBUG({ + printTypeBegin(Record, TI, Element, StreamTPI); + W.printHex("Signature", EndPrecomp.getSignature()); + printTypeEnd(Record); + }); + return Error::success(); +} + +Error LVLogicalVisitor::visitUnknownMember(CVMemberRecord &Record, + TypeIndex TI) { + LLVM_DEBUG({ W.printHex("UnknownMember", unsigned(Record.Kind)); }); + return Error::success(); +} + +// LF_BCLASS, LF_BINTERFACE +Error 
LVLogicalVisitor::visitKnownMember(CVMemberRecord &Record, + BaseClassRecord &Base, TypeIndex TI, + LVElement *Element) { + LLVM_DEBUG({ + printMemberBegin(Record, TI, Element, StreamTPI); + printTypeIndex("BaseType", Base.getBaseType(), StreamTPI); + W.printHex("BaseOffset", Base.getBaseOffset()); + printMemberEnd(Record); + }); + + createElement(Record.Kind); + if (LVSymbol *Symbol = CurrentSymbol) { + LVElement *BaseClass = getElement(StreamTPI, Base.getBaseType()); + Symbol->setName(BaseClass->getName()); + Symbol->setType(BaseClass); + Symbol->setAccessibilityCode(Base.getAccess()); + static_cast<LVScope *>(Element)->addElement(Symbol); + } + + return Error::success(); +} + +// LF_MEMBER +Error LVLogicalVisitor::visitKnownMember(CVMemberRecord &Record, + DataMemberRecord &Field, TypeIndex TI, + LVElement *Element) { + LLVM_DEBUG({ + printMemberBegin(Record, TI, Element, StreamTPI); + printTypeIndex("Type", Field.getType(), StreamTPI); + W.printHex("FieldOffset", Field.getFieldOffset()); + W.printString("Name", Field.getName()); + printMemberEnd(Record); + }); + + // Create the data member. 
+ createDataMember(Record, static_cast<LVScope *>(Element), Field.getName(), + Field.getType(), Field.getAccess()); + return Error::success(); +} + +// LF_ENUMERATE +Error LVLogicalVisitor::visitKnownMember(CVMemberRecord &Record, + EnumeratorRecord &Enum, TypeIndex TI, + LVElement *Element) { + LLVM_DEBUG({ + printMemberBegin(Record, TI, Element, StreamTPI); + W.printNumber("EnumValue", Enum.getValue()); + W.printString("Name", Enum.getName()); + printMemberEnd(Record); + }); + + createElement(Record.Kind); + if (LVType *Type = CurrentType) { + Type->setName(Enum.getName()); + SmallString<16> Value; + Enum.getValue().toString(Value, 16, true, true); + Type->setValue(Value); + static_cast<LVScope *>(Element)->addElement(CurrentType); + } + + return Error::success(); +} + +// LF_INDEX +Error LVLogicalVisitor::visitKnownMember(CVMemberRecord &Record, + ListContinuationRecord &Cont, + TypeIndex TI, LVElement *Element) { + LLVM_DEBUG({ + printMemberBegin(Record, TI, Element, StreamTPI); + printTypeIndex("ContinuationIndex", Cont.getContinuationIndex(), StreamTPI); + printMemberEnd(Record); + }); + return Error::success(); +} + +// LF_NESTTYPE +Error LVLogicalVisitor::visitKnownMember(CVMemberRecord &Record, + NestedTypeRecord &Nested, TypeIndex TI, + LVElement *Element) { + LLVM_DEBUG({ + printMemberBegin(Record, TI, Element, StreamTPI); + printTypeIndex("Type", Nested.getNestedType(), StreamTPI); + W.printString("Name", Nested.getName()); + printMemberEnd(Record); + }); + + if (LVElement *Typedef = createElement(SymbolKind::S_UDT)) { + Typedef->setName(Nested.getName()); + LVElement *NestedType = getElement(StreamTPI, Nested.getNestedType()); + Typedef->setType(NestedType); + LVScope *Scope = static_cast<LVScope *>(Element); + Scope->addElement(Typedef); + + if (NestedType && NestedType->getIsNested()) { + // 'Element' is an aggregate type that may contains this nested type + // definition. Used their scoped names, to decide on their relationship. 
+ StringRef RecordName = getRecordName(types(), TI); + + StringRef NestedTypeName = NestedType->getName(); + if (NestedTypeName.size() && RecordName.size()) { + StringRef OuterComponent; + std::tie(OuterComponent, std::ignore) = + getInnerComponent(NestedTypeName); + // We have an already created nested type. Add it to the current scope + // and update all its children if any. + if (OuterComponent.size() && OuterComponent.equals(RecordName)) { + if (!NestedType->getIsScopedAlready()) { + Scope->addElement(NestedType); + NestedType->setIsScopedAlready(); + NestedType->updateLevel(Scope); + } + Typedef->resetIncludeInPrint(); + } + } + } + } + + return Error::success(); +} + +// LF_ONEMETHOD +Error LVLogicalVisitor::visitKnownMember(CVMemberRecord &Record, + OneMethodRecord &Method, TypeIndex TI, + LVElement *Element) { + LLVM_DEBUG({ + printMemberBegin(Record, TI, Element, StreamTPI); + printTypeIndex("Type", Method.getType(), StreamTPI); + // If virtual, then read the vftable offset. + if (Method.isIntroducingVirtual()) + W.printHex("VFTableOffset", Method.getVFTableOffset()); + W.printString("Name", Method.getName()); + printMemberEnd(Record); + }); + + // All the LF_ONEMETHOD objects share the same type description. + // We have to create a scope object for each one and get the required + // information from the LF_MFUNCTION object. 
+ ProcessArgumentList = true; + if (LVElement *MemberFunction = createElement(TypeLeafKind::LF_ONEMETHOD)) { + MemberFunction->setIsFinalized(); + static_cast<LVScope *>(Element)->addElement(MemberFunction); + + MemberFunction->setName(Method.getName()); + MemberFunction->setAccessibilityCode(Method.getAccess()); + + MethodKind Kind = Method.getMethodKind(); + if (Kind == MethodKind::Static) + MemberFunction->setIsStatic(); + MemberFunction->setVirtualityCode(Kind); + + MethodOptions Flags = Method.Attrs.getFlags(); + if (MethodOptions::CompilerGenerated == + (Flags & MethodOptions::CompilerGenerated)) + MemberFunction->setIsArtificial(); + + LazyRandomTypeCollection &Types = types(); + CVType CVMethodType = Types.getType(Method.getType()); + if (Error Err = + finishVisitation(CVMethodType, Method.getType(), MemberFunction)) + return Err; + } + ProcessArgumentList = false; + + return Error::success(); +} + +// LF_METHOD +Error LVLogicalVisitor::visitKnownMember(CVMemberRecord &Record, + OverloadedMethodRecord &Method, + TypeIndex TI, LVElement *Element) { + LLVM_DEBUG({ + printMemberBegin(Record, TI, Element, StreamTPI); + W.printHex("MethodCount", Method.getNumOverloads()); + printTypeIndex("MethodListIndex", Method.getMethodList(), StreamTPI); + W.printString("Name", Method.getName()); + printMemberEnd(Record); + }); + + // Record the overloaded method name, which will be used during the + // traversal of the method list. 
+ LazyRandomTypeCollection &Types = types(); + OverloadedMethodName = Method.getName(); + CVType CVMethods = Types.getType(Method.getMethodList()); + if (Error Err = finishVisitation(CVMethods, Method.getMethodList(), Element)) + return Err; + + return Error::success(); +} + +// LF_STMEMBER +Error LVLogicalVisitor::visitKnownMember(CVMemberRecord &Record, + StaticDataMemberRecord &Field, + TypeIndex TI, LVElement *Element) { + LLVM_DEBUG({ + printMemberBegin(Record, TI, Element, StreamTPI); + printTypeIndex("Type", Field.getType(), StreamTPI); + W.printString("Name", Field.getName()); + printMemberEnd(Record); + }); + + // Create the data member. + createDataMember(Record, static_cast<LVScope *>(Element), Field.getName(), + Field.getType(), Field.getAccess()); + return Error::success(); +} + +// LF_VFUNCTAB +Error LVLogicalVisitor::visitKnownMember(CVMemberRecord &Record, + VFPtrRecord &VFTable, TypeIndex TI, + LVElement *Element) { + LLVM_DEBUG({ + printMemberBegin(Record, TI, Element, StreamTPI); + printTypeIndex("Type", VFTable.getType(), StreamTPI); + printMemberEnd(Record); + }); + return Error::success(); +} + +// LF_VBCLASS, LF_IVBCLASS +Error LVLogicalVisitor::visitKnownMember(CVMemberRecord &Record, + VirtualBaseClassRecord &Base, + TypeIndex TI, LVElement *Element) { + LLVM_DEBUG({ + printMemberBegin(Record, TI, Element, StreamTPI); + printTypeIndex("BaseType", Base.getBaseType(), StreamTPI); + printTypeIndex("VBPtrType", Base.getVBPtrType(), StreamTPI); + W.printHex("VBPtrOffset", Base.getVBPtrOffset()); + W.printHex("VBTableIndex", Base.getVTableIndex()); + printMemberEnd(Record); + }); + + createElement(Record.Kind); + if (LVSymbol *Symbol = CurrentSymbol) { + LVElement *BaseClass = getElement(StreamTPI, Base.getBaseType()); + Symbol->setName(BaseClass->getName()); + Symbol->setType(BaseClass); + Symbol->setAccessibilityCode(Base.getAccess()); + Symbol->setVirtualityCode(MethodKind::Virtual); + static_cast<LVScope *>(Element)->addElement(Symbol); + } + 
+ return Error::success(); +} + +Error LVLogicalVisitor::visitMemberRecord(CVMemberRecord &Record, + TypeVisitorCallbacks &Callbacks, + TypeIndex TI, LVElement *Element) { + if (Error Err = Callbacks.visitMemberBegin(Record)) + return Err; + + switch (Record.Kind) { + default: + if (Error Err = Callbacks.visitUnknownMember(Record)) + return Err; + break; +#define MEMBER_RECORD(EnumName, EnumVal, Name) \ + case EnumName: { \ + if (Error Err = \ + visitKnownMember<Name##Record>(Record, Callbacks, TI, Element)) \ + return Err; \ + break; \ + } +#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) \ + MEMBER_RECORD(EnumVal, EnumVal, AliasName) +#define TYPE_RECORD(EnumName, EnumVal, Name) +#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" + } + + if (Error Err = Callbacks.visitMemberEnd(Record)) + return Err; + + return Error::success(); +} + +Error LVLogicalVisitor::finishVisitation(CVType &Record, TypeIndex TI, + LVElement *Element) { + switch (Record.kind()) { + default: + if (Error Err = visitUnknownType(Record, TI)) + return Err; + break; +#define TYPE_RECORD(EnumName, EnumVal, Name) \ + case EnumName: { \ + if (Error Err = visitKnownRecord<Name##Record>(Record, TI, Element)) \ + return Err; \ + break; \ + } +#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) \ + TYPE_RECORD(EnumVal, EnumVal, AliasName) +#define MEMBER_RECORD(EnumName, EnumVal, Name) +#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" + } + + return Error::success(); +} + +// Customized version of 'FieldListVisitHelper'. 
+Error LVLogicalVisitor::visitFieldListMemberStream( + TypeIndex TI, LVElement *Element, ArrayRef<uint8_t> FieldList) { + BinaryByteStream Stream(FieldList, llvm::support::little); + BinaryStreamReader Reader(Stream); + FieldListDeserializer Deserializer(Reader); + TypeVisitorCallbackPipeline Pipeline; + Pipeline.addCallbackToPipeline(Deserializer); + + TypeLeafKind Leaf; + while (!Reader.empty()) { + if (Error Err = Reader.readEnum(Leaf)) + return Err; + + CVMemberRecord Record; + Record.Kind = Leaf; + if (Error Err = visitMemberRecord(Record, Pipeline, TI, Element)) + return Err; + } + + return Error::success(); +} + +void LVLogicalVisitor::addElement(LVScope *Scope, bool IsCompileUnit) { + // The CodeView specifications does not treat S_COMPILE2 and S_COMPILE3 + // as symbols that open a scope. The CodeView reader, treat them in a + // similar way as DWARF. As there is no a symbole S_END to close the + // compile unit, we need to check for the next compile unit. + if (IsCompileUnit) { + if (!ScopeStack.empty()) + popScope(); + InCompileUnitScope = true; + } + + pushScope(Scope); + ReaderParent->addElement(Scope); +} + +void LVLogicalVisitor::addElement(LVSymbol *Symbol) { + ReaderScope->addElement(Symbol); +} + +void LVLogicalVisitor::addElement(LVType *Type) { + ReaderScope->addElement(Type); +} + +LVElement *LVLogicalVisitor::createElement(TypeLeafKind Kind) { + CurrentScope = nullptr; + CurrentSymbol = nullptr; + CurrentType = nullptr; + + if (Kind < TypeIndex::FirstNonSimpleIndex) { + CurrentType = Reader->createType(); + CurrentType->setIsBase(); + CurrentType->setTag(dwarf::DW_TAG_base_type); + if (options().getAttributeBase()) + CurrentType->setIncludeInPrint(); + return CurrentType; + } + + switch (Kind) { + // Types. 
+ case TypeLeafKind::LF_ENUMERATE: + CurrentType = Reader->createTypeEnumerator(); + CurrentType->setTag(dwarf::DW_TAG_enumerator); + return CurrentType; + case TypeLeafKind::LF_MODIFIER: + CurrentType = Reader->createType(); + CurrentType->setIsModifier(); + return CurrentType; + case TypeLeafKind::LF_POINTER: + CurrentType = Reader->createType(); + CurrentType->setIsPointer(); + CurrentType->setName("*"); + CurrentType->setTag(dwarf::DW_TAG_pointer_type); + return CurrentType; + + // Symbols. + case TypeLeafKind::LF_BCLASS: + case TypeLeafKind::LF_IVBCLASS: + case TypeLeafKind::LF_VBCLASS: + CurrentSymbol = Reader->createSymbol(); + CurrentSymbol->setTag(dwarf::DW_TAG_inheritance); + CurrentSymbol->setIsInheritance(); + return CurrentSymbol; + case TypeLeafKind::LF_MEMBER: + case TypeLeafKind::LF_STMEMBER: + CurrentSymbol = Reader->createSymbol(); + CurrentSymbol->setIsMember(); + CurrentSymbol->setTag(dwarf::DW_TAG_member); + return CurrentSymbol; + + // Scopes. + case TypeLeafKind::LF_ARRAY: + CurrentScope = Reader->createScopeArray(); + CurrentScope->setTag(dwarf::DW_TAG_array_type); + return CurrentScope; + case TypeLeafKind::LF_CLASS: + CurrentScope = Reader->createScopeAggregate(); + CurrentScope->setTag(dwarf::DW_TAG_class_type); + CurrentScope->setIsClass(); + return CurrentScope; + case TypeLeafKind::LF_ENUM: + CurrentScope = Reader->createScopeEnumeration(); + CurrentScope->setTag(dwarf::DW_TAG_enumeration_type); + return CurrentScope; + case TypeLeafKind::LF_METHOD: + case TypeLeafKind::LF_ONEMETHOD: + case TypeLeafKind::LF_PROCEDURE: + CurrentScope = Reader->createScopeFunction(); + CurrentScope->setIsSubprogram(); + CurrentScope->setTag(dwarf::DW_TAG_subprogram); + return CurrentScope; + case TypeLeafKind::LF_STRUCTURE: + CurrentScope = Reader->createScopeAggregate(); + CurrentScope->setIsStructure(); + CurrentScope->setTag(dwarf::DW_TAG_structure_type); + return CurrentScope; + case TypeLeafKind::LF_UNION: + CurrentScope = 
Reader->createScopeAggregate(); + CurrentScope->setIsUnion(); + CurrentScope->setTag(dwarf::DW_TAG_union_type); + return CurrentScope; + default: + // If '--internal=tag' and '--print=warning' are specified in the command + // line, we record and print each seen 'TypeLeafKind'. + break; + } + return nullptr; +} + +LVElement *LVLogicalVisitor::createElement(SymbolKind Kind) { + CurrentScope = nullptr; + CurrentSymbol = nullptr; + CurrentType = nullptr; + switch (Kind) { + // Types. + case SymbolKind::S_UDT: + CurrentType = Reader->createTypeDefinition(); + CurrentType->setTag(dwarf::DW_TAG_typedef); + return CurrentType; + + // Symbols. + case SymbolKind::S_CONSTANT: + CurrentSymbol = Reader->createSymbol(); + CurrentSymbol->setIsConstant(); + CurrentSymbol->setTag(dwarf::DW_TAG_constant); + return CurrentSymbol; + + case SymbolKind::S_BPREL32: + case SymbolKind::S_REGREL32: + case SymbolKind::S_GDATA32: + case SymbolKind::S_LDATA32: + case SymbolKind::S_LOCAL: + // During the symbol traversal more information is available to + // determine if the symbol is a parameter or a variable. At this + // stage mark it as variable. + CurrentSymbol = Reader->createSymbol(); + CurrentSymbol->setIsVariable(); + CurrentSymbol->setTag(dwarf::DW_TAG_variable); + return CurrentSymbol; + + // Scopes. 
+ case SymbolKind::S_BLOCK32: + CurrentScope = Reader->createScope(); + CurrentScope->setIsLexicalBlock(); + CurrentScope->setTag(dwarf::DW_TAG_lexical_block); + return CurrentScope; + case SymbolKind::S_COMPILE2: + case SymbolKind::S_COMPILE3: + CurrentScope = Reader->createScopeCompileUnit(); + CurrentScope->setTag(dwarf::DW_TAG_compile_unit); + Reader->setCompileUnit(static_cast<LVScopeCompileUnit *>(CurrentScope)); + return CurrentScope; + case SymbolKind::S_INLINESITE: + case SymbolKind::S_INLINESITE2: + CurrentScope = Reader->createScopeFunctionInlined(); + CurrentScope->setIsInlinedFunction(); + CurrentScope->setTag(dwarf::DW_TAG_inlined_subroutine); + return CurrentScope; + case SymbolKind::S_LPROC32: + case SymbolKind::S_GPROC32: + case SymbolKind::S_LPROC32_ID: + case SymbolKind::S_GPROC32_ID: + case SymbolKind::S_SEPCODE: + case SymbolKind::S_THUNK32: + CurrentScope = Reader->createScopeFunction(); + CurrentScope->setIsSubprogram(); + CurrentScope->setTag(dwarf::DW_TAG_subprogram); + return CurrentScope; + default: + // If '--internal=tag' and '--print=warning' are specified in the command + // line, we record and print each seen 'SymbolKind'. + break; + } + return nullptr; +} + +LVElement *LVLogicalVisitor::createElement(TypeIndex TI, TypeLeafKind Kind) { + LVElement *Element = Shared->TypeRecords.find(StreamTPI, TI); + if (!Element) { + // We are dealing with a base type or pointer to a base type, which are + // not included explicitly in the CodeView format. + if (Kind < TypeIndex::FirstNonSimpleIndex) { + Element = createElement(Kind); + Element->setIsFinalized(); + Shared->TypeRecords.add(StreamTPI, (TypeIndex)Kind, Kind, Element); + Element->setOffset(Kind); + return Element; + } + // We are dealing with a pointer to a base type. 
+ if (TI.getIndex() < TypeIndex::FirstNonSimpleIndex) { + Element = createElement(Kind); + Shared->TypeRecords.add(StreamTPI, TI, Kind, Element); + Element->setOffset(TI.getIndex()); + Element->setOffsetFromTypeIndex(); + return Element; + } + + W.printString("** Not implemented. **"); + printTypeIndex("TypeIndex", TI, StreamTPI); + W.printString("TypeLeafKind", formatTypeLeafKind(Kind)); + return nullptr; + } + + Element->setOffset(TI.getIndex()); + Element->setOffsetFromTypeIndex(); + return Element; +} + +void LVLogicalVisitor::createDataMember(CVMemberRecord &Record, LVScope *Parent, + StringRef Name, TypeIndex TI, + MemberAccess Access) { + LLVM_DEBUG({ + printTypeIndex("TypeIndex", TI, StreamTPI); + W.printString("TypeName", Name); + }); + + createElement(Record.Kind); + if (LVSymbol *Symbol = CurrentSymbol) { + Symbol->setName(Name); + if (TI.isNoneType() || TI.isSimple()) + Symbol->setType(getElement(StreamTPI, TI)); + else { + LazyRandomTypeCollection &Types = types(); + CVType CVMemberType = Types.getType(TI); + if (CVMemberType.kind() == LF_BITFIELD) { + if (Error Err = finishVisitation(CVMemberType, TI, Symbol)) { + consumeError(std::move(Err)); + return; + } + } else + Symbol->setType(getElement(StreamTPI, TI)); + } + Symbol->setAccessibilityCode(Access); + Parent->addElement(Symbol); + } +} + +LVSymbol *LVLogicalVisitor::createParameter(LVElement *Element, StringRef Name, + LVScope *Parent) { + LVSymbol *Parameter = Reader->createSymbol(); + Parent->addElement(Parameter); + Parameter->setIsParameter(); + Parameter->setTag(dwarf::DW_TAG_formal_parameter); + Parameter->setName(Name); + Parameter->setType(Element); + return Parameter; +} + +LVSymbol *LVLogicalVisitor::createParameter(TypeIndex TI, StringRef Name, + LVScope *Parent) { + return createParameter(getElement(StreamTPI, TI), Name, Parent); +} + +LVType *LVLogicalVisitor::createBaseType(TypeIndex TI, StringRef TypeName) { + TypeLeafKind SimpleKind = (TypeLeafKind)TI.getSimpleKind(); + TypeIndex 
TIR = (TypeIndex)SimpleKind; + LLVM_DEBUG({ + printTypeIndex("TypeIndex", TIR, StreamTPI); + W.printString("TypeName", TypeName); + }); + + if (LVElement *Element = Shared->TypeRecords.find(StreamTPI, TIR)) + return static_cast<LVType *>(Element); + + if (createElement(TIR, SimpleKind)) { + CurrentType->setName(TypeName); + Reader->getCompileUnit()->addElement(CurrentType); + } + return CurrentType; +} + +LVType *LVLogicalVisitor::createPointerType(TypeIndex TI, StringRef TypeName) { + LLVM_DEBUG({ + printTypeIndex("TypeIndex", TI, StreamTPI); + W.printString("TypeName", TypeName); + }); + + if (LVElement *Element = Shared->TypeRecords.find(StreamTPI, TI)) + return static_cast<LVType *>(Element); + + LVType *Pointee = createBaseType(TI, TypeName.drop_back(1)); + if (createElement(TI, TypeLeafKind::LF_POINTER)) { + CurrentType->setIsFinalized(); + CurrentType->setType(Pointee); + Reader->getCompileUnit()->addElement(CurrentType); + } + return CurrentType; +} + +void LVLogicalVisitor::createParents(StringRef ScopedName, LVElement *Element) { + // For the given test case: + // + // struct S { enum E { ... }; }; + // S::E V; + // + // 0 | S_LOCAL `V` + // type=0x1004 (S::E), flags = none + // 0x1004 | LF_ENUM `S::E` + // options: has unique name | is nested + // 0x1009 | LF_STRUCTURE `S` + // options: contains nested class + // + // When the local 'V' is processed, its type 'E' is created. But There is + // no direct reference to its parent 'S'. We use the scoped name for 'E', + // to create its parents. + + // The input scoped name must have at least parent and nested names. + // Drop the last element name, as it corresponds to the nested type. 
+ LVStringRefs Components = getAllLexicalComponents(ScopedName); + if (Components.size() < 2) + return; + Components.pop_back(); + + LVStringRefs::size_type FirstNamespace; + LVStringRefs::size_type FirstAggregate; + std::tie(FirstNamespace, FirstAggregate) = + Shared->NamespaceDeduction.find(Components); + + LLVM_DEBUG({ + W.printString("First Namespace", Components[FirstNamespace]); + W.printString("First NonNamespace", Components[FirstAggregate]); + }); + + // Create any referenced namespaces. + if (FirstNamespace < FirstAggregate) { + Shared->NamespaceDeduction.get( + LVStringRefs(Components.begin() + FirstNamespace, + Components.begin() + FirstAggregate)); + } + + // Traverse the enclosing scopes (aggregates) and create them. In the + // case of nested empty aggregates, MSVC does not emit a full record + // description. It emits only the reference record. + LVScope *Aggregate = nullptr; + TypeIndex TIAggregate; + std::string AggregateName = getScopedName( + LVStringRefs(Components.begin(), Components.begin() + FirstAggregate)); + + // This traversal is executed at least once. + for (LVStringRefs::size_type Index = FirstAggregate; + Index < Components.size(); ++Index) { + AggregateName = getScopedName(LVStringRefs(Components.begin() + Index, + Components.begin() + Index + 1), + AggregateName); + TIAggregate = Shared->ForwardReferences.remap( + Shared->TypeRecords.find(StreamTPI, AggregateName)); + Aggregate = + TIAggregate.isNoneType() + ? nullptr + : static_cast<LVScope *>(getElement(StreamTPI, TIAggregate)); + } + + // Workaround for cases where LF_NESTTYPE is missing for nested templates. + // If we manage to get parent information from the scoped name, we can add + // the nested type without relying on the LF_NESTTYPE. 
+ if (Aggregate && !Element->getIsScopedAlready()) { + Aggregate->addElement(Element); + Element->setIsScopedAlready(); + } +} + +LVElement *LVLogicalVisitor::getElement(uint32_t StreamIdx, TypeIndex TI, + LVScope *Parent) { + LLVM_DEBUG({ printTypeIndex("TypeIndex", TI, StreamTPI); }); + TI = Shared->ForwardReferences.remap(TI); + LLVM_DEBUG({ printTypeIndex("TypeIndex Remap", TI, StreamTPI); }); + + LVElement *Element = Shared->TypeRecords.find(StreamIdx, TI); + if (!Element) { + if (TI.isNoneType() || TI.isSimple()) { + StringRef TypeName = TypeIndex::simpleTypeName(TI); + // If the name ends with "*", create 2 logical types: a pointer and a + // pointee type. TypeIndex is composed of a SympleTypeMode byte followed + // by a SimpleTypeKind byte. The logical pointer will be identified by + // the full TypeIndex value and the pointee by the SimpleTypeKind. + return (TypeName.back() == '*') ? createPointerType(TI, TypeName) + : createBaseType(TI, TypeName); + } + + LLVM_DEBUG({ W.printHex("TypeIndex not implemented: ", TI.getIndex()); }); + return nullptr; + } + + // The element has been finalized. + if (Element->getIsFinalized()) + return Element; + + // Add the element in case of a given parent. + if (Parent) + Parent->addElement(Element); + + // Check for a composite type. + LazyRandomTypeCollection &Types = types(); + CVType CVRecord = Types.getType(TI); + if (Error Err = finishVisitation(CVRecord, TI, Element)) { + consumeError(std::move(Err)); + return nullptr; + } + Element->setIsFinalized(); + return Element; +} + +void LVLogicalVisitor::processLines() { + // Traverse the collected LF_UDT_SRC_LINE records and add the source line + // information to the logical elements. 
+ for (const TypeIndex &Entry : Shared->LineRecords) { + CVType CVRecord = ids().getType(Entry); + UdtSourceLineRecord Line; + if (Error Err = TypeDeserializer::deserializeAs( + const_cast<CVType &>(CVRecord), Line)) + consumeError(std::move(Err)); + else { + LLVM_DEBUG({ + printTypeIndex("UDT", Line.getUDT(), StreamIPI); + printTypeIndex("SourceFile", Line.getSourceFile(), StreamIPI); + W.printNumber("LineNumber", Line.getLineNumber()); + }); + + // The TypeIndex returned by 'getUDT()' must point to an already + // created logical element. If no logical element is found, it means + // the LF_UDT_SRC_LINE is associated with a system TypeIndex. + if (LVElement *Element = Shared->TypeRecords.find( + StreamTPI, Line.getUDT(), /*Create=*/false)) { + Element->setLineNumber(Line.getLineNumber()); + Element->setFilenameIndex( + Shared->StringRecords.findIndex(Line.getSourceFile())); + } + } + } +} + +void LVLogicalVisitor::processNamespaces() { + // Create namespaces. + Shared->NamespaceDeduction.init(); +} + +void LVLogicalVisitor::processFiles() { Shared->StringRecords.addFilenames(); } + +void LVLogicalVisitor::printRecords(raw_ostream &OS) const { + if (!options().getInternalTag()) + return; + + unsigned Count = 0; + auto PrintItem = [&](StringRef Name) { + auto NewLine = [&]() { + if (++Count == 4) { + Count = 0; + OS << "\n"; + } + }; + OS << format("%20s", Name.str().c_str()); + NewLine(); + }; + + OS << "\nTypes:\n"; + for (const TypeLeafKind &Kind : Shared->TypeKinds) + PrintItem(formatTypeLeafKind(Kind)); + Shared->TypeKinds.clear(); + + Count = 0; + OS << "\nSymbols:\n"; + for (const SymbolKind &Kind : Shared->SymbolKinds) + PrintItem(LVCodeViewReader::getSymbolKindName(Kind)); + Shared->SymbolKinds.clear(); + + OS << "\n"; +} + +Error LVLogicalVisitor::inlineSiteAnnotation(LVScope *AbstractFunction, + LVScope *InlinedFunction, + InlineSiteSym &InlineSite) { + // Get the parent scope to update the address ranges of the nested + // scope representing the inlined 
function. + LVAddress ParentLowPC = 0; + LVScope *Parent = InlinedFunction->getParentScope(); + if (const LVLocations *Locations = Parent->getRanges()) { + if (!Locations->empty()) + ParentLowPC = (*Locations->begin())->getLowerAddress(); + } + + // For the given inlinesite, get the initial line number and its + // source filename. Update the logical scope representing it. + uint32_t LineNumber = 0; + StringRef Filename; + LVInlineeInfo::iterator Iter = InlineeInfo.find(InlineSite.Inlinee); + if (Iter != InlineeInfo.end()) { + LineNumber = Iter->second.first; + Filename = Iter->second.second; + AbstractFunction->setLineNumber(LineNumber); + // TODO: This part needs additional work in order to set properly the + // correct filename in order to detect changes between filenames. + // AbstractFunction->setFilename(Filename); + } + + LLVM_DEBUG({ + dbgs() << "inlineSiteAnnotation\n" + << "Abstract: " << AbstractFunction->getName() << "\n" + << "Inlined: " << InlinedFunction->getName() << "\n" + << "Parent: " << Parent->getName() << "\n" + << "Low PC: " << hexValue(ParentLowPC) << "\n"; + }); + + // Get the source lines if requested by command line option. + if (!options().getPrintLines()) + return Error::success(); + + // Limitation: Currently we don't track changes in the FileOffset. The + // side effects are the caller that it is unable to differentiate the + // source filename for the inlined code. + uint64_t CodeOffset = ParentLowPC; + int32_t LineOffset = LineNumber; + uint32_t FileOffset = 0; + + auto UpdateClose = [&]() { LLVM_DEBUG({ dbgs() << ("\n"); }); }; + auto UpdateCodeOffset = [&](uint32_t Delta) { + CodeOffset += Delta; + LLVM_DEBUG({ + dbgs() << formatv(" code 0x{0} (+0x{1})", utohexstr(CodeOffset), + utohexstr(Delta)); + }); + }; + auto UpdateLineOffset = [&](int32_t Delta) { + LineOffset += Delta; + LLVM_DEBUG({ + char Sign = Delta > 0 ? 
'+' : '-'; + dbgs() << formatv(" line {0} ({1}{2})", LineOffset, Sign, + std::abs(Delta)); + }); + }; + auto UpdateFileOffset = [&](int32_t Offset) { + FileOffset = Offset; + LLVM_DEBUG({ dbgs() << formatv(" file {0}", FileOffset); }); + }; + + LVLines InlineeLines; + auto CreateLine = [&]() { + // Create the logical line record. + LVLineDebug *Line = Reader->createLineDebug(); + Line->setAddress(CodeOffset); + Line->setLineNumber(LineOffset); + // TODO: This part needs additional work in order to set properly the + // correct filename in order to detect changes between filenames. + // Line->setFilename(Filename); + InlineeLines.push_back(Line); + }; + + bool SeenLowAddress = false; + bool SeenHighAddress = false; + uint64_t LowPC = 0; + uint64_t HighPC = 0; + + for (auto &Annot : InlineSite.annotations()) { + LLVM_DEBUG({ + dbgs() << formatv(" {0}", + fmt_align(toHex(Annot.Bytes), AlignStyle::Left, 9)); + }); + + // Use the opcode to interpret the integer values. + switch (Annot.OpCode) { + case BinaryAnnotationsOpCode::ChangeCodeOffset: + case BinaryAnnotationsOpCode::CodeOffset: + case BinaryAnnotationsOpCode::ChangeCodeLength: + UpdateCodeOffset(Annot.U1); + UpdateClose(); + if (Annot.OpCode == BinaryAnnotationsOpCode::ChangeCodeOffset) { + CreateLine(); + LowPC = CodeOffset; + SeenLowAddress = true; + break; + } + if (Annot.OpCode == BinaryAnnotationsOpCode::ChangeCodeLength) { + HighPC = CodeOffset - 1; + SeenHighAddress = true; + } + break; + case BinaryAnnotationsOpCode::ChangeCodeLengthAndCodeOffset: + UpdateCodeOffset(Annot.U2); + UpdateClose(); + break; + case BinaryAnnotationsOpCode::ChangeLineOffset: + case BinaryAnnotationsOpCode::ChangeCodeOffsetAndLineOffset: + UpdateCodeOffset(Annot.U1); + UpdateLineOffset(Annot.S1); + UpdateClose(); + if (Annot.OpCode == + BinaryAnnotationsOpCode::ChangeCodeOffsetAndLineOffset) + CreateLine(); + break; + case BinaryAnnotationsOpCode::ChangeFile: + UpdateFileOffset(Annot.U1); + UpdateClose(); + break; + default: + 
break; + } + if (SeenLowAddress && SeenHighAddress) { + SeenLowAddress = false; + SeenHighAddress = false; + InlinedFunction->addObject(LowPC, HighPC); + } + } + + Reader->addInlineeLines(InlinedFunction, InlineeLines); + UpdateClose(); + + return Error::success(); +} diff --git a/llvm/lib/DebugInfo/LogicalView/Readers/LVELFReader.cpp b/llvm/lib/DebugInfo/LogicalView/Readers/LVELFReader.cpp index 7746bc508b41..ab458341a0bd 100644 --- a/llvm/lib/DebugInfo/LogicalView/Readers/LVELFReader.cpp +++ b/llvm/lib/DebugInfo/LogicalView/Readers/LVELFReader.cpp @@ -57,182 +57,182 @@ LVElement *LVELFReader::createElement(dwarf::Tag Tag) { switch (Tag) { // Types. case dwarf::DW_TAG_base_type: - CurrentType = new LVType(); + CurrentType = createType(); CurrentType->setIsBase(); if (options().getAttributeBase()) CurrentType->setIncludeInPrint(); return CurrentType; case dwarf::DW_TAG_const_type: - CurrentType = new LVType(); + CurrentType = createType(); CurrentType->setIsConst(); CurrentType->setName("const"); return CurrentType; case dwarf::DW_TAG_enumerator: - CurrentType = new LVTypeEnumerator(); + CurrentType = createTypeEnumerator(); return CurrentType; case dwarf::DW_TAG_imported_declaration: - CurrentType = new LVTypeImport(); + CurrentType = createTypeImport(); CurrentType->setIsImportDeclaration(); return CurrentType; case dwarf::DW_TAG_imported_module: - CurrentType = new LVTypeImport(); + CurrentType = createTypeImport(); CurrentType->setIsImportModule(); return CurrentType; case dwarf::DW_TAG_pointer_type: - CurrentType = new LVType(); + CurrentType = createType(); CurrentType->setIsPointer(); CurrentType->setName("*"); return CurrentType; case dwarf::DW_TAG_ptr_to_member_type: - CurrentType = new LVType(); + CurrentType = createType(); CurrentType->setIsPointerMember(); CurrentType->setName("*"); return CurrentType; case dwarf::DW_TAG_reference_type: - CurrentType = new LVType(); + CurrentType = createType(); CurrentType->setIsReference(); CurrentType->setName("&"); 
return CurrentType; case dwarf::DW_TAG_restrict_type: - CurrentType = new LVType(); + CurrentType = createType(); CurrentType->setIsRestrict(); CurrentType->setName("restrict"); return CurrentType; case dwarf::DW_TAG_rvalue_reference_type: - CurrentType = new LVType(); + CurrentType = createType(); CurrentType->setIsRvalueReference(); CurrentType->setName("&&"); return CurrentType; case dwarf::DW_TAG_subrange_type: - CurrentType = new LVTypeSubrange(); + CurrentType = createTypeSubrange(); return CurrentType; case dwarf::DW_TAG_template_value_parameter: - CurrentType = new LVTypeParam(); + CurrentType = createTypeParam(); CurrentType->setIsTemplateValueParam(); return CurrentType; case dwarf::DW_TAG_template_type_parameter: - CurrentType = new LVTypeParam(); + CurrentType = createTypeParam(); CurrentType->setIsTemplateTypeParam(); return CurrentType; case dwarf::DW_TAG_GNU_template_template_param: - CurrentType = new LVTypeParam(); + CurrentType = createTypeParam(); CurrentType->setIsTemplateTemplateParam(); return CurrentType; case dwarf::DW_TAG_typedef: - CurrentType = new LVTypeDefinition(); + CurrentType = createTypeDefinition(); return CurrentType; case dwarf::DW_TAG_unspecified_type: - CurrentType = new LVType(); + CurrentType = createType(); CurrentType->setIsUnspecified(); return CurrentType; case dwarf::DW_TAG_volatile_type: - CurrentType = new LVType(); + CurrentType = createType(); CurrentType->setIsVolatile(); CurrentType->setName("volatile"); return CurrentType; // Symbols. 
case dwarf::DW_TAG_formal_parameter: - CurrentSymbol = new LVSymbol(); + CurrentSymbol = createSymbol(); CurrentSymbol->setIsParameter(); return CurrentSymbol; case dwarf::DW_TAG_unspecified_parameters: - CurrentSymbol = new LVSymbol(); + CurrentSymbol = createSymbol(); CurrentSymbol->setIsUnspecified(); CurrentSymbol->setName("..."); return CurrentSymbol; case dwarf::DW_TAG_member: - CurrentSymbol = new LVSymbol(); + CurrentSymbol = createSymbol(); CurrentSymbol->setIsMember(); return CurrentSymbol; case dwarf::DW_TAG_variable: - CurrentSymbol = new LVSymbol(); + CurrentSymbol = createSymbol(); CurrentSymbol->setIsVariable(); return CurrentSymbol; case dwarf::DW_TAG_inheritance: - CurrentSymbol = new LVSymbol(); + CurrentSymbol = createSymbol(); CurrentSymbol->setIsInheritance(); return CurrentSymbol; case dwarf::DW_TAG_call_site_parameter: case dwarf::DW_TAG_GNU_call_site_parameter: - CurrentSymbol = new LVSymbol(); + CurrentSymbol = createSymbol(); CurrentSymbol->setIsCallSiteParameter(); return CurrentSymbol; case dwarf::DW_TAG_constant: - CurrentSymbol = new LVSymbol(); + CurrentSymbol = createSymbol(); CurrentSymbol->setIsConstant(); return CurrentSymbol; // Scopes. 
case dwarf::DW_TAG_catch_block: - CurrentScope = new LVScope(); + CurrentScope = createScope(); CurrentScope->setIsCatchBlock(); return CurrentScope; case dwarf::DW_TAG_lexical_block: - CurrentScope = new LVScope(); + CurrentScope = createScope(); CurrentScope->setIsLexicalBlock(); return CurrentScope; case dwarf::DW_TAG_try_block: - CurrentScope = new LVScope(); + CurrentScope = createScope(); CurrentScope->setIsTryBlock(); return CurrentScope; case dwarf::DW_TAG_compile_unit: case dwarf::DW_TAG_skeleton_unit: - CurrentScope = new LVScopeCompileUnit(); + CurrentScope = createScopeCompileUnit(); CompileUnit = static_cast<LVScopeCompileUnit *>(CurrentScope); return CurrentScope; case dwarf::DW_TAG_inlined_subroutine: - CurrentScope = new LVScopeFunctionInlined(); + CurrentScope = createScopeFunctionInlined(); return CurrentScope; case dwarf::DW_TAG_namespace: - CurrentScope = new LVScopeNamespace(); + CurrentScope = createScopeNamespace(); return CurrentScope; case dwarf::DW_TAG_template_alias: - CurrentScope = new LVScopeAlias(); + CurrentScope = createScopeAlias(); return CurrentScope; case dwarf::DW_TAG_array_type: - CurrentScope = new LVScopeArray(); + CurrentScope = createScopeArray(); return CurrentScope; case dwarf::DW_TAG_call_site: case dwarf::DW_TAG_GNU_call_site: - CurrentScope = new LVScopeFunction(); + CurrentScope = createScopeFunction(); CurrentScope->setIsCallSite(); return CurrentScope; case dwarf::DW_TAG_entry_point: - CurrentScope = new LVScopeFunction(); + CurrentScope = createScopeFunction(); CurrentScope->setIsEntryPoint(); return CurrentScope; case dwarf::DW_TAG_subprogram: - CurrentScope = new LVScopeFunction(); + CurrentScope = createScopeFunction(); CurrentScope->setIsSubprogram(); return CurrentScope; case dwarf::DW_TAG_subroutine_type: - CurrentScope = new LVScopeFunctionType(); + CurrentScope = createScopeFunctionType(); return CurrentScope; case dwarf::DW_TAG_label: - CurrentScope = new LVScopeFunction(); + CurrentScope = 
createScopeFunction(); CurrentScope->setIsLabel(); return CurrentScope; case dwarf::DW_TAG_class_type: - CurrentScope = new LVScopeAggregate(); + CurrentScope = createScopeAggregate(); CurrentScope->setIsClass(); return CurrentScope; case dwarf::DW_TAG_structure_type: - CurrentScope = new LVScopeAggregate(); + CurrentScope = createScopeAggregate(); CurrentScope->setIsStructure(); return CurrentScope; case dwarf::DW_TAG_union_type: - CurrentScope = new LVScopeAggregate(); + CurrentScope = createScopeAggregate(); CurrentScope->setIsUnion(); return CurrentScope; case dwarf::DW_TAG_enumeration_type: - CurrentScope = new LVScopeEnumeration(); + CurrentScope = createScopeEnumeration(); return CurrentScope; case dwarf::DW_TAG_GNU_formal_parameter_pack: - CurrentScope = new LVScopeFormalPack(); + CurrentScope = createScopeFormalPack(); return CurrentScope; case dwarf::DW_TAG_GNU_template_parameter_pack: - CurrentScope = new LVScopeTemplatePack(); + CurrentScope = createScopeTemplatePack(); return CurrentScope; default: // Collect TAGs not implemented. @@ -548,22 +548,22 @@ LVScope *LVELFReader::processOneDie(const DWARFDie &InputDIE, LVScope *Parent, // referencing this element. if (ElementTable.find(Offset) == ElementTable.end()) { // No previous references to this offset. - ElementTable.emplace( - std::piecewise_construct, std::forward_as_tuple(Offset), - std::forward_as_tuple(CurrentElement, LVElementSet())); + ElementTable.emplace(std::piecewise_construct, + std::forward_as_tuple(Offset), + std::forward_as_tuple(CurrentElement)); } else { // There are previous references to this element. We need to update the // element and all the references pointing to this element. LVElementEntry &Reference = ElementTable[Offset]; - Reference.first = CurrentElement; + Reference.Element = CurrentElement; // Traverse the element set and update the elements (backtracking). - // Using the bit associated with 'type' or 'reference' allows us to set - // the correct target. 
- for (LVElement *Target : Reference.second) - Target->getHasReference() ? Target->setReference(CurrentElement) - : Target->setType(CurrentElement); + for (LVElement *Target : Reference.References) + Target->setReference(CurrentElement); + for (LVElement *Target : Reference.Types) + Target->setType(CurrentElement); // Clear the pending elements. - Reference.second.clear(); + Reference.References.clear(); + Reference.Types.clear(); } // Add the current element to its parent as there are attributes @@ -733,7 +733,7 @@ void LVELFReader::createLineAndFileRecords( // the 'processLines()' function will move each created logical line // to its enclosing logical scope, using the debug ranges information // and they will be released when its scope parent is deleted. - LVLineDebug *Line = new LVLineDebug(); + LVLineDebug *Line = createLineDebug(); CULines.push_back(Line); Line->setAddress(Row.Address.Address); Line->setFilename( @@ -759,7 +759,8 @@ void LVELFReader::createLineAndFileRecords( } } -std::string LVELFReader::getRegisterName(LVSmall Opcode, uint64_t Operands[2]) { +std::string LVELFReader::getRegisterName(LVSmall Opcode, + ArrayRef<uint64_t> Operands) { // The 'prettyPrintRegisterOp' function uses the DWARFUnit to support // DW_OP_regval_type. At this point we are operating on a logical view // item, with no access to the underlying DWARF data used by LLVM. @@ -973,19 +974,8 @@ void LVELFReader::processLocationList(dwarf::Attribute Attr, bool CallSiteLocation) { auto ProcessLocationExpression = [&](const DWARFExpression &Expression) { - // DW_OP_const_type is variable-length and has 3 - // operands. DWARFExpression thus far only supports 2. 
- uint64_t Operands[2] = {0}; - for (const DWARFExpression::Operation &Op : Expression) { - DWARFExpression::Operation::Description Description = Op.getDescription(); - for (unsigned Operand = 0; Operand < 2; ++Operand) { - if (Description.Op[Operand] == DWARFExpression::Operation::SizeNA) - break; - Operands[Operand] = Op.getRawOperand(Operand); - } - CurrentSymbol->addLocationOperands(Op.getCode(), Operands[0], - Operands[1]); - } + for (const DWARFExpression::Operation &Op : Expression) + CurrentSymbol->addLocationOperands(Op.getCode(), Op.getRawOperands()); }; DWARFUnit *U = Die.getDwarfUnit(); @@ -1075,12 +1065,14 @@ void LVELFReader::processLocationMember(dwarf::Attribute Attr, // Update the current element with the reference. void LVELFReader::updateReference(dwarf::Attribute Attr, const DWARFFormValue &FormValue) { - // We are assuming that DW_AT_specification, DW_AT_abstract_origin, - // DW_AT_type and DW_AT_extension do not appear at the same time - // in the same DIE. + // FIXME: We are assuming that at most one Reference (DW_AT_specification, + // DW_AT_abstract_origin, ...) and at most one Type (DW_AT_import, DW_AT_type) + // appear in any single DIE, but this may not be true. uint64_t Reference = *FormValue.getAsReference(); // Get target for the given reference, if already created. - LVElement *Target = getElementForOffset(Reference, CurrentElement); + LVElement *Target = getElementForOffset( + Reference, CurrentElement, + /*IsType=*/Attr == dwarf::DW_AT_import || Attr == dwarf::DW_AT_type); // Check if we are dealing with cross CU references. if (FormValue.getForm() == dwarf::DW_FORM_ref_addr) { if (Target) { @@ -1124,26 +1116,18 @@ void LVELFReader::updateReference(dwarf::Attribute Attr, } // Get an element given the DIE offset. -LVElement *LVELFReader::getElementForOffset(LVOffset Offset, - LVElement *Element) { - LVElement *Target = nullptr; - // Search offset in the cross references. 
- LVElementReference::iterator Iter = ElementTable.find(Offset); - if (Iter == ElementTable.end()) - // Reference to an unseen element. - ElementTable.emplace(std::piecewise_construct, - std::forward_as_tuple(Offset), - std::forward_as_tuple(nullptr, LVElementSet{Element})); - else { - // There are previous references to this element. We need to update the - // element and all the references pointing to this element. - LVElementEntry &Reference = Iter->second; - Target = Reference.first; - if (!Target) - // Add the element to the set. - Reference.second.insert(Element); +LVElement *LVELFReader::getElementForOffset(LVOffset Offset, LVElement *Element, + bool IsType) { + auto Iter = ElementTable.try_emplace(Offset).first; + // Update the element and all the references pointing to this element. + LVElementEntry &Entry = Iter->second; + if (!Entry.Element) { + if (IsType) + Entry.Types.insert(Element); + else + Entry.References.insert(Element); } - return Target; + return Entry.Element; } Error LVELFReader::loadTargetInfo(const ObjectFile &Obj) { diff --git a/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp b/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp index f9a763d724a8..c26caa647ed9 100644 --- a/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp +++ b/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp @@ -364,6 +364,18 @@ Expected<FileBufferByteStream> MSFBuilder::commit(StringRef Path, FileSize, Layout.SB->BlockSize)); } + uint64_t NumDirectoryBlocks = + bytesToBlocks(Layout.SB->NumDirectoryBytes, Layout.SB->BlockSize); + uint64_t DirectoryBlockMapSize = + NumDirectoryBlocks * sizeof(support::ulittle32_t); + if (DirectoryBlockMapSize > Layout.SB->BlockSize) { + return make_error<MSFError>(msf_error_code::stream_directory_overflow, + formatv("The directory block map ({0} bytes) " + "doesn't fit in a block ({1} bytes)", + DirectoryBlockMapSize, + Layout.SB->BlockSize)); + } + auto OutFileOrError = FileOutputBuffer::create(Path, FileSize); if (auto EC = OutFileOrError.takeError()) return std::move(EC); diff 
--git a/llvm/lib/DebugInfo/MSF/MSFError.cpp b/llvm/lib/DebugInfo/MSF/MSFError.cpp index fd93c3e726cc..dbd8648c4d41 100644 --- a/llvm/lib/DebugInfo/MSF/MSFError.cpp +++ b/llvm/lib/DebugInfo/MSF/MSFError.cpp @@ -43,6 +43,8 @@ public: return "The data is in an unexpected format."; case msf_error_code::block_in_use: return "The block is already in use."; + case msf_error_code::stream_directory_overflow: + return "PDB stream directory too large."; } llvm_unreachable("Unrecognized msf_error_code"); } diff --git a/llvm/lib/DebugInfo/PDB/Native/DbiModuleList.cpp b/llvm/lib/DebugInfo/PDB/Native/DbiModuleList.cpp index 009cd113f652..081cede6d840 100644 --- a/llvm/lib/DebugInfo/PDB/Native/DbiModuleList.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/DbiModuleList.cpp @@ -240,7 +240,9 @@ Error DbiModuleList::initializeFileInfo(BinaryStreamRef FileInfo) { } uint32_t DbiModuleList::getModuleCount() const { - return FileInfoHeader->NumModules; + // Workaround to avoid the crash until upstream issue is fixed: + // https://github.com/llvm/llvm-project/issues/55214 + return FileInfoHeader ? 
FileInfoHeader->NumModules : 0; } uint32_t DbiModuleList::getSourceFileCount() const { diff --git a/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp index 169d49f64eb5..b17fbd63e9fd 100644 --- a/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp @@ -76,7 +76,7 @@ struct llvm::pdb::SymbolDenseMapInfo { return Tombstone; } static unsigned getHashValue(const CVSymbol &Val) { - return xxHash64(Val.RecordData); + return xxh3_64bits(Val.RecordData); } static bool isEqual(const CVSymbol &LHS, const CVSymbol &RHS) { return LHS.RecordData == RHS.RecordData; diff --git a/llvm/lib/DebugInfo/PDB/Native/InputFile.cpp b/llvm/lib/DebugInfo/PDB/Native/InputFile.cpp index 495b25077737..85c22483fa90 100644 --- a/llvm/lib/DebugInfo/PDB/Native/InputFile.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/InputFile.cpp @@ -8,6 +8,7 @@ #include "llvm/DebugInfo/PDB/Native/InputFile.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/BinaryFormat/Magic.h" #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h" @@ -347,32 +348,32 @@ Expected<InputFile> InputFile::open(StringRef Path, bool AllowUnknownFile) { PDBFile &InputFile::pdb() { assert(isPdb()); - return *PdbOrObj.get<PDBFile *>(); + return *cast<PDBFile *>(PdbOrObj); } const PDBFile &InputFile::pdb() const { assert(isPdb()); - return *PdbOrObj.get<PDBFile *>(); + return *cast<PDBFile *>(PdbOrObj); } object::COFFObjectFile &InputFile::obj() { assert(isObj()); - return *PdbOrObj.get<object::COFFObjectFile *>(); + return *cast<object::COFFObjectFile *>(PdbOrObj); } const object::COFFObjectFile &InputFile::obj() const { assert(isObj()); - return *PdbOrObj.get<object::COFFObjectFile *>(); + return *cast<object::COFFObjectFile *>(PdbOrObj); } MemoryBuffer &InputFile::unknown() { assert(isUnknown()); - return *PdbOrObj.get<MemoryBuffer *>(); + return *cast<MemoryBuffer *>(PdbOrObj); } 
const MemoryBuffer &InputFile::unknown() const { assert(isUnknown()); - return *PdbOrObj.get<MemoryBuffer *>(); + return *cast<MemoryBuffer *>(PdbOrObj); } StringRef InputFile::getFilePath() const { @@ -402,13 +403,13 @@ bool InputFile::hasIds() const { return pdb().hasPDBIpiStream(); } -bool InputFile::isPdb() const { return PdbOrObj.is<PDBFile *>(); } +bool InputFile::isPdb() const { return isa<PDBFile *>(PdbOrObj); } bool InputFile::isObj() const { - return PdbOrObj.is<object::COFFObjectFile *>(); + return isa<object::COFFObjectFile *>(PdbOrObj); } -bool InputFile::isUnknown() const { return PdbOrObj.is<MemoryBuffer *>(); } +bool InputFile::isUnknown() const { return isa<MemoryBuffer *>(PdbOrObj); } codeview::LazyRandomTypeCollection & InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) { @@ -562,13 +563,13 @@ static bool isMyCode(const SymbolGroup &Group) { StringRef Name = Group.name(); if (Name.startswith("Import:")) return false; - if (Name.endswith_insensitive(".dll")) + if (Name.ends_with_insensitive(".dll")) return false; if (Name.equals_insensitive("* linker *")) return false; - if (Name.startswith_insensitive("f:\\binaries\\Intermediate\\vctools")) + if (Name.starts_with_insensitive("f:\\binaries\\Intermediate\\vctools")) return false; - if (Name.startswith_insensitive("f:\\dd\\vctools\\crt")) + if (Name.starts_with_insensitive("f:\\dd\\vctools\\crt")) return false; return true; } diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp index 65e253ed115f..b0aadf861cbc 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp @@ -8,6 +8,7 @@ #include "llvm/DebugInfo/PDB/Native/NativeEnumInjectedSources.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" #include "llvm/DebugInfo/PDB/Native/HashTable.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" 
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp index cf314c3bede3..91b428afaddb 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp @@ -8,6 +8,7 @@ #include "llvm/DebugInfo/PDB/Native/NativeSession.h" +#include "llvm/ADT/SmallString.h" #include "llvm/BinaryFormat/Magic.h" #include "llvm/DebugInfo/MSF/MSFCommon.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeSourceFile.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeSourceFile.cpp index 8d6f8ebebf4c..89b1614ba2af 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeSourceFile.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeSourceFile.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/PDB/Native/NativeSourceFile.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/DebugInfo/PDB/Native/NativeSession.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp index 27df769ee6f2..cd30b56be7cd 100644 --- a/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/GUID.h" #include "llvm/DebugInfo/MSF/MSFBuilder.h" @@ -338,7 +340,7 @@ Error PDBFileBuilder::commit(StringRef Filename, codeview::GUID *Guid) { if (Info->hashPDBContentsToGUID()) { // Compute a hash of all sections of the output file. 
uint64_t Digest = - xxHash64({Buffer.getBufferStart(), Buffer.getBufferEnd()}); + xxh3_64bits({Buffer.getBufferStart(), Buffer.getBufferEnd()}); H->Age = 1; diff --git a/llvm/lib/DebugInfo/PDB/PDBExtras.cpp b/llvm/lib/DebugInfo/PDB/PDBExtras.cpp index 571510e6bad9..2b318bf1c648 100644 --- a/llvm/lib/DebugInfo/PDB/PDBExtras.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBExtras.cpp @@ -232,6 +232,8 @@ raw_ostream &llvm::pdb::operator<<(raw_ostream &OS, const PDB_Lang &Lang) { CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, D, OS) CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, Swift, OS) CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, Rust, OS) + CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, ObjC, OS) + CASE_OUTPUT_ENUM_CLASS_STR(PDB_Lang, ObjCpp, "ObjC++", OS) } return OS; } diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp index bd60489b6bed..437b96677c0b 100644 --- a/llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp +++ b/llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp @@ -102,6 +102,8 @@ std::string PDBSymbolCompiland::getSourceFileFullPath() const { .Case(".asm", Lang == PDB_Lang::Masm) .Case(".swift", Lang == PDB_Lang::Swift) .Case(".rs", Lang == PDB_Lang::Rust) + .Case(".m", Lang == PDB_Lang::ObjC) + .Case(".mm", Lang == PDB_Lang::ObjCpp) .Default(false)) return File->getFileName(); } diff --git a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp index bfd6f7c02ca3..f9669b554b47 100644 --- a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp +++ b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp @@ -266,11 +266,8 @@ void PlainPrinterBase::printInvalidCommand(const Request &Request, } bool PlainPrinterBase::printError(const Request &Request, - const ErrorInfoBase &ErrorInfo, - StringRef ErrorBanner) { - ES << ErrorBanner; - ErrorInfo.log(ES); - ES << '\n'; + const ErrorInfoBase &ErrorInfo) { + ErrHandler(ErrorInfo, Request.ModuleName); // Print an empty struct too. 
return true; } @@ -288,6 +285,24 @@ static json::Object toJSON(const Request &Request, StringRef ErrorMsg = "") { return Json; } +static json::Object toJSON(const DILineInfo &LineInfo) { + return json::Object( + {{"FunctionName", LineInfo.FunctionName != DILineInfo::BadString + ? LineInfo.FunctionName + : ""}, + {"StartFileName", LineInfo.StartFileName != DILineInfo::BadString + ? LineInfo.StartFileName + : ""}, + {"StartLine", LineInfo.StartLine}, + {"StartAddress", + LineInfo.StartAddress ? toHex(*LineInfo.StartAddress) : ""}, + {"FileName", + LineInfo.FileName != DILineInfo::BadString ? LineInfo.FileName : ""}, + {"Line", LineInfo.Line}, + {"Column", LineInfo.Column}, + {"Discriminator", LineInfo.Discriminator}}); +} + void JSONPrinter::print(const Request &Request, const DILineInfo &Info) { DIInliningInfo InliningInfo; InliningInfo.addFrame(Info); @@ -298,21 +313,7 @@ void JSONPrinter::print(const Request &Request, const DIInliningInfo &Info) { json::Array Array; for (uint32_t I = 0, N = Info.getNumberOfFrames(); I < N; ++I) { const DILineInfo &LineInfo = Info.getFrame(I); - json::Object Object( - {{"FunctionName", LineInfo.FunctionName != DILineInfo::BadString - ? LineInfo.FunctionName - : ""}, - {"StartFileName", LineInfo.StartFileName != DILineInfo::BadString - ? LineInfo.StartFileName - : ""}, - {"StartLine", LineInfo.StartLine}, - {"StartAddress", - LineInfo.StartAddress ? toHex(*LineInfo.StartAddress) : ""}, - {"FileName", - LineInfo.FileName != DILineInfo::BadString ? 
LineInfo.FileName : ""}, - {"Line", LineInfo.Line}, - {"Column", LineInfo.Column}, - {"Discriminator", LineInfo.Discriminator}}); + json::Object Object = toJSON(LineInfo); SourceCode SourceCode(LineInfo.FileName, LineInfo.Line, Config.SourceContextLines, LineInfo.Source); std::string FormattedSource; @@ -370,13 +371,11 @@ void JSONPrinter::printInvalidCommand(const Request &Request, StringRef Command) { printError(Request, StringError("unable to parse arguments: " + Command, - std::make_error_code(std::errc::invalid_argument)), - ""); + std::make_error_code(std::errc::invalid_argument))); } bool JSONPrinter::printError(const Request &Request, - const ErrorInfoBase &ErrorInfo, - StringRef ErrorBanner) { + const ErrorInfoBase &ErrorInfo) { json::Object Json = toJSON(Request, ErrorInfo.message()); if (ObjectList) ObjectList->push_back(std::move(Json)); diff --git a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp index 5e9d8ac538df..a2bc2577b70a 100644 --- a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp +++ b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp @@ -133,9 +133,8 @@ bool MarkupFilter::tryReset(const MarkupNode &Node, endAnyModuleInfoLine(); for (const MarkupNode &Node : DeferredNodes) filterNode(Node); - highlight(); - OS << "[[[reset]]]" << lineEnding(); - restoreColor(); + printRawElement(Node); + OS << lineEnding(); Modules.clear(); MMaps.clear(); @@ -239,8 +238,7 @@ bool MarkupFilter::tryPC(const MarkupNode &Node) { return false; if (!checkNumFieldsAtLeast(Node, 1)) return true; - if (!checkNumFieldsAtMost(Node, 2)) - return true; + warnNumFieldsAtMost(Node, 2); std::optional<uint64_t> Addr = parseAddr(Node.Fields[0]); if (!Addr) @@ -293,8 +291,7 @@ bool MarkupFilter::tryBackTrace(const MarkupNode &Node) { return false; if (!checkNumFieldsAtLeast(Node, 2)) return true; - if (!checkNumFieldsAtMost(Node, 3)) - return true; + warnNumFieldsAtMost(Node, 3); std::optional<uint64_t> FrameNumber = 
parseFrameNumber(Node.Fields[0]); if (!FrameNumber) @@ -513,8 +510,9 @@ MarkupFilter::parseModule(const MarkupNode &Element) const { } if (!checkNumFields(Element, 4)) return std::nullopt; - ASSIGN_OR_RETURN_NONE(SmallVector<uint8_t>, BuildID, - parseBuildID(Element.Fields[3])); + SmallVector<uint8_t> BuildID = parseBuildID(Element.Fields[3]); + if (BuildID.empty()) + return std::nullopt; return Module{ID, Name.str(), std::move(BuildID)}; } @@ -597,16 +595,11 @@ std::optional<uint64_t> MarkupFilter::parseFrameNumber(StringRef Str) const { } // Parse a build ID (%x in the spec). -std::optional<SmallVector<uint8_t>> -MarkupFilter::parseBuildID(StringRef Str) const { - std::string Bytes; - if (Str.empty() || Str.size() % 2 || !tryGetFromHex(Str, Bytes)) { +object::BuildID MarkupFilter::parseBuildID(StringRef Str) const { + object::BuildID BID = llvm::object::parseBuildID(Str); + if (BID.empty()) reportTypeError(Str, "build ID"); - return std::nullopt; - } - ArrayRef<uint8_t> BuildID(reinterpret_cast<const uint8_t *>(Bytes.data()), - Bytes.size()); - return SmallVector<uint8_t>(BuildID.begin(), BuildID.end()); + return BID; } // Parses the mode string for an mmap element. @@ -659,10 +652,12 @@ bool MarkupFilter::checkTag(const MarkupNode &Node) const { bool MarkupFilter::checkNumFields(const MarkupNode &Element, size_t Size) const { if (Element.Fields.size() != Size) { - WithColor::error(errs()) << "expected " << Size << " field(s); found " - << Element.Fields.size() << "\n"; + bool Warn = Element.Fields.size() > Size; + WithColor(errs(), Warn ? HighlightColor::Warning : HighlightColor::Error) + << (Warn ? 
"warning: " : "error: ") << "expected " << Size + << " field(s); found " << Element.Fields.size() << "\n"; reportLocation(Element.Tag.end()); - return false; + return Warn; } return true; } @@ -679,16 +674,14 @@ bool MarkupFilter::checkNumFieldsAtLeast(const MarkupNode &Element, return true; } -bool MarkupFilter::checkNumFieldsAtMost(const MarkupNode &Element, - size_t Size) const { - if (Element.Fields.size() > Size) { - WithColor::error(errs()) - << "expected at most " << Size << " field(s); found " - << Element.Fields.size() << "\n"; - reportLocation(Element.Tag.end()); - return false; - } - return true; +void MarkupFilter::warnNumFieldsAtMost(const MarkupNode &Element, + size_t Size) const { + if (Element.Fields.size() <= Size) + return; + WithColor::warning(errs()) + << "expected at most " << Size << " field(s); found " + << Element.Fields.size() << "\n"; + reportLocation(Element.Tag.end()); } void MarkupFilter::reportTypeError(StringRef Str, StringRef TypeName) const { diff --git a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp index 5c65742a39f5..6b8068a531c0 100644 --- a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp +++ b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp @@ -12,7 +12,6 @@ #include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/Triple.h" #include "llvm/BinaryFormat/COFF.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/Object/COFF.h" @@ -21,6 +20,7 @@ #include "llvm/Object/SymbolSize.h" #include "llvm/Support/Casting.h" #include "llvm/Support/DataExtractor.h" +#include "llvm/TargetParser/Triple.h" #include <algorithm> using namespace llvm; diff --git a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp index 72c008d9835e..517f1e7dc284 100644 --- a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp +++ b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp @@ -13,6 +13,7 
@@ #include "llvm/DebugInfo/Symbolize/Symbolize.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/DebugInfo/BTF/BTFContext.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/PDB/PDB.h" #include "llvm/DebugInfo/PDB/PDBContext.h" @@ -363,12 +364,10 @@ ObjectFile *LLVMSymbolizer::lookUpBuildIDObject(const std::string &Path, const ELFObjectFileBase *Obj, const std::string &ArchName) { auto BuildID = getBuildID(Obj); - if (!BuildID) - return nullptr; - if (BuildID->size() < 2) + if (BuildID.size() < 2) return nullptr; std::string DebugBinaryPath; - if (!getOrFindDebugBinary(*BuildID, DebugBinaryPath)) + if (!getOrFindDebugBinary(BuildID, DebugBinaryPath)) return nullptr; auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName); if (!DbgObjOrErr) { @@ -617,6 +616,13 @@ LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) { return ModuleOrErr; } +// For BPF programs .BTF.ext section contains line numbers information, +// use it if regular DWARF is not available (e.g. for stripped binary). 
+static bool useBTFContext(const ObjectFile &Obj) { + return Obj.makeTriple().isBPF() && !Obj.hasDebugInfo() && + BTFParser::hasBTFSections(Obj); +} + Expected<SymbolizableModule *> LLVMSymbolizer::getOrCreateModuleInfo(const ObjectFile &Obj) { StringRef ObjName = Obj.getFileName(); @@ -624,7 +630,11 @@ LLVMSymbolizer::getOrCreateModuleInfo(const ObjectFile &Obj) { if (I != Modules.end()) return I->second.get(); - std::unique_ptr<DIContext> Context = DWARFContext::create(Obj); + std::unique_ptr<DIContext> Context; + if (useBTFContext(Obj)) + Context = BTFContext::create(Obj); + else + Context = DWARFContext::create(Obj); // FIXME: handle COFF object with PDB info to use PDBContext return createModuleInfo(&Obj, std::move(Context), ObjName); } @@ -634,8 +644,7 @@ LLVMSymbolizer::getOrCreateModuleInfo(ArrayRef<uint8_t> BuildID) { std::string Path; if (!getOrFindDebugBinary(BuildID, Path)) { return createStringError(errc::no_such_file_or_directory, - Twine("could not find build ID '") + - toHex(BuildID) + "'"); + "could not find build ID"); } return getOrCreateModuleInfo(Path); } @@ -649,22 +658,29 @@ namespace { // vectorcall - foo@@12 // These are all different linkage names for 'foo'. StringRef demanglePE32ExternCFunc(StringRef SymbolName) { - // Remove any '_' or '@' prefix. char Front = SymbolName.empty() ? '\0' : SymbolName[0]; - if (Front == '_' || Front == '@') - SymbolName = SymbolName.drop_front(); // Remove any '@[0-9]+' suffix. + bool HasAtNumSuffix = false; if (Front != '?') { size_t AtPos = SymbolName.rfind('@'); if (AtPos != StringRef::npos && - all_of(drop_begin(SymbolName, AtPos + 1), isDigit)) + all_of(drop_begin(SymbolName, AtPos + 1), isDigit)) { SymbolName = SymbolName.substr(0, AtPos); + HasAtNumSuffix = true; + } } // Remove any ending '@' for vectorcall. 
- if (SymbolName.endswith("@")) + bool IsVectorCall = false; + if (HasAtNumSuffix && SymbolName.endswith("@")) { SymbolName = SymbolName.drop_back(); + IsVectorCall = true; + } + + // If not vectorcall, remove any '_' or '@' prefix. + if (!IsVectorCall && (Front == '_' || Front == '@')) + SymbolName = SymbolName.drop_front(); return SymbolName; } @@ -675,14 +691,14 @@ std::string LLVMSymbolizer::DemangleName(const std::string &Name, const SymbolizableModule *DbiModuleDescriptor) { std::string Result; - if (nonMicrosoftDemangle(Name.c_str(), Result)) + if (nonMicrosoftDemangle(Name, Result)) return Result; if (!Name.empty() && Name.front() == '?') { // Only do MSVC C++ demangling on symbols starting with '?'. int status = 0; char *DemangledName = microsoftDemangle( - Name.c_str(), nullptr, nullptr, nullptr, &status, + Name, nullptr, &status, MSDemangleFlags(MSDF_NoAccessSpecifier | MSDF_NoCallingConvention | MSDF_NoMemberType | MSDF_NoReturnType)); if (status != 0) @@ -692,8 +708,14 @@ LLVMSymbolizer::DemangleName(const std::string &Name, return Result; } - if (DbiModuleDescriptor && DbiModuleDescriptor->isWin32Module()) - return std::string(demanglePE32ExternCFunc(Name)); + if (DbiModuleDescriptor && DbiModuleDescriptor->isWin32Module()) { + std::string DemangledCName(demanglePE32ExternCFunc(Name)); + // On i386 Windows, the C name mangling for different calling conventions + // may also be applied on top of the Itanium or Rust name mangling. + if (nonMicrosoftDemangle(DemangledCName, Result)) + return Result; + return DemangledCName; + } return Name; } |