path: root/contrib/llvm/lib
author     Dimitry Andric <dim@FreeBSD.org>   2017-04-20 21:48:54 +0000
committer  Dimitry Andric <dim@FreeBSD.org>   2017-04-20 21:48:54 +0000
commit     6bc11b14146b9a41402d0348438ff4edb1e344cd (patch)
tree       a3128f15d970747b64d8aaaa66d9fe8c176bef8a /contrib/llvm/lib
parent     554491ffbdcfe51993d5b436a9bbca7aba388dd3 (diff)
parent     583e75cce441388bc562fa225d23499261a0091e (diff)
download   src-6bc11b14146b9a41402d0348438ff4edb1e344cd.tar.gz
           src-6bc11b14146b9a41402d0348438ff4edb1e344cd.zip
Merge llvm, clang, lld and lldb trunk r300890, and update build glue.
Notes:
    svn path=/projects/clang500-import/; revision=317230
Diffstat (limited to 'contrib/llvm/lib')
-rw-r--r--contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp12
-rw-r--r--contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp133
-rw-r--r--contrib/llvm/lib/Analysis/CFLGraph.h3
-rw-r--r--contrib/llvm/lib/Analysis/InstructionSimplify.cpp105
-rw-r--r--contrib/llvm/lib/Analysis/MemoryBuiltins.cpp17
-rw-r--r--contrib/llvm/lib/Analysis/MemorySSA.cpp3
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolution.cpp216
-rw-r--r--contrib/llvm/lib/Analysis/ValueTracking.cpp110
-rw-r--r--contrib/llvm/lib/AsmParser/LLParser.cpp15
-rw-r--r--contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp316
-rw-r--r--contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp57
-rw-r--r--contrib/llvm/lib/Bitcode/Reader/MetadataLoader.h3
-rw-r--r--contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp415
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp18
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp9
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp25
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp23
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp13
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp145
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h17
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp14
-rw-r--r--contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp16
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp28
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp17
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp32
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp12
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp5
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp17
-rw-r--r--contrib/llvm/lib/CodeGen/InlineSpiller.cpp16
-rw-r--r--contrib/llvm/lib/CodeGen/LowLevelType.cpp6
-rw-r--r--contrib/llvm/lib/CodeGen/MachineInstr.cpp28
-rw-r--r--contrib/llvm/lib/CodeGen/MachineVerifier.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocFast.cpp14
-rw-r--r--contrib/llvm/lib/CodeGen/SafeStack.cpp6
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp207
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp6
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp7
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h2
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp13
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp92
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp26
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h2
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp69
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp30
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp70
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp2
-rw-r--r--contrib/llvm/lib/IR/Attributes.cpp112
-rw-r--r--contrib/llvm/lib/IR/ConstantFold.cpp2
-rw-r--r--contrib/llvm/lib/IR/ConstantRange.cpp92
-rw-r--r--contrib/llvm/lib/IR/Constants.cpp28
-rw-r--r--contrib/llvm/lib/IR/Core.cpp33
-rw-r--r--contrib/llvm/lib/IR/DataLayout.cpp12
-rw-r--r--contrib/llvm/lib/IR/Function.cpp23
-rw-r--r--contrib/llvm/lib/IR/Instructions.cpp15
-rw-r--r--contrib/llvm/lib/MC/MCDwarf.cpp21
-rw-r--r--contrib/llvm/lib/MC/MCParser/AsmParser.cpp4
-rw-r--r--contrib/llvm/lib/Object/Archive.cpp51
-rw-r--r--contrib/llvm/lib/Object/Binary.cpp20
-rw-r--r--contrib/llvm/lib/Object/COFFObjectFile.cpp32
-rw-r--r--contrib/llvm/lib/Object/IRSymtab.cpp35
-rw-r--r--contrib/llvm/lib/Object/ObjectFile.cpp14
-rw-r--r--contrib/llvm/lib/Object/SymbolicFile.cpp14
-rw-r--r--contrib/llvm/lib/Support/APFloat.cpp2
-rw-r--r--contrib/llvm/lib/Support/APInt.cpp256
-rw-r--r--contrib/llvm/lib/Support/CommandLine.cpp5
-rw-r--r--contrib/llvm/lib/Support/Dwarf.cpp169
-rw-r--r--contrib/llvm/lib/Support/LowLevelType.cpp29
-rw-r--r--contrib/llvm/lib/Support/Regex.cpp2
-rw-r--r--contrib/llvm/lib/Support/TargetParser.cpp2
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp8
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp38
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h2
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td5
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp1
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td6
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp22
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h3
-rw-r--r--contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp55
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp226
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp25
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/DSInstructions.td2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp1
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp29
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h8
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/ARM/ARM.td43
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp166
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h23
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallingConv.td6
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp16
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFastISel.cpp3
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp50
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp128
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp66
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.h2
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.td4
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrNEON.td54
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td4
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp14
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp5
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.h6
-rw-r--r--contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp11
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp2
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp180
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp5
-rw-r--r--contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/BitTracker.cpp10
-rw-r--r--contrib/llvm/lib/Target/Hexagon/BitTracker.h10
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp70
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/RDFCopy.cpp2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/RDFGraph.h3
-rw-r--r--contrib/llvm/lib/Target/Hexagon/RDFRegisters.cpp47
-rw-r--r--contrib/llvm/lib/Target/Hexagon/RDFRegisters.h13
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td74
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp25
-rw-r--r--contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp4
-rw-r--r--contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp2
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h2
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp2
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp2
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt28
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp8
-rw-r--r--contrib/llvm/lib/Target/X86/X86FrameLowering.cpp9
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp5
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.cpp54
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.h2
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp61
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterBankInfo.cpp1
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.h5
-rw-r--r--contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp15
-rw-r--r--contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp40
-rw-r--r--contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp15
-rw-r--r--contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp40
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp12
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp8
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp23
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp20
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp38
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp77
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp239
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp99
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/GVNHoist.cpp3
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp3
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp6
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp39
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp14
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp8
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp24
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LCSSA.cpp31
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Local.cpp8
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp106
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp5
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp24
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp12
182 files changed, 3361 insertions, 2659 deletions
diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 09582cf9a71d..3db041cc0fa6 100644
--- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -808,7 +808,7 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
// well. Or alternatively, replace all of this with inaccessiblememonly once
// that's implemented fully.
auto *Inst = CS.getInstruction();
- if (isMallocLikeFn(Inst, &TLI) || isCallocLikeFn(Inst, &TLI)) {
+ if (isMallocOrCallocLikeFn(Inst, &TLI)) {
// Be conservative if the accessed pointer may alias the allocation -
// fallback to the generic handling below.
if (getBestAAResults().alias(MemoryLocation(Inst), Loc) == NoAlias)
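The hunk above treats a malloc- or calloc-like call as touching only memory that may alias its own return value. A short standalone C++ illustration of the property being exploited (plain C++, not the LLVM alias-analysis API): memory returned by a malloc-like call is fresh, so a store through it cannot modify any location that existed before the call.

#include <cassert>
#include <cstdlib>

int main() {
  int existing = 42;
  // A malloc-like call returns memory that no prior pointer refers to,
  // so the store below cannot change 'existing'.
  int *fresh = static_cast<int *>(std::malloc(sizeof(int)));
  if (fresh) {
    *fresh = 7; // NoAlias with &existing
    assert(existing == 42);
    std::free(fresh);
  }
  return 0;
}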
@@ -925,9 +925,8 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
const DataLayout &DL) {
assert(GEP1->getPointerOperand()->stripPointerCasts() ==
- GEP2->getPointerOperand()->stripPointerCasts() &&
- GEP1->getPointerOperand()->getType() ==
- GEP2->getPointerOperand()->getType() &&
+ GEP2->getPointerOperand()->stripPointerCasts() &&
+ GEP1->getPointerOperandType() == GEP2->getPointerOperandType() &&
"Expected GEPs with the same pointer operand");
// Try to determine whether GEP1 and GEP2 index through arrays, into structs,
@@ -1186,9 +1185,8 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
// just the same underlying object), see if that tells us anything about
// the resulting pointers.
if (GEP1->getPointerOperand()->stripPointerCasts() ==
- GEP2->getPointerOperand()->stripPointerCasts() &&
- GEP1->getPointerOperand()->getType() ==
- GEP2->getPointerOperand()->getType()) {
+ GEP2->getPointerOperand()->stripPointerCasts() &&
+ GEP1->getPointerOperandType() == GEP2->getPointerOperandType()) {
AliasResult R = aliasSameBasePointerGEPs(GEP1, V1Size, GEP2, V2Size, DL);
// If we couldn't find anything interesting, don't abandon just yet.
if (R != MayAlias)
diff --git a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
index 5935dec15c70..0dc4475ca0e2 100644
--- a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
@@ -72,6 +72,32 @@ static const uint32_t UR_TAKEN_WEIGHT = 1;
/// easily subsume it.
static const uint32_t UR_NONTAKEN_WEIGHT = 1024*1024 - 1;
+/// \brief Returns the branch probability for an unreachable edge according
+/// to the heuristic.
+///
+/// This is the branch probability being taken to a block that terminates
+/// (eventually) in unreachable. These are predicted as unlikely as possible.
+static BranchProbability getUnreachableProbability(uint64_t UnreachableCount) {
+ assert(UnreachableCount > 0 && "UnreachableCount must be > 0");
+ return BranchProbability::getBranchProbability(
+ UR_TAKEN_WEIGHT,
+ (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) * UnreachableCount);
+}
+
+/// \brief Returns the branch probability for a reachable edge according
+/// to the heuristic.
+///
+/// This is the branch probability not being taken toward a block that
+/// terminates (eventually) in unreachable. Such a branch is essentially never
+/// taken. Set the weight to an absurdly high value so that nested loops don't
+/// easily subsume it.
+static BranchProbability getReachableProbability(uint64_t ReachableCount) {
+ assert(ReachableCount > 0 && "ReachableCount must be > 0");
+ return BranchProbability::getBranchProbability(
+ UR_NONTAKEN_WEIGHT,
+ (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) * ReachableCount);
+}
+
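As a sanity check of the constants used above, here is a standalone C++ sketch (plain integers and doubles, not the LLVM BranchProbability class) evaluating the two formulas for a block with one unreachable and one reachable successor; the constants mirror UR_TAKEN_WEIGHT = 1 and UR_NONTAKEN_WEIGHT = 1024*1024 - 1 from this file.

#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t TakenWeight = 1;                  // UR_TAKEN_WEIGHT
  const uint64_t NontakenWeight = 1024 * 1024 - 1; // UR_NONTAKEN_WEIGHT
  const uint64_t UnreachableCount = 1, ReachableCount = 1;

  // Probability of each edge that leads to unreachable:
  //   1 / (2^20 * UnreachableCount)
  double unreachableProb =
      double(TakenWeight) /
      double((TakenWeight + NontakenWeight) * UnreachableCount);

  // Probability of each reachable edge:
  //   (2^20 - 1) / (2^20 * ReachableCount)
  double reachableProb =
      double(NontakenWeight) /
      double((TakenWeight + NontakenWeight) * ReachableCount);

  std::printf("unreachable %.8f reachable %.8f sum %.8f\n",
              unreachableProb, reachableProb,
              unreachableProb + reachableProb); // sums to ~1.0
  return 0;
}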
/// \brief Weight for a branch taken going into a cold block.
///
/// This is the weight for a branch taken toward a block marked
@@ -179,7 +205,11 @@ BranchProbabilityInfo::updatePostDominatedByColdCall(const BasicBlock *BB) {
/// unreachable-terminated block as extremely unlikely.
bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) {
const TerminatorInst *TI = BB->getTerminator();
- if (TI->getNumSuccessors() == 0)
+ assert(TI->getNumSuccessors() > 1 && "expected more than one successor!");
+
+ // Return false here so that edge weights for InvokeInst could be decided
+ // in calcInvokeHeuristics().
+ if (isa<InvokeInst>(TI))
return false;
SmallVector<unsigned, 4> UnreachableEdges;
@@ -191,14 +221,8 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) {
else
ReachableEdges.push_back(I.getSuccessorIndex());
- // Skip probabilities if this block has a single successor or if all were
- // reachable.
- if (TI->getNumSuccessors() == 1 || UnreachableEdges.empty())
- return false;
-
- // Return false here so that edge weights for InvokeInst could be decided
- // in calcInvokeHeuristics().
- if (isa<InvokeInst>(TI))
+ // Skip probabilities if all were reachable.
+ if (UnreachableEdges.empty())
return false;
if (ReachableEdges.empty()) {
@@ -208,12 +232,8 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) {
return true;
}
- auto UnreachableProb = BranchProbability::getBranchProbability(
- UR_TAKEN_WEIGHT, (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) *
- uint64_t(UnreachableEdges.size()));
- auto ReachableProb = BranchProbability::getBranchProbability(
- UR_NONTAKEN_WEIGHT,
- (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) * uint64_t(ReachableEdges.size()));
+ auto UnreachableProb = getUnreachableProbability(UnreachableEdges.size());
+ auto ReachableProb = getReachableProbability(ReachableEdges.size());
for (unsigned SuccIdx : UnreachableEdges)
setEdgeProbability(BB, SuccIdx, UnreachableProb);
@@ -224,11 +244,12 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) {
}
// Propagate existing explicit probabilities from either profile data or
-// 'expect' intrinsic processing.
+// 'expect' intrinsic processing. Examine the metadata against the
+// unreachable heuristic: the probability of an edge leading to an
+// unreachable block is capped at the minimum of the metadata weight and
+// the unreachable heuristic.
bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
const TerminatorInst *TI = BB->getTerminator();
- if (TI->getNumSuccessors() == 1)
- return false;
+ assert(TI->getNumSuccessors() > 1 && "expected more than one successor!");
if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
return false;
@@ -249,6 +270,8 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
// be scaled to fit in 32 bits.
uint64_t WeightSum = 0;
SmallVector<uint32_t, 2> Weights;
+ SmallVector<unsigned, 2> UnreachableIdxs;
+ SmallVector<unsigned, 2> ReachableIdxs;
Weights.reserve(TI->getNumSuccessors());
for (unsigned i = 1, e = WeightsNode->getNumOperands(); i != e; ++i) {
ConstantInt *Weight =
@@ -259,6 +282,10 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
"Too many bits for uint32_t");
Weights.push_back(Weight->getZExtValue());
WeightSum += Weights.back();
+ if (PostDominatedByUnreachable.count(TI->getSuccessor(i - 1)))
+ UnreachableIdxs.push_back(i - 1);
+ else
+ ReachableIdxs.push_back(i - 1);
}
assert(Weights.size() == TI->getNumSuccessors() && "Checked above");
@@ -267,20 +294,52 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
uint64_t ScalingFactor =
(WeightSum > UINT32_MAX) ? WeightSum / UINT32_MAX + 1 : 1;
- WeightSum = 0;
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
- Weights[i] /= ScalingFactor;
- WeightSum += Weights[i];
+ if (ScalingFactor > 1) {
+ WeightSum = 0;
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
+ Weights[i] /= ScalingFactor;
+ WeightSum += Weights[i];
+ }
}
- if (WeightSum == 0) {
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- setEdgeProbability(BB, i, {1, e});
- } else {
+ if (WeightSum == 0 || ReachableIdxs.size() == 0) {
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- setEdgeProbability(BB, i, {Weights[i], static_cast<uint32_t>(WeightSum)});
+ Weights[i] = 1;
+ WeightSum = TI->getNumSuccessors();
+ }
+
+ // Set the probability.
+ SmallVector<BranchProbability, 2> BP;
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ BP.push_back({ Weights[i], static_cast<uint32_t>(WeightSum) });
+
+ // Examine the metadata against the unreachable heuristic.
+ // If the unreachable heuristic is stronger, then we use it for this edge.
+ if (UnreachableIdxs.size() > 0 && ReachableIdxs.size() > 0) {
+ auto ToDistribute = BranchProbability::getZero();
+ auto UnreachableProb = getUnreachableProbability(UnreachableIdxs.size());
+ for (auto i : UnreachableIdxs)
+ if (UnreachableProb < BP[i]) {
+ ToDistribute += BP[i] - UnreachableProb;
+ BP[i] = UnreachableProb;
+ }
+
+ // If we modified the probability of some edges then we must distribute
+ // the difference between reachable blocks.
+ if (ToDistribute > BranchProbability::getZero()) {
+ BranchProbability PerEdge = ToDistribute / ReachableIdxs.size();
+ for (auto i : ReachableIdxs) {
+ BP[i] += PerEdge;
+ ToDistribute -= PerEdge;
+ }
+ // Tail goes to the first reachable edge.
+ BP[ReachableIdxs[0]] += ToDistribute;
+ }
}
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ setEdgeProbability(BB, i, BP[i]);
+
assert(WeightSum <= UINT32_MAX &&
"Expected weights to scale down to 32 bits");
@@ -297,7 +356,11 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
/// Return false, otherwise.
bool BranchProbabilityInfo::calcColdCallHeuristics(const BasicBlock *BB) {
const TerminatorInst *TI = BB->getTerminator();
- if (TI->getNumSuccessors() == 0)
+ assert(TI->getNumSuccessors() > 1 && "expected more than one successor!");
+
+ // Return false here so that edge weights for InvokeInst could be decided
+ // in calcInvokeHeuristics().
+ if (isa<InvokeInst>(TI))
return false;
// Determine which successors are post-dominated by a cold block.
@@ -309,13 +372,8 @@ bool BranchProbabilityInfo::calcColdCallHeuristics(const BasicBlock *BB) {
else
NormalEdges.push_back(I.getSuccessorIndex());
- // Return false here so that edge weights for InvokeInst could be decided
- // in calcInvokeHeuristics().
- if (isa<InvokeInst>(TI))
- return false;
-
- // Skip probabilities if this block has a single successor.
- if (TI->getNumSuccessors() == 1 || ColdEdges.empty())
+ // Skip probabilities if no cold edges.
+ if (ColdEdges.empty())
return false;
if (NormalEdges.empty()) {
@@ -698,10 +756,13 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI) {
DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n");
updatePostDominatedByUnreachable(BB);
updatePostDominatedByColdCall(BB);
- if (calcUnreachableHeuristics(BB))
+ // If there are fewer than two successors, there is no point in setting probabilities.
+ if (BB->getTerminator()->getNumSuccessors() < 2)
continue;
if (calcMetadataWeights(BB))
continue;
+ if (calcUnreachableHeuristics(BB))
+ continue;
if (calcColdCallHeuristics(BB))
continue;
if (calcLoopBranchHeuristics(BB, LI))
diff --git a/contrib/llvm/lib/Analysis/CFLGraph.h b/contrib/llvm/lib/Analysis/CFLGraph.h
index e526e0e16aa7..75726e84569b 100644
--- a/contrib/llvm/lib/Analysis/CFLGraph.h
+++ b/contrib/llvm/lib/Analysis/CFLGraph.h
@@ -400,8 +400,7 @@ template <typename CFLAA> class CFLGraphBuilder {
// TODO: address other common library functions such as realloc(),
// strdup(),
// etc.
- if (isMallocLikeFn(Inst, &TLI) || isCallocLikeFn(Inst, &TLI) ||
- isFreeCall(Inst, &TLI))
+ if (isMallocOrCallocLikeFn(Inst, &TLI) || isFreeCall(Inst, &TLI))
return;
// TODO: Add support for noalias args/all the other fun function
diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
index e12f640394e6..2259fbaeb982 100644
--- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -75,20 +75,16 @@ static Value *SimplifyXorInst(Value *, Value *, const Query &, unsigned);
static Value *SimplifyCastInst(unsigned, Value *, Type *,
const Query &, unsigned);
-/// For a boolean type, or a vector of boolean type, return false, or
-/// a vector with every element false, as appropriate for the type.
+/// For a boolean type or a vector of boolean type, return false or a vector
+/// with every element false.
static Constant *getFalse(Type *Ty) {
- assert(Ty->getScalarType()->isIntegerTy(1) &&
- "Expected i1 type or a vector of i1!");
- return Constant::getNullValue(Ty);
+ return ConstantInt::getFalse(Ty);
}
-/// For a boolean type, or a vector of boolean type, return true, or
-/// a vector with every element true, as appropriate for the type.
+/// For a boolean type or a vector of boolean type, return true or a vector
+/// with every element true.
static Constant *getTrue(Type *Ty) {
- assert(Ty->getScalarType()->isIntegerTy(1) &&
- "Expected i1 type or a vector of i1!");
- return Constant::getAllOnesValue(Ty);
+ return ConstantInt::getTrue(Ty);
}
/// isSameCompare - Is V equivalent to the comparison "LHS Pred RHS"?
@@ -572,11 +568,11 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
match(Op1, m_Not(m_Specific(Op0))))
return Constant::getAllOnesValue(Ty);
- // add nsw/nuw (xor Y, signbit), signbit --> Y
+ // add nsw/nuw (xor Y, signmask), signmask --> Y
// The no-wrapping add guarantees that the top bit will be set by the add.
// Therefore, the xor must be clearing the already set sign bit of Y.
- if ((isNSW || isNUW) && match(Op1, m_SignBit()) &&
- match(Op0, m_Xor(m_Value(Y), m_SignBit())))
+ if ((isNSW || isNUW) && match(Op1, m_SignMask()) &&
+ match(Op0, m_Xor(m_Value(Y), m_SignMask())))
return Y;
/// i1 add -> xor.
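A standalone check of the fold above, using 8-bit arithmetic: when the add is known not to wrap (unsigned), the first operand (Y ^ 0x80) must have its top bit clear, so Y's top bit was set and adding 0x80 simply restores it. This is only an illustration of the arithmetic, not the InstSimplify code.

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t SignMask = 0x80;
  for (unsigned v = 0; v < 256; ++v) {
    uint8_t Y = static_cast<uint8_t>(v);
    uint8_t Xored = Y ^ SignMask;
    // "nuw" holds exactly when the 8-bit add does not carry out.
    bool NoUnsignedWrap = (unsigned(Xored) + unsigned(SignMask)) <= 0xFF;
    if (NoUnsignedWrap)
      assert(uint8_t(Xored + SignMask) == Y); // the xor is undone by the add
  }
  return 0;
}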
@@ -1085,7 +1081,7 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
if (!isSigned && match(Op0, m_UDiv(m_Value(X), m_ConstantInt(C1))) &&
match(Op1, m_ConstantInt(C2))) {
bool Overflow;
- C1->getValue().umul_ov(C2->getValue(), Overflow);
+ (void)C1->getValue().umul_ov(C2->getValue(), Overflow);
if (Overflow)
return Constant::getNullValue(Op0->getType());
}
@@ -2823,7 +2819,7 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
return ConstantInt::getTrue(RHS->getContext());
}
}
- if (CIVal->isSignBit() && *CI2Val == 1) {
+ if (CIVal->isSignMask() && *CI2Val == 1) {
if (Pred == ICmpInst::ICMP_UGT)
return ConstantInt::getFalse(RHS->getContext());
if (Pred == ICmpInst::ICMP_ULE)
@@ -3800,6 +3796,8 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
Type *GEPTy = PointerType::get(LastType, AS);
if (VectorType *VT = dyn_cast<VectorType>(Ops[0]->getType()))
GEPTy = VectorType::get(GEPTy, VT->getNumElements());
+ else if (VectorType *VT = dyn_cast<VectorType>(Ops[1]->getType()))
+ GEPTy = VectorType::get(GEPTy, VT->getNumElements());
if (isa<UndefValue>(Ops[0]))
return UndefValue::get(GEPTy);
@@ -4082,6 +4080,60 @@ Value *llvm::SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
RecursionLimit);
}
+/// For the given destination element of a shuffle, peek through shuffles to
+/// match a root vector source operand that contains that element in the same
+/// vector lane (ie, the same mask index), so we can eliminate the shuffle(s).
+static Value *foldIdentityShuffles(int DestElt, Value *Op0, Value *Op1,
+ Constant *Mask, Value *RootVec, int RootElt,
+ unsigned MaxRecurse) {
+ if (!MaxRecurse--)
+ return nullptr;
+
+ // Bail out if any mask value is undefined. That kind of shuffle may be
+ // simplified further based on demanded bits or other folds.
+ int MaskVal = ShuffleVectorInst::getMaskValue(Mask, RootElt);
+ if (MaskVal == -1)
+ return nullptr;
+
+ // The mask value chooses which source operand we need to look at next.
+ Value *SourceOp;
+ int InVecNumElts = Op0->getType()->getVectorNumElements();
+ if (MaskVal < InVecNumElts) {
+ RootElt = MaskVal;
+ SourceOp = Op0;
+ } else {
+ RootElt = MaskVal - InVecNumElts;
+ SourceOp = Op1;
+ }
+
+ // If the source operand is a shuffle itself, look through it to find the
+ // matching root vector.
+ if (auto *SourceShuf = dyn_cast<ShuffleVectorInst>(SourceOp)) {
+ return foldIdentityShuffles(
+ DestElt, SourceShuf->getOperand(0), SourceShuf->getOperand(1),
+ SourceShuf->getMask(), RootVec, RootElt, MaxRecurse);
+ }
+
+ // TODO: Look through bitcasts? What if the bitcast changes the vector element
+ // size?
+
+ // The source operand is not a shuffle. Initialize the root vector value for
+ // this shuffle if that has not been done yet.
+ if (!RootVec)
+ RootVec = SourceOp;
+
+ // Give up as soon as a source operand does not match the existing root value.
+ if (RootVec != SourceOp)
+ return nullptr;
+
+ // The element must be coming from the same lane in the source vector
+ // (although it may have crossed lanes in intermediate shuffles).
+ if (RootElt != DestElt)
+ return nullptr;
+
+ return RootVec;
+}
+
static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
Type *RetTy, const Query &Q,
unsigned MaxRecurse) {
@@ -4126,7 +4178,28 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
OpShuf->getMask()->getSplatValue())
return Op1;
- return nullptr;
+ // Don't fold a shuffle with undef mask elements. This may get folded in a
+ // better way using demanded bits or other analysis.
+ // TODO: Should we allow this?
+ for (unsigned i = 0; i != MaskNumElts; ++i)
+ if (ShuffleVectorInst::getMaskValue(Mask, i) == -1)
+ return nullptr;
+
+ // Check if every element of this shuffle can be mapped back to the
+ // corresponding element of a single root vector. If so, we don't need this
+ // shuffle. This handles simple identity shuffles as well as chains of
+ // shuffles that may widen/narrow and/or move elements across lanes and back.
+ Value *RootVec = nullptr;
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ // Note that recursion is limited for each vector element, so if any element
+ // exceeds the limit, this will fail to simplify.
+ RootVec = foldIdentityShuffles(i, Op0, Op1, Mask, RootVec, i, MaxRecurse);
+
+ // We can't replace a widening/narrowing shuffle with one of its operands.
+ if (!RootVec || RootVec->getType() != RetTy)
+ return nullptr;
+ }
+ return RootVec;
}
/// Given operands for a ShuffleVectorInst, fold the result or return null.
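The new simplification walks each destination lane back through a chain of shuffles and succeeds only if every lane maps to the same lane of one common source vector. A standalone sketch of that idea on plain index vectors (hypothetical helper, no LLVM types):

#include <cassert>
#include <utility>
#include <vector>

// Each element is tagged with (source vector id, lane in that source).
using Elt = std::pair<int, int>;
using Vec = std::vector<Elt>;

// Hypothetical helper: apply one shuffle with a mask free of undef (-1).
static Vec shuffle(const Vec &Op0, const Vec &Op1,
                   const std::vector<int> &Mask) {
  Vec Out;
  int N = static_cast<int>(Op0.size());
  for (int M : Mask)
    Out.push_back(M < N ? Op0[M] : Op1[M - N]);
  return Out;
}

int main() {
  // Root vector A with 4 lanes, plus an unrelated vector B.
  Vec A = {{0, 0}, {0, 1}, {0, 2}, {0, 3}};
  Vec B = {{1, 0}, {1, 1}, {1, 2}, {1, 3}};

  // Two shuffles that scramble A and then undo the scramble.
  Vec T = shuffle(A, B, {3, 2, 1, 0}); // reverse A
  Vec R = shuffle(T, B, {3, 2, 1, 0}); // reverse again

  // Every destination lane i maps back to lane i of the single root A, so
  // the whole chain can be replaced by A -- what foldIdentityShuffles checks.
  assert(R == A);
  return 0;
}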
diff --git a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
index b8c444904723..7983d62c2f7a 100644
--- a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
+++ b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -37,6 +37,7 @@ enum AllocType : uint8_t {
CallocLike = 1<<2, // allocates + bzero
ReallocLike = 1<<3, // reallocates
StrDupLike = 1<<4,
+ MallocOrCallocLike = MallocLike | CallocLike,
AllocLike = MallocLike | CallocLike | StrDupLike,
AnyAlloc = AllocLike | ReallocLike
};
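The new MallocOrCallocLike value is simply the union of the two existing category bits, so one getAllocationData query can accept either kind of allocator. A tiny standalone sketch of the same bitmask pattern (the MallocLike bit value below is assumed; only the CallocLike and StrDupLike values appear in this hunk):

#include <cassert>
#include <cstdint>

enum AllocKind : uint8_t {
  MallocLike = 1 << 1,
  CallocLike = 1 << 2,
  StrDupLike = 1 << 4,
  MallocOrCallocLike = MallocLike | CallocLike, // union of the two bits
};

// A function matches a query if its kind shares any bit with the query mask.
static bool matches(AllocKind FnKind, AllocKind Query) {
  return (FnKind & Query) != 0;
}

int main() {
  assert(matches(MallocLike, MallocOrCallocLike));
  assert(matches(CallocLike, MallocOrCallocLike));
  assert(!matches(StrDupLike, MallocOrCallocLike)); // strdup is excluded
  return 0;
}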
@@ -77,8 +78,8 @@ static const std::pair<LibFunc, AllocFnsTy> AllocationFnData[] = {
// TODO: Handle "int posix_memalign(void **, size_t, size_t)"
};
-static Function *getCalledFunction(const Value *V, bool LookThroughBitCast,
- bool &IsNoBuiltin) {
+static const Function *getCalledFunction(const Value *V, bool LookThroughBitCast,
+ bool &IsNoBuiltin) {
// Don't care about intrinsics in this case.
if (isa<IntrinsicInst>(V))
return nullptr;
@@ -86,13 +87,13 @@ static Function *getCalledFunction(const Value *V, bool LookThroughBitCast,
if (LookThroughBitCast)
V = V->stripPointerCasts();
- CallSite CS(const_cast<Value*>(V));
+ ImmutableCallSite CS(V);
if (!CS.getInstruction())
return nullptr;
IsNoBuiltin = CS.isNoBuiltin();
- Function *Callee = CS.getCalledFunction();
+ const Function *Callee = CS.getCalledFunction();
if (!Callee || !Callee->isDeclaration())
return nullptr;
return Callee;
@@ -220,6 +221,14 @@ bool llvm::isCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
}
/// \brief Tests if a value is a call or invoke to a library function that
+/// allocates memory similar to malloc or calloc.
+bool llvm::isMallocOrCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast) {
+ return getAllocationData(V, MallocOrCallocLike, TLI,
+ LookThroughBitCast).hasValue();
+}
+
+/// \brief Tests if a value is a call or invoke to a library function that
/// allocates memory (either malloc, calloc, or strdup like).
bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
bool LookThroughBitCast) {
diff --git a/contrib/llvm/lib/Analysis/MemorySSA.cpp b/contrib/llvm/lib/Analysis/MemorySSA.cpp
index 910170561abf..2480fe44d5c0 100644
--- a/contrib/llvm/lib/Analysis/MemorySSA.cpp
+++ b/contrib/llvm/lib/Analysis/MemorySSA.cpp
@@ -1291,7 +1291,6 @@ void MemorySSA::buildMemorySSA() {
// could just look up the memory access for every possible instruction in the
// stream.
SmallPtrSet<BasicBlock *, 32> DefiningBlocks;
- SmallPtrSet<BasicBlock *, 32> DefUseBlocks;
// Go through each block, figure out where defs occur, and chain together all
// the accesses.
for (BasicBlock &B : F) {
@@ -1316,8 +1315,6 @@ void MemorySSA::buildMemorySSA() {
}
if (InsertIntoDef)
DefiningBlocks.insert(&B);
- if (Accesses)
- DefUseBlocks.insert(&B);
}
placePHINodes(DefiningBlocks, BBNumbers);
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
index ca32cf3c7c34..700c383a9dd4 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -1093,7 +1093,7 @@ static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
APInt Mult(W, i);
unsigned TwoFactors = Mult.countTrailingZeros();
T += TwoFactors;
- Mult = Mult.lshr(TwoFactors);
+ Mult.lshrInPlace(TwoFactors);
OddFactorial *= Mult;
}
@@ -1276,7 +1276,8 @@ static const SCEV *getUnsignedOverflowLimitForStep(const SCEV *Step,
namespace {
struct ExtendOpTraitsBase {
- typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(const SCEV *, Type *);
+ typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(
+ const SCEV *, Type *, ScalarEvolution::ExtendCacheTy &Cache);
};
// Used to make code generic over signed and unsigned overflow.
@@ -1305,8 +1306,9 @@ struct ExtendOpTraits<SCEVSignExtendExpr> : public ExtendOpTraitsBase {
}
};
-const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
- SCEVSignExtendExpr>::GetExtendExpr = &ScalarEvolution::getSignExtendExpr;
+const ExtendOpTraitsBase::GetExtendExprTy
+ ExtendOpTraits<SCEVSignExtendExpr>::GetExtendExpr =
+ &ScalarEvolution::getSignExtendExprCached;
template <>
struct ExtendOpTraits<SCEVZeroExtendExpr> : public ExtendOpTraitsBase {
@@ -1321,8 +1323,9 @@ struct ExtendOpTraits<SCEVZeroExtendExpr> : public ExtendOpTraitsBase {
}
};
-const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
- SCEVZeroExtendExpr>::GetExtendExpr = &ScalarEvolution::getZeroExtendExpr;
+const ExtendOpTraitsBase::GetExtendExprTy
+ ExtendOpTraits<SCEVZeroExtendExpr>::GetExtendExpr =
+ &ScalarEvolution::getZeroExtendExprCached;
}
// The recurrence AR has been shown to have no signed/unsigned wrap or something
@@ -1334,7 +1337,8 @@ const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
// "sext/zext(PostIncAR)"
template <typename ExtendOpTy>
static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
- ScalarEvolution *SE) {
+ ScalarEvolution *SE,
+ ScalarEvolution::ExtendCacheTy &Cache) {
auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
@@ -1381,9 +1385,9 @@ static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
const SCEV *OperandExtendedStart =
- SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy),
- (SE->*GetExtendExpr)(Step, WideTy));
- if ((SE->*GetExtendExpr)(Start, WideTy) == OperandExtendedStart) {
+ SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy, Cache),
+ (SE->*GetExtendExpr)(Step, WideTy, Cache));
+ if ((SE->*GetExtendExpr)(Start, WideTy, Cache) == OperandExtendedStart) {
if (PreAR && AR->getNoWrapFlags(WrapType)) {
// If we know `AR` == {`PreStart`+`Step`,+,`Step`} is `WrapType` (FlagNSW
// or FlagNUW) and that `PreStart` + `Step` is `WrapType` too, then
@@ -1408,15 +1412,17 @@ static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
// Get the normalized zero or sign extended expression for this AddRec's Start.
template <typename ExtendOpTy>
static const SCEV *getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty,
- ScalarEvolution *SE) {
+ ScalarEvolution *SE,
+ ScalarEvolution::ExtendCacheTy &Cache) {
auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
- const SCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE);
+ const SCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE, Cache);
if (!PreStart)
- return (SE->*GetExtendExpr)(AR->getStart(), Ty);
+ return (SE->*GetExtendExpr)(AR->getStart(), Ty, Cache);
- return SE->getAddExpr((SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty),
- (SE->*GetExtendExpr)(PreStart, Ty));
+ return SE->getAddExpr(
+ (SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty, Cache),
+ (SE->*GetExtendExpr)(PreStart, Ty, Cache));
}
// Try to prove away overflow by looking at "nearby" add recurrences. A
@@ -1496,8 +1502,31 @@ bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start,
return false;
}
-const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
- Type *Ty) {
+const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty) {
+ // Use the local cache to prevent exponential behavior of
+ // getZeroExtendExprImpl.
+ ExtendCacheTy Cache;
+ return getZeroExtendExprCached(Op, Ty, Cache);
+}
+
+/// Query \p Cache before calling getZeroExtendExprImpl. If there is no
+/// related entry in the \p Cache, call getZeroExtendExprImpl and save
+/// the result in the \p Cache.
+const SCEV *ScalarEvolution::getZeroExtendExprCached(const SCEV *Op, Type *Ty,
+ ExtendCacheTy &Cache) {
+ auto It = Cache.find({Op, Ty});
+ if (It != Cache.end())
+ return It->second;
+ const SCEV *ZExt = getZeroExtendExprImpl(Op, Ty, Cache);
+ auto InsertResult = Cache.insert({{Op, Ty}, ZExt});
+ assert(InsertResult.second && "Expect the key was not in the cache");
+ (void)InsertResult;
+ return ZExt;
+}
+
+/// The real implementation of getZeroExtendExpr.
+const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
+ ExtendCacheTy &Cache) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
"This is not an extending conversion!");
assert(isSCEVable(Ty) &&
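The Cache threaded through these functions is a memoization table keyed on (expression, destination type); without it, the recursive extend-expression builder can revisit the same shared sub-expression exponentially many times. A standalone sketch of the same pattern on a toy expression DAG (hypothetical types, not ScalarEvolution):

#include <cstdio>
#include <map>
#include <vector>

struct Expr {
  std::vector<const Expr *> Ops; // shared sub-expressions form a DAG
};

static long Calls = 0;

// Recursive rewrite with a per-query memoization cache, mirroring the
// (SCEV*, Type*) -> SCEV* cache used by getZeroExtendExprCached.
static int extend(const Expr *E, std::map<const Expr *, int> &Cache) {
  ++Calls;
  auto It = Cache.find(E);
  if (It != Cache.end())
    return It->second;            // already extended this node
  int Result = 1;                 // stand-in for building the extended expr
  for (const Expr *Op : E->Ops)
    Result += extend(Op, Cache);
  Cache.insert({E, Result});
  return Result;
}

int main() {
  // A diamond-shaped DAG: without caching, the shared nodes are visited
  // once per path, which grows exponentially with depth.
  Expr Leaf;
  Expr Mid1{{&Leaf, &Leaf}}, Mid2{{&Mid1, &Mid1}}, Top{{&Mid2, &Mid2}};

  std::map<const Expr *, int> Cache; // fresh cache per top-level query
  extend(&Top, Cache);
  std::printf("recursive calls: %ld\n", Calls); // 7 with the cache, 15 without
  return 0;
}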
@@ -1507,11 +1536,11 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
// Fold if the operand is constant.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
return getConstant(
- cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(), Ty)));
+ cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(), Ty)));
// zext(zext(x)) --> zext(x)
if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
- return getZeroExtendExpr(SZ->getOperand(), Ty);
+ return getZeroExtendExprCached(SZ->getOperand(), Ty, Cache);
// Before doing any expensive analysis, check to see if we've already
// computed a SCEV for this Op and Ty.
@@ -1555,8 +1584,8 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
// we don't need to do any further analysis.
if (AR->hasNoUnsignedWrap())
return getAddRecExpr(
- getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
- getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Cache),
+ getZeroExtendExprCached(Step, Ty, Cache), L, AR->getNoWrapFlags());
// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
@@ -1581,21 +1610,22 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
// Check whether Start+Step*MaxBECount has no unsigned overflow.
const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step);
- const SCEV *ZAdd = getZeroExtendExpr(getAddExpr(Start, ZMul), WideTy);
- const SCEV *WideStart = getZeroExtendExpr(Start, WideTy);
+ const SCEV *ZAdd =
+ getZeroExtendExprCached(getAddExpr(Start, ZMul), WideTy, Cache);
+ const SCEV *WideStart = getZeroExtendExprCached(Start, WideTy, Cache);
const SCEV *WideMaxBECount =
- getZeroExtendExpr(CastedMaxBECount, WideTy);
- const SCEV *OperandExtendedAdd =
- getAddExpr(WideStart,
- getMulExpr(WideMaxBECount,
- getZeroExtendExpr(Step, WideTy)));
+ getZeroExtendExprCached(CastedMaxBECount, WideTy, Cache);
+ const SCEV *OperandExtendedAdd = getAddExpr(
+ WideStart, getMulExpr(WideMaxBECount, getZeroExtendExprCached(
+ Step, WideTy, Cache)));
if (ZAdd == OperandExtendedAdd) {
// Cache knowledge of AR NUW, which is propagated to this AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
- getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
- getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Cache),
+ getZeroExtendExprCached(Step, Ty, Cache), L,
+ AR->getNoWrapFlags());
}
// Similar to above, only this time treat the step value as signed.
// This covers loops that count down.
@@ -1609,7 +1639,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
- getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Cache),
getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
}
}
@@ -1641,8 +1671,9 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
- getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
- getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Cache),
+ getZeroExtendExprCached(Step, Ty, Cache), L,
+ AR->getNoWrapFlags());
}
} else if (isKnownNegative(Step)) {
const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
@@ -1657,7 +1688,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
- getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Cache),
getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
}
}
@@ -1666,8 +1697,8 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
if (proveNoWrapByVaryingStart<SCEVZeroExtendExpr>(Start, Step, L)) {
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
return getAddRecExpr(
- getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
- getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Cache),
+ getZeroExtendExprCached(Step, Ty, Cache), L, AR->getNoWrapFlags());
}
}
@@ -1678,7 +1709,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
// commute the zero extension with the addition operation.
SmallVector<const SCEV *, 4> Ops;
for (const auto *Op : SA->operands())
- Ops.push_back(getZeroExtendExpr(Op, Ty));
+ Ops.push_back(getZeroExtendExprCached(Op, Ty, Cache));
return getAddExpr(Ops, SCEV::FlagNUW);
}
}
@@ -1692,8 +1723,31 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
return S;
}
-const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
- Type *Ty) {
+const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty) {
+ // Use the local cache to prevent exponential behavior of
+ // getSignExtendExprImpl.
+ ExtendCacheTy Cache;
+ return getSignExtendExprCached(Op, Ty, Cache);
+}
+
+/// Query \p Cache before calling getSignExtendExprImpl. If there is no
+/// related entry in the \p Cache, call getSignExtendExprImpl and save
+/// the result in the \p Cache.
+const SCEV *ScalarEvolution::getSignExtendExprCached(const SCEV *Op, Type *Ty,
+ ExtendCacheTy &Cache) {
+ auto It = Cache.find({Op, Ty});
+ if (It != Cache.end())
+ return It->second;
+ const SCEV *SExt = getSignExtendExprImpl(Op, Ty, Cache);
+ auto InsertResult = Cache.insert({{Op, Ty}, SExt});
+ assert(InsertResult.second && "Expect the key was not in the cache");
+ (void)InsertResult;
+ return SExt;
+}
+
+/// The real implementation of getSignExtendExpr.
+const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty,
+ ExtendCacheTy &Cache) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
"This is not an extending conversion!");
assert(isSCEVable(Ty) &&
@@ -1703,11 +1757,11 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// Fold if the operand is constant.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
return getConstant(
- cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(), Ty)));
+ cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(), Ty)));
// sext(sext(x)) --> sext(x)
if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
- return getSignExtendExpr(SS->getOperand(), Ty);
+ return getSignExtendExprCached(SS->getOperand(), Ty, Cache);
// sext(zext(x)) --> zext(x)
if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
@@ -1746,8 +1800,8 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
const APInt &C2 = SC2->getAPInt();
if (C1.isStrictlyPositive() && C2.isStrictlyPositive() &&
C2.ugt(C1) && C2.isPowerOf2())
- return getAddExpr(getSignExtendExpr(SC1, Ty),
- getSignExtendExpr(SMul, Ty));
+ return getAddExpr(getSignExtendExprCached(SC1, Ty, Cache),
+ getSignExtendExprCached(SMul, Ty, Cache));
}
}
}
@@ -1758,7 +1812,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// commute the sign extension with the addition operation.
SmallVector<const SCEV *, 4> Ops;
for (const auto *Op : SA->operands())
- Ops.push_back(getSignExtendExpr(Op, Ty));
+ Ops.push_back(getSignExtendExprCached(Op, Ty, Cache));
return getAddExpr(Ops, SCEV::FlagNSW);
}
}
@@ -1782,8 +1836,8 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// we don't need to do any further analysis.
if (AR->hasNoSignedWrap())
return getAddRecExpr(
- getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
- getSignExtendExpr(Step, Ty), L, SCEV::FlagNSW);
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Cache),
+ getSignExtendExprCached(Step, Ty, Cache), L, SCEV::FlagNSW);
// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
@@ -1808,21 +1862,22 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
// Check whether Start+Step*MaxBECount has no signed overflow.
const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
- const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul), WideTy);
- const SCEV *WideStart = getSignExtendExpr(Start, WideTy);
+ const SCEV *SAdd =
+ getSignExtendExprCached(getAddExpr(Start, SMul), WideTy, Cache);
+ const SCEV *WideStart = getSignExtendExprCached(Start, WideTy, Cache);
const SCEV *WideMaxBECount =
- getZeroExtendExpr(CastedMaxBECount, WideTy);
- const SCEV *OperandExtendedAdd =
- getAddExpr(WideStart,
- getMulExpr(WideMaxBECount,
- getSignExtendExpr(Step, WideTy)));
+ getZeroExtendExpr(CastedMaxBECount, WideTy);
+ const SCEV *OperandExtendedAdd = getAddExpr(
+ WideStart, getMulExpr(WideMaxBECount, getSignExtendExprCached(
+ Step, WideTy, Cache)));
if (SAdd == OperandExtendedAdd) {
// Cache knowledge of AR NSW, which is propagated to this AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
- getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
- getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Cache),
+ getSignExtendExprCached(Step, Ty, Cache), L,
+ AR->getNoWrapFlags());
}
// Similar to above, only this time treat the step value as unsigned.
// This covers loops that count up with an unsigned step.
@@ -1843,7 +1898,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// Return the expression with the addrec on the outside.
return getAddRecExpr(
- getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Cache),
getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
}
}
@@ -1875,8 +1930,9 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// Cache knowledge of AR NSW, then propagate NSW to the wide AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
return getAddRecExpr(
- getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
- getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Cache),
+ getSignExtendExprCached(Step, Ty, Cache), L,
+ AR->getNoWrapFlags());
}
}
@@ -1890,18 +1946,18 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
const APInt &C2 = SC2->getAPInt();
if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) &&
C2.isPowerOf2()) {
- Start = getSignExtendExpr(Start, Ty);
+ Start = getSignExtendExprCached(Start, Ty, Cache);
const SCEV *NewAR = getAddRecExpr(getZero(AR->getType()), Step, L,
AR->getNoWrapFlags());
- return getAddExpr(Start, getSignExtendExpr(NewAR, Ty));
+ return getAddExpr(Start, getSignExtendExprCached(NewAR, Ty, Cache));
}
}
if (proveNoWrapByVaryingStart<SCEVSignExtendExpr>(Start, Step, L)) {
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
return getAddRecExpr(
- getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
- getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Cache),
+ getSignExtendExprCached(Step, Ty, Cache), L, AR->getNoWrapFlags());
}
}
@@ -3951,9 +4007,9 @@ static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) {
case Instruction::Xor:
if (auto *RHSC = dyn_cast<ConstantInt>(Op->getOperand(1)))
- // If the RHS of the xor is a signbit, then this is just an add.
- // Instcombine turns add of signbit into xor as a strength reduction step.
- if (RHSC->getValue().isSignBit())
+ // If the RHS of the xor is a signmask, then this is just an add.
+ // Instcombine turns add of signmask into xor as a strength reduction step.
+ if (RHSC->getValue().isSignMask())
return BinaryOp(Instruction::Add, Op->getOperand(0), Op->getOperand(1));
return BinaryOp(Op);
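The rewrite above relies on a small modular-arithmetic identity: XOR with the sign mask only flips the top bit, and so does adding the sign mask, since any carry out of the top bit is discarded. A standalone 8-bit check (illustration only):

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t SignMask = 0x80;
  for (unsigned v = 0; v < 256; ++v) {
    uint8_t X = static_cast<uint8_t>(v);
    // x ^ 0x80 and x + 0x80 agree for every 8-bit value: both just flip the
    // sign bit, which is why instcombine can canonicalize one to the other.
    assert(uint8_t(X ^ SignMask) == uint8_t(X + SignMask));
  }
  return 0;
}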
@@ -5272,28 +5328,12 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
break;
case Instruction::Or:
- // If the RHS of the Or is a constant, we may have something like:
- // X*4+1 which got turned into X*4|1. Handle this as an Add so loop
- // optimizations will transparently handle this case.
- //
- // In order for this transformation to be safe, the LHS must be of the
- // form X*(2^n) and the Or constant must be less than 2^n.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) {
- const SCEV *LHS = getSCEV(BO->LHS);
- const APInt &CIVal = CI->getValue();
- if (GetMinTrailingZeros(LHS) >=
- (CIVal.getBitWidth() - CIVal.countLeadingZeros())) {
- // Build a plain add SCEV.
- const SCEV *S = getAddExpr(LHS, getSCEV(CI));
- // If the LHS of the add was an addrec and it has no-wrap flags,
- // transfer the no-wrap flags, since an or won't introduce a wrap.
- if (const SCEVAddRecExpr *NewAR = dyn_cast<SCEVAddRecExpr>(S)) {
- const SCEVAddRecExpr *OldAR = cast<SCEVAddRecExpr>(LHS);
- const_cast<SCEVAddRecExpr *>(NewAR)->setNoWrapFlags(
- OldAR->getNoWrapFlags());
- }
- return S;
- }
+ // Use ValueTracking to check whether this is actually an add.
+ if (haveNoCommonBitsSet(BO->LHS, BO->RHS, getDataLayout(), &AC,
+ nullptr, &DT)) {
+ // There aren't any common bits set, so the add can't wrap.
+ auto Flags = SCEV::NoWrapFlags(SCEV::FlagNUW | SCEV::FlagNSW);
+ return getAddExpr(getSCEV(BO->LHS), getSCEV(BO->RHS), Flags);
}
break;
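The new code leans on haveNoCommonBitsSet: when two values have disjoint set bits, OR-ing them and adding them produce the same result and no carry is generated, which is why both NUW and NSW can be attached to the add. A standalone 8-bit check of that fact:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned a = 0; a < 256; ++a)
    for (unsigned b = 0; b < 256; ++b)
      if ((a & b) == 0) {
        // Disjoint bits: there is no carry, so the "or" really is an "add"
        // and the 8-bit add cannot wrap.
        assert((a | b) == a + b);
        assert(a + b <= 0xFF);
      }
  return 0;
}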
@@ -5329,7 +5369,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// using an add, which is equivalent, and re-apply the zext.
APInt Trunc = CI->getValue().trunc(Z0TySize);
if (Trunc.zext(getTypeSizeInBits(UTy)) == CI->getValue() &&
- Trunc.isSignBit())
+ Trunc.isSignMask())
return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)),
UTy);
}
diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp
index d871e83f222a..900a2363e60d 100644
--- a/contrib/llvm/lib/Analysis/ValueTracking.cpp
+++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp
@@ -292,15 +292,15 @@ static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
KnownOne = PossibleSumOne & Known;
// Are we still trying to solve for the sign bit?
- if (!Known.isNegative()) {
+ if (!Known.isSignBitSet()) {
if (NSW) {
// Adding two non-negative numbers, or subtracting a negative number from
// a non-negative one, can't wrap into negative.
- if (LHSKnownZero.isNegative() && KnownZero2.isNegative())
+ if (LHSKnownZero.isSignBitSet() && KnownZero2.isSignBitSet())
KnownZero.setSignBit();
// Adding two negative numbers, or subtracting a non-negative number from
// a negative one, can't wrap into non-negative.
- else if (LHSKnownOne.isNegative() && KnownOne2.isNegative())
+ else if (LHSKnownOne.isSignBitSet() && KnownOne2.isSignBitSet())
KnownOne.setSignBit();
}
}
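The sign-bit reasoning above (for nsw adds, two known-non-negative operands give a non-negative sum, and two known-negative operands give a negative sum) can be spot-checked with a standalone 8-bit signed sketch:

#include <cassert>
#include <cstdint>

int main() {
  for (int a = -128; a < 128; ++a)
    for (int b = -128; b < 128; ++b) {
      int wide = a + b;
      bool nsw = wide >= -128 && wide <= 127; // the 8-bit add does not overflow
      if (!nsw)
        continue; // if nsw were violated, the result would be poison anyway
      int8_t sum = static_cast<int8_t>(wide);
      if (a >= 0 && b >= 0)
        assert(sum >= 0); // can't wrap into negative
      if (a < 0 && b < 0)
        assert(sum < 0);  // can't wrap into non-negative
    }
  return 0;
}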
@@ -322,10 +322,10 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
// The product of a number with itself is non-negative.
isKnownNonNegative = true;
} else {
- bool isKnownNonNegativeOp1 = KnownZero.isNegative();
- bool isKnownNonNegativeOp0 = KnownZero2.isNegative();
- bool isKnownNegativeOp1 = KnownOne.isNegative();
- bool isKnownNegativeOp0 = KnownOne2.isNegative();
+ bool isKnownNonNegativeOp1 = KnownZero.isSignBitSet();
+ bool isKnownNonNegativeOp0 = KnownZero2.isSignBitSet();
+ bool isKnownNegativeOp1 = KnownOne.isSignBitSet();
+ bool isKnownNegativeOp0 = KnownOne2.isSignBitSet();
// The product of two numbers with the same sign is non-negative.
isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
(isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
@@ -361,9 +361,9 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
// which case we prefer to follow the result of the direct computation,
// though as the program is invoking undefined behaviour we can choose
// whatever we like here.
- if (isKnownNonNegative && !KnownOne.isNegative())
+ if (isKnownNonNegative && !KnownOne.isSignBitSet())
KnownZero.setSignBit();
- else if (isKnownNegative && !KnownZero.isNegative())
+ else if (isKnownNegative && !KnownZero.isSignBitSet())
KnownOne.setSignBit();
}
@@ -661,8 +661,10 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero,
computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
// For those bits in RHS that are known, we can propagate them to known
// bits in V shifted to the right by C.
- KnownZero |= RHSKnownZero.lshr(C->getZExtValue());
- KnownOne |= RHSKnownOne.lshr(C->getZExtValue());
+ RHSKnownZero.lshrInPlace(C->getZExtValue());
+ KnownZero |= RHSKnownZero;
+ RHSKnownOne.lshrInPlace(C->getZExtValue());
+ KnownOne |= RHSKnownOne;
// assume(~(v << c) = a)
} else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))),
m_Value(A))) &&
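For the assume(v << c == a) pattern handled above, known bits of a can be shifted right by c and transferred to v: bit i of v feeds bit i + c of a, while the top c bits of v stay unknown. A standalone 8-bit sketch of that transfer (plain masks instead of APInt, and a fully known a for simplicity):

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  const unsigned C = 3;
  const uint8_t V = 0b00010110;                   // pretend V is unknown
  const uint8_t A = static_cast<uint8_t>(V << C); // assume(v << 3 == a)

  // Every known bit of A, shifted right by C, becomes a known bit of V.
  uint8_t KnownOne = static_cast<uint8_t>(A >> C);
  uint8_t KnownZero = static_cast<uint8_t>(uint8_t(~A) >> C);

  // The recovered bits really are bits of V, and only the top C bits of V
  // remain unknown (they were shifted out of A).
  assert((V & KnownOne) == KnownOne);
  assert((V & KnownZero) == 0);
  assert((KnownOne | KnownZero) == uint8_t(0xFF >> C));
  std::printf("known-one %02x known-zero %02x\n",
              unsigned(KnownOne), unsigned(KnownZero));
  return 0;
}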
@@ -672,8 +674,10 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero,
computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
// For those bits in RHS that are known, we can propagate them inverted
// to known bits in V shifted to the right by C.
- KnownZero |= RHSKnownOne.lshr(C->getZExtValue());
- KnownOne |= RHSKnownZero.lshr(C->getZExtValue());
+ RHSKnownOne.lshrInPlace(C->getZExtValue());
+ KnownZero |= RHSKnownOne;
+ RHSKnownZero.lshrInPlace(C->getZExtValue());
+ KnownOne |= RHSKnownZero;
// assume(v >> c = a)
} else if (match(Arg,
m_c_ICmp(Pred, m_CombineOr(m_LShr(m_V, m_ConstantInt(C)),
@@ -707,7 +711,7 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero,
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
- if (RHSKnownZero.isNegative()) {
+ if (RHSKnownZero.isSignBitSet()) {
// We know that the sign bit is zero.
KnownZero.setSignBit();
}
@@ -718,7 +722,7 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero,
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
- if (RHSKnownOne.isAllOnesValue() || RHSKnownZero.isNegative()) {
+ if (RHSKnownOne.isAllOnesValue() || RHSKnownZero.isSignBitSet()) {
// We know that the sign bit is zero.
KnownZero.setSignBit();
}
@@ -729,7 +733,7 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero,
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
- if (RHSKnownOne.isNegative()) {
+ if (RHSKnownOne.isSignBitSet()) {
// We know that the sign bit is one.
KnownOne.setSignBit();
}
@@ -740,7 +744,7 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero,
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
- if (RHSKnownZero.isAllOnesValue() || RHSKnownOne.isNegative()) {
+ if (RHSKnownZero.isAllOnesValue() || RHSKnownOne.isSignBitSet()) {
// We know that the sign bit is one.
KnownOne.setSignBit();
}
@@ -990,23 +994,23 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
unsigned MaxHighZeros = 0;
if (SPF == SPF_SMAX) {
// If both sides are negative, the result is negative.
- if (KnownOne.isNegative() && KnownOne2.isNegative())
+ if (KnownOne.isSignBitSet() && KnownOne2.isSignBitSet())
// We can derive a lower bound on the result by taking the max of the
// leading one bits.
MaxHighOnes =
std::max(KnownOne.countLeadingOnes(), KnownOne2.countLeadingOnes());
// If either side is non-negative, the result is non-negative.
- else if (KnownZero.isNegative() || KnownZero2.isNegative())
+ else if (KnownZero.isSignBitSet() || KnownZero2.isSignBitSet())
MaxHighZeros = 1;
} else if (SPF == SPF_SMIN) {
// If both sides are non-negative, the result is non-negative.
- if (KnownZero.isNegative() && KnownZero2.isNegative())
+ if (KnownZero.isSignBitSet() && KnownZero2.isSignBitSet())
// We can derive an upper bound on the result by taking the max of the
// leading zero bits.
MaxHighZeros = std::max(KnownZero.countLeadingOnes(),
KnownZero2.countLeadingOnes());
// If either side is negative, the result is negative.
- else if (KnownOne.isNegative() || KnownOne2.isNegative())
+ else if (KnownOne.isSignBitSet() || KnownOne2.isSignBitSet())
MaxHighOnes = 1;
} else if (SPF == SPF_UMAX) {
// We can derive a lower bound on the result by taking the max of the
@@ -1092,14 +1096,14 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
KZResult.setLowBits(ShiftAmt); // Low bits known 0.
// If this shift has "nsw" keyword, then the result is either a poison
// value or has the same sign bit as the first operand.
- if (NSW && KnownZero.isNegative())
+ if (NSW && KnownZero.isSignBitSet())
KZResult.setSignBit();
return KZResult;
};
auto KOF = [NSW](const APInt &KnownOne, unsigned ShiftAmt) {
APInt KOResult = KnownOne << ShiftAmt;
- if (NSW && KnownOne.isNegative())
+ if (NSW && KnownOne.isSignBitSet())
KOResult.setSignBit();
return KOResult;
};
@@ -1111,10 +1115,11 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
}
case Instruction::LShr: {
// (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
- auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) {
- return KnownZero.lshr(ShiftAmt) |
- // High bits known zero.
- APInt::getHighBitsSet(BitWidth, ShiftAmt);
+ auto KZF = [](const APInt &KnownZero, unsigned ShiftAmt) {
+ APInt KZResult = KnownZero.lshr(ShiftAmt);
+ // High bits known zero.
+ KZResult.setHighBits(ShiftAmt);
+ return KZResult;
};
auto KOF = [](const APInt &KnownOne, unsigned ShiftAmt) {
@@ -1169,28 +1174,25 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
// If the first operand is non-negative or has all low bits zero, then
// the upper bits are all zero.
- if (KnownZero2.isNegative() || ((KnownZero2 & LowBits) == LowBits))
+ if (KnownZero2.isSignBitSet() || ((KnownZero2 & LowBits) == LowBits))
KnownZero |= ~LowBits;
// If the first operand is negative and not all low bits are zero, then
// the upper bits are all one.
- if (KnownOne2.isNegative() && ((KnownOne2 & LowBits) != 0))
+ if (KnownOne2.isSignBitSet() && ((KnownOne2 & LowBits) != 0))
KnownOne |= ~LowBits;
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ break;
}
}
// The sign bit is the LHS's sign bit, except when the result of the
// remainder is zero.
- if (KnownZero.isNonNegative()) {
- APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
- Q);
- // If it's known zero, our sign bit is also zero.
- if (LHSKnownZero.isNegative())
- KnownZero.setSignBit();
- }
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q);
+ // If it's known zero, our sign bit is also zero.
+ if (KnownZero2.isSignBitSet())
+ KnownZero.setSignBit();
break;
case Instruction::URem: {
@@ -1331,24 +1333,24 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
// (add non-negative, non-negative) --> non-negative
// (add negative, negative) --> negative
if (Opcode == Instruction::Add) {
- if (KnownZero2.isNegative() && KnownZero3.isNegative())
+ if (KnownZero2.isSignBitSet() && KnownZero3.isSignBitSet())
KnownZero.setSignBit();
- else if (KnownOne2.isNegative() && KnownOne3.isNegative())
+ else if (KnownOne2.isSignBitSet() && KnownOne3.isSignBitSet())
KnownOne.setSignBit();
}
// (sub nsw non-negative, negative) --> non-negative
// (sub nsw negative, non-negative) --> negative
else if (Opcode == Instruction::Sub && LL == I) {
- if (KnownZero2.isNegative() && KnownOne3.isNegative())
+ if (KnownZero2.isSignBitSet() && KnownOne3.isSignBitSet())
KnownZero.setSignBit();
- else if (KnownOne2.isNegative() && KnownZero3.isNegative())
+ else if (KnownOne2.isSignBitSet() && KnownZero3.isSignBitSet())
KnownOne.setSignBit();
}
// (mul nsw non-negative, non-negative) --> non-negative
- else if (Opcode == Instruction::Mul && KnownZero2.isNegative() &&
- KnownZero3.isNegative())
+ else if (Opcode == Instruction::Mul && KnownZero2.isSignBitSet() &&
+ KnownZero3.isSignBitSet())
KnownZero.setSignBit();
}
@@ -1614,8 +1616,8 @@ void ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne,
APInt ZeroBits(BitWidth, 0);
APInt OneBits(BitWidth, 0);
computeKnownBits(V, ZeroBits, OneBits, Depth, Q);
- KnownOne = OneBits.isNegative();
- KnownZero = ZeroBits.isNegative();
+ KnownOne = OneBits.isSignBitSet();
+ KnownZero = ZeroBits.isSignBitSet();
}
/// Return true if the given value is known to have exactly one
@@ -1638,9 +1640,9 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
if (match(V, m_Shl(m_One(), m_Value())))
return true;
- // (signbit) >>l X is clearly a power of two if the one is not shifted off the
- // bottom. If it is shifted off the bottom then the result is undefined.
- if (match(V, m_LShr(m_SignBit(), m_Value())))
+ // (signmask) >>l X is clearly a power of two if the one is not shifted off
+ // the bottom. If it is shifted off the bottom then the result is undefined.
+ if (match(V, m_LShr(m_SignMask(), m_Value())))
return true;
// The remaining tests are all recursive, so bail out if we hit the limit.
@@ -2241,7 +2243,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
// If we are subtracting one from a positive number, there is no carry
// out of the result.
- if (KnownZero.isNegative())
+ if (KnownZero.isSignBitSet())
return Tmp;
}
@@ -2265,7 +2267,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
// If the input is known to be positive (the sign bit is known clear),
// the output of the NEG has the same number of sign bits as the input.
- if (KnownZero.isNegative())
+ if (KnownZero.isSignBitSet())
return Tmp2;
// Otherwise, we treat this like a SUB.
@@ -2322,10 +2324,10 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
// If we know that the sign bit is either zero or one, determine the number of
// identical bits in the top of the input value.
- if (KnownZero.isNegative())
+ if (KnownZero.isSignBitSet())
return std::max(FirstAnswer, KnownZero.countLeadingOnes());
- if (KnownOne.isNegative())
+ if (KnownOne.isSignBitSet())
return std::max(FirstAnswer, KnownOne.countLeadingOnes());
// computeKnownBits gave us no extra information about the top bits.
@@ -3556,14 +3558,14 @@ OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS,
// We know the multiply operation doesn't overflow if the maximum values for
// each operand will not overflow after we multiply them together.
bool MaxOverflow;
- LHSMax.umul_ov(RHSMax, MaxOverflow);
+ (void)LHSMax.umul_ov(RHSMax, MaxOverflow);
if (!MaxOverflow)
return OverflowResult::NeverOverflows;
// We know it always overflows if multiplying the smallest possible values for
// the operands also results in overflow.
bool MinOverflow;
- LHSKnownOne.umul_ov(RHSKnownOne, MinOverflow);
+ (void)LHSKnownOne.umul_ov(RHSKnownOne, MinOverflow);
if (MinOverflow)
return OverflowResult::AlwaysOverflows;
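
The two call sites above need only the overflow flag from APInt::umul_ov; the added (void) casts make the discarded product explicit. A hedged sketch of the same pattern in isolation, assuming only that API:

    // Standalone sketch of the (void)umul_ov pattern; the helper name is invented.
    #include "llvm/ADT/APInt.h"

    bool multiplyNeverOverflows(const llvm::APInt &LHSMax, const llvm::APInt &RHSMax) {
      bool Overflow;
      (void)LHSMax.umul_ov(RHSMax, Overflow); // product discarded; only the flag matters
      return !Overflow;
    }
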
diff --git a/contrib/llvm/lib/AsmParser/LLParser.cpp b/contrib/llvm/lib/AsmParser/LLParser.cpp
index 58ea9296afda..c7076ed0dd81 100644
--- a/contrib/llvm/lib/AsmParser/LLParser.cpp
+++ b/contrib/llvm/lib/AsmParser/LLParser.cpp
@@ -143,27 +143,24 @@ bool LLParser::ValidateEndOfModule() {
FnAttrs.removeAttribute(Attribute::Alignment);
}
- AS = AS.addAttributes(
- Context, AttributeList::FunctionIndex,
- AttributeList::get(Context, AttributeList::FunctionIndex, FnAttrs));
+ AS = AS.addAttributes(Context, AttributeList::FunctionIndex,
+ AttributeSet::get(Context, FnAttrs));
Fn->setAttributes(AS);
} else if (CallInst *CI = dyn_cast<CallInst>(V)) {
AttributeList AS = CI->getAttributes();
AttrBuilder FnAttrs(AS.getFnAttributes());
AS = AS.removeAttributes(Context, AttributeList::FunctionIndex);
FnAttrs.merge(B);
- AS = AS.addAttributes(
- Context, AttributeList::FunctionIndex,
- AttributeList::get(Context, AttributeList::FunctionIndex, FnAttrs));
+ AS = AS.addAttributes(Context, AttributeList::FunctionIndex,
+ AttributeSet::get(Context, FnAttrs));
CI->setAttributes(AS);
} else if (InvokeInst *II = dyn_cast<InvokeInst>(V)) {
AttributeList AS = II->getAttributes();
AttrBuilder FnAttrs(AS.getFnAttributes());
AS = AS.removeAttributes(Context, AttributeList::FunctionIndex);
FnAttrs.merge(B);
- AS = AS.addAttributes(
- Context, AttributeList::FunctionIndex,
- AttributeList::get(Context, AttributeList::FunctionIndex, FnAttrs));
+ AS = AS.addAttributes(Context, AttributeList::FunctionIndex,
+ AttributeSet::get(Context, FnAttrs));
II->setAttributes(AS);
} else {
llvm_unreachable("invalid object with forward attribute group reference");
diff --git a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 24ab7e9a950c..6d727ce83346 100644
--- a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -372,15 +372,27 @@ Expected<std::string> readTriple(BitstreamCursor &Stream) {
class BitcodeReaderBase {
protected:
- BitcodeReaderBase(BitstreamCursor Stream) : Stream(std::move(Stream)) {
+ BitcodeReaderBase(BitstreamCursor Stream, StringRef Strtab)
+ : Stream(std::move(Stream)), Strtab(Strtab) {
this->Stream.setBlockInfo(&BlockInfo);
}
BitstreamBlockInfo BlockInfo;
BitstreamCursor Stream;
+ StringRef Strtab;
+
+ /// In version 2 of the bitcode we store names of global values and comdats in
+ /// a string table rather than in the VST.
+ bool UseStrtab = false;
Expected<unsigned> parseVersionRecord(ArrayRef<uint64_t> Record);
+ /// If this module uses a string table, pop the reference to the string table
+ /// and return the referenced string and the rest of the record. Otherwise
+ /// just return the record itself.
+ std::pair<StringRef, ArrayRef<uint64_t>>
+ readNameFromStrtab(ArrayRef<uint64_t> Record);
+
bool readBlockInfo();
// Contains an arbitrary and optional string identifying the bitcode producer
@@ -402,11 +414,22 @@ BitcodeReaderBase::parseVersionRecord(ArrayRef<uint64_t> Record) {
if (Record.size() < 1)
return error("Invalid record");
unsigned ModuleVersion = Record[0];
- if (ModuleVersion > 1)
+ if (ModuleVersion > 2)
return error("Invalid value");
+ UseStrtab = ModuleVersion >= 2;
return ModuleVersion;
}
+std::pair<StringRef, ArrayRef<uint64_t>>
+BitcodeReaderBase::readNameFromStrtab(ArrayRef<uint64_t> Record) {
+ if (!UseStrtab)
+ return {"", Record};
+ // Invalid reference. Let the caller complain about the record being empty.
+ if (Record[0] + Record[1] > Strtab.size())
+ return {"", {}};
+ return {StringRef(Strtab.data() + Record[0], Record[1]), Record.slice(2)};
+}
+
class BitcodeReader : public BitcodeReaderBase, public GVMaterializer {
LLVMContext &Context;
Module *TheModule = nullptr;
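
The v2 records introduced above begin with a [strtab_offset, strtab_size] pair that the new readNameFromStrtab helper strips off and resolves against the STRTAB blob. A hypothetical sketch of that decoding, with the function name and sample values being assumptions for illustration (e.g. Strtab == "mainhelper" and a record starting {4, 6, ...} would yield the name "helper" plus the remaining v1 fields):

    // Illustrative decoder for a v2 record; assumes Record has at least two fields,
    // matching the helper above, which lets the caller reject short records.
    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/StringRef.h"
    #include <cstdint>
    #include <utility>

    std::pair<llvm::StringRef, llvm::ArrayRef<uint64_t>>
    decodeV2Name(llvm::StringRef Strtab, llvm::ArrayRef<uint64_t> Record) {
      if (Record[0] + Record[1] > Strtab.size())
        return {"", {}};                                 // out-of-range reference
      return {Strtab.substr(Record[0], Record[1]), Record.slice(2)};
    }
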
@@ -492,8 +515,8 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer {
std::vector<std::string> BundleTags;
public:
- BitcodeReader(BitstreamCursor Stream, StringRef ProducerIdentification,
- LLVMContext &Context);
+ BitcodeReader(BitstreamCursor Stream, StringRef Strtab,
+ StringRef ProducerIdentification, LLVMContext &Context);
Error materializeForwardReferencedFunctions();
@@ -628,7 +651,10 @@ private:
Expected<Value *> recordValue(SmallVectorImpl<uint64_t> &Record,
unsigned NameIndex, Triple &TT);
+ void setDeferredFunctionInfo(unsigned FuncBitcodeOffsetDelta, Function *F,
+ ArrayRef<uint64_t> Record);
Error parseValueSymbolTable(uint64_t Offset = 0);
+ Error parseGlobalValueSymbolTable();
Error parseConstants();
Error rememberAndSkipFunctionBodies();
Error rememberAndSkipFunctionBody();
@@ -681,12 +707,15 @@ class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase {
std::string SourceFileName;
public:
- ModuleSummaryIndexBitcodeReader(
- BitstreamCursor Stream, ModuleSummaryIndex &TheIndex);
+ ModuleSummaryIndexBitcodeReader(BitstreamCursor Stream, StringRef Strtab,
+ ModuleSummaryIndex &TheIndex);
Error parseModule(StringRef ModulePath);
private:
+ void setValueGUID(uint64_t ValueID, StringRef ValueName,
+ GlobalValue::LinkageTypes Linkage,
+ StringRef SourceFileName);
Error parseValueSymbolTable(
uint64_t Offset,
DenseMap<unsigned, GlobalValue::LinkageTypes> &ValueIdToLinkageMap);
@@ -716,10 +745,10 @@ std::error_code llvm::errorToErrorCodeAndEmitErrors(LLVMContext &Ctx,
return std::error_code();
}
-BitcodeReader::BitcodeReader(BitstreamCursor Stream,
+BitcodeReader::BitcodeReader(BitstreamCursor Stream, StringRef Strtab,
StringRef ProducerIdentification,
LLVMContext &Context)
- : BitcodeReaderBase(std::move(Stream)), Context(Context),
+ : BitcodeReaderBase(std::move(Stream), Strtab), Context(Context),
ValueList(Context) {
this->ProducerIdentification = ProducerIdentification;
}
@@ -1749,6 +1778,54 @@ static uint64_t jumpToValueSymbolTable(uint64_t Offset,
return CurrentBit;
}
+void BitcodeReader::setDeferredFunctionInfo(unsigned FuncBitcodeOffsetDelta,
+ Function *F,
+ ArrayRef<uint64_t> Record) {
+ // Note that we subtract 1 here because the offset is relative to one word
+ // before the start of the identification or module block, which was
+ // historically always the start of the regular bitcode header.
+ uint64_t FuncWordOffset = Record[1] - 1;
+ uint64_t FuncBitOffset = FuncWordOffset * 32;
+ DeferredFunctionInfo[F] = FuncBitOffset + FuncBitcodeOffsetDelta;
+ // Set the LastFunctionBlockBit to point to the last function block.
+ // Later when parsing is resumed after function materialization,
+ // we can simply skip that last function block.
+ if (FuncBitOffset > LastFunctionBlockBit)
+ LastFunctionBlockBit = FuncBitOffset;
+}
+
+/// Read a new-style GlobalValue symbol table.
+Error BitcodeReader::parseGlobalValueSymbolTable() {
+ unsigned FuncBitcodeOffsetDelta =
+ Stream.getAbbrevIDWidth() + bitc::BlockIDWidth;
+
+ if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID))
+ return error("Invalid record");
+
+ SmallVector<uint64_t, 64> Record;
+ while (true) {
+ BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::SubBlock:
+ case BitstreamEntry::Error:
+ return error("Malformed block");
+ case BitstreamEntry::EndBlock:
+ return Error::success();
+ case BitstreamEntry::Record:
+ break;
+ }
+
+ Record.clear();
+ switch (Stream.readRecord(Entry.ID, Record)) {
+ case bitc::VST_CODE_FNENTRY: // [valueid, offset]
+ setDeferredFunctionInfo(FuncBitcodeOffsetDelta,
+ cast<Function>(ValueList[Record[0]]), Record);
+ break;
+ }
+ }
+}
+
/// Parse the value symbol table at either the current parsing location or
/// at the given bit offset if provided.
Error BitcodeReader::parseValueSymbolTable(uint64_t Offset) {
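
Each VST_CODE_FNENTRY handled above stores a one-based word offset, which setDeferredFunctionInfo converts back into an absolute bit offset for later materialization. A small worked example under assumed values (the helper name and the delta of 64 bits are illustrative, not taken from the patch):

    // With a record field of 5 and an assumed 64-bit header delta:
    // (5 - 1) * 32 + 64 == 192 bits, where parsing resumes for that function.
    #include <cstdint>
    #include <cassert>

    uint64_t deferredFunctionBitOffset(uint64_t FuncWordOffsetField,
                                       uint64_t FuncBitcodeOffsetDelta) {
      uint64_t FuncWordOffset = FuncWordOffsetField - 1; // offset is stored 1-based
      return FuncWordOffset * 32 + FuncBitcodeOffsetDelta;
    }

    void deferredFunctionBitOffsetExample() {
      assert(deferredFunctionBitOffset(5, 64) == 192);
    }
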
@@ -1756,8 +1833,18 @@ Error BitcodeReader::parseValueSymbolTable(uint64_t Offset) {
// Pass in the Offset to distinguish between calling for the module-level
// VST (where we want to jump to the VST offset) and the function-level
// VST (where we don't).
- if (Offset > 0)
+ if (Offset > 0) {
CurrentBit = jumpToValueSymbolTable(Offset, Stream);
+ // If this module uses a string table, read this as a module-level VST.
+ if (UseStrtab) {
+ if (Error Err = parseGlobalValueSymbolTable())
+ return Err;
+ Stream.JumpToBit(CurrentBit);
+ return Error::success();
+ }
+ // Otherwise, the VST will be in a similar format to a function-level VST,
+ // and will contain symbol names.
+ }
// Compute the delta between the bitcode indices in the VST (the word offset
// to the word-aligned ENTER_SUBBLOCK for the function block, and that
@@ -1818,23 +1905,10 @@ Error BitcodeReader::parseValueSymbolTable(uint64_t Offset) {
return Err;
Value *V = ValOrErr.get();
- auto *F = dyn_cast<Function>(V);
// Ignore function offsets emitted for aliases of functions in older
// versions of LLVM.
- if (!F)
- break;
-
- // Note that we subtract 1 here because the offset is relative to one word
- // before the start of the identification or module block, which was
- // historically always the start of the regular bitcode header.
- uint64_t FuncWordOffset = Record[1] - 1;
- uint64_t FuncBitOffset = FuncWordOffset * 32;
- DeferredFunctionInfo[F] = FuncBitOffset + FuncBitcodeOffsetDelta;
- // Set the LastFunctionBlockBit to point to the last function block.
- // Later when parsing is resumed after function materialization,
- // we can simply skip that last function block.
- if (FuncBitOffset > LastFunctionBlockBit)
- LastFunctionBlockBit = FuncBitOffset;
+ if (auto *F = dyn_cast<Function>(V))
+ setDeferredFunctionInfo(FuncBitcodeOffsetDelta, F, Record);
break;
}
case bitc::VST_CODE_BBENTRY: {
@@ -2557,6 +2631,7 @@ Error BitcodeReader::globalCleanup() {
// Look for intrinsic functions which need to be upgraded at some point
for (Function &F : *TheModule) {
+ MDLoader->upgradeDebugIntrinsics(F);
Function *NewFn;
if (UpgradeIntrinsicFunction(&F, NewFn))
UpgradedIntrinsics[&F] = NewFn;
@@ -2626,15 +2701,24 @@ bool BitcodeReaderBase::readBlockInfo() {
}
Error BitcodeReader::parseComdatRecord(ArrayRef<uint64_t> Record) {
- // [selection_kind, name]
- if (Record.size() < 2)
+ // v1: [selection_kind, name]
+ // v2: [strtab_offset, strtab_size, selection_kind]
+ StringRef Name;
+ std::tie(Name, Record) = readNameFromStrtab(Record);
+
+ if (Record.size() < 1)
return error("Invalid record");
Comdat::SelectionKind SK = getDecodedComdatSelectionKind(Record[0]);
- std::string Name;
- unsigned ComdatNameSize = Record[1];
- Name.reserve(ComdatNameSize);
- for (unsigned i = 0; i != ComdatNameSize; ++i)
- Name += (char)Record[2 + i];
+ std::string OldFormatName;
+ if (!UseStrtab) {
+ if (Record.size() < 2)
+ return error("Invalid record");
+ unsigned ComdatNameSize = Record[1];
+ OldFormatName.reserve(ComdatNameSize);
+ for (unsigned i = 0; i != ComdatNameSize; ++i)
+ OldFormatName += (char)Record[2 + i];
+ Name = OldFormatName;
+ }
Comdat *C = TheModule->getOrInsertComdat(Name);
C->setSelectionKind(SK);
ComdatList.push_back(C);
@@ -2642,9 +2726,13 @@ Error BitcodeReader::parseComdatRecord(ArrayRef<uint64_t> Record) {
}
Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
- // [pointer type, isconst, initid, linkage, alignment, section,
+ // v1: [pointer type, isconst, initid, linkage, alignment, section,
// visibility, threadlocal, unnamed_addr, externally_initialized,
- // dllstorageclass, comdat]
+ // dllstorageclass, comdat] (name in VST)
+ // v2: [strtab_offset, strtab_size, v1]
+ StringRef Name;
+ std::tie(Name, Record) = readNameFromStrtab(Record);
+
if (Record.size() < 6)
return error("Invalid record");
Type *Ty = getTypeByID(Record[0]);
@@ -2692,7 +2780,7 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
ExternallyInitialized = Record[9];
GlobalVariable *NewGV =
- new GlobalVariable(*TheModule, Ty, isConstant, Linkage, nullptr, "",
+ new GlobalVariable(*TheModule, Ty, isConstant, Linkage, nullptr, Name,
nullptr, TLM, AddressSpace, ExternallyInitialized);
NewGV->setAlignment(Alignment);
if (!Section.empty())
@@ -2724,9 +2812,13 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
}
Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
- // [type, callingconv, isproto, linkage, paramattr, alignment, section,
+ // v1: [type, callingconv, isproto, linkage, paramattr, alignment, section,
// visibility, gc, unnamed_addr, prologuedata, dllstorageclass, comdat,
- // prefixdata]
+ // prefixdata] (name in VST)
+ // v2: [strtab_offset, strtab_size, v1]
+ StringRef Name;
+ std::tie(Name, Record) = readNameFromStrtab(Record);
+
if (Record.size() < 8)
return error("Invalid record");
Type *Ty = getTypeByID(Record[0]);
@@ -2742,7 +2834,7 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
return error("Invalid calling convention ID");
Function *Func =
- Function::Create(FTy, GlobalValue::ExternalLinkage, "", TheModule);
+ Function::Create(FTy, GlobalValue::ExternalLinkage, Name, TheModule);
Func->setCallingConv(CC);
bool isProto = Record[2];
@@ -2810,11 +2902,15 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
Error BitcodeReader::parseGlobalIndirectSymbolRecord(
unsigned BitCode, ArrayRef<uint64_t> Record) {
- // ALIAS_OLD: [alias type, aliasee val#, linkage]
- // ALIAS: [alias type, addrspace, aliasee val#, linkage, visibility,
- // dllstorageclass]
- // IFUNC: [alias type, addrspace, aliasee val#, linkage,
- // visibility, dllstorageclass]
+ // v1 ALIAS_OLD: [alias type, aliasee val#, linkage] (name in VST)
+ // v1 ALIAS: [alias type, addrspace, aliasee val#, linkage, visibility,
+ // dllstorageclass] (name in VST)
+ // v1 IFUNC: [alias type, addrspace, aliasee val#, linkage,
+ // visibility, dllstorageclass] (name in VST)
+ // v2: [strtab_offset, strtab_size, v1]
+ StringRef Name;
+ std::tie(Name, Record) = readNameFromStrtab(Record);
+
bool NewRecord = BitCode != bitc::MODULE_CODE_ALIAS_OLD;
if (Record.size() < (3 + (unsigned)NewRecord))
return error("Invalid record");
@@ -2839,10 +2935,10 @@ Error BitcodeReader::parseGlobalIndirectSymbolRecord(
GlobalIndirectSymbol *NewGA;
if (BitCode == bitc::MODULE_CODE_ALIAS ||
BitCode == bitc::MODULE_CODE_ALIAS_OLD)
- NewGA = GlobalAlias::create(Ty, AddrSpace, getDecodedLinkage(Linkage), "",
+ NewGA = GlobalAlias::create(Ty, AddrSpace, getDecodedLinkage(Linkage), Name,
TheModule);
else
- NewGA = GlobalIFunc::create(Ty, AddrSpace, getDecodedLinkage(Linkage), "",
+ NewGA = GlobalIFunc::create(Ty, AddrSpace, getDecodedLinkage(Linkage), Name,
nullptr, TheModule);
// Old bitcode files didn't have visibility field.
// Local linkage must have default visibility.
@@ -4570,8 +4666,8 @@ std::vector<StructType *> BitcodeReader::getIdentifiedStructTypes() const {
}
ModuleSummaryIndexBitcodeReader::ModuleSummaryIndexBitcodeReader(
- BitstreamCursor Cursor, ModuleSummaryIndex &TheIndex)
- : BitcodeReaderBase(std::move(Cursor)), TheIndex(TheIndex) {}
+ BitstreamCursor Cursor, StringRef Strtab, ModuleSummaryIndex &TheIndex)
+ : BitcodeReaderBase(std::move(Cursor), Strtab), TheIndex(TheIndex) {}
std::pair<GlobalValue::GUID, GlobalValue::GUID>
ModuleSummaryIndexBitcodeReader::getGUIDFromValueId(unsigned ValueId) {
@@ -4580,12 +4676,32 @@ ModuleSummaryIndexBitcodeReader::getGUIDFromValueId(unsigned ValueId) {
return VGI->second;
}
+void ModuleSummaryIndexBitcodeReader::setValueGUID(
+ uint64_t ValueID, StringRef ValueName, GlobalValue::LinkageTypes Linkage,
+ StringRef SourceFileName) {
+ std::string GlobalId =
+ GlobalValue::getGlobalIdentifier(ValueName, Linkage, SourceFileName);
+ auto ValueGUID = GlobalValue::getGUID(GlobalId);
+ auto OriginalNameID = ValueGUID;
+ if (GlobalValue::isLocalLinkage(Linkage))
+ OriginalNameID = GlobalValue::getGUID(ValueName);
+ if (PrintSummaryGUIDs)
+ dbgs() << "GUID " << ValueGUID << "(" << OriginalNameID << ") is "
+ << ValueName << "\n";
+ ValueIdToCallGraphGUIDMap[ValueID] =
+ std::make_pair(ValueGUID, OriginalNameID);
+}
+
// Specialized value symbol table parser used when reading module index
// blocks where we don't actually create global values. The parsed information
// is saved in the bitcode reader for use when later parsing summaries.
Error ModuleSummaryIndexBitcodeReader::parseValueSymbolTable(
uint64_t Offset,
DenseMap<unsigned, GlobalValue::LinkageTypes> &ValueIdToLinkageMap) {
+ // With a strtab the VST is not required to parse the summary.
+ if (UseStrtab)
+ return Error::success();
+
assert(Offset > 0 && "Expected non-zero VST offset");
uint64_t CurrentBit = jumpToValueSymbolTable(Offset, Stream);
@@ -4627,17 +4743,7 @@ Error ModuleSummaryIndexBitcodeReader::parseValueSymbolTable(
assert(VLI != ValueIdToLinkageMap.end() &&
"No linkage found for VST entry?");
auto Linkage = VLI->second;
- std::string GlobalId =
- GlobalValue::getGlobalIdentifier(ValueName, Linkage, SourceFileName);
- auto ValueGUID = GlobalValue::getGUID(GlobalId);
- auto OriginalNameID = ValueGUID;
- if (GlobalValue::isLocalLinkage(Linkage))
- OriginalNameID = GlobalValue::getGUID(ValueName);
- if (PrintSummaryGUIDs)
- dbgs() << "GUID " << ValueGUID << "(" << OriginalNameID << ") is "
- << ValueName << "\n";
- ValueIdToCallGraphGUIDMap[ValueID] =
- std::make_pair(ValueGUID, OriginalNameID);
+ setValueGUID(ValueID, ValueName, Linkage, SourceFileName);
ValueName.clear();
break;
}
@@ -4651,18 +4757,7 @@ Error ModuleSummaryIndexBitcodeReader::parseValueSymbolTable(
assert(VLI != ValueIdToLinkageMap.end() &&
"No linkage found for VST entry?");
auto Linkage = VLI->second;
- std::string FunctionGlobalId = GlobalValue::getGlobalIdentifier(
- ValueName, VLI->second, SourceFileName);
- auto FunctionGUID = GlobalValue::getGUID(FunctionGlobalId);
- auto OriginalNameID = FunctionGUID;
- if (GlobalValue::isLocalLinkage(Linkage))
- OriginalNameID = GlobalValue::getGUID(ValueName);
- if (PrintSummaryGUIDs)
- dbgs() << "GUID " << FunctionGUID << "(" << OriginalNameID << ") is "
- << ValueName << "\n";
- ValueIdToCallGraphGUIDMap[ValueID] =
- std::make_pair(FunctionGUID, OriginalNameID);
-
+ setValueGUID(ValueID, ValueName, Linkage, SourceFileName);
ValueName.clear();
break;
}
@@ -4749,6 +4844,11 @@ Error ModuleSummaryIndexBitcodeReader::parseModule(StringRef ModulePath) {
switch (BitCode) {
default:
break; // Default behavior, ignore unknown content.
+ case bitc::MODULE_CODE_VERSION: {
+ if (Error Err = parseVersionRecord(Record).takeError())
+ return Err;
+ break;
+ }
/// MODULE_CODE_SOURCE_FILENAME: [namechar x N]
case bitc::MODULE_CODE_SOURCE_FILENAME: {
SmallString<128> ValueName;
@@ -4783,17 +4883,26 @@ Error ModuleSummaryIndexBitcodeReader::parseModule(StringRef ModulePath) {
// was historically always the start of the regular bitcode header.
VSTOffset = Record[0] - 1;
break;
- // GLOBALVAR: [pointer type, isconst, initid, linkage, ...]
- // FUNCTION: [type, callingconv, isproto, linkage, ...]
- // ALIAS: [alias type, addrspace, aliasee val#, linkage, ...]
+ // v1 GLOBALVAR: [pointer type, isconst, initid, linkage, ...]
+ // v1 FUNCTION: [type, callingconv, isproto, linkage, ...]
+ // v1 ALIAS: [alias type, addrspace, aliasee val#, linkage, ...]
+ // v2: [strtab offset, strtab size, v1]
case bitc::MODULE_CODE_GLOBALVAR:
case bitc::MODULE_CODE_FUNCTION:
case bitc::MODULE_CODE_ALIAS: {
- if (Record.size() <= 3)
+ StringRef Name;
+ ArrayRef<uint64_t> GVRecord;
+ std::tie(Name, GVRecord) = readNameFromStrtab(Record);
+ if (GVRecord.size() <= 3)
return error("Invalid record");
- uint64_t RawLinkage = Record[3];
+ uint64_t RawLinkage = GVRecord[3];
GlobalValue::LinkageTypes Linkage = getDecodedLinkage(RawLinkage);
- ValueIdToLinkageMap[ValueId++] = Linkage;
+ if (!UseStrtab) {
+ ValueIdToLinkageMap[ValueId++] = Linkage;
+ break;
+ }
+
+ setValueGUID(ValueId++, Name, Linkage, SourceFileName);
break;
}
}
@@ -4904,6 +5013,12 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(
switch (BitCode) {
default: // Default behavior: ignore.
break;
+ case bitc::FS_VALUE_GUID: { // [valueid, refguid]
+ uint64_t ValueID = Record[0];
+ GlobalValue::GUID RefGUID = Record[1];
+ ValueIdToCallGraphGUIDMap[ValueID] = std::make_pair(RefGUID, RefGUID);
+ break;
+ }
// FS_PERMODULE: [valueid, flags, instcount, numrefs, numrefs x valueid,
// n x (valueid)]
// FS_PERMODULE_PROFILE: [valueid, flags, instcount, numrefs,
@@ -5208,6 +5323,35 @@ const std::error_category &llvm::BitcodeErrorCategory() {
return *ErrorCategory;
}
+static Expected<StringRef> readStrtab(BitstreamCursor &Stream) {
+ if (Stream.EnterSubBlock(bitc::STRTAB_BLOCK_ID))
+ return error("Invalid record");
+
+ StringRef Strtab;
+ while (1) {
+ BitstreamEntry Entry = Stream.advance();
+ switch (Entry.Kind) {
+ case BitstreamEntry::EndBlock:
+ return Strtab;
+
+ case BitstreamEntry::Error:
+ return error("Malformed block");
+
+ case BitstreamEntry::SubBlock:
+ if (Stream.SkipBlock())
+ return error("Malformed block");
+ break;
+
+ case BitstreamEntry::Record:
+ StringRef Blob;
+ SmallVector<uint64_t, 1> Record;
+ if (Stream.readRecord(Entry.ID, Record, &Blob) == bitc::STRTAB_BLOB)
+ Strtab = Blob;
+ break;
+ }
+ }
+}
+
//===----------------------------------------------------------------------===//
// External interface
//===----------------------------------------------------------------------===//
@@ -5260,6 +5404,22 @@ llvm::getBitcodeModuleList(MemoryBufferRef Buffer) {
continue;
}
+ if (Entry.ID == bitc::STRTAB_BLOCK_ID) {
+ Expected<StringRef> Strtab = readStrtab(Stream);
+ if (!Strtab)
+ return Strtab.takeError();
+ // This string table is used by every preceding bitcode module that does
+ // not have its own string table. A bitcode file may have multiple
+ // string tables if it was created by binary concatenation, for example
+ // with "llvm-cat -b".
+ for (auto I = Modules.rbegin(), E = Modules.rend(); I != E; ++I) {
+ if (!I->Strtab.empty())
+ break;
+ I->Strtab = *Strtab;
+ }
+ continue;
+ }
+
if (Stream.SkipBlock())
return error("Malformed block");
continue;
@@ -5296,8 +5456,8 @@ BitcodeModule::getModuleImpl(LLVMContext &Context, bool MaterializeAll,
}
Stream.JumpToBit(ModuleBit);
- auto *R =
- new BitcodeReader(std::move(Stream), ProducerIdentification, Context);
+ auto *R = new BitcodeReader(std::move(Stream), Strtab, ProducerIdentification,
+ Context);
std::unique_ptr<Module> M =
llvm::make_unique<Module>(ModuleIdentifier, Context);
@@ -5332,7 +5492,7 @@ Expected<std::unique_ptr<ModuleSummaryIndex>> BitcodeModule::getSummary() {
Stream.JumpToBit(ModuleBit);
auto Index = llvm::make_unique<ModuleSummaryIndex>();
- ModuleSummaryIndexBitcodeReader R(std::move(Stream), *Index);
+ ModuleSummaryIndexBitcodeReader R(std::move(Stream), Strtab, *Index);
if (Error Err = R.parseModule(ModuleIdentifier))
return std::move(Err);
diff --git a/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
index 274dfe89cce5..d089684a052f 100644
--- a/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -54,6 +54,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSummaryIndex.h"
@@ -452,6 +453,7 @@ class MetadataLoader::MetadataLoaderImpl {
bool StripTBAA = false;
bool HasSeenOldLoopTags = false;
bool NeedUpgradeToDIGlobalVariableExpression = false;
+ bool NeedDeclareExpressionUpgrade = false;
/// True if metadata is being parsed for a module being ThinLTO imported.
bool IsImporting = false;
@@ -511,6 +513,26 @@ class MetadataLoader::MetadataLoaderImpl {
}
}
+ /// Remove a leading DW_OP_deref from DIExpressions in a dbg.declare that
+ /// describes a function argument.
+ void upgradeDeclareExpressions(Function &F) {
+ if (!NeedDeclareExpressionUpgrade)
+ return;
+
+ for (auto &BB : F)
+ for (auto &I : BB)
+ if (auto *DDI = dyn_cast<DbgDeclareInst>(&I))
+ if (auto *DIExpr = DDI->getExpression())
+ if (DIExpr->startsWithDeref() &&
+ dyn_cast_or_null<Argument>(DDI->getAddress())) {
+ SmallVector<uint64_t, 8> Ops;
+ Ops.append(std::next(DIExpr->elements_begin()),
+ DIExpr->elements_end());
+ auto *E = DIExpression::get(Context, Ops);
+ DDI->setOperand(2, MetadataAsValue::get(Context, E));
+ }
+ }
+
void upgradeDebugInfo() {
upgradeCUSubprograms();
upgradeCUVariables();
@@ -565,6 +587,7 @@ public:
unsigned size() const { return MetadataList.size(); }
void shrinkTo(unsigned N) { MetadataList.shrinkTo(N); }
+ void upgradeDebugIntrinsics(Function &F) { upgradeDeclareExpressions(F); }
};
static Error error(const Twine &Message) {
@@ -1520,12 +1543,32 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
return error("Invalid record");
IsDistinct = Record[0] & 1;
- bool HasOpFragment = Record[0] & 2;
+ uint64_t Version = Record[0] >> 1;
auto Elts = MutableArrayRef<uint64_t>(Record).slice(1);
- if (!HasOpFragment)
- if (unsigned N = Elts.size())
- if (N >= 3 && Elts[N - 3] == dwarf::DW_OP_bit_piece)
- Elts[N - 3] = dwarf::DW_OP_LLVM_fragment;
+ unsigned N = Elts.size();
+ // Perform various upgrades.
+ switch (Version) {
+ case 0:
+ if (N >= 3 && Elts[N - 3] == dwarf::DW_OP_bit_piece)
+ Elts[N - 3] = dwarf::DW_OP_LLVM_fragment;
+ LLVM_FALLTHROUGH;
+ case 1:
+ // Move DW_OP_deref to the end.
+ if (N && Elts[0] == dwarf::DW_OP_deref) {
+ auto End = Elts.end();
+ if (Elts.size() >= 3 && *std::prev(End, 3) == dwarf::DW_OP_LLVM_fragment)
+ End = std::prev(End, 3);
+ std::move(std::next(Elts.begin()), End, Elts.begin());
+ *std::prev(End) = dwarf::DW_OP_deref;
+ }
+ NeedDeclareExpressionUpgrade = true;
+ LLVM_FALLTHROUGH;
+ case 2:
+ // Up-to-date!
+ break;
+ default:
+ return error("Invalid record");
+ }
MetadataList.assignValue(
GET_OR_DISTINCT(DIExpression, (Context, makeArrayRef(Record).slice(1))),
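
An illustrative trace of the case-1 upgrade above, with assumed operand values: a leading DW_OP_deref is rotated to the end of the non-fragment part of the expression. The header and the sample operands are assumptions chosen for readability, not taken from the patch:

    // Sketch of the version-1 -> version-2 rewrite on one expression.
    #include "llvm/Support/Dwarf.h"   // assumed location of the DW_OP_* constants
    #include <cstdint>
    #include <vector>
    using namespace llvm::dwarf;

    void versionOneUpgradeExample() {
      // On-disk version-1 elements: deref first, fragment descriptor last.
      std::vector<uint64_t> Old = {DW_OP_deref, DW_OP_plus, 4,
                                   DW_OP_LLVM_fragment, 0, 32};
      // After the rewrite, the deref sits just before the fragment descriptor.
      std::vector<uint64_t> New = {DW_OP_plus, 4, DW_OP_deref,
                                   DW_OP_LLVM_fragment, 0, 32};
      (void)Old; (void)New;
    }
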
@@ -1858,3 +1901,7 @@ bool MetadataLoader::isStrippingTBAA() { return Pimpl->isStrippingTBAA(); }
unsigned MetadataLoader::size() const { return Pimpl->size(); }
void MetadataLoader::shrinkTo(unsigned N) { return Pimpl->shrinkTo(N); }
+
+void MetadataLoader::upgradeDebugIntrinsics(Function &F) {
+ return Pimpl->upgradeDebugIntrinsics(F);
+}
diff --git a/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.h b/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.h
index 442dfc94e4e1..f23dcc06cc94 100644
--- a/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.h
+++ b/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.h
@@ -79,6 +79,9 @@ public:
unsigned size() const;
void shrinkTo(unsigned N);
+
+ /// Perform bitcode upgrades on llvm.dbg.* calls.
+ void upgradeDebugIntrinsics(Function &F);
};
}
diff --git a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 043441bac4de..1d3cde2f5ddb 100644
--- a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -28,6 +28,7 @@
#include "llvm/IR/Operator.h"
#include "llvm/IR/UseListOrder.h"
#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Program.h"
@@ -76,26 +77,28 @@ protected:
/// The stream created and owned by the client.
BitstreamWriter &Stream;
- /// Saves the offset of the VSTOffset record that must eventually be
- /// backpatched with the offset of the actual VST.
- uint64_t VSTOffsetPlaceholder = 0;
-
public:
/// Constructs a BitcodeWriterBase object that writes to the provided
/// \p Stream.
BitcodeWriterBase(BitstreamWriter &Stream) : Stream(Stream) {}
protected:
- bool hasVSTOffsetPlaceholder() { return VSTOffsetPlaceholder != 0; }
- void writeValueSymbolTableForwardDecl();
void writeBitcodeHeader();
+ void writeModuleVersion();
};
+void BitcodeWriterBase::writeModuleVersion() {
+ // VERSION: [version#]
+ Stream.EmitRecord(bitc::MODULE_CODE_VERSION, ArrayRef<uint64_t>{2});
+}
+
/// Class to manage the bitcode writing for a module.
class ModuleBitcodeWriter : public BitcodeWriterBase {
/// Pointer to the buffer allocated by caller for bitcode writing.
const SmallVectorImpl<char> &Buffer;
+ StringTableBuilder &StrtabBuilder;
+
/// The Module to write to bitcode.
const Module &M;
@@ -127,15 +130,20 @@ class ModuleBitcodeWriter : public BitcodeWriterBase {
/// Tracks the last value id recorded in the GUIDToValueMap.
unsigned GlobalValueId;
+ /// Saves the offset of the VSTOffset record that must eventually be
+ /// backpatched with the offset of the actual VST.
+ uint64_t VSTOffsetPlaceholder = 0;
+
public:
/// Constructs a ModuleBitcodeWriter object for the given Module,
/// writing to the provided \p Buffer.
ModuleBitcodeWriter(const Module *M, SmallVectorImpl<char> &Buffer,
+ StringTableBuilder &StrtabBuilder,
BitstreamWriter &Stream, bool ShouldPreserveUseListOrder,
const ModuleSummaryIndex *Index, bool GenerateHash,
ModuleHash *ModHash = nullptr)
- : BitcodeWriterBase(Stream), Buffer(Buffer), M(*M),
- VE(*M, ShouldPreserveUseListOrder), Index(Index),
+ : BitcodeWriterBase(Stream), Buffer(Buffer), StrtabBuilder(StrtabBuilder),
+ M(*M), VE(*M, ShouldPreserveUseListOrder), Index(Index),
GenerateHash(GenerateHash), ModHash(ModHash),
BitcodeStartBit(Stream.GetCurrentBitNo()) {
// Assign ValueIds to any callee values in the index that came from
@@ -169,6 +177,7 @@ private:
void writeAttributeTable();
void writeTypeTable();
void writeComdats();
+ void writeValueSymbolTableForwardDecl();
void writeModuleInfo();
void writeValueAsMetadata(const ValueAsMetadata *MD,
SmallVectorImpl<uint64_t> &Record);
@@ -261,9 +270,9 @@ private:
SmallVectorImpl<uint64_t> &Vals);
void writeInstruction(const Instruction &I, unsigned InstID,
SmallVectorImpl<unsigned> &Vals);
- void writeValueSymbolTable(
- const ValueSymbolTable &VST, bool IsModuleLevel = false,
- DenseMap<const Function *, uint64_t> *FunctionToBitcodeIndex = nullptr);
+ void writeFunctionLevelValueSymbolTable(const ValueSymbolTable &VST);
+ void writeGlobalValueSymbolTable(
+ DenseMap<const Function *, uint64_t> &FunctionToBitcodeIndex);
void writeUseList(UseListOrder &&Order);
void writeUseListBlock(const Function *F);
void
@@ -477,7 +486,6 @@ public:
private:
void writeModStrings();
- void writeCombinedValueSymbolTable();
void writeCombinedGlobalValueSummary();
/// Indicates whether the provided \p ModulePath should be written into
@@ -492,15 +500,15 @@ private:
const auto &VMI = GUIDToValueIdMap.find(ValGUID);
return VMI != GUIDToValueIdMap.end();
}
+ void assignValueId(GlobalValue::GUID ValGUID) {
+ unsigned &ValueId = GUIDToValueIdMap[ValGUID];
+ if (ValueId == 0)
+ ValueId = ++GlobalValueId;
+ }
unsigned getValueId(GlobalValue::GUID ValGUID) {
- const auto &VMI = GUIDToValueIdMap.find(ValGUID);
- // If this GUID doesn't have an entry, assign one.
- if (VMI == GUIDToValueIdMap.end()) {
- GUIDToValueIdMap[ValGUID] = ++GlobalValueId;
- return GlobalValueId;
- } else {
- return VMI->second;
- }
+ auto VMI = GUIDToValueIdMap.find(ValGUID);
+ assert(VMI != GUIDToValueIdMap.end());
+ return VMI->second;
}
std::map<GlobalValue::GUID, unsigned> &valueIds() { return GUIDToValueIdMap; }
};
@@ -1047,13 +1055,10 @@ static unsigned getEncodedUnnamedAddr(const GlobalValue &GV) {
void ModuleBitcodeWriter::writeComdats() {
SmallVector<unsigned, 64> Vals;
for (const Comdat *C : VE.getComdats()) {
- // COMDAT: [selection_kind, name]
+ // COMDAT: [strtab offset, strtab size, selection_kind]
+ Vals.push_back(StrtabBuilder.add(C->getName()));
+ Vals.push_back(C->getName().size());
Vals.push_back(getEncodedComdatSelectionKind(*C));
- size_t Size = C->getName().size();
- assert(isUInt<32>(Size));
- Vals.push_back(Size);
- for (char Chr : C->getName())
- Vals.push_back((unsigned char)Chr);
Stream.EmitRecord(bitc::MODULE_CODE_COMDAT, Vals, /*AbbrevToUse=*/0);
Vals.clear();
}
@@ -1062,7 +1067,7 @@ void ModuleBitcodeWriter::writeComdats() {
/// Write a record that will eventually hold the word offset of the
/// module-level VST. For now the offset is 0, which will be backpatched
/// after the real VST is written. Saves the bit offset to backpatch.
-void BitcodeWriterBase::writeValueSymbolTableForwardDecl() {
+void ModuleBitcodeWriter::writeValueSymbolTableForwardDecl() {
// Write a placeholder value in for the offset of the real VST,
// which is written after the function blocks so that it can include
// the offset of each function. The placeholder offset will be
@@ -1165,6 +1170,8 @@ void ModuleBitcodeWriter::writeModuleInfo() {
// Add an abbrev for common globals with no visibility or thread localness.
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_GLOBALVAR));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
Log2_32_Ceil(MaxGlobalType+1)));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // AddrSpace << 2
@@ -1188,15 +1195,42 @@ void ModuleBitcodeWriter::writeModuleInfo() {
SimpleGVarAbbrev = Stream.EmitAbbrev(std::move(Abbv));
}
- // Emit the global variable information.
SmallVector<unsigned, 64> Vals;
+ // Emit the module's source file name.
+ {
+ StringEncoding Bits = getStringEncoding(M.getSourceFileName().data(),
+ M.getSourceFileName().size());
+ BitCodeAbbrevOp AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8);
+ if (Bits == SE_Char6)
+ AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Char6);
+ else if (Bits == SE_Fixed7)
+ AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7);
+
+ // MODULE_CODE_SOURCE_FILENAME: [namechar x N]
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_SOURCE_FILENAME));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(AbbrevOpToUse);
+ unsigned FilenameAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
+ for (const auto P : M.getSourceFileName())
+ Vals.push_back((unsigned char)P);
+
+ // Emit the finished record.
+ Stream.EmitRecord(bitc::MODULE_CODE_SOURCE_FILENAME, Vals, FilenameAbbrev);
+ Vals.clear();
+ }
+
+ // Emit the global variable information.
for (const GlobalVariable &GV : M.globals()) {
unsigned AbbrevToUse = 0;
- // GLOBALVAR: [type, isconst, initid,
+ // GLOBALVAR: [strtab offset, strtab size, type, isconst, initid,
// linkage, alignment, section, visibility, threadlocal,
// unnamed_addr, externally_initialized, dllstorageclass,
// comdat]
+ Vals.push_back(StrtabBuilder.add(GV.getName()));
+ Vals.push_back(GV.getName().size());
Vals.push_back(VE.getTypeID(GV.getValueType()));
Vals.push_back(GV.getType()->getAddressSpace() << 2 | 2 | GV.isConstant());
Vals.push_back(GV.isDeclaration() ? 0 :
@@ -1226,9 +1260,12 @@ void ModuleBitcodeWriter::writeModuleInfo() {
// Emit the function proto information.
for (const Function &F : M) {
- // FUNCTION: [type, callingconv, isproto, linkage, paramattrs, alignment,
- // section, visibility, gc, unnamed_addr, prologuedata,
- // dllstorageclass, comdat, prefixdata, personalityfn]
+ // FUNCTION: [strtab offset, strtab size, type, callingconv, isproto,
+ // linkage, paramattrs, alignment, section, visibility, gc,
+ // unnamed_addr, prologuedata, dllstorageclass, comdat,
+ // prefixdata, personalityfn]
+ Vals.push_back(StrtabBuilder.add(F.getName()));
+ Vals.push_back(F.getName().size());
Vals.push_back(VE.getTypeID(F.getFunctionType()));
Vals.push_back(F.getCallingConv());
Vals.push_back(F.isDeclaration());
@@ -1255,8 +1292,10 @@ void ModuleBitcodeWriter::writeModuleInfo() {
// Emit the alias information.
for (const GlobalAlias &A : M.aliases()) {
- // ALIAS: [alias type, aliasee val#, linkage, visibility, dllstorageclass,
- // threadlocal, unnamed_addr]
+ // ALIAS: [strtab offset, strtab size, alias type, aliasee val#, linkage,
+ // visibility, dllstorageclass, threadlocal, unnamed_addr]
+ Vals.push_back(StrtabBuilder.add(A.getName()));
+ Vals.push_back(A.getName().size());
Vals.push_back(VE.getTypeID(A.getValueType()));
Vals.push_back(A.getType()->getAddressSpace());
Vals.push_back(VE.getValueID(A.getAliasee()));
@@ -1272,7 +1311,10 @@ void ModuleBitcodeWriter::writeModuleInfo() {
// Emit the ifunc information.
for (const GlobalIFunc &I : M.ifuncs()) {
- // IFUNC: [ifunc type, address space, resolver val#, linkage, visibility]
+ // IFUNC: [strtab offset, strtab size, ifunc type, address space, resolver
+ // val#, linkage, visibility]
+ Vals.push_back(StrtabBuilder.add(I.getName()));
+ Vals.push_back(I.getName().size());
Vals.push_back(VE.getTypeID(I.getValueType()));
Vals.push_back(I.getType()->getAddressSpace());
Vals.push_back(VE.getValueID(I.getResolver()));
@@ -1282,34 +1324,6 @@ void ModuleBitcodeWriter::writeModuleInfo() {
Vals.clear();
}
- // Emit the module's source file name.
- {
- StringEncoding Bits = getStringEncoding(M.getSourceFileName().data(),
- M.getSourceFileName().size());
- BitCodeAbbrevOp AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8);
- if (Bits == SE_Char6)
- AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Char6);
- else if (Bits == SE_Fixed7)
- AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7);
-
- // MODULE_CODE_SOURCE_FILENAME: [namechar x N]
- auto Abbv = std::make_shared<BitCodeAbbrev>();
- Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_SOURCE_FILENAME));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- Abbv->Add(AbbrevOpToUse);
- unsigned FilenameAbbrev = Stream.EmitAbbrev(std::move(Abbv));
-
- for (const auto P : M.getSourceFileName())
- Vals.push_back((unsigned char)P);
-
- // Emit the finished record.
- Stream.EmitRecord(bitc::MODULE_CODE_SOURCE_FILENAME, Vals, FilenameAbbrev);
- Vals.clear();
- }
-
- // If we have a VST, write the VSTOFFSET record placeholder.
- if (M.getValueSymbolTable().empty())
- return;
writeValueSymbolTableForwardDecl();
}
@@ -1757,9 +1771,8 @@ void ModuleBitcodeWriter::writeDIExpression(const DIExpression *N,
SmallVectorImpl<uint64_t> &Record,
unsigned Abbrev) {
Record.reserve(N->getElements().size() + 1);
-
- const uint64_t HasOpFragmentFlag = 1 << 1;
- Record.push_back((uint64_t)N->isDistinct() | HasOpFragmentFlag);
+ const uint64_t Version = 2 << 1;
+ Record.push_back((uint64_t)N->isDistinct() | Version);
Record.append(N->elements_begin(), N->elements_end());
Stream.EmitRecord(bitc::METADATA_EXPRESSION, Record, Abbrev);
@@ -2839,77 +2852,59 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I,
Vals.clear();
}
-/// Emit names for globals/functions etc. \p IsModuleLevel is true when
-/// we are writing the module-level VST, where we are including a function
-/// bitcode index and need to backpatch the VST forward declaration record.
-void ModuleBitcodeWriter::writeValueSymbolTable(
- const ValueSymbolTable &VST, bool IsModuleLevel,
- DenseMap<const Function *, uint64_t> *FunctionToBitcodeIndex) {
- if (VST.empty()) {
- // writeValueSymbolTableForwardDecl should have returned early as
- // well. Ensure this handling remains in sync by asserting that
- // the placeholder offset is not set.
- assert(!IsModuleLevel || !hasVSTOffsetPlaceholder());
- return;
- }
+/// Write a GlobalValue VST to the module. The purpose of this data structure is
+/// to allow clients to efficiently find the function body.
+void ModuleBitcodeWriter::writeGlobalValueSymbolTable(
+ DenseMap<const Function *, uint64_t> &FunctionToBitcodeIndex) {
+ // Get the offset of the VST we are writing, and backpatch it into
+ // the VST forward declaration record.
+ uint64_t VSTOffset = Stream.GetCurrentBitNo();
+ // The BitcodeStartBit was the stream offset of the identification block.
+ VSTOffset -= bitcodeStartBit();
+ assert((VSTOffset & 31) == 0 && "VST block not 32-bit aligned");
+ // Note that we add 1 here because the offset is relative to one word
+ // before the start of the identification block, which was historically
+ // always the start of the regular bitcode header.
+ Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffset / 32 + 1);
+
+ Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4);
+
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset
+ unsigned FnEntryAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
+ for (const Function &F : M) {
+ uint64_t Record[2];
- if (IsModuleLevel && hasVSTOffsetPlaceholder()) {
- // Get the offset of the VST we are writing, and backpatch it into
- // the VST forward declaration record.
- uint64_t VSTOffset = Stream.GetCurrentBitNo();
- // The BitcodeStartBit was the stream offset of the identification block.
- VSTOffset -= bitcodeStartBit();
- assert((VSTOffset & 31) == 0 && "VST block not 32-bit aligned");
+ if (F.isDeclaration())
+ continue;
+
+ Record[0] = VE.getValueID(&F);
+
+ // Save the word offset of the function (from the start of the
+ // actual bitcode written to the stream).
+ uint64_t BitcodeIndex = FunctionToBitcodeIndex[&F] - bitcodeStartBit();
+ assert((BitcodeIndex & 31) == 0 && "function block not 32-bit aligned");
// Note that we add 1 here because the offset is relative to one word
// before the start of the identification block, which was historically
// always the start of the regular bitcode header.
- Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffset / 32 + 1);
- }
-
- Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4);
+ Record[1] = BitcodeIndex / 32 + 1;
- // For the module-level VST, add abbrev Ids for the VST_CODE_FNENTRY
- // records, which are not used in the per-function VSTs.
- unsigned FnEntry8BitAbbrev;
- unsigned FnEntry7BitAbbrev;
- unsigned FnEntry6BitAbbrev;
- unsigned GUIDEntryAbbrev;
- if (IsModuleLevel && hasVSTOffsetPlaceholder()) {
- // 8-bit fixed-width VST_CODE_FNENTRY function strings.
- auto Abbv = std::make_shared<BitCodeAbbrev>();
- Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
- FnEntry8BitAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+ Stream.EmitRecord(bitc::VST_CODE_FNENTRY, Record, FnEntryAbbrev);
+ }
- // 7-bit fixed width VST_CODE_FNENTRY function strings.
- Abbv = std::make_shared<BitCodeAbbrev>();
- Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
- FnEntry7BitAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+ Stream.ExitBlock();
+}
- // 6-bit char6 VST_CODE_FNENTRY function strings.
- Abbv = std::make_shared<BitCodeAbbrev>();
- Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
- FnEntry6BitAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+/// Emit names for arguments, instructions and basic blocks in a function.
+void ModuleBitcodeWriter::writeFunctionLevelValueSymbolTable(
+ const ValueSymbolTable &VST) {
+ if (VST.empty())
+ return;
- // FIXME: Change the name of this record as it is now used by
- // the per-module index as well.
- Abbv = std::make_shared<BitCodeAbbrev>();
- Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_ENTRY));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // refguid
- GUIDEntryAbbrev = Stream.EmitAbbrev(std::move(Abbv));
- }
+ Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4);
// FIXME: Set up the abbrev, we know how many values there are!
// FIXME: We know if the type names can use 7-bit ascii.
@@ -2923,38 +2918,13 @@ void ModuleBitcodeWriter::writeValueSymbolTable(
unsigned AbbrevToUse = VST_ENTRY_8_ABBREV;
NameVals.push_back(VE.getValueID(Name.getValue()));
- Function *F = dyn_cast<Function>(Name.getValue());
-
// VST_CODE_ENTRY: [valueid, namechar x N]
- // VST_CODE_FNENTRY: [valueid, funcoffset, namechar x N]
// VST_CODE_BBENTRY: [bbid, namechar x N]
unsigned Code;
if (isa<BasicBlock>(Name.getValue())) {
Code = bitc::VST_CODE_BBENTRY;
if (Bits == SE_Char6)
AbbrevToUse = VST_BBENTRY_6_ABBREV;
- } else if (F && !F->isDeclaration()) {
- // Must be the module-level VST, where we pass in the Index and
- // have a VSTOffsetPlaceholder. The function-level VST should not
- // contain any Function symbols.
- assert(FunctionToBitcodeIndex);
- assert(hasVSTOffsetPlaceholder());
-
- // Save the word offset of the function (from the start of the
- // actual bitcode written to the stream).
- uint64_t BitcodeIndex = (*FunctionToBitcodeIndex)[F] - bitcodeStartBit();
- assert((BitcodeIndex & 31) == 0 && "function block not 32-bit aligned");
- // Note that we add 1 here because the offset is relative to one word
- // before the start of the identification block, which was historically
- // always the start of the regular bitcode header.
- NameVals.push_back(BitcodeIndex / 32 + 1);
-
- Code = bitc::VST_CODE_FNENTRY;
- AbbrevToUse = FnEntry8BitAbbrev;
- if (Bits == SE_Char6)
- AbbrevToUse = FnEntry6BitAbbrev;
- else if (Bits == SE_Fixed7)
- AbbrevToUse = FnEntry7BitAbbrev;
} else {
Code = bitc::VST_CODE_ENTRY;
if (Bits == SE_Char6)
@@ -2970,47 +2940,7 @@ void ModuleBitcodeWriter::writeValueSymbolTable(
Stream.EmitRecord(Code, NameVals, AbbrevToUse);
NameVals.clear();
}
- // Emit any GUID valueIDs created for indirect call edges into the
- // module-level VST.
- if (IsModuleLevel && hasVSTOffsetPlaceholder())
- for (const auto &GI : valueIds()) {
- NameVals.push_back(GI.second);
- NameVals.push_back(GI.first);
- Stream.EmitRecord(bitc::VST_CODE_COMBINED_ENTRY, NameVals,
- GUIDEntryAbbrev);
- NameVals.clear();
- }
- Stream.ExitBlock();
-}
-
-/// Emit function names and summary offsets for the combined index
-/// used by ThinLTO.
-void IndexBitcodeWriter::writeCombinedValueSymbolTable() {
- assert(hasVSTOffsetPlaceholder() && "Expected non-zero VSTOffsetPlaceholder");
- // Get the offset of the VST we are writing, and backpatch it into
- // the VST forward declaration record.
- uint64_t VSTOffset = Stream.GetCurrentBitNo();
- assert((VSTOffset & 31) == 0 && "VST block not 32-bit aligned");
- Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffset / 32);
-
- Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4);
-
- auto Abbv = std::make_shared<BitCodeAbbrev>();
- Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_ENTRY));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // refguid
- unsigned EntryAbbrev = Stream.EmitAbbrev(std::move(Abbv));
- SmallVector<uint64_t, 64> NameVals;
- for (const auto &GVI : valueIds()) {
- // VST_CODE_COMBINED_ENTRY: [valueid, refguid]
- NameVals.push_back(GVI.second);
- NameVals.push_back(GVI.first);
-
- // Emit the finished record.
- Stream.EmitRecord(bitc::VST_CODE_COMBINED_ENTRY, NameVals, EntryAbbrev);
- NameVals.clear();
- }
Stream.ExitBlock();
}
@@ -3114,7 +3044,7 @@ void ModuleBitcodeWriter::writeFunction(
// Emit names for all the instructions etc.
if (auto *Symtab = F.getValueSymbolTable())
- writeValueSymbolTable(*Symtab);
+ writeFunctionLevelValueSymbolTable(*Symtab);
if (NeedsMetadataAttachment)
writeFunctionMetadataAttachment(F);
@@ -3502,6 +3432,11 @@ void ModuleBitcodeWriter::writePerModuleGlobalValueSummary() {
return;
}
+ for (const auto &GVI : valueIds()) {
+ Stream.EmitRecord(bitc::FS_VALUE_GUID,
+ ArrayRef<uint64_t>{GVI.second, GVI.first});
+ }
+
// Abbrev for FS_PERMODULE.
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE));
@@ -3594,6 +3529,39 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Stream.EnterSubblock(bitc::GLOBALVAL_SUMMARY_BLOCK_ID, 3);
Stream.EmitRecord(bitc::FS_VERSION, ArrayRef<uint64_t>{INDEX_VERSION});
+ // Create value IDs for undefined references.
+ for (const auto &I : *this) {
+ if (auto *VS = dyn_cast<GlobalVarSummary>(I.second)) {
+ for (auto &RI : VS->refs())
+ assignValueId(RI.getGUID());
+ continue;
+ }
+
+ auto *FS = dyn_cast<FunctionSummary>(I.second);
+ if (!FS)
+ continue;
+ for (auto &RI : FS->refs())
+ assignValueId(RI.getGUID());
+
+ for (auto &EI : FS->calls()) {
+ GlobalValue::GUID GUID = EI.first.getGUID();
+ if (!hasValueId(GUID)) {
+        // For SamplePGO, indirect call targets for local functions will
+        // have their original names annotated in the profile. We try to find
+        // the corresponding PGOFuncName as the GUID.
+ GUID = Index.getGUIDFromOriginalID(GUID);
+ if (GUID == 0 || !hasValueId(GUID))
+ continue;
+ }
+ assignValueId(GUID);
+ }
+ }
+
+ for (const auto &GVI : valueIds()) {
+ Stream.EmitRecord(bitc::FS_VALUE_GUID,
+ ArrayRef<uint64_t>{GVI.second, GVI.first});
+ }
+
// Abbrev for FS_COMBINED.
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED));
@@ -3808,10 +3776,7 @@ void ModuleBitcodeWriter::write() {
Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
size_t BlockStartPos = Buffer.size();
- SmallVector<unsigned, 1> Vals;
- unsigned CurVersion = 1;
- Vals.push_back(CurVersion);
- Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals);
+ writeModuleVersion();
// Emit blockinfo, which defines the standard abbreviations etc.
writeBlockInfo();
@@ -3857,8 +3822,7 @@ void ModuleBitcodeWriter::write() {
if (Index)
writePerModuleGlobalValueSummary();
- writeValueSymbolTable(M.getValueSymbolTable(),
- /* IsModuleLevel */ true, &FunctionToBitcodeIndex);
+ writeGlobalValueSymbolTable(FunctionToBitcodeIndex);
writeModuleHash(BlockStartPos);
@@ -3946,13 +3910,45 @@ BitcodeWriter::BitcodeWriter(SmallVectorImpl<char> &Buffer)
writeBitcodeHeader(*Stream);
}
-BitcodeWriter::~BitcodeWriter() = default;
+BitcodeWriter::~BitcodeWriter() { assert(WroteStrtab); }
+
+void BitcodeWriter::writeBlob(unsigned Block, unsigned Record, StringRef Blob) {
+ Stream->EnterSubblock(Block, 3);
+
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(Record));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
+ auto AbbrevNo = Stream->EmitAbbrev(std::move(Abbv));
+
+ Stream->EmitRecordWithBlob(AbbrevNo, ArrayRef<uint64_t>{Record}, Blob);
+
+ Stream->ExitBlock();
+}
+
+void BitcodeWriter::writeStrtab() {
+ assert(!WroteStrtab);
+
+ std::vector<char> Strtab;
+ StrtabBuilder.finalizeInOrder();
+ Strtab.resize(StrtabBuilder.getSize());
+ StrtabBuilder.write((uint8_t *)Strtab.data());
+
+ writeBlob(bitc::STRTAB_BLOCK_ID, bitc::STRTAB_BLOB,
+ {Strtab.data(), Strtab.size()});
+
+ WroteStrtab = true;
+}
+
+void BitcodeWriter::copyStrtab(StringRef Strtab) {
+ writeBlob(bitc::STRTAB_BLOCK_ID, bitc::STRTAB_BLOB, Strtab);
+ WroteStrtab = true;
+}
void BitcodeWriter::writeModule(const Module *M,
bool ShouldPreserveUseListOrder,
const ModuleSummaryIndex *Index,
bool GenerateHash, ModuleHash *ModHash) {
- ModuleBitcodeWriter ModuleWriter(M, Buffer, *Stream,
+ ModuleBitcodeWriter ModuleWriter(M, Buffer, StrtabBuilder, *Stream,
ShouldPreserveUseListOrder, Index,
GenerateHash, ModHash);
ModuleWriter.write();
@@ -3976,6 +3972,7 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out,
BitcodeWriter Writer(Buffer);
Writer.writeModule(M, ShouldPreserveUseListOrder, Index, GenerateHash,
ModHash);
+ Writer.writeStrtab();
if (TT.isOSDarwin() || TT.isOSBinFormatMachO())
emitDarwinBCHeaderAndTrailer(Buffer, TT);
@@ -3987,13 +3984,7 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out,
void IndexBitcodeWriter::write() {
Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
- SmallVector<unsigned, 1> Vals;
- unsigned CurVersion = 1;
- Vals.push_back(CurVersion);
- Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals);
-
- // If we have a VST, write the VSTOFFSET record placeholder.
- writeValueSymbolTableForwardDecl();
+ writeModuleVersion();
// Write the module paths in the combined index.
writeModStrings();
@@ -4001,10 +3992,6 @@ void IndexBitcodeWriter::write() {
// Write the summary combined index records.
writeCombinedGlobalValueSummary();
- // Need a special VST writer for the combined index (we don't have a
- // real VST and real values when this is invoked).
- writeCombinedValueSymbolTable();
-
Stream.ExitBlock();
}
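
Note: the combined-summary path above drops the separate combined VST and instead assigns value IDs to every referenced GUID up front, falling back to the original-name GUID for SamplePGO indirect-call targets before emitting FS_VALUE_GUID records. The following is a minimal standalone sketch of that assign-then-resolve idea; the ValueIdTable type and its members are hypothetical and only the C++ standard library is used, so this is not the LLVM API.

// Sketch: assign small integer value IDs to 64-bit GUIDs, with a fallback
// lookup through a secondary "original ID" table for unknown call targets.
#include <cstdint>
#include <initializer_list>
#include <iostream>
#include <map>
#include <optional>

using GUID = std::uint64_t;

struct ValueIdTable {
  std::map<GUID, unsigned> Ids;           // GUID -> value id
  std::map<GUID, GUID> OriginalToCurrent;  // e.g. PGO original name -> GUID

  unsigned assign(GUID G) {               // idempotent assignment
    auto It = Ids.find(G);
    if (It != Ids.end())
      return It->second;
    unsigned Id = static_cast<unsigned>(Ids.size());
    Ids.emplace(G, Id);
    return Id;
  }

  std::optional<GUID> resolve(GUID G) const {  // fallback lookup
    if (Ids.count(G))
      return G;
    auto It = OriginalToCurrent.find(G);
    if (It != OriginalToCurrent.end() && Ids.count(It->second))
      return It->second;
    return std::nullopt;                  // unknown target: skip it
  }
};

int main() {
  ValueIdTable T;
  T.assign(0x1111);                        // a referenced global
  T.OriginalToCurrent[0xBEEF] = 0x1111;    // profile name aliases it
  for (GUID CallTarget : {GUID(0x1111), GUID(0xBEEF), GUID(0x2222)}) {
    if (auto G = T.resolve(CallTarget))
      std::cout << "record: valueid=" << T.assign(*G) << " refguid=0x"
                << std::hex << *G << std::dec << "\n";
    else
      std::cout << "skip unknown target\n";
  }
}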
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 6c18d56b8272..028c79f3ab6d 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -834,9 +834,9 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
OS << " <- ";
// The second operand is only an offset if it's an immediate.
- bool Deref = MI->getOperand(0).isReg() && MI->getOperand(1).isImm();
- int64_t Offset = Deref ? MI->getOperand(1).getImm() : 0;
-
+ bool Deref = false;
+ bool MemLoc = MI->getOperand(0).isReg() && MI->getOperand(1).isImm();
+ int64_t Offset = MemLoc ? MI->getOperand(1).getImm() : 0;
for (unsigned i = 0; i < Expr->getNumElements(); ++i) {
uint64_t Op = Expr->getElement(i);
if (Op == dwarf::DW_OP_LLVM_fragment) {
@@ -844,7 +844,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
break;
} else if (Deref) {
// We currently don't support extra Offsets or derefs after the first
- // one. Bail out early instead of emitting an incorrect comment
+ // one. Bail out early instead of emitting an incorrect comment.
OS << " [complex expression]";
AP.OutStreamer->emitRawComment(OS.str());
return true;
@@ -899,12 +899,12 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
AP.OutStreamer->emitRawComment(OS.str());
return true;
}
- if (Deref)
+ if (MemLoc || Deref)
OS << '[';
OS << PrintReg(Reg, AP.MF->getSubtarget().getRegisterInfo());
}
- if (Deref)
+ if (MemLoc || Deref)
OS << '+' << Offset << ']';
// NOTE: Want this comment at start of line, don't emit with AddComment.
@@ -1356,7 +1356,7 @@ bool AsmPrinter::doFinalization(Module &M) {
OutContext.getOrCreateSymbol(StringRef("__morestack_addr"));
OutStreamer->EmitLabel(AddrSymbol);
- unsigned PtrSize = M.getDataLayout().getPointerSize(0);
+ unsigned PtrSize = MAI->getCodePointerSize();
OutStreamer->EmitSymbolValue(GetExternalSymbolSymbol("__morestack"),
PtrSize);
}
@@ -2246,7 +2246,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
// chu[nk1 chu][nk2 chu] ... [nkN-1 chunkN]
ExtraBits = Realigned.getRawData()[0] &
(((uint64_t)-1) >> (64 - ExtraBitsSize));
- Realigned = Realigned.lshr(ExtraBitsSize);
+ Realigned.lshrInPlace(ExtraBitsSize);
} else
ExtraBits = Realigned.getRawData()[BitWidth / 64];
}
@@ -2781,7 +2781,7 @@ void AsmPrinter::emitXRayTable() {
// before the function's end, we assume that this is happening after
// the last return instruction.
- auto WordSizeBytes = TM.getPointerSize();
+ auto WordSizeBytes = MAI->getCodePointerSize();
MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true);
OutStreamer->EmitCodeAlignment(16);
OutStreamer->EmitSymbolValue(Tmp, WordSizeBytes, false);
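
Note: the emitDebugValueComment change above distinguishes a memory location (register plus immediate offset on the DBG_VALUE) from an explicit dereference in the expression, and prints "[reg+offset]" when either applies. A standalone sketch of just that formatting decision, with illustrative names rather than the AsmPrinter API:

// Sketch: format the comment as "[reg+off]" for memory/deref locations and
// as a bare register otherwise.
#include <iostream>
#include <sstream>
#include <string>

std::string formatDebugValue(const std::string &Reg, bool MemLoc, bool Deref,
                             long long Offset) {
  std::ostringstream OS;
  if (MemLoc || Deref)
    OS << '[';
  OS << Reg;
  if (MemLoc || Deref)
    OS << '+' << Offset << ']';
  return OS.str();
}

int main() {
  std::cout << formatDebugValue("rbp", /*MemLoc=*/true, /*Deref=*/false, -8)
            << "\n";                                           // [rbp+-8]
  std::cout << formatDebugValue("rax", false, false, 0) << "\n"; // rax
}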
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 383b8cddb1a0..2571f6869651 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -1136,7 +1136,7 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
DITypeRef ElementTypeRef = Ty->getBaseType();
TypeIndex ElementTypeIndex = getTypeIndex(ElementTypeRef);
// IndexType is size_t, which depends on the bitness of the target.
- TypeIndex IndexType = Asm->MAI->getPointerSize() == 8
+ TypeIndex IndexType = Asm->TM.getPointerSize() == 8
? TypeIndex(SimpleTypeKind::UInt64Quad)
: TypeIndex(SimpleTypeKind::UInt32Long);
@@ -1342,8 +1342,8 @@ TypeIndex CodeViewDebug::lowerTypeMemberPointer(const DIDerivedType *Ty) {
assert(Ty->getTag() == dwarf::DW_TAG_ptr_to_member_type);
TypeIndex ClassTI = getTypeIndex(Ty->getClassType());
TypeIndex PointeeTI = getTypeIndex(Ty->getBaseType(), Ty->getClassType());
- PointerKind PK = Asm->MAI->getPointerSize() == 8 ? PointerKind::Near64
- : PointerKind::Near32;
+ PointerKind PK = Asm->TM.getPointerSize() == 8 ? PointerKind::Near64
+ : PointerKind::Near32;
bool IsPMF = isa<DISubroutineType>(Ty->getBaseType());
PointerMode PM = IsPMF ? PointerMode::PointerToMemberFunction
: PointerMode::PointerToDataMember;
@@ -1458,7 +1458,8 @@ TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty,
}
TypeIndex CodeViewDebug::lowerTypeVFTableShape(const DIDerivedType *Ty) {
- unsigned VSlotCount = Ty->getSizeInBits() / (8 * Asm->MAI->getPointerSize());
+ unsigned VSlotCount =
+ Ty->getSizeInBits() / (8 * Asm->MAI->getCodePointerSize());
SmallVector<VFTableSlotKind, 4> Slots(VSlotCount, VFTableSlotKind::Near);
VFTableShapeRecord VFTSR(Slots);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
index b510e0ef36ac..31c2b3b5e752 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -31,6 +31,8 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "dwarfdebug"
+
//===----------------------------------------------------------------------===//
// DIEAbbrevData Implementation
//===----------------------------------------------------------------------===//
@@ -79,15 +81,22 @@ void DIEAbbrev::Emit(const AsmPrinter *AP) const {
dwarf::AttributeString(AttrData.getAttribute()).data());
// Emit form type.
+#ifndef NDEBUG
+ // Could be an assertion, but this way we can see the failing form code
+ // easily, which helps track down where it came from.
+ if (!dwarf::isValidFormForVersion(AttrData.getForm(),
+ AP->getDwarfVersion())) {
+ DEBUG(dbgs() << "Invalid form " << format("0x%x", AttrData.getForm())
+ << " for DWARF version " << AP->getDwarfVersion() << "\n");
+ llvm_unreachable("Invalid form for specified DWARF version");
+ }
+#endif
AP->EmitULEB128(AttrData.getForm(),
dwarf::FormEncodingString(AttrData.getForm()).data());
// Emit value for DW_FORM_implicit_const.
- if (AttrData.getForm() == dwarf::DW_FORM_implicit_const) {
- assert(AP->getDwarfVersion() >= 5 &&
- "DW_FORM_implicit_const is supported starting from DWARFv5");
+ if (AttrData.getForm() == dwarf::DW_FORM_implicit_const)
AP->EmitSLEB128(AttrData.getValue());
- }
}
// Mark end of abbreviation.
@@ -518,7 +527,7 @@ unsigned DIELabel::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
if (Form == dwarf::DW_FORM_sec_offset) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
- return AP->getPointerSize();
+ return AP->MAI->getCodePointerSize();
}
LLVM_DUMP_METHOD
@@ -540,7 +549,7 @@ unsigned DIEDelta::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
if (Form == dwarf::DW_FORM_sec_offset) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
- return AP->getPointerSize();
+ return AP->MAI->getCodePointerSize();
}
LLVM_DUMP_METHOD
@@ -682,7 +691,7 @@ unsigned DIEEntry::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
return getULEB128Size(Entry->getOffset());
case dwarf::DW_FORM_ref_addr:
if (AP->getDwarfVersion() == 2)
- return AP->getPointerSize();
+ return AP->MAI->getCodePointerSize();
switch (AP->OutStreamer->getContext().getDwarfFormat()) {
case dwarf::DWARF32:
return 4;
@@ -808,7 +817,7 @@ unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
return 4;
if (Form == dwarf::DW_FORM_sec_offset)
return 4;
- return AP->getPointerSize();
+ return AP->MAI->getCodePointerSize();
}
/// EmitValue - Emit label value.
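
Note: the DIE.cpp hunk above trades a bare assertion for a check that prints the offending form code before aborting, which makes it easier to track down where an invalid form/version pairing came from. A standalone sketch of that pattern; isValidFormForVersion below is a stand-in table, not the LLVM helper of the same name.

// Sketch: validate a DWARF form against the unit's DWARF version, printing
// the failing form before aborting in debug builds.
#include <cstdio>
#include <cstdlib>

static bool isValidFormForVersion(unsigned Form, unsigned Version) {
  // Illustrative rule only: DW_FORM_implicit_const (0x21) needs DWARF v5+.
  if (Form == 0x21)
    return Version >= 5;
  return true;
}

static void emitForm(unsigned Form, unsigned DwarfVersion) {
  if (!isValidFormForVersion(Form, DwarfVersion)) {
    std::fprintf(stderr, "Invalid form 0x%x for DWARF version %u\n", Form,
                 DwarfVersion);
    std::abort(); // comparable to llvm_unreachable in a +asserts build
  }
  std::printf("emit ULEB128 form 0x%x\n", Form);
}

int main() {
  emitForm(0x0b, 4); // DW_FORM_data1: valid everywhere
  emitForm(0x21, 5); // implicit_const: valid for v5
  // emitForm(0x21, 4); // would print the failing form and abort
}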
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index a550ff2fb90f..738e062cb93f 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -547,18 +547,19 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
for (auto &Fragment : DV.getFrameIndexExprs()) {
unsigned FrameReg = 0;
+ const DIExpression *Expr = Fragment.Expr;
const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
int Offset = TFI->getFrameIndexReference(*Asm->MF, Fragment.FI, FrameReg);
- DwarfExpr.addFragmentOffset(Fragment.Expr);
+ DwarfExpr.addFragmentOffset(Expr);
SmallVector<uint64_t, 8> Ops;
Ops.push_back(dwarf::DW_OP_plus);
Ops.push_back(Offset);
- Ops.push_back(dwarf::DW_OP_deref);
- Ops.append(Fragment.Expr->elements_begin(), Fragment.Expr->elements_end());
- DIExpressionCursor Expr(Ops);
+ Ops.append(Expr->elements_begin(), Expr->elements_end());
+ DIExpressionCursor Cursor(Ops);
+ DwarfExpr.setMemoryLocationKind();
DwarfExpr.addMachineRegExpression(
- *Asm->MF->getSubtarget().getRegisterInfo(), Expr, FrameReg);
- DwarfExpr.addExpression(std::move(Expr));
+ *Asm->MF->getSubtarget().getRegisterInfo(), Cursor, FrameReg);
+ DwarfExpr.addExpression(std::move(Cursor));
}
addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
@@ -779,12 +780,13 @@ void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute,
const MachineLocation &Location) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
+ if (Location.isIndirect())
+ DwarfExpr.setMemoryLocationKind();
SmallVector<uint64_t, 8> Ops;
- if (Location.isIndirect()) {
+ if (Location.isIndirect() && Location.getOffset()) {
Ops.push_back(dwarf::DW_OP_plus);
Ops.push_back(Location.getOffset());
- Ops.push_back(dwarf::DW_OP_deref);
}
DIExpressionCursor Cursor(Ops);
const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
@@ -807,12 +809,13 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
const DIExpression *DIExpr = DV.getSingleExpression();
DwarfExpr.addFragmentOffset(DIExpr);
+ if (Location.isIndirect())
+ DwarfExpr.setMemoryLocationKind();
SmallVector<uint64_t, 8> Ops;
- if (Location.isIndirect()) {
+ if (Location.isIndirect() && Location.getOffset()) {
Ops.push_back(dwarf::DW_OP_plus);
Ops.push_back(Location.getOffset());
- Ops.push_back(dwarf::DW_OP_deref);
}
Ops.append(DIExpr->elements_begin(), DIExpr->elements_end());
DIExpressionCursor Cursor(Ops);
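
Note: in the DwarfCompileUnit.cpp hunks above, an indirect location is now marked as a memory location up front instead of appending an explicit DW_OP_deref, and DW_OP_plus is only emitted when the offset is nonzero. A standalone sketch of that decision; the tiny builder type is illustrative, not the DwarfExpression API, while DW_OP_plus uses its real DWARF value.

// Sketch: build the expression prefix for a possibly-indirect location.
#include <cstdint>
#include <iostream>
#include <vector>

enum : std::uint64_t { DW_OP_plus = 0x22 };

struct LocBuilder {
  bool MemoryLocation = false;
  std::vector<std::uint64_t> Ops;
};

LocBuilder buildLocation(bool Indirect, std::int64_t Offset) {
  LocBuilder B;
  if (Indirect)
    B.MemoryLocation = true;        // the deref becomes implicit
  if (Indirect && Offset != 0) {    // avoid emitting DW_OP_plus 0
    B.Ops.push_back(DW_OP_plus);
    B.Ops.push_back(static_cast<std::uint64_t>(Offset));
  }
  return B;
}

int main() {
  auto B = buildLocation(/*Indirect=*/true, /*Offset=*/16);
  std::cout << "memory=" << B.MemoryLocation << " ops=" << B.Ops.size() << "\n";
}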
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 5ce111309208..d72656bcc58d 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1517,13 +1517,12 @@ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
DwarfExpr.addUnsignedConstant(Value.getInt());
} else if (Value.isLocation()) {
MachineLocation Location = Value.getLoc();
-
+ if (Location.isIndirect())
+ DwarfExpr.setMemoryLocationKind();
SmallVector<uint64_t, 8> Ops;
- // FIXME: Should this condition be Location.isIndirect() instead?
- if (Location.getOffset()) {
+ if (Location.isIndirect() && Location.getOffset()) {
Ops.push_back(dwarf::DW_OP_plus);
Ops.push_back(Location.getOffset());
- Ops.push_back(dwarf::DW_OP_deref);
}
Ops.append(DIExpr->elements_begin(), DIExpr->elements_end());
DIExpressionCursor Cursor(Ops);
@@ -1578,7 +1577,7 @@ void DwarfDebug::emitDebugLoc() {
// Start the dwarf loc section.
Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfLocSection());
- unsigned char Size = Asm->getDataLayout().getPointerSize();
+ unsigned char Size = Asm->MAI->getCodePointerSize();
for (const auto &List : DebugLocs.getLists()) {
Asm->OutStreamer->EmitLabel(List.Label);
const DwarfCompileUnit *CU = List.CU;
@@ -1708,7 +1707,7 @@ void DwarfDebug::emitDebugARanges() {
Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfARangesSection());
- unsigned PtrSize = Asm->getDataLayout().getPointerSize();
+ unsigned PtrSize = Asm->MAI->getCodePointerSize();
// Build a list of CUs used.
std::vector<DwarfCompileUnit *> CUs;
@@ -1791,7 +1790,7 @@ void DwarfDebug::emitDebugRanges() {
Asm->getObjFileLowering().getDwarfRangesSection());
// Size for our labels.
- unsigned char Size = Asm->getDataLayout().getPointerSize();
+ unsigned char Size = Asm->MAI->getCodePointerSize();
// Grab the specific ranges for the compile units in the module.
for (const auto &I : CUMap) {
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index debe88f3b1ee..f65dc151f301 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -23,9 +23,12 @@
using namespace llvm;
void DwarfExpression::addReg(int DwarfReg, const char *Comment) {
- assert(DwarfReg >= 0 && "invalid negative dwarf register number");
- if (DwarfReg < 32) {
- emitOp(dwarf::DW_OP_reg0 + DwarfReg, Comment);
+ assert(DwarfReg >= 0 && "invalid negative dwarf register number");
+ assert((LocationKind == Unknown || LocationKind == Register) &&
+ "location description already locked down");
+ LocationKind = Register;
+ if (DwarfReg < 32) {
+ emitOp(dwarf::DW_OP_reg0 + DwarfReg, Comment);
} else {
emitOp(dwarf::DW_OP_regx, Comment);
emitUnsigned(DwarfReg);
@@ -34,6 +37,7 @@ void DwarfExpression::addReg(int DwarfReg, const char *Comment) {
void DwarfExpression::addBReg(int DwarfReg, int Offset) {
assert(DwarfReg >= 0 && "invalid negative dwarf register number");
+ assert(LocationKind != Register && "location description already locked down");
if (DwarfReg < 32) {
emitOp(dwarf::DW_OP_breg0 + DwarfReg);
} else {
@@ -156,18 +160,23 @@ void DwarfExpression::addStackValue() {
}
void DwarfExpression::addSignedConstant(int64_t Value) {
+ assert(LocationKind == Implicit || LocationKind == Unknown);
+ LocationKind = Implicit;
emitOp(dwarf::DW_OP_consts);
emitSigned(Value);
- addStackValue();
}
void DwarfExpression::addUnsignedConstant(uint64_t Value) {
+ assert(LocationKind == Implicit || LocationKind == Unknown);
+ LocationKind = Implicit;
emitOp(dwarf::DW_OP_constu);
emitUnsigned(Value);
- addStackValue();
}
void DwarfExpression::addUnsignedConstant(const APInt &Value) {
+ assert(LocationKind == Implicit || LocationKind == Unknown);
+ LocationKind = Implicit;
+
unsigned Size = Value.getBitWidth();
const uint64_t *Data = Value.getRawData();
@@ -178,7 +187,8 @@ void DwarfExpression::addUnsignedConstant(const APInt &Value) {
addUnsignedConstant(*Data++);
if (Offset == 0 && Size <= 64)
break;
- addOpPiece(std::min(Size-Offset, 64u), Offset);
+ addStackValue();
+ addOpPiece(std::min(Size - Offset, 64u), Offset);
Offset += 64;
}
}
@@ -206,7 +216,7 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
}
// Handle simple register locations.
- if (!HasComplexExpression) {
+ if (LocationKind != Memory && !HasComplexExpression) {
for (auto &Reg : DwarfRegs) {
if (Reg.DwarfRegNo >= 0)
addReg(Reg.DwarfRegNo, Reg.Comment);
@@ -216,62 +226,65 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
return true;
}
+ // Don't emit locations that cannot be expressed without DW_OP_stack_value.
+ if (DwarfVersion < 4)
+ if (std::any_of(ExprCursor.begin(), ExprCursor.end(),
+ [](DIExpression::ExprOperand Op) -> bool {
+ return Op.getOp() == dwarf::DW_OP_stack_value;
+ })) {
+ DwarfRegs.clear();
+ return false;
+ }
+
assert(DwarfRegs.size() == 1);
auto Reg = DwarfRegs[0];
- bool FBReg = isFrameRegister(TRI, MachineReg);
+ bool FBReg = isFrameRegister(TRI, MachineReg);
+ int SignedOffset = 0;
assert(Reg.Size == 0 && "subregister has same size as superregister");
// Pattern-match combinations for which more efficient representations exist.
- switch (Op->getOp()) {
- default: {
- if (FBReg)
- addFBReg(0);
- else
- addReg(Reg.DwarfRegNo, 0);
- break;
+ // [Reg, Offset, DW_OP_plus] --> [DW_OP_breg, Offset].
+ // [Reg, Offset, DW_OP_minus] --> [DW_OP_breg, -Offset].
+ // If Reg is a subregister we need to mask it out before subtracting.
+ if (Op && ((Op->getOp() == dwarf::DW_OP_plus) ||
+ (Op->getOp() == dwarf::DW_OP_minus && !SubRegisterSizeInBits))) {
+ int Offset = Op->getArg(0);
+ SignedOffset = (Op->getOp() == dwarf::DW_OP_plus) ? Offset : -Offset;
+ ExprCursor.take();
}
- case dwarf::DW_OP_plus:
- case dwarf::DW_OP_minus: {
- // [DW_OP_reg,Offset,DW_OP_plus, DW_OP_deref] --> [DW_OP_breg, Offset].
- // [DW_OP_reg,Offset,DW_OP_minus,DW_OP_deref] --> [DW_OP_breg,-Offset].
- auto N = ExprCursor.peekNext();
- if (N && N->getOp() == dwarf::DW_OP_deref) {
- int Offset = Op->getArg(0);
- int SignedOffset = (Op->getOp() == dwarf::DW_OP_plus) ? Offset : -Offset;
- if (FBReg)
- addFBReg(SignedOffset);
- else
- addBReg(Reg.DwarfRegNo, SignedOffset);
+ if (FBReg)
+ addFBReg(SignedOffset);
+ else
+ addBReg(Reg.DwarfRegNo, SignedOffset);
+ DwarfRegs.clear();
+ return true;
+}
- ExprCursor.consume(2);
+/// Assuming a well-formed expression, match "DW_OP_deref* DW_OP_LLVM_fragment?".
+static bool isMemoryLocation(DIExpressionCursor ExprCursor) {
+ while (ExprCursor) {
+ auto Op = ExprCursor.take();
+ switch (Op->getOp()) {
+ case dwarf::DW_OP_deref:
+ case dwarf::DW_OP_LLVM_fragment:
break;
+ default:
+ return false;
}
- addReg(Reg.DwarfRegNo, 0);
- break;
- }
- case dwarf::DW_OP_deref:
- // [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg].
- if (FBReg)
- addFBReg(0);
- else
- addBReg(Reg.DwarfRegNo, 0);
- ExprCursor.take();
- break;
}
- DwarfRegs.clear();
return true;
}
void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
unsigned FragmentOffsetInBits) {
+ // If we need to mask out a subregister, do it now, unless the next
+ // operation would emit an OpPiece anyway.
+ auto N = ExprCursor.peek();
+ if (SubRegisterSizeInBits && N && (N->getOp() != dwarf::DW_OP_LLVM_fragment))
+ maskSubRegister();
+
while (ExprCursor) {
auto Op = ExprCursor.take();
-
- // If we need to mask out a subregister, do it now, unless the next
- // operation would emit an OpPiece anyway.
- if (SubRegisterSizeInBits && Op->getOp() != dwarf::DW_OP_LLVM_fragment)
- maskSubRegister();
-
switch (Op->getOp()) {
case dwarf::DW_OP_LLVM_fragment: {
unsigned SizeInBits = Op->getArg(1);
@@ -281,50 +294,74 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
// location.
assert(OffsetInBits >= FragmentOffset && "fragment offset not added?");
- // If \a addMachineReg already emitted DW_OP_piece operations to represent
+ // If addMachineReg already emitted DW_OP_piece operations to represent
// a super-register by splicing together sub-registers, subtract the size
// of the pieces that was already emitted.
SizeInBits -= OffsetInBits - FragmentOffset;
- // If \a addMachineReg requested a DW_OP_bit_piece to stencil out a
+ // If addMachineReg requested a DW_OP_bit_piece to stencil out a
// sub-register that is smaller than the current fragment's size, use it.
if (SubRegisterSizeInBits)
SizeInBits = std::min<unsigned>(SizeInBits, SubRegisterSizeInBits);
-
+
+ // Emit a DW_OP_stack_value for implicit location descriptions.
+ if (LocationKind == Implicit)
+ addStackValue();
+
+ // Emit the DW_OP_piece.
addOpPiece(SizeInBits, SubRegisterOffsetInBits);
setSubRegisterPiece(0, 0);
- break;
+ // Reset the location description kind.
+ LocationKind = Unknown;
+ return;
}
case dwarf::DW_OP_plus:
+ assert(LocationKind != Register);
emitOp(dwarf::DW_OP_plus_uconst);
emitUnsigned(Op->getArg(0));
break;
case dwarf::DW_OP_minus:
- // There is no OP_minus_uconst.
+ assert(LocationKind != Register);
+ // There is no DW_OP_minus_uconst.
emitOp(dwarf::DW_OP_constu);
emitUnsigned(Op->getArg(0));
emitOp(dwarf::DW_OP_minus);
break;
- case dwarf::DW_OP_deref:
- emitOp(dwarf::DW_OP_deref);
+ case dwarf::DW_OP_deref: {
+ assert(LocationKind != Register);
+ if (LocationKind != Memory && isMemoryLocation(ExprCursor))
+ // Turning this into a memory location description makes the deref
+ // implicit.
+ LocationKind = Memory;
+ else
+ emitOp(dwarf::DW_OP_deref);
break;
+ }
case dwarf::DW_OP_constu:
+ assert(LocationKind != Register);
emitOp(dwarf::DW_OP_constu);
emitUnsigned(Op->getArg(0));
break;
case dwarf::DW_OP_stack_value:
- addStackValue();
+ assert(LocationKind == Unknown || LocationKind == Implicit);
+ LocationKind = Implicit;
break;
case dwarf::DW_OP_swap:
+ assert(LocationKind != Register);
emitOp(dwarf::DW_OP_swap);
break;
case dwarf::DW_OP_xderef:
+ assert(LocationKind != Register);
emitOp(dwarf::DW_OP_xderef);
break;
default:
llvm_unreachable("unhandled opcode found in expression");
}
}
+
+ if (LocationKind == Implicit)
+ // Turn this into an implicit location description.
+ addStackValue();
}
/// add masking operations to stencil out a subregister.
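
Note: the isMemoryLocation() helper added above decides whether the rest of a DIExpression is "DW_OP_deref* DW_OP_LLVM_fragment?", in which case the first deref can be folded into a memory location description instead of being emitted. A standalone sketch of that matcher over a plain vector of opcodes; the DW_OP_LLVM_fragment value is illustrative and the cursor API is not reproduced.

// Sketch: the remaining expression is a memory location if it contains only
// derefs, optionally followed by a fragment marker.
#include <cstdint>
#include <iostream>
#include <vector>

enum : std::uint64_t {
  DW_OP_deref = 0x06,
  DW_OP_plus = 0x22,
  DW_OP_LLVM_fragment = 0x1000 // vendor-extension value, illustrative only
};

static bool isMemoryLocation(const std::vector<std::uint64_t> &Ops) {
  for (std::uint64_t Op : Ops)
    if (Op != DW_OP_deref && Op != DW_OP_LLVM_fragment)
      return false; // anything else forces an explicit DW_OP_deref
  return true;
}

int main() {
  std::cout << isMemoryLocation({DW_OP_deref, DW_OP_LLVM_fragment}) << "\n"; // 1
  std::cout << isMemoryLocation({DW_OP_plus, DW_OP_deref}) << "\n";          // 0
}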
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
index e8dc211eb3c2..de8613200067 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -72,6 +72,8 @@ public:
}
/// Determine whether there are any operations left in this expression.
operator bool() const { return Start != End; }
+ DIExpression::expr_op_iterator begin() const { return Start; }
+ DIExpression::expr_op_iterator end() const { return End; }
/// Retrieve the fragment information, if any.
Optional<DIExpression::FragmentInfo> getFragmentInfo() const {
@@ -102,6 +104,9 @@ protected:
unsigned SubRegisterSizeInBits = 0;
unsigned SubRegisterOffsetInBits = 0;
+ /// The kind of location description being produced.
+ enum { Unknown = 0, Register, Memory, Implicit } LocationKind = Unknown;
+
/// Push a DW_OP_piece / DW_OP_bit_piece for emitting later, if one is needed
/// to represent a subregister.
void setSubRegisterPiece(unsigned SizeInBits, unsigned OffsetInBits) {
@@ -122,7 +127,8 @@ protected:
/// current function.
virtual bool isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) = 0;
- /// Emit a DW_OP_reg operation.
+ /// Emit a DW_OP_reg operation. Note that this is only legal inside a DWARF
+ /// register location description.
void addReg(int DwarfReg, const char *Comment = nullptr);
/// Emit a DW_OP_breg operation.
void addBReg(int DwarfReg, int Offset);
@@ -185,11 +191,18 @@ public:
/// Emit an unsigned constant.
void addUnsignedConstant(const APInt &Value);
+ /// Lock this down to become a memory location description.
+ void setMemoryLocationKind() {
+ assert(LocationKind == Unknown);
+ LocationKind = Memory;
+ }
+
/// Emit a machine register location. As an optimization this may also consume
/// the prefix of a DwarfExpression if a more efficient representation for
/// combining the register location and the first operation exists.
///
- /// \param FragmentOffsetInBits If this is one fragment out of a fragmented
+ /// \param FragmentOffsetInBits If this is one fragment out of a
+ /// fragmented
/// location, this is the offset of the
/// fragment inside the entire variable.
/// \return false if no DWARF register exists
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index bad5b09553cd..bac0c204d04f 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -27,6 +27,7 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Metadata.h"
#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
@@ -73,8 +74,8 @@ bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
DwarfUnit::DwarfUnit(dwarf::Tag UnitTag, const DICompileUnit *Node,
AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU)
- : DIEUnit(A->getDwarfVersion(), A->getPointerSize(), UnitTag), CUNode(Node),
- Asm(A), DD(DW), DU(DWU), IndexTyDie(nullptr) {
+ : DIEUnit(A->getDwarfVersion(), A->MAI->getCodePointerSize(), UnitTag),
+ CUNode(Node), Asm(A), DD(DW), DU(DWU), IndexTyDie(nullptr) {
}
DwarfTypeUnit::DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A,
@@ -471,12 +472,13 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
// variable's location.
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
+ if (Location.isIndirect())
+ DwarfExpr.setMemoryLocationKind();
SmallVector<uint64_t, 9> Ops;
- if (Location.isIndirect()) {
+ if (Location.isIndirect() && Location.getOffset()) {
Ops.push_back(dwarf::DW_OP_plus);
Ops.push_back(Location.getOffset());
- Ops.push_back(dwarf::DW_OP_deref);
}
// If we started with a pointer to the __Block_byref... struct, then
// the first thing we need to do is dereference the pointer (DW_OP_deref).
@@ -1546,7 +1548,7 @@ void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) {
Asm->OutStreamer->AddComment("DWARF Unit Type");
Asm->EmitInt8(UT);
Asm->OutStreamer->AddComment("Address Size (in bytes)");
- Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
+ Asm->EmitInt8(Asm->MAI->getCodePointerSize());
}
// We share one abbreviations table across all units so it's always at the
@@ -1562,7 +1564,7 @@ void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) {
if (Version <= 4) {
Asm->OutStreamer->AddComment("Address Size (in bytes)");
- Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
+ Asm->EmitInt8(Asm->MAI->getCodePointerSize());
}
}
diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 2bdd189557b4..c862cfd28add 100644
--- a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -570,8 +570,14 @@ bool CodeGenPrepare::splitIndirectCriticalEdges(Function &F) {
ValueToValueMapTy VMap;
BasicBlock *DirectSucc = CloneBasicBlock(Target, VMap, ".clone", &F);
- for (BasicBlock *Pred : OtherPreds)
- Pred->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
+ for (BasicBlock *Pred : OtherPreds) {
+    // If the target branches back to itself, the terminator of the split
+    // block needs to be updated.
+ if (Pred == Target)
+ BodyBlock->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
+ else
+ Pred->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
+ }
// Ok, now fix up the PHIs. We know the two blocks only have PHIs, and that
// they are clones, so the number of PHIs are the same.
@@ -5059,16 +5065,14 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
if (!ShlC)
return false;
uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1);
- auto ShlDemandBits = APInt::getAllOnesValue(BitWidth).lshr(ShiftAmt);
- DemandBits |= ShlDemandBits;
+ DemandBits.setLowBits(BitWidth - ShiftAmt);
break;
}
case llvm::Instruction::Trunc: {
EVT TruncVT = TLI->getValueType(*DL, I->getType());
unsigned TruncBitWidth = TruncVT.getSizeInBits();
- auto TruncBits = APInt::getAllOnesValue(TruncBitWidth).zext(BitWidth);
- DemandBits |= TruncBits;
+ DemandBits.setLowBits(TruncBitWidth);
break;
}
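
Note: the CodeGenPrepare change above rewrites an all-ones value shifted right by ShiftAmt into setLowBits(BitWidth - ShiftAmt); both produce the same mask of the low bits. A quick standalone check of that equivalence on plain 64-bit integers rather than APInt:

// Sketch: all-ones >> ShiftAmt == mask of the low (BitWidth - ShiftAmt) bits.
#include <cassert>
#include <cstdint>

int main() {
  const unsigned BitWidth = 64;
  // ShiftAmt == 0 is the trivial all-ones case; start at 1 to keep the
  // left shift below well defined.
  for (unsigned ShiftAmt = 1; ShiftAmt < BitWidth; ++ShiftAmt) {
    std::uint64_t AllOnesShifted = ~std::uint64_t{0} >> ShiftAmt;
    std::uint64_t LowBits = (std::uint64_t{1} << (BitWidth - ShiftAmt)) - 1;
    assert(AllOnesShifted == LowBits);
  }
  return 0;
}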
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 766187378446..5fb8dfc95d3f 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -381,18 +381,19 @@ bool IRTranslator::translateInsertValue(const User &U,
uint64_t Offset = 8 * DL->getIndexedOffsetInType(Src->getType(), Indices);
unsigned Res = getOrCreateVReg(U);
- const Value &Inserted = *U.getOperand(1);
- MIRBuilder.buildInsert(Res, getOrCreateVReg(*Src), getOrCreateVReg(Inserted),
- Offset);
+ unsigned Inserted = getOrCreateVReg(*U.getOperand(1));
+ MIRBuilder.buildInsert(Res, getOrCreateVReg(*Src), Inserted, Offset);
return true;
}
bool IRTranslator::translateSelect(const User &U,
MachineIRBuilder &MIRBuilder) {
- MIRBuilder.buildSelect(getOrCreateVReg(U), getOrCreateVReg(*U.getOperand(0)),
- getOrCreateVReg(*U.getOperand(1)),
- getOrCreateVReg(*U.getOperand(2)));
+ unsigned Res = getOrCreateVReg(U);
+ unsigned Tst = getOrCreateVReg(*U.getOperand(0));
+ unsigned Op0 = getOrCreateVReg(*U.getOperand(1));
+ unsigned Op1 = getOrCreateVReg(*U.getOperand(2));
+ MIRBuilder.buildSelect(Res, Tst, Op0, Op1);
return true;
}
@@ -984,9 +985,11 @@ bool IRTranslator::translateInsertElement(const User &U,
ValToVReg[&U] = Elt;
return true;
}
- MIRBuilder.buildInsertVectorElement(
- getOrCreateVReg(U), getOrCreateVReg(*U.getOperand(0)),
- getOrCreateVReg(*U.getOperand(1)), getOrCreateVReg(*U.getOperand(2)));
+ unsigned Res = getOrCreateVReg(U);
+ unsigned Val = getOrCreateVReg(*U.getOperand(0));
+ unsigned Elt = getOrCreateVReg(*U.getOperand(1));
+ unsigned Idx = getOrCreateVReg(*U.getOperand(2));
+ MIRBuilder.buildInsertVectorElement(Res, Val, Elt, Idx);
return true;
}
@@ -999,9 +1002,10 @@ bool IRTranslator::translateExtractElement(const User &U,
ValToVReg[&U] = Elt;
return true;
}
- MIRBuilder.buildExtractVectorElement(getOrCreateVReg(U),
- getOrCreateVReg(*U.getOperand(0)),
- getOrCreateVReg(*U.getOperand(1)));
+ unsigned Res = getOrCreateVReg(U);
+ unsigned Val = getOrCreateVReg(*U.getOperand(0));
+ unsigned Idx = getOrCreateVReg(*U.getOperand(1));
+ MIRBuilder.buildExtractVectorElement(Res, Val, Idx);
return true;
}
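
Note: the IRTranslator hunks above hoist the getOrCreateVReg() calls into named locals, most likely because the evaluation order of function arguments is unspecified in C++ and each call has the side effect of creating a virtual register on first use. A standalone illustration of that pitfall, with a counter standing in for the side effect:

// Sketch: argument evaluation order is unspecified; hoisting fixes it.
#include <iostream>

static int NextId = 0;
static int getOrCreate() { return NextId++; } // side effect: allocates an id

static void use(int A, int B, int C) {
  std::cout << A << ' ' << B << ' ' << C << '\n';
}

int main() {
  // Unspecified: the three calls may run in any order across compilers.
  use(getOrCreate(), getOrCreate(), getOrCreate());

  NextId = 0;
  // Deterministic: the statement order pins down the ids.
  int A = getOrCreate();
  int B = getOrCreate();
  int C = getOrCreate();
  use(A, B, C);
}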
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index fb9d01ef8542..942680b6fff3 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -68,23 +68,6 @@ bool InstructionSelector::constrainSelectedInstRegOperands(
return true;
}
-Optional<int64_t>
-InstructionSelector::getConstantVRegVal(unsigned VReg,
- const MachineRegisterInfo &MRI) const {
- MachineInstr *MI = MRI.getVRegDef(VReg);
- if (MI->getOpcode() != TargetOpcode::G_CONSTANT)
- return None;
-
- if (MI->getOperand(1).isImm())
- return MI->getOperand(1).getImm();
-
- if (MI->getOperand(1).isCImm() &&
- MI->getOperand(1).getCImm()->getBitWidth() <= 64)
- return MI->getOperand(1).getCImm()->getSExtValue();
-
- return None;
-}
-
bool InstructionSelector::isOperandImmEqual(
const MachineOperand &MO, int64_t Value,
const MachineRegisterInfo &MRI) const {
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
index 657ddb307919..74ed58e8d049 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -24,6 +24,8 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <iterator>
+
#define DEBUG_TYPE "legalizer"
using namespace llvm;
@@ -161,7 +163,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
// convergence for performance reasons.
bool Changed = false;
MachineBasicBlock::iterator NextMI;
- for (auto &MBB : MF)
+ for (auto &MBB : MF) {
for (auto MI = MBB.begin(); MI != MBB.end(); MI = NextMI) {
// Get the next Instruction before we try to legalize, because there's a
// good chance MI will be deleted.
@@ -171,18 +173,21 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
// and are assumed to be legal.
if (!isPreISelGenericOpcode(MI->getOpcode()))
continue;
+ unsigned NumNewInsns = 0;
SmallVector<MachineInstr *, 4> WorkList;
- Helper.MIRBuilder.recordInsertions(
- [&](MachineInstr *MI) { WorkList.push_back(MI); });
+ Helper.MIRBuilder.recordInsertions([&](MachineInstr *MI) {
+ ++NumNewInsns;
+ WorkList.push_back(MI);
+ });
WorkList.push_back(&*MI);
+ bool Changed = false;
LegalizerHelper::LegalizeResult Res;
unsigned Idx = 0;
do {
Res = Helper.legalizeInstrStep(*WorkList[Idx]);
// Error out if we couldn't legalize this instruction. We may want to
- // fall
- // back to DAG ISel instead in the future.
+ // fall back to DAG ISel instead in the future.
if (Res == LegalizerHelper::UnableToLegalize) {
Helper.MIRBuilder.stopRecordingInsertions();
if (Res == LegalizerHelper::UnableToLegalize) {
@@ -194,10 +199,21 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
}
Changed |= Res == LegalizerHelper::Legalized;
++Idx;
+
+#ifndef NDEBUG
+ if (NumNewInsns) {
+ DEBUG(dbgs() << ".. .. Emitted " << NumNewInsns << " insns\n");
+ for (auto I = WorkList.end() - NumNewInsns, E = WorkList.end();
+ I != E; ++I)
+ DEBUG(dbgs() << ".. .. New MI: "; (*I)->print(dbgs()));
+ NumNewInsns = 0;
+ }
+#endif
} while (Idx < WorkList.size());
Helper.MIRBuilder.stopRecordingInsertions();
}
+ }
MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
@@ -207,7 +223,11 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
// good chance MI will be deleted.
NextMI = std::next(MI);
- Changed |= combineExtracts(*MI, MRI, TII);
+ // combineExtracts erases MI.
+ if (combineExtracts(*MI, MRI, TII)) {
+ Changed = true;
+ continue;
+ }
Changed |= combineMerges(*MI, MRI, TII);
}
}
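
Note: the Legalizer loop above works off a growing worklist: legalizing one instruction may record newly inserted instructions, which are appended and revisited until the index catches up with the end of the list (and combineExtracts now erases the instruction, so the loop must continue rather than touch it again). A standalone sketch of that worklist shape, with strings standing in for machine instructions:

// Sketch: process a worklist whose items may append further items.
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> WorkList = {"G_ADD"};
  std::size_t Idx = 0;
  do {
    std::string Item = WorkList[Idx]; // copy: push_back may reallocate
    std::cout << "legalize " << Item << "\n";
    if (Item == "G_ADD") {
      // Pretend legalization expanded this into two new instructions that
      // must themselves be revisited.
      WorkList.push_back("G_AND");
      WorkList.push_back("G_TRUNC");
    }
    ++Idx;
  } while (Idx < WorkList.size());
  std::cout << "processed " << Idx << " instructions\n";
}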
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 20358f7ee6c2..58778077bc0e 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -24,7 +24,7 @@
#include <sstream>
-#define DEBUG_TYPE "legalize-mir"
+#define DEBUG_TYPE "legalizer"
using namespace llvm;
@@ -35,24 +35,34 @@ LegalizerHelper::LegalizerHelper(MachineFunction &MF)
LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
+ DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));
+
auto Action = LI.getAction(MI, MRI);
switch (std::get<0>(Action)) {
case LegalizerInfo::Legal:
+ DEBUG(dbgs() << ".. Already legal\n");
return AlreadyLegal;
case LegalizerInfo::Libcall:
+ DEBUG(dbgs() << ".. Convert to libcall\n");
return libcall(MI);
case LegalizerInfo::NarrowScalar:
+ DEBUG(dbgs() << ".. Narrow scalar\n");
return narrowScalar(MI, std::get<1>(Action), std::get<2>(Action));
case LegalizerInfo::WidenScalar:
+ DEBUG(dbgs() << ".. Widen scalar\n");
return widenScalar(MI, std::get<1>(Action), std::get<2>(Action));
case LegalizerInfo::Lower:
+ DEBUG(dbgs() << ".. Lower\n");
return lower(MI, std::get<1>(Action), std::get<2>(Action));
case LegalizerInfo::FewerElements:
+ DEBUG(dbgs() << ".. Reduce number of elements\n");
return fewerElementsVector(MI, std::get<1>(Action), std::get<2>(Action));
case LegalizerInfo::Custom:
+ DEBUG(dbgs() << ".. Custom legalization\n");
return LI.legalizeCustom(MI, MRI, MIRBuilder) ? Legalized
: UnableToLegalize;
default:
+ DEBUG(dbgs() << ".. Unable to legalize\n");
return UnableToLegalize;
}
}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 8d1a263395a0..54ef7e5c5a1b 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -592,7 +592,7 @@ MachineInstrBuilder MachineIRBuilder::buildInsertVectorElement(unsigned Res,
LLT EltTy = MRI->getType(Elt);
LLT IdxTy = MRI->getType(Idx);
assert(ResTy.isVector() && ValTy.isVector() && "invalid operand type");
- assert(EltTy.isScalar() && IdxTy.isScalar() && "invalid operand type");
+ assert(IdxTy.isScalar() && "invalid operand type");
assert(ResTy.getNumElements() == ValTy.getNumElements() && "type mismatch");
assert(ResTy.getElementType() == EltTy && "type mismatch");
#endif
@@ -612,7 +612,8 @@ MachineInstrBuilder MachineIRBuilder::buildExtractVectorElement(unsigned Res,
LLT ValTy = MRI->getType(Val);
LLT IdxTy = MRI->getType(Idx);
assert(ValTy.isVector() && "invalid operand type");
- assert(ResTy.isScalar() && IdxTy.isScalar() && "invalid operand type");
+ assert((ResTy.isScalar() || ResTy.isPointer()) && "invalid operand type");
+ assert(IdxTy.isScalar() && "invalid operand type");
assert(ValTy.getElementType() == ResTy && "type mismatch");
#endif
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 606a59680a3d..3c93f8123b0d 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Constants.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -93,3 +94,19 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
R << Msg << ": " << ore::MNV("Inst", MI);
reportGISelFailure(MF, TPC, MORE, R);
}
+
+Optional<int64_t> llvm::getConstantVRegVal(unsigned VReg,
+ const MachineRegisterInfo &MRI) {
+ MachineInstr *MI = MRI.getVRegDef(VReg);
+ if (MI->getOpcode() != TargetOpcode::G_CONSTANT)
+ return None;
+
+ if (MI->getOperand(1).isImm())
+ return MI->getOperand(1).getImm();
+
+ if (MI->getOperand(1).isCImm() &&
+ MI->getOperand(1).getCImm()->getBitWidth() <= 64)
+ return MI->getOperand(1).getCImm()->getSExtValue();
+
+ return None;
+}
diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
index a1cb0a0695bf..b7ab404070b1 100644
--- a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -888,20 +888,10 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
// Debug values are not allowed to affect codegen.
if (MI->isDebugValue()) {
// Modify DBG_VALUE now that the value is in a spill slot.
- bool IsIndirect = MI->isIndirectDebugValue();
- uint64_t Offset = IsIndirect ? MI->getOperand(1).getImm() : 0;
- const MDNode *Var = MI->getDebugVariable();
- const MDNode *Expr = MI->getDebugExpression();
- DebugLoc DL = MI->getDebugLoc();
- DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI);
MachineBasicBlock *MBB = MI->getParent();
- assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
- "Expected inlined-at fields to agree");
- BuildMI(*MBB, MBB->erase(MI), DL, TII.get(TargetOpcode::DBG_VALUE))
- .addFrameIndex(StackSlot)
- .addImm(Offset)
- .addMetadata(Var)
- .addMetadata(Expr);
+ DEBUG(dbgs() << "Modifying debug info due to spill:\t" << *MI);
+ buildDbgValueForSpill(*MBB, MI, *MI, StackSlot);
+ MBB->erase(MI);
continue;
}
diff --git a/contrib/llvm/lib/CodeGen/LowLevelType.cpp b/contrib/llvm/lib/CodeGen/LowLevelType.cpp
index c4b9068fa905..1c682e72fa49 100644
--- a/contrib/llvm/lib/CodeGen/LowLevelType.cpp
+++ b/contrib/llvm/lib/CodeGen/LowLevelType.cpp
@@ -21,10 +21,10 @@ using namespace llvm;
LLT llvm::getLLTForType(Type &Ty, const DataLayout &DL) {
if (auto VTy = dyn_cast<VectorType>(&Ty)) {
auto NumElements = VTy->getNumElements();
- auto ScalarSizeInBits = VTy->getElementType()->getPrimitiveSizeInBits();
+ LLT ScalarTy = getLLTForType(*VTy->getElementType(), DL);
if (NumElements == 1)
- return LLT::scalar(ScalarSizeInBits);
- return LLT::vector(NumElements, ScalarSizeInBits);
+ return ScalarTy;
+ return LLT::vector(NumElements, ScalarTy);
} else if (auto PTy = dyn_cast<PointerType>(&Ty)) {
return LLT::pointer(PTy->getAddressSpace(), DL.getTypeSizeInBits(&Ty));
} else if (Ty.isSized()) {
diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
index c0a8b95ed8a0..4bd5fbfe38e6 100644
--- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
@@ -2351,3 +2351,31 @@ MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB,
BB.insert(I, MI);
return MachineInstrBuilder(MF, MI);
}
+
+MachineInstr *llvm::buildDbgValueForSpill(MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I,
+ const MachineInstr &Orig,
+ int FrameIndex) {
+ const MDNode *Var = Orig.getDebugVariable();
+ auto *Expr = cast_or_null<DIExpression>(Orig.getDebugExpression());
+ bool IsIndirect = Orig.isIndirectDebugValue();
+ uint64_t Offset = IsIndirect ? Orig.getOperand(1).getImm() : 0;
+ DebugLoc DL = Orig.getDebugLoc();
+ assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ // If the DBG_VALUE already was a memory location, add an extra
+ // DW_OP_deref. Otherwise just turning this from a register into a
+ // memory/indirect location is sufficient.
+ if (IsIndirect) {
+ SmallVector<uint64_t, 8> Ops;
+ Ops.push_back(dwarf::DW_OP_deref);
+ if (Expr)
+ Ops.append(Expr->elements_begin(), Expr->elements_end());
+ Expr = DIExpression::get(Expr->getContext(), Ops);
+ }
+ return BuildMI(BB, I, DL, Orig.getDesc())
+ .addFrameIndex(FrameIndex)
+ .addImm(Offset)
+ .addMetadata(Var)
+ .addMetadata(Expr);
+}
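
Note: buildDbgValueForSpill above turns a DBG_VALUE into a frame-index (memory) location; if the value was already indirect, one extra DW_OP_deref is prepended to the expression, otherwise no extra op is needed. A standalone sketch of just that expression rewrite; the helper and types are illustrative, not the MachineInstr API, while DW_OP_deref uses its real DWARF value.

// Sketch: prepend a deref when a spilled DBG_VALUE was already indirect.
#include <cstdint>
#include <iostream>
#include <vector>

enum : std::uint64_t { DW_OP_deref = 0x06 };

std::vector<std::uint64_t> exprForSpill(bool WasIndirect,
                                        std::vector<std::uint64_t> Expr) {
  if (!WasIndirect)
    return Expr;              // register -> memory: nothing extra needed
  std::vector<std::uint64_t> Ops;
  Ops.push_back(DW_OP_deref); // memory -> memory-of-memory: add one deref
  Ops.insert(Ops.end(), Expr.begin(), Expr.end());
  return Ops;
}

int main() {
  auto E = exprForSpill(true, {/* existing expression ops */});
  std::cout << "ops after spill: " << E.size() << "\n"; // 1
}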
diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
index d392c044bd71..84bd670105e1 100644
--- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -2030,6 +2030,8 @@ namespace {
void MachineVerifier::verifyStackFrame() {
unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();
unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
+ if (FrameSetupOpcode == ~0u && FrameDestroyOpcode == ~0u)
+ return;
SmallVector<StackStateOfBB, 8> SPState;
SPState.resize(MF->getNumBlockIDs());
diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
index fd759bc372b2..283d84629f8e 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -304,19 +304,7 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
LiveDbgValueMap[LRI->VirtReg];
for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) {
MachineInstr *DBG = LRIDbgValues[li];
- const MDNode *Var = DBG->getDebugVariable();
- const MDNode *Expr = DBG->getDebugExpression();
- bool IsIndirect = DBG->isIndirectDebugValue();
- uint64_t Offset = IsIndirect ? DBG->getOperand(1).getImm() : 0;
- DebugLoc DL = DBG->getDebugLoc();
- assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
- "Expected inlined-at fields to agree");
- MachineInstr *NewDV =
- BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::DBG_VALUE))
- .addFrameIndex(FI)
- .addImm(Offset)
- .addMetadata(Var)
- .addMetadata(Expr);
+ MachineInstr *NewDV = buildDbgValueForSpill(*MBB, MI, *DBG, FI);
assert(NewDV->getParent() == MBB && "dangling parent pointer");
(void)NewDV;
DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV);
diff --git a/contrib/llvm/lib/CodeGen/SafeStack.cpp b/contrib/llvm/lib/CodeGen/SafeStack.cpp
index fa68411284e7..7fa379d80c6c 100644
--- a/contrib/llvm/lib/CodeGen/SafeStack.cpp
+++ b/contrib/llvm/lib/CodeGen/SafeStack.cpp
@@ -550,7 +550,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
// Replace alloc with the new location.
replaceDbgDeclare(Arg, BasePointer, BasePointer->getNextNode(), DIB,
- /*Deref=*/true, -Offset);
+ /*Deref=*/false, -Offset);
Arg->replaceAllUsesWith(NewArg);
IRB.SetInsertPoint(cast<Instruction>(NewArg)->getNextNode());
IRB.CreateMemCpy(Off, Arg, Size, Arg->getParamAlignment());
@@ -565,7 +565,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
if (Size == 0)
Size = 1; // Don't create zero-sized stack objects.
- replaceDbgDeclareForAlloca(AI, BasePointer, DIB, /*Deref=*/true, -Offset);
+ replaceDbgDeclareForAlloca(AI, BasePointer, DIB, /*Deref=*/false, -Offset);
replaceDbgValueForAlloca(AI, BasePointer, DIB, -Offset);
// Replace uses of the alloca with the new location.
@@ -655,7 +655,7 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
if (AI->hasName() && isa<Instruction>(NewAI))
NewAI->takeName(AI);
- replaceDbgDeclareForAlloca(AI, NewAI, DIB, /*Deref=*/true);
+ replaceDbgDeclareForAlloca(AI, NewAI, DIB, /*Deref=*/false);
AI->replaceAllUsesWith(NewAI);
AI->eraseFromParent();
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4d468551ae24..4702d63cb617 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2146,7 +2146,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (N->getFlags()->hasNoUnsignedWrap())
return N0;
- if (DAG.MaskedValueIsZero(N1, ~APInt::getSignBit(BitWidth))) {
+ if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
// N1 is either 0 or the minimum signed value. If the sub is NSW, then
// N1 must be 0 because negating the minimum signed value is undefined.
if (N->getFlags()->hasNoSignedWrap())
@@ -3705,7 +3705,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
// fold (and (sra)) -> (and (srl)) when possible.
- if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
+ if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// fold (zext_inreg (extload x)) -> (zextload x)
@@ -4225,8 +4225,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return Load;
// Simplify the operands using demanded-bits information.
- if (!VT.isVector() &&
- SimplifyDemandedBits(SDValue(N, 0)))
+ if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
return SDValue();
@@ -5058,8 +5057,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
return Tmp;
// Simplify the expression using non-local knowledge.
- if (!VT.isVector() &&
- SimplifyDemandedBits(SDValue(N, 0)))
+ if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
return SDValue();
@@ -5350,7 +5348,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
DAG.getConstant(c2 - c1, DL, N1.getValueType()));
} else {
- Mask = Mask.lshr(c1 - c2);
+ Mask.lshrInPlace(c1 - c2);
SDLoc DL(N);
Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
DAG.getConstant(c1 - c2, DL, N1.getValueType()));
@@ -5660,7 +5658,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
DAG.getConstant(ShiftAmt, DL0,
getShiftAmountTy(SmallVT)));
AddToWorklist(SmallShift.getNode());
- APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt);
+ APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
SDLoc DL(N);
return DAG.getNode(ISD::AND, DL, VT,
DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
@@ -8300,11 +8298,11 @@ static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
switch (N0.getOpcode()) {
case ISD::AND:
FPOpcode = ISD::FABS;
- SignMask = ~APInt::getSignBit(SourceVT.getSizeInBits());
+ SignMask = ~APInt::getSignMask(SourceVT.getSizeInBits());
break;
case ISD::XOR:
FPOpcode = ISD::FNEG;
- SignMask = APInt::getSignBit(SourceVT.getSizeInBits());
+ SignMask = APInt::getSignMask(SourceVT.getSizeInBits());
break;
// TODO: ISD::OR --> ISD::FNABS?
default:
@@ -8415,7 +8413,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
assert(VT.getSizeInBits() == 128);
SDValue SignBit = DAG.getConstant(
- APInt::getSignBit(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
+ APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
SDValue FlipBit;
if (N0.getOpcode() == ISD::FNEG) {
FlipBit = SignBit;
@@ -8435,7 +8433,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
AddToWorklist(FlipBits.getNode());
return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
}
- APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
+ APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
if (N0.getOpcode() == ISD::FNEG)
return DAG.getNode(ISD::XOR, DL, VT,
NewConv, DAG.getConstant(SignBit, DL, VT));
@@ -8483,7 +8481,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
}
if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
- APInt SignBit = APInt::getSignBit(VT.getSizeInBits() / 2);
+ APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
AddToWorklist(Cst.getNode());
SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
@@ -8504,7 +8502,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
AddToWorklist(FlipBits.getNode());
return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
}
- APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
+ APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
X = DAG.getNode(ISD::AND, SDLoc(X), VT,
X, DAG.getConstant(SignBit, SDLoc(X), VT));
AddToWorklist(X.getNode());
@@ -8687,7 +8685,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
APInt ThisVal = OpVal.trunc(DstBitSize);
Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
- OpVal = OpVal.lshr(DstBitSize);
+ OpVal.lshrInPlace(DstBitSize);
}
// For big endian targets, swap the order of the pieces of each element.
@@ -10315,11 +10313,11 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
if (N0.getValueType().isVector()) {
// For a vector, get a mask such as 0x80... per scalar element
// and splat it.
- SignMask = APInt::getSignBit(N0.getScalarValueSizeInBits());
+ SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
} else {
// For a scalar, just generate 0x80...
- SignMask = APInt::getSignBit(IntVT.getSizeInBits());
+ SignMask = APInt::getSignMask(IntVT.getSizeInBits());
}
SDLoc DL0(N0);
Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
@@ -10420,11 +10418,11 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
if (N0.getValueType().isVector()) {
// For a vector, get a mask such as 0x7f... per scalar element
// and splat it.
- SignMask = ~APInt::getSignBit(N0.getScalarValueSizeInBits());
+ SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
} else {
// For a scalar, just generate 0x7f...
- SignMask = ~APInt::getSignBit(IntVT.getSizeInBits());
+ SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
}
SDLoc DL(N0);
Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
@@ -12375,6 +12373,27 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
return LHS.OffsetFromBase < RHS.OffsetFromBase;
});
+  // Store Merge attempts to merge the lowest stores. This generally
+  // works out, since after a successful merge the remaining stores are
+  // checked again. However, if a non-mergeable store is found first,
+  // e.g., {p[-2], p[0], p[1], p[2], p[3]}, we would fail and miss the
+  // subsequent mergeable cases. To prevent this, we prune such stores
+  // from the front of StoreNodes here.
+
+ unsigned StartIdx = 0;
+ while ((StartIdx + 1 < StoreNodes.size()) &&
+ StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
+ StoreNodes[StartIdx + 1].OffsetFromBase)
+ ++StartIdx;
+
+ // Bail if we don't have enough candidates to merge.
+ if (StartIdx + 1 >= StoreNodes.size())
+ return false;
+
+ if (StartIdx)
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
+
// Scan the memory operations on the chain and find the first non-consecutive
// store memory address.
unsigned NumConsecutiveStores = 0;
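
Note: the pruning added in the hunk above walks the sorted candidate list and drops any leading stores that are not byte-consecutive with their successor, so a stray store such as p[-2] cannot hide the mergeable run behind it. A standalone sketch of the same loop over plain offsets standing in for the store nodes:

// Sketch: prune non-consecutive entries from the front of a sorted list.
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  const std::int64_t ElementSizeBytes = 4;
  // Offsets of {p[-2], p[0], p[1], p[2], p[3]} from the shared base.
  std::vector<std::int64_t> Offsets = {-8, 0, 4, 8, 12};

  std::size_t StartIdx = 0;
  while (StartIdx + 1 < Offsets.size() &&
         Offsets[StartIdx] + ElementSizeBytes != Offsets[StartIdx + 1])
    ++StartIdx;

  if (StartIdx + 1 >= Offsets.size()) {
    std::cout << "nothing to merge\n";
    return 0;
  }
  Offsets.erase(Offsets.begin(), Offsets.begin() + StartIdx);
  std::cout << "candidates left: " << Offsets.size() << "\n"; // 4
}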
@@ -12485,39 +12504,52 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
// When extracting multiple vector elements, try to store them
// in one vector store rather than a sequence of scalar stores.
if (IsExtractVecSrc) {
- LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
- unsigned FirstStoreAS = FirstInChain->getAddressSpace();
- unsigned FirstStoreAlign = FirstInChain->getAlignment();
- unsigned NumStoresToMerge = 0;
- bool IsVec = MemVT.isVector();
- for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
- StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- unsigned StoreValOpcode = St->getValue().getOpcode();
- // This restriction could be loosened.
- // Bail out if any stored values are not elements extracted from a vector.
- // It should be possible to handle mixed sources, but load sources need
- // more careful handling (see the block of code below that handles
- // consecutive loads).
- if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT &&
- StoreValOpcode != ISD::EXTRACT_SUBVECTOR)
- return false;
+ bool RV = false;
+ while (StoreNodes.size() >= 2) {
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ unsigned FirstStoreAlign = FirstInChain->getAlignment();
+ unsigned NumStoresToMerge = 0;
+ bool IsVec = MemVT.isVector();
+ for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ unsigned StoreValOpcode = St->getValue().getOpcode();
+ // This restriction could be loosened.
+ // Bail out if any stored values are not elements extracted from a
+ // vector. It should be possible to handle mixed sources, but load
+ // sources need more careful handling (see the block of code below that
+ // handles consecutive loads).
+ if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT &&
+ StoreValOpcode != ISD::EXTRACT_SUBVECTOR)
+ return false;
- // Find a legal type for the vector store.
- unsigned Elts = i + 1;
- if (IsVec) {
- // When merging vector stores, get the total number of elements.
- Elts *= MemVT.getVectorNumElements();
+ // Find a legal type for the vector store.
+ unsigned Elts = i + 1;
+ if (IsVec) {
+ // When merging vector stores, get the total number of elements.
+ Elts *= MemVT.getVectorNumElements();
+ }
+ EVT Ty =
+ EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
+ bool IsFast;
+ if (TLI.isTypeLegal(Ty) &&
+ TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
+ FirstStoreAlign, &IsFast) &&
+ IsFast)
+ NumStoresToMerge = i + 1;
}
- EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
- bool IsFast;
- if (TLI.isTypeLegal(Ty) &&
- TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
- FirstStoreAlign, &IsFast) && IsFast)
- NumStoresToMerge = i + 1;
- }
- return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumStoresToMerge,
- false, true);
+ bool Merged = MergeStoresOfConstantsOrVecElts(
+ StoreNodes, MemVT, NumStoresToMerge, false, true);
+ if (!Merged)
+ break;
+ // Remove merged stores for next iteration.
+ StoreNodes.erase(StoreNodes.begin(),
+ StoreNodes.begin() + NumStoresToMerge);
+ RV = true;
+ NumConsecutiveStores -= NumStoresToMerge;
+ }
+ return RV;
}
// Below we handle the case of multiple consecutive stores that
@@ -15122,9 +15154,9 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
// Extract the sub element from the constant bit mask.
if (DAG.getDataLayout().isBigEndian()) {
- Bits = Bits.lshr((Split - SubIdx - 1) * NumSubBits);
+ Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
} else {
- Bits = Bits.lshr(SubIdx * NumSubBits);
+ Bits.lshrInPlace(SubIdx * NumSubBits);
}
if (Split > 1)
@@ -16004,7 +16036,7 @@ SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
/// Return true if base is a frame index, which is known not to alias with
/// anything but itself. Provides base object and offset as results.
-static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
+static bool findBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
const GlobalValue *&GV, const void *&CV) {
// Assume it is a primitive operation.
Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr;
@@ -16057,53 +16089,56 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
return false;
// Gather base node and offset information.
- SDValue Base1, Base2;
- int64_t Offset1, Offset2;
- const GlobalValue *GV1, *GV2;
- const void *CV1, *CV2;
- bool isFrameIndex1 = FindBaseOffset(Op0->getBasePtr(),
+ SDValue Base0, Base1;
+ int64_t Offset0, Offset1;
+ const GlobalValue *GV0, *GV1;
+ const void *CV0, *CV1;
+ bool IsFrameIndex0 = findBaseOffset(Op0->getBasePtr(),
+ Base0, Offset0, GV0, CV0);
+ bool IsFrameIndex1 = findBaseOffset(Op1->getBasePtr(),
Base1, Offset1, GV1, CV1);
- bool isFrameIndex2 = FindBaseOffset(Op1->getBasePtr(),
- Base2, Offset2, GV2, CV2);
- // If they have a same base address then check to see if they overlap.
- if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
- return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
- (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
+ // If they have the same base address, then check to see if they overlap.
+ unsigned NumBytes0 = Op0->getMemoryVT().getSizeInBits() >> 3;
+ unsigned NumBytes1 = Op1->getMemoryVT().getSizeInBits() >> 3;
+ if (Base0 == Base1 || (GV0 && (GV0 == GV1)) || (CV0 && (CV0 == CV1)))
+ return !((Offset0 + NumBytes0) <= Offset1 ||
+ (Offset1 + NumBytes1) <= Offset0);
// It is possible for different frame indices to alias each other, mostly
// when tail call optimization reuses return address slots for arguments.
// To catch this case, look up the actual index of frame indices to compute
// the real alias relationship.
- if (isFrameIndex1 && isFrameIndex2) {
+ if (IsFrameIndex0 && IsFrameIndex1) {
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ Offset0 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base0)->getIndex());
Offset1 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
- Offset2 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
- return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
- (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
+ return !((Offset0 + NumBytes0) <= Offset1 ||
+ (Offset1 + NumBytes1) <= Offset0);
}
// Otherwise, if we know what the bases are, and they aren't identical, then
// we know they cannot alias.
- if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
+ if ((IsFrameIndex0 || CV0 || GV0) && (IsFrameIndex1 || CV1 || GV1))
return false;
// If we know required SrcValue1 and SrcValue2 have relatively large alignment
// compared to the size and offset of the access, we may be able to prove they
- // do not alias. This check is conservative for now to catch cases created by
+ // do not alias. This check is conservative for now to catch cases created by
// splitting vector types.
- if ((Op0->getOriginalAlignment() == Op1->getOriginalAlignment()) &&
- (Op0->getSrcValueOffset() != Op1->getSrcValueOffset()) &&
- (Op0->getMemoryVT().getSizeInBits() >> 3 ==
- Op1->getMemoryVT().getSizeInBits() >> 3) &&
- (Op0->getOriginalAlignment() > (Op0->getMemoryVT().getSizeInBits() >> 3))) {
- int64_t OffAlign1 = Op0->getSrcValueOffset() % Op0->getOriginalAlignment();
- int64_t OffAlign2 = Op1->getSrcValueOffset() % Op1->getOriginalAlignment();
+ int64_t SrcValOffset0 = Op0->getSrcValueOffset();
+ int64_t SrcValOffset1 = Op1->getSrcValueOffset();
+ unsigned OrigAlignment0 = Op0->getOriginalAlignment();
+ unsigned OrigAlignment1 = Op1->getOriginalAlignment();
+ if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
+ NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) {
+ int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
+ int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
// There is no overlap between these relatively aligned accesses of similar
- // size, return no alias.
- if ((OffAlign1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign2 ||
- (OffAlign2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign1)
+ // size. Return no alias.
+ if ((OffAlign0 + NumBytes0) <= OffAlign1 ||
+ (OffAlign1 + NumBytes1) <= OffAlign0)
return false;
}
@@ -16115,19 +16150,17 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
UseAA = false;
#endif
+
if (UseAA &&
Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
// Use alias analysis information.
- int64_t MinOffset = std::min(Op0->getSrcValueOffset(),
- Op1->getSrcValueOffset());
- int64_t Overlap1 = (Op0->getMemoryVT().getSizeInBits() >> 3) +
- Op0->getSrcValueOffset() - MinOffset;
- int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) +
- Op1->getSrcValueOffset() - MinOffset;
+ int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
+ int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset;
+ int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset;
AliasResult AAResult =
- AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap1,
+ AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0,
UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
- MemoryLocation(Op1->getMemOperand()->getValue(), Overlap2,
+ MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1,
UseTBAA ? Op1->getAAInfo() : AAMDNodes()));
if (AAResult == NoAlias)
return false;
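
The overlap test used by the rewritten isAlias() hunks above boils down to a half-open interval check: two accesses [Offset, Offset + NumBytes) alias unless one ends at or before the other begins. A minimal standalone sketch, with plain integers standing in for the memory-operand queries:

#include <cassert>
#include <cstdint>

static bool accessesOverlap(int64_t Offset0, uint64_t NumBytes0,
                            int64_t Offset1, uint64_t NumBytes1) {
  return !((Offset0 + (int64_t)NumBytes0) <= Offset1 ||
           (Offset1 + (int64_t)NumBytes1) <= Offset0);
}

int main() {
  assert(accessesOverlap(0, 8, 4, 4));   // [0,8) and [4,8) overlap
  assert(!accessesOverlap(0, 8, 8, 4));  // [0,8) and [8,12) only touch
  assert(!accessesOverlap(16, 4, 0, 8)); // disjoint either way round
  return 0;
}
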
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 0584ab9f60d1..6fb26fc3b73d 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1164,9 +1164,11 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
"Expected inlined-at fields to agree");
if (Op->isReg()) {
Op->setIsDebug(true);
+ // A dbg.declare describes the address of a source variable, so lower it
+ // into an indirect DBG_VALUE.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::DBG_VALUE), false, Op->getReg(), 0,
- DI->getVariable(), DI->getExpression());
+ TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true,
+ Op->getReg(), 0, DI->getVariable(), DI->getExpression());
} else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::DBG_VALUE))
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index fc7cd020fe2e..3bae3bf9ab7c 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1343,7 +1343,7 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State,
// Convert to an integer of the same size.
if (TLI.isTypeLegal(IVT)) {
State.IntValue = DAG.getNode(ISD::BITCAST, DL, IVT, Value);
- State.SignMask = APInt::getSignBit(NumBits);
+ State.SignMask = APInt::getSignMask(NumBits);
State.SignBit = NumBits - 1;
return;
}
@@ -2984,7 +2984,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
EVT NVT = Node->getValueType(0);
APFloat apf(DAG.EVTToAPFloatSemantics(VT),
APInt::getNullValue(VT.getSizeInBits()));
- APInt x = APInt::getSignBit(NVT.getSizeInBits());
+ APInt x = APInt::getSignMask(NVT.getSizeInBits());
(void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven);
Tmp1 = DAG.getConstantFP(apf, dl, VT);
Tmp2 = DAG.getSetCC(dl, getSetCCResultType(VT),
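
The getSignBit -> getSignMask rename in the hunks above does not change behaviour; the value is still a mask with only the sign bit set, and its complement clears the sign for FABS-style folds. A minimal sketch of that arithmetic, with uint64_t standing in for APInt (widths above 64 bits need the real class):

#include <cassert>
#include <cstdint>

static uint64_t getSignMask(unsigned BitWidth) {
  // Only the top bit of a BitWidth-wide value is set.
  return UINT64_C(1) << (BitWidth - 1);
}

int main() {
  const unsigned Bits = 32;
  uint64_t Sign = getSignMask(Bits);   // 0x80000000: xor flips the sign (FNEG)
  uint64_t Abs = ~Sign & 0xFFFFFFFFu;  // 0x7FFFFFFF: and clears the sign (FABS)
  assert(Sign == 0x80000000u && Abs == 0x7FFFFFFFu);
  return 0;
}
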
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 6f2b1b94ce46..c1cb5d9b5235 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -72,7 +72,7 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break;
case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(N, ResNo); break;
case ISD::EXTRACT_VECTOR_ELT:
- R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break;
+ R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N, ResNo); break;
case ISD::FABS: R = SoftenFloatRes_FABS(N, ResNo); break;
case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break;
case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break;
@@ -171,7 +171,10 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) {
}
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo) {
+ // When LegalInHWReg, keep the extracted value in register.
+ if (isLegalInHWReg(N->getValueType(ResNo)))
+ return SDValue(N, ResNo);
SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0));
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
NewOp.getValueType().getVectorElementType(),
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 0a2b680e1c66..154af46c9446 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -925,9 +925,9 @@ SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) {
assert(Op.getValueType().isVector() && "Only applies to vectors!");
unsigned EltWidth = Op.getScalarValueSizeInBits();
EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth);
- unsigned NumElts = Op.getValueType().getVectorNumElements();
+ auto EltCnt = Op.getValueType().getVectorElementCount();
return DAG.getNode(ISD::BITCAST, SDLoc(Op),
- EVT::getVectorVT(*DAG.getContext(), EltNVT, NumElts), Op);
+ EVT::getVectorVT(*DAG.getContext(), EltNVT, EltCnt), Op);
}
SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op,
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 80c939700518..af55a22972a6 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -428,7 +428,7 @@ private:
SDValue SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
SDValue SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo);
- SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_FABS(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_FMINNUM(SDNode *N);
SDValue SoftenFloatRes_FMAXNUM(SDNode *N);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 78fddb5ce8f5..1a7d7b7af5fa 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1293,12 +1293,9 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo,
if ((NumElements & 1) == 0 &&
SrcVT.getSizeInBits() * 2 < DestVT.getSizeInBits()) {
LLVMContext &Ctx = *DAG.getContext();
- EVT NewSrcVT = EVT::getVectorVT(
- Ctx, EVT::getIntegerVT(
- Ctx, SrcVT.getScalarSizeInBits() * 2),
- NumElements);
- EVT SplitSrcVT =
- EVT::getVectorVT(Ctx, SrcVT.getVectorElementType(), NumElements / 2);
+ EVT NewSrcVT = SrcVT.widenIntegerVectorElementType(Ctx);
+ EVT SplitSrcVT = SrcVT.getHalfNumVectorElementsVT(Ctx);
+
EVT SplitLoVT, SplitHiVT;
std::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT);
if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) &&
@@ -3012,8 +3009,8 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
// Don't touch if this will be scalarized.
EVT FinalVT = VSelVT;
while (getTypeAction(FinalVT) == TargetLowering::TypeSplitVector)
- FinalVT = EVT::getVectorVT(Ctx, FinalVT.getVectorElementType(),
- FinalVT.getVectorNumElements() / 2);
+ FinalVT = FinalVT.getHalfNumVectorElementsVT(Ctx);
+
if (FinalVT.getVectorNumElements() == 1)
return SDValue();
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 003ea5030bfc..523f409e6b2c 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -639,12 +639,15 @@ void SelectionDAG::DeallocateNode(SDNode *N) {
// If we have operands, deallocate them.
removeOperands(N);
+ NodeAllocator.Deallocate(AllNodes.remove(N));
+
// Set the opcode to DELETED_NODE to help catch bugs when node
// memory is reallocated.
+ // FIXME: There are places in SDag that have grown a dependency on the opcode
+ // value in the released node.
+ __asan_unpoison_memory_region(&N->NodeType, sizeof(N->NodeType));
N->NodeType = ISD::DELETED_NODE;
- NodeAllocator.Deallocate(AllNodes.remove(N));
-
// If any of the SDDbgValue nodes refer to this SDNode, invalidate
// them and forget about that node.
DbgInfo->erase(N);
@@ -1826,7 +1829,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
std::max((unsigned)getDataLayout().getPrefTypeAlignment(Ty), minAlign);
int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
- return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout()));
+ return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout()));
}
SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
@@ -1839,7 +1842,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
int FrameIdx = MFI.CreateStackObject(Bytes, Align, false);
- return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout()));
+ return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout()));
}
SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
@@ -1955,7 +1958,7 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
/// use this predicate to simplify operations downstream.
bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
unsigned BitWidth = Op.getScalarValueSizeInBits();
- return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth);
+ return MaskedValueIsZero(Op, APInt::getSignMask(BitWidth), Depth);
}
/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
@@ -2330,8 +2333,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
Depth + 1);
- KnownZero = KnownZero.lshr(*ShAmt);
- KnownOne = KnownOne.lshr(*ShAmt);
+ KnownZero.lshrInPlace(*ShAmt);
+ KnownOne.lshrInPlace(*ShAmt);
// High bits are known zero.
KnownZero.setHighBits(ShAmt->getZExtValue());
}
@@ -2340,15 +2343,15 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
Depth + 1);
- KnownZero = KnownZero.lshr(*ShAmt);
- KnownOne = KnownOne.lshr(*ShAmt);
+ KnownZero.lshrInPlace(*ShAmt);
+ KnownOne.lshrInPlace(*ShAmt);
// If we know the value of the sign bit, then we know it is copied across
// the high bits by the shift amount.
- APInt SignBit = APInt::getSignBit(BitWidth);
- SignBit = SignBit.lshr(*ShAmt); // Adjust to where it is now in the mask.
- if (KnownZero.intersects(SignBit)) {
+ APInt SignMask = APInt::getSignMask(BitWidth);
+ SignMask.lshrInPlace(*ShAmt); // Adjust to where it is now in the mask.
+ if (KnownZero.intersects(SignMask)) {
KnownZero.setHighBits(ShAmt->getZExtValue());// New bits are known zero.
- } else if (KnownOne.intersects(SignBit)) {
+ } else if (KnownOne.intersects(SignMask)) {
KnownOne.setHighBits(ShAmt->getZExtValue()); // New bits are known one.
}
}
@@ -2361,14 +2364,14 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
// present in the input.
APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits);
- APInt InSignBit = APInt::getSignBit(EBits);
+ APInt InSignMask = APInt::getSignMask(EBits);
APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, EBits);
// If the sign extended bits are demanded, we know that the sign
// bit is demanded.
- InSignBit = InSignBit.zext(BitWidth);
+ InSignMask = InSignMask.zext(BitWidth);
if (NewBits.getBoolValue())
- InputDemandedBits |= InSignBit;
+ InputDemandedBits |= InSignMask;
computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
Depth + 1);
@@ -2377,10 +2380,10 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
// If the sign bit of the input is known set or clear, then we know the
// top bits of the result.
- if (KnownZero.intersects(InSignBit)) { // Input sign bit known clear
+ if (KnownZero.intersects(InSignMask)) { // Input sign bit known clear
KnownZero |= NewBits;
KnownOne &= ~NewBits;
- } else if (KnownOne.intersects(InSignBit)) { // Input sign bit known set
+ } else if (KnownOne.intersects(InSignMask)) { // Input sign bit known set
KnownOne |= NewBits;
KnownZero &= ~NewBits;
} else { // Input sign bit unknown
@@ -2745,7 +2748,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
// a set bit that isn't the sign bit (otherwise it could be INT_MIN).
KnownOne2.clearBit(BitWidth - 1);
if (KnownOne2.getBoolValue()) {
- KnownZero = APInt::getSignBit(BitWidth);
+ KnownZero = APInt::getSignMask(BitWidth);
break;
}
break;
@@ -2833,7 +2836,7 @@ SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0,
computeKnownBits(N0, N0Zero, N0One);
bool overflow;
- (~N0Zero).uadd_ov(~N1Zero, overflow);
+ (void)(~N0Zero).uadd_ov(~N1Zero, overflow);
if (!overflow)
return OFK_Never;
}
@@ -2874,7 +2877,7 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
// one bit set.
if (Val.getOpcode() == ISD::SRL) {
auto *C = dyn_cast<ConstantSDNode>(Val.getOperand(0));
- if (C && C->getAPIntValue().isSignBit())
+ if (C && C->getAPIntValue().isSignMask())
return true;
}
@@ -2967,7 +2970,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return std::max(Tmp, Tmp2);
case ISD::SRA:
- Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
// SRA X, C -> adds C sign bits.
if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) {
APInt ShiftVal = C->getAPIntValue();
@@ -3130,40 +3133,6 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// result. Otherwise it gives either negative or > bitwidth result
return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0);
}
- case ISD::INSERT_VECTOR_ELT: {
- SDValue InVec = Op.getOperand(0);
- SDValue InVal = Op.getOperand(1);
- SDValue EltNo = Op.getOperand(2);
- unsigned NumElts = InVec.getValueType().getVectorNumElements();
-
- ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo);
- if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) {
- // If we know the element index, split the demand between the
- // source vector and the inserted element.
- unsigned EltIdx = CEltNo->getZExtValue();
-
- // If we demand the inserted element then get its sign bits.
- Tmp = UINT_MAX;
- if (DemandedElts[EltIdx])
- Tmp = ComputeNumSignBits(InVal, Depth + 1);
-
- // If we demand the source vector then get its sign bits, and determine
- // the minimum.
- APInt VectorElts = DemandedElts;
- VectorElts.clearBit(EltIdx);
- if (!!VectorElts) {
- Tmp2 = ComputeNumSignBits(InVec, VectorElts, Depth + 1);
- Tmp = std::min(Tmp, Tmp2);
- }
- } else {
- // Unknown element index, so ignore DemandedElts and demand them all.
- Tmp = ComputeNumSignBits(InVec, Depth + 1);
- Tmp2 = ComputeNumSignBits(InVal, Depth + 1);
- Tmp = std::min(Tmp, Tmp2);
- }
- assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
- return Tmp;
- }
case ISD::EXTRACT_VECTOR_ELT: {
SDValue InVec = Op.getOperand(0);
SDValue EltNo = Op.getOperand(1);
@@ -7607,14 +7576,11 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
std::pair<EVT, EVT> SelectionDAG::GetSplitDestVTs(const EVT &VT) const {
// Currently all types are split in half.
EVT LoVT, HiVT;
- if (!VT.isVector()) {
+ if (!VT.isVector())
LoVT = HiVT = TLI->getTypeToTransformTo(*getContext(), VT);
- } else {
- unsigned NumElements = VT.getVectorNumElements();
- assert(!(NumElements & 1) && "Splitting vector, but not in half!");
- LoVT = HiVT = EVT::getVectorVT(*getContext(), VT.getVectorElementType(),
- NumElements/2);
- }
+ else
+ LoVT = HiVT = VT.getHalfNumVectorElementsVT(*getContext());
+
return std::make_pair(LoVT, HiVT);
}
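
The computeKnownBits SRL hunks above switch to APInt::lshrInPlace(), but the propagation rule itself is unchanged: shift both known-bit masks right and mark the shifted-in high bits as known zero. A standalone sketch with 32-bit masks standing in for APInt:

#include <cassert>
#include <cstdint>

struct KnownBits32 { uint32_t Zero, One; };

static KnownBits32 knownBitsSRL(KnownBits32 In, unsigned ShAmt) {
  In.Zero >>= ShAmt;                 // lshrInPlace(ShAmt)
  In.One >>= ShAmt;
  In.Zero |= ~(UINT32_MAX >> ShAmt); // setHighBits(ShAmt): new bits are zero
  return In;
}

int main() {
  // Value known to be exactly 0x000000F0: One = 0xF0, Zero = ~0xF0.
  KnownBits32 K = {~UINT32_C(0xF0), 0xF0};
  KnownBits32 R = knownBitsSRL(K, 4);
  assert(R.One == 0x0F && R.Zero == ~UINT32_C(0x0F)); // now exactly 0x0F
  return 0;
}
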
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 8708f58f1e63..2c58953ee908 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1151,7 +1151,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end())
return DAG.getFrameIndex(SI->second,
- TLI.getPointerTy(DAG.getDataLayout()));
+ TLI.getFrameIndexTy(DAG.getDataLayout()));
}
// If this is an instruction which fast-isel has deferred, select it now.
@@ -4674,7 +4674,7 @@ static unsigned getUnderlyingArgReg(const SDValue &N) {
/// At the end of instruction selection, they will be inserted to the entry BB.
bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
const Value *V, DILocalVariable *Variable, DIExpression *Expr,
- DILocation *DL, int64_t Offset, bool IsIndirect, const SDValue &N) {
+ DILocation *DL, int64_t Offset, bool IsDbgDeclare, const SDValue &N) {
const Argument *Arg = dyn_cast<Argument>(V);
if (!Arg)
return false;
@@ -4688,6 +4688,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
if (!Variable->getScope()->getSubprogram()->describes(MF.getFunction()))
return false;
+ bool IsIndirect = false;
Optional<MachineOperand> Op;
// Some arguments' frame index is recorded during argument lowering.
if (int FI = FuncInfo.getArgumentFrameIndex(Arg))
@@ -4701,15 +4702,19 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
if (PR)
Reg = PR;
}
- if (Reg)
+ if (Reg) {
Op = MachineOperand::CreateReg(Reg, false);
+ IsIndirect = IsDbgDeclare;
+ }
}
if (!Op) {
// Check if ValueMap has reg number.
DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
- if (VMI != FuncInfo.ValueMap.end())
+ if (VMI != FuncInfo.ValueMap.end()) {
Op = MachineOperand::CreateReg(VMI->second, false);
+ IsIndirect = IsDbgDeclare;
+ }
}
if (!Op && N.getNode())
@@ -4955,8 +4960,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
} else if (isa<Argument>(Address)) {
// Address is an argument, so try to emit its dbg value using
// virtual register info from the FuncInfo.ValueMap.
- EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false,
- N);
+ EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, true, N);
return nullptr;
} else {
SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
@@ -4966,7 +4970,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
} else {
// If Address is an argument then try to emit its dbg value using
// virtual register info from the FuncInfo.ValueMap.
- if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false,
+ if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, true,
N)) {
// If the variable is pinned by an alloca in a dominating bb then
// use StaticAllocaMap.
@@ -5613,7 +5617,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Ops[2];
Ops[0] = getRoot();
Ops[1] =
- DAG.getFrameIndex(FI, TLI.getPointerTy(DAG.getDataLayout()), true);
+ DAG.getFrameIndex(FI, TLI.getFrameIndexTy(DAG.getDataLayout()), true);
unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END);
Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops);
@@ -6626,7 +6630,7 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
unsigned Align = DL.getPrefTypeAlignment(Ty);
MachineFunction &MF = DAG.getMachineFunction();
int SSFI = MF.getFrameInfo().CreateStackObject(TySize, Align, false);
- SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy(DL));
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL));
Chain = DAG.getStore(Chain, Location, OpInfo.CallOperand, StackSlot,
MachinePointerInfo::getFixedStack(MF, SSFI));
OpInfo.CallOperand = StackSlot;
@@ -7389,7 +7393,7 @@ static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx,
} else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) {
const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo();
Ops.push_back(Builder.DAG.getTargetFrameIndex(
- FI->getIndex(), TLI.getPointerTy(Builder.DAG.getDataLayout())));
+ FI->getIndex(), TLI.getFrameIndexTy(Builder.DAG.getDataLayout())));
} else
Ops.push_back(OpVal);
}
@@ -7657,7 +7661,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
DemoteStackIdx = MF.getFrameInfo().CreateStackObject(TySize, Align, false);
Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy);
- DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy(DL));
+ DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getFrameIndexTy(DL));
ArgListEntry Entry;
Entry.Node = DemoteStackSlot;
Entry.Ty = StackSlotPtrType;
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index c6acc09b6602..9e34590cc39c 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -928,7 +928,7 @@ private:
/// instruction selection, they will be inserted to the entry BB.
bool EmitFuncArgumentDbgValue(const Value *V, DILocalVariable *Variable,
DIExpression *Expr, DILocation *DL,
- int64_t Offset, bool IsIndirect,
+ int64_t Offset, bool IsDbgDeclare,
const SDValue &N);
/// Return the next block after MBB, or nullptr if there is none.
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 2756e276c6a9..93c6738f650d 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -574,7 +574,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// using the bits from the RHS. Below, we use knowledge about the RHS to
// simplify the LHS, here we're using information from the LHS to simplify
// the RHS.
- if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (ConstantSDNode *RHSC = isConstOrConstSplat(Op.getOperand(1))) {
SDValue Op0 = Op.getOperand(0);
APInt LHSZero, LHSOne;
// Do not increment Depth here; that can cause an infinite loop.
@@ -715,7 +715,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If the RHS is a constant, see if we can simplify it.
// for XOR, we prefer to force bits to 1 if they will make a -1.
// If we can't force bits, try to shrink the constant.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) {
APInt Expanded = C->getAPIntValue() | (~NewMask);
// If we can expand it to have all bits set, do it.
if (Expanded.isAllOnesValue()) {
@@ -778,7 +778,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If (1) we only need the sign-bit, (2) the setcc operands are the same
// width as the setcc result, and (3) the result of a setcc conforms to 0 or
// -1, we may be able to bypass the setcc.
- if (NewMask.isSignBit() && Op0.getScalarValueSizeInBits() == BitWidth &&
+ if (NewMask.isSignMask() && Op0.getScalarValueSizeInBits() == BitWidth &&
getBooleanContents(Op.getValueType()) ==
BooleanContent::ZeroOrNegativeOneBooleanContent) {
// If we're testing X < 0, then this compare isn't needed - just use X!
@@ -839,7 +839,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
SDValue InnerOp = InOp.getNode()->getOperand(0);
EVT InnerVT = InnerOp.getValueType();
unsigned InnerBits = InnerVT.getSizeInBits();
- if (ShAmt < InnerBits && NewMask.lshr(InnerBits) == 0 &&
+ if (ShAmt < InnerBits && NewMask.getActiveBits() <= InnerBits &&
isTypeDesirableForOp(ISD::SHL, InnerVT)) {
EVT ShTy = getShiftAmountTy(InnerVT, DL);
if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
@@ -861,12 +861,12 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
InnerOp.getOpcode() == ISD::SRL &&
InnerOp.hasOneUse() &&
isa<ConstantSDNode>(InnerOp.getOperand(1))) {
- uint64_t InnerShAmt = cast<ConstantSDNode>(InnerOp.getOperand(1))
+ unsigned InnerShAmt = cast<ConstantSDNode>(InnerOp.getOperand(1))
->getZExtValue();
if (InnerShAmt < ShAmt &&
InnerShAmt < InnerBits &&
- NewMask.lshr(InnerBits - InnerShAmt + ShAmt) == 0 &&
- NewMask.trunc(ShAmt) == 0) {
+ NewMask.getActiveBits() <= (InnerBits - InnerShAmt + ShAmt) &&
+ NewMask.countTrailingZeros() >= ShAmt) {
SDValue NewSA =
TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
Op.getOperand(1).getValueType());
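
The SimplifyDemandedBits hunks above replace two mask tests with equivalent, cheaper forms: mask.lshr(K) == 0 becomes mask.getActiveBits() <= K, and mask.trunc(ShAmt) == 0 becomes mask.countTrailingZeros() >= ShAmt. A standalone check of both equivalences on a 64-bit mask, with hand-rolled helpers standing in for the APInt methods:

#include <cassert>
#include <cstdint>

static unsigned activeBits(uint64_t M) { // index of the highest set bit, plus 1
  unsigned N = 0;
  while (M) { ++N; M >>= 1; }
  return N;
}

static unsigned trailingZeros(uint64_t M) {
  if (!M) return 64;
  unsigned N = 0;
  while (!(M & 1)) { ++N; M >>= 1; }
  return N;
}

int main() {
  uint64_t Mask = 0x0000FF00; // bits 8..15 demanded
  assert((activeBits(Mask) <= 16) == ((Mask >> 16) == 0));
  assert((trailingZeros(Mask) >= 8) == ((Mask & 0xFF) == 0));
  return 0;
}
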
@@ -929,8 +929,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
KnownZero, KnownOne, TLO, Depth+1))
return true;
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- KnownZero = KnownZero.lshr(ShAmt);
- KnownOne = KnownOne.lshr(ShAmt);
+ KnownZero.lshrInPlace(ShAmt);
+ KnownOne.lshrInPlace(ShAmt);
KnownZero.setHighBits(ShAmt); // High bits known zero.
}
@@ -964,21 +964,21 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// demand the input sign bit.
APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
if (HighBits.intersects(NewMask))
- InDemandedMask |= APInt::getSignBit(VT.getScalarSizeInBits());
+ InDemandedMask |= APInt::getSignMask(VT.getScalarSizeInBits());
if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask,
KnownZero, KnownOne, TLO, Depth+1))
return true;
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- KnownZero = KnownZero.lshr(ShAmt);
- KnownOne = KnownOne.lshr(ShAmt);
+ KnownZero.lshrInPlace(ShAmt);
+ KnownOne.lshrInPlace(ShAmt);
// Handle the sign bit, adjusted to where it is now in the mask.
- APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt);
+ APInt SignMask = APInt::getSignMask(BitWidth).lshr(ShAmt);
// If the input sign bit is known to be zero, or if none of the top bits
// are demanded, turn this into an unsigned shift right.
- if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) {
+ if (KnownZero.intersects(SignMask) || (HighBits & ~NewMask) == HighBits) {
SDNodeFlags Flags;
Flags.setExact(cast<BinaryWithFlagsSDNode>(Op)->Flags.hasExact());
return TLO.CombineTo(Op,
@@ -996,7 +996,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
Op.getOperand(0), NewSA));
}
- if (KnownOne.intersects(SignBit))
+ if (KnownOne.intersects(SignMask))
// New bits are known one.
KnownOne |= HighBits;
}
@@ -1040,7 +1040,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
return TLO.CombineTo(Op, Op.getOperand(0));
APInt InSignBit =
- APInt::getSignBit(ExVT.getScalarSizeInBits()).zext(BitWidth);
+ APInt::getSignMask(ExVT.getScalarSizeInBits()).zext(BitWidth);
APInt InputDemandedBits =
APInt::getLowBitsSet(BitWidth,
ExVT.getScalarSizeInBits()) &
@@ -1205,20 +1205,23 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
getShiftAmountTy(Op.getValueType(), DL));
}
- APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
- OperandBitWidth - BitWidth);
- HighBits = HighBits.lshr(ShAmt->getZExtValue()).trunc(BitWidth);
-
- if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) {
- // None of the shifted in bits are needed. Add a truncate of the
- // shift input, then shift it.
- SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl,
- Op.getValueType(),
- In.getOperand(0));
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,
- Op.getValueType(),
- NewTrunc,
- Shift));
+ if (ShAmt->getZExtValue() < BitWidth) {
+ APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
+ OperandBitWidth - BitWidth);
+ HighBits.lshrInPlace(ShAmt->getZExtValue());
+ HighBits = HighBits.trunc(BitWidth);
+
+ if (!(HighBits & NewMask)) {
+ // None of the shifted in bits are needed. Add a truncate of the
+ // shift input, then shift it.
+ SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl,
+ Op.getValueType(),
+ In.getOperand(0));
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,
+ Op.getValueType(),
+ NewTrunc,
+ Shift));
+ }
}
break;
}
@@ -1247,7 +1250,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (!TLO.LegalOperations() &&
!Op.getValueType().isVector() &&
!Op.getOperand(0).getValueType().isVector() &&
- NewMask == APInt::getSignBit(Op.getValueSizeInBits()) &&
+ NewMask == APInt::getSignMask(Op.getValueSizeInBits()) &&
Op.getOperand(0).getValueType().isFloatingPoint()) {
bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType());
bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
@@ -2055,7 +2058,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
} else {
ShiftBits = C1.countTrailingZeros();
}
- NewC = NewC.lshr(ShiftBits);
+ NewC.lshrInPlace(ShiftBits);
if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
isLegalICmpImmediate(NewC.getSExtValue())) {
auto &DL = DAG.getDataLayout();
@@ -3353,7 +3356,7 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
SDValue Bias = DAG.getConstant(127, dl, IntVT);
- SDValue SignMask = DAG.getConstant(APInt::getSignBit(VT.getSizeInBits()), dl,
+ SDValue SignMask = DAG.getConstant(APInt::getSignMask(VT.getSizeInBits()), dl,
IntVT);
SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, dl, IntVT);
SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
index cbce2dc89deb..bbb19b5e998d 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -579,7 +579,7 @@ DWARFContext::getInliningInfoForAddress(uint64_t Address,
return InliningInfo;
}
- uint32_t CallFile = 0, CallLine = 0, CallColumn = 0;
+ uint32_t CallFile = 0, CallLine = 0, CallColumn = 0, CallDiscriminator = 0;
for (uint32_t i = 0, n = InlinedChain.size(); i != n; i++) {
DWARFDie &FunctionDIE = InlinedChain[i];
DILineInfo Frame;
@@ -605,10 +605,12 @@ DWARFContext::getInliningInfoForAddress(uint64_t Address,
Spec.FLIKind, Frame.FileName);
Frame.Line = CallLine;
Frame.Column = CallColumn;
+ Frame.Discriminator = CallDiscriminator;
}
// Get call file/line/column of a current DIE.
if (i + 1 < n) {
- FunctionDIE.getCallerFrame(CallFile, CallLine, CallColumn);
+ FunctionDIE.getCallerFrame(CallFile, CallLine, CallColumn,
+ CallDiscriminator);
}
}
InliningInfo.addFrame(Frame);
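
The discriminator threading above follows the existing pattern for call file/line/column: frame i (for i > 0) takes its call-site info from the caller attributes of the next-deeper DIE. A standalone sketch of that loop, with plain structs standing in for DWARFDie and DILineInfo:

#include <cassert>
#include <cstdint>
#include <vector>

struct CallerInfo { uint32_t File, Line, Column, Discriminator; };
struct Frame { uint32_t File = 0, Line = 0, Column = 0, Discriminator = 0; };

static std::vector<Frame> buildFrames(const std::vector<CallerInfo> &Chain) {
  std::vector<Frame> Frames(Chain.size());
  uint32_t CallFile = 0, CallLine = 0, CallColumn = 0, CallDiscriminator = 0;
  for (size_t i = 0; i < Chain.size(); ++i) {
    if (i != 0) {
      // Outer frames are described by where they made the call.
      Frames[i].File = CallFile;
      Frames[i].Line = CallLine;
      Frames[i].Column = CallColumn;
      Frames[i].Discriminator = CallDiscriminator;
    }
    if (i + 1 < Chain.size()) {
      // getCallerFrame() of the current DIE feeds the next (outer) frame.
      CallFile = Chain[i].File;
      CallLine = Chain[i].Line;
      CallColumn = Chain[i].Column;
      CallDiscriminator = Chain[i].Discriminator;
    }
  }
  return Frames;
}

int main() {
  std::vector<CallerInfo> Chain = {{7, 42, 3, 1}, {7, 100, 0, 0}};
  std::vector<Frame> Frames = buildFrames(Chain);
  assert(Frames[1].Line == 42 && Frames[1].Discriminator == 1);
  return 0;
}
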
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
index 4308cc2e2639..24039eb35209 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
@@ -290,10 +290,12 @@ uint64_t DWARFDie::getDeclLine() const {
}
void DWARFDie::getCallerFrame(uint32_t &CallFile, uint32_t &CallLine,
- uint32_t &CallColumn) const {
+ uint32_t &CallColumn,
+ uint32_t &CallDiscriminator) const {
CallFile = toUnsigned(find(DW_AT_call_file), 0);
CallLine = toUnsigned(find(DW_AT_call_line), 0);
CallColumn = toUnsigned(find(DW_AT_call_column), 0);
+ CallDiscriminator = toUnsigned(find(DW_AT_GNU_discriminator), 0);
}
void DWARFDie::dump(raw_ostream &OS, unsigned RecurseDepth,
@@ -350,32 +352,6 @@ void DWARFDie::dump(raw_ostream &OS, unsigned RecurseDepth,
}
}
-void DWARFDie::getInlinedChainForAddress(
- const uint64_t Address, SmallVectorImpl<DWARFDie> &InlinedChain) const {
- if (isNULL())
- return;
- DWARFDie DIE(*this);
- while (DIE) {
- // Append current DIE to inlined chain only if it has correct tag
- // (e.g. it is not a lexical block).
- if (DIE.isSubroutineDIE())
- InlinedChain.push_back(DIE);
-
- // Try to get child which also contains provided address.
- DWARFDie Child = DIE.getFirstChild();
- while (Child) {
- if (Child.addressRangeContainsAddress(Address)) {
- // Assume there is only one such child.
- break;
- }
- Child = Child.getSibling();
- }
- DIE = Child;
- }
- // Reverse the obtained chain to make the root of inlined chain last.
- std::reverse(InlinedChain.begin(), InlinedChain.end());
-}
-
DWARFDie DWARFDie::getParent() const {
if (isValid())
return U->getParent(Die);
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index 4ee8e8f46d2e..c3f467745402 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -343,37 +343,63 @@ void DWARFUnit::collectAddressRanges(DWARFAddressRangesVector &CURanges) {
clearDIEs(true);
}
-DWARFDie
-DWARFUnit::getSubprogramForAddress(uint64_t Address) {
- extractDIEsIfNeeded(false);
- for (const DWARFDebugInfoEntry &D : DieArray) {
- DWARFDie DIE(this, &D);
- if (DIE.isSubprogramDIE() &&
- DIE.addressRangeContainsAddress(Address)) {
- return DIE;
+void DWARFUnit::updateAddressDieMap(DWARFDie Die) {
+ if (Die.isSubroutineDIE()) {
+ for (const auto &R : Die.getAddressRanges()) {
+ // Ignore 0-sized ranges.
+ if (R.first == R.second)
+ continue;
+ auto B = AddrDieMap.upper_bound(R.first);
+ if (B != AddrDieMap.begin() && R.first < (--B)->second.first) {
+ // The range is a sub-range of existing ranges, we need to split the
+ // existing range.
+ if (R.second < B->second.first)
+ AddrDieMap[R.second] = B->second;
+ if (R.first > B->first)
+ AddrDieMap[B->first].first = R.first;
+ }
+ AddrDieMap[R.first] = std::make_pair(R.second, Die);
}
}
- return DWARFDie();
+ // Parent DIEs are added to the AddrDieMap prior to their children DIEs to
+ // simplify the logic that updates AddrDieMap. A child's range is always
+ // equal to or smaller than its parent's range. With this assumption, when
+ // adding one range into the map, it will at most split a range into 3
+ // sub-ranges.
+ for (DWARFDie Child = Die.getFirstChild(); Child; Child = Child.getSibling())
+ updateAddressDieMap(Child);
+}
+
+DWARFDie DWARFUnit::getSubroutineForAddress(uint64_t Address) {
+ extractDIEsIfNeeded(false);
+ if (AddrDieMap.empty())
+ updateAddressDieMap(getUnitDIE());
+ auto R = AddrDieMap.upper_bound(Address);
+ if (R == AddrDieMap.begin())
+ return DWARFDie();
+ // upper_bound's previous item contains Address.
+ --R;
+ if (Address >= R->second.first)
+ return DWARFDie();
+ return R->second.second;
}
void
DWARFUnit::getInlinedChainForAddress(uint64_t Address,
SmallVectorImpl<DWARFDie> &InlinedChain) {
- // First, find a subprogram that contains the given address (the root
- // of inlined chain).
- DWARFDie SubprogramDIE;
+ assert(InlinedChain.empty());
// Try to look for subprogram DIEs in the DWO file.
parseDWO();
- if (DWO)
- SubprogramDIE = DWO->getUnit()->getSubprogramForAddress(Address);
- else
- SubprogramDIE = getSubprogramForAddress(Address);
-
- // Get inlined chain rooted at this subprogram DIE.
- if (SubprogramDIE)
- SubprogramDIE.getInlinedChainForAddress(Address, InlinedChain);
- else
- InlinedChain.clear();
+ // First, find the subroutine that contains the given address (the leaf
+ // of the inlined chain).
+ DWARFDie SubroutineDIE =
+ (DWO ? DWO->getUnit() : this)->getSubroutineForAddress(Address);
+
+ while (SubroutineDIE) {
+ if (SubroutineDIE.isSubroutineDIE())
+ InlinedChain.push_back(SubroutineDIE);
+ SubroutineDIE = SubroutineDIE.getParent();
+ }
}
const DWARFUnitIndex &llvm::getDWARFUnitIndex(DWARFContext &Context,
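
The new AddrDieMap is an ordered map from range start to (range end, DIE), queried with upper_bound. A standalone sketch of the lookup in getSubroutineForAddress(), with an int payload standing in for the DWARFDie (the real code additionally splits overlapping parent ranges while building the map):

#include <cassert>
#include <cstdint>
#include <map>
#include <utility>

using AddrDieMapTy = std::map<uint64_t, std::pair<uint64_t, int>>;

static int lookup(const AddrDieMapTy &Map, uint64_t Address) {
  auto R = Map.upper_bound(Address);
  if (R == Map.begin())
    return -1;                  // no range starts at or below Address
  --R;                          // candidate range possibly containing Address
  if (Address >= R->second.first)
    return -1;                  // Address is past the end of that range
  return R->second.second;
}

int main() {
  AddrDieMapTy Map;
  Map[0x1000] = {0x1100, 1};    // subroutine 1 covers [0x1000, 0x1100)
  Map[0x2000] = {0x2040, 2};    // subroutine 2 covers [0x2000, 0x2040)
  assert(lookup(Map, 0x1010) == 1);
  assert(lookup(Map, 0x1100) == -1); // half-open: the end is not contained
  assert(lookup(Map, 0x2030) == 2);
  assert(lookup(Map, 0x0FFF) == -1);
  return 0;
}
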
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
index e29e9fc2c702..10b4e98b6079 100644
--- a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -1580,7 +1580,7 @@ GenericValue Interpreter::executeBitCastInst(Value *SrcVal, Type *DstTy,
GenericValue Elt;
Elt.IntVal = Elt.IntVal.zext(SrcBitSize);
Elt.IntVal = TempSrc.AggregateVal[i].IntVal;
- Elt.IntVal = Elt.IntVal.lshr(ShiftAmt);
+ Elt.IntVal.lshrInPlace(ShiftAmt);
// it could be DstBitSize == SrcBitSize, so check it
if (DstBitSize < SrcBitSize)
Elt.IntVal = Elt.IntVal.trunc(DstBitSize);
diff --git a/contrib/llvm/lib/IR/Attributes.cpp b/contrib/llvm/lib/IR/Attributes.cpp
index 2b7359dab807..d690111ef210 100644
--- a/contrib/llvm/lib/IR/Attributes.cpp
+++ b/contrib/llvm/lib/IR/Attributes.cpp
@@ -984,20 +984,23 @@ AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index,
}
AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index,
- AttributeSet AS) const {
- if (!AS.hasAttributes())
+ const AttrBuilder &B) const {
+ if (!B.hasAttributes())
return *this;
+ if (!pImpl)
+ return AttributeList::get(C, {{Index, AttributeSet::get(C, B)}});
+
#ifndef NDEBUG
// FIXME it is not obvious how this should work for alignment. For now, say
// we can't change a known alignment.
unsigned OldAlign = getParamAlignment(Index);
- unsigned NewAlign = AS.getAlignment();
+ unsigned NewAlign = B.getAlignment();
assert((!OldAlign || !NewAlign || OldAlign == NewAlign) &&
"Attempt to change alignment!");
#endif
- SmallVector<std::pair<unsigned, AttributeSet>, 4> AttrSet;
+ SmallVector<IndexAttrPair, 4> AttrVec;
uint64_t NumAttrs = pImpl->getNumSlots();
unsigned I;
@@ -1005,31 +1008,25 @@ AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index,
for (I = 0; I < NumAttrs; ++I) {
if (getSlotIndex(I) >= Index)
break;
- AttrSet.emplace_back(getSlotIndex(I), pImpl->getSlotNode(I));
+ AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotNode(I));
}
+ AttrBuilder NewAttrs;
if (I < NumAttrs && getSlotIndex(I) == Index) {
- // We need to merge two AttributeSets.
- AttributeSet Merged = AttributeSet::get(
- C, AttrBuilder(pImpl->getSlotNode(I)).merge(AttrBuilder(AS)));
- AttrSet.emplace_back(Index, Merged);
+ // We need to merge the attribute sets.
+ NewAttrs.merge(pImpl->getSlotNode(I));
++I;
- } else {
- // Otherwise, there were no attributes at this position in the original
- // list. Add the set as is.
- AttrSet.emplace_back(Index, AS);
}
+ NewAttrs.merge(B);
+
+ // Add the new or merged attribute set at this index.
+ AttrVec.emplace_back(Index, AttributeSet::get(C, NewAttrs));
// Add the remaining entries.
for (; I < NumAttrs; ++I)
- AttrSet.emplace_back(getSlotIndex(I), pImpl->getSlotNode(I));
-
- return get(C, AttrSet);
-}
+ AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotNode(I));
-AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index,
- const AttrBuilder &B) const {
- return get(C, Index, AttributeSet::get(C, B));
+ return get(C, AttrVec);
}
AttributeList AttributeList::removeAttribute(LLVMContext &C, unsigned Index,
@@ -1046,46 +1043,7 @@ AttributeList AttributeList::removeAttribute(LLVMContext &C, unsigned Index,
AttributeList AttributeList::removeAttributes(LLVMContext &C, unsigned Index,
AttributeList Attrs) const {
- if (!pImpl)
- return AttributeList();
- if (!Attrs.pImpl) return *this;
-
- // FIXME it is not obvious how this should work for alignment.
- // For now, say we can't pass in alignment, which no current use does.
- assert(!Attrs.hasAttribute(Index, Attribute::Alignment) &&
- "Attempt to change alignment!");
-
- // Add the attribute slots before the one we're trying to add.
- SmallVector<AttributeList, 4> AttrSet;
- uint64_t NumAttrs = pImpl->getNumSlots();
- AttributeList AL;
- uint64_t LastIndex = 0;
- for (unsigned I = 0, E = NumAttrs; I != E; ++I) {
- if (getSlotIndex(I) >= Index) {
- if (getSlotIndex(I) == Index) AL = getSlotAttributes(LastIndex++);
- break;
- }
- LastIndex = I + 1;
- AttrSet.push_back(getSlotAttributes(I));
- }
-
- // Now remove the attribute from the correct slot. There may already be an
- // AttributeList there.
- AttrBuilder B(AL, Index);
-
- for (unsigned I = 0, E = Attrs.pImpl->getNumSlots(); I != E; ++I)
- if (Attrs.getSlotIndex(I) == Index) {
- B.removeAttributes(Attrs.pImpl->getSlotAttributes(I), Index);
- break;
- }
-
- AttrSet.push_back(AttributeList::get(C, Index, B));
-
- // Add the remaining attribute slots.
- for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I)
- AttrSet.push_back(getSlotAttributes(I));
-
- return get(C, AttrSet);
+ return removeAttributes(C, Index, AttrBuilder(Attrs.getAttributes(Index)));
}
AttributeList AttributeList::removeAttributes(LLVMContext &C, unsigned Index,
@@ -1098,31 +1056,30 @@ AttributeList AttributeList::removeAttributes(LLVMContext &C, unsigned Index,
assert(!Attrs.hasAlignmentAttr() && "Attempt to change alignment!");
// Add the attribute slots before the one we're trying to add.
- SmallVector<AttributeList, 4> AttrSet;
+ SmallVector<IndexAttrPair, 4> AttrSets;
uint64_t NumAttrs = pImpl->getNumSlots();
- AttributeList AL;
+ AttrBuilder B;
uint64_t LastIndex = 0;
for (unsigned I = 0, E = NumAttrs; I != E; ++I) {
if (getSlotIndex(I) >= Index) {
- if (getSlotIndex(I) == Index) AL = getSlotAttributes(LastIndex++);
+ if (getSlotIndex(I) == Index)
+ B = AttrBuilder(pImpl->getSlotNode(LastIndex++));
break;
}
LastIndex = I + 1;
- AttrSet.push_back(getSlotAttributes(I));
+ AttrSets.push_back({getSlotIndex(I), pImpl->getSlotNode(I)});
}
- // Now remove the attribute from the correct slot. There may already be an
- // AttributeList there.
- AttrBuilder B(AL, Index);
+ // Remove the attributes from the existing set and add them.
B.remove(Attrs);
-
- AttrSet.push_back(AttributeList::get(C, Index, B));
+ if (B.hasAttributes())
+ AttrSets.push_back({Index, AttributeSet::get(C, B)});
// Add the remaining attribute slots.
for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I)
- AttrSet.push_back(getSlotAttributes(I));
+ AttrSets.push_back({getSlotIndex(I), pImpl->getSlotNode(I)});
- return get(C, AttrSet);
+ return get(C, AttrSets);
}
AttributeList AttributeList::removeAttributes(LLVMContext &C,
@@ -1406,18 +1363,7 @@ AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) {
}
AttrBuilder &AttrBuilder::removeAttributes(AttributeList A, uint64_t Index) {
- unsigned Slot = ~0U;
- for (unsigned I = 0, E = A.getNumSlots(); I != E; ++I)
- if (A.getSlotIndex(I) == Index) {
- Slot = I;
- break;
- }
-
- assert(Slot != ~0U && "Couldn't find index in AttributeList!");
-
- for (AttributeList::iterator I = A.begin(Slot), E = A.end(Slot); I != E;
- ++I) {
- Attribute Attr = *I;
+ for (Attribute Attr : A.getAttributes(Index)) {
if (Attr.isEnumAttribute() || Attr.isIntAttribute()) {
removeAttribute(Attr.getKindAsEnum());
} else {
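
The reworked AttributeList::addAttributes() above walks the sorted slots once: copy slots below Index, merge the incoming builder with the slot at Index if one exists, then copy the rest. A standalone sketch of that walk, with std::set<std::string> standing in for AttrBuilder/AttributeSet:

#include <cassert>
#include <set>
#include <string>
#include <utility>
#include <vector>

using AttrSet = std::set<std::string>;
using IndexAttrPair = std::pair<unsigned, AttrSet>;

static std::vector<IndexAttrPair>
addAttributes(const std::vector<IndexAttrPair> &Slots, unsigned Index,
              const AttrSet &B) {
  std::vector<IndexAttrPair> Out;
  size_t I = 0;
  for (; I < Slots.size() && Slots[I].first < Index; ++I)
    Out.push_back(Slots[I]);           // slots before Index, unchanged
  AttrSet Merged = B;
  if (I < Slots.size() && Slots[I].first == Index) {
    Merged.insert(Slots[I].second.begin(), Slots[I].second.end());
    ++I;                               // existing slot at Index gets merged
  }
  Out.emplace_back(Index, Merged);     // new or merged slot
  for (; I < Slots.size(); ++I)
    Out.push_back(Slots[I]);           // remaining slots, unchanged
  return Out;
}

int main() {
  std::vector<IndexAttrPair> Slots = {{1, {"nonnull"}}, {3, {"noalias"}}};
  auto R = addAttributes(Slots, 1, {"dereferenceable"});
  assert(R.size() == 2 && R[0].second.count("nonnull") &&
         R[0].second.count("dereferenceable"));
  auto S = addAttributes(Slots, 2, {"readonly"});
  assert(S.size() == 3 && S[1].first == 2);
  return 0;
}
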
diff --git a/contrib/llvm/lib/IR/ConstantFold.cpp b/contrib/llvm/lib/IR/ConstantFold.cpp
index bba230677ebf..80b117015ede 100644
--- a/contrib/llvm/lib/IR/ConstantFold.cpp
+++ b/contrib/llvm/lib/IR/ConstantFold.cpp
@@ -223,7 +223,7 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
APInt V = CI->getValue();
if (ByteStart)
- V = V.lshr(ByteStart*8);
+ V.lshrInPlace(ByteStart*8);
V = V.trunc(ByteSize*8);
return ConstantInt::get(CI->getContext(), V);
}
diff --git a/contrib/llvm/lib/IR/ConstantRange.cpp b/contrib/llvm/lib/IR/ConstantRange.cpp
index 8dfd6c8036c4..0cc38b025209 100644
--- a/contrib/llvm/lib/IR/ConstantRange.cpp
+++ b/contrib/llvm/lib/IR/ConstantRange.cpp
@@ -29,8 +29,6 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-/// Initialize a full (the default) or empty set for the specified type.
-///
ConstantRange::ConstantRange(uint32_t BitWidth, bool Full) {
if (Full)
Lower = Upper = APInt::getMaxValue(BitWidth);
@@ -38,8 +36,6 @@ ConstantRange::ConstantRange(uint32_t BitWidth, bool Full) {
Lower = Upper = APInt::getMinValue(BitWidth);
}
-/// Initialize a range to hold the single specified value.
-///
ConstantRange::ConstantRange(APInt V)
: Lower(std::move(V)), Upper(Lower + 1) {}
@@ -232,35 +228,23 @@ ConstantRange::makeGuaranteedNoWrapRegion(Instruction::BinaryOps BinOp,
return Result;
}
-/// isFullSet - Return true if this set contains all of the elements possible
-/// for this data-type
bool ConstantRange::isFullSet() const {
return Lower == Upper && Lower.isMaxValue();
}
-/// isEmptySet - Return true if this set contains no members.
-///
bool ConstantRange::isEmptySet() const {
return Lower == Upper && Lower.isMinValue();
}
-/// isWrappedSet - Return true if this set wraps around the top of the range,
-/// for example: [100, 8)
-///
bool ConstantRange::isWrappedSet() const {
return Lower.ugt(Upper);
}
-/// isSignWrappedSet - Return true if this set wraps around the INT_MIN of
-/// its bitwidth, for example: i8 [120, 140).
-///
bool ConstantRange::isSignWrappedSet() const {
return contains(APInt::getSignedMaxValue(getBitWidth())) &&
contains(APInt::getSignedMinValue(getBitWidth()));
}
-/// getSetSize - Return the number of elements in this set.
-///
APInt ConstantRange::getSetSize() const {
if (isFullSet()) {
APInt Size(getBitWidth()+1, 0);
@@ -272,12 +256,6 @@ APInt ConstantRange::getSetSize() const {
return (Upper - Lower).zext(getBitWidth()+1);
}
-/// isSizeStrictlySmallerThanOf - Compare set size of this range with the range
-/// CR.
-/// This function is faster than comparing results of getSetSize for the two
-/// ranges, because we don't need to extend bitwidth of APInts we're operating
-/// with.
-///
bool
ConstantRange::isSizeStrictlySmallerThanOf(const ConstantRange &Other) const {
assert(getBitWidth() == Other.getBitWidth());
@@ -288,58 +266,44 @@ ConstantRange::isSizeStrictlySmallerThanOf(const ConstantRange &Other) const {
return (Upper - Lower).ult(Other.Upper - Other.Lower);
}
-/// getUnsignedMax - Return the largest unsigned value contained in the
-/// ConstantRange.
-///
APInt ConstantRange::getUnsignedMax() const {
if (isFullSet() || isWrappedSet())
return APInt::getMaxValue(getBitWidth());
return getUpper() - 1;
}
-/// getUnsignedMin - Return the smallest unsigned value contained in the
-/// ConstantRange.
-///
APInt ConstantRange::getUnsignedMin() const {
if (isFullSet() || (isWrappedSet() && getUpper() != 0))
return APInt::getMinValue(getBitWidth());
return getLower();
}
-/// getSignedMax - Return the largest signed value contained in the
-/// ConstantRange.
-///
APInt ConstantRange::getSignedMax() const {
APInt SignedMax(APInt::getSignedMaxValue(getBitWidth()));
if (!isWrappedSet()) {
- if (getLower().sle(getUpper() - 1))
- return getUpper() - 1;
- return SignedMax;
+ APInt UpperMinusOne = getUpper() - 1;
+ if (getLower().sle(UpperMinusOne))
+ return UpperMinusOne;
+ return APInt::getSignedMaxValue(getBitWidth());
}
if (getLower().isNegative() == getUpper().isNegative())
- return SignedMax;
+ return APInt::getSignedMaxValue(getBitWidth());
return getUpper() - 1;
}
-/// getSignedMin - Return the smallest signed value contained in the
-/// ConstantRange.
-///
APInt ConstantRange::getSignedMin() const {
- APInt SignedMin(APInt::getSignedMinValue(getBitWidth()));
if (!isWrappedSet()) {
if (getLower().sle(getUpper() - 1))
return getLower();
- return SignedMin;
+ return APInt::getSignedMinValue(getBitWidth());
}
if ((getUpper() - 1).slt(getLower())) {
- if (getUpper() != SignedMin)
- return SignedMin;
+ if (!getUpper().isMinSignedValue())
+ return APInt::getSignedMinValue(getBitWidth());
}
return getLower();
}
-/// contains - Return true if the specified value is in the set.
-///
bool ConstantRange::contains(const APInt &V) const {
if (Lower == Upper)
return isFullSet();
@@ -349,10 +313,6 @@ bool ConstantRange::contains(const APInt &V) const {
return Lower.ule(V) || V.ult(Upper);
}
-/// contains - Return true if the argument is a subset of this range.
-/// Two equal sets contain each other. The empty set contained by all other
-/// sets.
-///
bool ConstantRange::contains(const ConstantRange &Other) const {
if (isFullSet() || Other.isEmptySet()) return true;
if (isEmptySet() || Other.isFullSet()) return false;
@@ -371,8 +331,6 @@ bool ConstantRange::contains(const ConstantRange &Other) const {
return Other.getUpper().ule(Upper) && Lower.ule(Other.getLower());
}
-/// subtract - Subtract the specified constant from the endpoints of this
-/// constant range.
ConstantRange ConstantRange::subtract(const APInt &Val) const {
assert(Val.getBitWidth() == getBitWidth() && "Wrong bit width");
// If the set is empty or full, don't modify the endpoints.
@@ -381,17 +339,10 @@ ConstantRange ConstantRange::subtract(const APInt &Val) const {
return ConstantRange(Lower - Val, Upper - Val);
}
-/// \brief Subtract the specified range from this range (aka relative complement
-/// of the sets).
ConstantRange ConstantRange::difference(const ConstantRange &CR) const {
return intersectWith(CR.inverse());
}
-/// intersectWith - Return the range that results from the intersection of this
-/// range with another range. The resultant range is guaranteed to include all
-/// elements contained in both input ranges, and to have the smallest possible
-/// set size that does so. Because there may be two intersections with the
-/// same set size, A.intersectWith(B) might not be equal to B.intersectWith(A).
ConstantRange ConstantRange::intersectWith(const ConstantRange &CR) const {
assert(getBitWidth() == CR.getBitWidth() &&
"ConstantRange types don't agree!");
@@ -466,13 +417,6 @@ ConstantRange ConstantRange::intersectWith(const ConstantRange &CR) const {
return CR;
}
-
-/// unionWith - Return the range that results from the union of this range with
-/// another range. The resultant range is guaranteed to include the elements of
-/// both sets, but may contain more. For example, [3, 9) union [12,15) is
-/// [3, 15), which includes 9, 10, and 11, which were not included in either
-/// set before.
-///
ConstantRange ConstantRange::unionWith(const ConstantRange &CR) const {
assert(getBitWidth() == CR.getBitWidth() &&
"ConstantRange types don't agree!");
@@ -593,10 +537,6 @@ ConstantRange ConstantRange::castOp(Instruction::CastOps CastOp,
};
}
-/// zeroExtend - Return a new range in the specified integer type, which must
-/// be strictly larger than the current type. The returned range will
-/// correspond to the possible range of values as if the source range had been
-/// zero extended.
ConstantRange ConstantRange::zeroExtend(uint32_t DstTySize) const {
if (isEmptySet()) return ConstantRange(DstTySize, /*isFullSet=*/false);
@@ -613,10 +553,6 @@ ConstantRange ConstantRange::zeroExtend(uint32_t DstTySize) const {
return ConstantRange(Lower.zext(DstTySize), Upper.zext(DstTySize));
}
-/// signExtend - Return a new range in the specified integer type, which must
-/// be strictly larger than the current type. The returned range will
-/// correspond to the possible range of values as if the source range had been
-/// sign extended.
ConstantRange ConstantRange::signExtend(uint32_t DstTySize) const {
if (isEmptySet()) return ConstantRange(DstTySize, /*isFullSet=*/false);
@@ -635,10 +571,6 @@ ConstantRange ConstantRange::signExtend(uint32_t DstTySize) const {
return ConstantRange(Lower.sext(DstTySize), Upper.sext(DstTySize));
}
-/// truncate - Return a new range in the specified integer type, which must be
-/// strictly smaller than the current type. The returned range will
-/// correspond to the possible range of values as if the source range had been
-/// truncated to the specified type.
ConstantRange ConstantRange::truncate(uint32_t DstTySize) const {
assert(getBitWidth() > DstTySize && "Not a value truncation");
if (isEmptySet())
@@ -690,8 +622,6 @@ ConstantRange ConstantRange::truncate(uint32_t DstTySize) const {
return ConstantRange(DstTySize, /*isFullSet=*/true);
}
-/// zextOrTrunc - make this range have the bit width given by \p DstTySize. The
-/// value is zero extended, truncated, or left alone to make it that width.
ConstantRange ConstantRange::zextOrTrunc(uint32_t DstTySize) const {
unsigned SrcTySize = getBitWidth();
if (SrcTySize > DstTySize)
@@ -701,8 +631,6 @@ ConstantRange ConstantRange::zextOrTrunc(uint32_t DstTySize) const {
return *this;
}
-/// sextOrTrunc - make this range have the bit width given by \p DstTySize. The
-/// value is sign extended, truncated, or left alone to make it that width.
ConstantRange ConstantRange::sextOrTrunc(uint32_t DstTySize) const {
unsigned SrcTySize = getBitWidth();
if (SrcTySize > DstTySize)
@@ -999,8 +927,6 @@ ConstantRange ConstantRange::inverse() const {
return ConstantRange(Upper, Lower);
}
-/// print - Print out the bounds to a stream...
-///
void ConstantRange::print(raw_ostream &OS) const {
if (isFullSet())
OS << "full-set";
@@ -1011,8 +937,6 @@ void ConstantRange::print(raw_ostream &OS) const {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-/// dump - Allow printing from a debugger easily...
-///
LLVM_DUMP_METHOD void ConstantRange::dump() const {
print(dbgs());
}
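The contains() logic kept above distinguishes non-wrapped ranges ([Lower, Upper) with Lower below Upper) from ranges that wrap around the top of the unsigned domain. A minimal standalone sketch of the same half-open membership rule, assuming an 8-bit domain and treating Lower == Upper as the full set (the real class also encodes an empty set that way):

#include <cassert>
#include <cstdint>

// Membership test for a half-open 8-bit range [Lower, Upper), mirroring the
// unsigned comparisons in ConstantRange::contains above.
static bool rangeContains(uint8_t Lower, uint8_t Upper, uint8_t V) {
  if (Lower == Upper)               // assumed to mean the full set here
    return true;
  if (Lower < Upper)                // non-wrapped range
    return Lower <= V && V < Upper;
  return Lower <= V || V < Upper;   // wrapped range, e.g. [250, 5)
}

int main() {
  assert(rangeContains(10, 20, 15));   // inside a non-wrapped range
  assert(!rangeContains(10, 20, 20));  // Upper bound is exclusive
  assert(rangeContains(250, 5, 2));    // wrapped range covers 250..255 and 0..4
  return 0;
}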
diff --git a/contrib/llvm/lib/IR/Constants.cpp b/contrib/llvm/lib/IR/Constants.cpp
index c5f93c9f4db0..ffc8f2e4303b 100644
--- a/contrib/llvm/lib/IR/Constants.cpp
+++ b/contrib/llvm/lib/IR/Constants.cpp
@@ -518,27 +518,19 @@ ConstantInt *ConstantInt::getFalse(LLVMContext &Context) {
}
Constant *ConstantInt::getTrue(Type *Ty) {
- VectorType *VTy = dyn_cast<VectorType>(Ty);
- if (!VTy) {
- assert(Ty->isIntegerTy(1) && "True must be i1 or vector of i1.");
- return ConstantInt::getTrue(Ty->getContext());
- }
- assert(VTy->getElementType()->isIntegerTy(1) &&
- "True must be vector of i1 or i1.");
- return ConstantVector::getSplat(VTy->getNumElements(),
- ConstantInt::getTrue(Ty->getContext()));
+ assert(Ty->getScalarType()->isIntegerTy(1) && "Type not i1 or vector of i1.");
+ ConstantInt *TrueC = ConstantInt::getTrue(Ty->getContext());
+ if (auto *VTy = dyn_cast<VectorType>(Ty))
+ return ConstantVector::getSplat(VTy->getNumElements(), TrueC);
+ return TrueC;
}
Constant *ConstantInt::getFalse(Type *Ty) {
- VectorType *VTy = dyn_cast<VectorType>(Ty);
- if (!VTy) {
- assert(Ty->isIntegerTy(1) && "False must be i1 or vector of i1.");
- return ConstantInt::getFalse(Ty->getContext());
- }
- assert(VTy->getElementType()->isIntegerTy(1) &&
- "False must be vector of i1 or i1.");
- return ConstantVector::getSplat(VTy->getNumElements(),
- ConstantInt::getFalse(Ty->getContext()));
+ assert(Ty->getScalarType()->isIntegerTy(1) && "Type not i1 or vector of i1.");
+ ConstantInt *FalseC = ConstantInt::getFalse(Ty->getContext());
+ if (auto *VTy = dyn_cast<VectorType>(Ty))
+ return ConstantVector::getSplat(VTy->getNumElements(), FalseC);
+ return FalseC;
}
// Get a ConstantInt from an APInt.
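The simplified getTrue/getFalse above accept either i1 or a vector of i1 and splat in the vector case. A hedged usage sketch against the post-patch API; the 4-element width and variable names are arbitrary:

#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Type *I1 = Type::getInt1Ty(Ctx);
  // Scalar form: a plain i1 true constant.
  Constant *ScalarTrue = ConstantInt::getTrue(I1);
  // Vector form: the rewritten code splats the scalar across the elements.
  Constant *VectorTrue = ConstantInt::getTrue(VectorType::get(I1, 4));
  (void)ScalarTrue;
  (void)VectorTrue;
  return 0;
}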
diff --git a/contrib/llvm/lib/IR/Core.cpp b/contrib/llvm/lib/IR/Core.cpp
index b5ed30b85c8a..50292b6e20bf 100644
--- a/contrib/llvm/lib/IR/Core.cpp
+++ b/contrib/llvm/lib/IR/Core.cpp
@@ -863,6 +863,19 @@ LLVMValueRef LLVMMDNode(LLVMValueRef *Vals, unsigned Count) {
return LLVMMDNodeInContext(LLVMGetGlobalContext(), Vals, Count);
}
+LLVMValueRef LLVMMetadataAsValue(LLVMContextRef C, LLVMMetadataRef MD) {
+ return wrap(MetadataAsValue::get(*unwrap(C), unwrap(MD)));
+}
+
+LLVMMetadataRef LLVMValueAsMetadata(LLVMValueRef Val) {
+ auto *V = unwrap(Val);
+ if (auto *C = dyn_cast<Constant>(V))
+ return wrap(ConstantAsMetadata::get(C));
+ if (auto *MAV = dyn_cast<MetadataAsValue>(V))
+ return wrap(MAV->getMetadata());
+ return wrap(ValueAsMetadata::get(V));
+}
+
const char *LLVMGetMDString(LLVMValueRef V, unsigned *Length) {
if (const auto *MD = dyn_cast<MetadataAsValue>(unwrap(V)))
if (const MDString *S = dyn_cast<MDString>(MD->getMetadata())) {
@@ -1883,13 +1896,8 @@ void LLVMRemoveStringAttributeAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx,
void LLVMAddTargetDependentFunctionAttr(LLVMValueRef Fn, const char *A,
const char *V) {
Function *Func = unwrap<Function>(Fn);
- AttributeList::AttrIndex Idx =
- AttributeList::AttrIndex(AttributeList::FunctionIndex);
- AttrBuilder B;
-
- B.addAttribute(A, V);
- AttributeList Set = AttributeList::get(Func->getContext(), Idx, B);
- Func->addAttributes(Idx, Set);
+ Attribute Attr = Attribute::get(Func->getContext(), A, V);
+ Func->addAttribute(AttributeList::FunctionIndex, Attr);
}
/*--.. Operations on parameters ............................................--*/
@@ -1949,9 +1957,7 @@ LLVMValueRef LLVMGetPreviousParam(LLVMValueRef Arg) {
void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align) {
Argument *A = unwrap<Argument>(Arg);
- AttrBuilder B;
- B.addAlignmentAttr(align);
- A->addAttr(AttributeList::get(A->getContext(), A->getArgNo() + 1, B));
+ A->addAttr(Attribute::getWithAlignment(A->getContext(), align));
}
/*--.. Operations on basic blocks ..........................................--*/
@@ -2158,11 +2164,8 @@ void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC) {
void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index,
unsigned align) {
CallSite Call = CallSite(unwrap<Instruction>(Instr));
- AttrBuilder B;
- B.addAlignmentAttr(align);
- Call.setAttributes(Call.getAttributes().addAttributes(
- Call->getContext(), index,
- AttributeList::get(Call->getContext(), index, B)));
+ Attribute AlignAttr = Attribute::getWithAlignment(Call->getContext(), align);
+ Call.addAttribute(index, AlignAttr);
}
void LLVMAddCallSiteAttribute(LLVMValueRef C, LLVMAttributeIndex Idx,
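LLVMValueAsMetadata and LLVMMetadataAsValue added above give the C API a way to hop between the Value and Metadata hierarchies. A short illustrative round trip, assuming only llvm-c/Core.h:

#include "llvm-c/Core.h"

int main() {
  LLVMContextRef Ctx = LLVMContextCreate();
  // Wrap an i32 constant as metadata (ConstantAsMetadata underneath).
  LLVMValueRef FortyTwo = LLVMConstInt(LLVMInt32TypeInContext(Ctx), 42, 0);
  LLVMMetadataRef MD = LLVMValueAsMetadata(FortyTwo);
  // Re-wrap the metadata as a value, e.g. to pass it as an operand.
  LLVMValueRef AsValue = LLVMMetadataAsValue(Ctx, MD);
  (void)AsValue;
  LLVMContextDispose(Ctx);
  return 0;
}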
diff --git a/contrib/llvm/lib/IR/DataLayout.cpp b/contrib/llvm/lib/IR/DataLayout.cpp
index 6f90ce598568..93bacdd2e80f 100644
--- a/contrib/llvm/lib/IR/DataLayout.cpp
+++ b/contrib/llvm/lib/IR/DataLayout.cpp
@@ -608,11 +608,8 @@ unsigned DataLayout::getPointerSize(unsigned AS) const {
unsigned DataLayout::getPointerTypeSizeInBits(Type *Ty) const {
assert(Ty->isPtrOrPtrVectorTy() &&
"This should only be called with a pointer or pointer vector type");
-
- if (Ty->isPointerTy())
- return getTypeSizeInBits(Ty);
-
- return getTypeSizeInBits(Ty->getScalarType());
+ Ty = Ty->getScalarType();
+ return getPointerSizeInBits(cast<PointerType>(Ty)->getAddressSpace());
}
/*!
@@ -624,7 +621,7 @@ unsigned DataLayout::getPointerTypeSizeInBits(Type *Ty) const {
== false) for the requested type \a Ty.
*/
unsigned DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const {
- int AlignType = -1;
+ AlignTypeEnum AlignType;
assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
switch (Ty->getTypeID()) {
@@ -673,8 +670,7 @@ unsigned DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const {
llvm_unreachable("Bad type for getAlignment!!!");
}
- return getAlignmentInfo((AlignTypeEnum)AlignType, getTypeSizeInBits(Ty),
- abi_or_pref, Ty);
+ return getAlignmentInfo(AlignType, getTypeSizeInBits(Ty), abi_or_pref, Ty);
}
unsigned DataLayout::getABITypeAlignment(Type *Ty) const {
diff --git a/contrib/llvm/lib/IR/Function.cpp b/contrib/llvm/lib/IR/Function.cpp
index c4bb9e83acd7..e1f5fdea44e4 100644
--- a/contrib/llvm/lib/IR/Function.cpp
+++ b/contrib/llvm/lib/IR/Function.cpp
@@ -138,13 +138,18 @@ bool Argument::onlyReadsMemory() const {
Attrs.hasParamAttribute(getArgNo(), Attribute::ReadNone);
}
-void Argument::addAttr(AttributeList AS) {
- assert(AS.getNumSlots() <= 1 &&
- "Trying to add more than one attribute set to an argument!");
- AttrBuilder B(AS, AS.getSlotIndex(0));
- getParent()->addAttributes(
- getArgNo() + 1,
- AttributeList::get(Parent->getContext(), getArgNo() + 1, B));
+void Argument::addAttrs(AttrBuilder &B) {
+ AttributeList AL = getParent()->getAttributes();
+ AL = AL.addAttributes(Parent->getContext(), getArgNo() + 1, B);
+ getParent()->setAttributes(AL);
+}
+
+void Argument::addAttr(Attribute::AttrKind Kind) {
+ getParent()->addAttribute(getArgNo() + 1, Kind);
+}
+
+void Argument::addAttr(Attribute Attr) {
+ getParent()->addAttribute(getArgNo() + 1, Attr);
}
void Argument::removeAttr(AttributeList AS) {
@@ -156,6 +161,10 @@ void Argument::removeAttr(AttributeList AS) {
AttributeList::get(Parent->getContext(), getArgNo() + 1, B));
}
+void Argument::removeAttr(Attribute::AttrKind Kind) {
+ getParent()->removeAttribute(getArgNo() + 1, Kind);
+}
+
bool Argument::hasAttribute(Attribute::AttrKind Kind) const {
return getParent()->hasParamAttribute(getArgNo(), Kind);
}
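The new Argument::addAttr overloads above take an Attribute::AttrKind or a full Attribute directly instead of an AttributeList. A sketch of how a pass-style helper might use the kind-based overload; the helper itself and the choice of NonNull are illustrative, not part of the patch:

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"

using namespace llvm;

// Hypothetical helper: mark every pointer parameter of F as nonnull.
static void markPointerArgsNonNull(Function &F) {
  for (Argument &A : F.args())
    if (A.getType()->isPointerTy())
      A.addAttr(Attribute::NonNull); // kind-based overload added above
}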
diff --git a/contrib/llvm/lib/IR/Instructions.cpp b/contrib/llvm/lib/IR/Instructions.cpp
index c10c144122e2..76582e334d1f 100644
--- a/contrib/llvm/lib/IR/Instructions.cpp
+++ b/contrib/llvm/lib/IR/Instructions.cpp
@@ -1855,7 +1855,7 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
return false;
// Mask must be vector of i32.
- VectorType *MaskTy = dyn_cast<VectorType>(Mask->getType());
+ auto *MaskTy = dyn_cast<VectorType>(Mask->getType());
if (!MaskTy || !MaskTy->getElementType()->isIntegerTy(32))
return false;
@@ -1863,10 +1863,10 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
if (isa<UndefValue>(Mask) || isa<ConstantAggregateZero>(Mask))
return true;
- if (const ConstantVector *MV = dyn_cast<ConstantVector>(Mask)) {
+ if (const auto *MV = dyn_cast<ConstantVector>(Mask)) {
unsigned V1Size = cast<VectorType>(V1->getType())->getNumElements();
for (Value *Op : MV->operands()) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ if (auto *CI = dyn_cast<ConstantInt>(Op)) {
if (CI->uge(V1Size*2))
return false;
} else if (!isa<UndefValue>(Op)) {
@@ -1876,8 +1876,7 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
return true;
}
- if (const ConstantDataSequential *CDS =
- dyn_cast<ConstantDataSequential>(Mask)) {
+ if (const auto *CDS = dyn_cast<ConstantDataSequential>(Mask)) {
unsigned V1Size = cast<VectorType>(V1->getType())->getNumElements();
for (unsigned i = 0, e = MaskTy->getNumElements(); i != e; ++i)
if (CDS->getElementAsInteger(i) >= V1Size*2)
@@ -1889,7 +1888,7 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
// used as the shuffle mask. When this occurs, the shuffle mask will
// fall into this case and fail. To avoid this error, do this bit of
// ugliness to allow such a mask pass.
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(Mask))
+ if (const auto *CE = dyn_cast<ConstantExpr>(Mask))
if (CE->getOpcode() == Instruction::UserOp1)
return true;
@@ -1898,7 +1897,7 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
int ShuffleVectorInst::getMaskValue(Constant *Mask, unsigned i) {
assert(i < Mask->getType()->getVectorNumElements() && "Index out of range");
- if (ConstantDataSequential *CDS =dyn_cast<ConstantDataSequential>(Mask))
+ if (auto *CDS = dyn_cast<ConstantDataSequential>(Mask))
return CDS->getElementAsInteger(i);
Constant *C = Mask->getAggregateElement(i);
if (isa<UndefValue>(C))
@@ -1910,7 +1909,7 @@ void ShuffleVectorInst::getShuffleMask(Constant *Mask,
SmallVectorImpl<int> &Result) {
unsigned NumElts = Mask->getType()->getVectorNumElements();
- if (ConstantDataSequential *CDS=dyn_cast<ConstantDataSequential>(Mask)) {
+ if (auto *CDS = dyn_cast<ConstantDataSequential>(Mask)) {
for (unsigned i = 0; i != NumElts; ++i)
Result.push_back(CDS->getElementAsInteger(i));
return;
diff --git a/contrib/llvm/lib/MC/MCDwarf.cpp b/contrib/llvm/lib/MC/MCDwarf.cpp
index cc32e90ad36e..1a320b0165fa 100644
--- a/contrib/llvm/lib/MC/MCDwarf.cpp
+++ b/contrib/llvm/lib/MC/MCDwarf.cpp
@@ -168,7 +168,7 @@ EmitDwarfLineTable(MCObjectStreamer *MCOS, MCSection *Section,
// and the current Label.
const MCAsmInfo *asmInfo = MCOS->getContext().getAsmInfo();
MCOS->EmitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label,
- asmInfo->getPointerSize());
+ asmInfo->getCodePointerSize());
Discriminator = 0;
LastLine = LineEntry.getLine();
@@ -188,7 +188,7 @@ EmitDwarfLineTable(MCObjectStreamer *MCOS, MCSection *Section,
const MCAsmInfo *AsmInfo = Ctx.getAsmInfo();
MCOS->EmitDwarfAdvanceLineAddr(INT64_MAX, LastLabel, SectionEnd,
- AsmInfo->getPointerSize());
+ AsmInfo->getCodePointerSize());
}
//
@@ -594,7 +594,7 @@ static void EmitGenDwarfAranges(MCStreamer *MCOS,
// Figure the padding after the header before the table of address and size
  // pairs whose values are PointerSize'ed.
const MCAsmInfo *asmInfo = context.getAsmInfo();
- int AddrSize = asmInfo->getPointerSize();
+ int AddrSize = asmInfo->getCodePointerSize();
int Pad = 2 * AddrSize - (Length & (2 * AddrSize - 1));
if (Pad == 2 * AddrSize)
Pad = 0;
@@ -677,7 +677,7 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS,
// The DWARF v5 header has unit type, address size, abbrev offset.
// Earlier versions have abbrev offset, address size.
const MCAsmInfo &AsmInfo = *context.getAsmInfo();
- int AddrSize = AsmInfo.getPointerSize();
+ int AddrSize = AsmInfo.getCodePointerSize();
if (context.getDwarfVersion() >= 5) {
MCOS->EmitIntValue(dwarf::DW_UT_compile, 1);
MCOS->EmitIntValue(AddrSize, 1);
@@ -823,7 +823,7 @@ static void EmitGenDwarfRanges(MCStreamer *MCOS) {
auto &Sections = context.getGenDwarfSectionSyms();
const MCAsmInfo *AsmInfo = context.getAsmInfo();
- int AddrSize = AsmInfo->getPointerSize();
+ int AddrSize = AsmInfo->getCodePointerSize();
MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfRangesSection());
@@ -981,7 +981,7 @@ static unsigned getSizeForEncoding(MCStreamer &streamer,
default: llvm_unreachable("Unknown Encoding");
case dwarf::DW_EH_PE_absptr:
case dwarf::DW_EH_PE_signed:
- return context.getAsmInfo()->getPointerSize();
+ return context.getAsmInfo()->getCodePointerSize();
case dwarf::DW_EH_PE_udata2:
case dwarf::DW_EH_PE_sdata2:
return 2;
@@ -1318,7 +1318,7 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(const MCSymbol *personality,
if (CIEVersion >= 4) {
// Address Size
- Streamer.EmitIntValue(context.getAsmInfo()->getPointerSize(), 1);
+ Streamer.EmitIntValue(context.getAsmInfo()->getCodePointerSize(), 1);
// Segment Descriptor Size
Streamer.EmitIntValue(0, 1);
@@ -1384,7 +1384,7 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(const MCSymbol *personality,
InitialCFAOffset = CFAOffset;
// Padding
- Streamer.EmitValueToAlignment(IsEH ? 4 : MAI->getPointerSize());
+ Streamer.EmitValueToAlignment(IsEH ? 4 : MAI->getCodePointerSize());
Streamer.EmitLabel(sectionEnd);
return *sectionStart;
@@ -1453,7 +1453,7 @@ void FrameEmitterImpl::EmitFDE(const MCSymbol &cieStart,
// The size of a .eh_frame section has to be a multiple of the alignment
// since a null CIE is interpreted as the end. Old systems overaligned
// .eh_frame, so we do too and account for it in the last FDE.
- unsigned Align = LastInSection ? asmInfo->getPointerSize() : PCSize;
+ unsigned Align = LastInSection ? asmInfo->getCodePointerSize() : PCSize;
Streamer.EmitValueToAlignment(Align);
Streamer.EmitLabel(fdeEnd);
@@ -1514,6 +1514,7 @@ void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB,
MCContext &Context = Streamer.getContext();
const MCObjectFileInfo *MOFI = Context.getObjectFileInfo();
+ const MCAsmInfo *AsmInfo = Context.getAsmInfo();
FrameEmitterImpl Emitter(IsEH, Streamer);
ArrayRef<MCDwarfFrameInfo> FrameArray = Streamer.getDwarfFrameInfos();
@@ -1525,7 +1526,7 @@ void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB,
if (Frame.CompactUnwindEncoding == 0) continue;
if (!SectionEmitted) {
Streamer.SwitchSection(MOFI->getCompactUnwindSection());
- Streamer.EmitValueToAlignment(Context.getAsmInfo()->getPointerSize());
+ Streamer.EmitValueToAlignment(AsmInfo->getCodePointerSize());
SectionEmitted = true;
}
NeedsEHFrameSection |=
diff --git a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
index e65ce9f0b936..42e8ad340281 100644
--- a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -1755,8 +1755,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
case DK_8BYTE:
return parseDirectiveValue(IDVal, 8);
case DK_DC_A:
- return parseDirectiveValue(IDVal,
- getContext().getAsmInfo()->getPointerSize());
+ return parseDirectiveValue(
+ IDVal, getContext().getAsmInfo()->getCodePointerSize());
case DK_OCTA:
return parseDirectiveOctaValue(IDVal);
case DK_SINGLE:
diff --git a/contrib/llvm/lib/Object/Archive.cpp b/contrib/llvm/lib/Object/Archive.cpp
index f2021f796d12..c4924f85a907 100644
--- a/contrib/llvm/lib/Object/Archive.cpp
+++ b/contrib/llvm/lib/Object/Archive.cpp
@@ -1,4 +1,4 @@
-//===- Archive.cpp - ar File Format implementation --------------*- C++ -*-===//
+//===- Archive.cpp - ar File Format implementation ------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,12 +11,29 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Object/Archive.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Object/Archive.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/Error.h"
+#include "llvm/Support/Chrono.h"
#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <string>
+#include <system_error>
using namespace llvm;
using namespace object;
@@ -25,7 +42,7 @@ using namespace llvm::support::endian;
static const char *const Magic = "!<arch>\n";
static const char *const ThinMagic = "!<thin>\n";
-void Archive::anchor() { }
+void Archive::anchor() {}
static Error
malformedError(Twine Msg) {
@@ -61,8 +78,8 @@ ArchiveMemberHeader::ArchiveMemberHeader(const Archive *Parent,
if (Err) {
std::string Buf;
raw_string_ostream OS(Buf);
- OS.write_escaped(llvm::StringRef(ArMemHdr->Terminator,
- sizeof(ArMemHdr->Terminator)));
+ OS.write_escaped(StringRef(ArMemHdr->Terminator,
+ sizeof(ArMemHdr->Terminator)));
OS.flush();
std::string Msg("terminator characters in archive member \"" + Buf +
"\" not the correct \"`\\n\" values for the archive "
@@ -97,13 +114,13 @@ Expected<StringRef> ArchiveMemberHeader::getRawName() const {
EndCond = ' ';
else
EndCond = '/';
- llvm::StringRef::size_type end =
- llvm::StringRef(ArMemHdr->Name, sizeof(ArMemHdr->Name)).find(EndCond);
- if (end == llvm::StringRef::npos)
+ StringRef::size_type end =
+ StringRef(ArMemHdr->Name, sizeof(ArMemHdr->Name)).find(EndCond);
+ if (end == StringRef::npos)
end = sizeof(ArMemHdr->Name);
assert(end <= sizeof(ArMemHdr->Name) && end > 0);
// Don't include the EndCond if there is one.
- return llvm::StringRef(ArMemHdr->Name, end);
+ return StringRef(ArMemHdr->Name, end);
}
// This gets the name looking up long names. Size is the size of the archive
@@ -205,12 +222,12 @@ Expected<StringRef> ArchiveMemberHeader::getName(uint64_t Size) const {
Expected<uint32_t> ArchiveMemberHeader::getSize() const {
uint32_t Ret;
- if (llvm::StringRef(ArMemHdr->Size,
- sizeof(ArMemHdr->Size)).rtrim(" ").getAsInteger(10, Ret)) {
+ if (StringRef(ArMemHdr->Size,
+ sizeof(ArMemHdr->Size)).rtrim(" ").getAsInteger(10, Ret)) {
std::string Buf;
raw_string_ostream OS(Buf);
- OS.write_escaped(llvm::StringRef(ArMemHdr->Size,
- sizeof(ArMemHdr->Size)).rtrim(" "));
+ OS.write_escaped(StringRef(ArMemHdr->Size,
+ sizeof(ArMemHdr->Size)).rtrim(" "));
OS.flush();
uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
Parent->getData().data();
@@ -227,8 +244,8 @@ Expected<sys::fs::perms> ArchiveMemberHeader::getAccessMode() const {
sizeof(ArMemHdr->AccessMode)).rtrim(' ').getAsInteger(8, Ret)) {
std::string Buf;
raw_string_ostream OS(Buf);
- OS.write_escaped(llvm::StringRef(ArMemHdr->AccessMode,
- sizeof(ArMemHdr->AccessMode)).rtrim(" "));
+ OS.write_escaped(StringRef(ArMemHdr->AccessMode,
+ sizeof(ArMemHdr->AccessMode)).rtrim(" "));
OS.flush();
uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
Parent->getData().data();
@@ -247,8 +264,8 @@ ArchiveMemberHeader::getLastModified() const {
.getAsInteger(10, Seconds)) {
std::string Buf;
raw_string_ostream OS(Buf);
- OS.write_escaped(llvm::StringRef(ArMemHdr->LastModified,
- sizeof(ArMemHdr->LastModified)).rtrim(" "));
+ OS.write_escaped(StringRef(ArMemHdr->LastModified,
+ sizeof(ArMemHdr->LastModified)).rtrim(" "));
OS.flush();
uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
Parent->getData().data();
diff --git a/contrib/llvm/lib/Object/Binary.cpp b/contrib/llvm/lib/Object/Binary.cpp
index 8467d349cd95..2b44c4a82d2c 100644
--- a/contrib/llvm/lib/Object/Binary.cpp
+++ b/contrib/llvm/lib/Object/Binary.cpp
@@ -1,4 +1,4 @@
-//===- Binary.cpp - A generic binary file -----------------------*- C++ -*-===//
+//===- Binary.cpp - A generic binary file ---------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,21 +11,25 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Object/Binary.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Path.h"
-
-// Include headers for createBinary.
#include "llvm/Object/Archive.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/Error.h"
#include "llvm/Object/MachOUniversal.h"
#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <algorithm>
+#include <memory>
+#include <system_error>
using namespace llvm;
using namespace object;
-Binary::~Binary() {}
+Binary::~Binary() = default;
Binary::Binary(unsigned int Type, MemoryBufferRef Source)
: TypeID(Type), Data(Source) {}
diff --git a/contrib/llvm/lib/Object/COFFObjectFile.cpp b/contrib/llvm/lib/Object/COFFObjectFile.cpp
index a2d8f12449e6..1866aba9b21a 100644
--- a/contrib/llvm/lib/Object/COFFObjectFile.cpp
+++ b/contrib/llvm/lib/Object/COFFObjectFile.cpp
@@ -1,4 +1,4 @@
-//===- COFFObjectFile.cpp - COFF object file implementation -----*- C++ -*-===//
+//===- COFFObjectFile.cpp - COFF object file implementation ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,16 +11,28 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Object/COFF.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator_range.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Object/Error.h"
+#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/COFF.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cctype>
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
#include <limits>
+#include <memory>
+#include <system_error>
using namespace llvm;
using namespace object;
@@ -116,7 +128,7 @@ const coff_symbol_type *COFFObjectFile::toSymb(DataRefImpl Ref) const {
const coff_section *COFFObjectFile::toSec(DataRefImpl Ref) const {
const coff_section *Addr = reinterpret_cast<const coff_section*>(Ref.p);
-# ifndef NDEBUG
+#ifndef NDEBUG
// Verify that the section points to a valid entry in the section table.
if (Addr < SectionTable || Addr >= (SectionTable + getNumberOfSections()))
report_fatal_error("Section was outside of section table.");
@@ -124,7 +136,7 @@ const coff_section *COFFObjectFile::toSec(DataRefImpl Ref) const {
uintptr_t Offset = uintptr_t(Addr) - uintptr_t(SectionTable);
assert(Offset % sizeof(coff_section) == 0 &&
"Section did not point to the beginning of a section");
-# endif
+#endif
return Addr;
}
@@ -985,7 +997,7 @@ COFFObjectFile::getSymbolAuxData(COFFSymbolRef Symbol) const {
if (Symbol.getNumberOfAuxSymbols() > 0) {
// AUX data comes immediately after the symbol in COFF
Aux = reinterpret_cast<const uint8_t *>(Symbol.getRawPtr()) + SymbolSize;
-# ifndef NDEBUG
+#ifndef NDEBUG
// Verify that the Aux symbol points to a valid entry in the symbol table.
uintptr_t Offset = uintptr_t(Aux) - uintptr_t(base());
if (Offset < getPointerToSymbolTable() ||
@@ -995,7 +1007,7 @@ COFFObjectFile::getSymbolAuxData(COFFSymbolRef Symbol) const {
assert((Offset - getPointerToSymbolTable()) % SymbolSize == 0 &&
"Aux Symbol data did not point to the beginning of a symbol");
-# endif
+#endif
}
return makeArrayRef(Aux, Symbol.getNumberOfAuxSymbols() * SymbolSize);
}
diff --git a/contrib/llvm/lib/Object/IRSymtab.cpp b/contrib/llvm/lib/Object/IRSymtab.cpp
index da1ef9946b50..bb3d1b2cf695 100644
--- a/contrib/llvm/lib/Object/IRSymtab.cpp
+++ b/contrib/llvm/lib/Object/IRSymtab.cpp
@@ -28,14 +28,12 @@ struct Builder {
Builder(SmallVector<char, 0> &Symtab, SmallVector<char, 0> &Strtab)
: Symtab(Symtab), Strtab(Strtab) {}
- StringTableBuilder StrtabBuilder{StringTableBuilder::ELF};
+ StringTableBuilder StrtabBuilder{StringTableBuilder::RAW};
BumpPtrAllocator Alloc;
StringSaver Saver{Alloc};
DenseMap<const Comdat *, unsigned> ComdatMap;
- ModuleSymbolTable Msymtab;
- SmallPtrSet<GlobalValue *, 8> Used;
Mangler Mang;
Triple TT;
@@ -49,6 +47,7 @@ struct Builder {
void setStr(storage::Str &S, StringRef Value) {
S.Offset = StrtabBuilder.add(Value);
+ S.Size = Value.size();
}
template <typename T>
void writeRange(storage::Range<T> &R, const std::vector<T> &Objs) {
@@ -59,18 +58,24 @@ struct Builder {
}
Error addModule(Module *M);
- Error addSymbol(ModuleSymbolTable::Symbol Sym);
+ Error addSymbol(const ModuleSymbolTable &Msymtab,
+ const SmallPtrSet<GlobalValue *, 8> &Used,
+ ModuleSymbolTable::Symbol Sym);
Error build(ArrayRef<Module *> Mods);
};
Error Builder::addModule(Module *M) {
+ SmallPtrSet<GlobalValue *, 8> Used;
collectUsedGlobalVariables(*M, Used, /*CompilerUsed*/ false);
- storage::Module Mod;
- Mod.Begin = Msymtab.symbols().size();
+ ModuleSymbolTable Msymtab;
Msymtab.addModule(M);
- Mod.End = Msymtab.symbols().size();
+
+ storage::Module Mod;
+ Mod.Begin = Syms.size();
+ Mod.End = Syms.size() + Msymtab.symbols().size();
+ Mod.UncBegin = Uncommons.size();
Mods.push_back(Mod);
if (TT.isOSBinFormatCOFF()) {
@@ -84,20 +89,25 @@ Error Builder::addModule(Module *M) {
}
}
+ for (ModuleSymbolTable::Symbol Msym : Msymtab.symbols())
+ if (Error Err = addSymbol(Msymtab, Used, Msym))
+ return Err;
+
return Error::success();
}
-Error Builder::addSymbol(ModuleSymbolTable::Symbol Msym) {
+Error Builder::addSymbol(const ModuleSymbolTable &Msymtab,
+ const SmallPtrSet<GlobalValue *, 8> &Used,
+ ModuleSymbolTable::Symbol Msym) {
Syms.emplace_back();
storage::Symbol &Sym = Syms.back();
Sym = {};
- Sym.UncommonIndex = -1;
storage::Uncommon *Unc = nullptr;
auto Uncommon = [&]() -> storage::Uncommon & {
if (Unc)
return *Unc;
- Sym.UncommonIndex = Uncommons.size();
+ Sym.Flags |= 1 << storage::Symbol::FB_has_uncommon;
Uncommons.emplace_back();
Unc = &Uncommons.back();
*Unc = {};
@@ -194,15 +204,10 @@ Error Builder::build(ArrayRef<Module *> IRMods) {
setStr(Hdr.SourceFileName, IRMods[0]->getSourceFileName());
TT = Triple(IRMods[0]->getTargetTriple());
- // This adds the symbols for each module to Msymtab.
for (auto *M : IRMods)
if (Error Err = addModule(M))
return Err;
- for (ModuleSymbolTable::Symbol Msym : Msymtab.symbols())
- if (Error Err = addSymbol(Msym))
- return Err;
-
COFFLinkerOptsOS.flush();
setStr(Hdr.COFFLinkerOpts, COFFLinkerOpts);
diff --git a/contrib/llvm/lib/Object/ObjectFile.cpp b/contrib/llvm/lib/Object/ObjectFile.cpp
index f36388b677f3..1f60e7157bd9 100644
--- a/contrib/llvm/lib/Object/ObjectFile.cpp
+++ b/contrib/llvm/lib/Object/ObjectFile.cpp
@@ -1,4 +1,4 @@
-//===- ObjectFile.cpp - File format independent object file -----*- C++ -*-===//
+//===- ObjectFile.cpp - File format independent object file ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,20 +11,28 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Object/ObjectFile.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Object/Binary.h"
#include "llvm/Object/COFF.h"
+#include "llvm/Object/Error.h"
#include "llvm/Object/MachO.h"
+#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/Wasm.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cstdint>
+#include <memory>
#include <system_error>
using namespace llvm;
using namespace object;
-void ObjectFile::anchor() { }
+void ObjectFile::anchor() {}
ObjectFile::ObjectFile(unsigned int Type, MemoryBufferRef Source)
: SymbolicFile(Type, Source) {}
diff --git a/contrib/llvm/lib/Object/SymbolicFile.cpp b/contrib/llvm/lib/Object/SymbolicFile.cpp
index 4b51a49cf342..16cff5c228bd 100644
--- a/contrib/llvm/lib/Object/SymbolicFile.cpp
+++ b/contrib/llvm/lib/Object/SymbolicFile.cpp
@@ -1,4 +1,4 @@
-//===- SymbolicFile.cpp - Interface that only provides symbols --*- C++ -*-===//
+//===- SymbolicFile.cpp - Interface that only provides symbols ------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,12 +11,20 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Object/COFF.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Object/COFFImportFile.h"
+#include "llvm/Object/Error.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/SymbolicFile.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
+#include <algorithm>
+#include <memory>
using namespace llvm;
using namespace object;
@@ -24,7 +32,7 @@ using namespace object;
SymbolicFile::SymbolicFile(unsigned int Type, MemoryBufferRef Source)
: Binary(Type, Source) {}
-SymbolicFile::~SymbolicFile() {}
+SymbolicFile::~SymbolicFile() = default;
Expected<std::unique_ptr<SymbolicFile>> SymbolicFile::createSymbolicFile(
MemoryBufferRef Object, sys::fs::file_magic Type, LLVMContext *Context) {
diff --git a/contrib/llvm/lib/Support/APFloat.cpp b/contrib/llvm/lib/Support/APFloat.cpp
index 9778628911cd..c4c892f0352a 100644
--- a/contrib/llvm/lib/Support/APFloat.cpp
+++ b/contrib/llvm/lib/Support/APFloat.cpp
@@ -3442,7 +3442,7 @@ void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
// Ignore trailing binary zeros.
int trailingZeros = significand.countTrailingZeros();
exp += trailingZeros;
- significand = significand.lshr(trailingZeros);
+ significand.lshrInPlace(trailingZeros);
// Change the exponent from 2^e to 10^e.
if (exp == 0) {
diff --git a/contrib/llvm/lib/Support/APInt.cpp b/contrib/llvm/lib/Support/APInt.cpp
index 0c7da1dad0d2..2d049a1cff85 100644
--- a/contrib/llvm/lib/Support/APInt.cpp
+++ b/contrib/llvm/lib/Support/APInt.cpp
@@ -125,16 +125,16 @@ APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
fromString(numbits, Str, radix);
}
-APInt& APInt::AssignSlowCase(const APInt& RHS) {
+void APInt::AssignSlowCase(const APInt& RHS) {
// Don't do anything for X = X
if (this == &RHS)
- return *this;
+ return;
if (BitWidth == RHS.getBitWidth()) {
// assume same bit-width single-word case is already handled
assert(!isSingleWord());
memcpy(pVal, RHS.pVal, getNumWords() * APINT_WORD_SIZE);
- return *this;
+ return;
}
if (isSingleWord()) {
@@ -154,7 +154,7 @@ APInt& APInt::AssignSlowCase(const APInt& RHS) {
memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
}
BitWidth = RHS.BitWidth;
- return clearUnusedBits();
+ clearUnusedBits();
}
/// This method 'profiles' an APInt for use with FoldingSet.
@@ -339,19 +339,16 @@ APInt& APInt::operator*=(const APInt& RHS) {
return *this;
}
-APInt& APInt::AndAssignSlowCase(const APInt& RHS) {
+void APInt::AndAssignSlowCase(const APInt& RHS) {
tcAnd(pVal, RHS.pVal, getNumWords());
- return *this;
}
-APInt& APInt::OrAssignSlowCase(const APInt& RHS) {
+void APInt::OrAssignSlowCase(const APInt& RHS) {
tcOr(pVal, RHS.pVal, getNumWords());
- return *this;
}
-APInt& APInt::XorAssignSlowCase(const APInt& RHS) {
+void APInt::XorAssignSlowCase(const APInt& RHS) {
tcXor(pVal, RHS.pVal, getNumWords());
- return *this;
}
APInt APInt::operator*(const APInt& RHS) const {
@@ -367,14 +364,6 @@ bool APInt::EqualSlowCase(const APInt& RHS) const {
return std::equal(pVal, pVal + getNumWords(), RHS.pVal);
}
-bool APInt::EqualSlowCase(uint64_t Val) const {
- unsigned n = getActiveBits();
- if (n <= APINT_BITS_PER_WORD)
- return pVal[0] == Val;
- else
- return false;
-}
-
bool APInt::ult(const APInt& RHS) const {
assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
if (isSingleWord())
@@ -733,6 +722,22 @@ unsigned APInt::countPopulationSlowCase() const {
return Count;
}
+bool APInt::intersectsSlowCase(const APInt &RHS) const {
+ for (unsigned i = 0, e = getNumWords(); i != e; ++i)
+ if ((pVal[i] & RHS.pVal[i]) != 0)
+ return true;
+
+ return false;
+}
+
+bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
+ for (unsigned i = 0, e = getNumWords(); i != e; ++i)
+ if ((pVal[i] & ~RHS.pVal[i]) != 0)
+ return false;
+
+ return true;
+}
+
APInt APInt::byteSwap() const {
assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
if (BitWidth == 16)
@@ -774,14 +779,12 @@ APInt APInt::reverseBits() const {
}
APInt Val(*this);
- APInt Reversed(*this);
- int S = BitWidth - 1;
-
- const APInt One(BitWidth, 1);
+ APInt Reversed(BitWidth, 0);
+ unsigned S = BitWidth;
- for ((Val = Val.lshr(1)); Val != 0; (Val = Val.lshr(1))) {
+ for (; Val != 0; Val.lshrInPlace(1)) {
Reversed <<= 1;
- Reversed |= (Val & One);
+ Reversed |= Val[0];
--S;
}
@@ -1136,63 +1139,14 @@ APInt APInt::ashr(unsigned shiftAmt) const {
/// Logical right-shift this APInt by shiftAmt.
/// @brief Logical right-shift function.
-APInt APInt::lshr(const APInt &shiftAmt) const {
- return lshr((unsigned)shiftAmt.getLimitedValue(BitWidth));
-}
-
-/// Perform a logical right-shift from Src to Dst of Words words, by Shift,
-/// which must be less than 64. If the source and destination ranges overlap,
-/// we require that Src >= Dst (put another way, we require that the overall
-/// operation is a right shift on the combined range).
-static void lshrWords(APInt::WordType *Dst, APInt::WordType *Src,
- unsigned Words, unsigned Shift) {
- assert(Shift < APInt::APINT_BITS_PER_WORD);
-
- if (!Words)
- return;
-
- if (Shift == 0) {
- std::memmove(Dst, Src, Words * APInt::APINT_WORD_SIZE);
- return;
- }
-
- uint64_t Low = Src[0];
- for (unsigned I = 1; I != Words; ++I) {
- uint64_t High = Src[I];
- Dst[I - 1] =
- (Low >> Shift) | (High << (APInt::APINT_BITS_PER_WORD - Shift));
- Low = High;
- }
- Dst[Words - 1] = Low >> Shift;
+void APInt::lshrInPlace(const APInt &shiftAmt) {
+ lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
}
/// Logical right-shift this APInt by shiftAmt.
/// @brief Logical right-shift function.
-void APInt::lshrInPlace(unsigned shiftAmt) {
- if (isSingleWord()) {
- if (shiftAmt >= BitWidth)
- VAL = 0;
- else
- VAL >>= shiftAmt;
- return;
- }
-
- // Don't bother performing a no-op shift.
- if (!shiftAmt)
- return;
-
- // Find number of complete words being shifted out and zeroed.
- const unsigned Words = getNumWords();
- const unsigned ShiftFullWords =
- std::min(shiftAmt / APINT_BITS_PER_WORD, Words);
-
- // Fill in first Words - ShiftFullWords by shifting.
- lshrWords(pVal, pVal + ShiftFullWords, Words - ShiftFullWords,
- shiftAmt % APINT_BITS_PER_WORD);
-
- // The remaining high words are all zero.
- for (unsigned I = Words - ShiftFullWords; I != Words; ++I)
- pVal[I] = 0;
+void APInt::lshrSlowCase(unsigned ShiftAmt) {
+ tcShiftRight(pVal, getNumWords(), ShiftAmt);
}
/// Left-shift this APInt by shiftAmt.
@@ -1202,60 +1156,9 @@ APInt APInt::shl(const APInt &shiftAmt) const {
return shl((unsigned)shiftAmt.getLimitedValue(BitWidth));
}
-APInt APInt::shlSlowCase(unsigned shiftAmt) const {
- // If all the bits were shifted out, the result is 0. This avoids issues
- // with shifting by the size of the integer type, which produces undefined
- // results. We define these "undefined results" to always be 0.
- if (shiftAmt == BitWidth)
- return APInt(BitWidth, 0);
-
- // If none of the bits are shifted out, the result is *this. This avoids a
- // lshr by the words size in the loop below which can produce incorrect
- // results. It also avoids the expensive computation below for a common case.
- if (shiftAmt == 0)
- return *this;
-
- // Create some space for the result.
- uint64_t * val = new uint64_t[getNumWords()];
-
- // If we are shifting less than a word, do it the easy way
- if (shiftAmt < APINT_BITS_PER_WORD) {
- uint64_t carry = 0;
- for (unsigned i = 0; i < getNumWords(); i++) {
- val[i] = pVal[i] << shiftAmt | carry;
- carry = pVal[i] >> (APINT_BITS_PER_WORD - shiftAmt);
- }
- APInt Result(val, BitWidth);
- Result.clearUnusedBits();
- return Result;
- }
-
- // Compute some values needed by the remaining shift algorithms
- unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
- unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
-
- // If we are shifting whole words, just move whole words
- if (wordShift == 0) {
- for (unsigned i = 0; i < offset; i++)
- val[i] = 0;
- for (unsigned i = offset; i < getNumWords(); i++)
- val[i] = pVal[i-offset];
- APInt Result(val, BitWidth);
- Result.clearUnusedBits();
- return Result;
- }
-
- // Copy whole words from this to Result.
- unsigned i = getNumWords() - 1;
- for (; i > offset; --i)
- val[i] = pVal[i-offset] << wordShift |
- pVal[i-offset-1] >> (APINT_BITS_PER_WORD - wordShift);
- val[offset] = pVal[0] << wordShift;
- for (i = 0; i < offset; ++i)
- val[i] = 0;
- APInt Result(val, BitWidth);
- Result.clearUnusedBits();
- return Result;
+void APInt::shlSlowCase(unsigned ShiftAmt) {
+ tcShiftLeft(pVal, getNumWords(), ShiftAmt);
+ clearUnusedBits();
}
// Calculate the rotate amount modulo the bit width.
@@ -2239,7 +2142,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
while (Tmp != 0) {
unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
Str.push_back(Digits[Digit]);
- Tmp = Tmp.lshr(ShiftAmt);
+ Tmp.lshrInPlace(ShiftAmt);
}
} else {
APInt divisor(Radix == 10? 4 : 8, Radix);
@@ -2698,63 +2601,58 @@ int APInt::tcDivide(WordType *lhs, const WordType *rhs,
return false;
}
-/* Shift a bignum left COUNT bits in-place. Shifted in bits are zero.
- There are no restrictions on COUNT. */
-void APInt::tcShiftLeft(WordType *dst, unsigned parts, unsigned count) {
- if (count) {
- /* Jump is the inter-part jump; shift is is intra-part shift. */
- unsigned jump = count / APINT_BITS_PER_WORD;
- unsigned shift = count % APINT_BITS_PER_WORD;
-
- while (parts > jump) {
- WordType part;
+/// Shift a bignum left Count bits in-place. Shifted in bits are zero. There are
+/// no restrictions on Count.
+void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
+ // Don't bother performing a no-op shift.
+ if (!Count)
+ return;
- parts--;
+  /* WordShift is the inter-part shift; BitShift is the intra-part shift. */
+ unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
+ unsigned BitShift = Count % APINT_BITS_PER_WORD;
- /* dst[i] comes from the two parts src[i - jump] and, if we have
- an intra-part shift, src[i - jump - 1]. */
- part = dst[parts - jump];
- if (shift) {
- part <<= shift;
- if (parts >= jump + 1)
- part |= dst[parts - jump - 1] >> (APINT_BITS_PER_WORD - shift);
- }
-
- dst[parts] = part;
+ // Fastpath for moving by whole words.
+ if (BitShift == 0) {
+ std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
+ } else {
+ while (Words-- > WordShift) {
+ Dst[Words] = Dst[Words - WordShift] << BitShift;
+ if (Words > WordShift)
+ Dst[Words] |=
+ Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
}
-
- while (parts > 0)
- dst[--parts] = 0;
}
+
+ // Fill in the remainder with 0s.
+ std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
}
-/* Shift a bignum right COUNT bits in-place. Shifted in bits are
- zero. There are no restrictions on COUNT. */
-void APInt::tcShiftRight(WordType *dst, unsigned parts, unsigned count) {
- if (count) {
- /* Jump is the inter-part jump; shift is is intra-part shift. */
- unsigned jump = count / APINT_BITS_PER_WORD;
- unsigned shift = count % APINT_BITS_PER_WORD;
+/// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
+/// are no restrictions on Count.
+void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
+ // Don't bother performing a no-op shift.
+ if (!Count)
+ return;
- /* Perform the shift. This leaves the most significant COUNT bits
- of the result at zero. */
- for (unsigned i = 0; i < parts; i++) {
- WordType part;
+  // WordShift is the inter-part shift; BitShift is the intra-part shift.
+ unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
+ unsigned BitShift = Count % APINT_BITS_PER_WORD;
- if (i + jump >= parts) {
- part = 0;
- } else {
- part = dst[i + jump];
- if (shift) {
- part >>= shift;
- if (i + jump + 1 < parts)
- part |= dst[i + jump + 1] << (APINT_BITS_PER_WORD - shift);
- }
- }
-
- dst[i] = part;
+ unsigned WordsToMove = Words - WordShift;
+ // Fastpath for moving by whole words.
+ if (BitShift == 0) {
+ std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
+ } else {
+ for (unsigned i = 0; i != WordsToMove; ++i) {
+ Dst[i] = Dst[i + WordShift] >> BitShift;
+ if (i + 1 != WordsToMove)
+ Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
}
}
+
+ // Fill in the remainder with 0s.
+ std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
}
/* Bitwise and of two bignums. */
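tcShiftRight above now clamps the whole-word part of the shift, takes a memmove fast path when the shift is a multiple of the word size, and zero-fills the vacated high words. A standalone sketch of the same scheme on a raw array of 64-bit limbs (not the APInt code itself; the word size and helper name are assumptions):

#include <algorithm>
#include <cstdint>
#include <cstring>

// Logical right shift of a little-endian array of Words 64-bit limbs by Count
// bits; Count may exceed Words * 64, in which case everything becomes zero.
static void shiftRightWords(uint64_t *Dst, unsigned Words, unsigned Count) {
  if (!Count)
    return;
  unsigned WordShift = std::min(Count / 64, Words); // whole limbs shifted out
  unsigned BitShift = Count % 64;                   // remaining intra-limb shift
  unsigned WordsToMove = Words - WordShift;
  if (BitShift == 0) {
    std::memmove(Dst, Dst + WordShift, WordsToMove * sizeof(uint64_t));
  } else {
    for (unsigned i = 0; i != WordsToMove; ++i) {
      Dst[i] = Dst[i + WordShift] >> BitShift;
      if (i + 1 != WordsToMove)
        Dst[i] |= Dst[i + WordShift + 1] << (64 - BitShift);
    }
  }
  // Zero-fill the limbs vacated at the top.
  std::memset(Dst + WordsToMove, 0, WordShift * sizeof(uint64_t));
}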
diff --git a/contrib/llvm/lib/Support/CommandLine.cpp b/contrib/llvm/lib/Support/CommandLine.cpp
index f4a9108b8544..34345901eab1 100644
--- a/contrib/llvm/lib/Support/CommandLine.cpp
+++ b/contrib/llvm/lib/Support/CommandLine.cpp
@@ -2069,12 +2069,15 @@ public:
#ifndef NDEBUG
OS << " with assertions";
#endif
+#if LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO
std::string CPU = sys::getHostCPUName();
if (CPU == "generic")
CPU = "(unknown)";
OS << ".\n"
<< " Default target: " << sys::getDefaultTargetTriple() << '\n'
- << " Host CPU: " << CPU << '\n';
+ << " Host CPU: " << CPU;
+#endif
+ OS << '\n';
}
void operator=(bool OptionWasSpecified) {
if (!OptionWasSpecified)
diff --git a/contrib/llvm/lib/Support/Dwarf.cpp b/contrib/llvm/lib/Support/Dwarf.cpp
index f13da62e4a87..200546857de7 100644
--- a/contrib/llvm/lib/Support/Dwarf.cpp
+++ b/contrib/llvm/lib/Support/Dwarf.cpp
@@ -22,7 +22,7 @@ StringRef llvm::dwarf::TagString(unsigned Tag) {
switch (Tag) {
default:
return StringRef();
-#define HANDLE_DW_TAG(ID, NAME) \
+#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \
case DW_TAG_##NAME: \
return "DW_TAG_" #NAME;
#include "llvm/Support/Dwarf.def"
@@ -31,11 +31,34 @@ StringRef llvm::dwarf::TagString(unsigned Tag) {
unsigned llvm::dwarf::getTag(StringRef TagString) {
return StringSwitch<unsigned>(TagString)
-#define HANDLE_DW_TAG(ID, NAME) .Case("DW_TAG_" #NAME, DW_TAG_##NAME)
+#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \
+ .Case("DW_TAG_" #NAME, DW_TAG_##NAME)
#include "llvm/Support/Dwarf.def"
.Default(DW_TAG_invalid);
}
+unsigned llvm::dwarf::TagVersion(dwarf::Tag Tag) {
+ switch (Tag) {
+ default:
+ return 0;
+#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \
+ case DW_TAG_##NAME: \
+ return VERSION;
+#include "llvm/Support/Dwarf.def"
+ }
+}
+
+unsigned llvm::dwarf::TagVendor(dwarf::Tag Tag) {
+ switch (Tag) {
+ default:
+ return 0;
+#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \
+ case DW_TAG_##NAME: \
+ return DWARF_VENDOR_##VENDOR;
+#include "llvm/Support/Dwarf.def"
+ }
+}
+
StringRef llvm::dwarf::ChildrenString(unsigned Children) {
switch (Children) {
case DW_CHILDREN_no: return "DW_CHILDREN_no";
@@ -48,29 +71,73 @@ StringRef llvm::dwarf::AttributeString(unsigned Attribute) {
switch (Attribute) {
default:
return StringRef();
-#define HANDLE_DW_AT(ID, NAME) \
- case DW_AT_##NAME: \
+#define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR) \
+ case DW_AT_##NAME: \
return "DW_AT_" #NAME;
#include "llvm/Support/Dwarf.def"
}
}
+unsigned llvm::dwarf::AttributeVersion(dwarf::Attribute Attribute) {
+ switch (Attribute) {
+ default:
+ return 0;
+#define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR) \
+ case DW_AT_##NAME: \
+ return VERSION;
+#include "llvm/Support/Dwarf.def"
+ }
+}
+
+unsigned llvm::dwarf::AttributeVendor(dwarf::Attribute Attribute) {
+ switch (Attribute) {
+ default:
+ return 0;
+#define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR) \
+ case DW_AT_##NAME: \
+ return DWARF_VENDOR_##VENDOR;
+#include "llvm/Support/Dwarf.def"
+ }
+}
+
StringRef llvm::dwarf::FormEncodingString(unsigned Encoding) {
switch (Encoding) {
default:
return StringRef();
-#define HANDLE_DW_FORM(ID, NAME) \
- case DW_FORM_##NAME: \
+#define HANDLE_DW_FORM(ID, NAME, VERSION, VENDOR) \
+ case DW_FORM_##NAME: \
return "DW_FORM_" #NAME;
#include "llvm/Support/Dwarf.def"
}
}
+unsigned llvm::dwarf::FormVersion(dwarf::Form Form) {
+ switch (Form) {
+ default:
+ return 0;
+#define HANDLE_DW_FORM(ID, NAME, VERSION, VENDOR) \
+ case DW_FORM_##NAME: \
+ return VERSION;
+#include "llvm/Support/Dwarf.def"
+ }
+}
+
+unsigned llvm::dwarf::FormVendor(dwarf::Form Form) {
+ switch (Form) {
+ default:
+ return 0;
+#define HANDLE_DW_FORM(ID, NAME, VERSION, VENDOR) \
+ case DW_FORM_##NAME: \
+ return DWARF_VENDOR_##VENDOR;
+#include "llvm/Support/Dwarf.def"
+ }
+}
+
StringRef llvm::dwarf::OperationEncodingString(unsigned Encoding) {
switch (Encoding) {
default:
return StringRef();
-#define HANDLE_DW_OP(ID, NAME) \
+#define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) \
case DW_OP_##NAME: \
return "DW_OP_" #NAME;
#include "llvm/Support/Dwarf.def"
@@ -81,17 +148,40 @@ StringRef llvm::dwarf::OperationEncodingString(unsigned Encoding) {
unsigned llvm::dwarf::getOperationEncoding(StringRef OperationEncodingString) {
return StringSwitch<unsigned>(OperationEncodingString)
-#define HANDLE_DW_OP(ID, NAME) .Case("DW_OP_" #NAME, DW_OP_##NAME)
+#define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) \
+ .Case("DW_OP_" #NAME, DW_OP_##NAME)
#include "llvm/Support/Dwarf.def"
.Case("DW_OP_LLVM_fragment", DW_OP_LLVM_fragment)
.Default(0);
}
+unsigned llvm::dwarf::OperationVersion(dwarf::LocationAtom Op) {
+ switch (Op) {
+ default:
+ return 0;
+#define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) \
+ case DW_OP_##NAME: \
+ return VERSION;
+#include "llvm/Support/Dwarf.def"
+ }
+}
+
+unsigned llvm::dwarf::OperationVendor(dwarf::LocationAtom Op) {
+ switch (Op) {
+ default:
+ return 0;
+#define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) \
+ case DW_OP_##NAME: \
+ return DWARF_VENDOR_##VENDOR;
+#include "llvm/Support/Dwarf.def"
+ }
+}
+
StringRef llvm::dwarf::AttributeEncodingString(unsigned Encoding) {
switch (Encoding) {
default:
return StringRef();
-#define HANDLE_DW_ATE(ID, NAME) \
+#define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) \
case DW_ATE_##NAME: \
return "DW_ATE_" #NAME;
#include "llvm/Support/Dwarf.def"
@@ -100,11 +190,34 @@ StringRef llvm::dwarf::AttributeEncodingString(unsigned Encoding) {
unsigned llvm::dwarf::getAttributeEncoding(StringRef EncodingString) {
return StringSwitch<unsigned>(EncodingString)
-#define HANDLE_DW_ATE(ID, NAME) .Case("DW_ATE_" #NAME, DW_ATE_##NAME)
+#define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) \
+ .Case("DW_ATE_" #NAME, DW_ATE_##NAME)
#include "llvm/Support/Dwarf.def"
.Default(0);
}
+unsigned llvm::dwarf::AttributeEncodingVersion(dwarf::TypeKind ATE) {
+ switch (ATE) {
+ default:
+ return 0;
+#define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) \
+ case DW_ATE_##NAME: \
+ return VERSION;
+#include "llvm/Support/Dwarf.def"
+ }
+}
+
+unsigned llvm::dwarf::AttributeEncodingVendor(dwarf::TypeKind ATE) {
+ switch (ATE) {
+ default:
+ return 0;
+#define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) \
+ case DW_ATE_##NAME: \
+ return DWARF_VENDOR_##VENDOR;
+#include "llvm/Support/Dwarf.def"
+ }
+}
+
StringRef llvm::dwarf::DecimalSignString(unsigned Sign) {
switch (Sign) {
case DW_DS_unsigned: return "DW_DS_unsigned";
@@ -169,7 +282,7 @@ StringRef llvm::dwarf::LanguageString(unsigned Language) {
switch (Language) {
default:
return StringRef();
-#define HANDLE_DW_LANG(ID, NAME) \
+#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \
case DW_LANG_##NAME: \
return "DW_LANG_" #NAME;
#include "llvm/Support/Dwarf.def"
@@ -178,11 +291,34 @@ StringRef llvm::dwarf::LanguageString(unsigned Language) {
unsigned llvm::dwarf::getLanguage(StringRef LanguageString) {
return StringSwitch<unsigned>(LanguageString)
-#define HANDLE_DW_LANG(ID, NAME) .Case("DW_LANG_" #NAME, DW_LANG_##NAME)
+#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \
+ .Case("DW_LANG_" #NAME, DW_LANG_##NAME)
#include "llvm/Support/Dwarf.def"
.Default(0);
}
+unsigned llvm::dwarf::LanguageVersion(dwarf::SourceLanguage Lang) {
+ switch (Lang) {
+ default:
+ return 0;
+#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \
+ case DW_LANG_##NAME: \
+ return VERSION;
+#include "llvm/Support/Dwarf.def"
+ }
+}
+
+unsigned llvm::dwarf::LanguageVendor(dwarf::SourceLanguage Lang) {
+ switch (Lang) {
+ default:
+ return 0;
+#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \
+ case DW_LANG_##NAME: \
+ return DWARF_VENDOR_##VENDOR;
+#include "llvm/Support/Dwarf.def"
+ }
+}
+
StringRef llvm::dwarf::CaseString(unsigned Case) {
switch (Case) {
case DW_ID_case_sensitive: return "DW_ID_case_sensitive";
@@ -394,3 +530,12 @@ StringRef llvm::dwarf::AttributeValueString(uint16_t Attr, unsigned Val) {
return StringRef();
}
+
+bool llvm::dwarf::isValidFormForVersion(Form F, unsigned Version,
+ bool ExtensionsOk) {
+ if (FormVendor(F) == DWARF_VENDOR_DWARF) {
+ unsigned FV = FormVersion(F);
+ return FV > 0 && FV <= Version;
+ }
+ return ExtensionsOk;
+}
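isValidFormForVersion above accepts a standard form only if the DWARF version that introduced it is at or below the producer's version, and defers to ExtensionsOk for vendor forms. A tiny standalone sketch of the same gating rule with hypothetical stand-ins for the generated tables:

#include <cassert>

enum Vendor { VENDOR_DWARF, VENDOR_GNU };          // hypothetical stand-ins
struct FormInfo { unsigned IntroducedIn; Vendor Vend; };

// Standard forms must predate or match Version; extensions need an opt-in.
static bool validForVersion(FormInfo F, unsigned Version, bool ExtensionsOk) {
  if (F.Vend == VENDOR_DWARF)
    return F.IntroducedIn > 0 && F.IntroducedIn <= Version;
  return ExtensionsOk;
}

int main() {
  assert(validForVersion({2, VENDOR_DWARF}, 4, false));  // a v2 form is fine in v4
  assert(!validForVersion({5, VENDOR_DWARF}, 4, false)); // a v5-only form is not
  assert(validForVersion({0, VENDOR_GNU}, 4, true));     // extension, opted in
  return 0;
}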
diff --git a/contrib/llvm/lib/Support/LowLevelType.cpp b/contrib/llvm/lib/Support/LowLevelType.cpp
index 4290d69cd197..0ee3f1d0119e 100644
--- a/contrib/llvm/lib/Support/LowLevelType.cpp
+++ b/contrib/llvm/lib/Support/LowLevelType.cpp
@@ -18,25 +18,25 @@ using namespace llvm;
LLT::LLT(MVT VT) {
if (VT.isVector()) {
- SizeInBits = VT.getVectorElementType().getSizeInBits();
- ElementsOrAddrSpace = VT.getVectorNumElements();
- Kind = ElementsOrAddrSpace == 1 ? Scalar : Vector;
+ init(/*isPointer=*/false, VT.getVectorNumElements() > 1,
+ VT.getVectorNumElements(), VT.getVectorElementType().getSizeInBits(),
+ /*AddressSpace=*/0);
} else if (VT.isValid()) {
// Aggregates are no different from real scalars as far as GlobalISel is
// concerned.
- Kind = Scalar;
- SizeInBits = VT.getSizeInBits();
- ElementsOrAddrSpace = 1;
- assert(SizeInBits != 0 && "invalid zero-sized type");
+ assert(VT.getSizeInBits() != 0 && "invalid zero-sized type");
+ init(/*isPointer=*/false, /*isVector=*/false, /*NumElements=*/0,
+ VT.getSizeInBits(), /*AddressSpace=*/0);
} else {
- Kind = Invalid;
- SizeInBits = ElementsOrAddrSpace = 0;
+ IsPointer = false;
+ IsVector = false;
+ RawData = 0;
}
}
void LLT::print(raw_ostream &OS) const {
if (isVector())
- OS << "<" << ElementsOrAddrSpace << " x s" << SizeInBits << ">";
+ OS << "<" << getNumElements() << " x " << getElementType() << ">";
else if (isPointer())
OS << "p" << getAddressSpace();
else if (isValid()) {
@@ -45,3 +45,12 @@ void LLT::print(raw_ostream &OS) const {
} else
llvm_unreachable("trying to print an invalid type");
}
+
+const constexpr LLT::BitFieldInfo LLT::ScalarSizeFieldInfo;
+const constexpr LLT::BitFieldInfo LLT::PointerSizeFieldInfo;
+const constexpr LLT::BitFieldInfo LLT::PointerAddressSpaceFieldInfo;
+const constexpr LLT::BitFieldInfo LLT::VectorElementsFieldInfo;
+const constexpr LLT::BitFieldInfo LLT::VectorSizeFieldInfo;
+const constexpr LLT::BitFieldInfo LLT::PointerVectorElementsFieldInfo;
+const constexpr LLT::BitFieldInfo LLT::PointerVectorSizeFieldInfo;
+const constexpr LLT::BitFieldInfo LLT::PointerVectorAddressSpaceFieldInfo;
diff --git a/contrib/llvm/lib/Support/Regex.cpp b/contrib/llvm/lib/Support/Regex.cpp
index 68ba79e11766..b1087fd8853c 100644
--- a/contrib/llvm/lib/Support/Regex.cpp
+++ b/contrib/llvm/lib/Support/Regex.cpp
@@ -48,7 +48,7 @@ Regex::~Regex() {
}
}
-bool Regex::isValid(std::string &Error) {
+bool Regex::isValid(std::string &Error) const {
if (!error)
return true;
diff --git a/contrib/llvm/lib/Support/TargetParser.cpp b/contrib/llvm/lib/Support/TargetParser.cpp
index 639d2ece263a..bba7c6d0d604 100644
--- a/contrib/llvm/lib/Support/TargetParser.cpp
+++ b/contrib/llvm/lib/Support/TargetParser.cpp
@@ -210,7 +210,7 @@ bool llvm::ARM::getHWDivFeatures(unsigned HWDivKind,
else
Features.push_back("-hwdiv-arm");
- if (HWDivKind & ARM::AEK_HWDIV)
+ if (HWDivKind & ARM::AEK_HWDIVTHUMB)
Features.push_back("+hwdiv");
else
Features.push_back("-hwdiv");
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index ae01ea477bb9..7141e77fcd25 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -1865,7 +1865,7 @@ static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
OpUsefulBits = OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm);
getUsefulBits(Op, OpUsefulBits, Depth + 1);
// The interesting part was at zero in the argument
- OpUsefulBits = OpUsefulBits.lshr(OpUsefulBits.getBitWidth() - Imm);
+ OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
}
UsefulBits &= OpUsefulBits;
@@ -1894,13 +1894,13 @@ static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
Mask = Mask.shl(ShiftAmt);
getUsefulBits(Op, Mask, Depth + 1);
- Mask = Mask.lshr(ShiftAmt);
+ Mask.lshrInPlace(ShiftAmt);
} else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
// Shift Right
// We do not handle AArch64_AM::ASR, because the sign will change the
// number of useful bits
uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
- Mask = Mask.lshr(ShiftAmt);
+ Mask.lshrInPlace(ShiftAmt);
getUsefulBits(Op, Mask, Depth + 1);
Mask = Mask.shl(ShiftAmt);
} else
@@ -1954,7 +1954,7 @@ static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
if (Op.getOperand(1) == Orig) {
// Copy the bits from the result to the zero bits.
Mask = ResultUsefulBits & OpUsefulBits;
- Mask = Mask.lshr(LSB);
+ Mask.lshrInPlace(LSB);
}
if (Op.getOperand(0) == Orig)
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 0d3289ac84c3..4ddc95199d4c 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3239,30 +3239,26 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
- if (getTargetMachine().getCodeModel() == CodeModel::Large &&
- Subtarget->isTargetMachO()) {
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ auto GV = G->getGlobal();
+ if (Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine()) ==
+ AArch64II::MO_GOT) {
+ Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_GOT);
+ Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
+ } else {
const GlobalValue *GV = G->getGlobal();
- bool InternalLinkage = GV->hasInternalLinkage();
- if (InternalLinkage)
- Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
- else {
- Callee =
- DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_GOT);
- Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
- }
- } else if (ExternalSymbolSDNode *S =
- dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
+ }
+ } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ if (getTargetMachine().getCodeModel() == CodeModel::Large &&
+ Subtarget->isTargetMachO()) {
const char *Sym = S->getSymbol();
Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
+ } else {
+ const char *Sym = S->getSymbol();
+ Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
}
- } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- const GlobalValue *GV = G->getGlobal();
- Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
- } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
- const char *Sym = S->getSymbol();
- Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
}
// We don't usually want to end the call-sequence here because we would tidy
@@ -7130,7 +7126,7 @@ bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const {
if (I->getOpcode() != Instruction::FMul)
return true;
- if (I->getNumUses() != 1)
+ if (!I->hasOneUse())
return true;
Instruction *User = I->user_back();
@@ -10395,7 +10391,7 @@ bool AArch64TargetLowering::isUsedByReturnOnly(SDNode *N,
// call. This will cause the optimizers to attempt to move, or duplicate,
// return instructions to help enable tail call optimizations for this
// instruction.
-bool AArch64TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+bool AArch64TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
return CI->isTailCall();
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 2ad6c8b23df8..a023b4373835 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -593,7 +593,7 @@ private:
}
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
- bool mayBeEmittedAsTailCall(CallInst *CI) const override;
+ bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
ISD::MemIndexedMode &AM, bool &IsInc,
SelectionDAG &DAG) const;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 4449412532f3..82e9c5a88e3b 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -2586,6 +2586,11 @@ def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
Sched<[WriteF]>;
}
+// Similarly add aliases
+def : InstAlias<"fmov $Rd, #0.0", (FMOVWHr FPR16:$Rd, WZR), 0>,
+ Requires<[HasFullFP16]>;
+def : InstAlias<"fmov $Rd, #0.0", (FMOVWSr FPR32:$Rd, WZR), 0>;
+def : InstAlias<"fmov $Rd, #0.0", (FMOVXDr FPR64:$Rd, XZR), 0>;
//===----------------------------------------------------------------------===//
// Floating point conversion instruction.
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 878dac6bff1e..5e01b6cd2b46 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -20,6 +20,7 @@
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
index 20a5979f9b4b..6f9021c4a030 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
@@ -482,7 +482,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
auto &MO = MI.getOperand(Idx);
- if (!MO.isReg())
+ if (!MO.isReg() || !MO.getReg())
continue;
LLT Ty = MRI.getType(MO.getReg());
@@ -537,7 +537,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
InstructionMapping{DefaultMappingID, Cost, nullptr, NumOperands};
SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
- if (MI.getOperand(Idx).isReg()) {
+ if (MI.getOperand(Idx).isReg() && MI.getOperand(Idx).getReg()) {
auto Mapping = getValueMapping(OpRegBankIdx[Idx], OpSize[Idx]);
if (!Mapping->isValid())
return InstructionMapping();
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td
index 6bce4ef6b652..4bd77d344488 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td
@@ -265,6 +265,12 @@ def : InstRW<[FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc, WriteAdr],(instregex "^LD4
// Arithmetic and Logical Instructions
// -----------------------------------------------------------------------------
def : InstRW<[FalkorWr_ADD], (instregex "^ADD(S)?(W|X)r(s|x)$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^AND(S)?(W|X)r(i|r|s)$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^BIC(S)?(W|X)r(r|s)$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EON(W|X)r(r|s)$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EOR(W|X)r(i|r|s)$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORN(W|X)r(r|s)$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORR(W|X)r(i|r|s)$")>;
def : InstRW<[FalkorWr_2XYZ_2cyc], (instregex "^SUB(S)?(W|X)r(s|x)$")>;
// SIMD Miscellaneous Instructions
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index b3aba4781db8..042755bd36d0 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -35,6 +35,11 @@ static cl::opt<bool>
UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
"an address is ignored"), cl::init(false), cl::Hidden);
+static cl::opt<bool>
+ UseNonLazyBind("aarch64-enable-nonlazybind",
+ cl::desc("Call nonlazybind functions via direct GOT load"),
+ cl::init(false), cl::Hidden);
+
AArch64Subtarget &
AArch64Subtarget::initializeSubtargetDependencies(StringRef FS,
StringRef CPUString) {
@@ -155,6 +160,23 @@ AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
return AArch64II::MO_NO_FLAG;
}
+unsigned char AArch64Subtarget::classifyGlobalFunctionReference(
+ const GlobalValue *GV, const TargetMachine &TM) const {
+ // MachO large model always goes via a GOT, because we don't have the
+ // relocations available to do anything else.
+ if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
+ !GV->hasInternalLinkage())
+ return AArch64II::MO_GOT;
+
+ // NonLazyBind goes via GOT unless we know it's available locally.
+ auto *F = dyn_cast<Function>(GV);
+ if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) &&
+ !TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
+ return AArch64II::MO_GOT;
+
+ return AArch64II::MO_NO_FLAG;
+}
+
/// This function returns the name of a function which has an interface
/// like the non-standard bzero function, if such a function exists on
/// the current subtarget and it is considered preferable over
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 40ad9185012c..3d66a9ea8ce6 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -271,6 +271,9 @@ public:
unsigned char ClassifyGlobalReference(const GlobalValue *GV,
const TargetMachine &TM) const;
+ unsigned char classifyGlobalFunctionReference(const GlobalValue *GV,
+ const TargetMachine &TM) const;
+
/// This function returns the name of a function which has an interface
/// like the non-standard bzero function, if such a function exists on
/// the current subtarget and it is considered preferable over
diff --git a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index cbab68979c56..d7bbc2bcd22c 100644
--- a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -2100,27 +2100,9 @@ AArch64AsmParser::tryParseFPImm(OperandVector &Operands) {
bool isNegative = parseOptionalToken(AsmToken::Minus);
const AsmToken &Tok = Parser.getTok();
- if (Tok.is(AsmToken::Real)) {
- APFloat RealVal(APFloat::IEEEdouble(), Tok.getString());
- if (isNegative)
- RealVal.changeSign();
-
- uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
- int Val = AArch64_AM::getFP64Imm(APInt(64, IntVal));
- Parser.Lex(); // Eat the token.
- // Check for out of range values. As an exception, we let Zero through,
- // as we handle that special case in post-processing before matching in
- // order to use the zero register for it.
- if (Val == -1 && !RealVal.isPosZero()) {
- TokError("expected compatible register or floating-point constant");
- return MatchOperand_ParseFail;
- }
- Operands.push_back(AArch64Operand::CreateFPImm(Val, S, getContext()));
- return MatchOperand_Success;
- }
- if (Tok.is(AsmToken::Integer)) {
+ if (Tok.is(AsmToken::Real) || Tok.is(AsmToken::Integer)) {
int64_t Val;
- if (!isNegative && Tok.getString().startswith("0x")) {
+ if (Tok.is(AsmToken::Integer) && !isNegative && Tok.getString().startswith("0x")) {
Val = Tok.getIntVal();
if (Val > 255 || Val < 0) {
TokError("encoded floating point value out of range");
@@ -2128,10 +2110,24 @@ AArch64AsmParser::tryParseFPImm(OperandVector &Operands) {
}
} else {
APFloat RealVal(APFloat::IEEEdouble(), Tok.getString());
+ if (isNegative)
+ RealVal.changeSign();
+
uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
- // If we had a '-' in front, toggle the sign bit.
- IntVal ^= (uint64_t)isNegative << 63;
Val = AArch64_AM::getFP64Imm(APInt(64, IntVal));
+
+ // Check for out of range values. As an exception, we let Zero through,
+ // but as tokens instead of an FPImm so that it can be matched by the
+ // appropriate alias if one exists.
+ if (RealVal.isPosZero()) {
+ Parser.Lex(); // Eat the token.
+ Operands.push_back(AArch64Operand::CreateToken("#0", false, S, getContext()));
+ Operands.push_back(AArch64Operand::CreateToken(".0", false, S, getContext()));
+ return MatchOperand_Success;
+ } else if (Val == -1) {
+ TokError("expected compatible register or floating-point constant");
+ return MatchOperand_ParseFail;
+ }
}
Parser.Lex(); // Eat the token.
Operands.push_back(AArch64Operand::CreateFPImm(Val, S, getContext()));
@@ -3655,21 +3651,6 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
}
}
- // Yet another horrible hack to handle FMOV Rd, #0.0 using [WX]ZR.
- if (NumOperands == 3 && Tok == "fmov") {
- AArch64Operand &RegOp = static_cast<AArch64Operand &>(*Operands[1]);
- AArch64Operand &ImmOp = static_cast<AArch64Operand &>(*Operands[2]);
- if (RegOp.isReg() && ImmOp.isFPImm() && ImmOp.getFPImm() == (unsigned)-1) {
- unsigned zreg =
- !AArch64MCRegisterClasses[AArch64::FPR64RegClassID].contains(
- RegOp.getReg())
- ? AArch64::WZR
- : AArch64::XZR;
- Operands[2] = AArch64Operand::CreateReg(zreg, false, Op.getStartLoc(),
- Op.getEndLoc(), getContext());
- }
- }
-
MCInst Inst;
// First try to match against the secondary set of tables containing the
// short-form NEON instructions (e.g. "fadd.2s v0, v1, v2").
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index 8fc822329595..94112849f84e 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -39,7 +39,7 @@ AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin() {
PrivateLabelPrefix = "L";
SeparatorString = "%%";
CommentString = ";";
- PointerSize = CalleeSaveStackSlotSize = 8;
+ CodePointerSize = CalleeSaveStackSlotSize = 8;
AlignmentIsInBytes = false;
UsesELFSectionDirectiveForBSS = true;
@@ -71,7 +71,7 @@ AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(const Triple &T) {
// We prefer NEON instructions to be printed in the short form.
AssemblerDialect = AsmWriterVariant == Default ? 0 : AsmWriterVariant;
- PointerSize = 8;
+ CodePointerSize = 8;
// ".comm align is in bytes but .align is pow-2."
AlignmentIsInBytes = false;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 0446655830d1..a81bcb56dfdc 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -144,6 +144,10 @@ bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
}
void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
+ const AMDGPUMachineFunction *MFI = MF->getInfo<AMDGPUMachineFunction>();
+ if (!MFI->isEntryFunction())
+ return;
+
const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
SIProgramInfo KernelInfo;
amd_kernel_code_t KernelCode;
@@ -184,9 +188,11 @@ void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
}
bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
// The starting address of all shader programs must be 256 bytes aligned.
- MF.setAlignment(8);
+ // Regular functions just need the basic required instruction alignment.
+ MF.setAlignment(MFI->isEntryFunction() ? 8 : 2);
SetupMachineFunction(MF);
@@ -220,13 +226,19 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
OutStreamer->SwitchSection(CommentSection);
if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
- OutStreamer->emitRawComment(" Kernel info:", false);
- OutStreamer->emitRawComment(" codeLenInByte = " + Twine(KernelInfo.CodeLen),
- false);
+ if (MFI->isEntryFunction()) {
+ OutStreamer->emitRawComment(" Kernel info:", false);
+ } else {
+ OutStreamer->emitRawComment(" Function info:", false);
+ }
+
+ OutStreamer->emitRawComment(" codeLenInByte = " +
+ Twine(getFunctionCodeSize(MF)), false);
OutStreamer->emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR),
false);
OutStreamer->emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
false);
+
OutStreamer->emitRawComment(" FloatMode: " + Twine(KernelInfo.FloatMode),
false);
OutStreamer->emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode),
@@ -236,6 +248,9 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
OutStreamer->emitRawComment(" LDSByteSize: " + Twine(KernelInfo.LDSSize) +
" bytes/workgroup (compile time only)", false);
+ if (!MFI->isEntryFunction())
+ return false;
+
OutStreamer->emitRawComment(" SGPRBlocks: " +
Twine(KernelInfo.SGPRBlocks), false);
OutStreamer->emitRawComment(" VGPRBlocks: " +
@@ -317,7 +332,7 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
const MachineOperand &MO = MI.getOperand(op_idx);
if (!MO.isReg())
continue;
- unsigned HWReg = RI->getEncodingValue(MO.getReg()) & 0xff;
+ unsigned HWReg = RI->getHWRegIndex(MO.getReg());
// Register with value > 127 aren't GPR
if (HWReg > 127)
@@ -360,18 +375,12 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
}
}
-void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
- const MachineFunction &MF) const {
+uint64_t AMDGPUAsmPrinter::getFunctionCodeSize(const MachineFunction &MF) const {
const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
- const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- uint64_t CodeSize = 0;
- unsigned MaxSGPR = 0;
- unsigned MaxVGPR = 0;
- bool VCCUsed = false;
- bool FlatUsed = false;
- const SIRegisterInfo *RI = STM.getRegisterInfo();
const SIInstrInfo *TII = STM.getInstrInfo();
+ uint64_t CodeSize = 0;
+
for (const MachineBasicBlock &MBB : MF) {
for (const MachineInstr &MI : MBB) {
// TODO: CodeSize should account for multiple functions.
@@ -380,122 +389,86 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
if (MI.isDebugValue())
continue;
- if (isVerbose())
- CodeSize += TII->getInstSizeInBytes(MI);
+ CodeSize += TII->getInstSizeInBytes(MI);
+ }
+ }
- unsigned numOperands = MI.getNumOperands();
- for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
- const MachineOperand &MO = MI.getOperand(op_idx);
- unsigned width = 0;
- bool isSGPR = false;
+ return CodeSize;
+}
- if (!MO.isReg())
- continue;
+static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI,
+ const SIInstrInfo &TII,
+ unsigned Reg) {
+ for (const MachineOperand &UseOp : MRI.reg_operands(Reg)) {
+ if (!UseOp.isImplicit() || !TII.isFLAT(*UseOp.getParent()))
+ return true;
+ }
- unsigned reg = MO.getReg();
- switch (reg) {
- case AMDGPU::EXEC:
- case AMDGPU::EXEC_LO:
- case AMDGPU::EXEC_HI:
- case AMDGPU::SCC:
- case AMDGPU::M0:
- case AMDGPU::SRC_SHARED_BASE:
- case AMDGPU::SRC_SHARED_LIMIT:
- case AMDGPU::SRC_PRIVATE_BASE:
- case AMDGPU::SRC_PRIVATE_LIMIT:
- continue;
+ return false;
+}
- case AMDGPU::VCC:
- case AMDGPU::VCC_LO:
- case AMDGPU::VCC_HI:
- VCCUsed = true;
- continue;
+void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
+ const MachineFunction &MF) const {
+ const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const SIInstrInfo *TII = STM.getInstrInfo();
+ const SIRegisterInfo *RI = &TII->getRegisterInfo();
- case AMDGPU::FLAT_SCR:
- case AMDGPU::FLAT_SCR_LO:
- case AMDGPU::FLAT_SCR_HI:
- // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat
- // instructions aren't used to access the scratch buffer.
- if (MFI->hasFlatScratchInit())
- FlatUsed = true;
- continue;
- case AMDGPU::TBA:
- case AMDGPU::TBA_LO:
- case AMDGPU::TBA_HI:
- case AMDGPU::TMA:
- case AMDGPU::TMA_LO:
- case AMDGPU::TMA_HI:
- llvm_unreachable("trap handler registers should not be used");
-
- default:
- break;
- }
-
- if (AMDGPU::SReg_32RegClass.contains(reg)) {
- assert(!AMDGPU::TTMP_32RegClass.contains(reg) &&
- "trap handler registers should not be used");
- isSGPR = true;
- width = 1;
- } else if (AMDGPU::VGPR_32RegClass.contains(reg)) {
- isSGPR = false;
- width = 1;
- } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
- assert(!AMDGPU::TTMP_64RegClass.contains(reg) &&
- "trap handler registers should not be used");
- isSGPR = true;
- width = 2;
- } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
- isSGPR = false;
- width = 2;
- } else if (AMDGPU::VReg_96RegClass.contains(reg)) {
- isSGPR = false;
- width = 3;
- } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
- isSGPR = true;
- width = 4;
- } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
- isSGPR = false;
- width = 4;
- } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
- isSGPR = true;
- width = 8;
- } else if (AMDGPU::VReg_256RegClass.contains(reg)) {
- isSGPR = false;
- width = 8;
- } else if (AMDGPU::SReg_512RegClass.contains(reg)) {
- isSGPR = true;
- width = 16;
- } else if (AMDGPU::VReg_512RegClass.contains(reg)) {
- isSGPR = false;
- width = 16;
- } else {
- llvm_unreachable("Unknown register class");
- }
- unsigned hwReg = RI->getEncodingValue(reg) & 0xff;
- unsigned maxUsed = hwReg + width - 1;
- if (isSGPR) {
- MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
- } else {
- MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
- }
- }
+ MCPhysReg NumVGPRReg = AMDGPU::NoRegister;
+ for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) {
+ if (MRI.isPhysRegUsed(Reg)) {
+ NumVGPRReg = Reg;
+ break;
+ }
+ }
+
+ MCPhysReg NumSGPRReg = AMDGPU::NoRegister;
+ for (MCPhysReg Reg : reverse(AMDGPU::SGPR_32RegClass.getRegisters())) {
+ if (MRI.isPhysRegUsed(Reg)) {
+ NumSGPRReg = Reg;
+ break;
}
}
+ // We found the maximum register index. They start at 0, so add one to get the
+ // number of registers.
+ ProgInfo.NumVGPR = NumVGPRReg == AMDGPU::NoRegister ? 0 :
+ RI->getHWRegIndex(NumVGPRReg) + 1;
+ ProgInfo.NumSGPR = NumSGPRReg == AMDGPU::NoRegister ? 0 :
+ RI->getHWRegIndex(NumSGPRReg) + 1;
unsigned ExtraSGPRs = 0;
- if (VCCUsed)
+ ProgInfo.VCCUsed = MRI.isPhysRegUsed(AMDGPU::VCC_LO) ||
+ MRI.isPhysRegUsed(AMDGPU::VCC_HI);
+ if (ProgInfo.VCCUsed)
ExtraSGPRs = 2;
+ ProgInfo.FlatUsed = MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_LO) ||
+ MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_HI);
+
+ // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat
+ // instructions aren't used to access the scratch buffer. Inline assembly
+ // may need it though.
+ //
+ // If we only have implicit uses of flat_scr on flat instructions, it is not
+ // really needed.
+ if (ProgInfo.FlatUsed && !MFI->hasFlatScratchInit() &&
+ (!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR) &&
+ !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_LO) &&
+ !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_HI))) {
+ ProgInfo.FlatUsed = false;
+ }
+
if (STM.getGeneration() < SISubtarget::VOLCANIC_ISLANDS) {
- if (FlatUsed)
+ if (ProgInfo.FlatUsed)
ExtraSGPRs = 4;
} else {
if (STM.isXNACKEnabled())
ExtraSGPRs = 4;
- if (FlatUsed)
+ if (ProgInfo.FlatUsed)
ExtraSGPRs = 6;
}
@@ -505,34 +478,29 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
!STM.hasSGPRInitBug()) {
unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
- if (MaxSGPR + 1 > MaxAddressableNumSGPRs) {
+ if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) {
// This can happen due to a compiler bug or when using inline asm.
LLVMContext &Ctx = MF.getFunction()->getContext();
DiagnosticInfoResourceLimit Diag(*MF.getFunction(),
"addressable scalar registers",
- MaxSGPR + 1, DS_Error,
+ ProgInfo.NumSGPR, DS_Error,
DK_ResourceLimit,
MaxAddressableNumSGPRs);
Ctx.diagnose(Diag);
- MaxSGPR = MaxAddressableNumSGPRs - 1;
+ ProgInfo.NumSGPR = MaxAddressableNumSGPRs - 1;
}
}
// Account for extra SGPRs and VGPRs reserved for debugger use.
- MaxSGPR += ExtraSGPRs;
- MaxVGPR += ExtraVGPRs;
-
- // We found the maximum register index. They start at 0, so add one to get the
- // number of registers.
- ProgInfo.NumSGPR = MaxSGPR + 1;
- ProgInfo.NumVGPR = MaxVGPR + 1;
+ ProgInfo.NumSGPR += ExtraSGPRs;
+ ProgInfo.NumVGPR += ExtraVGPRs;
// Adjust number of registers used to meet default/requested minimum/maximum
// number of waves per execution unit request.
ProgInfo.NumSGPRsForWavesPerEU = std::max(
- ProgInfo.NumSGPR, STM.getMinNumSGPRs(MFI->getMaxWavesPerEU()));
+ std::max(ProgInfo.NumSGPR, 1u), STM.getMinNumSGPRs(MFI->getMaxWavesPerEU()));
ProgInfo.NumVGPRsForWavesPerEU = std::max(
- ProgInfo.NumVGPR, STM.getMinNumVGPRs(MFI->getMaxWavesPerEU()));
+ std::max(ProgInfo.NumVGPR, 1u), STM.getMinNumVGPRs(MFI->getMaxWavesPerEU()));
if (STM.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ||
STM.hasSGPRInitBug()) {
@@ -559,10 +527,10 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
}
- if (MFI->NumUserSGPRs > STM.getMaxNumUserSGPRs()) {
+ if (MFI->getNumUserSGPRs() > STM.getMaxNumUserSGPRs()) {
LLVMContext &Ctx = MF.getFunction()->getContext();
DiagnosticInfoResourceLimit Diag(*MF.getFunction(), "user SGPRs",
- MFI->NumUserSGPRs, DS_Error);
+ MFI->getNumUserSGPRs(), DS_Error);
Ctx.diagnose(Diag);
}
@@ -584,7 +552,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ProgInfo.VGPRBlocks = ProgInfo.VGPRBlocks / STM.getVGPREncodingGranule() - 1;
// Record first reserved VGPR and number of reserved VGPRs.
- ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? MaxVGPR + 1 : 0;
+ ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? ProgInfo.NumVGPR : 0;
ProgInfo.ReservedVGPRCount = STM.getReservedNumVGPRs(MF);
// Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
@@ -609,10 +577,6 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
ProgInfo.ScratchSize = FrameInfo.getStackSize();
- ProgInfo.FlatUsed = FlatUsed;
- ProgInfo.VCCUsed = VCCUsed;
- ProgInfo.CodeLen = CodeSize;
-
unsigned LDSAlignShift;
if (STM.getGeneration() < SISubtarget::SEA_ISLANDS) {
// LDS is allocated in 64 dword blocks.
@@ -623,7 +587,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
}
unsigned LDSSpillSize =
- MFI->LDSWaveSpillSize * MFI->getMaxFlatWorkGroupSize();
+ MFI->getLDSWaveSpillSize() * MFI->getMaxFlatWorkGroupSize();
ProgInfo.LDSSize = MFI->getLDSSize() + LDSSpillSize;
ProgInfo.LDSBlocks =
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index 13425c8b2a0f..8c86dea4b885 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -55,7 +55,7 @@ private:
uint32_t NumVGPR = 0;
uint32_t NumSGPR = 0;
- uint32_t LDSSize;
+ uint32_t LDSSize = 0;
bool FlatUsed = false;
// Number of SGPRs that meets number of waves per execution unit request.
@@ -85,11 +85,11 @@ private:
// Bonus information for debugging.
bool VCCUsed = false;
- uint64_t CodeLen = 0;
SIProgramInfo() = default;
};
+ uint64_t getFunctionCodeSize(const MachineFunction &MF) const;
void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF) const;
void getAmdKernelCode(amd_kernel_code_t &Out, const SIProgramInfo &KernelInfo,
const MachineFunction &MF) const;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 36bc2498781f..a5cda817ac11 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -415,9 +415,11 @@ public:
return 0;
}
+ // Scratch is allocated in 256 dword per wave blocks for the entire
+ // wavefront. When viewed from the perspective of an arbitrary workitem, this
+ // is 4-byte aligned.
unsigned getStackAlignment() const {
- // Scratch is allocated in 256 dword per wave blocks.
- return 4 * 256 / getWavefrontSize();
+ return 4;
}
bool enableMachineScheduler() const override {
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 01ac9968181a..6edd3e923ba1 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -426,16 +426,23 @@ static bool isArgPassedInSGPR(const Argument *A) {
const Function *F = A->getParent();
// Arguments to compute shaders are never a source of divergence.
- if (!AMDGPU::isShader(F->getCallingConv()))
+ CallingConv::ID CC = F->getCallingConv();
+ switch (CC) {
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::SPIR_KERNEL:
return true;
-
- // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
- if (F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
- F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal))
- return true;
-
- // Everything else is in VGPRs.
- return false;
+ case CallingConv::AMDGPU_VS:
+ case CallingConv::AMDGPU_GS:
+ case CallingConv::AMDGPU_PS:
+ case CallingConv::AMDGPU_CS:
+ // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
+ // Everything else is in VGPRs.
+ return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
+ F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
+ default:
+ // TODO: Should calls support inreg for SGPR inputs?
+ return false;
+ }
}
///
diff --git a/contrib/llvm/lib/Target/AMDGPU/DSInstructions.td b/contrib/llvm/lib/Target/AMDGPU/DSInstructions.td
index a9f64589fa5e..357e18108e7e 100644
--- a/contrib/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -255,8 +255,6 @@ class DS_1A1D_PERMUTE <string opName, SDPatternOperator node = null_frag>
[(set i32:$vdst,
(node (DS1Addr1Offset i32:$addr, i16:$offset), i32:$data0))] > {
- let LGKM_CNT = 0;
-
let mayLoad = 0;
let mayStore = 0;
let isConvergent = 1;
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
index 1655591abf39..6c61fb1f2d6b 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -14,6 +14,7 @@
using namespace llvm;
AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT) : MCAsmInfoELF() {
+ CodePointerSize = (TT.getArch() == Triple::amdgcn) ? 8 : 4;
HasSingleParameterDotFile = false;
//===------------------------------------------------------------------===//
MinInstAlignment = 4;
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 7268131396dc..dd867b15b4c7 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -461,6 +461,13 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ZERO_EXTEND, MVT::v2i32, Expand);
setOperationAction(ISD::SIGN_EXTEND, MVT::v2i32, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Expand);
+ } else {
+ setOperationAction(ISD::SELECT, MVT::v2i16, Custom);
+ setOperationAction(ISD::SELECT, MVT::v2f16, Custom);
+ }
+
+ for (MVT VT : { MVT::v4i16, MVT::v4f16, MVT::v2i8, MVT::v4i8, MVT::v8i8 }) {
+ setOperationAction(ISD::SELECT, VT, Custom);
}
setTargetDAGCombine(ISD::FADD);
@@ -2191,6 +2198,28 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N,
break;
}
}
+ case ISD::SELECT: {
+ SDLoc SL(N);
+ EVT VT = N->getValueType(0);
+ EVT NewVT = getEquivalentMemType(*DAG.getContext(), VT);
+ SDValue LHS = DAG.getNode(ISD::BITCAST, SL, NewVT, N->getOperand(1));
+ SDValue RHS = DAG.getNode(ISD::BITCAST, SL, NewVT, N->getOperand(2));
+
+ EVT SelectVT = NewVT;
+ if (NewVT.bitsLT(MVT::i32)) {
+ LHS = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, LHS);
+ RHS = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, RHS);
+ SelectVT = MVT::i32;
+ }
+
+ SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, SelectVT,
+ N->getOperand(0), LHS, RHS);
+
+ if (NewVT != SelectVT)
+ NewSelect = DAG.getNode(ISD::TRUNCATE, SL, NewVT, NewSelect);
+ Results.push_back(DAG.getNode(ISD::BITCAST, SL, VT, NewSelect));
+ return;
+ }
default:
break;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index a84f3e274f82..810fb05984c4 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -133,14 +133,12 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
AMDGPUBufferPseudoSourceValue BufferPSV;
AMDGPUImagePseudoSourceValue ImagePSV;
-public:
- // FIXME: Make private
+private:
unsigned LDSWaveSpillSize;
unsigned ScratchOffsetReg;
unsigned NumUserSGPRs;
unsigned NumSystemSGPRs;
-private:
bool HasSpilledSGPRs;
bool HasSpilledVGPRs;
bool HasNonSpillStackObjects;
@@ -535,6 +533,10 @@ public:
llvm_unreachable("unexpected dimension");
}
+ unsigned getLDSWaveSpillSize() const {
+ return LDSWaveSpillSize;
+ }
+
const AMDGPUBufferPseudoSourceValue *getBufferPSV() const {
return &BufferPSV;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 36d4df52ff0e..098c67252dd8 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -124,7 +124,7 @@ unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
unsigned RegCount = ST.getMaxNumSGPRs(MF);
unsigned Reg;
- // Try to place it in a hole after PrivateSegmentbufferReg.
+ // Try to place it in a hole after PrivateSegmentBufferReg.
if (RegCount & 3) {
// We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
// alignment constraints, so we have a hole where we can put the wave offset.
diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td
index 57f9d1c6b610..005b74a68af3 100644
--- a/contrib/llvm/lib/Target/ARM/ARM.td
+++ b/contrib/llvm/lib/Target/ARM/ARM.td
@@ -67,8 +67,9 @@ def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true",
[FeatureFPARMv8]>;
def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true",
"Restrict FP to 16 double registers">;
-def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
- "Enable divide instructions">;
+def FeatureHWDivThumb : SubtargetFeature<"hwdiv", "HasHardwareDivideInThumb",
+ "true",
+ "Enable divide instructions in Thumb">;
def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm",
"HasHardwareDivideInARM", "true",
"Enable divide instructions in ARM mode">;
@@ -225,7 +226,7 @@ def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
def FeatureVirtualization : SubtargetFeature<"virtualization",
"HasVirtualization", "true",
"Supports Virtualization extension",
- [FeatureHWDiv, FeatureHWDivARM]>;
+ [FeatureHWDivThumb, FeatureHWDivARM]>;
// M-series ISA
def FeatureMClass : SubtargetFeature<"mclass", "ARMProcClass", "MClass",
@@ -433,21 +434,21 @@ def ARMv7ve : Architecture<"armv7ve", "ARMv7ve", [HasV7Ops,
def ARMv7r : Architecture<"armv7-r", "ARMv7r", [HasV7Ops,
FeatureDB,
FeatureDSP,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureRClass]>;
def ARMv7m : Architecture<"armv7-m", "ARMv7m", [HasV7Ops,
FeatureThumb2,
FeatureNoARM,
FeatureDB,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureMClass]>;
def ARMv7em : Architecture<"armv7e-m", "ARMv7em", [HasV7Ops,
FeatureThumb2,
FeatureNoARM,
FeatureDB,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureMClass,
FeatureDSP]>;
@@ -502,7 +503,7 @@ def ARMv8mBaseline : Architecture<"armv8-m.base", "ARMv8mBaseline",
[HasV8MBaselineOps,
FeatureNoARM,
FeatureDB,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureV7Clrex,
Feature8MSecExt,
FeatureAcquireRelease,
@@ -512,7 +513,7 @@ def ARMv8mMainline : Architecture<"armv8-m.main", "ARMv8mMainline",
[HasV8MMainlineOps,
FeatureNoARM,
FeatureDB,
- FeatureHWDiv,
+ FeatureHWDivThumb,
Feature8MSecExt,
FeatureAcquireRelease,
FeatureMClass]>;
@@ -678,7 +679,7 @@ def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait,
FeatureFP16,
FeatureAvoidPartialCPSR,
FeatureVFP4,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM]>;
def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift,
@@ -686,7 +687,7 @@ def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift,
FeatureNEONForFP,
FeatureVFP4,
FeatureMP,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureAvoidPartialCPSR,
FeatureAvoidMOVsShOp,
@@ -768,39 +769,39 @@ def : ProcNoItin<"cortex-m33", [ARMv8mMainline,
FeatureVFPOnlySP]>;
def : ProcNoItin<"cortex-a32", [ARMv8a,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC]>;
def : ProcNoItin<"cortex-a35", [ARMv8a, ProcA35,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC]>;
def : ProcNoItin<"cortex-a53", [ARMv8a, ProcA53,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC,
FeatureFPAO]>;
def : ProcNoItin<"cortex-a57", [ARMv8a, ProcA57,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC,
FeatureFPAO]>;
def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC]>;
def : ProcNoItin<"cortex-a73", [ARMv8a, ProcA73,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC]>;
@@ -811,7 +812,7 @@ def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift,
FeatureNEONForFP,
FeatureVFP4,
FeatureMP,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureAvoidPartialCPSR,
FeatureAvoidMOVsShOp,
@@ -820,25 +821,25 @@ def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift,
FeatureZCZeroing]>;
def : ProcNoItin<"exynos-m1", [ARMv8a, ProcExynosM1,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC]>;
def : ProcNoItin<"exynos-m2", [ARMv8a, ProcExynosM1,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC]>;
def : ProcNoItin<"exynos-m3", [ARMv8a, ProcExynosM1,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC]>;
def : ProcNoItin<"kryo", [ARMv8a, ProcKryo,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index eb0d410b596b..14e197f477f1 100644
--- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -589,12 +589,6 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
ATS.finishAttributeSection();
}
-static bool isV8M(const ARMSubtarget *Subtarget) {
- // Note that v8M Baseline is a subset of v6T2!
- return (Subtarget->hasV8MBaselineOps() && !Subtarget->hasV6T2Ops()) ||
- Subtarget->hasV8MMainlineOps();
-}
-
//===----------------------------------------------------------------------===//
// Helper routines for EmitStartOfAsmFile() and EmitEndOfAsmFile()
// FIXME:
@@ -602,39 +596,6 @@ static bool isV8M(const ARMSubtarget *Subtarget) {
// to appear in the .ARM.attributes section in ELF.
// Instead of subclassing the MCELFStreamer, we do the work here.
-static ARMBuildAttrs::CPUArch getArchForCPU(StringRef CPU,
- const ARMSubtarget *Subtarget) {
- if (CPU == "xscale")
- return ARMBuildAttrs::v5TEJ;
-
- if (Subtarget->hasV8Ops()) {
- if (Subtarget->isRClass())
- return ARMBuildAttrs::v8_R;
- return ARMBuildAttrs::v8_A;
- } else if (Subtarget->hasV8MMainlineOps())
- return ARMBuildAttrs::v8_M_Main;
- else if (Subtarget->hasV7Ops()) {
- if (Subtarget->isMClass() && Subtarget->hasDSP())
- return ARMBuildAttrs::v7E_M;
- return ARMBuildAttrs::v7;
- } else if (Subtarget->hasV6T2Ops())
- return ARMBuildAttrs::v6T2;
- else if (Subtarget->hasV8MBaselineOps())
- return ARMBuildAttrs::v8_M_Base;
- else if (Subtarget->hasV6MOps())
- return ARMBuildAttrs::v6S_M;
- else if (Subtarget->hasV6Ops())
- return ARMBuildAttrs::v6;
- else if (Subtarget->hasV5TEOps())
- return ARMBuildAttrs::v5TE;
- else if (Subtarget->hasV5TOps())
- return ARMBuildAttrs::v5T;
- else if (Subtarget->hasV4TOps())
- return ARMBuildAttrs::v4T;
- else
- return ARMBuildAttrs::v4;
-}
-
// Returns true if all functions have the same function attribute value.
// It also returns true when the module has no functions.
static bool checkFunctionsAttributeConsistency(const Module &M, StringRef Attr,
@@ -671,89 +632,8 @@ void ARMAsmPrinter::emitAttributes() {
static_cast<const ARMBaseTargetMachine &>(TM);
const ARMSubtarget STI(TT, CPU, ArchFS, ATM, ATM.isLittleEndian());
- const std::string &CPUString = STI.getCPUString();
-
- if (!StringRef(CPUString).startswith("generic")) {
- // FIXME: remove krait check when GNU tools support krait cpu
- if (STI.isKrait()) {
- ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a9");
- // We consider krait as a "cortex-a9" + hwdiv CPU
- // Enable hwdiv through ".arch_extension idiv"
- if (STI.hasDivide() || STI.hasDivideInARMMode())
- ATS.emitArchExtension(ARM::AEK_HWDIV | ARM::AEK_HWDIVARM);
- } else
- ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString);
- }
-
- ATS.emitAttribute(ARMBuildAttrs::CPU_arch, getArchForCPU(CPUString, &STI));
-
- // Tag_CPU_arch_profile must have the default value of 0 when "Architecture
- // profile is not applicable (e.g. pre v7, or cross-profile code)".
- if (STI.hasV7Ops() || isV8M(&STI)) {
- if (STI.isAClass()) {
- ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
- ARMBuildAttrs::ApplicationProfile);
- } else if (STI.isRClass()) {
- ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
- ARMBuildAttrs::RealTimeProfile);
- } else if (STI.isMClass()) {
- ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
- ARMBuildAttrs::MicroControllerProfile);
- }
- }
-
- ATS.emitAttribute(ARMBuildAttrs::ARM_ISA_use,
- STI.hasARMOps() ? ARMBuildAttrs::Allowed
- : ARMBuildAttrs::Not_Allowed);
- if (isV8M(&STI)) {
- ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use,
- ARMBuildAttrs::AllowThumbDerived);
- } else if (STI.isThumb1Only()) {
- ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::Allowed);
- } else if (STI.hasThumb2()) {
- ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use,
- ARMBuildAttrs::AllowThumb32);
- }
-
- if (STI.hasNEON()) {
- /* NEON is not exactly a VFP architecture, but GAS emit one of
- * neon/neon-fp-armv8/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
- if (STI.hasFPARMv8()) {
- if (STI.hasCrypto())
- ATS.emitFPU(ARM::FK_CRYPTO_NEON_FP_ARMV8);
- else
- ATS.emitFPU(ARM::FK_NEON_FP_ARMV8);
- } else if (STI.hasVFP4())
- ATS.emitFPU(ARM::FK_NEON_VFPV4);
- else
- ATS.emitFPU(STI.hasFP16() ? ARM::FK_NEON_FP16 : ARM::FK_NEON);
- // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture
- if (STI.hasV8Ops())
- ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
- STI.hasV8_1aOps() ? ARMBuildAttrs::AllowNeonARMv8_1a:
- ARMBuildAttrs::AllowNeonARMv8);
- } else {
- if (STI.hasFPARMv8())
- // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one
- // FPU, but there are two different names for it depending on the CPU.
- ATS.emitFPU(STI.hasD16()
- ? (STI.isFPOnlySP() ? ARM::FK_FPV5_SP_D16 : ARM::FK_FPV5_D16)
- : ARM::FK_FP_ARMV8);
- else if (STI.hasVFP4())
- ATS.emitFPU(STI.hasD16()
- ? (STI.isFPOnlySP() ? ARM::FK_FPV4_SP_D16 : ARM::FK_VFPV4_D16)
- : ARM::FK_VFPV4);
- else if (STI.hasVFP3())
- ATS.emitFPU(STI.hasD16()
- // +d16
- ? (STI.isFPOnlySP()
- ? (STI.hasFP16() ? ARM::FK_VFPV3XD_FP16 : ARM::FK_VFPV3XD)
- : (STI.hasFP16() ? ARM::FK_VFPV3_D16_FP16 : ARM::FK_VFPV3_D16))
- // -d16
- : (STI.hasFP16() ? ARM::FK_VFPV3_FP16 : ARM::FK_VFPV3));
- else if (STI.hasVFP2())
- ATS.emitFPU(ARM::FK_VFPV2);
- }
+ // Emit build attributes for the available hardware.
+ ATS.emitTargetAttributes(STI);
// RW data addressing.
if (isPositionIndependent()) {
@@ -846,32 +726,15 @@ void ARMAsmPrinter::emitAttributes() {
ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model,
ARMBuildAttrs::AllowIEEE754);
- if (STI.allowsUnalignedMem())
- ATS.emitAttribute(ARMBuildAttrs::CPU_unaligned_access,
- ARMBuildAttrs::Allowed);
- else
- ATS.emitAttribute(ARMBuildAttrs::CPU_unaligned_access,
- ARMBuildAttrs::Not_Allowed);
-
// FIXME: add more flags to ARMBuildAttributes.h
// 8-bytes alignment stuff.
ATS.emitAttribute(ARMBuildAttrs::ABI_align_needed, 1);
ATS.emitAttribute(ARMBuildAttrs::ABI_align_preserved, 1);
- // ABI_HardFP_use attribute to indicate single precision FP.
- if (STI.isFPOnlySP())
- ATS.emitAttribute(ARMBuildAttrs::ABI_HardFP_use,
- ARMBuildAttrs::HardFPSinglePrecision);
-
// Hard float. Use both S and D registers and conform to AAPCS-VFP.
if (STI.isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard)
ATS.emitAttribute(ARMBuildAttrs::ABI_VFP_args, ARMBuildAttrs::HardFPAAPCS);
- // FIXME: Should we signal R9 usage?
-
- if (STI.hasFP16())
- ATS.emitAttribute(ARMBuildAttrs::FP_HP_extension, ARMBuildAttrs::AllowHPFP);
-
// FIXME: To support emitting this build attribute as GCC does, the
// -mfp16-format option and associated plumbing must be
// supported. For now the __fp16 type is exposed by default, so this
@@ -879,21 +742,6 @@ void ARMAsmPrinter::emitAttributes() {
ATS.emitAttribute(ARMBuildAttrs::ABI_FP_16bit_format,
ARMBuildAttrs::FP16FormatIEEE);
- if (STI.hasMPExtension())
- ATS.emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP);
-
- // Hardware divide in ARM mode is part of base arch, starting from ARMv8.
- // If only Thumb hwdiv is present, it must also be in base arch (ARMv7-R/M).
- // It is not possible to produce DisallowDIV: if hwdiv is present in the base
- // arch, supplying -hwdiv downgrades the effective arch, via ClearImpliedBits.
- // AllowDIVExt is only emitted if hwdiv isn't available in the base arch;
- // otherwise, the default value (AllowDIVIfExists) applies.
- if (STI.hasDivideInARMMode() && !STI.hasV8Ops())
- ATS.emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt);
-
- if (STI.hasDSP() && isV8M(&STI))
- ATS.emitAttribute(ARMBuildAttrs::DSP_extension, ARMBuildAttrs::Allowed);
-
if (MMI) {
if (const Module *SourceModule = MMI->getModule()) {
// ABI_PCS_wchar_t to indicate wchar_t width
@@ -930,16 +778,6 @@ void ARMAsmPrinter::emitAttributes() {
else
ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use,
ARMBuildAttrs::R9IsGPR);
-
- if (STI.hasTrustZone() && STI.hasVirtualization())
- ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
- ARMBuildAttrs::AllowTZVirtualization);
- else if (STI.hasTrustZone())
- ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
- ARMBuildAttrs::AllowTZ);
- else if (STI.hasVirtualization())
- ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
- ARMBuildAttrs::AllowVirtualization);
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 23777b821f9f..faf1c631a3a7 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -404,6 +404,29 @@ public:
/// Returns true if the instruction has a shift by immediate that can be
/// executed in one cycle less.
bool isSwiftFastImmShift(const MachineInstr *MI) const;
+
+ /// Returns the predicate register associated with the given frame instruction.
+ unsigned getFramePred(const MachineInstr &MI) const {
+ assert(isFrameInstr(MI));
+ if (isFrameSetup(MI))
+ // Operands of ADJCALLSTACKDOWN:
+ // - argument declared in ADJCALLSTACKDOWN pattern:
+ // 0 - frame size
+ // 1 - predicate code (like ARMCC::AL)
+ // - added by predOps:
+ // 2 - predicate reg
+ return MI.getOperand(2).getReg();
+ assert(MI.getOpcode() == ARM::ADJCALLSTACKUP ||
+ MI.getOpcode() == ARM::tADJCALLSTACKUP);
+ // Operands of ADJCALLSTACKUP:
+ // - argument declared in ADJCALLSTACKUP pattern:
+ // 0 - frame size
+ // 1 - arg of CALLSEQ_END
+ // 2 - predicate code
+ // - added by predOps:
+ // 3 - predicate reg
+ return MI.getOperand(3).getReg();
+ }
};
/// Get the operands corresponding to the given \p Pred value. By default, the
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
index 7a7b7fede7c8..bc7afdb7f1c9 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -273,9 +273,9 @@ def CSR_iOS_SwiftError : CalleeSavedRegs<(sub CSR_iOS, R8)>;
def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
(sub CSR_AAPCS_ThisReturn, R9))>;
-def CSR_iOS_TLSCall : CalleeSavedRegs<(add LR, SP,
- (sequence "R%u", 12, 1),
- (sequence "D%u", 31, 0))>;
+def CSR_iOS_TLSCall
+ : CalleeSavedRegs<(add LR, SP, (sub(sequence "R%u", 12, 1), R9, R12),
+ (sequence "D%u", 31, 0))>;
// C++ TLS access function saves all registers except SP. Try to match
// the order of CSRs in CSR_iOS.
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index 23722f1b7f3f..6434df317aa8 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -1741,10 +1741,9 @@ bool ARMConstantIslands::undoLRSpillRestore() {
.add(MI->getOperand(1));
MI->eraseFromParent();
MadeChange = true;
- }
- if (MI->getOpcode() == ARM::tPUSH &&
- MI->getOperand(2).getReg() == ARM::LR &&
- MI->getNumExplicitOperands() == 3) {
+ } else if (MI->getOpcode() == ARM::tPUSH &&
+ MI->getOperand(2).getReg() == ARM::LR &&
+ MI->getNumExplicitOperands() == 3) {
// Just remove the push.
MI->eraseFromParent();
MadeChange = true;
@@ -2158,6 +2157,15 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
// If we're in PIC mode, there should be another ADD following.
auto *TRI = STI->getRegisterInfo();
+
+ // %base cannot be redefined after the load as it will appear before
+ // TBB/TBH like:
+ // %base =
+ // %base =
+ // tBB %base, %idx
+ if (registerDefinedBetween(BaseReg, Load->getNextNode(), MBB->end(), TRI))
+ continue;
+
if (isPositionIndependentOrROPI) {
MachineInstr *Add = Load->getNextNode();
if (Add->getOpcode() != ARM::tADDrr ||
diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
index 01e062bd185c..e9bc7db66fa4 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -1702,7 +1702,8 @@ bool ARMFastISel::SelectDiv(const Instruction *I, bool isSigned) {
// If we have integer div support we should have selected this automagically.
// In case we have a real miss go ahead and return false and we'll pick
// it up later.
- if (Subtarget->hasDivide()) return false;
+ if (Subtarget->hasDivideInThumbMode())
+ return false;
// Otherwise emit a libcall.
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 37be22bed540..70dbe1bc5b95 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -322,6 +322,18 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
}
}
+/// We need the offset of the frame pointer relative to other MachineFrameInfo
+/// offsets which are encoded relative to SP at function begin.
+/// See also emitPrologue() for how the FP is set up.
+/// Unfortunately we cannot determine this value in determineCalleeSaves() yet
+/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
+/// this to produce a conservative estimate that we check in an assert() later.
+static int getMaxFPOffset(const Function &F, const ARMFunctionInfo &AFI) {
+ // This is a conservative estimate: assume the frame pointer is r7 and that
+ // the registers from pc ("r15") down to r8 are spilled before it (= 8 registers).
+ return -AFI.getArgRegsSaveSize() - (8 * 4);
+}
+
void ARMFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -432,8 +444,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
int FramePtrOffsetInPush = 0;
if (HasFP) {
- FramePtrOffsetInPush =
- MFI.getObjectOffset(FramePtrSpillFI) + ArgRegsSaveSize;
+ int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
+ assert(getMaxFPOffset(*MF.getFunction(), *AFI) <= FPOffset &&
+ "Max FP estimation is wrong");
+ FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize;
AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
NumBytes);
}
@@ -1700,6 +1714,14 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// worth the effort and added fragility?
unsigned EstimatedStackSize =
MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
+
+ // Determine biggest (positive) SP offset in MachineFrameInfo.
+ int MaxFixedOffset = 0;
+ for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
+ int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I);
+ MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset);
+ }
+
bool HasFP = hasFP(MF);
if (HasFP) {
if (AFI->hasStackFrame())
@@ -1707,15 +1729,20 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
} else {
// If FP is not used, SP will be used to access arguments, so count the
// size of arguments into the estimation.
- EstimatedStackSize += AFI->getArgumentStackSize();
+ EstimatedStackSize += MaxFixedOffset;
}
EstimatedStackSize += 16; // For possible paddings.
- bool BigStack = EstimatedStackSize >= estimateRSStackSizeLimit(MF, this) ||
- MFI.hasVarSizedObjects() ||
- (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));
+ unsigned EstimatedRSStackSizeLimit = estimateRSStackSizeLimit(MF, this);
+ int MaxFPOffset = getMaxFPOffset(*MF.getFunction(), *AFI);
+ bool BigFrameOffsets = EstimatedStackSize >= EstimatedRSStackSizeLimit ||
+ MFI.hasVarSizedObjects() ||
+ (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF)) ||
+ // For large argument stacks fp relative addressing may overflow.
+ (HasFP && (MaxFixedOffset - MaxFPOffset) >= (int)EstimatedRSStackSizeLimit);
bool ExtraCSSpill = false;
- if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
+ if (BigFrameOffsets ||
+ !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
AFI->setHasStackFrame(true);
if (HasFP) {
@@ -1899,7 +1926,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// callee-saved register or reserve a special spill slot to facilitate
// register scavenging. Thumb1 needs a spill slot for stack pointer
// adjustments also, even when the frame itself is small.
- if (BigStack && !ExtraCSSpill) {
+ if (BigFrameOffsets && !ExtraCSSpill) {
// If any non-reserved CS register isn't spilled, just spill one or two
// extra. That should take care of it!
unsigned NumExtras = TargetAlign / 4;
@@ -1958,7 +1985,7 @@ MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
// ADJCALLSTACKUP -> add, sp, sp, amount
MachineInstr &Old = *I;
DebugLoc dl = Old.getDebugLoc();
- unsigned Amount = Old.getOperand(0).getImm();
+ unsigned Amount = TII.getFrameSize(Old);
if (Amount != 0) {
// We need to keep the stack aligned properly. To do this, we round the
// amount of space needed for the outgoing arguments up to the next
@@ -1976,14 +2003,11 @@ MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
ARMCC::CondCodes Pred =
(PIdx == -1) ? ARMCC::AL
: (ARMCC::CondCodes)Old.getOperand(PIdx).getImm();
+ unsigned PredReg = TII.getFramePred(Old);
if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
- // Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
- unsigned PredReg = Old.getOperand(2).getReg();
emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
Pred, PredReg);
} else {
- // Note: PredReg is operand 3 for ADJCALLSTACKUP.
- unsigned PredReg = Old.getOperand(3).getReg();
assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
Pred, PredReg);
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index b07b4e1f5cfb..e9df9449103c 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -228,11 +228,6 @@ private:
const uint16_t *DOpcodes,
const uint16_t *QOpcodes = nullptr);
- /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2,
- /// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be
- /// generated to force the table registers to be consecutive.
- void SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);
-
/// Try to select SBFX/UBFX instructions for ARM.
bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
@@ -544,11 +539,11 @@ bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
SDValue NewMulConst;
if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
HandleSDNode Handle(N);
+ SDLoc Loc(N);
replaceDAGValue(N.getOperand(1), NewMulConst);
BaseReg = Handle.getValue();
- Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl,
- PowerOfTwo),
- SDLoc(N), MVT::i32);
+ Opc = CurDAG->getTargetConstant(
+ ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
return true;
}
}
@@ -1859,6 +1854,14 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
return Opc; // If not one we handle, return it unchanged.
}
+/// Returns true if the given increment is a Constant known to be equal to the
+/// access size performed by a NEON load/store. This means the "[rN]!" form can
+/// be used.
+static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
+ auto C = dyn_cast<ConstantSDNode>(Inc);
+ return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
+}
+
void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
const uint16_t *DOpcodes,
const uint16_t *QOpcodes0,
@@ -1926,13 +1929,13 @@ void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
SDValue Inc = N->getOperand(AddrOpIdx + 1);
// FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
// case entirely when the rest are updated to that form, too.
- if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode()))
+ bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
+ if ((NumVecs <= 2) && !IsImmUpdate)
Opc = getVLDSTRegisterUpdateOpcode(Opc);
// FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
// check for that explicitly too. Horribly hacky, but temporary.
- if ((NumVecs > 2 && !isVLDfixed(Opc)) ||
- !isa<ConstantSDNode>(Inc.getNode()))
- Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ if ((NumVecs > 2 && !isVLDfixed(Opc)) || !IsImmUpdate)
+ Ops.push_back(IsImmUpdate ? Reg0 : Inc);
}
Ops.push_back(Pred);
Ops.push_back(Reg0);
@@ -2080,11 +2083,12 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
SDValue Inc = N->getOperand(AddrOpIdx + 1);
// FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
// case entirely when the rest are updated to that form, too.
- if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
+ bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
+ if (NumVecs <= 2 && !IsImmUpdate)
Opc = getVLDSTRegisterUpdateOpcode(Opc);
// FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
// check for that explicitly too. Horribly hacky, but temporary.
- if (!isa<ConstantSDNode>(Inc.getNode()))
+ if (!IsImmUpdate)
Ops.push_back(Inc);
else if (NumVecs > 2 && !isVSTfixed(Opc))
Ops.push_back(Reg0);
@@ -2214,7 +2218,9 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
Ops.push_back(Align);
if (isUpdating) {
SDValue Inc = N->getOperand(AddrOpIdx + 1);
- Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ bool IsImmUpdate =
+ isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
+ Ops.push_back(IsImmUpdate ? Reg0 : Inc);
}
SDValue SuperReg;
@@ -2318,9 +2324,11 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
// fixed-stride update instructions don't have an explicit writeback
// operand. It's implicit in the opcode itself.
SDValue Inc = N->getOperand(2);
- if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
+ bool IsImmUpdate =
+ isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
+ if (NumVecs <= 2 && !IsImmUpdate)
Opc = getVLDSTRegisterUpdateOpcode(Opc);
- if (!isa<ConstantSDNode>(Inc.getNode()))
+ if (!IsImmUpdate)
Ops.push_back(Inc);
// FIXME: VLD3 and VLD4 haven't been updated to that form yet.
else if (NumVecs > 2)
@@ -2356,39 +2364,6 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
CurDAG->RemoveDeadNode(N);
}
-void ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
- unsigned Opc) {
- assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
- SDLoc dl(N);
- EVT VT = N->getValueType(0);
- unsigned FirstTblReg = IsExt ? 2 : 1;
-
- // Form a REG_SEQUENCE to force register allocation.
- SDValue RegSeq;
- SDValue V0 = N->getOperand(FirstTblReg + 0);
- SDValue V1 = N->getOperand(FirstTblReg + 1);
- if (NumVecs == 2)
- RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
- else {
- SDValue V2 = N->getOperand(FirstTblReg + 2);
- // If it's a vtbl3, form a quad D-register and leave the last part as
- // an undef.
- SDValue V3 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
- : N->getOperand(FirstTblReg + 3);
- RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
- }
-
- SmallVector<SDValue, 6> Ops;
- if (IsExt)
- Ops.push_back(N->getOperand(1));
- Ops.push_back(RegSeq);
- Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
- Ops.push_back(getAL(CurDAG, dl)); // predicate
- Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
- ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
-}
-
bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
if (!Subtarget->hasV6T2Ops())
return false;
@@ -3730,59 +3705,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
break;
}
- case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
- switch (IntNo) {
- default:
- break;
-
- case Intrinsic::arm_neon_vtbl2:
- SelectVTBL(N, false, 2, ARM::VTBL2);
- return;
- case Intrinsic::arm_neon_vtbl3:
- SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
- return;
- case Intrinsic::arm_neon_vtbl4:
- SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
- return;
-
- case Intrinsic::arm_neon_vtbx2:
- SelectVTBL(N, true, 2, ARM::VTBX2);
- return;
- case Intrinsic::arm_neon_vtbx3:
- SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
- return;
- case Intrinsic::arm_neon_vtbx4:
- SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
- return;
- }
- break;
- }
-
- case ARMISD::VTBL1: {
- SDLoc dl(N);
- EVT VT = N->getValueType(0);
- SDValue Ops[] = {N->getOperand(0), N->getOperand(1),
- getAL(CurDAG, dl), // Predicate
- CurDAG->getRegister(0, MVT::i32)}; // Predicate Register
- ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops));
- return;
- }
- case ARMISD::VTBL2: {
- SDLoc dl(N);
- EVT VT = N->getValueType(0);
-
- // Form a REG_SEQUENCE to force register allocation.
- SDValue V0 = N->getOperand(0);
- SDValue V1 = N->getOperand(1);
- SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
-
- SDValue Ops[] = {RegSeq, N->getOperand(2), getAL(CurDAG, dl), // Predicate
- CurDAG->getRegister(0, MVT::i32)}; // Predicate Register
- ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops));
- return;
- }
-
case ISD::ATOMIC_CMP_SWAP:
SelectCMP_SWAP(N);
return;
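
The new isPerfectIncrement helper above gates the post-indexed "[rN]!" form on the increment matching the number of bytes the NEON load/store actually accesses. A minimal stand-alone sketch of that arithmetic follows, using plain integers instead of the LLVM EVT/SDValue types; the names here are illustrative, not from the patch.

#include <cstdint>

// Mirrors the size check: the written-back increment must equal
// (vector width in bits / 8) * number of vectors accessed.
static bool isPerfectIncrementBytes(uint64_t IncBytes, unsigned VecBits,
                                    unsigned NumVecs) {
  return IncBytes == uint64_t(VecBits) / 8 * NumVecs;
}

int main() {
  // A VLD2 of two 128-bit vectors touches 32 bytes, so only an increment of
  // exactly 32 can use the fixed-stride "[rN]!" update; anything else keeps
  // the register-increment form.
  bool fixed = isPerfectIncrementBytes(32, 128, 2);  // true
  bool reg   = isPerfectIncrementBytes(16, 128, 2);  // false
  return (fixed && !reg) ? 0 : 1;
}
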
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
index e697c8ca5339..165e9b7378c7 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -852,7 +852,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
if (!Subtarget->hasV6Ops())
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
- bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivide()
+ bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
: Subtarget->hasDivideInARMMode();
if (!hasDivide) {
// These are expanded into libcalls if the cpu doesn't have HW divider.
@@ -860,7 +860,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UDIV, MVT::i32, LibCall);
}
- if (Subtarget->isTargetWindows() && !Subtarget->hasDivide()) {
+ if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
setOperationAction(ISD::SDIV, MVT::i32, Custom);
setOperationAction(ISD::UDIV, MVT::i32, Custom);
@@ -2633,7 +2633,7 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
return true;
}
-bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
if (!Subtarget->supportsTailCall())
return false;
@@ -3347,6 +3347,12 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
}
+ case Intrinsic::arm_neon_vtbl1:
+ return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::arm_neon_vtbl2:
+ return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
}
}
@@ -10867,11 +10873,8 @@ static SDValue CombineBaseUpdate(SDNode *N,
// If the increment is a constant, it must match the memory ref size.
SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
- if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
- uint64_t IncVal = CInc->getZExtValue();
- if (IncVal != NumBytes)
- continue;
- } else if (NumBytes >= 3 * 16) {
+ ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
+ if (NumBytes >= 3 * 16 && (!CInc || CInc->getZExtValue() != NumBytes)) {
// VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
// separate instructions that make it harder to use a non-constant update.
continue;
@@ -11688,34 +11691,6 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-static void computeKnownBits(SelectionDAG &DAG, SDValue Op, APInt &KnownZero,
- APInt &KnownOne) {
- if (Op.getOpcode() == ARMISD::BFI) {
- // Conservatively, we can recurse down the first operand
- // and just mask out all affected bits.
- computeKnownBits(DAG, Op.getOperand(0), KnownZero, KnownOne);
-
- // The operand to BFI is already a mask suitable for removing the bits it
- // sets.
- ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2));
- const APInt &Mask = CI->getAPIntValue();
- KnownZero &= Mask;
- KnownOne &= Mask;
- return;
- }
- if (Op.getOpcode() == ARMISD::CMOV) {
- APInt KZ2(KnownZero.getBitWidth(), 0);
- APInt KO2(KnownOne.getBitWidth(), 0);
- computeKnownBits(DAG, Op.getOperand(0), KnownZero, KnownOne);
- computeKnownBits(DAG, Op.getOperand(1), KZ2, KO2);
-
- KnownZero &= KZ2;
- KnownOne &= KO2;
- return;
- }
- return DAG.computeKnownBits(Op, KnownZero, KnownOne);
-}
-
SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
// If we have a CMOV, OR and AND combination such as:
// if (x & CN)
@@ -11777,7 +11752,7 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D
// Lastly, can we determine that the bits defined by OrCI
// are zero in Y?
APInt KnownZero, KnownOne;
- computeKnownBits(DAG, Y, KnownZero, KnownOne);
+ DAG.computeKnownBits(Y, KnownZero, KnownOne);
if ((OrCI & KnownZero) != OrCI)
return SDValue();
@@ -12657,6 +12632,19 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
}
}
}
+ case ARMISD::BFI: {
+ // Conservatively, we can recurse down the first operand
+ // and just mask out all affected bits.
+ DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth + 1);
+
+ // The operand to BFI is already a mask suitable for removing the bits it
+ // sets.
+ ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2));
+ const APInt &Mask = CI->getAPIntValue();
+ KnownZero &= Mask;
+ KnownOne &= Mask;
+ return;
+ }
}
}
@@ -13052,7 +13040,9 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
// rem = a - b * div
// return {div, rem}
// This should be lowered into UDIV/SDIV + MLS later on.
- if (Subtarget->hasDivide() && Op->getValueType(0).isSimple() &&
+ bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
+ : Subtarget->hasDivideInARMMode();
+ if (hasDivide && Op->getValueType(0).isSimple() &&
Op->getSimpleValueType(0) == MVT::i32) {
unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
const SDValue Dividend = Op->getOperand(0);
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
index 70a0b1380ec9..8b54ce430ed2 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -717,7 +717,7 @@ class InstrItineraryData;
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
- bool mayBeEmittedAsTailCall(CallInst *CI) const override;
+ bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
SDValue getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal,
SDValue ARMcc, SDValue CCR, SDValue Cmp,
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
index cc0e7d4d9c35..703e8071b177 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -259,8 +259,8 @@ def HasFP16 : Predicate<"Subtarget->hasFP16()">,
AssemblerPredicate<"FeatureFP16","half-float conversions">;
def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
AssemblerPredicate<"FeatureFullFP16","full half-float">;
-def HasDivide : Predicate<"Subtarget->hasDivide()">,
- AssemblerPredicate<"FeatureHWDiv", "divide in THUMB">;
+def HasDivideInThumb : Predicate<"Subtarget->hasDivideInThumbMode()">,
+ AssemblerPredicate<"FeatureHWDivThumb", "divide in THUMB">;
def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">,
AssemblerPredicate<"FeatureHWDivARM", "divide in ARM">;
def HasDSP : Predicate<"Subtarget->hasDSP()">,
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
index 681e235d78f0..9b08c612e16b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -587,6 +587,14 @@ def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;
+def SDTARMVTBL1 : SDTypeProfile<1, 2, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
+ SDTCisVT<2, v8i8>]>;
+def SDTARMVTBL2 : SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
+ SDTCisVT<2, v8i8>, SDTCisVT<3, v8i8>]>;
+def NEONvtbl1 : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>;
+def NEONvtbl2 : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>;
+
+
def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
unsigned EltBits = 0;
@@ -6443,7 +6451,8 @@ def VTBL1
: N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
(ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
"vtbl", "8", "$Vd, $Vn, $Vm", "",
- [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
+ [(set DPR:$Vd, (v8i8 (NEONvtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
+
let hasExtraSrcRegAllocReq = 1 in {
def VTBL2
: N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
@@ -6498,6 +6507,49 @@ def VTBX4Pseudo
IIC_VTBX4, "$orig = $dst", []>;
} // DecoderMethod = "DecodeTBLInstruction"
+def : Pat<(v8i8 (NEONvtbl2 v8i8:$Vn0, v8i8:$Vn1, v8i8:$Vm)),
+ (v8i8 (VTBL2 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
+ v8i8:$Vn1, dsub_1),
+ v8i8:$Vm))>;
+def : Pat<(v8i8 (int_arm_neon_vtbx2 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
+ v8i8:$Vm)),
+ (v8i8 (VTBX2 v8i8:$orig,
+ (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
+ v8i8:$Vn1, dsub_1),
+ v8i8:$Vm))>;
+
+def : Pat<(v8i8 (int_arm_neon_vtbl3 v8i8:$Vn0, v8i8:$Vn1,
+ v8i8:$Vn2, v8i8:$Vm)),
+ (v8i8 (VTBL3Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
+ v8i8:$Vn1, dsub_1,
+ v8i8:$Vn2, dsub_2,
+ (v8i8 (IMPLICIT_DEF)), dsub_3),
+ v8i8:$Vm))>;
+def : Pat<(v8i8 (int_arm_neon_vtbx3 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
+ v8i8:$Vn2, v8i8:$Vm)),
+ (v8i8 (VTBX3Pseudo v8i8:$orig,
+ (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
+ v8i8:$Vn1, dsub_1,
+ v8i8:$Vn2, dsub_2,
+ (v8i8 (IMPLICIT_DEF)), dsub_3),
+ v8i8:$Vm))>;
+
+def : Pat<(v8i8 (int_arm_neon_vtbl4 v8i8:$Vn0, v8i8:$Vn1,
+ v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
+ (v8i8 (VTBL4Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
+ v8i8:$Vn1, dsub_1,
+ v8i8:$Vn2, dsub_2,
+ v8i8:$Vn3, dsub_3),
+ v8i8:$Vm))>;
+def : Pat<(v8i8 (int_arm_neon_vtbx4 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
+ v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
+ (v8i8 (VTBX4Pseudo v8i8:$orig,
+ (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
+ v8i8:$Vn1, dsub_1,
+ v8i8:$Vn2, dsub_2,
+ v8i8:$Vn3, dsub_3),
+ v8i8:$Vm))>;
+
// VRINT : Vector Rounding
multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
index f5b673b78ad7..f710ee6a7e77 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -2797,7 +2797,7 @@ def t2SMLSLDX : T2DualHalfMulAddLong<0b101, 0b1101, "smlsldx">;
def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV,
"sdiv", "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>,
- Requires<[HasDivide, IsThumb, HasV8MBaseline]>,
+ Requires<[HasDivideInThumb, IsThumb, HasV8MBaseline]>,
Sched<[WriteDIV]> {
let Inst{31-27} = 0b11111;
let Inst{26-21} = 0b011100;
@@ -2809,7 +2809,7 @@ def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV,
def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV,
"udiv", "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>,
- Requires<[HasDivide, IsThumb, HasV8MBaseline]>,
+ Requires<[HasDivideInThumb, IsThumb, HasV8MBaseline]>,
Sched<[WriteDIV]> {
let Inst{31-27} = 0b11111;
let Inst{26-21} = 0b011101;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
index 8d224d6a70fa..816596b85721 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -299,6 +299,20 @@ bool ARMInstructionSelector::select(MachineInstr &I) const {
I.setDesc(TII.get(ARM::ADDrr));
MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
break;
+ case G_SUB:
+ I.setDesc(TII.get(ARM::SUBrr));
+ MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
+ break;
+ case G_MUL:
+ if (TII.getSubtarget().hasV6Ops()) {
+ I.setDesc(TII.get(ARM::MUL));
+ } else {
+ assert(TII.getSubtarget().useMulOps() && "Unsupported target");
+ I.setDesc(TII.get(ARM::MULv5));
+ MIB->getOperand(0).setIsEarlyClobber(true);
+ }
+ MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
+ break;
case G_FADD:
if (!selectFAdd(MIB, TII, MRI))
return false;
diff --git a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
index 994bbd673dd8..fe9681439e6b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -43,8 +43,9 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
setAction({Op, 1, p0}, Legal);
}
- for (auto Ty : {s1, s8, s16, s32})
- setAction({G_ADD, Ty}, Legal);
+ for (unsigned Op : {G_ADD, G_SUB, G_MUL})
+ for (auto Ty : {s1, s8, s16, s32})
+ setAction({Op, Ty}, Legal);
for (unsigned Op : {G_SEXT, G_ZEXT}) {
setAction({Op, s32}, Legal);
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
index 08f3da738868..e47bd3a8963e 100644
--- a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -219,6 +219,8 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
switch (Opc) {
case G_ADD:
+ case G_SUB:
+ case G_MUL:
case G_SEXT:
case G_ZEXT:
case G_GEP:
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
index 40993fc0aa8a..d2630685d91b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -208,8 +208,8 @@ protected:
/// FP registers for VFPv3.
bool HasD16 = false;
- /// HasHardwareDivide - True if subtarget supports [su]div
- bool HasHardwareDivide = false;
+  /// HasHardwareDivideInThumb - True if subtarget supports [su]div in Thumb mode
+ bool HasHardwareDivideInThumb = false;
/// HasHardwareDivideInARM - True if subtarget supports [su]div in ARM mode
bool HasHardwareDivideInARM = false;
@@ -507,7 +507,7 @@ public:
return hasNEON() && UseNEONForSinglePrecisionFP;
}
- bool hasDivide() const { return HasHardwareDivide; }
+ bool hasDivideInThumbMode() const { return HasHardwareDivideInThumb; }
bool hasDivideInARMMode() const { return HasHardwareDivideInARM; }
bool hasDataBarrier() const { return HasDataBarrier; }
bool hasV7Clrex() const { return HasV7Clrex; }
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index f421d3ac1693..ada816c16389 100644
--- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -67,6 +67,9 @@ static cl::opt<ImplicitItModeTy> ImplicitItMode(
clEnumValN(ImplicitItModeTy::ThumbOnly, "thumb",
"Warn in ARM, emit implicit ITs in Thumb")));
+static cl::opt<bool> AddBuildAttributes("arm-add-build-attributes",
+ cl::init(false));
+
class ARMOperand;
enum VectorLaneTy { NoLanes, AllLanes, IndexedLane };
@@ -540,6 +543,10 @@ public:
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ // Add build attributes based on the selected target.
+ if (AddBuildAttributes)
+ getTargetStreamer().emitTargetAttributes(STI);
+
// Not in an ITBlock to start with.
ITState.CurPosition = ~0U;
@@ -10189,8 +10196,8 @@ static const struct {
{ ARM::AEK_CRYPTO, Feature_HasV8,
{ARM::FeatureCrypto, ARM::FeatureNEON, ARM::FeatureFPARMv8} },
{ ARM::AEK_FP, Feature_HasV8, {ARM::FeatureFPARMv8} },
- { (ARM::AEK_HWDIV | ARM::AEK_HWDIVARM), Feature_HasV7 | Feature_IsNotMClass,
- {ARM::FeatureHWDiv, ARM::FeatureHWDivARM} },
+ { (ARM::AEK_HWDIVTHUMB | ARM::AEK_HWDIVARM), Feature_HasV7 | Feature_IsNotMClass,
+ {ARM::FeatureHWDivThumb, ARM::FeatureHWDivARM} },
{ ARM::AEK_MP, Feature_HasV7 | Feature_IsNotMClass, {ARM::FeatureMP} },
{ ARM::AEK_SIMD, Feature_HasV8, {ARM::FeatureNEON, ARM::FeatureFPARMv8} },
{ ARM::AEK_SEC, Feature_HasV6K, {ARM::FeatureTrustZone} },
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 6fa890ba1cd5..4d6c52f3cd49 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -464,7 +464,7 @@ public:
void emitUnwindRaw(int64_t Offset, const SmallVectorImpl<uint8_t> &Opcodes);
void ChangeSection(MCSection *Section, const MCExpr *Subsection) override {
- LastMappingSymbols[getPreviousSection().first] = std::move(LastEMSInfo);
+ LastMappingSymbols[getCurrentSection().first] = std::move(LastEMSInfo);
MCELFStreamer::ChangeSection(Section, Subsection);
auto LastMappingSymbol = LastMappingSymbols.find(Section);
if (LastMappingSymbol != LastMappingSymbols.end()) {
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index 73e563890dd9..2b0cd461df7a 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -11,9 +11,13 @@
//
//===----------------------------------------------------------------------===//
+#include "ARMTargetMachine.h"
#include "llvm/MC/ConstantPools.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ARMBuildAttributes.h"
+#include "llvm/Support/TargetParser.h"
using namespace llvm;
@@ -75,3 +79,179 @@ void ARMTargetStreamer::emitInst(uint32_t Inst, char Suffix) {}
void
ARMTargetStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) {}
void ARMTargetStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {}
+
+static ARMBuildAttrs::CPUArch getArchForCPU(const MCSubtargetInfo &STI) {
+ if (STI.getCPU() == "xscale")
+ return ARMBuildAttrs::v5TEJ;
+
+ if (STI.hasFeature(ARM::HasV8Ops)) {
+ if (STI.hasFeature(ARM::FeatureRClass))
+ return ARMBuildAttrs::v8_R;
+ return ARMBuildAttrs::v8_A;
+ } else if (STI.hasFeature(ARM::HasV8MMainlineOps))
+ return ARMBuildAttrs::v8_M_Main;
+ else if (STI.hasFeature(ARM::HasV7Ops)) {
+ if (STI.hasFeature(ARM::FeatureMClass) && STI.hasFeature(ARM::FeatureDSP))
+ return ARMBuildAttrs::v7E_M;
+ return ARMBuildAttrs::v7;
+ } else if (STI.hasFeature(ARM::HasV6T2Ops))
+ return ARMBuildAttrs::v6T2;
+ else if (STI.hasFeature(ARM::HasV8MBaselineOps))
+ return ARMBuildAttrs::v8_M_Base;
+ else if (STI.hasFeature(ARM::HasV6MOps))
+ return ARMBuildAttrs::v6S_M;
+ else if (STI.hasFeature(ARM::HasV6Ops))
+ return ARMBuildAttrs::v6;
+ else if (STI.hasFeature(ARM::HasV5TEOps))
+ return ARMBuildAttrs::v5TE;
+ else if (STI.hasFeature(ARM::HasV5TOps))
+ return ARMBuildAttrs::v5T;
+ else if (STI.hasFeature(ARM::HasV4TOps))
+ return ARMBuildAttrs::v4T;
+ else
+ return ARMBuildAttrs::v4;
+}
+
+static bool isV8M(const MCSubtargetInfo &STI) {
+ // Note that v8M Baseline is a subset of v6T2!
+ return (STI.hasFeature(ARM::HasV8MBaselineOps) &&
+ !STI.hasFeature(ARM::HasV6T2Ops)) ||
+ STI.hasFeature(ARM::HasV8MMainlineOps);
+}
+
+/// Emit the build attributes that only depend on the hardware that we expect
+/// to be available, and not on the ABI or any source-language choices.
+void ARMTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
+ switchVendor("aeabi");
+
+ const StringRef CPUString = STI.getCPU();
+ if (!CPUString.empty() && !CPUString.startswith("generic")) {
+ // FIXME: remove krait check when GNU tools support krait cpu
+ if (STI.hasFeature(ARM::ProcKrait)) {
+ emitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a9");
+ // We consider krait as a "cortex-a9" + hwdiv CPU
+ // Enable hwdiv through ".arch_extension idiv"
+ if (STI.hasFeature(ARM::FeatureHWDivThumb) ||
+ STI.hasFeature(ARM::FeatureHWDivARM))
+ emitArchExtension(ARM::AEK_HWDIVTHUMB | ARM::AEK_HWDIVARM);
+ } else {
+ emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString);
+ }
+ }
+
+ emitAttribute(ARMBuildAttrs::CPU_arch, getArchForCPU(STI));
+
+ if (STI.hasFeature(ARM::FeatureAClass)) {
+ emitAttribute(ARMBuildAttrs::CPU_arch_profile,
+ ARMBuildAttrs::ApplicationProfile);
+ } else if (STI.hasFeature(ARM::FeatureRClass)) {
+ emitAttribute(ARMBuildAttrs::CPU_arch_profile,
+ ARMBuildAttrs::RealTimeProfile);
+ } else if (STI.hasFeature(ARM::FeatureMClass)) {
+ emitAttribute(ARMBuildAttrs::CPU_arch_profile,
+ ARMBuildAttrs::MicroControllerProfile);
+ }
+
+ emitAttribute(ARMBuildAttrs::ARM_ISA_use, STI.hasFeature(ARM::FeatureNoARM)
+ ? ARMBuildAttrs::Not_Allowed
+ : ARMBuildAttrs::Allowed);
+
+ if (isV8M(STI)) {
+ emitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::AllowThumbDerived);
+ } else if (STI.hasFeature(ARM::FeatureThumb2)) {
+ emitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::AllowThumb32);
+ } else if (STI.hasFeature(ARM::HasV4TOps)) {
+ emitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::Allowed);
+ }
+
+ if (STI.hasFeature(ARM::FeatureNEON)) {
+    /* NEON is not exactly a VFP architecture, but GAS emits one of
+ * neon/neon-fp-armv8/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
+ if (STI.hasFeature(ARM::FeatureFPARMv8)) {
+ if (STI.hasFeature(ARM::FeatureCrypto))
+ emitFPU(ARM::FK_CRYPTO_NEON_FP_ARMV8);
+ else
+ emitFPU(ARM::FK_NEON_FP_ARMV8);
+ } else if (STI.hasFeature(ARM::FeatureVFP4))
+ emitFPU(ARM::FK_NEON_VFPV4);
+ else
+ emitFPU(STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_NEON_FP16
+ : ARM::FK_NEON);
+ // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture
+ if (STI.hasFeature(ARM::HasV8Ops))
+ emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
+ STI.hasFeature(ARM::HasV8_1aOps)
+ ? ARMBuildAttrs::AllowNeonARMv8_1a
+ : ARMBuildAttrs::AllowNeonARMv8);
+ } else {
+ if (STI.hasFeature(ARM::FeatureFPARMv8))
+ // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one
+ // FPU, but there are two different names for it depending on the CPU.
+ emitFPU(STI.hasFeature(ARM::FeatureD16)
+ ? (STI.hasFeature(ARM::FeatureVFPOnlySP) ? ARM::FK_FPV5_SP_D16
+ : ARM::FK_FPV5_D16)
+ : ARM::FK_FP_ARMV8);
+ else if (STI.hasFeature(ARM::FeatureVFP4))
+ emitFPU(STI.hasFeature(ARM::FeatureD16)
+ ? (STI.hasFeature(ARM::FeatureVFPOnlySP) ? ARM::FK_FPV4_SP_D16
+ : ARM::FK_VFPV4_D16)
+ : ARM::FK_VFPV4);
+ else if (STI.hasFeature(ARM::FeatureVFP3))
+ emitFPU(
+ STI.hasFeature(ARM::FeatureD16)
+ // +d16
+ ? (STI.hasFeature(ARM::FeatureVFPOnlySP)
+ ? (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3XD_FP16
+ : ARM::FK_VFPV3XD)
+ : (STI.hasFeature(ARM::FeatureFP16)
+ ? ARM::FK_VFPV3_D16_FP16
+ : ARM::FK_VFPV3_D16))
+ // -d16
+ : (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3_FP16
+ : ARM::FK_VFPV3));
+ else if (STI.hasFeature(ARM::FeatureVFP2))
+ emitFPU(ARM::FK_VFPV2);
+ }
+
+ // ABI_HardFP_use attribute to indicate single precision FP.
+ if (STI.hasFeature(ARM::FeatureVFPOnlySP))
+ emitAttribute(ARMBuildAttrs::ABI_HardFP_use,
+ ARMBuildAttrs::HardFPSinglePrecision);
+
+ if (STI.hasFeature(ARM::FeatureFP16))
+ emitAttribute(ARMBuildAttrs::FP_HP_extension, ARMBuildAttrs::AllowHPFP);
+
+ if (STI.hasFeature(ARM::FeatureMP))
+ emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP);
+
+ // Hardware divide in ARM mode is part of base arch, starting from ARMv8.
+ // If only Thumb hwdiv is present, it must also be in base arch (ARMv7-R/M).
+ // It is not possible to produce DisallowDIV: if hwdiv is present in the base
+ // arch, supplying -hwdiv downgrades the effective arch, via ClearImpliedBits.
+ // AllowDIVExt is only emitted if hwdiv isn't available in the base arch;
+ // otherwise, the default value (AllowDIVIfExists) applies.
+ if (STI.hasFeature(ARM::FeatureHWDivARM) && !STI.hasFeature(ARM::HasV8Ops))
+ emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt);
+
+ if (STI.hasFeature(ARM::FeatureDSP) && isV8M(STI))
+ emitAttribute(ARMBuildAttrs::DSP_extension, ARMBuildAttrs::Allowed);
+
+ if (STI.hasFeature(ARM::FeatureStrictAlign))
+ emitAttribute(ARMBuildAttrs::CPU_unaligned_access,
+ ARMBuildAttrs::Not_Allowed);
+ else
+ emitAttribute(ARMBuildAttrs::CPU_unaligned_access,
+ ARMBuildAttrs::Allowed);
+
+ if (STI.hasFeature(ARM::FeatureTrustZone) &&
+ STI.hasFeature(ARM::FeatureVirtualization))
+ emitAttribute(ARMBuildAttrs::Virtualization_use,
+ ARMBuildAttrs::AllowTZVirtualization);
+ else if (STI.hasFeature(ARM::FeatureTrustZone))
+ emitAttribute(ARMBuildAttrs::Virtualization_use, ARMBuildAttrs::AllowTZ);
+ else if (STI.hasFeature(ARM::FeatureVirtualization))
+ emitAttribute(ARMBuildAttrs::Virtualization_use,
+ ARMBuildAttrs::AllowVirtualization);
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index fc083b98395b..d0fd366ab9ed 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -83,13 +83,12 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// ADJCALLSTACKUP -> add, sp, sp, amount
MachineInstr &Old = *I;
DebugLoc dl = Old.getDebugLoc();
- unsigned Amount = Old.getOperand(0).getImm();
+ unsigned Amount = TII.getFrameSize(Old);
if (Amount != 0) {
// We need to keep the stack aligned properly. To do this, we round the
// amount of space needed for the outgoing arguments up to the next
// alignment boundary.
- unsigned Align = getStackAlignment();
- Amount = (Amount+Align-1)/Align*Align;
+ Amount = alignTo(Amount, getStackAlignment());
// Replace the pseudo instruction with a new instruction...
unsigned Opc = Old.getOpcode();
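
Both eliminateCallFramePseudoInstr changes above replace the manual round-up with llvm::alignTo. A small self-contained sketch of the equivalent expression, not the LLVM implementation itself:

#include <cstdint>

// Same arithmetic as the removed "(Amount+Align-1)/Align*Align" and as
// alignTo(Amount, Align): round Amount up to the next multiple of Align.
static uint64_t roundUp(uint64_t Amount, uint64_t Align) {
  return (Amount + Align - 1) / Align * Align;
}

int main() {
  // 10 bytes of outgoing arguments with an 8-byte stack alignment become 16.
  return roundUp(10, 8) == 16 ? 0 : 1;
}
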
diff --git a/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp
index 9f2ee8cf8035..535bb012eb07 100644
--- a/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp
@@ -18,7 +18,7 @@
namespace llvm {
AVRMCAsmInfo::AVRMCAsmInfo(const Triple &TT) {
- PointerSize = 2;
+ CodePointerSize = 2;
CalleeSaveStackSlotSize = 2;
CommentString = ";";
PrivateGlobalPrefix = ".L";
diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
index 559ac291a79e..fd7c97bf1f0a 100644
--- a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
+++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
@@ -42,7 +42,7 @@ public:
// messed up in random places by 4 bytes. .debug_line
// section will be parsable, but with odd offsets and
// line numbers, etc.
- PointerSize = 8;
+ CodePointerSize = 8;
}
};
}
diff --git a/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp b/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp
index 61d3630ac095..cb3049bf1500 100644
--- a/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp
@@ -1011,12 +1011,7 @@ void BT::subst(RegisterRef OldRR, RegisterRef NewRR) {
bool BT::reached(const MachineBasicBlock *B) const {
int BN = B->getNumber();
assert(BN >= 0);
- for (EdgeSetType::iterator I = EdgeExec.begin(), E = EdgeExec.end();
- I != E; ++I) {
- if (I->second == BN)
- return true;
- }
- return false;
+ return ReachedBB.count(BN);
}
// Visit an individual instruction. This could be a newly added instruction,
@@ -1036,6 +1031,8 @@ void BT::reset() {
EdgeExec.clear();
InstrExec.clear();
Map.clear();
+ ReachedBB.clear();
+ ReachedBB.reserve(MF.size());
}
void BT::run() {
@@ -1068,6 +1065,7 @@ void BT::run() {
if (EdgeExec.count(Edge))
continue;
EdgeExec.insert(Edge);
+ ReachedBB.insert(Edge.second);
const MachineBasicBlock &B = *MF.getBlockNumbered(Edge.second);
MachineBasicBlock::const_iterator It = B.begin(), End = B.end();
diff --git a/contrib/llvm/lib/Target/Hexagon/BitTracker.h b/contrib/llvm/lib/Target/Hexagon/BitTracker.h
index a547b34e852f..7f49f430382d 100644
--- a/contrib/llvm/lib/Target/Hexagon/BitTracker.h
+++ b/contrib/llvm/lib/Target/Hexagon/BitTracker.h
@@ -10,6 +10,7 @@
#ifndef LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H
#define LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -68,10 +69,11 @@ private:
typedef std::set<const MachineInstr *> InstrSetType;
typedef std::queue<CFGEdge> EdgeQueueType;
- EdgeSetType EdgeExec; // Executable flow graph edges.
- InstrSetType InstrExec; // Executable instructions.
- EdgeQueueType FlowQ; // Work queue of CFG edges.
- bool Trace; // Enable tracing for debugging.
+ EdgeSetType EdgeExec; // Executable flow graph edges.
+ InstrSetType InstrExec; // Executable instructions.
+ EdgeQueueType FlowQ; // Work queue of CFG edges.
+ DenseSet<unsigned> ReachedBB; // Cache of reached blocks.
+ bool Trace; // Enable tracing for debugging.
const MachineEvaluator &ME;
MachineFunction &MF;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 418dd71aeb4b..e5eb059b566f 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -635,7 +635,7 @@ HexagonTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, RetOps);
}
-bool HexagonTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+bool HexagonTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
// If either no tail call or told not to tail call at all, don't.
auto Attr =
CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index fb8f0ba6b057..1415156487c0 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -195,7 +195,7 @@ namespace HexagonISD {
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &dl, SelectionDAG &DAG) const override;
- bool mayBeEmittedAsTailCall(CallInst *CI) const override;
+ bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
/// If a physical register, this returns the register that receives the
/// exception address on entry to an EH pad.
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
index b243de317dc5..27b40f134b1f 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
@@ -35,7 +35,6 @@
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
-#include <map>
static cl::opt<int> CodeGrowthLimit("hexagon-amode-growth-limit",
cl::Hidden, cl::init(0), cl::desc("Code growth limit for address mode "
@@ -45,10 +44,8 @@ using namespace llvm;
using namespace rdf;
namespace llvm {
-
FunctionPass *createHexagonOptAddrMode();
- void initializeHexagonOptAddrModePass(PassRegistry &);
-
+ void initializeHexagonOptAddrModePass(PassRegistry&);
} // end namespace llvm
namespace {
@@ -59,10 +56,7 @@ public:
HexagonOptAddrMode()
: MachineFunctionPass(ID), HII(nullptr), MDT(nullptr), DFG(nullptr),
- LV(nullptr) {
- PassRegistry &R = *PassRegistry::getPassRegistry();
- initializeHexagonOptAddrModePass(R);
- }
+ LV(nullptr) {}
StringRef getPassName() const override {
return "Optimize addressing mode of load/store";
@@ -84,7 +78,6 @@ private:
MachineDominatorTree *MDT;
DataFlowGraph *DFG;
DataFlowGraph::DefStackMap DefM;
- std::map<RegisterRef, std::map<NodeId, NodeId>> RDefMap;
Liveness *LV;
MISetType Deleted;
@@ -99,8 +92,6 @@ private:
void getAllRealUses(NodeAddr<StmtNode *> SN, NodeList &UNodeList);
bool allValidCandidates(NodeAddr<StmtNode *> SA, NodeList &UNodeList);
short getBaseWithLongOffset(const MachineInstr &MI) const;
- void updateMap(NodeAddr<InstrNode *> IA);
- bool constructDefMap(MachineBasicBlock *B);
bool changeStore(MachineInstr *OldMI, MachineOperand ImmOp,
unsigned ImmOpNum);
bool changeLoad(MachineInstr *OldMI, MachineOperand ImmOp, unsigned ImmOpNum);
@@ -112,11 +103,11 @@ private:
char HexagonOptAddrMode::ID = 0;
-INITIALIZE_PASS_BEGIN(HexagonOptAddrMode, "opt-amode",
+INITIALIZE_PASS_BEGIN(HexagonOptAddrMode, "amode-opt",
"Optimize addressing mode", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier)
-INITIALIZE_PASS_END(HexagonOptAddrMode, "opt-amode", "Optimize addressing mode",
+INITIALIZE_PASS_END(HexagonOptAddrMode, "amode-opt", "Optimize addressing mode",
false, false)
bool HexagonOptAddrMode::hasRepForm(MachineInstr &MI, unsigned TfrDefR) {
@@ -173,8 +164,11 @@ bool HexagonOptAddrMode::canRemoveAddasl(NodeAddr<StmtNode *> AddAslSN,
for (auto I = UNodeList.rbegin(), E = UNodeList.rend(); I != E; ++I) {
NodeAddr<UseNode *> UA = *I;
NodeAddr<InstrNode *> IA = UA.Addr->getOwner(*DFG);
- if ((UA.Addr->getFlags() & NodeAttrs::PhiRef) ||
- RDefMap[OffsetRR][IA.Id] != OffsetRegRD)
+ if (UA.Addr->getFlags() & NodeAttrs::PhiRef)
+ return false;
+ NodeAddr<RefNode*> AA = LV->getNearestAliasedRef(OffsetRR, IA);
+ if ((DFG->IsDef(AA) && AA.Id != OffsetRegRD) ||
+ AA.Addr->getReachingDef() != OffsetRegRD)
return false;
MachineInstr &UseMI = *NodeAddr<StmtNode *>(IA).Addr->getCode();
@@ -486,14 +480,14 @@ bool HexagonOptAddrMode::changeAddAsl(NodeAddr<UseNode *> AddAslUN,
MIB.add(AddAslMI->getOperand(2));
MIB.add(AddAslMI->getOperand(3));
const GlobalValue *GV = ImmOp.getGlobal();
- MIB.addGlobalAddress(GV, UseMI->getOperand(2).getImm(),
+ MIB.addGlobalAddress(GV, UseMI->getOperand(2).getImm()+ImmOp.getOffset(),
ImmOp.getTargetFlags());
OpStart = 3;
} else if (UseMID.mayStore()) {
MIB.add(AddAslMI->getOperand(2));
MIB.add(AddAslMI->getOperand(3));
const GlobalValue *GV = ImmOp.getGlobal();
- MIB.addGlobalAddress(GV, UseMI->getOperand(1).getImm(),
+ MIB.addGlobalAddress(GV, UseMI->getOperand(1).getImm()+ImmOp.getOffset(),
ImmOp.getTargetFlags());
MIB.add(UseMI->getOperand(2));
OpStart = 3;
@@ -597,46 +591,6 @@ bool HexagonOptAddrMode::processBlock(NodeAddr<BlockNode *> BA) {
return Changed;
}
-void HexagonOptAddrMode::updateMap(NodeAddr<InstrNode *> IA) {
- RegisterSet RRs;
- for (NodeAddr<RefNode *> RA : IA.Addr->members(*DFG))
- RRs.insert(RA.Addr->getRegRef(*DFG));
- bool Common = false;
- for (auto &R : RDefMap) {
- if (!RRs.count(R.first))
- continue;
- Common = true;
- break;
- }
- if (!Common)
- return;
-
- for (auto &R : RDefMap) {
- auto F = DefM.find(R.first.Reg);
- if (F == DefM.end() || F->second.empty())
- continue;
- R.second[IA.Id] = F->second.top()->Id;
- }
-}
-
-bool HexagonOptAddrMode::constructDefMap(MachineBasicBlock *B) {
- bool Changed = false;
- auto BA = DFG->getFunc().Addr->findBlock(B, *DFG);
- DFG->markBlock(BA.Id, DefM);
-
- for (NodeAddr<InstrNode *> IA : BA.Addr->members(*DFG)) {
- updateMap(IA);
- DFG->pushAllDefs(IA, DefM);
- }
-
- MachineDomTreeNode *N = MDT->getNode(B);
- for (auto I : *N)
- Changed |= constructDefMap(I->getBlock());
-
- DFG->releaseBlock(BA.Id, DefM);
- return Changed;
-}
-
bool HexagonOptAddrMode::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(*MF.getFunction()))
return false;
@@ -658,8 +612,6 @@ bool HexagonOptAddrMode::runOnMachineFunction(MachineFunction &MF) {
L.computePhiInfo();
LV = &L;
- constructDefMap(&DFG->getMF().front());
-
Deleted.clear();
NodeAddr<FuncNode *> FA = DFG->getFunc();
DEBUG(dbgs() << "==== [RefMap#]=====:\n "
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 06fc9195fa67..6913d50bbcaa 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -111,6 +111,7 @@ namespace llvm {
extern char &HexagonExpandCondsetsID;
void initializeHexagonExpandCondsetsPass(PassRegistry&);
void initializeHexagonLoopIdiomRecognizePass(PassRegistry&);
+ void initializeHexagonOptAddrModePass(PassRegistry&);
Pass *createHexagonLoopIdiomPass();
FunctionPass *createHexagonBitSimplify();
@@ -152,6 +153,7 @@ extern "C" void LLVMInitializeHexagonTarget() {
// Register the target.
RegisterTargetMachine<HexagonTargetMachine> X(getTheHexagonTarget());
initializeHexagonLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
+ initializeHexagonOptAddrModePass(*PassRegistry::getPassRegistry());
}
HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
diff --git a/contrib/llvm/lib/Target/Hexagon/RDFCopy.cpp b/contrib/llvm/lib/Target/Hexagon/RDFCopy.cpp
index 57ce9fabc5e3..ea86ffba58f6 100644
--- a/contrib/llvm/lib/Target/Hexagon/RDFCopy.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/RDFCopy.cpp
@@ -59,7 +59,7 @@ void CopyPropagation::recordCopy(NodeAddr<StmtNode*> SA, EqualityMap &EM) {
bool CopyPropagation::scanBlock(MachineBasicBlock *B) {
bool Changed = false;
- auto BA = DFG.getFunc().Addr->findBlock(B, DFG);
+ NodeAddr<BlockNode*> BA = DFG.findBlock(B);
for (NodeAddr<InstrNode*> IA : BA.Addr->members(DFG)) {
if (DFG.IsCode<NodeAttrs::Stmt>(IA)) {
diff --git a/contrib/llvm/lib/Target/Hexagon/RDFGraph.h b/contrib/llvm/lib/Target/Hexagon/RDFGraph.h
index d5faca4cd6f4..52f390356b26 100644
--- a/contrib/llvm/lib/Target/Hexagon/RDFGraph.h
+++ b/contrib/llvm/lib/Target/Hexagon/RDFGraph.h
@@ -508,7 +508,8 @@ namespace rdf {
static_assert(sizeof(NodeBase) <= NodeAllocator::NodeMemSize,
"NodeBase must be at most NodeAllocator::NodeMemSize bytes");
- typedef std::vector<NodeAddr<NodeBase*>> NodeList;
+// typedef std::vector<NodeAddr<NodeBase*>> NodeList;
+ typedef SmallVector<NodeAddr<NodeBase*>,4> NodeList;
typedef std::set<NodeId> NodeSet;
struct RefNode : public NodeBase {
diff --git a/contrib/llvm/lib/Target/Hexagon/RDFRegisters.cpp b/contrib/llvm/lib/Target/Hexagon/RDFRegisters.cpp
index 5c5496a548af..4224ded3418b 100644
--- a/contrib/llvm/lib/Target/Hexagon/RDFRegisters.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/RDFRegisters.cpp
@@ -69,6 +69,19 @@ PhysicalRegisterInfo::PhysicalRegisterInfo(const TargetRegisterInfo &tri,
for (const MachineOperand &Op : In.operands())
if (Op.isRegMask())
RegMasks.insert(Op.getRegMask());
+
+ MaskInfos.resize(RegMasks.size()+1);
+ for (uint32_t M = 1, NM = RegMasks.size(); M <= NM; ++M) {
+ BitVector PU(TRI.getNumRegUnits());
+ const uint32_t *MB = RegMasks.get(M);
+ for (unsigned i = 1, e = TRI.getNumRegs(); i != e; ++i) {
+ if (!(MB[i/32] & (1u << (i%32))))
+ continue;
+ for (MCRegUnitIterator U(i, &TRI); U.isValid(); ++U)
+ PU.set(*U);
+ }
+ MaskInfos[M].Units = PU.flip();
+ }
}
RegisterRef PhysicalRegisterInfo::normalize(RegisterRef RR) const {
@@ -201,17 +214,8 @@ bool PhysicalRegisterInfo::aliasMM(RegisterRef RM, RegisterRef RN) const {
bool RegisterAggr::hasAliasOf(RegisterRef RR) const {
- if (PhysicalRegisterInfo::isRegMaskId(RR.Reg)) {
- // XXX SLOW
- const uint32_t *MB = PRI.getRegMaskBits(RR.Reg);
- for (unsigned i = 1, e = PRI.getTRI().getNumRegs(); i != e; ++i) {
- if (MB[i/32] & (1u << (i%32)))
- continue;
- if (hasAliasOf(RegisterRef(i, LaneBitmask::getAll())))
- return true;
- }
- return false;
- }
+ if (PhysicalRegisterInfo::isRegMaskId(RR.Reg))
+ return Units.anyCommon(PRI.getMaskUnits(RR.Reg));
for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) {
std::pair<uint32_t,LaneBitmask> P = *U;
@@ -224,15 +228,8 @@ bool RegisterAggr::hasAliasOf(RegisterRef RR) const {
bool RegisterAggr::hasCoverOf(RegisterRef RR) const {
if (PhysicalRegisterInfo::isRegMaskId(RR.Reg)) {
- // XXX SLOW
- const uint32_t *MB = PRI.getRegMaskBits(RR.Reg);
- for (unsigned i = 1, e = PRI.getTRI().getNumRegs(); i != e; ++i) {
- if (MB[i/32] & (1u << (i%32)))
- continue;
- if (!hasCoverOf(RegisterRef(i, LaneBitmask::getAll())))
- return false;
- }
- return true;
+ BitVector T(PRI.getMaskUnits(RR.Reg));
+ return T.reset(Units).none();
}
for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) {
@@ -246,15 +243,7 @@ bool RegisterAggr::hasCoverOf(RegisterRef RR) const {
RegisterAggr &RegisterAggr::insert(RegisterRef RR) {
if (PhysicalRegisterInfo::isRegMaskId(RR.Reg)) {
- BitVector PU(PRI.getTRI().getNumRegUnits()); // Preserved units.
- const uint32_t *MB = PRI.getRegMaskBits(RR.Reg);
- for (unsigned i = 1, e = PRI.getTRI().getNumRegs(); i != e; ++i) {
- if (!(MB[i/32] & (1u << (i%32))))
- continue;
- for (MCRegUnitIterator U(i, &PRI.getTRI()); U.isValid(); ++U)
- PU.set(*U);
- }
- Units |= PU.flip();
+ Units |= PRI.getMaskUnits(RR.Reg);
return *this;
}
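
The hasCoverOf rewrite above expresses "every unit named by the reg-mask is already in Units" as "the mask with Units cleared is empty". A stand-alone sketch of that subset test with std::bitset; the helper is hypothetical and does not use the LLVM BitVector API.

#include <bitset>

// Covered iff nothing in `want` is missing from `have`.
static bool covers(const std::bitset<64> &have, const std::bitset<64> &want) {
  return (want & ~have).none();
}

int main() {
  std::bitset<64> have("1111"); // units {0,1,2,3}
  std::bitset<64> want("0101"); // units {0,2}
  return covers(have, want) ? 0 : 1; // subset, so covered
}
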
diff --git a/contrib/llvm/lib/Target/Hexagon/RDFRegisters.h b/contrib/llvm/lib/Target/Hexagon/RDFRegisters.h
index 4b35c85a6b62..314d8b5666d7 100644
--- a/contrib/llvm/lib/Target/Hexagon/RDFRegisters.h
+++ b/contrib/llvm/lib/Target/Hexagon/RDFRegisters.h
@@ -51,6 +51,8 @@ namespace rdf {
return F - Map.begin() + 1;
}
+ uint32_t size() const { return Map.size(); }
+
typedef typename std::vector<T>::const_iterator const_iterator;
const_iterator begin() const { return Map.begin(); }
const_iterator end() const { return Map.end(); }
@@ -107,6 +109,9 @@ namespace rdf {
RegisterRef getRefForUnit(uint32_t U) const {
return RegisterRef(UnitInfos[U].Reg, UnitInfos[U].Mask);
}
+ const BitVector &getMaskUnits(RegisterId MaskId) const {
+ return MaskInfos[TargetRegisterInfo::stackSlot2Index(MaskId)].Units;
+ }
const TargetRegisterInfo &getTRI() const { return TRI; }
@@ -118,11 +123,15 @@ namespace rdf {
RegisterId Reg = 0;
LaneBitmask Mask;
};
+ struct MaskInfo {
+ BitVector Units;
+ };
const TargetRegisterInfo &TRI;
+ IndexedSet<const uint32_t*> RegMasks;
std::vector<RegInfo> RegInfos;
std::vector<UnitInfo> UnitInfos;
- IndexedSet<const uint32_t*> RegMasks;
+ std::vector<MaskInfo> MaskInfos;
bool aliasRR(RegisterRef RA, RegisterRef RB) const;
bool aliasRM(RegisterRef RR, RegisterRef RM) const;
@@ -135,7 +144,7 @@ namespace rdf {
: Units(pri.getTRI().getNumRegUnits()), PRI(pri) {}
RegisterAggr(const RegisterAggr &RG) = default;
- bool empty() const { return Units.empty(); }
+ bool empty() const { return Units.none(); }
bool hasAliasOf(RegisterRef RR) const;
bool hasCoverOf(RegisterRef RR) const;
static bool isCoverOf(RegisterRef RA, RegisterRef RB,
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
index c26b3081dbc3..82e6731ecd78 100644
--- a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
@@ -17,7 +17,7 @@ using namespace llvm;
void MSP430MCAsmInfo::anchor() { }
MSP430MCAsmInfo::MSP430MCAsmInfo(const Triple &TT) {
- PointerSize = CalleeSaveStackSlotSize = 2;
+ CodePointerSize = CalleeSaveStackSlotSize = 2;
CommentString = ";";
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
index ebe3c5784888..11411d997bb3 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -23,7 +23,7 @@ MipsMCAsmInfo::MipsMCAsmInfo(const Triple &TheTriple) {
if ((TheTriple.getArch() == Triple::mips64el) ||
(TheTriple.getArch() == Triple::mips64)) {
- PointerSize = CalleeSaveStackSlotSize = 8;
+ CodePointerSize = CalleeSaveStackSlotSize = 8;
}
// FIXME: This condition isn't quite right but it's the best we can do until
diff --git a/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
index 8b04fcb76920..bf79f0f2ff82 100644
--- a/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -3781,6 +3781,80 @@ let Predicates = [HasMSA] in {
ISA_MIPS1_NOT_32R6_64R6;
}
+def vsplati64_imm_eq_63 : PatLeaf<(bitconvert (v4i32 (build_vector))), [{
+ APInt Imm;
+ SDNode *BV = N->getOperand(0).getNode();
+ EVT EltTy = N->getValueType(0).getVectorElementType();
+
+ return selectVSplat(BV, Imm, EltTy.getSizeInBits()) &&
+ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 63;
+}]>;
+
+def immi32Cst7 : ImmLeaf<i32, [{return isUInt<32>(Imm) && Imm == 7;}]>;
+def immi32Cst15 : ImmLeaf<i32, [{return isUInt<32>(Imm) && Imm == 15;}]>;
+def immi32Cst31 : ImmLeaf<i32, [{return isUInt<32>(Imm) && Imm == 31;}]>;
+
+def vsplati8imm7 : PatFrag<(ops node:$wt),
+ (and node:$wt, (vsplati8 immi32Cst7))>;
+def vsplati16imm15 : PatFrag<(ops node:$wt),
+ (and node:$wt, (vsplati16 immi32Cst15))>;
+def vsplati32imm31 : PatFrag<(ops node:$wt),
+ (and node:$wt, (vsplati32 immi32Cst31))>;
+def vsplati64imm63 : PatFrag<(ops node:$wt),
+ (and node:$wt, vsplati64_imm_eq_63)>;
+
+class MSAShiftPat<SDNode Node, ValueType VT, MSAInst Insn, dag Vec> :
+ MSAPat<(VT (Node VT:$ws, (VT (and VT:$wt, Vec)))),
+ (VT (Insn VT:$ws, VT:$wt))>;
+
+class MSABitPat<SDNode Node, ValueType VT, MSAInst Insn, PatFrag Frag> :
+ MSAPat<(VT (Node VT:$ws, (shl vsplat_imm_eq_1, (Frag VT:$wt)))),
+ (VT (Insn VT:$ws, VT:$wt))>;
+
+multiclass MSAShiftPats<SDNode Node, string Insn> {
+ def : MSAShiftPat<Node, v16i8, !cast<MSAInst>(Insn#_B),
+ (vsplati8 immi32Cst7)>;
+ def : MSAShiftPat<Node, v8i16, !cast<MSAInst>(Insn#_H),
+ (vsplati16 immi32Cst15)>;
+ def : MSAShiftPat<Node, v4i32, !cast<MSAInst>(Insn#_W),
+ (vsplati32 immi32Cst31)>;
+ def : MSAPat<(v2i64 (Node v2i64:$ws, (v2i64 (and v2i64:$wt,
+ vsplati64_imm_eq_63)))),
+ (v2i64 (!cast<MSAInst>(Insn#_D) v2i64:$ws, v2i64:$wt))>;
+}
+
+multiclass MSABitPats<SDNode Node, string Insn> {
+ def : MSABitPat<Node, v16i8, !cast<MSAInst>(Insn#_B), vsplati8imm7>;
+ def : MSABitPat<Node, v8i16, !cast<MSAInst>(Insn#_H), vsplati16imm15>;
+ def : MSABitPat<Node, v4i32, !cast<MSAInst>(Insn#_W), vsplati32imm31>;
+ def : MSAPat<(Node v2i64:$ws, (shl (v2i64 vsplati64_imm_eq_1),
+ (vsplati64imm63 v2i64:$wt))),
+ (v2i64 (!cast<MSAInst>(Insn#_D) v2i64:$ws, v2i64:$wt))>;
+}
+
+defm : MSAShiftPats<shl, "SLL">;
+defm : MSAShiftPats<srl, "SRL">;
+defm : MSAShiftPats<sra, "SRA">;
+defm : MSABitPats<xor, "BNEG">;
+defm : MSABitPats<or, "BSET">;
+
+def : MSAPat<(and v16i8:$ws, (xor (shl vsplat_imm_eq_1,
+ (vsplati8imm7 v16i8:$wt)),
+ immAllOnesV)),
+ (v16i8 (BCLR_B v16i8:$ws, v16i8:$wt))>;
+def : MSAPat<(and v8i16:$ws, (xor (shl vsplat_imm_eq_1,
+ (vsplati16imm15 v8i16:$wt)),
+ immAllOnesV)),
+ (v8i16 (BCLR_H v8i16:$ws, v8i16:$wt))>;
+def : MSAPat<(and v4i32:$ws, (xor (shl vsplat_imm_eq_1,
+ (vsplati32imm31 v4i32:$wt)),
+ immAllOnesV)),
+ (v4i32 (BCLR_W v4i32:$ws, v4i32:$wt))>;
+def : MSAPat<(and v2i64:$ws, (xor (shl (v2i64 vsplati64_imm_eq_1),
+ (vsplati64imm63 v2i64:$wt)),
+ (bitconvert (v4i32 immAllOnesV)))),
+ (v2i64 (BCLR_D v2i64:$ws, v2i64:$wt))>;
+
// Vector extraction with fixed index.
//
// Extracting 32-bit values on MSA32 should always use COPY_S_W rather than
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
index e2da8477295b..bf7f079e3105 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -1547,11 +1547,24 @@ static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm);
}
+static SDValue truncateVecElts(SDValue Op, SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ EVT ResTy = Op->getValueType(0);
+ SDValue Vec = Op->getOperand(2);
+ bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian();
+ MVT ResEltTy = ResTy == MVT::v2i64 ? MVT::i64 : MVT::i32;
+ SDValue ConstValue = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1,
+ DL, ResEltTy);
+ SDValue SplatVec = getBuildVectorSplat(ResTy, ConstValue, BigEndian, DAG);
+
+ return DAG.getNode(ISD::AND, DL, ResTy, Vec, SplatVec);
+}
+
static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) {
EVT ResTy = Op->getValueType(0);
SDLoc DL(Op);
SDValue One = DAG.getConstant(1, DL, ResTy);
- SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, Op->getOperand(2));
+ SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Op, DAG));
return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1),
DAG.getNOT(DL, Bit, ResTy));
@@ -1687,7 +1700,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(ISD::XOR, DL, VecTy, Op->getOperand(1),
DAG.getNode(ISD::SHL, DL, VecTy, One,
- Op->getOperand(2)));
+ truncateVecElts(Op, DAG)));
}
case Intrinsic::mips_bnegi_b:
case Intrinsic::mips_bnegi_h:
@@ -1723,7 +1736,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(ISD::OR, DL, VecTy, Op->getOperand(1),
DAG.getNode(ISD::SHL, DL, VecTy, One,
- Op->getOperand(2)));
+ truncateVecElts(Op, DAG)));
}
case Intrinsic::mips_bseti_b:
case Intrinsic::mips_bseti_h:
@@ -2210,7 +2223,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::mips_sll_w:
case Intrinsic::mips_sll_d:
return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1),
- Op->getOperand(2));
+ truncateVecElts(Op, DAG));
case Intrinsic::mips_slli_b:
case Intrinsic::mips_slli_h:
case Intrinsic::mips_slli_w:
@@ -2240,7 +2253,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::mips_sra_w:
case Intrinsic::mips_sra_d:
return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1),
- Op->getOperand(2));
+ truncateVecElts(Op, DAG));
case Intrinsic::mips_srai_b:
case Intrinsic::mips_srai_h:
case Intrinsic::mips_srai_w:
@@ -2270,7 +2283,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::mips_srl_w:
case Intrinsic::mips_srl_d:
return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1),
- Op->getOperand(2));
+ truncateVecElts(Op, DAG));
case Intrinsic::mips_srli_b:
case Intrinsic::mips_srli_h:
case Intrinsic::mips_srli_w:
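
The truncateVecElts helper above masks every element of the shift-amount vector down to the element width, so the generic SHL/SRL/SRA nodes produced for the MSA shift intrinsics always see an in-range amount. A plain per-element sketch for 32-bit lanes, illustrative only:

#include <array>
#include <cstdint>

// Each 32-bit lane keeps only its low 5 bits, so the generic shift nodes see
// an in-range amount (a lane value of 35 becomes 3, 64 becomes 0).
static std::array<uint32_t, 4> truncateShiftAmounts(std::array<uint32_t, 4> a) {
  for (auto &lane : a)
    lane &= 31;
  return a;
}

int main() {
  std::array<uint32_t, 4> amts{{35, 3, 64, 31}};
  auto r = truncateShiftAmounts(amts);
  return (r[0] == 3 && r[2] == 0) ? 0 : 1;
}
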
diff --git a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
index 78bdf4e698d8..bdd0f156c8af 100644
--- a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -27,7 +27,7 @@ void NVPTXMCAsmInfo::anchor() {}
NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Triple &TheTriple) {
if (TheTriple.getArch() == Triple::nvptx64) {
- PointerSize = CalleeSaveStackSlotSize = 8;
+ CodePointerSize = CalleeSaveStackSlotSize = 8;
}
CommentString = "//";
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 21e25de80dc7..ba28cd83278b 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -2004,7 +2004,7 @@ void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
for (unsigned I = 0, E = DL.getTypeAllocSize(CPV->getType()); I < E; ++I) {
uint8_t Byte = Val.getLoBits(8).getZExtValue();
aggBuffer->addBytes(&Byte, 1, 1);
- Val = Val.lshr(8);
+ Val.lshrInPlace(8);
}
return;
}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index d8fab5b7c01a..d30bf1a56e8a 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -20,7 +20,7 @@ void PPCMCAsmInfoDarwin::anchor() { }
PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) {
if (is64Bit) {
- PointerSize = CalleeSaveStackSlotSize = 8;
+ CodePointerSize = CalleeSaveStackSlotSize = 8;
}
IsLittleEndian = false;
@@ -50,7 +50,7 @@ PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) {
NeedsLocalForSize = true;
if (is64Bit) {
- PointerSize = CalleeSaveStackSlotSize = 8;
+ CodePointerSize = CalleeSaveStackSlotSize = 8;
}
IsLittleEndian = T.getArch() == Triple::ppc64le;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 9c72638023bb..125c00295f88 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -2977,10 +2977,10 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
SDValue Chain = LD->getChain();
SDValue Ops[] = { Base, Offset, Chain };
- SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
- N->getValueType(0), Ops);
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = LD->getMemOperand();
+ SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
+ N->getValueType(0), Ops);
cast<MachineSDNode>(NewN)->setMemRefs(MemOp, MemOp + 1);
return;
}
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
index b164df8b595a..d622911e92c4 100644
--- a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
@@ -18,7 +18,7 @@ using namespace llvm;
void RISCVMCAsmInfo::anchor() {}
RISCVMCAsmInfo::RISCVMCAsmInfo(const Triple &TT) {
- PointerSize = CalleeSaveStackSlotSize = TT.isArch64Bit() ? 8 : 4;
+ CodePointerSize = CalleeSaveStackSlotSize = TT.isArch64Bit() ? 8 : 4;
CommentString = "#";
AlignmentIsInBytes = false;
SupportsDebugInformation = true;
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
index 3ed09898fb78..21df60237d96 100644
--- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
@@ -28,7 +28,7 @@ SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Triple &TheTriple) {
IsLittleEndian = (TheTriple.getArch() == Triple::sparcel);
if (isV9) {
- PointerSize = CalleeSaveStackSlotSize = 8;
+ CodePointerSize = CalleeSaveStackSlotSize = 8;
}
Data16bitsDirective = "\t.half\t";
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
index b17977d41be1..6e00981939b6 100644
--- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
@@ -14,7 +14,7 @@
using namespace llvm;
SystemZMCAsmInfo::SystemZMCAsmInfo(const Triple &TT) {
- PointerSize = 8;
+ CodePointerSize = 8;
CalleeSaveStackSlotSize = 8;
IsLittleEndian = false;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 84d3c7bed50a..f2fd581f7847 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -829,7 +829,7 @@ bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
return isTruncateFree(FromType, ToType);
}
-bool SystemZTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
return CI->isTailCall();
}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 7d92a7355877..1c34dc43e8bb 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -454,7 +454,7 @@ public:
MachineBasicBlock *BB) const override;
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
bool allowTruncateForTailCall(Type *, Type *) const override;
- bool mayBeEmittedAsTailCall(CallInst *CI) const override;
+ bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
index 2dcec5263fa1..5f8c78ed1683 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
@@ -22,7 +22,7 @@ using namespace llvm;
WebAssemblyMCAsmInfoELF::~WebAssemblyMCAsmInfoELF() {}
WebAssemblyMCAsmInfoELF::WebAssemblyMCAsmInfoELF(const Triple &T) {
- PointerSize = CalleeSaveStackSlotSize = T.isArch64Bit() ? 8 : 4;
+ CodePointerSize = CalleeSaveStackSlotSize = T.isArch64Bit() ? 8 : 4;
// TODO: What should MaxInstLength be?
@@ -55,7 +55,7 @@ WebAssemblyMCAsmInfoELF::WebAssemblyMCAsmInfoELF(const Triple &T) {
WebAssemblyMCAsmInfo::~WebAssemblyMCAsmInfo() {}
WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T) {
- PointerSize = CalleeSaveStackSlotSize = T.isArch64Bit() ? 8 : 4;
+ CodePointerSize = CalleeSaveStackSlotSize = T.isArch64Bit() ? 8 : 4;
// TODO: What should MaxInstLength be?
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
index a0b008947491..544cd653fd72 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
@@ -94,6 +94,8 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
MCFixupKind(WebAssembly::fixup_code_global_index), MI.getLoc()));
++MCNumFixups;
encodeULEB128(uint64_t(MO.getImm()), OS);
+ } else if (Info.OperandType == WebAssembly::OPERAND_SIGNATURE) {
+ encodeSLEB128(int64_t(MO.getImm()), OS);
} else {
encodeULEB128(uint64_t(MO.getImm()), OS);
}
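[Editor's note] The added OPERAND_SIGNATURE branch emits signed rather than unsigned LEB128, which matters as soon as an immediate is negative. A small illustration, assuming the LEB128 helpers from llvm/Support/LEB128.h behave as documented:

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/LEB128.h"
    #include "llvm/Support/raw_ostream.h"
    #include <cstdint>

    void leb128Sketch() {
      llvm::SmallString<16> U, S;
      llvm::raw_svector_ostream UOS(U), SOS(S);
      llvm::encodeULEB128(uint64_t(int64_t(-1)), UOS); // ten bytes for the 64-bit pattern
      llvm::encodeSLEB128(int64_t(-1), SOS);           // a single 0x7f byte
      // U.size() == 10 while S.size() == 1 for the same logical value, so using
      // the unsigned encoder for signed operands both bloats and miscodes them.
    }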
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
index f4c9a4ef6b9c..559165e4c86b 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
@@ -54,7 +54,7 @@ FunctionPass *llvm::createWebAssemblyOptimizeReturned() {
void OptimizeReturned::visitCallSite(CallSite CS) {
for (unsigned i = 0, e = CS.getNumArgOperands(); i < e; ++i)
- if (CS.paramHasAttr(0, Attribute::Returned)) {
+ if (CS.paramHasAttr(i, Attribute::Returned)) {
Instruction *Inst = CS.getInstruction();
Value *Arg = CS.getArgOperand(i);
// Ignore constants, globals, undef, etc.
diff --git a/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt b/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt
index 8dd5e8a03e2e..8e8e5fd1eff1 100644
--- a/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt
+++ b/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt
@@ -1,5 +1,15 @@
# Tests which are known to fail from the GCC torture test suite.
+# Syntax: Each line has a single test to be marked as a 'known failure' (or
+# 'exclusion'). Known failures are expected to fail, and will cause an error if
+# they pass. (Known failures that do not run at all will not cause an
+# error). The format is
+# <name> <attributes> # comment
+#
+# The attributes in this case represent the different arguments passed to the
+# compiler: 'wasm-s' is for compiling to .s files, and 'wasm-o' for compiling
+# to wasm object files (.o).
+
# Computed gotos are not supported (Cannot select BlockAddress/BRIND)
20040302-1.c
20071210-1.c
@@ -66,3 +76,21 @@ pr41935.c
920728-1.c
pr28865.c
widechar-2.c
+
+# crash: Running pass 'WebAssembly Explicit Locals' on function
+20020107-1.c wasm-o
+20030222-1.c wasm-o
+20071220-1.c wasm-o
+20071220-2.c wasm-o
+990130-1.c wasm-o
+pr38533.c wasm-o
+pr41239.c wasm-o
+pr43385.c wasm-o
+pr43560.c wasm-o
+pr45695.c wasm-o
+pr49279.c wasm-o
+pr49390.c wasm-o
+pr52286.c wasm-o
+
+# fatal error: error in backend: data symbols must have a size set with .size
+921110-1.c wasm-o
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 48a1d8f1330c..9c35a251e480 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -43,7 +43,7 @@ void X86MCAsmInfoDarwin::anchor() { }
X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
bool is64Bit = T.getArch() == Triple::x86_64;
if (is64Bit)
- PointerSize = CalleeSaveStackSlotSize = 8;
+ CodePointerSize = CalleeSaveStackSlotSize = 8;
AssemblerDialect = AsmWriterFlavor;
@@ -92,7 +92,7 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
// For ELF, x86-64 pointer size depends on the ABI.
// For x86-64 without the x32 ABI, pointer size is 8. For x86 and for x86-64
// with the x32 ABI, pointer size remains the default 4.
- PointerSize = (is64Bit && !isX32) ? 8 : 4;
+ CodePointerSize = (is64Bit && !isX32) ? 8 : 4;
// OTOH, stack slot size is always 8 for x86-64, even with the x32 ABI.
CalleeSaveStackSlotSize = is64Bit ? 8 : 4;
@@ -129,7 +129,7 @@ X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) {
if (Triple.getArch() == Triple::x86_64) {
PrivateGlobalPrefix = ".L";
PrivateLabelPrefix = ".L";
- PointerSize = 8;
+ CodePointerSize = 8;
WinEHEncodingType = WinEH::EncodingType::Itanium;
} else {
// 32-bit X86 doesn't use CFI, so this isn't a real encoding type. It's just
@@ -156,7 +156,7 @@ X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) {
if (Triple.getArch() == Triple::x86_64) {
PrivateGlobalPrefix = ".L";
PrivateLabelPrefix = ".L";
- PointerSize = 8;
+ CodePointerSize = 8;
WinEHEncodingType = WinEH::EncodingType::Itanium;
ExceptionsType = ExceptionHandling::WinEH;
} else {
diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
index 78e0bca4158e..8678a13b95d0 100644
--- a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -1698,21 +1698,18 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
}
}
-// NOTE: this only has a subset of the full frame index logic. In
-// particular, the FI < 0 and AfterFPPop logic is handled in
-// X86RegisterInfo::eliminateFrameIndex, but not here. Possibly
-// (probably?) it should be moved into here.
int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
+ bool IsFixed = MFI.isFixedObjectIndex(FI);
// We can't calculate offset from frame pointer if the stack is realigned,
// so enforce usage of stack/base pointer. The base pointer is used when we
// have dynamic allocas in addition to dynamic realignment.
if (TRI->hasBasePointer(MF))
- FrameReg = TRI->getBaseRegister();
+ FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister();
else if (TRI->needsStackRealignment(MF))
- FrameReg = TRI->getStackRegister();
+ FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister();
else
FrameReg = TRI->getFrameRegister(MF);
diff --git a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index eb5c56ff2ff9..2d788bf0cf99 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1311,8 +1311,9 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
++Cost;
// If the base is a register with multiple uses, this
// transformation may save a mov.
- if ((AM.BaseType == X86ISelAddressMode::RegBase &&
- AM.Base_Reg.getNode() &&
+ // FIXME: Don't rely on DELETED_NODEs.
+ if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() &&
+ AM.Base_Reg->getOpcode() != ISD::DELETED_NODE &&
!AM.Base_Reg.getNode()->hasOneUse()) ||
AM.BaseType == X86ISelAddressMode::FrameIndexBase)
--Cost;
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6bf3672c3c08..b5f29fb400ef 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2742,13 +2742,13 @@ static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
return GuaranteedTailCallOpt && canGuaranteeTCO(CC);
}
-bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
auto Attr =
CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
if (!CI->isTailCall() || Attr.getValueAsString() == "true")
return false;
- CallSite CS(CI);
+ ImmutableCallSite CS(CI);
CallingConv::ID CalleeCC = CS.getCallingConv();
if (!mayTailCallThisCC(CalleeCC))
return false;
@@ -8327,13 +8327,13 @@ static APInt computeZeroableShuffleElements(ArrayRef<int> Mask,
Zeroable.setBit(i);
else if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
APInt Val = Cst->getAPIntValue();
- Val = Val.lshr((M % Scale) * ScalarSizeInBits);
+ Val.lshrInPlace((M % Scale) * ScalarSizeInBits);
Val = Val.getLoBits(ScalarSizeInBits);
if (Val == 0)
Zeroable.setBit(i);
} else if (ConstantFPSDNode *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
APInt Val = Cst->getValueAPF().bitcastToAPInt();
- Val = Val.lshr((M % Scale) * ScalarSizeInBits);
+ Val.lshrInPlace((M % Scale) * ScalarSizeInBits);
Val = Val.getLoBits(ScalarSizeInBits);
if (Val == 0)
Zeroable.setBit(i);
@@ -16069,7 +16069,7 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
unsigned EltBits = EltVT.getSizeInBits();
// For FABS, mask is 0x7f...; for FNEG, mask is 0x80...
APInt MaskElt =
- IsFABS ? APInt::getSignedMaxValue(EltBits) : APInt::getSignBit(EltBits);
+ IsFABS ? APInt::getSignedMaxValue(EltBits) : APInt::getSignMask(EltBits);
const fltSemantics &Sem =
EltVT == MVT::f64 ? APFloat::IEEEdouble() :
(IsF128 ? APFloat::IEEEquad() : APFloat::IEEEsingle());
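[Editor's note] The comment above is the whole trick: FABS clears the sign bit with getSignedMaxValue (0x7f...), FNEG flips it with getSignMask (0x80...). A scalar sketch of the FABS half on the raw IEEE-754 bits (plain C++, not the DAG code):

    #include <cstdint>
    #include <cstring>

    float fabsViaMask(float X) {
      uint32_t Bits;
      std::memcpy(&Bits, &X, sizeof(Bits));
      Bits &= 0x7fffffffu;   // the 0x7f... mask: keep everything but the sign bit
      std::memcpy(&X, &Bits, sizeof(Bits));
      return X;
    }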
@@ -16132,9 +16132,9 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
// The mask constants are automatically splatted for vector types.
unsigned EltSizeInBits = VT.getScalarSizeInBits();
SDValue SignMask = DAG.getConstantFP(
- APFloat(Sem, APInt::getSignBit(EltSizeInBits)), dl, LogicVT);
+ APFloat(Sem, APInt::getSignMask(EltSizeInBits)), dl, LogicVT);
SDValue MagMask = DAG.getConstantFP(
- APFloat(Sem, ~APInt::getSignBit(EltSizeInBits)), dl, LogicVT);
+ APFloat(Sem, ~APInt::getSignMask(EltSizeInBits)), dl, LogicVT);
// First, clear all bits but the sign bit from the second operand (sign).
if (IsFakeVector)
@@ -17344,10 +17344,10 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
// bits of the inputs before performing those operations.
if (FlipSigns) {
MVT EltVT = VT.getVectorElementType();
- SDValue SB = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()), dl,
+ SDValue SM = DAG.getConstant(APInt::getSignMask(EltVT.getSizeInBits()), dl,
VT);
- Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SB);
- Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SB);
+ Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SM);
+ Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SM);
}
SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
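[Editor's note] The FlipSigns path works because XOR-ing both operands with the sign mask turns signed order into unsigned order, so an unsigned vector compare can stand in for a signed one. A quick scalar check of that claim (plain C++, 8-bit lanes assumed for brevity):

    #include <cassert>
    #include <cstdint>

    void flipSignCompareSketch(int8_t A, int8_t B) {
      const uint8_t SM = 0x80;   // APInt::getSignMask(8)
      assert((A < B) == ((uint8_t(A) ^ SM) < (uint8_t(B) ^ SM)));
    }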
@@ -22111,11 +22111,11 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
}
// i64 vector arithmetic shift can be emulated with the transform:
- // M = lshr(SIGN_BIT, Amt)
+ // M = lshr(SIGN_MASK, Amt)
// ashr(R, Amt) === sub(xor(lshr(R, Amt), M), M)
if ((VT == MVT::v2i64 || (VT == MVT::v4i64 && Subtarget.hasInt256())) &&
Op.getOpcode() == ISD::SRA) {
- SDValue S = DAG.getConstant(APInt::getSignBit(64), dl, VT);
+ SDValue S = DAG.getConstant(APInt::getSignMask(64), dl, VT);
SDValue M = DAG.getNode(ISD::SRL, dl, VT, S, Amt);
R = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
R = DAG.getNode(ISD::XOR, dl, VT, R, M);
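[Editor's note] A quick check of the identity quoted in the comment above, with M = lshr(SIGN_MASK, Amt) and ashr(R, Amt) == sub(xor(lshr(R, Amt), M), M). The sketch assumes two's-complement wraparound on the conversions and an arithmetic signed right shift, both of which C++20 guarantees:

    #include <cassert>
    #include <cstdint>

    void ashrIdentitySketch(int64_t R, unsigned Amt) {   // Amt in [0, 63]
      uint64_t M = (UINT64_C(1) << 63) >> Amt;           // M = lshr(SIGN_MASK, Amt)
      uint64_t L = uint64_t(R) >> Amt;                   // lshr(R, Amt)
      int64_t Emulated = int64_t((L ^ M) - M);           // xor then sub
      assert(Emulated == (R >> Amt));                    // matches ashr(R, Amt)
    }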
@@ -22647,7 +22647,7 @@ bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
// FIXME: On 32 bits x86, fild/movq might be faster than lock cmpxchg8b.
TargetLowering::AtomicExpansionKind
X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
- auto PTy = cast<PointerType>(LI->getPointerOperand()->getType());
+ auto PTy = cast<PointerType>(LI->getPointerOperandType());
return needsCmpXchgNb(PTy->getElementType()) ? AtomicExpansionKind::CmpXChg
: AtomicExpansionKind::None;
}
@@ -26722,8 +26722,8 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
// Low bits are known zero.
KnownZero.setLowBits(ShAmt);
} else {
- KnownZero = KnownZero.lshr(ShAmt);
- KnownOne = KnownOne.lshr(ShAmt);
+ KnownZero.lshrInPlace(ShAmt);
+ KnownOne.lshrInPlace(ShAmt);
// High bits are known zero.
KnownZero.setHighBits(ShAmt);
}
@@ -30152,7 +30152,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// x s< 0 ? x^C : 0 --> subus x, C
if (CC == ISD::SETLT && Other->getOpcode() == ISD::XOR &&
ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
- OpRHSConst->getAPIntValue().isSignBit())
+ OpRHSConst->getAPIntValue().isSignMask())
// Note that we have to rebuild the RHS constant here to ensure we
// don't rely on particular values of undef lanes.
return DAG.getNode(
@@ -30203,7 +30203,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
return SDValue();
assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
- APInt DemandedMask(APInt::getSignBit(BitWidth));
+ APInt DemandedMask(APInt::getSignMask(BitWidth));
APInt KnownZero, KnownOne;
TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(),
DCI.isBeforeLegalizeOps());
@@ -31269,7 +31269,7 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
else if (X86ISD::VSRAI == Opcode)
Elt = Elt.ashr(ShiftImm);
else
- Elt = Elt.lshr(ShiftImm);
+ Elt.lshrInPlace(ShiftImm);
}
return getConstVector(EltBits, UndefElts, VT.getSimpleVT(), DAG, SDLoc(N));
}
@@ -32234,8 +32234,8 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(V);
if (!BV || !BV->isConstant())
return false;
- for (unsigned i = 0, e = V.getNumOperands(); i < e; i++) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(i));
+ for (SDValue Op : V->ops()) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
if (!C)
return false;
uint64_t Val = C->getZExtValue();
@@ -33428,8 +33428,8 @@ static SDValue isFNEG(SDNode *N) {
SDValue Op0 = peekThroughBitcasts(Op.getOperand(0));
unsigned EltBits = Op1.getScalarValueSizeInBits();
- auto isSignBitValue = [&](const ConstantFP *C) {
- return C->getValueAPF().bitcastToAPInt() == APInt::getSignBit(EltBits);
+ auto isSignMask = [&](const ConstantFP *C) {
+ return C->getValueAPF().bitcastToAPInt() == APInt::getSignMask(EltBits);
};
// There is more than one way to represent the same constant on
@@ -33440,21 +33440,21 @@ static SDValue isFNEG(SDNode *N) {
// We check all variants here.
if (Op1.getOpcode() == X86ISD::VBROADCAST) {
if (auto *C = getTargetConstantFromNode(Op1.getOperand(0)))
- if (isSignBitValue(cast<ConstantFP>(C)))
+ if (isSignMask(cast<ConstantFP>(C)))
return Op0;
} else if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op1)) {
if (ConstantFPSDNode *CN = BV->getConstantFPSplatNode())
- if (isSignBitValue(CN->getConstantFPValue()))
+ if (isSignMask(CN->getConstantFPValue()))
return Op0;
} else if (auto *C = getTargetConstantFromNode(Op1)) {
if (C->getType()->isVectorTy()) {
if (auto *SplatV = C->getSplatValue())
- if (isSignBitValue(cast<ConstantFP>(SplatV)))
+ if (isSignMask(cast<ConstantFP>(SplatV)))
return Op0;
} else if (auto *FPConst = dyn_cast<ConstantFP>(C))
- if (isSignBitValue(FPConst))
+ if (isSignMask(FPConst))
return Op0;
}
return SDValue();
@@ -34631,7 +34631,7 @@ static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG,
return SDValue();
ShrinkMode Mode;
- if (!canReduceVMulWidth(MulOp.getNode(), DAG, Mode))
+ if (!canReduceVMulWidth(MulOp.getNode(), DAG, Mode) || Mode == MULU16)
return SDValue();
EVT VT = N->getValueType(0);
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
index ab4910daca02..190a88335000 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1207,7 +1207,7 @@ namespace llvm {
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
- bool mayBeEmittedAsTailCall(CallInst *CI) const override;
+ bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
ISD::NodeType ExtendKind) const override;
diff --git a/contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp b/contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp
index 6cc5e8b63597..fb9315792892 100644
--- a/contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp
+++ b/contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp
@@ -67,6 +67,8 @@ private:
MachineFunction &MF) const;
bool selectConstant(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
+ bool selectTrunc(MachineInstr &I, MachineRegisterInfo &MRI,
+ MachineFunction &MF) const;
const X86Subtarget &STI;
const X86InstrInfo &TII;
@@ -99,6 +101,10 @@ X86InstructionSelector::X86InstructionSelector(const X86Subtarget &STI,
static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB) {
if (RB.getID() == X86::GPRRegBankID) {
+ if (Ty.getSizeInBits() <= 8)
+ return &X86::GR8RegClass;
+ if (Ty.getSizeInBits() == 16)
+ return &X86::GR16RegClass;
if (Ty.getSizeInBits() == 32)
return &X86::GR32RegClass;
if (Ty.getSizeInBits() == 64)
@@ -207,6 +213,8 @@ bool X86InstructionSelector::select(MachineInstr &I) const {
return true;
if (selectConstant(I, MRI, MF))
return true;
+ if (selectTrunc(I, MRI, MF))
+ return true;
return selectImpl(I);
}
@@ -509,6 +517,59 @@ bool X86InstructionSelector::selectConstant(MachineInstr &I,
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
+bool X86InstructionSelector::selectTrunc(MachineInstr &I,
+ MachineRegisterInfo &MRI,
+ MachineFunction &MF) const {
+ if (I.getOpcode() != TargetOpcode::G_TRUNC)
+ return false;
+
+ const unsigned DstReg = I.getOperand(0).getReg();
+ const unsigned SrcReg = I.getOperand(1).getReg();
+
+ const LLT DstTy = MRI.getType(DstReg);
+ const LLT SrcTy = MRI.getType(SrcReg);
+
+ const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
+ const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
+
+ if (DstRB.getID() != SrcRB.getID()) {
+ DEBUG(dbgs() << "G_TRUNC input/output on different banks\n");
+ return false;
+ }
+
+ if (DstRB.getID() != X86::GPRRegBankID)
+ return false;
+
+ const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
+ if (!DstRC)
+ return false;
+
+ const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
+ if (!SrcRC)
+ return false;
+
+ if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
+ !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
+ DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
+ return false;
+ }
+
+ if (DstRC == SrcRC) {
+ // Nothing to be done
+ } else if (DstRC == &X86::GR32RegClass) {
+ I.getOperand(1).setSubReg(X86::sub_32bit);
+ } else if (DstRC == &X86::GR16RegClass) {
+ I.getOperand(1).setSubReg(X86::sub_16bit);
+ } else if (DstRC == &X86::GR8RegClass) {
+ I.getOperand(1).setSubReg(X86::sub_8bit);
+ } else {
+ return false;
+ }
+
+ I.setDesc(TII.get(X86::COPY));
+ return true;
+}
+
InstructionSelector *
llvm::createX86InstructionSelector(X86Subtarget &Subtarget,
X86RegisterBankInfo &RBI) {
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterBankInfo.cpp b/contrib/llvm/lib/Target/X86/X86RegisterBankInfo.cpp
index d395c826e6bf..0f8a750a0235 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterBankInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86RegisterBankInfo.cpp
@@ -68,6 +68,7 @@ X86GenRegisterBankInfo::PartialMappingIdx
X86GenRegisterBankInfo::getPartialMappingIdx(const LLT &Ty, bool isFP) {
if ((Ty.isScalar() && !isFP) || Ty.isPointer()) {
switch (Ty.getSizeInBits()) {
+ case 1:
case 8:
return PMI_GPR8;
case 16:
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
index 58fa31e94fba..25958f0c3106 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -133,6 +133,11 @@ public:
unsigned getPtrSizedFrameRegister(const MachineFunction &MF) const;
unsigned getStackRegister() const { return StackPtr; }
unsigned getBaseRegister() const { return BasePtr; }
+ /// Returns physical register used as frame pointer.
+  /// This will always return the frame pointer register, contrary to
+ /// getFrameRegister() which returns the "base pointer" in situations
+ /// involving a stack, frame and base pointer.
+ unsigned getFramePtr() const { return FramePtr; }
// FIXME: Move to FrameInfo
unsigned getSlotSize() const { return SlotSize; }
};
diff --git a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 375b74c494d9..8e26849ea9e3 100644
--- a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -167,15 +167,12 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) {
// Drop any attributes that were on the vararg arguments.
AttributeList PAL = CS.getAttributes();
- if (!PAL.isEmpty() && PAL.getSlotIndex(PAL.getNumSlots() - 1) > NumArgs) {
- SmallVector<AttributeList, 8> AttributesVec;
- for (unsigned i = 0; PAL.getSlotIndex(i) <= NumArgs; ++i)
- AttributesVec.push_back(PAL.getSlotAttributes(i));
- if (PAL.hasAttributes(AttributeList::FunctionIndex))
- AttributesVec.push_back(AttributeList::get(Fn.getContext(),
- AttributeList::FunctionIndex,
- PAL.getFnAttributes()));
- PAL = AttributeList::get(Fn.getContext(), AttributesVec);
+ if (!PAL.isEmpty()) {
+ SmallVector<AttributeSet, 8> ArgAttrs;
+ for (unsigned ArgNo = 0; ArgNo < NumArgs; ++ArgNo)
+ ArgAttrs.push_back(PAL.getParamAttributes(ArgNo));
+ PAL = AttributeList::get(Fn.getContext(), PAL.getFnAttributes(),
+ PAL.getRetAttributes(), ArgAttrs);
}
SmallVector<OperandBundleDef, 1> OpBundles;
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 4d13b3f40688..9648883b7f27 100644
--- a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -222,15 +222,11 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
MadeChange = true;
// Clear out any existing attributes.
- AttrBuilder B;
- B.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone);
- F->removeAttributes(
- AttributeList::FunctionIndex,
- AttributeList::get(F->getContext(), AttributeList::FunctionIndex, B));
+ F->removeFnAttr(Attribute::ReadOnly);
+ F->removeFnAttr(Attribute::ReadNone);
// Add in the new attribute.
- F->addAttribute(AttributeList::FunctionIndex,
- ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone);
+ F->addFnAttr(ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone);
if (ReadsMemory)
++NumReadOnly;
@@ -495,9 +491,6 @@ determinePointerReadAttrs(Argument *A,
static bool addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes) {
bool Changed = false;
- AttrBuilder B;
- B.addAttribute(Attribute::Returned);
-
// Check each function in turn, determining if an argument is always returned.
for (Function *F : SCCNodes) {
// We can infer and propagate function attributes only when we know that the
@@ -535,7 +528,7 @@ static bool addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes) {
if (Value *RetArg = FindRetArg()) {
auto *A = cast<Argument>(RetArg);
- A->addAttr(AttributeList::get(F->getContext(), A->getArgNo() + 1, B));
+ A->addAttr(Attribute::Returned);
++NumReturned;
Changed = true;
}
@@ -593,9 +586,6 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
ArgumentGraph AG;
- AttrBuilder B;
- B.addAttribute(Attribute::NoCapture);
-
// Check each function in turn, determining which pointer arguments are not
// captured.
for (Function *F : SCCNodes) {
@@ -614,7 +604,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E;
++A) {
if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) {
- A->addAttr(AttributeList::get(F->getContext(), A->getArgNo() + 1, B));
+ A->addAttr(Attribute::NoCapture);
++NumNoCapture;
Changed = true;
}
@@ -633,8 +623,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
if (!Tracker.Captured) {
if (Tracker.Uses.empty()) {
// If it's trivially not captured, mark it nocapture now.
- A->addAttr(
- AttributeList::get(F->getContext(), A->getArgNo() + 1, B));
+ A->addAttr(Attribute::NoCapture);
++NumNoCapture;
Changed = true;
} else {
@@ -660,9 +649,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
Self.insert(&*A);
Attribute::AttrKind R = determinePointerReadAttrs(&*A, Self);
if (R != Attribute::None) {
- AttrBuilder B;
- B.addAttribute(R);
- A->addAttr(AttributeList::get(A->getContext(), A->getArgNo() + 1, B));
+ A->addAttr(R);
Changed = true;
R == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg;
}
@@ -687,7 +674,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
if (ArgumentSCC[0]->Uses.size() == 1 &&
ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) {
Argument *A = ArgumentSCC[0]->Definition;
- A->addAttr(AttributeList::get(A->getContext(), A->getArgNo() + 1, B));
+ A->addAttr(Attribute::NoCapture);
++NumNoCapture;
Changed = true;
}
@@ -729,7 +716,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
Argument *A = ArgumentSCC[i]->Definition;
- A->addAttr(AttributeList::get(A->getContext(), A->getArgNo() + 1, B));
+ A->addAttr(Attribute::NoCapture);
++NumNoCapture;
Changed = true;
}
@@ -760,15 +747,12 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
}
if (ReadAttr != Attribute::None) {
- AttrBuilder B, R;
- B.addAttribute(ReadAttr);
- R.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone);
for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
Argument *A = ArgumentSCC[i]->Definition;
// Clear out existing readonly/readnone attributes
- A->removeAttr(
- AttributeList::get(A->getContext(), A->getArgNo() + 1, R));
- A->addAttr(AttributeList::get(A->getContext(), A->getArgNo() + 1, B));
+ A->removeAttr(Attribute::ReadOnly);
+ A->removeAttr(Attribute::ReadNone);
+ A->addAttr(ReadAttr);
ReadAttr == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg;
Changed = true;
}
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index ade4f21ceb52..ae9d4ce11e0d 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1979,16 +1979,11 @@ static void ChangeCalleesToFastCall(Function *F) {
}
}
-static AttributeList StripNest(LLVMContext &C, const AttributeList &Attrs) {
- for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
- unsigned Index = Attrs.getSlotIndex(i);
- if (!Attrs.getSlotAttributes(i).hasAttribute(Index, Attribute::Nest))
- continue;
-
- // There can be only one.
- return Attrs.removeAttribute(C, Index, Attribute::Nest);
- }
-
+static AttributeList StripNest(LLVMContext &C, AttributeList Attrs) {
+ // There can be at most one attribute set with a nest attribute.
+ unsigned NestIndex;
+ if (Attrs.hasAttrSomewhere(Attribute::Nest, &NestIndex))
+ return Attrs.removeAttribute(C, NestIndex, Attribute::Nest);
return Attrs;
}
diff --git a/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 3371de6e3d14..e755e2bd8f26 100644
--- a/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -43,6 +43,7 @@
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Pass.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/SampleProfReader.h"
@@ -208,6 +209,12 @@ protected:
/// the same number of times.
EquivalenceClassMap EquivalenceClass;
+ /// Map from function name to Function *. Used to find the function from
+  /// the function name. If the function name contains a suffix, an additional
+  /// entry is added to map from the stripped name to the function, provided
+  /// that mapping is one-to-one.
+ StringMap<Function *> SymbolMap;
+
/// \brief Dominance, post-dominance and loop information.
std::unique_ptr<DominatorTree> DT;
std::unique_ptr<DominatorTreeBase<BasicBlock>> PDT;
@@ -670,7 +677,7 @@ bool SampleProfileLoader::inlineHotFunctions(
for (auto &I : BB.getInstList()) {
const FunctionSamples *FS = nullptr;
if ((isa<CallInst>(I) || isa<InvokeInst>(I)) &&
- (FS = findCalleeFunctionSamples(I))) {
+ !isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(I))) {
Candidates.push_back(&I);
if (callsiteIsHot(Samples, FS))
Hot = true;
@@ -689,7 +696,10 @@ bool SampleProfileLoader::inlineHotFunctions(
for (const auto *FS : findIndirectCallFunctionSamples(*I)) {
auto CalleeFunctionName = FS->getName();
const char *Reason = "Callee function not available";
- CalledFunction = F.getParent()->getFunction(CalleeFunctionName);
+ auto R = SymbolMap.find(CalleeFunctionName);
+ if (R == SymbolMap.end())
+ continue;
+ CalledFunction = R->getValue();
if (CalledFunction && isLegalToPromote(I, CalledFunction, &Reason)) {
// The indirect target was promoted and inlined in the profile, as a
// result, we do not have profile info for the branch probability.
@@ -1181,8 +1191,11 @@ void SampleProfileLoader::propagateWeights(Function &F) {
if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
continue;
+ DebugLoc BranchLoc = TI->getDebugLoc();
DEBUG(dbgs() << "\nGetting weights for branch at line "
- << TI->getDebugLoc().getLine() << ".\n");
+ << ((BranchLoc) ? Twine(BranchLoc.getLine())
+ : Twine("<UNKNOWN LOCATION>"))
+ << ".\n");
SmallVector<uint32_t, 4> Weights;
uint32_t MaxWeight = 0;
DebugLoc MaxDestLoc;
@@ -1219,7 +1232,6 @@ void SampleProfileLoader::propagateWeights(Function &F) {
DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
TI->setMetadata(llvm::LLVMContext::MD_prof,
MDB.createBranchWeights(Weights));
- DebugLoc BranchLoc = TI->getDebugLoc();
emitOptimizationRemark(
Ctx, DEBUG_TYPE, F, MaxDestLoc,
Twine("most popular destination for conditional branches at ") +
@@ -1414,6 +1426,26 @@ bool SampleProfileLoader::runOnModule(Module &M) {
for (const auto &I : Reader->getProfiles())
TotalCollectedSamples += I.second.getTotalSamples();
+ // Populate the symbol map.
+ for (const auto &N_F : M.getValueSymbolTable()) {
+ std::string OrigName = N_F.getKey();
+ Function *F = dyn_cast<Function>(N_F.getValue());
+ if (F == nullptr)
+ continue;
+ SymbolMap[OrigName] = F;
+ auto pos = OrigName.find('.');
+ if (pos != std::string::npos) {
+ std::string NewName = OrigName.substr(0, pos);
+ auto r = SymbolMap.insert(std::make_pair(NewName, F));
+      // Failing to insert means there is already an entry in SymbolMap,
+ // thus there are multiple functions that are mapped to the same
+      // stripped name. In this case of a name conflict, set the value
+ // to nullptr to avoid confusion.
+ if (!r.second)
+ r.first->second = nullptr;
+ }
+ }
+
bool retval = false;
for (auto &F : M)
if (!F.isDeclaration()) {
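[Editor's note] The new SymbolMap keys each function under its full name and, when the name carries a '.' suffix, also under the stripped prefix, falling back to nullptr when two functions would collide on the same stripped name. A hypothetical stand-alone sketch of that insertion policy (the value type is a placeholder; the pass itself stores Function*):

    #include "llvm/ADT/StringMap.h"
    #include <utility>

    using SymMap = llvm::StringMap<const void *>;

    void addSymbol(SymMap &Map, llvm::StringRef Name, const void *F) {
      Map[Name] = F;
      size_t Pos = Name.find('.');
      if (Pos == llvm::StringRef::npos)
        return;
      auto R = Map.insert(std::make_pair(Name.substr(0, Pos), F));
      if (!R.second)               // stripped name already claimed by another symbol
        R.first->second = nullptr; // ambiguous, so refuse to resolve it
    }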
diff --git a/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 65deb82cd2a5..9801a0a61416 100644
--- a/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -363,6 +363,7 @@ void splitAndWriteThinLTOBitcode(
W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
/*GenerateHash=*/true, &ModHash);
W.writeModule(MergedM.get());
+ W.writeStrtab();
OS << Buffer;
// If a minimized bitcode module was requested for the thin link,
@@ -375,6 +376,7 @@ void splitAndWriteThinLTOBitcode(
W2.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
/*GenerateHash=*/false, &ModHash);
W2.writeModule(MergedM.get());
+ W2.writeStrtab();
*ThinLinkOS << Buffer;
}
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 174ec8036274..e30a4bafb9b0 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1044,14 +1044,14 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
const APInt *RHSC;
if (match(RHS, m_APInt(RHSC))) {
- if (RHSC->isSignBit()) {
+ if (RHSC->isSignMask()) {
// If wrapping is not allowed, then the addition must set the sign bit:
- // X + (signbit) --> X | signbit
+ // X + (signmask) --> X | signmask
if (I.hasNoSignedWrap() || I.hasNoUnsignedWrap())
return BinaryOperator::CreateOr(LHS, RHS);
// If wrapping is allowed, then the addition flips the sign bit of LHS:
- // X + (signbit) --> X ^ signbit
+ // X + (signmask) --> X ^ signmask
return BinaryOperator::CreateXor(LHS, RHS);
}
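[Editor's note] The two folds above rest on small bit tricks: adding the sign mask always flips the sign bit (an XOR), and if the add provably cannot wrap then the sign bit of X must start out clear, so the add behaves like an OR. A scalar check at 8 bits:

    #include <cassert>
    #include <cstdint>

    void addSignMaskSketch(uint8_t X) {
      const uint8_t SignMask = 0x80;
      assert(uint8_t(X + SignMask) == uint8_t(X ^ SignMask));    // wrapping add == xor
      if ((X & SignMask) == 0)                                   // the nsw/nuw case
        assert(uint8_t(X + SignMask) == uint8_t(X | SignMask));  // add == or
    }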
@@ -1120,9 +1120,9 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI),
XorLHS);
}
- // (X + signbit) + C could have gotten canonicalized to (X ^ signbit) + C,
- // transform them into (X + (signbit ^ C))
- if (XorRHS->getValue().isSignBit())
+ // (X + signmask) + C could have gotten canonicalized to (X^signmask) + C,
+ // transform them into (X + (signmask ^ C))
+ if (XorRHS->getValue().isSignMask())
return BinaryOperator::CreateAdd(XorLHS,
ConstantExpr::getXor(XorRHS, CI));
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index b2a41c699202..3a98e8937bda 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2078,7 +2078,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
Value *NOr = Builder->CreateOr(A, Op1);
NOr->takeName(Op0);
return BinaryOperator::CreateXor(NOr,
- cast<Instruction>(Op0)->getOperand(1));
+ ConstantInt::get(NOr->getType(), *C));
}
// Y|(X^C) -> (X|Y)^C iff Y&C == 0
@@ -2087,7 +2087,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
Value *NOr = Builder->CreateOr(A, Op0);
NOr->takeName(Op0);
return BinaryOperator::CreateXor(NOr,
- cast<Instruction>(Op1)->getOperand(1));
+ ConstantInt::get(NOr->getType(), *C));
}
}
@@ -2480,8 +2480,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI);
return BinaryOperator::CreateSub(SubOne(NegOp0CI),
Op0I->getOperand(0));
- } else if (RHSC->getValue().isSignBit()) {
- // (X + C) ^ signbit -> (X + C + signbit)
+ } else if (RHSC->getValue().isSignMask()) {
+ // (X + C) ^ signmask -> (X + C + signmask)
Constant *C = Builder->getInt(RHSC->getValue() + Op0CI->getValue());
return BinaryOperator::CreateAdd(Op0I->getOperand(0), C);
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 69484f47223f..e7aa1a457371 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -839,7 +839,8 @@ static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0,
// Length bits.
if (CI0) {
APInt Elt = CI0->getValue();
- Elt = Elt.lshr(Index).zextOrTrunc(Length);
+ Elt.lshrInPlace(Index);
+ Elt = Elt.zextOrTrunc(Length);
return LowConstantHighUndef(Elt.getZExtValue());
}
@@ -1036,7 +1037,7 @@ static Value *simplifyX86vpermilvar(const IntrinsicInst &II,
// The PD variants uses bit 1 to select per-lane element index, so
// shift down to convert to generic shuffle mask index.
if (IsPD)
- Index = Index.lshr(1);
+ Index.lshrInPlace(1);
// The _256 variants are a bit trickier since the mask bits always index
// into the corresponding 128 half. In order to convert to a generic
@@ -4067,21 +4068,15 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
}
if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
- !CallerPAL.isEmpty())
+ !CallerPAL.isEmpty()) {
// In this case we have more arguments than the new function type, but we
// won't be dropping them. Check that these extra arguments have attributes
// that are compatible with being a vararg call argument.
- for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
- unsigned Index = CallerPAL.getSlotIndex(i - 1);
- if (Index <= FT->getNumParams())
- break;
-
- // Check if it has an attribute that's incompatible with varargs.
- AttributeList PAttrs = CallerPAL.getSlotAttributes(i - 1);
- if (PAttrs.hasAttribute(Index, Attribute::StructRet))
- return false;
- }
-
+ unsigned SRetIdx;
+ if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) &&
+ SRetIdx > FT->getNumParams())
+ return false;
+ }
// Okay, we decided that this is a safe thing to do: go ahead and start
// inserting cast instructions as necessary.
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 25683132c786..9127ddca5915 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1591,7 +1591,7 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
// GEP into CI would undo canonicalizing addrspacecast with different
// pointer types, causing infinite loops.
(!isa<AddrSpaceCastInst>(CI) ||
- GEP->getType() == GEP->getPointerOperand()->getType())) {
+ GEP->getType() == GEP->getPointerOperandType())) {
// Changing the cast operand is usually not a good idea but it is safe
// here because the pointer operand is being replaced with another
// pointer operand so the opcode doesn't need to change.
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index bbafa9e9f468..003029ae39d5 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -140,7 +140,7 @@ static bool isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS,
case ICmpInst::ICMP_UGE:
// True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc)
TrueIfSigned = true;
- return RHS.isSignBit();
+ return RHS.isSignMask();
default:
return false;
}
@@ -1532,14 +1532,14 @@ Instruction *InstCombiner::foldICmpXorConstant(ICmpInst &Cmp,
}
if (Xor->hasOneUse()) {
- // (icmp u/s (xor X SignBit), C) -> (icmp s/u X, (xor C SignBit))
- if (!Cmp.isEquality() && XorC->isSignBit()) {
+ // (icmp u/s (xor X SignMask), C) -> (icmp s/u X, (xor C SignMask))
+ if (!Cmp.isEquality() && XorC->isSignMask()) {
Pred = Cmp.isSigned() ? Cmp.getUnsignedPredicate()
: Cmp.getSignedPredicate();
return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), *C ^ *XorC));
}
- // (icmp u/s (xor X ~SignBit), C) -> (icmp s/u X, (xor C ~SignBit))
+ // (icmp u/s (xor X ~SignMask), C) -> (icmp s/u X, (xor C ~SignMask))
if (!Cmp.isEquality() && XorC->isMaxSignedValue()) {
Pred = Cmp.isSigned() ? Cmp.getUnsignedPredicate()
: Cmp.getSignedPredicate();
@@ -2402,9 +2402,9 @@ Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp,
const APInt &Upper = CR.getUpper();
const APInt &Lower = CR.getLower();
if (Cmp.isSigned()) {
- if (Lower.isSignBit())
+ if (Lower.isSignMask())
return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantInt::get(Ty, Upper));
- if (Upper.isSignBit())
+ if (Upper.isSignMask())
return new ICmpInst(ICmpInst::ICMP_SGE, X, ConstantInt::get(Ty, Lower));
} else {
if (Lower.isMinValue())
@@ -2604,7 +2604,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp,
break;
// Replace (and X, (1 << size(X)-1) != 0) with x s< 0
- if (BOC->isSignBit()) {
+ if (BOC->isSignMask()) {
Constant *Zero = Constant::getNullValue(BOp0->getType());
auto NewPred = isICMP_NE ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
return new ICmpInst(NewPred, BOp0, Zero);
@@ -3032,9 +3032,9 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b
return new ICmpInst(I.getPredicate(), BO0->getOperand(0),
BO1->getOperand(0));
- // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b
+ // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) {
- if (CI->getValue().isSignBit()) {
+ if (CI->getValue().isSignMask()) {
ICmpInst::Predicate Pred =
I.isSigned() ? I.getUnsignedPredicate() : I.getSignedPredicate();
return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
@@ -3797,7 +3797,7 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
static APInt getDemandedBitsLHSMask(ICmpInst &I, unsigned BitWidth,
bool isSignCheck) {
if (isSignCheck)
- return APInt::getSignBit(BitWidth);
+ return APInt::getSignMask(BitWidth);
ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1));
if (!CI) return APInt::getAllOnesValue(BitWidth);
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 6288e054f1bc..675553017838 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -931,6 +931,18 @@ static Instruction *replaceGEPIdxWithZero(InstCombiner &IC, Value *Ptr,
return nullptr;
}
+static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op) {
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
+ const Value *GEPI0 = GEPI->getOperand(0);
+ if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0)
+ return true;
+ }
+ if (isa<UndefValue>(Op) ||
+ (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0))
+ return true;
+ return false;
+}
+
Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
Value *Op = LI.getOperand(0);
@@ -979,27 +991,13 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
if (!LI.isUnordered()) return nullptr;
// load(gep null, ...) -> unreachable
- if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
- const Value *GEPI0 = GEPI->getOperand(0);
- // TODO: Consider a target hook for valid address spaces for this xform.
- if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0){
- // Insert a new store to null instruction before the load to indicate
- // that this code is not reachable. We do this instead of inserting
- // an unreachable instruction directly because we cannot modify the
- // CFG.
- new StoreInst(UndefValue::get(LI.getType()),
- Constant::getNullValue(Op->getType()), &LI);
- return replaceInstUsesWith(LI, UndefValue::get(LI.getType()));
- }
- }
-
// load null/undef -> unreachable
- // TODO: Consider a target hook for valid address spaces for this xform.
- if (isa<UndefValue>(Op) ||
- (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
- // Insert a new store to null instruction before the load to indicate that
- // this code is not reachable. We do this instead of inserting an
- // unreachable instruction directly because we cannot modify the CFG.
+  // TODO: Consider a target hook for valid address spaces for these xforms.
+ if (canSimplifyNullLoadOrGEP(LI, Op)) {
+ // Insert a new store to null instruction before the load to indicate
+ // that this code is not reachable. We do this instead of inserting
+ // an unreachable instruction directly because we cannot modify the
+ // CFG.
new StoreInst(UndefValue::get(LI.getType()),
Constant::getNullValue(Op->getType()), &LI);
return replaceInstUsesWith(LI, UndefValue::get(LI.getType()));
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index f1ac82057e6c..ce66581a491a 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -944,22 +944,21 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
}
}
- if (ConstantInt *One = dyn_cast<ConstantInt>(Op0)) {
- if (One->isOne() && !I.getType()->isIntegerTy(1)) {
- bool isSigned = I.getOpcode() == Instruction::SDiv;
- if (isSigned) {
- // If Op1 is 0 then it's undefined behaviour, if Op1 is 1 then the
- // result is one, if Op1 is -1 then the result is minus one, otherwise
- // it's zero.
- Value *Inc = Builder->CreateAdd(Op1, One);
- Value *Cmp = Builder->CreateICmpULT(
- Inc, ConstantInt::get(I.getType(), 3));
- return SelectInst::Create(Cmp, Op1, ConstantInt::get(I.getType(), 0));
- } else {
- // If Op1 is 0 then it's undefined behaviour. If Op1 is 1 then the
- // result is one, otherwise it's zero.
- return new ZExtInst(Builder->CreateICmpEQ(Op1, One), I.getType());
- }
+ if (match(Op0, m_One())) {
+ assert(!I.getType()->getScalarType()->isIntegerTy(1) &&
+ "i1 divide not removed?");
+ if (I.getOpcode() == Instruction::SDiv) {
+ // If Op1 is 0 then it's undefined behaviour, if Op1 is 1 then the
+ // result is one, if Op1 is -1 then the result is minus one, otherwise
+ // it's zero.
+ Value *Inc = Builder->CreateAdd(Op1, Op0);
+ Value *Cmp = Builder->CreateICmpULT(
+ Inc, ConstantInt::get(I.getType(), 3));
+ return SelectInst::Create(Cmp, Op1, ConstantInt::get(I.getType(), 0));
+ } else {
+ // If Op1 is 0 then it's undefined behaviour. If Op1 is 1 then the
+ // result is one, otherwise it's zero.
+ return new ZExtInst(Builder->CreateICmpEQ(Op1, Op0), I.getType());
}
}
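[Editor's note] The select emitted above lowers '1 sdiv Y' without a division: Y + 1 is unsigned-less-than 3 exactly for Y in {-1, 0, 1}, and in those cases the quotient is Y itself (Y == 0 being undefined behaviour anyway). A scalar model of the emitted sequence:

    #include <cassert>
    #include <cstdint>

    int32_t oneSDivSketch(int32_t Y) {   // models "1 sdiv Y"
      uint32_t Inc = uint32_t(Y) + 1;    // Inc = add Y, 1
      bool Cmp = Inc < 3u;               // Cmp = icmp ult Inc, 3
      return Cmp ? Y : 0;                // select Cmp, Y, 0
    }

    void checkOneSDiv() {
      assert(oneSDivSketch(1) == 1 && oneSDivSketch(-1) == -1);
      assert(oneSDivSketch(7) == 0 && oneSDivSketch(-5) == 0);
    }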
@@ -1238,25 +1237,23 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
// If the sign bits of both operands are zero (i.e. we can prove they are
// unsigned inputs), turn this into a udiv.
- if (I.getType()->isIntegerTy()) {
- APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
- if (MaskedValueIsZero(Op0, Mask, 0, &I)) {
- if (MaskedValueIsZero(Op1, Mask, 0, &I)) {
- // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set
- auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
- BO->setIsExact(I.isExact());
- return BO;
- }
+ APInt Mask(APInt::getSignMask(I.getType()->getScalarSizeInBits()));
+ if (MaskedValueIsZero(Op0, Mask, 0, &I)) {
+ if (MaskedValueIsZero(Op1, Mask, 0, &I)) {
+ // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set
+ auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
+ BO->setIsExact(I.isExact());
+ return BO;
+ }
- if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, &AC, &I, &DT)) {
- // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y)
- // Safe because the only negative value (1 << Y) can take on is
- // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have
- // the sign bit set.
- auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
- BO->setIsExact(I.isExact());
- return BO;
- }
+ if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, &AC, &I, &DT)) {
+ // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y)
+ // Safe because the only negative value (1 << Y) can take on is
+ // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have
+ // the sign bit set.
+ auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
+ BO->setIsExact(I.isExact());
+ return BO;
}
}
@@ -1546,13 +1543,11 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
// If the sign bits of both operands are zero (i.e. we can prove they are
// unsigned inputs), turn this into a urem.
- if (I.getType()->isIntegerTy()) {
- APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
- if (MaskedValueIsZero(Op1, Mask, 0, &I) &&
- MaskedValueIsZero(Op0, Mask, 0, &I)) {
- // X srem Y -> X urem Y, iff X and Y don't have sign bit set
- return BinaryOperator::CreateURem(Op0, Op1, I.getName());
- }
+ APInt Mask(APInt::getSignMask(I.getType()->getScalarSizeInBits()));
+ if (MaskedValueIsZero(Op1, Mask, 0, &I) &&
+ MaskedValueIsZero(Op0, Mask, 0, &I)) {
+ // X srem Y -> X urem Y, iff X and Y don't have sign bit set
+ return BinaryOperator::CreateURem(Op0, Op1, I.getName());
}
// If it's a constant vector, flip any negative values positive.
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 693b6c95c169..5d6d899da4b5 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -618,7 +618,7 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI,
{
unsigned BitWidth =
DL.getTypeSizeInBits(TrueVal->getType()->getScalarType());
- APInt MinSignedValue = APInt::getSignBit(BitWidth);
+ APInt MinSignedValue = APInt::getSignedMinValue(BitWidth);
Value *X;
const APInt *Y, *C;
bool TrueWhenUnset;
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 9aa679c60e47..f77d713b9b07 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -370,7 +370,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
MaskV <<= Op1C->getZExtValue();
else {
assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift");
- MaskV = MaskV.lshr(Op1C->getZExtValue());
+ MaskV.lshrInPlace(Op1C->getZExtValue());
}
// shift1 & 0x00FF
@@ -760,7 +760,7 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
}
// See if we can turn a signed shr into an unsigned shr.
- if (MaskedValueIsZero(Op0, APInt::getSignBit(BitWidth), 0, &I))
+ if (MaskedValueIsZero(Op0, APInt::getSignMask(BitWidth), 0, &I))
return BinaryOperator::CreateLShr(Op0, Op1);
return nullptr;
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 4e6f02058d83..2ba052b7e02d 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -38,7 +38,7 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
// If there are no bits set that aren't demanded, nothing to do.
Demanded = Demanded.zextOrTrunc(C->getBitWidth());
- if ((~Demanded & *C) == 0)
+ if (C->isSubsetOf(Demanded))
return false;
// This instruction is producing bits that are not demanded. Shrink the RHS.
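[Editor's note] This hunk and the SimplifyDemandedUseBits changes below rewrite masked comparisons in terms of APInt::isSubsetOf, which is just a readable spelling of "A has no bits set outside B". A minimal sketch of the equivalence, assuming A and B share a bit width:

    #include "llvm/ADT/APInt.h"
    #include <cassert>

    void subsetSketch(const llvm::APInt &A, const llvm::APInt &B) {
      assert(A.isSubsetOf(B) == ((A & ~B) == 0));
    }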
@@ -117,27 +117,16 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
KnownOne.getBitWidth() == BitWidth &&
"Value *V, DemandedMask, KnownZero and KnownOne "
"must have same BitWidth");
- const APInt *C;
- if (match(V, m_APInt(C))) {
- // We know all of the bits for a scalar constant or a splat vector constant!
- KnownOne = *C & DemandedMask;
- KnownZero = ~KnownOne & DemandedMask;
- return nullptr;
- }
- if (isa<ConstantPointerNull>(V)) {
- // We know all of the bits for a constant!
- KnownOne.clearAllBits();
- KnownZero = DemandedMask;
+
+ if (isa<Constant>(V)) {
+ computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
return nullptr;
}
KnownZero.clearAllBits();
KnownOne.clearAllBits();
- if (DemandedMask == 0) { // Not demanding any bits from V.
- if (isa<UndefValue>(V))
- return nullptr;
+ if (DemandedMask == 0) // Not demanding any bits from V.
return UndefValue::get(VTy);
- }
if (Depth == 6) // Limit search depth.
return nullptr;
@@ -187,16 +176,14 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If the client is only demanding bits that we know, return the known
// constant.
- if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask)
+ if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
return Constant::getIntegerValue(VTy, IKnownOne);
// If all of the demanded bits are known 1 on one side, return the other.
// These bits cannot contribute to the result of the 'and'.
- if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
- (DemandedMask & ~LHSKnownZero))
+ if (DemandedMask.isSubsetOf(LHSKnownZero | RHSKnownOne))
return I->getOperand(0);
- if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
- (DemandedMask & ~RHSKnownZero))
+ if (DemandedMask.isSubsetOf(RHSKnownZero | LHSKnownOne))
return I->getOperand(1);
// If the RHS is a constant, see if we can simplify it.
@@ -224,25 +211,14 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If the client is only demanding bits that we know, return the known
// constant.
- if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask)
+ if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
return Constant::getIntegerValue(VTy, IKnownOne);
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'or'.
- if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
- (DemandedMask & ~LHSKnownOne))
+ if (DemandedMask.isSubsetOf(LHSKnownOne | RHSKnownZero))
return I->getOperand(0);
- if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
- (DemandedMask & ~RHSKnownOne))
- return I->getOperand(1);
-
- // If all of the potentially set bits on one side are known to be set on
- // the other side, just use the 'other' side.
- if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
- (DemandedMask & (~RHSKnownZero)))
- return I->getOperand(0);
- if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
- (DemandedMask & (~LHSKnownZero)))
+ if (DemandedMask.isSubsetOf(RHSKnownOne | LHSKnownZero))
return I->getOperand(1);
// If the RHS is a constant, see if we can simplify it.
@@ -271,20 +247,20 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If the client is only demanding bits that we know, return the known
// constant.
- if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask)
+ if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
return Constant::getIntegerValue(VTy, IKnownOne);
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'xor'.
- if ((DemandedMask & RHSKnownZero) == DemandedMask)
+ if (DemandedMask.isSubsetOf(RHSKnownZero))
return I->getOperand(0);
- if ((DemandedMask & LHSKnownZero) == DemandedMask)
+ if (DemandedMask.isSubsetOf(LHSKnownZero))
return I->getOperand(1);
// If all of the demanded bits are known to be zero on one side or the
// other, turn this into an *inclusive* or.
// e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
- if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) {
+ if (DemandedMask.isSubsetOf(RHSKnownZero | LHSKnownZero)) {
Instruction *Or =
BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
I->getName());
@@ -295,14 +271,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// bits on that side are also known to be set on the other side, turn this
// into an AND, as we know the bits will be cleared.
// e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
- if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) {
- // all known
- if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) {
- Constant *AndC = Constant::getIntegerValue(VTy,
- ~RHSKnownOne & DemandedMask);
- Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
- return InsertNewInstWith(And, *I);
- }
+ if (DemandedMask.isSubsetOf(RHSKnownZero|RHSKnownOne) &&
+ RHSKnownOne.isSubsetOf(LHSKnownOne)) {
+ Constant *AndC = Constant::getIntegerValue(VTy,
+ ~RHSKnownOne & DemandedMask);
+ Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
+ return InsertNewInstWith(And, *I);
}
// If the RHS is a constant, see if we can simplify it.
@@ -529,9 +503,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
KnownZero.setLowBits(ShiftAmt);
}
break;
- case Instruction::LShr:
- // For a logical shift right
- if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ case Instruction::LShr: {
+ const APInt *SA;
+ if (match(I->getOperand(1), m_APInt(SA))) {
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
// Unsigned shift right.
@@ -546,13 +520,14 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Depth + 1))
return I;
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
- KnownZero = KnownZero.lshr(ShiftAmt);
- KnownOne = KnownOne.lshr(ShiftAmt);
+ KnownZero.lshrInPlace(ShiftAmt);
+ KnownOne.lshrInPlace(ShiftAmt);
if (ShiftAmt)
KnownZero.setHighBits(ShiftAmt); // high bits known zero.
}
break;
- case Instruction::AShr:
+ }
+ case Instruction::AShr: {
// If this is an arithmetic shift right and only the low-bit is set, we can
// always convert this into a logical shr, even if the shift amount is
// variable. The low bit of the shift cannot be an input sign bit unless
@@ -566,15 +541,16 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If the sign bit is the only bit demanded by this ashr, then there is no
// need to do it, the shift doesn't change the high bit.
- if (DemandedMask.isSignBit())
+ if (DemandedMask.isSignMask())
return I->getOperand(0);
- if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ const APInt *SA;
+ if (match(I->getOperand(1), m_APInt(SA))) {
uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
// Signed shift right.
APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
- // If any of the "high bits" are demanded, we should set the sign bit as
+ // If any of the high bits are demanded, we should set the sign bit as
// demanded.
if (DemandedMask.countLeadingZeros() <= ShiftAmt)
DemandedMaskIn.setSignBit();
@@ -587,31 +563,32 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (SimplifyDemandedBits(I, 0, DemandedMaskIn, KnownZero, KnownOne,
Depth + 1))
return I;
+
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
// Compute the new bits that are at the top now.
APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
- KnownZero = KnownZero.lshr(ShiftAmt);
- KnownOne = KnownOne.lshr(ShiftAmt);
+ KnownZero.lshrInPlace(ShiftAmt);
+ KnownOne.lshrInPlace(ShiftAmt);
// Handle the sign bits.
- APInt SignBit(APInt::getSignBit(BitWidth));
+ APInt SignMask(APInt::getSignMask(BitWidth));
// Adjust to where it is now in the mask.
- SignBit = SignBit.lshr(ShiftAmt);
+ SignMask.lshrInPlace(ShiftAmt);
// If the input sign bit is known to be zero, or if none of the top bits
// are demanded, turn this into an unsigned shift right.
if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] ||
(HighBits & ~DemandedMask) == HighBits) {
- // Perform the logical shift right.
- BinaryOperator *NewVal = BinaryOperator::CreateLShr(I->getOperand(0),
- SA, I->getName());
- NewVal->setIsExact(cast<BinaryOperator>(I)->isExact());
- return InsertNewInstWith(NewVal, *I);
- } else if ((KnownOne & SignBit) != 0) { // New bits are known one.
+ BinaryOperator *LShr = BinaryOperator::CreateLShr(I->getOperand(0),
+ I->getOperand(1));
+ LShr->setIsExact(cast<BinaryOperator>(I)->isExact());
+ return InsertNewInstWith(LShr, *I);
+ } else if ((KnownOne & SignMask) != 0) { // New bits are known one.
KnownOne |= HighBits;
}
}
break;
+ }
case Instruction::SRem:
if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
// X % -1 demands all the bits because we don't want to introduce
@@ -624,7 +601,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
return I->getOperand(0);
APInt LowBits = RA - 1;
- APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
+ APInt Mask2 = LowBits | APInt::getSignMask(BitWidth);
if (SimplifyDemandedBits(I, 0, Mask2, LHSKnownZero, LHSKnownOne,
Depth + 1))
return I;
@@ -635,26 +612,26 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If LHS is non-negative or has all low bits zero, then the upper bits
// are all zero.
- if (LHSKnownZero.isNegative() || ((LHSKnownZero & LowBits) == LowBits))
+ if (LHSKnownZero.isSignBitSet() || ((LHSKnownZero & LowBits) == LowBits))
KnownZero |= ~LowBits;
// If LHS is negative and not all low bits are zero, then the upper bits
// are all one.
- if (LHSKnownOne.isNegative() && ((LHSKnownOne & LowBits) != 0))
+ if (LHSKnownOne.isSignBitSet() && ((LHSKnownOne & LowBits) != 0))
KnownOne |= ~LowBits;
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ break;
}
}
// The sign bit is the LHS's sign bit, except when the result of the
// remainder is zero.
- if (DemandedMask.isNegative() && KnownZero.isNonNegative()) {
- APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+ if (DemandedMask.isSignBitSet()) {
computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
CxtI);
// If it's known zero, our sign bit is also zero.
- if (LHSKnownZero.isNegative())
+ if (LHSKnownZero.isSignBitSet())
KnownZero.setSignBit();
}
break;
@@ -744,7 +721,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If the client is only demanding bits that we know, return the known
// constant.
- if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask)
+ if (DemandedMask.isSubsetOf(KnownZero|KnownOne))
return Constant::getIntegerValue(VTy, KnownOne);
return nullptr;
}
@@ -783,17 +760,15 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I,
// If the client is only demanding bits that we know, return the known
// constant.
- if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask)
+ if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
return Constant::getIntegerValue(ITy, IKnownOne);
// If all of the demanded bits are known 1 on one side, return the other.
// These bits cannot contribute to the result of the 'and' in this
// context.
- if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
- (DemandedMask & ~LHSKnownZero))
+ if (DemandedMask.isSubsetOf(LHSKnownZero | RHSKnownOne))
return I->getOperand(0);
- if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
- (DemandedMask & ~RHSKnownZero))
+ if (DemandedMask.isSubsetOf(RHSKnownZero | LHSKnownOne))
return I->getOperand(1);
KnownZero = std::move(IKnownZero);
@@ -817,26 +792,15 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I,
// If the client is only demanding bits that we know, return the known
// constant.
- if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask)
+ if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
return Constant::getIntegerValue(ITy, IKnownOne);
// If all of the demanded bits are known zero on one side, return the
// other. These bits cannot contribute to the result of the 'or' in this
// context.
- if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
- (DemandedMask & ~LHSKnownOne))
+ if (DemandedMask.isSubsetOf(LHSKnownOne | RHSKnownZero))
return I->getOperand(0);
- if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
- (DemandedMask & ~RHSKnownOne))
- return I->getOperand(1);
-
- // If all of the potentially set bits on one side are known to be set on
- // the other side, just use the 'other' side.
- if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
- (DemandedMask & (~RHSKnownZero)))
- return I->getOperand(0);
- if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
- (DemandedMask & (~LHSKnownZero)))
+ if (DemandedMask.isSubsetOf(RHSKnownOne | LHSKnownZero))
return I->getOperand(1);
KnownZero = std::move(IKnownZero);
@@ -861,14 +825,14 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I,
// If the client is only demanding bits that we know, return the known
// constant.
- if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask)
+ if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
return Constant::getIntegerValue(ITy, IKnownOne);
// If all of the demanded bits are known zero on one side, return the
// other.
- if ((DemandedMask & RHSKnownZero) == DemandedMask)
+ if (DemandedMask.isSubsetOf(RHSKnownZero))
return I->getOperand(0);
- if ((DemandedMask & LHSKnownZero) == DemandedMask)
+ if (DemandedMask.isSubsetOf(LHSKnownZero))
return I->getOperand(1);
// Output known-0 bits are known if clear or set in both the LHS & RHS.
@@ -883,7 +847,7 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I,
// If this user is only demanding bits that we know, return the known
// constant.
- if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask)
+ if (DemandedMask.isSubsetOf(KnownZero|KnownOne))
return Constant::getIntegerValue(ITy, KnownOne);
break;
@@ -1641,7 +1605,52 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
UndefElts.setHighBits(VWidth / 2);
break;
case Intrinsic::amdgcn_buffer_load:
- case Intrinsic::amdgcn_buffer_load_format: {
+ case Intrinsic::amdgcn_buffer_load_format:
+ case Intrinsic::amdgcn_image_sample:
+ case Intrinsic::amdgcn_image_sample_cl:
+ case Intrinsic::amdgcn_image_sample_d:
+ case Intrinsic::amdgcn_image_sample_d_cl:
+ case Intrinsic::amdgcn_image_sample_l:
+ case Intrinsic::amdgcn_image_sample_b:
+ case Intrinsic::amdgcn_image_sample_b_cl:
+ case Intrinsic::amdgcn_image_sample_lz:
+ case Intrinsic::amdgcn_image_sample_cd:
+ case Intrinsic::amdgcn_image_sample_cd_cl:
+
+ case Intrinsic::amdgcn_image_sample_c:
+ case Intrinsic::amdgcn_image_sample_c_cl:
+ case Intrinsic::amdgcn_image_sample_c_d:
+ case Intrinsic::amdgcn_image_sample_c_d_cl:
+ case Intrinsic::amdgcn_image_sample_c_l:
+ case Intrinsic::amdgcn_image_sample_c_b:
+ case Intrinsic::amdgcn_image_sample_c_b_cl:
+ case Intrinsic::amdgcn_image_sample_c_lz:
+ case Intrinsic::amdgcn_image_sample_c_cd:
+ case Intrinsic::amdgcn_image_sample_c_cd_cl:
+
+ case Intrinsic::amdgcn_image_sample_o:
+ case Intrinsic::amdgcn_image_sample_cl_o:
+ case Intrinsic::amdgcn_image_sample_d_o:
+ case Intrinsic::amdgcn_image_sample_d_cl_o:
+ case Intrinsic::amdgcn_image_sample_l_o:
+ case Intrinsic::amdgcn_image_sample_b_o:
+ case Intrinsic::amdgcn_image_sample_b_cl_o:
+ case Intrinsic::amdgcn_image_sample_lz_o:
+ case Intrinsic::amdgcn_image_sample_cd_o:
+ case Intrinsic::amdgcn_image_sample_cd_cl_o:
+
+ case Intrinsic::amdgcn_image_sample_c_o:
+ case Intrinsic::amdgcn_image_sample_c_cl_o:
+ case Intrinsic::amdgcn_image_sample_c_d_o:
+ case Intrinsic::amdgcn_image_sample_c_d_cl_o:
+ case Intrinsic::amdgcn_image_sample_c_l_o:
+ case Intrinsic::amdgcn_image_sample_c_b_o:
+ case Intrinsic::amdgcn_image_sample_c_b_cl_o:
+ case Intrinsic::amdgcn_image_sample_c_lz_o:
+ case Intrinsic::amdgcn_image_sample_c_cd_o:
+ case Intrinsic::amdgcn_image_sample_c_cd_cl_o:
+
+ case Intrinsic::amdgcn_image_getlod: {
if (VWidth == 1 || !DemandedElts.isMask())
return nullptr;
@@ -1656,8 +1665,17 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
Type *NewTy = (NewNumElts == 1) ? EltTy :
VectorType::get(EltTy, NewNumElts);
- Function *NewIntrin = Intrinsic::getDeclaration(M, II->getIntrinsicID(),
- NewTy);
+ auto IID = II->getIntrinsicID();
+
+ bool IsBuffer = IID == Intrinsic::amdgcn_buffer_load ||
+ IID == Intrinsic::amdgcn_buffer_load_format;
+
+ Function *NewIntrin = IsBuffer ?
+ Intrinsic::getDeclaration(M, IID, NewTy) :
+ // Samplers have 3 mangled types.
+ Intrinsic::getDeclaration(M, IID,
+ { NewTy, II->getArgOperand(0)->getType(),
+ II->getArgOperand(1)->getType()});
SmallVector<Value *, 5> Args;
for (unsigned I = 0, E = II->getNumArgOperands(); I != E; ++I)
@@ -1669,6 +1687,29 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
CallInst *NewCall = Builder->CreateCall(NewIntrin, Args);
NewCall->takeName(II);
NewCall->copyMetadata(*II);
+
+ if (!IsBuffer) {
+ ConstantInt *DMask = dyn_cast<ConstantInt>(NewCall->getArgOperand(3));
+ if (DMask) {
+ unsigned DMaskVal = DMask->getZExtValue() & 0xf;
+
+ unsigned PopCnt = 0;
+ unsigned NewDMask = 0;
+ for (unsigned I = 0; I < 4; ++I) {
+ const unsigned Bit = 1 << I;
+ if (!!(DMaskVal & Bit)) {
+ if (++PopCnt > NewNumElts)
+ break;
+
+ NewDMask |= Bit;
+ }
+ }
+
+ NewCall->setArgOperand(3, ConstantInt::get(DMask->getType(), NewDMask));
+ }
+ }
+
+
if (NewNumElts == 1) {
return Builder->CreateInsertElement(UndefValue::get(V->getType()),
NewCall, static_cast<uint64_t>(0));
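
Many of the hunks above replace the idiom (DemandedMask & Mask) == DemandedMask with DemandedMask.isSubsetOf(Mask). A small self-contained sketch (plain C++ over uint64_t, not the APInt implementation) showing that the two forms are the same subset test:

#include <cassert>
#include <cstdint>

// Mirrors the semantics of APInt::isSubsetOf: every bit set in LHS is also set in RHS.
static bool isSubsetOf(uint64_t LHS, uint64_t RHS) {
  return (LHS & ~RHS) == 0;
}

int main() {
  uint64_t DemandedMask = 0x00FF;
  uint64_t KnownZero    = 0x00F0;
  uint64_t KnownOne     = 0x000F;

  bool OldForm = (DemandedMask & (KnownZero | KnownOne)) == DemandedMask;
  bool NewForm = isSubsetOf(DemandedMask, KnownZero | KnownOne);
  assert(OldForm == NewForm && NewForm);   // all demanded bits are known here
  return 0;
}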
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 88ef17bbc8fa..81f2d9fa179f 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -148,9 +148,9 @@ static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
bool Overflow = false;
if (Opcode == Instruction::Add)
- BVal->sadd_ov(*CVal, Overflow);
+ (void)BVal->sadd_ov(*CVal, Overflow);
else
- BVal->ssub_ov(*CVal, Overflow);
+ (void)BVal->ssub_ov(*CVal, Overflow);
return !Overflow;
}
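
The (void) casts above make it explicit that MaintainNoSignedWrap only needs the Overflow flag from sadd_ov/ssub_ov; the folded value itself is discarded. A hedged sketch of the same check using a compiler builtin instead of APInt (assumes GCC/Clang's __builtin_add_overflow):

#include <cassert>

static bool noSignedWrapAfterReassoc(int B, int C) {
  int Unused;
  bool Overflow = __builtin_add_overflow(B, C, &Unused);  // result ignored, only the flag matters
  return !Overflow;
}

int main() {
  assert(noSignedWrapAfterReassoc(1, 2));
  assert(!noSignedWrapAfterReassoc(2147483647, 1));        // INT_MAX + 1 would wrap
  return 0;
}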
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 94cfc69ed555..036dd8d39a08 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -2586,7 +2586,7 @@ void FunctionStackPoisoner::processStaticAllocas() {
Value *NewAllocaPtr = IRB.CreateIntToPtr(
IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Desc.Offset)),
AI->getType());
- replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB, /*Deref=*/true);
+ replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB, /*Deref=*/false);
AI->replaceAllUsesWith(NewAllocaPtr);
}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index fa0c7cc5a4c5..8bdd917a0596 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -59,13 +59,8 @@ using namespace llvm;
static const char *const SanCovModuleInitName = "__sanitizer_cov_module_init";
static const char *const SanCovName = "__sanitizer_cov";
static const char *const SanCovWithCheckName = "__sanitizer_cov_with_check";
-static const char *const SanCovIndirCallName = "__sanitizer_cov_indir_call16";
static const char *const SanCovTracePCIndirName =
"__sanitizer_cov_trace_pc_indir";
-static const char *const SanCovTraceEnterName =
- "__sanitizer_cov_trace_func_enter";
-static const char *const SanCovTraceBBName =
- "__sanitizer_cov_trace_basic_block";
static const char *const SanCovTracePCName = "__sanitizer_cov_trace_pc";
static const char *const SanCovTraceCmp1 = "__sanitizer_cov_trace_cmp1";
static const char *const SanCovTraceCmp2 = "__sanitizer_cov_trace_cmp2";
@@ -86,8 +81,7 @@ static const char *const SanCovTracePCGuardInitName =
static cl::opt<int> ClCoverageLevel(
"sanitizer-coverage-level",
cl::desc("Sanitizer Coverage. 0: none, 1: entry block, 2: all blocks, "
- "3: all blocks and critical edges, "
- "4: above plus indirect calls"),
+ "3: all blocks and critical edges"),
cl::Hidden, cl::init(0));
static cl::opt<unsigned> ClCoverageBlockThreshold(
@@ -96,12 +90,6 @@ static cl::opt<unsigned> ClCoverageBlockThreshold(
" more than this number of blocks."),
cl::Hidden, cl::init(0));
-static cl::opt<bool>
- ClExperimentalTracing("sanitizer-coverage-experimental-tracing",
- cl::desc("Experimental basic-block tracing: insert "
- "callbacks at every basic block"),
- cl::Hidden, cl::init(false));
-
static cl::opt<bool> ClExperimentalTracePC("sanitizer-coverage-trace-pc",
cl::desc("Experimental pc tracing"),
cl::Hidden, cl::init(false));
@@ -128,16 +116,6 @@ static cl::opt<bool>
cl::desc("Reduce the number of instrumented blocks"),
cl::Hidden, cl::init(true));
-// Experimental 8-bit counters used as an additional search heuristic during
-// coverage-guided fuzzing.
-// The counters are not thread-friendly:
-// - contention on these counters may cause significant slowdown;
-// - the counter updates are racy and the results may be inaccurate.
-// They are also inaccurate due to 8-bit integer overflow.
-static cl::opt<bool> ClUse8bitCounters("sanitizer-coverage-8bit-counters",
- cl::desc("Experimental 8-bit counters"),
- cl::Hidden, cl::init(false));
-
namespace {
SanitizerCoverageOptions getOptions(int LegacyCoverageLevel) {
@@ -168,11 +146,9 @@ SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) {
SanitizerCoverageOptions CLOpts = getOptions(ClCoverageLevel);
Options.CoverageType = std::max(Options.CoverageType, CLOpts.CoverageType);
Options.IndirectCalls |= CLOpts.IndirectCalls;
- Options.TraceBB |= ClExperimentalTracing;
Options.TraceCmp |= ClCMPTracing;
Options.TraceDiv |= ClDIVTracing;
Options.TraceGep |= ClGEPTracing;
- Options.Use8bitCounters |= ClUse8bitCounters;
Options.TracePC |= ClExperimentalTracePC;
Options.TracePCGuard |= ClTracePCGuard;
return Options;
@@ -212,16 +188,15 @@ private:
bool UseCalls);
unsigned NumberOfInstrumentedBlocks() {
return SanCovFunction->getNumUses() +
- SanCovWithCheckFunction->getNumUses() + SanCovTraceBB->getNumUses() +
- SanCovTraceEnter->getNumUses();
+ SanCovWithCheckFunction->getNumUses();
}
StringRef getSanCovTracePCGuardSection() const;
StringRef getSanCovTracePCGuardSectionStart() const;
StringRef getSanCovTracePCGuardSectionEnd() const;
Function *SanCovFunction;
Function *SanCovWithCheckFunction;
- Function *SanCovIndirCallFunction, *SanCovTracePCIndir;
- Function *SanCovTraceEnter, *SanCovTraceBB, *SanCovTracePC, *SanCovTracePCGuard;
+ Function *SanCovTracePCIndir;
+ Function *SanCovTracePC, *SanCovTracePCGuard;
Function *SanCovTraceCmpFunction[4];
Function *SanCovTraceDivFunction[2];
Function *SanCovTraceGepFunction;
@@ -235,7 +210,6 @@ private:
GlobalVariable *GuardArray;
GlobalVariable *FunctionGuardArray; // for trace-pc-guard.
- GlobalVariable *EightBitCounterArray;
bool HasSancovGuardsSection;
SanitizerCoverageOptions Options;
@@ -267,9 +241,6 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
M.getOrInsertFunction(SanCovWithCheckName, VoidTy, Int32PtrTy));
SanCovTracePCIndir = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(SanCovTracePCIndirName, VoidTy, IntptrTy));
- SanCovIndirCallFunction =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- SanCovIndirCallName, VoidTy, IntptrTy, IntptrTy));
SanCovTraceCmpFunction[0] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
SanCovTraceCmp1, VoidTy, IRB.getInt8Ty(), IRB.getInt8Ty()));
@@ -305,24 +276,15 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
M.getOrInsertFunction(SanCovTracePCName, VoidTy));
SanCovTracePCGuard = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
SanCovTracePCGuardName, VoidTy, Int32PtrTy));
- SanCovTraceEnter = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(SanCovTraceEnterName, VoidTy, Int32PtrTy));
- SanCovTraceBB = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(SanCovTraceBBName, VoidTy, Int32PtrTy));
// At this point we create a dummy array of guards because we don't
// know how many elements we will need.
Type *Int32Ty = IRB.getInt32Ty();
- Type *Int8Ty = IRB.getInt8Ty();
if (!Options.TracePCGuard)
GuardArray =
new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage,
nullptr, "__sancov_gen_cov_tmp");
- if (Options.Use8bitCounters)
- EightBitCounterArray =
- new GlobalVariable(M, Int8Ty, false, GlobalVariable::ExternalLinkage,
- nullptr, "__sancov_gen_cov_tmp");
for (auto &F : M)
runOnFunction(F);
@@ -344,20 +306,6 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
GuardArray->eraseFromParent();
}
- GlobalVariable *RealEightBitCounterArray;
- if (Options.Use8bitCounters) {
- // Make sure the array is 16-aligned.
- static const int CounterAlignment = 16;
- Type *Int8ArrayNTy = ArrayType::get(Int8Ty, alignTo(N, CounterAlignment));
- RealEightBitCounterArray = new GlobalVariable(
- M, Int8ArrayNTy, false, GlobalValue::PrivateLinkage,
- Constant::getNullValue(Int8ArrayNTy), "__sancov_gen_cov_counter");
- RealEightBitCounterArray->setAlignment(CounterAlignment);
- EightBitCounterArray->replaceAllUsesWith(
- IRB.CreatePointerCast(RealEightBitCounterArray, Int8PtrTy));
- EightBitCounterArray->eraseFromParent();
- }
-
// Create variable for module (compilation unit) name
Constant *ModNameStrConst =
ConstantDataArray::getString(M.getContext(), M.getName(), true);
@@ -396,10 +344,7 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
M, SanCovModuleCtorName, SanCovModuleInitName,
{Int32PtrTy, IntptrTy, Int8PtrTy, Int8PtrTy},
{IRB.CreatePointerCast(RealGuardArray, Int32PtrTy),
- ConstantInt::get(IntptrTy, N),
- Options.Use8bitCounters
- ? IRB.CreatePointerCast(RealEightBitCounterArray, Int8PtrTy)
- : Constant::getNullValue(Int8PtrTy),
+ ConstantInt::get(IntptrTy, N), Constant::getNullValue(Int8PtrTy),
IRB.CreatePointerCast(ModuleName, Int8PtrTy)});
appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority);
@@ -566,26 +511,15 @@ void SanitizerCoverageModule::InjectCoverageForIndirectCalls(
Function &F, ArrayRef<Instruction *> IndirCalls) {
if (IndirCalls.empty())
return;
- const int CacheSize = 16;
- const int CacheAlignment = 64; // Align for better performance.
- Type *Ty = ArrayType::get(IntptrTy, CacheSize);
+ if (!Options.TracePC && !Options.TracePCGuard)
+ return;
for (auto I : IndirCalls) {
IRBuilder<> IRB(I);
CallSite CS(I);
Value *Callee = CS.getCalledValue();
if (isa<InlineAsm>(Callee))
continue;
- GlobalVariable *CalleeCache = new GlobalVariable(
- *F.getParent(), Ty, false, GlobalValue::PrivateLinkage,
- Constant::getNullValue(Ty), "__sancov_gen_callee_cache");
- CalleeCache->setAlignment(CacheAlignment);
- if (Options.TracePC || Options.TracePCGuard)
- IRB.CreateCall(SanCovTracePCIndir,
- IRB.CreatePointerCast(Callee, IntptrTy));
- else
- IRB.CreateCall(SanCovIndirCallFunction,
- {IRB.CreatePointerCast(Callee, IntptrTy),
- IRB.CreatePointerCast(CalleeCache, IntptrTy)});
+ IRB.CreateCall(SanCovTracePCIndir, IRB.CreatePointerCast(Callee, IntptrTy));
}
}
@@ -735,9 +669,7 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
IRB.CreatePointerCast(GuardArray, IntptrTy),
ConstantInt::get(IntptrTy, (1 + NumberOfInstrumentedBlocks()) * 4));
GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy);
- if (Options.TraceBB) {
- IRB.CreateCall(IsEntryBB ? SanCovTraceEnter : SanCovTraceBB, GuardP);
- } else if (UseCalls) {
+ if (UseCalls) {
IRB.CreateCall(SanCovWithCheckFunction, GuardP);
} else {
LoadInst *Load = IRB.CreateLoad(GuardP);
@@ -755,19 +687,6 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge.
}
}
-
- if (Options.Use8bitCounters) {
- IRB.SetInsertPoint(&*IP);
- Value *P = IRB.CreateAdd(
- IRB.CreatePointerCast(EightBitCounterArray, IntptrTy),
- ConstantInt::get(IntptrTy, NumberOfInstrumentedBlocks() - 1));
- P = IRB.CreateIntToPtr(P, IRB.getInt8PtrTy());
- LoadInst *LI = IRB.CreateLoad(P);
- Value *Inc = IRB.CreateAdd(LI, ConstantInt::get(IRB.getInt8Ty(), 1));
- StoreInst *SI = IRB.CreateStore(Inc, P);
- SetNoSanitizeMetadata(LI);
- SetNoSanitizeMetadata(SI);
- }
}
StringRef SanitizerCoverageModule::getSanCovTracePCGuardSection() const {
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVNHoist.cpp b/contrib/llvm/lib/Transforms/Scalar/GVNHoist.cpp
index 6adfe130d148..b7514a6d5793 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GVNHoist.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GVNHoist.cpp
@@ -45,6 +45,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -1010,6 +1011,7 @@ public:
AU.addRequired<MemorySSAWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<MemorySSAWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
};
} // namespace
@@ -1026,6 +1028,7 @@ PreservedAnalyses GVNHoistPass::run(Function &F, FunctionAnalysisManager &AM) {
PreservedAnalyses PA;
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<MemorySSAAnalysis>();
+ PA.preserve<GlobalsAA>();
return PA;
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
index cf63cb660db8..20b37c4b70e6 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -197,8 +197,7 @@ public:
continue;
// Only propagate the value if they are of the same type.
- if (Store->getPointerOperand()->getType() !=
- Load->getPointerOperand()->getType())
+ if (Store->getPointerOperandType() != Load->getPointerOperandType())
continue;
Candidates.emplace_front(Load, Store);
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
index 86058fe0b1aa..fd15a9014def 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -557,7 +557,7 @@ bool LoopReroll::isLoopControlIV(Loop *L, Instruction *IV) {
Instruction *UUser = dyn_cast<Instruction>(UU);
// Skip SExt if we are extending an nsw value
// TODO: Allow ZExt too
- if (BO->hasNoSignedWrap() && UUser && UUser->getNumUses() == 1 &&
+ if (BO->hasNoSignedWrap() && UUser && UUser->hasOneUse() &&
isa<SExtInst>(UUser))
UUser = dyn_cast<Instruction>(*(UUser->user_begin()));
if (!isCompareUsedByBranch(UUser))
@@ -852,7 +852,7 @@ collectPossibleRoots(Instruction *Base, std::map<int64_t,Instruction*> &Roots) {
for (auto &KV : Roots) {
if (KV.first == 0)
continue;
- if (KV.second->getNumUses() != NumBaseUses) {
+ if (!KV.second->hasNUses(NumBaseUses)) {
DEBUG(dbgs() << "LRR: Aborting - Root and Base #users not the same: "
<< "#Base=" << NumBaseUses << ", #Root=" <<
KV.second->getNumUses() << "\n");
@@ -867,7 +867,7 @@ void LoopReroll::DAGRootTracker::
findRootsRecursive(Instruction *I, SmallInstructionSet SubsumedInsts) {
// Does the user look like it could be part of a root set?
// All its users must be simple arithmetic ops.
- if (I->getNumUses() > IL_MaxRerollIterations)
+ if (I->hasNUsesOrMore(IL_MaxRerollIterations + 1))
return;
if (I != IV && findRootsBase(I, SubsumedInsts))
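
The getNumUses()-to-hasNUses()/hasNUsesOrMore() changes above are behavior-preserving: the predicate forms can stop walking the use list as soon as the answer is decided instead of counting every use. A toy illustration of the same idea over iterators (not the LLVM Value API):

#include <cassert>
#include <list>

// Walks at most N+1 items: returns true iff the range holds exactly N items.
template <typename It>
static bool hasNItems(It Begin, It End, unsigned N) {
  for (; N; --N, ++Begin)
    if (Begin == End)
      return false;          // fewer than N items
  return Begin == End;       // exactly N iff we stopped at the end
}

int main() {
  std::list<int> Uses{1, 2, 3};
  assert(hasNItems(Uses.begin(), Uses.end(), 3));
  assert(!hasNItems(Uses.begin(), Uses.end(), 2));
  assert(!hasNItems(Uses.begin(), Uses.end(), 4));
  return 0;
}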
diff --git a/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp b/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp
index 3d8ce888867e..a014ddd9ba0a 100644
--- a/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -138,7 +138,8 @@ PHIExpression::~PHIExpression() = default;
// It also wants to hand us SCC's that are unrelated to the phi node we ask
// about, and have us process them there or risk redoing work.
// Graph traits over a filter iterator also doesn't work that well here.
-// This SCC finder is specialized to walk use-def chains, and only follows instructions,
+// This SCC finder is specialized to walk use-def chains, and only follows
+// instructions,
// not generic values (arguments, etc).
struct TarjanSCC {
@@ -170,8 +171,10 @@ private:
Root[I] = std::min(Root.lookup(I), Root.lookup(Op));
}
}
- // See if we really were the root of a component, by seeing if we still have our DFSNumber.
- // If we do, we are the root of the component, and we have completed a component. If we do not,
+ // See if we really were the root of a component, by seeing if we still have
+ // our DFSNumber.
+ // If we do, we are the root of the component, and we have completed a
+ // component. If we do not,
// we are not the root of a component, and belong on the component stack.
if (Root.lookup(I) == OurDFS) {
unsigned ComponentID = Components.size();
@@ -2254,12 +2257,13 @@ void NewGVN::initializeCongruenceClasses(Function &F) {
MemoryAccessToClass[MSSA->getLiveOnEntryDef()] =
createMemoryClass(MSSA->getLiveOnEntryDef());
- for (auto &B : F) {
+ for (auto DTN : nodes(DT)) {
+ BasicBlock *BB = DTN->getBlock();
// All MemoryAccesses are equivalent to live on entry to start. They must
// be initialized to something so that initial changes are noticed. For
// the maximal answer, we initialize them all to be the same as
// liveOnEntry.
- auto *MemoryBlockDefs = MSSA->getBlockDefs(&B);
+ auto *MemoryBlockDefs = MSSA->getBlockDefs(BB);
if (MemoryBlockDefs)
for (const auto &Def : *MemoryBlockDefs) {
MemoryAccessToClass[&Def] = TOPClass;
@@ -2274,7 +2278,7 @@ void NewGVN::initializeCongruenceClasses(Function &F) {
if (MD && isa<StoreInst>(MD->getMemoryInst()))
TOPClass->incStoreCount();
}
- for (auto &I : B) {
+ for (auto &I : *BB) {
// Don't insert void terminators into the class. We don't value number
// them, and they just end up sitting in TOP.
if (isa<TerminatorInst>(I) && I.getType()->isVoidTy())
@@ -2518,14 +2522,11 @@ void NewGVN::verifyMemoryCongruency() const {
auto ReachableAccessPred =
[&](const std::pair<const MemoryAccess *, CongruenceClass *> Pair) {
bool Result = ReachableBlocks.count(Pair.first->getBlock());
- if (!Result)
+ if (!Result || MSSA->isLiveOnEntryDef(Pair.first) ||
+ MemoryToDFSNum(Pair.first) == 0)
return false;
- if (MSSA->isLiveOnEntryDef(Pair.first))
- return true;
if (auto *MemDef = dyn_cast<MemoryDef>(Pair.first))
return !isInstructionTriviallyDead(MemDef->getMemoryInst());
- if (MemoryToDFSNum(Pair.first) == 0)
- return false;
return true;
};
@@ -2719,25 +2720,13 @@ bool NewGVN::runGVN() {
}
// Now a standard depth first ordering of the domtree is equivalent to RPO.
- auto DFI = df_begin(DT->getRootNode());
- for (auto DFE = df_end(DT->getRootNode()); DFI != DFE; ++DFI) {
- BasicBlock *B = DFI->getBlock();
+ for (auto DTN : depth_first(DT->getRootNode())) {
+ BasicBlock *B = DTN->getBlock();
const auto &BlockRange = assignDFSNumbers(B, ICount);
BlockInstRange.insert({B, BlockRange});
ICount += BlockRange.second - BlockRange.first;
}
- // Handle forward unreachable blocks and figure out which blocks
- // have single preds.
- for (auto &B : F) {
- // Assign numbers to unreachable blocks.
- if (!DFI.nodeVisited(DT->getNode(&B))) {
- const auto &BlockRange = assignDFSNumbers(&B, ICount);
- BlockInstRange.insert({&B, BlockRange});
- ICount += BlockRange.second - BlockRange.first;
- }
- }
-
TouchedInstructions.resize(ICount);
// Ensure we don't end up resizing the expressionToClass map, as
// that can be quite expensive. At most, we have one expression per
diff --git a/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index 49ce0262c97b..659353e912fe 100644
--- a/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -352,10 +352,20 @@ Value *StructurizeCFG::invert(Value *Condition) {
if (Instruction *Inst = dyn_cast<Instruction>(Condition)) {
// Third: Check all the users for an invert
BasicBlock *Parent = Inst->getParent();
- for (User *U : Condition->users())
- if (Instruction *I = dyn_cast<Instruction>(U))
+ for (User *U : Condition->users()) {
+ if (Instruction *I = dyn_cast<Instruction>(U)) {
if (I->getParent() == Parent && match(I, m_Not(m_Specific(Condition))))
return I;
+ }
+ }
+
+ // Avoid creating a new instruction in the common case of a compare.
+ if (CmpInst *Cmp = dyn_cast<CmpInst>(Inst)) {
+ if (Cmp->hasOneUse()) {
+ Cmp->setPredicate(Cmp->getInversePredicate());
+ return Cmp;
+ }
+ }
// Last option: Create a new instruction
return BinaryOperator::CreateNot(Condition, "", Parent->getTerminator());
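
The StructurizeCFG change above avoids materializing a new 'not' instruction by flipping the predicate of a single-use compare in place. A toy check (plain C++, hypothetical names, not LLVM's CmpInst API) that flipping a predicate really yields the logical negation:

#include <cassert>

enum Pred { SLT, SGE };                              // a tiny subset of icmp predicates
static Pred inverse(Pred P) { return P == SLT ? SGE : SLT; }
static bool eval(Pred P, int A, int B) { return P == SLT ? A < B : A >= B; }

int main() {
  // The flip is only legal when the compare has a single use; other users
  // would otherwise see the inverted condition.
  for (int A = -2; A <= 2; ++A)
    for (int B = -2; B <= 2; ++B)
      assert(eval(inverse(SLT), A, B) == !eval(SLT, A, B));
  return 0;
}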
diff --git a/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp b/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp
index 60ae3745c835..9f4d9c7e3981 100644
--- a/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp
@@ -73,17 +73,17 @@ bool llvm::decomposeBitTestICmp(const ICmpInst *I, CmpInst::Predicate &Pred,
default:
return false;
case ICmpInst::ICMP_SLT:
- // X < 0 is equivalent to (X & SignBit) != 0.
+ // X < 0 is equivalent to (X & SignMask) != 0.
if (!C->isZero())
return false;
- Y = ConstantInt::get(I->getContext(), APInt::getSignBit(C->getBitWidth()));
+ Y = ConstantInt::get(I->getContext(), APInt::getSignMask(C->getBitWidth()));
Pred = ICmpInst::ICMP_NE;
break;
case ICmpInst::ICMP_SGT:
- // X > -1 is equivalent to (X & SignBit) == 0.
+ // X > -1 is equivalent to (X & SignMask) == 0.
if (!C->isAllOnesValue())
return false;
- Y = ConstantInt::get(I->getContext(), APInt::getSignBit(C->getBitWidth()));
+ Y = ConstantInt::get(I->getContext(), APInt::getSignMask(C->getBitWidth()));
Pred = ICmpInst::ICMP_EQ;
break;
case ICmpInst::ICMP_ULT:
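
The decomposeBitTestICmp hunk above relies on two sign-mask identities: X < 0 is (X & SignMask) != 0, and X > -1 is (X & SignMask) == 0. A short standalone check (plain C++, not LLVM code):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t SignMask = 1u << 31;                // analogous to APInt::getSignMask(32)
  for (int64_t V = -3; V <= 3; ++V) {
    int32_t X = (int32_t)V;
    assert((X < 0)  == (((uint32_t)X & SignMask) != 0));   // slt 0  ->  ne after masking
    assert((X > -1) == (((uint32_t)X & SignMask) == 0));   // sgt -1 ->  eq after masking
  }
  return 0;
}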
diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 644d93b727b3..82552684b832 100644
--- a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -112,24 +112,6 @@ buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs) {
return buildExtractionBlockSet(BBs.begin(), BBs.end());
}
-/// \brief Helper to call buildExtractionBlockSet with a RegionNode.
-static SetVector<BasicBlock *>
-buildExtractionBlockSet(const RegionNode &RN) {
- if (!RN.isSubRegion())
- // Just a single BasicBlock.
- return buildExtractionBlockSet(RN.getNodeAs<BasicBlock>());
-
- const Region &R = *RN.getNodeAs<Region>();
-
- return buildExtractionBlockSet(R.block_begin(), R.block_end());
-}
-
-CodeExtractor::CodeExtractor(BasicBlock *BB, bool AggregateArgs,
- BlockFrequencyInfo *BFI,
- BranchProbabilityInfo *BPI)
- : DT(nullptr), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
- BPI(BPI), Blocks(buildExtractionBlockSet(BB)), NumExitBlocks(~0U) {}
-
CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
bool AggregateArgs, BlockFrequencyInfo *BFI,
BranchProbabilityInfo *BPI)
@@ -143,12 +125,6 @@ CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs,
BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks())),
NumExitBlocks(~0U) {}
-CodeExtractor::CodeExtractor(DominatorTree &DT, const RegionNode &RN,
- bool AggregateArgs, BlockFrequencyInfo *BFI,
- BranchProbabilityInfo *BPI)
- : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
- BPI(BPI), Blocks(buildExtractionBlockSet(RN)), NumExitBlocks(~0U) {}
-
/// definedInRegion - Return true if the specified value is defined in the
/// extracted region.
static bool definedInRegion(const SetVector<BasicBlock *> &Blocks, Value *V) {
diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
index 49b4bd92faf4..089f2b5f3b18 100644
--- a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -85,6 +85,7 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
UsesToRewrite.clear();
Instruction *I = Worklist.pop_back_val();
+ assert(!I->getType()->isTokenTy() && "Tokens shouldn't be in the worklist");
BasicBlock *InstBB = I->getParent();
Loop *L = LI.getLoopFor(InstBB);
assert(L && "Instruction belongs to a BB that's not part of a loop");
@@ -96,13 +97,6 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
if (ExitBlocks.empty())
continue;
- // Tokens cannot be used in PHI nodes, so we skip over them.
- // We can run into tokens which are live out of a loop with catchswitch
- // instructions in Windows EH if the catchswitch has one catchpad which
- // is inside the loop and another which is not.
- if (I->getType()->isTokenTy())
- continue;
-
for (Use &U : I->uses()) {
Instruction *User = cast<Instruction>(U.getUser());
BasicBlock *UserBB = User->getParent();
@@ -214,13 +208,9 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
// Post process PHI instructions that were inserted into another disjoint
// loop and update their exits properly.
- for (auto *PostProcessPN : PostProcessPHIs) {
- if (PostProcessPN->use_empty())
- continue;
-
- // Reprocess each PHI instruction.
- Worklist.push_back(PostProcessPN);
- }
+ for (auto *PostProcessPN : PostProcessPHIs)
+ if (!PostProcessPN->use_empty())
+ Worklist.push_back(PostProcessPN);
// Keep track of PHI nodes that we want to remove because they did not have
// any uses rewritten.
@@ -241,7 +231,7 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
// Compute the set of BasicBlocks in the loop `L` dominating at least one exit.
static void computeBlocksDominatingExits(
Loop &L, DominatorTree &DT, SmallVector<BasicBlock *, 8> &ExitBlocks,
- SmallPtrSet<BasicBlock *, 8> &BlocksDominatingExits) {
+ SmallSetVector<BasicBlock *, 8> &BlocksDominatingExits) {
SmallVector<BasicBlock *, 8> BBWorklist;
// We start from the exit blocks, as every block trivially dominates itself
@@ -279,7 +269,7 @@ static void computeBlocksDominatingExits(
if (!L.contains(IDomBB))
continue;
- if (BlocksDominatingExits.insert(IDomBB).second)
+ if (BlocksDominatingExits.insert(IDomBB))
BBWorklist.push_back(IDomBB);
}
}
@@ -293,7 +283,7 @@ bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI,
if (ExitBlocks.empty())
return false;
- SmallPtrSet<BasicBlock *, 8> BlocksDominatingExits;
+ SmallSetVector<BasicBlock *, 8> BlocksDominatingExits;
// We want to avoid use-scanning leveraging dominance information.
// If a block doesn't dominate any of the loop exits, then none of the values
@@ -315,6 +305,13 @@ bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI,
!isa<PHINode>(I.user_back())))
continue;
+ // Tokens cannot be used in PHI nodes, so we skip over them.
+ // We can run into tokens which are live out of a loop with catchswitch
+ // instructions in Windows EH if the catchswitch has one catchpad which
+ // is inside the loop and another which is not.
+ if (I.getType()->isTokenTy())
+ continue;
+
Worklist.push_back(&I);
}
}
diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp
index 18b29226c2ef..8c5442762643 100644
--- a/contrib/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp
@@ -1227,13 +1227,9 @@ bool llvm::LowerDbgDeclare(Function &F) {
// This is a call by-value or some other instruction that
// takes a pointer to the variable. Insert a *value*
// intrinsic that describes the alloca.
- SmallVector<uint64_t, 1> NewDIExpr;
- auto *DIExpr = DDI->getExpression();
- NewDIExpr.push_back(dwarf::DW_OP_deref);
- NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end());
DIB.insertDbgValueIntrinsic(AI, 0, DDI->getVariable(),
- DIB.createExpression(NewDIExpr),
- DDI->getDebugLoc(), CI);
+ DDI->getExpression(), DDI->getDebugLoc(),
+ CI);
}
}
DDI->eraseFromParent();
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
index 73c14f5606b7..5c21490793e7 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -46,6 +46,11 @@ static cl::opt<unsigned> UnrollForcePeelCount(
"unroll-force-peel-count", cl::init(0), cl::Hidden,
cl::desc("Force a peel count regardless of profiling information."));
+// Designates that a Phi is estimated to become invariant after an "infinite"
+// number of loop iterations (i.e. may only become an invariant if the loop is
+// fully unrolled).
+static const unsigned InfiniteIterationsToInvariance = UINT_MAX;
+
// Check whether we are capable of peeling this loop.
static bool canPeel(Loop *L) {
// Make sure the loop is in simplified form
@@ -66,10 +71,62 @@ static bool canPeel(Loop *L) {
return true;
}
+// This function calculates the number of iterations after which the given Phi
+// becomes an invariant. The pre-calculated values are memorized in the map. The
+// function (shortcut is I) is calculated according to the following definition:
+// Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge].
+// If %y is a loop invariant, then I(%x) = 1.
+// If %y is a Phi from the loop header, I(%x) = I(%y) + 1.
+// Otherwise, I(%x) is infinite.
+// TODO: Actually if %y is an expression that depends only on Phi %z and some
+// loop invariants, we can estimate I(%x) = I(%z) + 1. The example
+// looks like:
+// %x = phi(0, %a), <-- becomes invariant starting from 3rd iteration.
+// %y = phi(0, 5),
+// %a = %y + 1.
+static unsigned calculateIterationsToInvariance(
+ PHINode *Phi, Loop *L, BasicBlock *BackEdge,
+ SmallDenseMap<PHINode *, unsigned> &IterationsToInvariance) {
+ assert(Phi->getParent() == L->getHeader() &&
+ "Non-loop Phi should not be checked for turning into invariant.");
+ assert(BackEdge == L->getLoopLatch() && "Wrong latch?");
+ // If we already know the answer, take it from the map.
+ auto I = IterationsToInvariance.find(Phi);
+ if (I != IterationsToInvariance.end())
+ return I->second;
+
+ // Otherwise we need to analyze the input from the back edge.
+ Value *Input = Phi->getIncomingValueForBlock(BackEdge);
+ // Place infinity to map to avoid infinite recursion for cycled Phis. Such
+ // cycles can never stop on an invariant.
+ IterationsToInvariance[Phi] = InfiniteIterationsToInvariance;
+ unsigned ToInvariance = InfiniteIterationsToInvariance;
+
+ if (L->isLoopInvariant(Input))
+ ToInvariance = 1u;
+ else if (PHINode *IncPhi = dyn_cast<PHINode>(Input)) {
+ // Only consider Phis in header block.
+ if (IncPhi->getParent() != L->getHeader())
+ return InfiniteIterationsToInvariance;
+ // If the input becomes an invariant after X iterations, then our Phi
+ // becomes an invariant after X + 1 iterations.
+ unsigned InputToInvariance = calculateIterationsToInvariance(
+ IncPhi, L, BackEdge, IterationsToInvariance);
+ if (InputToInvariance != InfiniteIterationsToInvariance)
+ ToInvariance = InputToInvariance + 1u;
+ }
+
+ // If we found that this Phi lies in an invariant chain, update the map.
+ if (ToInvariance != InfiniteIterationsToInvariance)
+ IterationsToInvariance[Phi] = ToInvariance;
+ return ToInvariance;
+}
+
// Return the number of iterations we want to peel off.
void llvm::computePeelCount(Loop *L, unsigned LoopSize,
TargetTransformInfo::UnrollingPreferences &UP,
unsigned &TripCount) {
+ assert(LoopSize > 0 && "Zero loop size is not allowed!");
UP.PeelCount = 0;
if (!canPeel(L))
return;
@@ -78,30 +135,37 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
if (!L->empty())
return;
- // Try to find a Phi node that has the same loop invariant as an input from
- // its only back edge. If there is such Phi, peeling 1 iteration from the
- // loop is profitable, because starting from 2nd iteration we will have an
- // invariant instead of this Phi.
- if (LoopSize <= UP.Threshold) {
+ // Here we try to get rid of Phis which become invariants after 1, 2, ..., N
+ // iterations of the loop. For this we compute the number of iterations after
+ // which every Phi is guaranteed to become an invariant, and try to peel the
+ // maximum number of iterations among these values, thus turning all those
+ // Phis into invariants.
+ // First, check that we can peel at least one iteration.
+ if (2 * LoopSize <= UP.Threshold && UnrollPeelMaxCount > 0) {
+ // Store the pre-calculated values here.
+ SmallDenseMap<PHINode *, unsigned> IterationsToInvariance;
+ // Now go through all Phis to calculate the number of iterations they
+ // need to become invariants.
+ unsigned DesiredPeelCount = 0;
BasicBlock *BackEdge = L->getLoopLatch();
assert(BackEdge && "Loop is not in simplified form?");
- BasicBlock *Header = L->getHeader();
- // Iterate over Phis to find one with invariant input on back edge.
- bool FoundCandidate = false;
- PHINode *Phi;
- for (auto BI = Header->begin(); isa<PHINode>(&*BI); ++BI) {
- Phi = cast<PHINode>(&*BI);
- Value *Input = Phi->getIncomingValueForBlock(BackEdge);
- if (L->isLoopInvariant(Input)) {
- FoundCandidate = true;
- break;
- }
+ for (auto BI = L->getHeader()->begin(); isa<PHINode>(&*BI); ++BI) {
+ PHINode *Phi = cast<PHINode>(&*BI);
+ unsigned ToInvariance = calculateIterationsToInvariance(
+ Phi, L, BackEdge, IterationsToInvariance);
+ if (ToInvariance != InfiniteIterationsToInvariance)
+ DesiredPeelCount = std::max(DesiredPeelCount, ToInvariance);
}
- if (FoundCandidate) {
- DEBUG(dbgs() << "Peel one iteration to get rid of " << *Phi
- << " because starting from 2nd iteration it is always"
- << " an invariant\n");
- UP.PeelCount = 1;
+ if (DesiredPeelCount > 0) {
+ // Pay respect to limitations implied by loop size and the max peel count.
+ unsigned MaxPeelCount = UnrollPeelMaxCount;
+ MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1);
+ DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
+ // Consider max peel count limitation.
+ assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
+ DEBUG(dbgs() << "Peel " << DesiredPeelCount << " iteration(s) to turn"
+ << " some Phis into invariants.\n");
+ UP.PeelCount = DesiredPeelCount;
return;
}
}
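
calculateIterationsToInvariance above memoizes, per header phi, how many peeled iterations it takes for the phi to become loop invariant. A hypothetical standalone sketch of the same recurrence over a toy phi graph (names and data structures are illustrative, not the LLVM helper):

#include <climits>
#include <cstdio>
#include <map>
#include <string>

static const unsigned Infinite = UINT_MAX;

// BackEdgeInput maps each header phi to its back-edge input: "inv" for a loop
// invariant, another phi's name, or anything else.
static unsigned iterationsToInvariance(
    const std::string &Phi,
    const std::map<std::string, std::string> &BackEdgeInput,
    std::map<std::string, unsigned> &Memo) {
  auto It = Memo.find(Phi);
  if (It != Memo.end())
    return It->second;
  Memo[Phi] = Infinite;                         // break phi cycles pessimistically
  const std::string &In = BackEdgeInput.at(Phi);
  unsigned Result = Infinite;
  if (In == "inv")
    Result = 1;                                 // invariant input: one peel suffices
  else if (BackEdgeInput.count(In)) {           // input is another header phi
    unsigned Sub = iterationsToInvariance(In, BackEdgeInput, Memo);
    if (Sub != Infinite)
      Result = Sub + 1;
  }
  if (Result != Infinite)
    Memo[Phi] = Result;
  return Result;
}

int main() {
  // %x = phi(..., %y), %y = phi(..., <invariant>): I(%y) = 1, I(%x) = 2.
  std::map<std::string, std::string> G{{"x", "y"}, {"y", "inv"}};
  std::map<std::string, unsigned> Memo;
  std::printf("I(x) = %u\n", iterationsToInvariance("x", G, Memo));
  return 0;
}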
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 127a44df5344..2f575b9d5027 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -3086,7 +3086,7 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) {
if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
(QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
return false;
- if (PostBB->getNumUses() != 2 || QBI->getParent()->getNumUses() != 2)
+ if (!PostBB->hasNUses(2) || !QBI->getParent()->hasNUses(2))
return false;
// OK, this is a sequence of two diamonds or triangles.
diff --git a/contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp b/contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp
index 4aeea02b1b1b..83bd29dbca65 100644
--- a/contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp
@@ -24,6 +24,11 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
if (DL.getTypeSizeInBits(StoredVal->getType()) < DL.getTypeSizeInBits(LoadTy))
return false;
+ // Don't coerce non-integral pointers to integers or vice versa.
+ if (DL.isNonIntegralPointerType(StoredVal->getType()) !=
+ DL.isNonIntegralPointerType(LoadTy))
+ return false;
+
return true;
}
diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 595b2ec88943..7eb8fabe0b2f 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -422,7 +422,8 @@ protected:
// When we if-convert we need to create edge masks. We have to cache values
// so that we don't end up with exponential recursion/IR.
typedef DenseMap<std::pair<BasicBlock *, BasicBlock *>, VectorParts>
- EdgeMaskCache;
+ EdgeMaskCacheTy;
+ typedef DenseMap<BasicBlock *, VectorParts> BlockMaskCacheTy;
/// Create an empty loop, based on the loop ranges of the old loop.
void createEmptyLoop();
@@ -785,7 +786,8 @@ protected:
/// Store instructions that should be predicated, as a pair
/// <StoreInst, Predicate>
SmallVector<std::pair<Instruction *, Value *>, 4> PredicatedInstructions;
- EdgeMaskCache MaskCache;
+ EdgeMaskCacheTy EdgeMaskCache;
+ BlockMaskCacheTy BlockMaskCache;
/// Trip count of the original loop.
Value *TripCount;
/// Trip count of the widened loop (TripCount - TripCount % (VF*UF))
@@ -4560,8 +4562,8 @@ InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
// Look for cached value.
std::pair<BasicBlock *, BasicBlock *> Edge(Src, Dst);
- EdgeMaskCache::iterator ECEntryIt = MaskCache.find(Edge);
- if (ECEntryIt != MaskCache.end())
+ EdgeMaskCacheTy::iterator ECEntryIt = EdgeMaskCache.find(Edge);
+ if (ECEntryIt != EdgeMaskCache.end())
return ECEntryIt->second;
VectorParts SrcMask = createBlockInMask(Src);
@@ -4580,11 +4582,11 @@ InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
for (unsigned part = 0; part < UF; ++part)
EdgeMask[part] = Builder.CreateAnd(EdgeMask[part], SrcMask[part]);
- MaskCache[Edge] = EdgeMask;
+ EdgeMaskCache[Edge] = EdgeMask;
return EdgeMask;
}
- MaskCache[Edge] = SrcMask;
+ EdgeMaskCache[Edge] = SrcMask;
return SrcMask;
}
@@ -4592,10 +4594,17 @@ InnerLoopVectorizer::VectorParts
InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) {
assert(OrigLoop->contains(BB) && "Block is not a part of a loop");
+ // Look for cached value.
+ BlockMaskCacheTy::iterator BCEntryIt = BlockMaskCache.find(BB);
+ if (BCEntryIt != BlockMaskCache.end())
+ return BCEntryIt->second;
+
// Loop incoming mask is all-one.
if (OrigLoop->getHeader() == BB) {
Value *C = ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 1);
- return getVectorValue(C);
+ const VectorParts &BlockMask = getVectorValue(C);
+ BlockMaskCache[BB] = BlockMask;
+ return BlockMask;
}
// This is the block mask. We OR all incoming edges, and with zero.
@@ -4609,6 +4618,7 @@ InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) {
BlockMask[part] = Builder.CreateOr(BlockMask[part], EM[part]);
}
+ BlockMaskCache[BB] = BlockMask;
return BlockMask;
}
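
The BlockMaskCache added above memoizes createBlockInMask results so that if-conversion does not recompute block masks once per reaching path, which blows up on chains of diamonds. A toy sketch of the effect (plain C++, hypothetical shape, not LLVM code):

#include <cstdio>
#include <map>

static long Calls = 0;

// Toy CFG: block i is reached through two edges, both coming from block i-1,
// so its mask is the OR of two identical sub-masks.
static int blockMask(int Block, std::map<int, int> *Memo) {
  ++Calls;
  if (Block == 0)
    return 1;                                   // loop header: all-one mask
  if (Memo) {
    auto It = Memo->find(Block);
    if (It != Memo->end())
      return It->second;
  }
  int M = blockMask(Block - 1, Memo) | blockMask(Block - 1, Memo);
  if (Memo)
    (*Memo)[Block] = M;
  return M;
}

int main() {
  blockMask(20, nullptr);
  std::printf("without cache: %ld calls\n", Calls);   // roughly 2^21 calls
  std::map<int, int> Memo;
  Calls = 0;
  blockMask(20, &Memo);
  std::printf("with cache:    %ld calls\n", Calls);    // about 41 calls
  return 0;
}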
diff --git a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index da3ac06ab464..554944404708 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4146,8 +4146,8 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
if (AllowReorder && R.shouldReorder()) {
// Conceptually, there is nothing actually preventing us from trying to
// reorder a larger list. In fact, we do exactly this when vectorizing
- // reductions. However, at this point, we only expect to get here from
- // tryToVectorizePair().
+ // reductions. However, at this point, we only expect to get here when
+ // there are exactly two operations.
assert(Ops.size() == 2);
assert(BuildVectorSlice.empty());
Value *ReorderedOps[] = {Ops[1], Ops[0]};
@@ -4904,7 +4904,13 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
// Try to vectorize them.
unsigned NumElts = (SameTypeIt - IncIt);
DEBUG(errs() << "SLP: Trying to vectorize starting at PHIs (" << NumElts << ")\n");
- if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R)) {
+ // The order in which the phi nodes appear in the program does not matter.
+ // So allow tryToVectorizeList to reorder them if it is beneficial. This
+ // is done when there are exactly two elements since tryToVectorizeList
+ // asserts that there are only two values when AllowReorder is true.
+ bool AllowReorder = NumElts == 2;
+ if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R,
+ None, AllowReorder)) {
// Success, start over because instructions might have been changed.
HaveVectorizedPhiNodes = true;
Changed = true;