Diffstat (limited to 'contrib/llvm/lib/Target')
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp | 2
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 13
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td | 15
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 254
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h | 31
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp | 96
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp | 2
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h | 2
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td | 69
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 106
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp | 3
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 2
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 161
-rw-r--r-- contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp | 14
-rw-r--r-- contrib/llvm/lib/Target/AArch64/SVEInstrFormats.td | 365
-rw-r--r-- contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 17
-rw-r--r-- contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td | 5
-rw-r--r-- contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 1
-rw-r--r-- contrib/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp | 26
-rw-r--r-- contrib/llvm/lib/Target/AMDGPU/MIMGInstructions.td | 26
-rw-r--r-- contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 147
-rw-r--r-- contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h | 13
-rw-r--r-- contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp | 22
-rw-r--r-- contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 30
-rw-r--r-- contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h | 3
-rw-r--r-- contrib/llvm/lib/Target/AMDGPU/SIInstructions.td | 10
-rw-r--r-- contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 1
-rw-r--r-- contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 9
-rw-r--r-- contrib/llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 31
-rw-r--r-- contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp | 9
-rw-r--r-- contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp | 2
-rw-r--r-- contrib/llvm/lib/Target/ARM/ARMCallingConv.h | 11
-rw-r--r-- contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp | 2
-rw-r--r-- contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h | 2
-rw-r--r-- contrib/llvm/lib/Target/ARM/ARMFastISel.cpp | 2
-rw-r--r-- contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp | 2
-rw-r--r-- contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 8
-rw-r--r-- contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp | 10
-rw-r--r-- contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 2
-rw-r--r-- contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h | 2
-rw-r--r-- contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp | 8
-rw-r--r-- contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 2
-rw-r--r-- contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 2
-rw-r--r-- contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 51
-rw-r--r-- contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 10
-rw-r--r-- contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 2
-rw-r--r-- contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp | 2
-rw-r--r-- contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 6
-rw-r--r-- contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp | 38
-rw-r--r-- contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp | 2
-rw-r--r-- contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp | 2
-rw-r--r-- contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp | 7
-rw-r--r-- contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp | 123
-rw-r--r-- contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp | 36
-rw-r--r-- contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.h | 1
-rw-r--r-- contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h | 2
-rw-r--r-- contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 6
-rw-r--r-- contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h | 2
-rw-r--r-- contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp | 2
-rw-r--r-- contrib/llvm/lib/Target/Mips/MipsCallLowering.cpp | 3
-rw-r--r-- contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp | 12
-rw-r--r-- contrib/llvm/lib/Target/Mips/MipsFastISel.cpp | 2
-rw-r--r-- contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp | 6
-rw-r--r-- contrib/llvm/lib/Target/Mips/MipsISelLowering.h | 5
-rw-r--r-- contrib/llvm/lib/Target/Mips/MipsInstructionSelector.cpp | 27
-rw-r--r-- contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp | 3
-rw-r--r-- contrib/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp | 1
-rw-r--r-- contrib/llvm/lib/Target/Mips/MipsSubtarget.h | 2
-rw-r--r-- contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h | 2
-rw-r--r-- contrib/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp | 2
-rw-r--r-- contrib/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h | 2
-rw-r--r-- contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp | 2
-rw-r--r-- contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h | 4
-rw-r--r-- contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 4
-rw-r--r-- contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 22
-rw-r--r-- contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h | 2
-rw-r--r-- contrib/llvm/lib/Target/PowerPC/PPC.h | 4
-rw-r--r-- contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp | 16
-rw-r--r-- contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp | 2
-rw-r--r-- contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp | 2
-rw-r--r-- contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 6
-rw-r--r-- contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp | 4
-rw-r--r-- contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 12
-rw-r--r-- contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h | 6
-rw-r--r-- contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 4
-rw-r--r-- contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp | 2
-rw-r--r-- contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp | 6
-rw-r--r-- contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp | 2
-rw-r--r-- contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h | 4
-rw-r--r-- contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 2
-rw-r--r-- contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 2
-rw-r--r-- contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp | 2
-rw-r--r-- contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp | 8
-rw-r--r-- contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp | 8
-rw-r--r-- contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp | 6
-rw-r--r-- contrib/llvm/lib/Target/Sparc/Sparc.h | 2
-rw-r--r-- contrib/llvm/lib/Target/Sparc/SparcISelLowering.h | 4
-rw-r--r-- contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp | 2
-rw-r--r-- contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp | 4
-rw-r--r-- contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp | 45
-rw-r--r-- contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h | 17
-rw-r--r-- contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 80
-rw-r--r-- contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h | 1
-rw-r--r-- contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td | 48
-rw-r--r-- contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp | 5
-rw-r--r-- contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h | 4
-rw-r--r-- contrib/llvm/lib/Target/SystemZ/SystemZOperands.td | 1
-rw-r--r-- contrib/llvm/lib/Target/SystemZ/SystemZOperators.td | 10
-rw-r--r-- contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp | 8
-rw-r--r-- contrib/llvm/lib/Target/Target.cpp | 2
-rw-r--r-- contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp | 4
-rw-r--r-- contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp | 6
-rw-r--r-- contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp | 2
-rw-r--r-- contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp | 74
-rw-r--r-- contrib/llvm/lib/Target/X86/X86CallingConv.h | 2
-rw-r--r-- contrib/llvm/lib/Target/X86/X86CmovConversion.cpp | 2
-rw-r--r-- contrib/llvm/lib/Target/X86/X86FastISel.cpp | 6
-rw-r--r-- contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp | 2
-rw-r--r-- contrib/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp | 7
-rw-r--r-- contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp | 6
-rw-r--r-- contrib/llvm/lib/Target/X86/X86FrameLowering.cpp | 65
-rw-r--r-- contrib/llvm/lib/Target/X86/X86ISelLowering.cpp | 317
-rw-r--r-- contrib/llvm/lib/Target/X86/X86ISelLowering.h | 7
-rw-r--r-- contrib/llvm/lib/Target/X86/X86InstrFoldTables.cpp | 2
-rw-r--r-- contrib/llvm/lib/Target/X86/X86InstrInfo.cpp | 2
-rw-r--r-- contrib/llvm/lib/Target/X86/X86InstrInfo.td | 16
-rw-r--r-- contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td | 18
-rwxr-xr-x contrib/llvm/lib/Target/X86/X86SchedBroadwell.td | 46
-rw-r--r-- contrib/llvm/lib/Target/X86/X86SchedHaswell.td | 52
-rw-r--r-- contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td | 53
-rw-r--r-- contrib/llvm/lib/Target/X86/X86SchedSkylakeClient.td | 48
-rwxr-xr-x contrib/llvm/lib/Target/X86/X86SchedSkylakeServer.td | 48
-rw-r--r-- contrib/llvm/lib/Target/X86/X86Schedule.td | 10
-rw-r--r-- contrib/llvm/lib/Target/X86/X86ScheduleAtom.td | 20
-rw-r--r-- contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td | 37
-rw-r--r-- contrib/llvm/lib/Target/X86/X86ScheduleSLM.td | 12
-rw-r--r-- contrib/llvm/lib/Target/X86/X86ScheduleZnver1.td | 12
-rw-r--r-- contrib/llvm/lib/Target/X86/X86Subtarget.h | 2
-rw-r--r-- contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 14
-rw-r--r-- contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp | 6
-rw-r--r-- contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp | 36
-rw-r--r-- contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h | 6
-rw-r--r-- contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp | 4
-rw-r--r-- contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h | 2
-rw-r--r-- contrib/llvm/lib/Target/XCore/XCoreSubtarget.h | 2
146 files changed, 2190 insertions(+), 1024 deletions(-)
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index 43a3ae77a170..572d1c22feea 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -3774,7 +3774,7 @@ bool AArch64FastISel::selectRet(const Instruction *I) {
if (Ret->getNumOperands() > 0) {
CallingConv::ID CC = F.getCallingConv();
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
+ GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 0c72f2ebee18..de762a7bb1d4 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8580,7 +8580,7 @@ static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
SDValue
AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG,
- std::vector<SDNode *> *Created) const {
+ SmallVectorImpl<SDNode *> &Created) const {
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
if (isIntDivCheap(N->getValueType(0), Attr))
return SDValue(N,0); // Lower SDIV as SDIV
@@ -8603,11 +8603,9 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
SDValue CSel = DAG.getNode(AArch64ISD::CSEL, DL, VT, Add, N0, CCVal, Cmp);
- if (Created) {
- Created->push_back(Cmp.getNode());
- Created->push_back(Add.getNode());
- Created->push_back(CSel.getNode());
- }
+ Created.push_back(Cmp.getNode());
+ Created.push_back(Add.getNode());
+ Created.push_back(CSel.getNode());
// Divide by pow2.
SDValue SRA =
@@ -8618,8 +8616,7 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
if (Divisor.isNonNegative())
return SRA;
- if (Created)
- Created->push_back(SRA.getNode());
+ Created.push_back(SRA.getNode());
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
}
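
Note on the change above: the new signature passes the created-nodes container by reference, so BuildSDIVPow2 appends unconditionally instead of checking a possibly-null pointer. A minimal caller-side sketch, assuming a combiner-style AddToWorklist helper (not the actual DAGCombiner code):

// Sketch only: collecting the nodes the target hook materializes.
// "AddToWorklist" is an assumed helper, not part of this patch.
SmallVector<SDNode *, 8> Built;
if (SDValue Res = TLI.BuildSDIVPow2(N, Divisor, DAG, Built)) {
  for (SDNode *NewNode : Built)
    AddToWorklist(NewNode); // revisit everything the target created
  return Res;
}
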
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 592845640a44..d783c8a6048c 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -644,7 +644,7 @@ private:
SelectionDAG &DAG) const;
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
- std::vector<SDNode *> *Created) const override;
+ SmallVectorImpl<SDNode *> &Created) const override;
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
int &ExtraSteps, bool &UseOneConst,
bool Reciprocal) const override;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 1060c64f7b5d..15d61cd1ad26 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -57,6 +57,14 @@ class EncodedI<string cstr, list<dag> pattern> : AArch64Inst<NormalFrm, cstr> {
let Size = 4;
}
+// Enum describing whether an instruction is
+// destructive in its first source operand.
+class DestructiveInstTypeEnum<bits<1> val> {
+ bits<1> Value = val;
+}
+def NotDestructive : DestructiveInstTypeEnum<0>;
+def Destructive : DestructiveInstTypeEnum<1>;
+
// Normal instructions
class I<dag oops, dag iops, string asm, string operands, string cstr,
list<dag> pattern>
@@ -64,6 +72,13 @@ class I<dag oops, dag iops, string asm, string operands, string cstr,
dag OutOperandList = oops;
dag InOperandList = iops;
let AsmString = !strconcat(asm, operands);
+
+ // Destructive operations (SVE)
+ DestructiveInstTypeEnum DestructiveInstType = NotDestructive;
+ ElementSizeEnum ElementSize = ElementSizeB;
+
+ let TSFlags{3} = DestructiveInstType.Value;
+ let TSFlags{2-0} = ElementSize.Value;
}
class TriOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$MHS, node:$RHS), res>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 230480cf1cea..032d53d19620 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -4851,75 +4851,92 @@ AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
return makeArrayRef(TargetFlags);
}
- /// Constants defining how certain sequences should be outlined.
- /// This encompasses how an outlined function should be called, and what kind of
- /// frame should be emitted for that outlined function.
- ///
- /// \p MachineOutlinerDefault implies that the function should be called with
- /// a save and restore of LR to the stack.
- ///
- /// That is,
- ///
- /// I1 Save LR OUTLINED_FUNCTION:
- /// I2 --> BL OUTLINED_FUNCTION I1
- /// I3 Restore LR I2
- /// I3
- /// RET
- ///
- /// * Call construction overhead: 3 (save + BL + restore)
- /// * Frame construction overhead: 1 (ret)
- /// * Requires stack fixups? Yes
- ///
- /// \p MachineOutlinerTailCall implies that the function is being created from
- /// a sequence of instructions ending in a return.
- ///
- /// That is,
- ///
- /// I1 OUTLINED_FUNCTION:
- /// I2 --> B OUTLINED_FUNCTION I1
- /// RET I2
- /// RET
- ///
- /// * Call construction overhead: 1 (B)
- /// * Frame construction overhead: 0 (Return included in sequence)
- /// * Requires stack fixups? No
- ///
- /// \p MachineOutlinerNoLRSave implies that the function should be called using
- /// a BL instruction, but doesn't require LR to be saved and restored. This
- /// happens when LR is known to be dead.
- ///
- /// That is,
- ///
- /// I1 OUTLINED_FUNCTION:
- /// I2 --> BL OUTLINED_FUNCTION I1
- /// I3 I2
- /// I3
- /// RET
- ///
- /// * Call construction overhead: 1 (BL)
- /// * Frame construction overhead: 1 (RET)
- /// * Requires stack fixups? No
- ///
- /// \p MachineOutlinerThunk implies that the function is being created from
- /// a sequence of instructions ending in a call. The outlined function is
- /// called with a BL instruction, and the outlined function tail-calls the
- /// original call destination.
- ///
- /// That is,
- ///
- /// I1 OUTLINED_FUNCTION:
- /// I2 --> BL OUTLINED_FUNCTION I1
- /// BL f I2
- /// B f
- /// * Call construction overhead: 1 (BL)
- /// * Frame construction overhead: 0
- /// * Requires stack fixups? No
- ///
+/// Constants defining how certain sequences should be outlined.
+/// This encompasses how an outlined function should be called, and what kind of
+/// frame should be emitted for that outlined function.
+///
+/// \p MachineOutlinerDefault implies that the function should be called with
+/// a save and restore of LR to the stack.
+///
+/// That is,
+///
+/// I1 Save LR OUTLINED_FUNCTION:
+/// I2 --> BL OUTLINED_FUNCTION I1
+/// I3 Restore LR I2
+/// I3
+/// RET
+///
+/// * Call construction overhead: 3 (save + BL + restore)
+/// * Frame construction overhead: 1 (ret)
+/// * Requires stack fixups? Yes
+///
+/// \p MachineOutlinerTailCall implies that the function is being created from
+/// a sequence of instructions ending in a return.
+///
+/// That is,
+///
+/// I1 OUTLINED_FUNCTION:
+/// I2 --> B OUTLINED_FUNCTION I1
+/// RET I2
+/// RET
+///
+/// * Call construction overhead: 1 (B)
+/// * Frame construction overhead: 0 (Return included in sequence)
+/// * Requires stack fixups? No
+///
+/// \p MachineOutlinerNoLRSave implies that the function should be called using
+/// a BL instruction, but doesn't require LR to be saved and restored. This
+/// happens when LR is known to be dead.
+///
+/// That is,
+///
+/// I1 OUTLINED_FUNCTION:
+/// I2 --> BL OUTLINED_FUNCTION I1
+/// I3 I2
+/// I3
+/// RET
+///
+/// * Call construction overhead: 1 (BL)
+/// * Frame construction overhead: 1 (RET)
+/// * Requires stack fixups? No
+///
+/// \p MachineOutlinerThunk implies that the function is being created from
+/// a sequence of instructions ending in a call. The outlined function is
+/// called with a BL instruction, and the outlined function tail-calls the
+/// original call destination.
+///
+/// That is,
+///
+/// I1 OUTLINED_FUNCTION:
+/// I2 --> BL OUTLINED_FUNCTION I1
+/// BL f I2
+/// B f
+/// * Call construction overhead: 1 (BL)
+/// * Frame construction overhead: 0
+/// * Requires stack fixups? No
+///
+/// \p MachineOutlinerRegSave implies that the function should be called with a
+/// save and restore of LR to an available register. This allows us to avoid
+/// stack fixups. Note that this outlining variant is compatible with the
+/// NoLRSave case.
+///
+/// That is,
+///
+/// I1 Save LR OUTLINED_FUNCTION:
+/// I2 --> BL OUTLINED_FUNCTION I1
+/// I3 Restore LR I2
+/// I3
+/// RET
+///
+/// * Call construction overhead: 3 (save + BL + restore)
+/// * Frame construction overhead: 1 (ret)
+/// * Requires stack fixups? No
enum MachineOutlinerClass {
MachineOutlinerDefault, /// Emit a save, restore, call, and return.
MachineOutlinerTailCall, /// Only emit a branch.
MachineOutlinerNoLRSave, /// Emit a call and return.
MachineOutlinerThunk, /// Emit a call and tail-call.
+ MachineOutlinerRegSave /// Same as default, but save to a register.
};
enum MachineOutlinerMBBFlags {
@@ -4927,6 +4944,27 @@ enum MachineOutlinerMBBFlags {
HasCalls = 0x4
};
+unsigned
+AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
+ MachineFunction *MF = C.getMF();
+ const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
+ MF->getSubtarget().getRegisterInfo());
+
+ // Check if there is an available register across the sequence that we can
+ // use.
+ for (unsigned Reg : AArch64::GPR64RegClass) {
+ if (!ARI->isReservedReg(*MF, Reg) &&
+ Reg != AArch64::LR && // LR is not reserved, but don't use it.
+ Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
+ Reg != AArch64::X17 && // Ditto for X17.
+ C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
+ return Reg;
+ }
+
+ // No suitable register. Return 0.
+ return 0u;
+}
+
outliner::OutlinedFunction
AArch64InstrInfo::getOutliningCandidateInfo(
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
@@ -5015,11 +5053,27 @@ AArch64InstrInfo::getOutliningCandidateInfo(
SetCandidateCallInfo(MachineOutlinerNoLRSave, 4);
}
- // LR is live, so we need to save it to the stack.
+ // LR is live, so we need to save it. Decide whether it should be saved to
+ // the stack, or if it can be saved to a register.
else {
- FrameID = MachineOutlinerDefault;
- NumBytesToCreateFrame = 4;
- SetCandidateCallInfo(MachineOutlinerDefault, 12);
+ if (std::all_of(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
+ [this](outliner::Candidate &C) {
+ return findRegisterToSaveLRTo(C);
+ })) {
+ // Every candidate has an available callee-saved register for the save.
+ // We can save LR to a register.
+ FrameID = MachineOutlinerRegSave;
+ NumBytesToCreateFrame = 4;
+ SetCandidateCallInfo(MachineOutlinerRegSave, 12);
+ }
+
+ else {
+ // At least one candidate does not have an available callee-saved
+ // register. We must save LR to the stack.
+ FrameID = MachineOutlinerDefault;
+ NumBytesToCreateFrame = 4;
+ SetCandidateCallInfo(MachineOutlinerDefault, 12);
+ }
}
// Check if the range contains a call. These require a save + restore of the
@@ -5088,7 +5142,7 @@ AArch64InstrInfo::getMachineOutlinerMBBFlags(MachineBasicBlock &MBB) const {
MBB.rend(),
[&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
- if (!LRU.available(AArch64::LR))
+ if (!LRU.available(AArch64::LR))
Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
return Flags;
@@ -5114,14 +5168,14 @@ AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
// ahead and skip over them.
if (MI.isKill())
return outliner::InstrType::Invisible;
-
+
// Is this a terminator for a basic block?
if (MI.isTerminator()) {
// Is this the end of a function?
if (MI.getParent()->succ_empty())
return outliner::InstrType::Legal;
-
+
// It's not, so don't outline it.
return outliner::InstrType::Illegal;
}
@@ -5424,7 +5478,7 @@ void AArch64InstrInfo::buildOutlinedFrame(
MBB.insert(MBB.end(), ret);
// Did we have to modify the stack by saving the link register?
- if (OF.FrameConstructionID == MachineOutlinerNoLRSave)
+ if (OF.FrameConstructionID != MachineOutlinerDefault)
return;
// We modified the stack.
@@ -5457,13 +5511,41 @@ MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
// We want to return the spot where we inserted the call.
MachineBasicBlock::iterator CallPt;
- // We have a default call. Save the link register.
- MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
- .addReg(AArch64::SP, RegState::Define)
- .addReg(AArch64::LR)
- .addReg(AArch64::SP)
- .addImm(-16);
- It = MBB.insert(It, STRXpre);
+ // Instructions for saving and restoring LR around the call instruction we're
+ // going to insert.
+ MachineInstr *Save;
+ MachineInstr *Restore;
+ // Can we save to a register?
+ if (C.CallConstructionID == MachineOutlinerRegSave) {
+ // FIXME: This logic should be sunk into a target-specific interface so that
+ // we don't have to recompute the register.
+ unsigned Reg = findRegisterToSaveLRTo(C);
+ assert(Reg != 0 && "No callee-saved register available?");
+
+ // Save and restore LR from that register.
+ Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::LR)
+ .addImm(0);
+ Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
+ .addReg(AArch64::XZR)
+ .addReg(Reg)
+ .addImm(0);
+ } else {
+ // We have the default case. Save and restore from SP.
+ Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
+ .addReg(AArch64::SP, RegState::Define)
+ .addReg(AArch64::LR)
+ .addReg(AArch64::SP)
+ .addImm(-16);
+ Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
+ .addReg(AArch64::SP, RegState::Define)
+ .addReg(AArch64::LR, RegState::Define)
+ .addReg(AArch64::SP)
+ .addImm(16);
+ }
+
+ It = MBB.insert(It, Save);
It++;
// Insert the call.
@@ -5472,13 +5554,11 @@ MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
CallPt = It;
It++;
- // Restore the link register.
- MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
- .addReg(AArch64::SP, RegState::Define)
- .addReg(AArch64::LR, RegState::Define)
- .addReg(AArch64::SP)
- .addImm(16);
- It = MBB.insert(It, LDRXpost);
-
+ It = MBB.insert(It, Restore);
return CallPt;
}
+
+bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
+ MachineFunction &MF) const {
+ return MF.getFunction().optForMinSize();
+}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 0e5953f6216d..11882e238b70 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -249,6 +249,7 @@ public:
insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
MachineBasicBlock::iterator &It, MachineFunction &MF,
const outliner::Candidate &C) const override;
+ bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;
/// Returns true if the instruction sets to an immediate value that can be
/// executed more efficiently.
bool isExynosResetFast(const MachineInstr &MI) const;
@@ -271,6 +272,10 @@ private:
ArrayRef<MachineOperand> Cond) const;
bool substituteCmpToZero(MachineInstr &CmpInstr, unsigned SrcReg,
const MachineRegisterInfo *MRI) const;
+
+ /// Returns an unused general-purpose register which can be used for
+ /// constructing an outlined call if one exists. Returns 0 otherwise.
+ unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const;
};
/// emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg
@@ -339,6 +344,32 @@ static inline bool isIndirectBranchOpcode(int Opc) {
return Opc == AArch64::BR;
}
+// struct TSFlags {
+#define TSFLAG_ELEMENT_SIZE_TYPE(X) (X) // 3-bits
+#define TSFLAG_DESTRUCTIVE_INST_TYPE(X) ((X) << 3) // 1-bit
+// }
+
+namespace AArch64 {
+
+enum ElementSizeType {
+ ElementSizeMask = TSFLAG_ELEMENT_SIZE_TYPE(0x7),
+ ElementSizeNone = TSFLAG_ELEMENT_SIZE_TYPE(0x0),
+ ElementSizeB = TSFLAG_ELEMENT_SIZE_TYPE(0x1),
+ ElementSizeH = TSFLAG_ELEMENT_SIZE_TYPE(0x2),
+ ElementSizeS = TSFLAG_ELEMENT_SIZE_TYPE(0x3),
+ ElementSizeD = TSFLAG_ELEMENT_SIZE_TYPE(0x4),
+};
+
+enum DestructiveInstType {
+ DestructiveInstTypeMask = TSFLAG_DESTRUCTIVE_INST_TYPE(0x1),
+ NotDestructive = TSFLAG_DESTRUCTIVE_INST_TYPE(0x0),
+ Destructive = TSFLAG_DESTRUCTIVE_INST_TYPE(0x1),
+};
+
+#undef TSFLAG_ELEMENT_SIZE_TYPE
+#undef TSFLAG_DESTRUCTIVE_INST_TYPE
+}
+
} // end namespace llvm
#endif
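
With the masks above, the SVE properties encoded in TableGen can be read straight off MCInstrDesc::TSFlags. A small illustration of that decoding (sketch only; the real consumer is the movprfx validation added to the assembly parser below):

// Mirrors the masks defined above; no new API is introduced here.
static bool isDestructive(const MCInstrDesc &Desc) {
  return (Desc.TSFlags & AArch64::DestructiveInstTypeMask) ==
         AArch64::Destructive;
}

static unsigned getSVEElementSize(const MCInstrDesc &Desc) {
  // One of ElementSizeNone/B/H/S/D, held in the low three bits.
  return Desc.TSFlags & AArch64::ElementSizeMask;
}
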
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 4d7ca2349ed1..b2b500320b5c 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -21,6 +21,7 @@
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -94,6 +95,10 @@ private:
void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;
+ // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
+ void materializeLargeCMVal(MachineInstr &I, const Value *V,
+ unsigned char OpFlags) const;
+
const AArch64TargetMachine &TM;
const AArch64Subtarget &STI;
const AArch64InstrInfo &TII;
@@ -655,6 +660,45 @@ bool AArch64InstructionSelector::selectVaStartDarwin(
return true;
}
+void AArch64InstructionSelector::materializeLargeCMVal(
+ MachineInstr &I, const Value *V, unsigned char OpFlags) const {
+ MachineBasicBlock &MBB = *I.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MachineIRBuilder MIB(I);
+
+ auto MovZ = MIB.buildInstr(AArch64::MOVZXi, &AArch64::GPR64RegClass);
+ MovZ->addOperand(MF, I.getOperand(1));
+ MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
+ AArch64II::MO_NC);
+ MovZ->addOperand(MF, MachineOperand::CreateImm(0));
+ constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
+
+ auto BuildMovK = [&](unsigned SrcReg, unsigned char Flags, unsigned Offset,
+ unsigned ForceDstReg) {
+ unsigned DstReg = ForceDstReg
+ ? ForceDstReg
+ : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
+ auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
+ if (auto *GV = dyn_cast<GlobalValue>(V)) {
+ MovI->addOperand(MF, MachineOperand::CreateGA(
+ GV, MovZ->getOperand(1).getOffset(), Flags));
+ } else {
+ MovI->addOperand(
+ MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
+ MovZ->getOperand(1).getOffset(), Flags));
+ }
+ MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
+ constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
+ return DstReg;
+ };
+ unsigned DstReg = BuildMovK(MovZ->getOperand(0).getReg(),
+ AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
+ DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
+ BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
+ return;
+}
+
bool AArch64InstructionSelector::select(MachineInstr &I,
CodeGenCoverage &CoverageInfo) const {
assert(I.getParent() && "Instruction should be in a basic block!");
@@ -936,36 +980,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
I.getOperand(1).setTargetFlags(OpFlags);
} else if (TM.getCodeModel() == CodeModel::Large) {
// Materialize the global using movz/movk instructions.
- unsigned MovZDstReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
- auto InsertPt = std::next(I.getIterator());
- auto MovZ =
- BuildMI(MBB, InsertPt, I.getDebugLoc(), TII.get(AArch64::MOVZXi))
- .addDef(MovZDstReg);
- MovZ->addOperand(MF, I.getOperand(1));
- MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
- AArch64II::MO_NC);
- MovZ->addOperand(MF, MachineOperand::CreateImm(0));
- constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
-
- auto BuildMovK = [&](unsigned SrcReg, unsigned char Flags,
- unsigned Offset, unsigned ForceDstReg) {
- unsigned DstReg =
- ForceDstReg ? ForceDstReg
- : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
- auto MovI = BuildMI(MBB, InsertPt, MovZ->getDebugLoc(),
- TII.get(AArch64::MOVKXi))
- .addDef(DstReg)
- .addReg(SrcReg);
- MovI->addOperand(MF, MachineOperand::CreateGA(
- GV, MovZ->getOperand(1).getOffset(), Flags));
- MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
- constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
- return DstReg;
- };
- unsigned DstReg = BuildMovK(MovZ->getOperand(0).getReg(),
- AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
- DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
- BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
+ materializeLargeCMVal(I, GV, OpFlags);
I.eraseFromParent();
return true;
} else {
@@ -1482,7 +1497,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
.addImm(1);
I.eraseFromParent();
return true;
- case TargetOpcode::G_IMPLICIT_DEF:
+ case TargetOpcode::G_IMPLICIT_DEF: {
I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
const unsigned DstReg = I.getOperand(0).getReg();
@@ -1492,6 +1507,25 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
return true;
}
+ case TargetOpcode::G_BLOCK_ADDR: {
+ if (TM.getCodeModel() == CodeModel::Large) {
+ materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
+ I.eraseFromParent();
+ return true;
+ } else {
+ I.setDesc(TII.get(AArch64::MOVaddrBA));
+ auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
+ I.getOperand(0).getReg())
+ .addBlockAddress(I.getOperand(1).getBlockAddress(),
+ /* Offset */ 0, AArch64II::MO_PAGE)
+ .addBlockAddress(
+ I.getOperand(1).getBlockAddress(), /* Offset */ 0,
+ AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
+ }
+ }
+ }
return false;
}
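
For reference, the movz/movk sequence built by materializeLargeCMVal assembles a 64-bit address 16 bits at a time; MO_G0 through MO_G3 select which quarter each instruction writes. A standalone sketch of the arithmetic behind it (illustration only, no LLVM types involved):

#include <cstdint>

// Install the low 16 bits with a move, then insert each higher 16-bit
// chunk at its shift amount, as the emitted movz/movk sequence does.
uint64_t materializeByChunks(uint64_t Addr) {
  uint64_t Val = Addr & 0xFFFFu;        // movz xN, #:abs_g0_nc:sym
  Val |= Addr & (0xFFFFull << 16);      // movk xN, #:abs_g1_nc:sym, lsl #16
  Val |= Addr & (0xFFFFull << 32);      // movk xN, #:abs_g2_nc:sym, lsl #32
  Val |= Addr & (0xFFFFull << 48);      // movk xN, #:abs_g3:sym, lsl #48
  return Val;
}
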
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
index 9b8c0a34efba..327c758a7f8e 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -293,6 +293,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));
}
+ getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
+
// Merge/Unmerge
for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 798340f8fed8..e42214d15699 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -146,7 +146,7 @@ public:
Optional<bool> hasRedZone() const { return HasRedZone; }
void setHasRedZone(bool s) { HasRedZone = s; }
-
+
int getVarArgsStackIndex() const { return VarArgsStackIndex; }
void setVarArgsStackIndex(int Index) { VarArgsStackIndex = Index; }
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index 7a653e117fd1..bbf401b474ca 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -764,18 +764,35 @@ def Z30 : AArch64Reg<30, "z30", [Q30, Z30_HI]>, DwarfRegNum<[126]>;
def Z31 : AArch64Reg<31, "z31", [Q31, Z31_HI]>, DwarfRegNum<[127]>;
}
+// Enum describing the element size for destructive
+// operations.
+class ElementSizeEnum<bits<3> val> {
+ bits<3> Value = val;
+}
+
+def ElementSizeNone : ElementSizeEnum<0>;
+def ElementSizeB : ElementSizeEnum<1>;
+def ElementSizeH : ElementSizeEnum<2>;
+def ElementSizeS : ElementSizeEnum<3>;
+def ElementSizeD : ElementSizeEnum<4>;
+def ElementSizeQ : ElementSizeEnum<5>; // Unused
+
class SVERegOp <string Suffix, AsmOperandClass C,
+ ElementSizeEnum Size,
RegisterClass RC> : RegisterOperand<RC> {
+ ElementSizeEnum ElementSize;
+
+ let ElementSize = Size;
let PrintMethod = !if(!eq(Suffix, ""),
"printSVERegOp<>",
"printSVERegOp<'" # Suffix # "'>");
let ParserMatchClass = C;
}
-class PPRRegOp <string Suffix, AsmOperandClass C,
- RegisterClass RC> : SVERegOp<Suffix, C, RC> {}
-class ZPRRegOp <string Suffix, AsmOperandClass C,
- RegisterClass RC> : SVERegOp<Suffix, C, RC> {}
+class PPRRegOp <string Suffix, AsmOperandClass C, ElementSizeEnum Size,
+ RegisterClass RC> : SVERegOp<Suffix, C, Size, RC> {}
+class ZPRRegOp <string Suffix, AsmOperandClass C, ElementSizeEnum Size,
+ RegisterClass RC> : SVERegOp<Suffix, C, Size, RC> {}
//******************************************************************************
@@ -805,11 +822,11 @@ def PPRAsmOp16 : PPRAsmOperand<"PredicateH", "PPR", 16>;
def PPRAsmOp32 : PPRAsmOperand<"PredicateS", "PPR", 32>;
def PPRAsmOp64 : PPRAsmOperand<"PredicateD", "PPR", 64>;
-def PPRAny : PPRRegOp<"", PPRAsmOpAny, PPR>;
-def PPR8 : PPRRegOp<"b", PPRAsmOp8, PPR>;
-def PPR16 : PPRRegOp<"h", PPRAsmOp16, PPR>;
-def PPR32 : PPRRegOp<"s", PPRAsmOp32, PPR>;
-def PPR64 : PPRRegOp<"d", PPRAsmOp64, PPR>;
+def PPRAny : PPRRegOp<"", PPRAsmOpAny, ElementSizeNone, PPR>;
+def PPR8 : PPRRegOp<"b", PPRAsmOp8, ElementSizeB, PPR>;
+def PPR16 : PPRRegOp<"h", PPRAsmOp16, ElementSizeH, PPR>;
+def PPR32 : PPRRegOp<"s", PPRAsmOp32, ElementSizeS, PPR>;
+def PPR64 : PPRRegOp<"d", PPRAsmOp64, ElementSizeD, PPR>;
def PPRAsmOp3bAny : PPRAsmOperand<"Predicate3bAny", "PPR_3b", 0>;
def PPRAsmOp3b8 : PPRAsmOperand<"Predicate3bB", "PPR_3b", 8>;
@@ -817,11 +834,11 @@ def PPRAsmOp3b16 : PPRAsmOperand<"Predicate3bH", "PPR_3b", 16>;
def PPRAsmOp3b32 : PPRAsmOperand<"Predicate3bS", "PPR_3b", 32>;
def PPRAsmOp3b64 : PPRAsmOperand<"Predicate3bD", "PPR_3b", 64>;
-def PPR3bAny : PPRRegOp<"", PPRAsmOp3bAny, PPR_3b>;
-def PPR3b8 : PPRRegOp<"b", PPRAsmOp3b8, PPR_3b>;
-def PPR3b16 : PPRRegOp<"h", PPRAsmOp3b16, PPR_3b>;
-def PPR3b32 : PPRRegOp<"s", PPRAsmOp3b32, PPR_3b>;
-def PPR3b64 : PPRRegOp<"d", PPRAsmOp3b64, PPR_3b>;
+def PPR3bAny : PPRRegOp<"", PPRAsmOp3bAny, ElementSizeNone, PPR_3b>;
+def PPR3b8 : PPRRegOp<"b", PPRAsmOp3b8, ElementSizeB, PPR_3b>;
+def PPR3b16 : PPRRegOp<"h", PPRAsmOp3b16, ElementSizeH, PPR_3b>;
+def PPR3b32 : PPRRegOp<"s", PPRAsmOp3b32, ElementSizeS, PPR_3b>;
+def PPR3b64 : PPRRegOp<"d", PPRAsmOp3b64, ElementSizeD, PPR_3b>;
//******************************************************************************
@@ -874,28 +891,28 @@ def ZPRAsmOp32 : ZPRAsmOperand<"VectorS", 32>;
def ZPRAsmOp64 : ZPRAsmOperand<"VectorD", 64>;
def ZPRAsmOp128 : ZPRAsmOperand<"VectorQ", 128>;
-def ZPRAny : ZPRRegOp<"", ZPRAsmOpAny, ZPR>;
-def ZPR8 : ZPRRegOp<"b", ZPRAsmOp8, ZPR>;
-def ZPR16 : ZPRRegOp<"h", ZPRAsmOp16, ZPR>;
-def ZPR32 : ZPRRegOp<"s", ZPRAsmOp32, ZPR>;
-def ZPR64 : ZPRRegOp<"d", ZPRAsmOp64, ZPR>;
-def ZPR128 : ZPRRegOp<"q", ZPRAsmOp128, ZPR>;
+def ZPRAny : ZPRRegOp<"", ZPRAsmOpAny, ElementSizeNone, ZPR>;
+def ZPR8 : ZPRRegOp<"b", ZPRAsmOp8, ElementSizeB, ZPR>;
+def ZPR16 : ZPRRegOp<"h", ZPRAsmOp16, ElementSizeH, ZPR>;
+def ZPR32 : ZPRRegOp<"s", ZPRAsmOp32, ElementSizeS, ZPR>;
+def ZPR64 : ZPRRegOp<"d", ZPRAsmOp64, ElementSizeD, ZPR>;
+def ZPR128 : ZPRRegOp<"q", ZPRAsmOp128, ElementSizeQ, ZPR>;
def ZPRAsmOp3b8 : ZPRAsmOperand<"Vector3bB", 8, "_3b">;
def ZPRAsmOp3b16 : ZPRAsmOperand<"Vector3bH", 16, "_3b">;
def ZPRAsmOp3b32 : ZPRAsmOperand<"Vector3bS", 32, "_3b">;
-def ZPR3b8 : ZPRRegOp<"b", ZPRAsmOp3b8, ZPR_3b>;
-def ZPR3b16 : ZPRRegOp<"h", ZPRAsmOp3b16, ZPR_3b>;
-def ZPR3b32 : ZPRRegOp<"s", ZPRAsmOp3b32, ZPR_3b>;
+def ZPR3b8 : ZPRRegOp<"b", ZPRAsmOp3b8, ElementSizeB, ZPR_3b>;
+def ZPR3b16 : ZPRRegOp<"h", ZPRAsmOp3b16, ElementSizeH, ZPR_3b>;
+def ZPR3b32 : ZPRRegOp<"s", ZPRAsmOp3b32, ElementSizeS, ZPR_3b>;
def ZPRAsmOp4b16 : ZPRAsmOperand<"Vector4bH", 16, "_4b">;
def ZPRAsmOp4b32 : ZPRAsmOperand<"Vector4bS", 32, "_4b">;
def ZPRAsmOp4b64 : ZPRAsmOperand<"Vector4bD", 64, "_4b">;
-def ZPR4b16 : ZPRRegOp<"h", ZPRAsmOp4b16, ZPR_4b>;
-def ZPR4b32 : ZPRRegOp<"s", ZPRAsmOp4b32, ZPR_4b>;
-def ZPR4b64 : ZPRRegOp<"d", ZPRAsmOp4b64, ZPR_4b>;
+def ZPR4b16 : ZPRRegOp<"h", ZPRAsmOp4b16, ElementSizeH, ZPR_4b>;
+def ZPR4b32 : ZPRRegOp<"s", ZPRAsmOp4b32, ElementSizeS, ZPR_4b>;
+def ZPR4b64 : ZPRRegOp<"d", ZPRAsmOp4b64, ElementSizeD, ZPR_4b>;
class FPRasZPR<int Width> : AsmOperandClass{
let Name = "FPR" # Width # "asZPR";
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 16e6ddda6398..0fde68011e86 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -220,10 +220,33 @@ let Predicates = [HasSVE] in {
def PUNPKLO_PP : sve_int_perm_punpk<0b0, "punpklo">;
def PUNPKHI_PP : sve_int_perm_punpk<0b1, "punpkhi">;
+ defm MOVPRFX_ZPzZ : sve_int_movprfx_pred_zero<0b000, "movprfx">;
+ defm MOVPRFX_ZPmZ : sve_int_movprfx_pred_merge<0b001, "movprfx">;
+ def MOVPRFX_ZZ : sve_int_bin_cons_misc_0_c<0b00000001, "movprfx", ZPRAny>;
def FEXPA_ZZ_H : sve_int_bin_cons_misc_0_c<0b01000000, "fexpa", ZPR16>;
def FEXPA_ZZ_S : sve_int_bin_cons_misc_0_c<0b10000000, "fexpa", ZPR32>;
def FEXPA_ZZ_D : sve_int_bin_cons_misc_0_c<0b11000000, "fexpa", ZPR64>;
+ def BRKPA_PPzPP : sve_int_brkp<0b00, "brkpa">;
+ def BRKPAS_PPzPP : sve_int_brkp<0b10, "brkpas">;
+ def BRKPB_PPzPP : sve_int_brkp<0b01, "brkpb">;
+ def BRKPBS_PPzPP : sve_int_brkp<0b11, "brkpbs">;
+
+ def BRKN_PPzP : sve_int_brkn<0b0, "brkn">;
+ def BRKNS_PPzP : sve_int_brkn<0b1, "brkns">;
+
+ defm BRKA_PPzP : sve_int_break_z<0b000, "brka">;
+ defm BRKA_PPmP : sve_int_break_m<0b001, "brka">;
+ defm BRKAS_PPzP : sve_int_break_z<0b010, "brkas">;
+ defm BRKB_PPzP : sve_int_break_z<0b100, "brkb">;
+ defm BRKB_PPmP : sve_int_break_m<0b101, "brkb">;
+ defm BRKBS_PPzP : sve_int_break_z<0b110, "brkbs">;
+
+ def PTEST_PP : sve_int_ptest<0b010000, "ptest">;
+ def PFALSE : sve_int_pfalse<0b000000, "pfalse">;
+ defm PFIRST : sve_int_pfirst<0b00000, "pfirst">;
+ defm PNEXT : sve_int_pnext<0b00110, "pnext">;
+
def AND_PPzPP : sve_int_pred_log<0b0000, "and">;
def BIC_PPzPP : sve_int_pred_log<0b0001, "bic">;
def EOR_PPzPP : sve_int_pred_log<0b0010, "eor">;
@@ -731,6 +754,21 @@ let Predicates = [HasSVE] in {
defm FCMEQ_PPzZ0 : sve_fp_2op_p_pd<0b100, "fcmeq">;
defm FCMNE_PPzZ0 : sve_fp_2op_p_pd<0b110, "fcmne">;
+ defm WHILELT_PWW : sve_int_while4_rr<0b010, "whilelt">;
+ defm WHILELE_PWW : sve_int_while4_rr<0b011, "whilele">;
+ defm WHILELO_PWW : sve_int_while4_rr<0b110, "whilelo">;
+ defm WHILELS_PWW : sve_int_while4_rr<0b111, "whilels">;
+
+ defm WHILELT_PXX : sve_int_while8_rr<0b010, "whilelt">;
+ defm WHILELE_PXX : sve_int_while8_rr<0b011, "whilele">;
+ defm WHILELO_PXX : sve_int_while8_rr<0b110, "whilelo">;
+ defm WHILELS_PXX : sve_int_while8_rr<0b111, "whilels">;
+
+ def CTERMEQ_WW : sve_int_cterm<0b0, 0b0, "ctermeq", GPR32>;
+ def CTERMNE_WW : sve_int_cterm<0b0, 0b1, "ctermne", GPR32>;
+ def CTERMEQ_XX : sve_int_cterm<0b1, 0b0, "ctermeq", GPR64>;
+ def CTERMNE_XX : sve_int_cterm<0b1, 0b1, "ctermne", GPR64>;
+
def RDVLI_XI : sve_int_read_vl_a<0b0, 0b11111, "rdvl">;
def ADDVL_XXI : sve_int_arith_vl<0b0, "addvl">;
def ADDPL_XXI : sve_int_arith_vl<0b1, "addpl">;
@@ -854,40 +892,40 @@ let Predicates = [HasSVE] in {
defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr">;
defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl">;
- def FCVT_ZPmZ_StoH : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16>;
- def FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32>;
- def SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110010, "scvtf", ZPR16, ZPR16>;
- def SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010100, "scvtf", ZPR32, ZPR32>;
- def UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010101, "ucvtf", ZPR32, ZPR32>;
- def UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110011, "ucvtf", ZPR16, ZPR16>;
- def FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111010, "fcvtzs", ZPR16, ZPR16>;
- def FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011100, "fcvtzs", ZPR32, ZPR32>;
- def FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111011, "fcvtzu", ZPR16, ZPR16>;
- def FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011101, "fcvtzu", ZPR32, ZPR32>;
- def FCVT_ZPmZ_DtoH : sve_fp_2op_p_zd<0b1101000, "fcvt", ZPR64, ZPR16>;
- def FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd<0b1101001, "fcvt", ZPR16, ZPR64>;
- def FCVT_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1101010, "fcvt", ZPR64, ZPR32>;
- def FCVT_ZPmZ_StoD : sve_fp_2op_p_zd<0b1101011, "fcvt", ZPR32, ZPR64>;
- def SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110000, "scvtf", ZPR32, ZPR64>;
- def UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110001, "ucvtf", ZPR32, ZPR64>;
- def UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110101, "ucvtf", ZPR32, ZPR16>;
- def SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110100, "scvtf", ZPR64, ZPR32>;
- def SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110100, "scvtf", ZPR32, ZPR16>;
- def SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110110, "scvtf", ZPR64, ZPR16>;
- def UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110101, "ucvtf", ZPR64, ZPR32>;
- def UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110111, "ucvtf", ZPR64, ZPR16>;
- def SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110110, "scvtf", ZPR64, ZPR64>;
- def UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110111, "ucvtf", ZPR64, ZPR64>;
- def FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111000, "fcvtzs", ZPR64, ZPR32>;
- def FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111001, "fcvtzu", ZPR64, ZPR32>;
- def FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111100, "fcvtzs", ZPR32, ZPR64>;
- def FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111100, "fcvtzs", ZPR16, ZPR32>;
- def FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64>;
- def FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32>;
- def FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64>;
- def FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64>;
- def FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64>;
- def FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64>;
+ def FCVT_ZPmZ_StoH : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16, ElementSizeS>;
+ def FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32, ElementSizeS>;
+ def SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110010, "scvtf", ZPR16, ZPR16, ElementSizeH>;
+ def SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010100, "scvtf", ZPR32, ZPR32, ElementSizeS>;
+ def UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010101, "ucvtf", ZPR32, ZPR32, ElementSizeS>;
+ def UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110011, "ucvtf", ZPR16, ZPR16, ElementSizeH>;
+ def FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111010, "fcvtzs", ZPR16, ZPR16, ElementSizeH>;
+ def FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011100, "fcvtzs", ZPR32, ZPR32, ElementSizeS>;
+ def FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111011, "fcvtzu", ZPR16, ZPR16, ElementSizeH>;
+ def FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011101, "fcvtzu", ZPR32, ZPR32, ElementSizeS>;
+ def FCVT_ZPmZ_DtoH : sve_fp_2op_p_zd<0b1101000, "fcvt", ZPR64, ZPR16, ElementSizeD>;
+ def FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd<0b1101001, "fcvt", ZPR16, ZPR64, ElementSizeD>;
+ def FCVT_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1101010, "fcvt", ZPR64, ZPR32, ElementSizeD>;
+ def FCVT_ZPmZ_StoD : sve_fp_2op_p_zd<0b1101011, "fcvt", ZPR32, ZPR64, ElementSizeD>;
+ def SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110000, "scvtf", ZPR32, ZPR64, ElementSizeD>;
+ def UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110001, "ucvtf", ZPR32, ZPR64, ElementSizeD>;
+ def UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110101, "ucvtf", ZPR32, ZPR16, ElementSizeS>;
+ def SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110100, "scvtf", ZPR64, ZPR32, ElementSizeD>;
+ def SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110100, "scvtf", ZPR32, ZPR16, ElementSizeS>;
+ def SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110110, "scvtf", ZPR64, ZPR16, ElementSizeD>;
+ def UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110101, "ucvtf", ZPR64, ZPR32, ElementSizeD>;
+ def UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110111, "ucvtf", ZPR64, ZPR16, ElementSizeD>;
+ def SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110110, "scvtf", ZPR64, ZPR64, ElementSizeD>;
+ def UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110111, "ucvtf", ZPR64, ZPR64, ElementSizeD>;
+ def FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111000, "fcvtzs", ZPR64, ZPR32, ElementSizeD>;
+ def FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111001, "fcvtzu", ZPR64, ZPR32, ElementSizeD>;
+ def FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111100, "fcvtzs", ZPR32, ZPR64, ElementSizeD>;
+ def FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111100, "fcvtzs", ZPR16, ZPR32, ElementSizeS>;
+ def FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, ElementSizeD>;
+ def FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, ElementSizeS>;
+ def FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, ElementSizeD>;
+ def FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, ElementSizeD>;
+ def FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, ElementSizeD>;
+ def FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, ElementSizeD>;
defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn">;
defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp">;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 01a997e5aed7..120d71381c67 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -255,6 +255,9 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
// AArch64 supports the MachineOutliner.
setMachineOutliner(true);
+
+ // AArch64 supports default outlining behaviour.
+ setSupportsDefaultOutlining(true);
}
AArch64TargetMachine::~AArch64TargetMachine() = default;
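
setSupportsDefaultOutlining pairs with the shouldOutlineFromFunctionByDefault hook added earlier in this patch: once the target opts in, the outliner may consider a function even when outlining was not requested explicitly. A hypothetical guard expressing that relationship (not the actual MachineOutliner source):

// Hypothetical sketch of the per-function decision; "RunOnAllFunctions"
// stands in for the explicit opt-in flag.
bool shouldConsiderFunction(const TargetInstrInfo &TII, MachineFunction &MF,
                            bool RunOnAllFunctions) {
  // With this patch, AArch64 answers true here for minsize functions.
  return RunOnAllFunctions || TII.shouldOutlineFromFunctionByDefault(MF);
}
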
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index d75fef7b0171..96e751e86971 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -577,7 +577,7 @@ int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
unsigned NumVectorInstToHideOverhead = 10;
int MaxMergeDistance = 64;
- if (Ty->isVectorTy() && SE &&
+ if (Ty->isVectorTy() && SE &&
!BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
return NumVectorInstToHideOverhead;
diff --git a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index a51c41d70915..30a9a08f2346 100644
--- a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -11,6 +11,7 @@
#include "MCTargetDesc/AArch64MCExpr.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "MCTargetDesc/AArch64TargetStreamer.h"
+#include "AArch64InstrInfo.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
@@ -79,6 +80,67 @@ private:
// Map of register aliases registers via the .req directive.
StringMap<std::pair<RegKind, unsigned>> RegisterReqs;
+ class PrefixInfo {
+ public:
+ static PrefixInfo CreateFromInst(const MCInst &Inst, uint64_t TSFlags) {
+ PrefixInfo Prefix;
+ switch (Inst.getOpcode()) {
+ case AArch64::MOVPRFX_ZZ:
+ Prefix.Active = true;
+ Prefix.Dst = Inst.getOperand(0).getReg();
+ break;
+ case AArch64::MOVPRFX_ZPmZ_B:
+ case AArch64::MOVPRFX_ZPmZ_H:
+ case AArch64::MOVPRFX_ZPmZ_S:
+ case AArch64::MOVPRFX_ZPmZ_D:
+ Prefix.Active = true;
+ Prefix.Predicated = true;
+ Prefix.ElementSize = TSFlags & AArch64::ElementSizeMask;
+ assert(Prefix.ElementSize != AArch64::ElementSizeNone &&
+ "No destructive element size set for movprfx");
+ Prefix.Dst = Inst.getOperand(0).getReg();
+ Prefix.Pg = Inst.getOperand(2).getReg();
+ break;
+ case AArch64::MOVPRFX_ZPzZ_B:
+ case AArch64::MOVPRFX_ZPzZ_H:
+ case AArch64::MOVPRFX_ZPzZ_S:
+ case AArch64::MOVPRFX_ZPzZ_D:
+ Prefix.Active = true;
+ Prefix.Predicated = true;
+ Prefix.ElementSize = TSFlags & AArch64::ElementSizeMask;
+ assert(Prefix.ElementSize != AArch64::ElementSizeNone &&
+ "No destructive element size set for movprfx");
+ Prefix.Dst = Inst.getOperand(0).getReg();
+ Prefix.Pg = Inst.getOperand(1).getReg();
+ break;
+ default:
+ break;
+ }
+
+ return Prefix;
+ }
+
+ PrefixInfo() : Active(false), Predicated(false) {}
+ bool isActive() const { return Active; }
+ bool isPredicated() const { return Predicated; }
+ unsigned getElementSize() const {
+ assert(Predicated);
+ return ElementSize;
+ }
+ unsigned getDstReg() const { return Dst; }
+ unsigned getPgReg() const {
+ assert(Predicated);
+ return Pg;
+ }
+
+ private:
+ bool Active;
+ bool Predicated;
+ unsigned ElementSize;
+ unsigned Dst;
+ unsigned Pg;
+ } NextPrefix;
+
AArch64TargetStreamer &getTargetStreamer() {
MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
return static_cast<AArch64TargetStreamer &>(TS);
@@ -113,7 +175,8 @@ private:
bool parseDirectiveReq(StringRef Name, SMLoc L);
bool parseDirectiveUnreq(SMLoc L);
- bool validateInstruction(MCInst &Inst, SmallVectorImpl<SMLoc> &Loc);
+ bool validateInstruction(MCInst &Inst, SMLoc &IDLoc,
+ SmallVectorImpl<SMLoc> &Loc);
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
@@ -3665,12 +3728,89 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
return false;
}
+static inline bool isMatchingOrAlias(unsigned ZReg, unsigned Reg) {
+ assert((ZReg >= AArch64::Z0) && (ZReg <= AArch64::Z31));
+ return (ZReg == ((Reg - AArch64::B0) + AArch64::Z0)) ||
+ (ZReg == ((Reg - AArch64::H0) + AArch64::Z0)) ||
+ (ZReg == ((Reg - AArch64::S0) + AArch64::Z0)) ||
+ (ZReg == ((Reg - AArch64::D0) + AArch64::Z0)) ||
+ (ZReg == ((Reg - AArch64::Q0) + AArch64::Z0)) ||
+ (ZReg == ((Reg - AArch64::Z0) + AArch64::Z0));
+}
+
// FIXME: This entire function is a giant hack to provide us with decent
// operand range validation/diagnostics until TableGen/MC can be extended
// to support autogeneration of this kind of validation.
-bool AArch64AsmParser::validateInstruction(MCInst &Inst,
- SmallVectorImpl<SMLoc> &Loc) {
+bool AArch64AsmParser::validateInstruction(MCInst &Inst, SMLoc &IDLoc,
+ SmallVectorImpl<SMLoc> &Loc) {
const MCRegisterInfo *RI = getContext().getRegisterInfo();
+ const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
+
+ // A prefix only applies to the instruction following it. Here we extract
+ // prefix information for the next instruction before validating the current
+ // one so that in the case of failure we don't erroneously continue using the
+ // current prefix.
+ PrefixInfo Prefix = NextPrefix;
+ NextPrefix = PrefixInfo::CreateFromInst(Inst, MCID.TSFlags);
+
+ // Before validating the instruction in isolation we run through the rules
+ // applicable when it follows a prefix instruction.
+ // NOTE: brk & hlt can be prefixed but require no additional validation.
+ if (Prefix.isActive() &&
+ (Inst.getOpcode() != AArch64::BRK) &&
+ (Inst.getOpcode() != AArch64::HLT)) {
+
+ // Prefixed instructions must have a destructive operand.
+ if ((MCID.TSFlags & AArch64::DestructiveInstTypeMask) ==
+ AArch64::NotDestructive)
+ return Error(IDLoc, "instruction is unpredictable when following a"
+ " movprfx, suggest replacing movprfx with mov");
+
+ // Destination operands must match.
+ if (Inst.getOperand(0).getReg() != Prefix.getDstReg())
+ return Error(Loc[0], "instruction is unpredictable when following a"
+ " movprfx writing to a different destination");
+
+ // Destination operand must not be used in any other location.
+ for (unsigned i = 1; i < Inst.getNumOperands(); ++i) {
+ if (Inst.getOperand(i).isReg() &&
+ (MCID.getOperandConstraint(i, MCOI::TIED_TO) == -1) &&
+ isMatchingOrAlias(Prefix.getDstReg(), Inst.getOperand(i).getReg()))
+ return Error(Loc[0], "instruction is unpredictable when following a"
+ " movprfx and destination also used as non-destructive"
+ " source");
+ }
+
+ auto PPRRegClass = AArch64MCRegisterClasses[AArch64::PPRRegClassID];
+ if (Prefix.isPredicated()) {
+ int PgIdx = -1;
+
+ // Find the instructions general predicate.
+ for (unsigned i = 1; i < Inst.getNumOperands(); ++i)
+ if (Inst.getOperand(i).isReg() &&
+ PPRRegClass.contains(Inst.getOperand(i).getReg())) {
+ PgIdx = i;
+ break;
+ }
+
+ // Instruction must be predicated if the movprfx is predicated.
+ if (PgIdx == -1 ||
+ (MCID.TSFlags & AArch64::ElementSizeMask) == AArch64::ElementSizeNone)
+ return Error(IDLoc, "instruction is unpredictable when following a"
+ " predicated movprfx, suggest using unpredicated movprfx");
+
+ // Instruction must use same general predicate as the movprfx.
+ if (Inst.getOperand(PgIdx).getReg() != Prefix.getPgReg())
+ return Error(IDLoc, "instruction is unpredictable when following a"
+ " predicated movprfx using a different general predicate");
+
+ // Instruction element type must match the movprfx.
+ if ((MCID.TSFlags & AArch64::ElementSizeMask) != Prefix.getElementSize())
+ return Error(IDLoc, "instruction is unpredictable when following a"
+ " predicated movprfx with a different element size");
+ }
+ }
+
// Check for indexed addressing modes w/ the base register being the
// same as a destination/source register or pair load where
// the Rt == Rt2. All of those are undefined behaviour.
@@ -4516,7 +4656,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
NumOperands = Operands.size();
for (unsigned i = 1; i < NumOperands; ++i)
OperandLocs.push_back(Operands[i]->getStartLoc());
- if (validateInstruction(Inst, OperandLocs))
+ if (validateInstruction(Inst, IDLoc, OperandLocs))
return true;
Inst.setLoc(IDLoc);
@@ -4719,7 +4859,6 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
const MCObjectFileInfo::Environment Format =
getContext().getObjectFileInfo()->getObjectFileType();
bool IsMachO = Format == MCObjectFileInfo::IsMachO;
- bool IsCOFF = Format == MCObjectFileInfo::IsCOFF;
StringRef IDVal = DirectiveID.getIdentifier();
SMLoc Loc = DirectiveID.getLoc();
@@ -4733,14 +4872,14 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
parseDirectiveLtorg(Loc);
else if (IDVal == ".unreq")
parseDirectiveUnreq(Loc);
- else if (!IsMachO && !IsCOFF) {
- if (IDVal == ".inst")
- parseDirectiveInst(Loc);
+ else if (IDVal == ".inst")
+ parseDirectiveInst(Loc);
+ else if (IsMachO) {
+ if (IDVal == MCLOHDirectiveName())
+ parseDirectiveLOH(IDVal, Loc);
else
return true;
- } else if (IDVal == MCLOHDirectiveName())
- parseDirectiveLOH(IDVal, Loc);
- else
+ } else
return true;
return false;
}
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
index 1b949b54590c..dee964df2635 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
@@ -39,4 +39,16 @@ void AArch64TargetStreamer::emitCurrentConstantPool() {
// finish() - write out any non-empty assembler constant pools.
void AArch64TargetStreamer::finish() { ConstantPools->emitAll(Streamer); }
-void AArch64TargetStreamer::emitInst(uint32_t Inst) {}
+void AArch64TargetStreamer::emitInst(uint32_t Inst) {
+ char Buffer[4];
+
+ // We can't just use EmitIntValue here, as that will swap the
+ // endianness on big-endian systems (instructions are always
+ // little-endian).
+ for (unsigned I = 0; I < 4; ++I) {
+ Buffer[I] = uint8_t(Inst);
+ Inst >>= 8;
+ }
+
+ getStreamer().EmitBytes(StringRef(Buffer, 4));
+}
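The loop above serialises the instruction word byte by byte so the emitted encoding stays little-endian even when the streamer runs on a big-endian host. A self-contained sketch of the same serialisation in plain C++ (no LLVM types assumed):

    #include <cstdint>
    #include <cstdio>

    // Emit a 32-bit AArch64 instruction word as four little-endian bytes,
    // independent of the host's native byte order.
    static void encodeInstLE(uint32_t Inst, uint8_t Out[4]) {
      for (unsigned I = 0; I < 4; ++I) {
        Out[I] = static_cast<uint8_t>(Inst); // least significant byte first
        Inst >>= 8;
      }
    }

    int main() {
      uint8_t Bytes[4];
      encodeInstLE(0xD503201F, Bytes);       // AArch64 NOP
      std::printf("%02x %02x %02x %02x\n", Bytes[0], Bytes[1], Bytes[2], Bytes[3]);
      // prints: 1f 20 03 d5
    }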
diff --git a/contrib/llvm/lib/Target/AArch64/SVEInstrFormats.td b/contrib/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 17b3f6041279..7a8dd8bc5aee 100644
--- a/contrib/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/contrib/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -282,6 +282,79 @@ let Predicates = [HasSVE] in {
//===----------------------------------------------------------------------===//
+// SVE Predicate Misc Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_pfalse<bits<6> opc, string asm>
+: I<(outs PPR8:$Pd), (ins),
+ asm, "\t$Pd",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pd;
+ let Inst{31-24} = 0b00100101;
+ let Inst{23-22} = opc{5-4};
+ let Inst{21-19} = 0b011;
+ let Inst{18-16} = opc{3-1};
+ let Inst{15-10} = 0b111001;
+ let Inst{9} = opc{0};
+ let Inst{8-4} = 0b00000;
+ let Inst{3-0} = Pd;
+}
+
+class sve_int_ptest<bits<6> opc, string asm>
+: I<(outs), (ins PPRAny:$Pg, PPR8:$Pn),
+ asm, "\t$Pg, $Pn",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pg;
+ bits<4> Pn;
+ let Inst{31-24} = 0b00100101;
+ let Inst{23-22} = opc{5-4};
+ let Inst{21-19} = 0b010;
+ let Inst{18-16} = opc{3-1};
+ let Inst{15-14} = 0b11;
+ let Inst{13-10} = Pg;
+ let Inst{9} = opc{0};
+ let Inst{8-5} = Pn;
+ let Inst{4-0} = 0b00000;
+
+ let Defs = [NZCV];
+}
+
+class sve_int_pfirst_next<bits<2> sz8_64, bits<5> opc, string asm,
+ PPRRegOp pprty>
+: I<(outs pprty:$Pdn), (ins PPRAny:$Pg, pprty:$_Pdn),
+ asm, "\t$Pdn, $Pg, $_Pdn",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pdn;
+ bits<4> Pg;
+ let Inst{31-24} = 0b00100101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21-19} = 0b011;
+ let Inst{18-16} = opc{4-2};
+ let Inst{15-11} = 0b11000;
+ let Inst{10-9} = opc{1-0};
+ let Inst{8-5} = Pg;
+ let Inst{4} = 0;
+ let Inst{3-0} = Pdn;
+
+ let Constraints = "$Pdn = $_Pdn";
+ let Defs = [NZCV];
+}
+
+multiclass sve_int_pfirst<bits<5> opc, string asm> {
+ def : sve_int_pfirst_next<0b01, opc, asm, PPR8>;
+}
+
+multiclass sve_int_pnext<bits<5> opc, string asm> {
+ def _B : sve_int_pfirst_next<0b00, opc, asm, PPR8>;
+ def _H : sve_int_pfirst_next<0b01, opc, asm, PPR16>;
+ def _S : sve_int_pfirst_next<0b10, opc, asm, PPR32>;
+ def _D : sve_int_pfirst_next<0b11, opc, asm, PPR64>;
+}
+
+//===----------------------------------------------------------------------===//
// SVE Predicate Count Group
//===----------------------------------------------------------------------===//
@@ -348,6 +421,8 @@ class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_int_count_v<bits<5> opc, string asm> {
@@ -433,6 +508,8 @@ class sve_int_countvlv<bits<5> opc, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_int_countvlv<bits<5> opc, string asm, ZPRRegOp zprty> {
@@ -738,6 +815,8 @@ class sve_int_perm_insrs<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_int_perm_insrs<string asm> {
@@ -762,6 +841,8 @@ class sve_int_perm_insrv<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_int_perm_insrv<string asm> {
@@ -790,6 +871,8 @@ class sve_int_perm_extract_i<string asm>
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
//===----------------------------------------------------------------------===//
@@ -883,6 +966,8 @@ class sve_int_log_imm<bits<2> opc, string asm>
let Constraints = "$Zdn = $_Zdn";
let DecoderMethod = "DecodeSVELogicalImmInstruction";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_int_log_imm<bits<2> opc, string asm, string alias> {
@@ -993,6 +1078,8 @@ class sve_fp_2op_i_p_zds<bits<2> sz, bits<3> opc, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_fp_2op_i_p_zds<bits<3> opc, string asm, Operand imm_ty> {
@@ -1020,6 +1107,8 @@ class sve_fp_2op_p_zds<bits<2> sz, bits<4> opc, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_fp_2op_p_zds<bits<4> opc, string asm> {
@@ -1045,6 +1134,8 @@ class sve_fp_ftmad<bits<2> sz, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_fp_ftmad<string asm> {
@@ -1106,6 +1197,8 @@ class sve_fp_3op_p_zds_a<bits<2> sz, bits<2> opc, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_fp_3op_p_zds_a<bits<2> opc, string asm> {
@@ -1135,6 +1228,8 @@ class sve_fp_3op_p_zds_b<bits<2> sz, bits<2> opc, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_fp_3op_p_zds_b<bits<2> opc, string asm> {
@@ -1163,6 +1258,8 @@ class sve_fp_fma_by_indexed_elem<bits<2> sz, bit opc, string asm,
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_fp_fma_by_indexed_elem<bit opc, string asm> {
@@ -1253,6 +1350,8 @@ class sve_fp_fcmla<bits<2> sz, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_fp_fcmla<string asm> {
@@ -1284,6 +1383,8 @@ class sve_fp_fcmla_by_indexed_elem<bits<2> sz, string asm,
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_fp_fcmla_by_indexed_elem<string asm> {
@@ -1325,6 +1426,8 @@ class sve_fp_fcadd<bits<2> sz, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_fp_fcadd<string asm> {
@@ -1405,7 +1508,7 @@ multiclass sve_int_perm_bin_perm_zz<bits<3> opc, string asm> {
//===----------------------------------------------------------------------===//
class sve_fp_2op_p_zd<bits<7> opc, string asm, RegisterOperand i_zprtype,
- RegisterOperand o_zprtype>
+ RegisterOperand o_zprtype, ElementSizeEnum size>
: I<(outs o_zprtype:$Zd), (ins i_zprtype:$_Zd, PPR3bAny:$Pg, i_zprtype:$Zn),
asm, "\t$Zd, $Pg/m, $Zn",
"",
@@ -1423,12 +1526,14 @@ class sve_fp_2op_p_zd<bits<7> opc, string asm, RegisterOperand i_zprtype,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
+ let DestructiveInstType = Destructive;
+ let ElementSize = size;
}
multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm> {
- def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16>;
- def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32>;
- def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64>;
+ def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>;
+ def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>;
+ def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64, ElementSizeD>;
}
//===----------------------------------------------------------------------===//
@@ -1480,6 +1585,8 @@ class sve_int_bin_pred_arit_log<bits<2> sz8_64, bits<2> fmt, bits<3> opc,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_int_bin_pred_log<bits<3> opc, string asm> {
@@ -1541,6 +1648,8 @@ class sve_int_mladdsub_vvv_pred<bits<2> sz8_64, bits<1> opc, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_int_mladdsub_vvv_pred<bits<1> opc, string asm> {
@@ -1571,6 +1680,8 @@ class sve_int_mlas_vvv_pred<bits<2> sz8_64, bits<1> opc, string asm,
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_int_mlas_vvv_pred<bits<1> opc, string asm> {
@@ -1601,6 +1712,8 @@ class sve_intx_dot<bit sz, bit U, string asm, ZPRRegOp zprty1,
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty1.ElementSize;
}
multiclass sve_intx_dot<bit opc, string asm> {
@@ -1629,6 +1742,8 @@ class sve_intx_dot_by_indexed_elem<bit sz, bit U, string asm,
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_intx_dot_by_indexed_elem<bit opc, string asm> {
@@ -1670,6 +1785,8 @@ class sve_int_un_pred_arit<bits<2> sz8_64, bits<4> opc,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_int_un_pred_arit_0<bits<3> opc, string asm> {
@@ -1800,6 +1917,8 @@ class sve_int_arith_imm0<bits<2> sz8_64, bits<3> opc, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_int_arith_imm0<bits<3> opc, string asm> {
@@ -1825,6 +1944,8 @@ class sve_int_arith_imm<bits<2> sz8_64, bits<6> opc, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_int_arith_imm1<bits<2> opc, string asm, Operand immtype> {
@@ -1885,6 +2006,8 @@ class sve_int_dup_fpimm_pred<bits<2> sz, Operand fpimmtype,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_int_dup_fpimm_pred<string asm> {
@@ -1917,6 +2040,9 @@ class sve_int_dup_imm_pred<bits<2> sz8_64, bit m, string asm,
let Inst{13} = imm{8}; // sh
let Inst{12-5} = imm{7-0}; // imm8
let Inst{4-0} = Zd;
+
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_int_dup_imm_pred_merge<string asm> {
@@ -2083,6 +2209,65 @@ multiclass sve_int_ucmp_vi<bits<2> opc, string asm> {
//===----------------------------------------------------------------------===//
+// SVE Integer Compare - Scalars Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_cterm<bit sz, bit opc, string asm, RegisterClass rt>
+: I<(outs), (ins rt:$Rn, rt:$Rm),
+ asm, "\t$Rn, $Rm",
+ "",
+ []>, Sched<[]> {
+ bits<5> Rm;
+ bits<5> Rn;
+ let Inst{31-23} = 0b001001011;
+ let Inst{22} = sz;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rm;
+ let Inst{15-10} = 0b001000;
+ let Inst{9-5} = Rn;
+ let Inst{4} = opc;
+ let Inst{3-0} = 0b0000;
+
+ let Defs = [NZCV];
+}
+
+class sve_int_while_rr<bits<2> sz8_64, bits<4> opc, string asm,
+ RegisterClass gprty, PPRRegOp pprty>
+: I<(outs pprty:$Pd), (ins gprty:$Rn, gprty:$Rm),
+ asm, "\t$Pd, $Rn, $Rm",
+ "", []>, Sched<[]> {
+ bits<4> Pd;
+ bits<5> Rm;
+ bits<5> Rn;
+ let Inst{31-24} = 0b00100101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rm;
+ let Inst{15-13} = 0b000;
+ let Inst{12-10} = opc{3-1};
+ let Inst{9-5} = Rn;
+ let Inst{4} = opc{0};
+ let Inst{3-0} = Pd;
+
+ let Defs = [NZCV];
+}
+
+multiclass sve_int_while4_rr<bits<3> opc, string asm> {
+ def _B : sve_int_while_rr<0b00, { 0, opc }, asm, GPR32, PPR8>;
+ def _H : sve_int_while_rr<0b01, { 0, opc }, asm, GPR32, PPR16>;
+ def _S : sve_int_while_rr<0b10, { 0, opc }, asm, GPR32, PPR32>;
+ def _D : sve_int_while_rr<0b11, { 0, opc }, asm, GPR32, PPR64>;
+}
+
+multiclass sve_int_while8_rr<bits<3> opc, string asm> {
+ def _B : sve_int_while_rr<0b00, { 1, opc }, asm, GPR64, PPR8>;
+ def _H : sve_int_while_rr<0b01, { 1, opc }, asm, GPR64, PPR16>;
+ def _S : sve_int_while_rr<0b10, { 1, opc }, asm, GPR64, PPR32>;
+ def _D : sve_int_while_rr<0b11, { 1, opc }, asm, GPR64, PPR64>;
+}
+
+
+//===----------------------------------------------------------------------===//
// SVE Floating Point Fast Reduction Group
//===----------------------------------------------------------------------===//
@@ -2312,9 +2497,9 @@ multiclass sve_int_index_rr<string asm> {
//===----------------------------------------------------------------------===//
// SVE Bitwise Shift - Predicated Group
//===----------------------------------------------------------------------===//
-
class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<3> opc, string asm,
- ZPRRegOp zprty, Operand immtype>
+ ZPRRegOp zprty, Operand immtype,
+ ElementSizeEnum size>
: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, immtype:$imm),
asm, "\t$Zdn, $Pg/m, $_Zdn, $imm",
"",
@@ -2333,31 +2518,41 @@ class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<3> opc, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = size;
}
multiclass sve_int_bin_pred_shift_imm_left<bits<3> opc, string asm> {
- def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>;
- def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> {
+ def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8,
+ ElementSizeB>;
+ def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16,
+ ElementSizeH> {
let Inst{8} = imm{3};
}
- def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> {
+ def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32,
+ ElementSizeS> {
let Inst{9-8} = imm{4-3};
}
- def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> {
+ def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64,
+ ElementSizeD> {
let Inst{22} = imm{5};
let Inst{9-8} = imm{4-3};
}
}
multiclass sve_int_bin_pred_shift_imm_right<bits<3> opc, string asm> {
- def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
- def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
+ def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8,
+ ElementSizeB>;
+ def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16,
+ ElementSizeH> {
let Inst{8} = imm{3};
}
- def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
+ def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32,
+ ElementSizeS> {
let Inst{9-8} = imm{4-3};
}
- def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
+ def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64,
+ ElementSizeD> {
let Inst{22} = imm{5};
let Inst{9-8} = imm{4-3};
}
@@ -2383,6 +2578,8 @@ class sve_int_bin_pred_shift<bits<2> sz8_64, bit wide, bits<3> opc,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_int_bin_pred_shift<bits<3> opc, string asm> {
@@ -3017,6 +3214,8 @@ class sve_int_perm_clast_zz<bits<2> sz8_64, bit ab, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_int_perm_clast_zz<bit ab, string asm> {
@@ -3094,6 +3293,8 @@ class sve_int_perm_splice<bits<2> sz8_64, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_int_perm_splice<string asm> {
@@ -3122,6 +3323,8 @@ class sve_int_perm_rev<bits<2> sz8_64, bits<2> opc, string asm,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_int_perm_rev_rbit<string asm> {
@@ -3163,6 +3366,8 @@ class sve_int_perm_cpy_r<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_int_perm_cpy_r<string asm> {
@@ -3198,6 +3403,8 @@ class sve_int_perm_cpy_v<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_int_perm_cpy_v<string asm> {
@@ -4117,3 +4324,133 @@ multiclass sve_int_reduce_2<bits<3> opc, string asm> {
def _S : sve_int_reduce<0b10, 0b11, opc, asm, ZPR32, FPR32>;
def _D : sve_int_reduce<0b11, 0b11, opc, asm, ZPR64, FPR64>;
}
+
+class sve_int_movprfx_pred<bits<2> sz8_32, bits<3> opc, string asm,
+ ZPRRegOp zprty, string pg_suffix, dag iops>
+: I<(outs zprty:$Zd), iops,
+ asm, "\t$Zd, $Pg"#pg_suffix#", $Zn",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Zd;
+ bits<5> Zn;
+ let Inst{31-24} = 0b00000100;
+ let Inst{23-22} = sz8_32;
+ let Inst{21-19} = 0b010;
+ let Inst{18-16} = opc;
+ let Inst{15-13} = 0b001;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+
+ let ElementSize = zprty.ElementSize;
+}
+
+multiclass sve_int_movprfx_pred_merge<bits<3> opc, string asm> {
+let Constraints = "$Zd = $_Zd" in {
+ def _B : sve_int_movprfx_pred<0b00, opc, asm, ZPR8, "/m",
+ (ins ZPR8:$_Zd, PPR3bAny:$Pg, ZPR8:$Zn)>;
+ def _H : sve_int_movprfx_pred<0b01, opc, asm, ZPR16, "/m",
+ (ins ZPR16:$_Zd, PPR3bAny:$Pg, ZPR16:$Zn)>;
+ def _S : sve_int_movprfx_pred<0b10, opc, asm, ZPR32, "/m",
+ (ins ZPR32:$_Zd, PPR3bAny:$Pg, ZPR32:$Zn)>;
+ def _D : sve_int_movprfx_pred<0b11, opc, asm, ZPR64, "/m",
+ (ins ZPR64:$_Zd, PPR3bAny:$Pg, ZPR64:$Zn)>;
+}
+}
+
+multiclass sve_int_movprfx_pred_zero<bits<3> opc, string asm> {
+ def _B : sve_int_movprfx_pred<0b00, opc, asm, ZPR8, "/z",
+ (ins PPR3bAny:$Pg, ZPR8:$Zn)>;
+ def _H : sve_int_movprfx_pred<0b01, opc, asm, ZPR16, "/z",
+ (ins PPR3bAny:$Pg, ZPR16:$Zn)>;
+ def _S : sve_int_movprfx_pred<0b10, opc, asm, ZPR32, "/z",
+ (ins PPR3bAny:$Pg, ZPR32:$Zn)>;
+ def _D : sve_int_movprfx_pred<0b11, opc, asm, ZPR64, "/z",
+ (ins PPR3bAny:$Pg, ZPR64:$Zn)>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Propagate Break Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_brkp<bits<2> opc, string asm>
+: I<(outs PPR8:$Pd), (ins PPRAny:$Pg, PPR8:$Pn, PPR8:$Pm),
+ asm, "\t$Pd, $Pg/z, $Pn, $Pm",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pd;
+ bits<4> Pg;
+ bits<4> Pm;
+ bits<4> Pn;
+ let Inst{31-24} = 0b00100101;
+ let Inst{23} = 0b0;
+ let Inst{22} = opc{1};
+ let Inst{21-20} = 0b00;
+ let Inst{19-16} = Pm;
+ let Inst{15-14} = 0b11;
+ let Inst{13-10} = Pg;
+ let Inst{9} = 0b0;
+ let Inst{8-5} = Pn;
+ let Inst{4} = opc{0};
+ let Inst{3-0} = Pd;
+
+ let Defs = !if(!eq (opc{1}, 1), [NZCV], []);
+}
+
+
+//===----------------------------------------------------------------------===//
+// SVE Partition Break Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_brkn<bit S, string asm>
+: I<(outs PPR8:$Pdm), (ins PPRAny:$Pg, PPR8:$Pn, PPR8:$_Pdm),
+ asm, "\t$Pdm, $Pg/z, $Pn, $_Pdm",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pdm;
+ bits<4> Pg;
+ bits<4> Pn;
+ let Inst{31-23} = 0b001001010;
+ let Inst{22} = S;
+ let Inst{21-14} = 0b01100001;
+ let Inst{13-10} = Pg;
+ let Inst{9} = 0b0;
+ let Inst{8-5} = Pn;
+ let Inst{4} = 0b0;
+ let Inst{3-0} = Pdm;
+
+ let Constraints = "$Pdm = $_Pdm";
+ let Defs = !if(!eq (S, 0b1), [NZCV], []);
+}
+
+class sve_int_break<bits<3> opc, string asm, string suffix, dag iops>
+: I<(outs PPR8:$Pd), iops,
+ asm, "\t$Pd, $Pg"#suffix#", $Pn",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pd;
+ bits<4> Pg;
+ bits<4> Pn;
+ let Inst{31-24} = 0b00100101;
+ let Inst{23-22} = opc{2-1};
+ let Inst{21-14} = 0b01000001;
+ let Inst{13-10} = Pg;
+ let Inst{9} = 0b0;
+ let Inst{8-5} = Pn;
+ let Inst{4} = opc{0};
+ let Inst{3-0} = Pd;
+
+ let Constraints = !if(!eq (opc{0}, 1), "$Pd = $_Pd", "");
+ let Defs = !if(!eq (opc{1}, 1), [NZCV], []);
+
+}
+
+multiclass sve_int_break_m<bits<3> opc, string asm> {
+ def NAME : sve_int_break<opc, asm, "/m", (ins PPR8:$_Pd, PPRAny:$Pg, PPR8:$Pn)>;
+}
+
+multiclass sve_int_break_z<bits<3> opc, string asm> {
+ def NAME : sve_int_break<opc, asm, "/z", (ins PPRAny:$Pg, PPR8:$Pn)>;
+}
+
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index b201126c593b..21e44e9589d3 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -554,6 +554,7 @@ static bool fnegFoldsIntoOp(unsigned Opc) {
case ISD::FTRUNC:
case ISD::FRINT:
case ISD::FNEARBYINT:
+ case ISD::FCANONICALIZE:
case AMDGPUISD::RCP:
case AMDGPUISD::RCP_LEGACY:
case AMDGPUISD::RCP_IFLAG:
@@ -907,6 +908,7 @@ void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(
LLVMContext &Ctx = Fn.getParent()->getContext();
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(MF);
const unsigned ExplicitOffset = ST.getExplicitKernelArgOffset(Fn);
+ CallingConv::ID CC = Fn.getCallingConv();
unsigned MaxAlign = 1;
uint64_t ExplicitArgOffset = 0;
@@ -940,16 +942,10 @@ void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(
EVT ArgVT = ValueVTs[Value];
EVT MemVT = ArgVT;
- MVT RegisterVT =
- getRegisterTypeForCallingConv(Ctx, ArgVT);
- unsigned NumRegs =
- getNumRegistersForCallingConv(Ctx, ArgVT);
-
- if (!Subtarget->isAmdHsaOS() &&
- (ArgVT == MVT::i16 || ArgVT == MVT::i8 || ArgVT == MVT::f16)) {
- // The ABI says the caller will extend these values to 32-bits.
- MemVT = ArgVT.isInteger() ? MVT::i32 : MVT::f32;
- } else if (NumRegs == 1) {
+ MVT RegisterVT = getRegisterTypeForCallingConv(Ctx, CC, ArgVT);
+ unsigned NumRegs = getNumRegistersForCallingConv(Ctx, CC, ArgVT);
+
+ if (NumRegs == 1) {
// This argument is not split, so the IR type is the memory type.
if (ArgVT.isExtended()) {
// We have an extended type, like i24, so we should just use the
@@ -3600,6 +3596,7 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
case ISD::FRINT:
case ISD::FNEARBYINT: // XXX - Should fround be handled?
case ISD::FSIN:
+ case ISD::FCANONICALIZE:
case AMDGPUISD::RCP:
case AMDGPUISD::RCP_LEGACY:
case AMDGPUISD::RCP_IFLAG:
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 96b7568eec1f..7442a59e594f 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -342,8 +342,9 @@ def AMDGPUumed3 : SDNode<"AMDGPUISD::UMED3", AMDGPUDTIntTernaryOp,
def AMDGPUfmed3 : SDNode<"AMDGPUISD::FMED3", SDTFPTernaryOp, []>;
def AMDGPUfdot2 : SDNode<"AMDGPUISD::FDOT2",
- SDTypeProfile<1, 3, [SDTCisSameAs<0, 3>, SDTCisSameAs<1, 2>,
- SDTCisFP<0>, SDTCisVec<1>]>,
+ SDTypeProfile<1, 4, [SDTCisSameAs<0, 3>, SDTCisSameAs<1, 2>,
+ SDTCisFP<0>, SDTCisVec<1>,
+ SDTCisInt<4>]>,
[]>;
def AMDGPUperm : SDNode<"AMDGPUISD::PERM", AMDGPUDTIntTernaryOp, []>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 9426df399597..c9c932ef2f5f 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -567,6 +567,7 @@ int PI = 0x40490fdb;
int TWO_PI_INV = 0x3e22f983;
int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding
int FP16_ONE = 0x3C00;
+int FP16_NEG_ONE = 0xBC00;
int V2FP16_ONE = 0x3C003C00;
int FP32_ONE = 0x3f800000;
int FP32_NEG_ONE = 0xbf800000;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index 8cc7e38f7b29..c147830e12ed 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -100,16 +100,6 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
unsigned Size = DL.getTypeSizeInBits(ArgTy);
unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
-
- // Clover seems to always pad i8/i16 to i32, but doesn't properly align
- // them?
- // Make sure the struct elements have correct size and alignment for ext
- // args. These seem to be padded up to 4-bytes but not correctly aligned.
- bool IsExtArg = AllocSize < 32 && (Arg.hasZExtAttr() || Arg.hasSExtAttr()) &&
- !ST.isAmdHsaOS();
- if (IsExtArg)
- AllocSize = 4;
-
uint64_t EltOffset = alignTo(ExplicitArgOffset, Align) + BaseOffset;
ExplicitArgOffset = alignTo(ExplicitArgOffset, Align) + AllocSize;
@@ -164,8 +154,6 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
ArgPtr->getName() + ".cast");
}
- assert((!IsExtArg || !IsV3) && "incompatible situation");
-
if (IsV3 && Size >= 32) {
V4Ty = VectorType::get(VT->getVectorElementType(), 4);
// Use the hack that clang uses to avoid SelectionDAG ruining v3 loads
@@ -212,20 +200,6 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
// TODO: Convert noalias arg to !noalias
if (Size < 32 && !ArgTy->isAggregateType()) {
- if (IsExtArg && OffsetDiff == 0) {
- Type *I32Ty = Builder.getInt32Ty();
- bool IsSext = Arg.hasSExtAttr();
- Metadata *LowAndHigh[] = {
- ConstantAsMetadata::get(
- ConstantInt::get(I32Ty, IsSext ? minIntN(Size) : 0)),
- ConstantAsMetadata::get(
- ConstantInt::get(I32Ty,
- IsSext ? maxIntN(Size) + 1 : maxUIntN(Size) + 1))
- };
-
- Load->setMetadata(LLVMContext::MD_range, MDNode::get(Ctx, LowAndHigh));
- }
-
Value *ExtractBits = OffsetDiff == 0 ?
Load : Builder.CreateLShr(Load, OffsetDiff * 8);
diff --git a/contrib/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/contrib/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 1e0bc62c45a6..44c2d366e461 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -66,6 +66,22 @@ def MIMGDimInfoTable : GenericTable {
let PrimaryKeyName = "getMIMGDimInfo";
}
+class MIMGLZMapping<MIMGBaseOpcode l, MIMGBaseOpcode lz> {
+ MIMGBaseOpcode L = l;
+ MIMGBaseOpcode LZ = lz;
+}
+
+def MIMGLZMappingTable : GenericTable {
+ let FilterClass = "MIMGLZMapping";
+ let CppTypeName = "MIMGLZMappingInfo";
+ let Fields = ["L", "LZ"];
+ GenericEnum TypeOf_L = MIMGBaseOpcode;
+ GenericEnum TypeOf_LZ = MIMGBaseOpcode;
+
+ let PrimaryKey = ["L"];
+ let PrimaryKeyName = "getMIMGLZMappingInfo";
+}
+
class mimg <bits<7> si, bits<7> vi = si> {
field bits<7> SI = si;
field bits<7> VI = vi;
@@ -547,3 +563,13 @@ foreach intr = !listconcat(AMDGPUImageDimIntrinsics,
AMDGPUImageDimAtomicIntrinsics) in {
def : ImageDimIntrinsicInfo<intr>;
}
+
+// L to LZ Optimization Mapping
+def : MIMGLZMapping<IMAGE_SAMPLE_L, IMAGE_SAMPLE_LZ>;
+def : MIMGLZMapping<IMAGE_SAMPLE_C_L, IMAGE_SAMPLE_C_LZ>;
+def : MIMGLZMapping<IMAGE_SAMPLE_L_O, IMAGE_SAMPLE_LZ_O>;
+def : MIMGLZMapping<IMAGE_SAMPLE_C_L_O, IMAGE_SAMPLE_C_LZ_O>;
+def : MIMGLZMapping<IMAGE_GATHER4_L, IMAGE_GATHER4_LZ>;
+def : MIMGLZMapping<IMAGE_GATHER4_C_L, IMAGE_GATHER4_C_LZ>;
+def : MIMGLZMapping<IMAGE_GATHER4_L_O, IMAGE_GATHER4_LZ_O>;
+def : MIMGLZMapping<IMAGE_GATHER4_C_L_O, IMAGE_GATHER4_C_LZ_O>;
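These mappings feed a generated search table that the lowering code queries via getMIMGLZMappingInfo (declared later in this patch in AMDGPUBaseInfo.h) to swap an IMAGE_*_L sample for its _LZ form when the LOD operand is known to be zero. A simplified C++ stand-in for the generated table and lookup, using placeholder opcode values rather than the real generated enum:

    #include <cstdint>

    // Placeholder opcode values; the real numbers come from the generated
    // MIMGBaseOpcode enum.
    enum BaseOpcode : uint16_t {
      IMAGE_SAMPLE_L, IMAGE_SAMPLE_LZ,
      IMAGE_SAMPLE_C_L, IMAGE_SAMPLE_C_LZ,
      IMAGE_GATHER4_L, IMAGE_GATHER4_LZ,
    };

    struct LZMappingInfo { uint16_t L, LZ; };

    static const LZMappingInfo LZTable[] = {
      {IMAGE_SAMPLE_L,   IMAGE_SAMPLE_LZ},
      {IMAGE_SAMPLE_C_L, IMAGE_SAMPLE_C_LZ},
      {IMAGE_GATHER4_L,  IMAGE_GATHER4_LZ},
    };

    // A linear scan stands in for the generated primary-key lookup.
    static const LZMappingInfo *getLZMappingInfo(uint16_t L) {
      for (const LZMappingInfo &E : LZTable)
        if (E.L == L)
          return &E;
      return nullptr;  // opcode has no _LZ counterpart
    }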
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 5b7fc2656a20..25007861fd15 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -694,6 +694,87 @@ bool SITargetLowering::isShuffleMaskLegal(ArrayRef<int>, EVT) const {
return false;
}
+MVT SITargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
+ EVT VT) const {
+ // TODO: Consider splitting all arguments into 32-bit pieces.
+ if (CC != CallingConv::AMDGPU_KERNEL && VT.isVector()) {
+ EVT ScalarVT = VT.getScalarType();
+ unsigned Size = ScalarVT.getSizeInBits();
+ if (Size == 32)
+ return ScalarVT.getSimpleVT();
+
+ if (Size == 64)
+ return MVT::i32;
+
+ if (Size == 16 &&
+ Subtarget->has16BitInsts() &&
+ isPowerOf2_32(VT.getVectorNumElements()))
+ return VT.isInteger() ? MVT::v2i16 : MVT::v2f16;
+ }
+
+ return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
+}
+
+unsigned SITargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
+ EVT VT) const {
+ if (CC != CallingConv::AMDGPU_KERNEL && VT.isVector()) {
+ unsigned NumElts = VT.getVectorNumElements();
+ EVT ScalarVT = VT.getScalarType();
+ unsigned Size = ScalarVT.getSizeInBits();
+
+ if (Size == 32)
+ return NumElts;
+
+ if (Size == 64)
+ return 2 * NumElts;
+
+ // FIXME: Fails to break down as we want with v3.
+ if (Size == 16 && Subtarget->has16BitInsts() && isPowerOf2_32(NumElts))
+ return VT.getVectorNumElements() / 2;
+ }
+
+ return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
+}
+
+unsigned SITargetLowering::getVectorTypeBreakdownForCallingConv(
+ LLVMContext &Context, CallingConv::ID CC,
+ EVT VT, EVT &IntermediateVT,
+ unsigned &NumIntermediates, MVT &RegisterVT) const {
+ if (CC != CallingConv::AMDGPU_KERNEL && VT.isVector()) {
+ unsigned NumElts = VT.getVectorNumElements();
+ EVT ScalarVT = VT.getScalarType();
+ unsigned Size = ScalarVT.getSizeInBits();
+ if (Size == 32) {
+ RegisterVT = ScalarVT.getSimpleVT();
+ IntermediateVT = RegisterVT;
+ NumIntermediates = NumElts;
+ return NumIntermediates;
+ }
+
+ if (Size == 64) {
+ RegisterVT = MVT::i32;
+ IntermediateVT = RegisterVT;
+ NumIntermediates = 2 * NumElts;
+ return NumIntermediates;
+ }
+
+ // FIXME: We should fix the ABI to be the same on targets without 16-bit
+ // support, but unless we can properly handle 3-vectors, it will be still be
+    // support, but unless we can properly handle 3-vectors, it will still be
+ if (Size == 16 && Subtarget->has16BitInsts() && isPowerOf2_32(NumElts)) {
+ RegisterVT = VT.isInteger() ? MVT::v2i16 : MVT::v2f16;
+ IntermediateVT = RegisterVT;
+ NumIntermediates = NumElts / 2;
+ return NumIntermediates;
+ }
+ }
+
+ return TargetLowering::getVectorTypeBreakdownForCallingConv(
+ Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
+}
+
bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &CI,
MachineFunction &MF,
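For non-kernel calling conventions the three overrides added in this hunk split vector arguments as follows: 32-bit element vectors use one register per element, 64-bit element vectors use two i32 registers per element, and 16-bit element vectors of power-of-two length are packed two elements per v2i16/v2f16 register when the subtarget has 16-bit instructions. A compact illustrative restatement in plain C++ (not the LLVM API):

    #include <cstdio>

    struct Breakdown { const char *RegTy; unsigned NumRegs; };

    // Illustrative re-statement of the splitting rules for a vector argument
    // of NumElts elements with ScalarBits-wide elements, outside AMDGPU_KERNEL.
    static Breakdown splitVectorArg(unsigned NumElts, unsigned ScalarBits,
                                    bool IsFP, bool Has16BitInsts) {
      if (ScalarBits == 32)
        return {IsFP ? "f32" : "i32", NumElts};
      if (ScalarBits == 64)
        return {"i32", 2 * NumElts};            // each element becomes two i32s
      bool PowerOf2 = NumElts && !(NumElts & (NumElts - 1));
      if (ScalarBits == 16 && Has16BitInsts && PowerOf2)
        return {IsFP ? "v2f16" : "v2i16", NumElts / 2};
      return {"<default breakdown>", 0};        // fall back to the generic path
    }

    int main() {
      Breakdown B = splitVectorArg(/*NumElts=*/4, /*ScalarBits=*/16,
                                   /*IsFP=*/true, /*Has16BitInsts=*/true);
      std::printf("v4f16 -> %u x %s\n", B.NumRegs, B.RegTy); // v4f16 -> 2 x v2f16
    }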
@@ -1268,6 +1349,8 @@ static void processShaderInputArgs(SmallVectorImpl<ISD::InputArg> &Splits,
for (unsigned I = 0, E = Ins.size(), PSInputNum = 0; I != E; ++I) {
const ISD::InputArg *Arg = &Ins[I];
+ assert(!Arg->VT.isVector() && "vector type argument should have been split");
+
// First check if it's a PS input addr.
if (CallConv == CallingConv::AMDGPU_PS &&
!Arg->Flags.isInReg() && !Arg->Flags.isByVal() && PSInputNum <= 15) {
@@ -1301,25 +1384,7 @@ static void processShaderInputArgs(SmallVectorImpl<ISD::InputArg> &Splits,
++PSInputNum;
}
- // Second split vertices into their elements.
- if (Arg->VT.isVector()) {
- ISD::InputArg NewArg = *Arg;
- NewArg.Flags.setSplit();
- NewArg.VT = Arg->VT.getVectorElementType();
-
- // We REALLY want the ORIGINAL number of vertex elements here, e.g. a
- // three or five element vertex only needs three or five registers,
- // NOT four or eight.
- Type *ParamType = FType->getParamType(Arg->getOrigArgIndex());
- unsigned NumElements = ParamType->getVectorNumElements();
-
- for (unsigned J = 0; J != NumElements; ++J) {
- Splits.push_back(NewArg);
- NewArg.PartOffset += NewArg.VT.getStoreSize();
- }
- } else {
- Splits.push_back(*Arg);
- }
+ Splits.push_back(*Arg);
}
}
@@ -4490,6 +4555,9 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
+ const AMDGPU::MIMGLZMappingInfo *LZMappingInfo =
+ AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode);
+ unsigned IntrOpcode = Intr->BaseOpcode;
SmallVector<EVT, 2> ResultTypes(Op->value_begin(), Op->value_end());
bool IsD16 = false;
@@ -4575,6 +4643,18 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
SmallVector<SDValue, 4> VAddrs;
for (unsigned i = 0; i < NumVAddrs; ++i)
VAddrs.push_back(Op.getOperand(AddrIdx + i));
+
+  // Optimize _L to _LZ when the LOD is zero or negative (it clamps to zero).
+ if (LZMappingInfo) {
+ if (auto ConstantLod =
+ dyn_cast<ConstantFPSDNode>(VAddrs[NumVAddrs-1].getNode())) {
+ if (ConstantLod->isZero() || ConstantLod->isNegative()) {
+ IntrOpcode = LZMappingInfo->LZ; // set new opcode to _lz variant of _l
+ VAddrs.pop_back(); // remove 'lod'
+ }
+ }
+ }
+
SDValue VAddr = getBuildDwordsVector(DAG, DL, VAddrs);
SDValue True = DAG.getTargetConstant(1, DL, MVT::i1);
@@ -4634,10 +4714,10 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
int Opcode = -1;
if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
- Opcode = AMDGPU::getMIMGOpcode(Intr->BaseOpcode, AMDGPU::MIMGEncGfx8,
+ Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx8,
NumVDataDwords, NumVAddrDwords);
if (Opcode == -1)
- Opcode = AMDGPU::getMIMGOpcode(Intr->BaseOpcode, AMDGPU::MIMGEncGfx6,
+ Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx6,
NumVDataDwords, NumVAddrDwords);
assert(Opcode != -1);
@@ -4945,7 +5025,8 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::amdgcn_fdot2:
return DAG.getNode(AMDGPUISD::FDOT2, DL, VT,
- Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
+ Op.getOperand(4));
case Intrinsic::amdgcn_fmul_legacy:
return DAG.getNode(AMDGPUISD::FMUL_LEGACY, DL, VT,
Op.getOperand(1), Op.getOperand(2));
@@ -6754,10 +6835,6 @@ static bool isCanonicalized(SelectionDAG &DAG, SDValue Op,
return Op.getOperand(0).getValueType().getScalarType() != MVT::f16 ||
ST->hasFP16Denormals();
- case ISD::FP16_TO_FP:
- case ISD::FP_TO_FP16:
- return ST->hasFP16Denormals();
-
// It can/will be lowered or combined as a bit operation.
// Need to check their input recursively to handle.
case ISD::FNEG:
@@ -6799,8 +6876,16 @@ SDValue SITargetLowering::performFCanonicalizeCombine(
SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
- ConstantFPSDNode *CFP = isConstOrConstSplatFP(N->getOperand(0));
+ SDValue N0 = N->getOperand(0);
+ // fcanonicalize undef -> qnan
+ if (N0.isUndef()) {
+ EVT VT = N->getValueType(0);
+ APFloat QNaN = APFloat::getQNaN(SelectionDAG::EVTToAPFloatSemantics(VT));
+ return DAG.getConstantFP(QNaN, SDLoc(N), VT);
+ }
+
+ ConstantFPSDNode *CFP = isConstOrConstSplatFP(N0);
if (!CFP) {
SDValue N0 = N->getOperand(0);
EVT VT = N0.getValueType().getScalarType();
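The new undef case at the top of this hunk folds fcanonicalize(undef) to the type's canonical quiet NaN; for f32 that bit pattern is 0x7FC00000. A quick check using LLVM's APFloat (assuming the usual Support headers are available):

    #include "llvm/ADT/APFloat.h"
    #include <cstdio>

    int main() {
      // Canonical quiet NaN produced for fcanonicalize(undef) with an f32 result.
      llvm::APFloat QNaN = llvm::APFloat::getQNaN(llvm::APFloat::IEEEsingle());
      std::printf("0x%08llx\n",
                  (unsigned long long)QNaN.bitcastToAPInt().getZExtValue());
      // expected output: 0x7fc00000
    }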
@@ -6853,7 +6938,7 @@ SDValue SITargetLowering::performFCanonicalizeCombine(
return DAG.getConstantFP(CanonicalQNaN, SDLoc(N), VT);
}
- return N->getOperand(0);
+ return N0;
}
static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {
@@ -7544,8 +7629,10 @@ SDValue SITargetLowering::performFMACombine(SDNode *N,
return SDValue();
if ((Vec1 == Vec3 && Vec2 == Vec4) ||
- (Vec1 == Vec4 && Vec2 == Vec3))
- return DAG.getNode(AMDGPUISD::FDOT2, SL, MVT::f32, Vec1, Vec2, FMAAcc);
+ (Vec1 == Vec4 && Vec2 == Vec3)) {
+ return DAG.getNode(AMDGPUISD::FDOT2, SL, MVT::f32, Vec1, Vec2, FMAAcc,
+ DAG.getTargetConstant(0, SL, MVT::i1));
+ }
}
return SDValue();
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h
index ad049f2a71c3..5b3d49b3d8e3 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -25,6 +25,19 @@ class SITargetLowering final : public AMDGPUTargetLowering {
private:
const GCNSubtarget *Subtarget;
+public:
+ MVT getRegisterTypeForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
+ EVT VT) const override;
+ unsigned getNumRegistersForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
+ EVT VT) const override;
+
+ unsigned getVectorTypeBreakdownForCallingConv(
+ LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
+ unsigned &NumIntermediates, MVT &RegisterVT) const override;
+
+private:
SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL,
SDValue Chain, uint64_t Offset) const;
SDValue getImplicitArgPtr(SelectionDAG &DAG, const SDLoc &SL) const;
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
index 61c8f359e168..dc9397cf7b85 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -133,28 +133,10 @@ bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,
I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
return true;
- // V_READFIRSTLANE/V_READLANE destination register may be used as operand
- // by some SALU instruction. If exec mask is zero vector instruction
- // defining the register that is used by the scalar one is not executed
- // and scalar instruction will operate on undefined data. For
- // V_READFIRSTLANE/V_READLANE we should avoid predicated execution.
- if ((I->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) ||
- (I->getOpcode() == AMDGPU::V_READLANE_B32)) {
+ if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
return true;
- }
-
- if (I->isInlineAsm()) {
- const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
- const char *AsmStr = I->getOperand(0).getSymbolName();
-
- // inlineasm length estimate is number of bytes assuming the longest
- // instruction.
- uint64_t MaxAsmSize = TII->getInlineAsmLength(AsmStr, *MAI);
- NumInstr += MaxAsmSize / MAI->getMaxInstLength();
- } else {
- ++NumInstr;
- }
+ ++NumInstr;
if (NumInstr >= SkipThreshold)
return true;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 6c85c92454c3..f3745382a6f4 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2332,6 +2332,36 @@ bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
changesVGPRIndexingMode(MI);
}
+bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const {
+ unsigned Opcode = MI.getOpcode();
+
+ if (MI.mayStore() && isSMRD(MI))
+ return true; // scalar store or atomic
+
+ // These instructions cause shader I/O that may cause hardware lockups
+ // when executed with an empty EXEC mask.
+ //
+ // Note: exp with VM = DONE = 0 is automatically skipped by hardware when
+ // EXEC = 0, but checking for that case here seems not worth it
+ // given the typical code patterns.
+ if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
+ Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE)
+ return true;
+
+ if (MI.isInlineAsm())
+ return true; // conservative assumption
+
+ // These are like SALU instructions in terms of effects, so it's questionable
+ // whether we should return true for those.
+ //
+ // However, executing them with EXEC = 0 causes them to operate on undefined
+ // data, which we avoid by returning true here.
+ if (Opcode == AMDGPU::V_READFIRSTLANE_B32 || Opcode == AMDGPU::V_READLANE_B32)
+ return true;
+
+ return false;
+}
+
bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
switch (Imm.getBitWidth()) {
case 32:
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 0a735257d34e..d681b926504e 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -597,6 +597,9 @@ public:
return !RI.isSGPRReg(MRI, Dest);
}
+ /// Whether we must prevent this instruction from executing with EXEC = 0.
+ bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;
+
bool isInlineConstant(const APInt &Imm) const;
bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const;
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
index c3f8bfb53ef4..5c10646161b3 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1387,6 +1387,11 @@ def : GCNPat<
>;
def : GCNPat<
+ (fcanonicalize (f16 (fneg (VOP3Mods f16:$src, i32:$src_mods)))),
+ (V_MUL_F16_e64 0, (i32 CONST.FP16_NEG_ONE), $src_mods, $src, 0, 0)
+>;
+
+def : GCNPat<
(fcanonicalize (v2f16 (VOP3PMods v2f16:$src, i32:$src_mods))),
(V_PK_MUL_F16 0, (i32 CONST.V2FP16_ONE), $src_mods, $src, DSTCLAMP.NONE)
>;
@@ -1411,6 +1416,11 @@ def : GCNPat<
(fcanonicalize (f32 (VOP3Mods f32:$src, i32:$src_mods))),
(V_MUL_F32_e64 0, (i32 CONST.FP32_ONE), $src_mods, $src, 0, 0)
>;
+
+def : GCNPat<
+ (fcanonicalize (f32 (fneg (VOP3Mods f32:$src, i32:$src_mods)))),
+ (V_MUL_F32_e64 0, (i32 CONST.FP32_NEG_ONE), $src_mods, $src, 0, 0)
+>;
}
let OtherPredicates = [FP32Denormals] in {
diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 3fd3c75874a3..4eba19382315 100644
--- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -110,6 +110,7 @@ struct MIMGInfo {
#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
+#define GET_MIMGLZMappingTable_IMPL
#include "AMDGPUGenSearchableTables.inc"
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 70681c271697..5b7af8268cda 100644
--- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -42,6 +42,7 @@ namespace AMDGPU {
#define GET_MIMGBaseOpcode_DECL
#define GET_MIMGDim_DECL
#define GET_MIMGEncoding_DECL
+#define GET_MIMGLZMapping_DECL
#include "AMDGPUGenSearchableTables.inc"
namespace IsaInfo {
@@ -211,6 +212,14 @@ struct MIMGDimInfo {
LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfo(unsigned Dim);
+struct MIMGLZMappingInfo {
+ MIMGBaseOpcode L;
+ MIMGBaseOpcode LZ;
+};
+
+LLVM_READONLY
+const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
+
LLVM_READONLY
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
unsigned VDataDwords, unsigned VAddrDwords);
diff --git a/contrib/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/contrib/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 5c78ada3211e..b51828b54679 100644
--- a/contrib/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -167,13 +167,30 @@ defm : MadFmaMixPats<fma, V_FMA_MIX_F32, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16>;
let SubtargetPredicate = HasDLInsts in {
-def V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16", VOP3_Profile<VOP_F32_V2F16_V2F16_F32>, AMDGPUfdot2>;
-def V_DOT2_I32_I16 : VOP3PInst<"v_dot2_i32_i16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>, int_amdgcn_sdot2>;
-def V_DOT2_U32_U16 : VOP3PInst<"v_dot2_u32_u16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>, int_amdgcn_udot2>;
-def V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot4>;
-def V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot4>;
-def V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot8>;
-def V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot8>;
+def V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16", VOP3_Profile<VOP_F32_V2F16_V2F16_F32>>;
+def V_DOT2_I32_I16 : VOP3PInst<"v_dot2_i32_i16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>>;
+def V_DOT2_U32_U16 : VOP3PInst<"v_dot2_u32_u16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>>;
+def V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
+def V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
+def V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
+def V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
+
+multiclass DotPats<SDPatternOperator dot_op,
+ VOP3PInst dot_inst> {
+ def : GCNPat <
+ (dot_op (dot_inst.Pfl.Src0VT (VOP3PMods0 dot_inst.Pfl.Src0VT:$src0, i32:$src0_modifiers)),
+ (dot_inst.Pfl.Src1VT (VOP3PMods dot_inst.Pfl.Src1VT:$src1, i32:$src1_modifiers)),
+ (dot_inst.Pfl.Src2VT (VOP3PMods dot_inst.Pfl.Src2VT:$src2, i32:$src2_modifiers)), i1:$clamp),
+ (dot_inst $src0_modifiers, $src0, $src1_modifiers, $src1, $src2_modifiers, $src2, (as_i1imm $clamp))>;
+}
+
+defm : DotPats<AMDGPUfdot2, V_DOT2_F32_F16>;
+defm : DotPats<int_amdgcn_sdot2, V_DOT2_I32_I16>;
+defm : DotPats<int_amdgcn_udot2, V_DOT2_U32_U16>;
+defm : DotPats<int_amdgcn_sdot4, V_DOT4_I32_I8>;
+defm : DotPats<int_amdgcn_udot4, V_DOT4_U32_U8>;
+defm : DotPats<int_amdgcn_sdot8, V_DOT8_I32_I4>;
+defm : DotPats<int_amdgcn_udot8, V_DOT8_U32_U4>;
} // End SubtargetPredicate = HasDLInsts
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 2196f9b47f3b..b227eaed8d61 100644
--- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -117,7 +117,7 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// globals from all functions in PromotedGlobals.
for (auto *GV : AFI->getGlobalsPromotedToConstantPool())
PromotedGlobals.insert(GV);
-
+
// Calculate this function's optimization goal.
unsigned OptimizationGoal;
if (F.hasFnAttribute(Attribute::OptimizeNone))
@@ -367,8 +367,9 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
unsigned RC;
- InlineAsm::hasRegClassConstraint(Flags, RC);
- if (RC == ARM::GPRPairRegClassID) {
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ if (InlineAsm::hasRegClassConstraint(Flags, RC) &&
+ ARM::GPRPairRegClass.hasSubClassEq(TRI->getRegClass(RC))) {
if (NumVals != 1)
return true;
const MachineOperand &MO = MI->getOperand(OpNum);
@@ -990,7 +991,7 @@ void ARMAsmPrinter::EmitJumpTableTBInst(const MachineInstr *MI,
if (Subtarget->isThumb1Only())
EmitAlignment(2);
-
+
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI);
OutStreamer->EmitLabel(JTISymbol);
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 43e8b7d66c62..5342e6e2cd13 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -584,7 +584,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// don't know for sure yet whether we'll need that, so we guess based
// on whether there are any local variables that would trigger it.
unsigned StackAlign = TFI->getStackAlignment();
- if (TFI->hasFP(MF) &&
+ if (TFI->hasFP(MF) &&
!((MFI.getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) {
if (isFrameOffsetLegal(MI, getFrameRegister(MF), FPOffset))
return false;
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
index 63bf48abb7ac..543165de38d0 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
@@ -269,14 +269,15 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,
for (auto Reg : RegList)
State.AllocateReg(Reg);
+ // After the first item has been allocated, the rest are packed as tightly as
+ // possible. (E.g. an incoming i64 would have starting Align of 8, but we'll
+ // be allocating a bunch of i32 slots).
+ unsigned RestAlign = std::min(Align, Size);
+
for (auto &It : PendingMembers) {
It.convertToMem(State.AllocateStack(Size, Align));
State.addLoc(It);
-
- // After the first item has been allocated, the rest are packed as tightly
- // as possible. (E.g. an incoming i64 would have starting Align of 8, but
- // we'll be allocating a bunch of i32 slots).
- Align = Size;
+ Align = RestAlign;
}
// All pending members have now been allocated
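With RestAlign hoisted out of the loop, only the first pending member keeps the original alignment and the remaining members are packed at min(Align, Size). A small worked example in plain C++, using a bump allocator as a stand-in for CCState::AllocateStack:

    #include <cstdio>
    #include <vector>

    // Minimal stand-in for CCState::AllocateStack: round the running offset up
    // to 'Align' and reserve 'Size' bytes.
    static unsigned allocateStack(unsigned &NextOffset, unsigned Size,
                                  unsigned Align) {
      unsigned Offset = (NextOffset + Align - 1) & ~(Align - 1);
      NextOffset = Offset + Size;
      return Offset;
    }

    int main() {
      unsigned NextOffset = 4;          // pretend 4 bytes are already in use
      unsigned Size = 4, Align = 8;     // i32 pieces of a value whose first
                                        // piece wants 8-byte alignment
      unsigned RestAlign = Size < Align ? Size : Align;  // min(Align, Size)

      std::vector<unsigned> Offsets;
      for (unsigned I = 0; I < 4; ++I) {
        Offsets.push_back(allocateStack(NextOffset, Size, Align));
        Align = RestAlign;              // only the first piece is over-aligned
      }
      for (unsigned O : Offsets)
        std::printf("%u ", O);          // prints: 8 12 16 20
      std::printf("\n");
    }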
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index de08eb8c6985..2c4738d3cb74 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -2128,7 +2128,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
unsigned DeadSize = 0;
bool CanDeleteLEA = false;
bool BaseRegKill = false;
-
+
unsigned IdxReg = ~0U;
bool IdxRegKill = true;
if (isThumb2) {
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
index 5139a18f9263..55194ed94532 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
@@ -113,7 +113,7 @@ public:
bool isLSDA() const { return Kind == ARMCP::CPLSDA; }
bool isMachineBasicBlock() const{ return Kind == ARMCP::CPMachineBasicBlock; }
bool isPromotedGlobal() const{ return Kind == ARMCP::CPPromotedGlobal; }
-
+
int getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) override;
diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
index 26d4aaa12acf..a66cd7053c0a 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -2116,7 +2116,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
CallingConv::ID CC = F.getCallingConv();
if (Ret->getNumOperands() > 0) {
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
+ GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index af983ce2606a..a8c75702d7b5 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -372,7 +372,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
DebugLoc dl;
-
+
unsigned FramePtr = RegInfo->getFrameRegister(MF);
// Determine the sizes of each callee-save spill areas and record which frame
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 081d4ff033bd..9592dd53c347 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -2539,7 +2539,7 @@ void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
}
};
-
+
if (Range->second == 0) {
// 1. Mask includes the LSB -> Simply shift the top N bits off
NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
@@ -2633,7 +2633,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
MachineMemOperand::MOLoad, 4, 4);
cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp+1);
-
+
ReplaceNode(N, ResNode);
return;
}
@@ -2920,7 +2920,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
assert(N3.getOpcode() == ISD::Register);
unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
-
+
if (InFlag.getOpcode() == ARMISD::CMPZ) {
bool SwitchEQNEToPLMI;
SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
@@ -3023,7 +3023,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
// Other cases are autogenerated.
break;
}
-
+
case ARMISD::VZIP: {
unsigned Opc = 0;
EVT VT = N->getValueType(0);
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 47222a66f798..ede276dd91bb 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -3096,7 +3096,7 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
// need to be duplicated) or duplicating the constant wouldn't increase code
// size (implying the constant is no larger than 4 bytes).
const Function &F = DAG.getMachineFunction().getFunction();
-
+
  // We rely on this decision to inline being idempotent and unrelated to the
// use-site. We know that if we inline a variable at one use site, we'll
// inline it elsewhere too (and reuse the constant pool entry). Fast-isel
@@ -5162,7 +5162,7 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
return SDValue();
// SoftFP: read half-precision arguments:
//
- // t2: i32,ch = ...
+ // t2: i32,ch = ...
// t7: i16 = truncate t2 <~~~~ Op
// t8: f16 = bitcast t7 <~~~~ N
//
@@ -5173,7 +5173,7 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
- // Half-precision return values
+ // Half-precision return values
if (SrcVT == MVT::f16 && DstVT == MVT::i16) {
if (!HasFullFP16)
return SDValue();
@@ -13461,13 +13461,13 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
auto *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1));
if (!RHS || RHS->getZExtValue() != 4)
return false;
-
+
Offset = Op->getOperand(1);
Base = Op->getOperand(0);
AM = ISD::POST_INC;
return true;
}
-
+
bool isInc;
bool isLegal = false;
if (Subtarget->isThumb2())
diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 901138dbdfd5..db5f28480e90 100644
--- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -1275,7 +1275,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
// we're minimizing code size.
if (!MBB.getParent()->getFunction().optForMinSize() || !BaseKill)
return false;
-
+
bool HighRegsUsed = false;
for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
if (MI->getOperand(i).getReg() >= ARM::R8) {
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
index 816116772995..91310e81e398 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -126,7 +126,7 @@ class ARMFunctionInfo : public MachineFunctionInfo {
  /// The amount the literal pool has been increased by due to promoted globals.
int PromotedGlobalsIncrease = 0;
-
+
public:
ARMFunctionInfo() = default;
diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index d4fbf76f299f..4d685158e258 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -49,7 +49,7 @@ SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
case RTLIB::MEMMOVE:
AEABILibcall = AEABI_MEMMOVE;
break;
- case RTLIB::MEMSET:
+ case RTLIB::MEMSET:
AEABILibcall = AEABI_MEMSET;
if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src))
if (ConstantSrc->getZExtValue() == 0)
@@ -93,14 +93,14 @@ SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
else if (Src.getValueType().bitsLT(MVT::i32))
Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
- Entry.Node = Src;
+ Entry.Node = Src;
Entry.Ty = Type::getInt32Ty(*DAG.getContext());
Entry.IsSExt = false;
Args.push_back(Entry);
} else {
Entry.Node = Src;
Args.push_back(Entry);
-
+
Entry.Node = Size;
Args.push_back(Entry);
}
@@ -121,7 +121,7 @@ SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
std::move(Args))
.setDiscardResult();
std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
-
+
return CallResult.second;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index f8cae31641ff..94f9cefe429c 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -389,7 +389,7 @@ int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
unsigned NumVectorInstToHideOverhead = 10;
int MaxMergeDistance = 64;
- if (Ty->isVectorTy() && SE &&
+ if (Ty->isVectorTy() && SE &&
!BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
return NumVectorInstToHideOverhead;
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index cd9fa0709020..e0cd2d8e26a6 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -153,7 +153,7 @@ public:
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
- int getAddressComputationCost(Type *Val, ScalarEvolution *SE,
+ int getAddressComputationCost(Type *Val, ScalarEvolution *SE,
const SCEV *Ptr);
int getArithmeticInstrCost(
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 807d62547337..a5fbbbf26be9 100644
--- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -969,7 +969,7 @@ public:
// checks whether this operand is a memory operand computed as an offset
// applied to PC. the offset may have 8 bits of magnitude and is represented
- // with two bits of shift. textually it may be either [pc, #imm], #imm or
+ // with two bits of shift. textually it may be either [pc, #imm], #imm or
// relocable expression...
bool isThumbMemPC() const {
int64_t Val = 0;
@@ -2284,7 +2284,7 @@ public:
}
const MCSymbolRefExpr *SR = dyn_cast<MCSymbolRefExpr>(Imm.Val);
-
+
assert(SR && "Unknown value type!");
Inst.addOperand(MCOperand::createExpr(SR));
return;
@@ -2326,7 +2326,7 @@ public:
assert(isImm() && "Not an immediate!");
// If we have an immediate that's not a constant, treat it as a label
- // reference needing a fixup.
+ // reference needing a fixup.
if (!isa<MCConstantExpr>(getImm())) {
Inst.addOperand(MCOperand::createExpr(getImm()));
return;
@@ -3419,7 +3419,7 @@ int ARMAsmParser::tryParseShiftRegister(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier))
- return -1;
+ return -1;
std::string lowerCase = Tok.getString().lower();
ARM_AM::ShiftOpc ShiftTy = StringSwitch<ARM_AM::ShiftOpc>(lowerCase)
@@ -4311,7 +4311,7 @@ ARMAsmParser::parseProcIFlagsOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
- if (!Tok.is(AsmToken::Identifier))
+ if (!Tok.is(AsmToken::Identifier))
return MatchOperand_NoMatch;
StringRef IFlagsStr = Tok.getString();
@@ -4353,7 +4353,7 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) {
return MatchOperand_NoMatch;
}
unsigned SYSmvalue = Val & 0xFF;
- Parser.Lex();
+ Parser.Lex();
Operands.push_back(ARMOperand::CreateMSRMask(SYSmvalue, S));
return MatchOperand_Success;
}
@@ -4996,7 +4996,7 @@ void ARMAsmParser::cvtThumbBranches(MCInst &Inst,
// first decide whether or not the branch should be conditional
// by looking at it's location relative to an IT block
if(inITBlock()) {
- // inside an IT block we cannot have any conditional branches. any
+ // inside an IT block we cannot have any conditional branches. any
// such instructions needs to be converted to unconditional form
switch(Inst.getOpcode()) {
case ARM::tBcc: Inst.setOpcode(ARM::tB); break;
@@ -5008,11 +5008,11 @@ void ARMAsmParser::cvtThumbBranches(MCInst &Inst,
unsigned Cond = static_cast<ARMOperand &>(*Operands[CondOp]).getCondCode();
switch(Inst.getOpcode()) {
case ARM::tB:
- case ARM::tBcc:
- Inst.setOpcode(Cond == ARMCC::AL ? ARM::tB : ARM::tBcc);
+ case ARM::tBcc:
+ Inst.setOpcode(Cond == ARMCC::AL ? ARM::tB : ARM::tBcc);
break;
case ARM::t2B:
- case ARM::t2Bcc:
+ case ARM::t2Bcc:
Inst.setOpcode(Cond == ARMCC::AL ? ARM::t2B : ARM::t2Bcc);
break;
}
@@ -8882,7 +8882,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
case ARM::MOVsi: {
ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(Inst.getOperand(2).getImm());
// rrx shifts and asr/lsr of #32 is encoded as 0
- if (SOpc == ARM_AM::rrx || SOpc == ARM_AM::asr || SOpc == ARM_AM::lsr)
+ if (SOpc == ARM_AM::rrx || SOpc == ARM_AM::asr || SOpc == ARM_AM::lsr)
return false;
if (ARM_AM::getSORegOffset(Inst.getOperand(2).getImm()) == 0) {
// Shifting by zero is accepted as a vanilla 'MOVr'
@@ -9371,6 +9371,12 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
return parseDirectiveAlign(DirectiveID.getLoc()); // Use Generic on failure.
else if (IDVal == ".thumb_set")
parseDirectiveThumbSet(DirectiveID.getLoc());
+ else if (IDVal == ".inst")
+ parseDirectiveInst(DirectiveID.getLoc());
+ else if (IDVal == ".inst.n")
+ parseDirectiveInst(DirectiveID.getLoc(), 'n');
+ else if (IDVal == ".inst.w")
+ parseDirectiveInst(DirectiveID.getLoc(), 'w');
else if (!IsMachO && !IsCOFF) {
if (IDVal == ".arch")
parseDirectiveArch(DirectiveID.getLoc());
@@ -9382,12 +9388,6 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
parseDirectiveFPU(DirectiveID.getLoc());
else if (IDVal == ".fnstart")
parseDirectiveFnStart(DirectiveID.getLoc());
- else if (IDVal == ".inst")
- parseDirectiveInst(DirectiveID.getLoc());
- else if (IDVal == ".inst.n")
- parseDirectiveInst(DirectiveID.getLoc(), 'n');
- else if (IDVal == ".inst.w")
- parseDirectiveInst(DirectiveID.getLoc(), 'w');
else if (IDVal == ".object_arch")
parseDirectiveObjectArch(DirectiveID.getLoc());
else if (IDVal == ".tlsdescseq")
@@ -10012,8 +10012,8 @@ bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) {
case 'w':
break;
default:
- return Error(Loc, "cannot determine Thumb instruction size, "
- "use inst.n/inst.w instead");
+ Width = 0;
+ break;
}
} else {
if (Suffix)
@@ -10029,6 +10029,7 @@ bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) {
return Error(Loc, "expected constant expression");
}
+ char CurSuffix = Suffix;
switch (Width) {
case 2:
if (Value->getValue() > 0xffff)
@@ -10039,11 +10040,21 @@ bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) {
return Error(Loc, StringRef(Suffix ? "inst.w" : "inst") +
" operand is too big");
break;
+ case 0:
+ // Thumb mode, no width indicated. Guess from the opcode, if possible.
+ if (Value->getValue() < 0xe800)
+ CurSuffix = 'n';
+ else if (Value->getValue() >= 0xe8000000)
+ CurSuffix = 'w';
+ else
+ return Error(Loc, "cannot determine Thumb instruction size, "
+ "use inst.n/inst.w instead");
+ break;
default:
llvm_unreachable("only supported widths are 2 and 4");
}
- getTargetStreamer().emitInst(Value->getValue(), Suffix);
+ getTargetStreamer().emitInst(Value->getValue(), CurSuffix);
return false;
};
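The hunks above move the .inst, .inst.n and .inst.w directives out of the ELF-only branch and let a bare .inst in Thumb mode infer its own width from the encoding instead of failing immediately. A minimal standalone sketch of that width heuristic (hypothetical helper name, not the patch code):

#include <cstdint>
#include <optional>

// Values below 0xe800 only fit a 16-bit Thumb instruction; values of
// 0xe8000000 and up have a leading halfword in the 32-bit Thumb-2 range;
// anything in between is ambiguous and still needs an explicit
// .inst.n or .inst.w.
std::optional<unsigned> guessThumbInstWidth(uint64_t Value) {
  if (Value < 0xe800)
    return 2;
  if (Value >= 0xe8000000)
    return 4;
  return std::nullopt;
}

For values in the ambiguous range the directive keeps reporting the original "cannot determine Thumb instruction size" error.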
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 4733cf49827e..61bec04678dd 100644
--- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -620,7 +620,7 @@ ThumbDisassembler::AddThumbPredicate(MCInst &MI) const {
// assume a predicate of AL.
unsigned CC;
CC = ITBlock.getITCC();
- if (CC == 0xF)
+ if (CC == 0xF)
CC = ARMCC::AL;
if (ITBlock.instrInITBlock())
ITBlock.advanceITState();
@@ -888,7 +888,7 @@ DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- if (RegNo == 15)
+ if (RegNo == 15)
S = MCDisassembler::SoftFail;
Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder));
@@ -2171,7 +2171,7 @@ static DecodeStatus DecodeSETPANInstruction(MCInst &Inst, unsigned Insn,
const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
const FeatureBitset &FeatureBits = Dis->getSubtargetInfo().getFeatureBits();
- if (!FeatureBits[ARM::HasV8_1aOps] ||
+ if (!FeatureBits[ARM::HasV8_1aOps] ||
!FeatureBits[ARM::HasV8Ops])
return MCDisassembler::Fail;
@@ -4467,7 +4467,7 @@ static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn,
index = fieldFromInstruction(Insn, 7, 1);
switch (fieldFromInstruction(Insn, 4, 2)) {
- case 0:
+ case 0:
align = 0; break;
case 3:
align = 4; break;
@@ -5279,7 +5279,7 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
return MCDisassembler::Fail;
if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
- if (!Check(S, DecodeAddrMode7Operand(Inst, Rn, Address, Decoder)))
+ if (!Check(S, DecodeAddrMode7Operand(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
if (!Check(S, DecodePostIdxReg(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 75ed40c18fa2..bfc32073ba18 100644
--- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -834,7 +834,7 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
return;
}
- O << SYSm;
+ O << SYSm;
return;
}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index dfa339091a7b..7d04c73fb3f2 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -64,7 +64,7 @@ bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
}
}
-// Need to examine the Fixup when determining whether to
+// Need to examine the Fixup when determining whether to
// emit the relocation as an explicit symbol or as a section relative
// offset
unsigned ARMELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 0dab789505d5..b37b8073548f 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -740,7 +740,7 @@ getARMBLTargetOpValue(const MCInst &MI, unsigned OpIdx,
const MCOperand MO = MI.getOperand(OpIdx);
if (MO.isExpr()) {
if (HasConditionalBranch(MI))
- return ::getBranchTargetOpValue(MI, OpIdx,
+ return ::getBranchTargetOpValue(MI, OpIdx,
ARM::fixup_arm_condbl, Fixups, STI);
return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_uncondbl, Fixups, STI);
}
@@ -766,10 +766,10 @@ uint32_t ARMMCCodeEmitter::getThumbBranchTargetOpValue(
const MCSubtargetInfo &STI) const {
unsigned Val = 0;
const MCOperand MO = MI.getOperand(OpIdx);
-
+
if(MO.isExpr())
return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_uncondbranch, Fixups, STI);
- else
+ else
Val = MO.getImm() >> 1;
bool I = (Val & 0x800000);
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index 42371736fef4..63aa9735e8a4 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -13,6 +13,8 @@
#include "ARMTargetMachine.h"
#include "llvm/MC/ConstantPools.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -47,6 +49,41 @@ void ARMTargetStreamer::finish() { ConstantPools->emitAll(Streamer); }
// reset() - Reset any state
void ARMTargetStreamer::reset() {}
+void ARMTargetStreamer::emitInst(uint32_t Inst, char Suffix) {
+ unsigned Size;
+ char Buffer[4];
+ const bool LittleEndian = getStreamer().getContext().getAsmInfo()->isLittleEndian();
+
+ switch (Suffix) {
+ case '\0':
+ Size = 4;
+
+ for (unsigned II = 0, IE = Size; II != IE; II++) {
+ const unsigned I = LittleEndian ? (Size - II - 1) : II;
+ Buffer[Size - II - 1] = uint8_t(Inst >> I * CHAR_BIT);
+ }
+
+ break;
+ case 'n':
+ case 'w':
+ Size = (Suffix == 'n' ? 2 : 4);
+
+ // Thumb wide instructions are emitted as a pair of 16-bit words of the
+ // appropriate endianness.
+ for (unsigned II = 0, IE = Size; II != IE; II = II + 2) {
+ const unsigned I0 = LittleEndian ? II + 0 : II + 1;
+ const unsigned I1 = LittleEndian ? II + 1 : II + 0;
+ Buffer[Size - II - 2] = uint8_t(Inst >> I0 * CHAR_BIT);
+ Buffer[Size - II - 1] = uint8_t(Inst >> I1 * CHAR_BIT);
+ }
+
+ break;
+ default:
+ llvm_unreachable("Invalid Suffix");
+ }
+ getStreamer().EmitBytes(StringRef(Buffer, Size));
+}
+
// The remaining callbacks should be handled separately by each
// streamer.
void ARMTargetStreamer::emitFnStart() {}
@@ -76,7 +113,6 @@ void ARMTargetStreamer::emitArchExtension(unsigned ArchExt) {}
void ARMTargetStreamer::emitObjectArch(ARM::ArchKind Arch) {}
void ARMTargetStreamer::emitFPU(unsigned FPU) {}
void ARMTargetStreamer::finishAttributeSection() {}
-void ARMTargetStreamer::emitInst(uint32_t Inst, char Suffix) {}
void
ARMTargetStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) {}
void ARMTargetStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {}
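The new base-class emitInst above writes the instruction bytes directly, replacing the empty default removed in this hunk. A small self-check of the byte layout it produces for a wide Thumb encoding (assuming a little-endian target; the value is only illustrative):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Inst = 0xf000d000; // illustrative 32-bit (wide) encoding
  uint8_t Buf[4];
  // Same layout as the 'w' case above: two halfwords, each stored
  // little-endian, most-significant halfword first.
  for (unsigned II = 0; II != 4; II += 2) {
    Buf[4 - II - 2] = uint8_t(Inst >> (II + 0) * 8);
    Buf[4 - II - 1] = uint8_t(Inst >> (II + 1) * 8);
  }
  const uint8_t Expected[4] = {0x00, 0xf0, 0x00, 0xd0};
  for (unsigned K = 0; K != 4; ++K)
    assert(Buf[K] == Expected[K]);
  return 0;
}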
diff --git a/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
index 637e4a44c428..7f03e1463c1d 100644
--- a/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
@@ -233,7 +233,7 @@ bool MLxExpansion::FindMLxHazard(MachineInstr *MI) {
// On Swift, we mostly care about hazards from multiplication instructions
// writing the accumulator and the pipelining of loop iterations by out-of-
- // order execution.
+ // order execution.
if (isSwift)
return isFpMulInstruction(DefMI->getOpcode()) || hasLoopHazard(MI);
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index a65e22fd86e8..5c745e112b2e 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -127,7 +127,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
DebugLoc dl;
-
+
unsigned FramePtr = RegInfo->getFrameRegister(MF);
unsigned BasePtr = RegInfo->getBaseRegister();
int CFAOffset = 0;
diff --git a/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp b/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp
index c1515571aae5..1b412a9c6813 100644
--- a/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp
@@ -63,6 +63,13 @@ AVRTargetLowering::AVRTargetLowering(AVRTargetMachine &tm)
setTruncStoreAction(MVT::i16, MVT::i8, Expand);
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setOperationAction(ISD::ADDC, VT, Legal);
+ setOperationAction(ISD::SUBC, VT, Legal);
+ setOperationAction(ISD::ADDE, VT, Legal);
+ setOperationAction(ISD::SUBE, VT, Legal);
+ }
+
// sub (x, imm) gets canonicalized to add (x, -imm), so for illegal types
// revert into a sub since we don't have an add with immediate instruction.
setOperationAction(ISD::ADD, MVT::i32, Custom);
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
index 4791b067aa8d..ba255d30fede 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -1777,6 +1777,7 @@ namespace {
const BitTracker::RegisterCell &RC);
bool simplifyExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD,
const BitTracker::RegisterCell &RC, const RegisterSet &AVs);
+ bool simplifyRCmp0(MachineInstr *MI, BitTracker::RegisterRef RD);
// Cache of created instructions to avoid creating duplicates.
// XXX Currently only used by genBitSplit.
@@ -2567,6 +2568,127 @@ bool BitSimplification::simplifyExtractLow(MachineInstr *MI,
return Changed;
}
+bool BitSimplification::simplifyRCmp0(MachineInstr *MI,
+ BitTracker::RegisterRef RD) {
+ unsigned Opc = MI->getOpcode();
+ if (Opc != Hexagon::A4_rcmpeqi && Opc != Hexagon::A4_rcmpneqi)
+ return false;
+ MachineOperand &CmpOp = MI->getOperand(2);
+ if (!CmpOp.isImm() || CmpOp.getImm() != 0)
+ return false;
+
+ const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
+ if (FRC != &Hexagon::IntRegsRegClass && FRC != &Hexagon::DoubleRegsRegClass)
+ return false;
+ assert(RD.Sub == 0);
+
+ MachineBasicBlock &B = *MI->getParent();
+ const DebugLoc &DL = MI->getDebugLoc();
+ auto At = MI->isPHI() ? B.getFirstNonPHI()
+ : MachineBasicBlock::iterator(MI);
+ bool KnownZ = true;
+ bool KnownNZ = false;
+
+ BitTracker::RegisterRef SR = MI->getOperand(1);
+ if (!BT.has(SR.Reg))
+ return false;
+ const BitTracker::RegisterCell &SC = BT.lookup(SR.Reg);
+ unsigned F, W;
+ if (!HBS::getSubregMask(SR, F, W, MRI))
+ return false;
+
+ for (uint16_t I = F; I != F+W; ++I) {
+ const BitTracker::BitValue &V = SC[I];
+ if (!V.is(0))
+ KnownZ = false;
+ if (V.is(1))
+ KnownNZ = true;
+ }
+
+ auto ReplaceWithConst = [&] (int C) {
+ unsigned NewR = MRI.createVirtualRegister(FRC);
+ BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), NewR)
+ .addImm(C);
+ HBS::replaceReg(RD.Reg, NewR, MRI);
+ BitTracker::RegisterCell NewRC(W);
+ for (uint16_t I = 0; I != W; ++I) {
+ NewRC[I] = BitTracker::BitValue(C & 1);
+ C = unsigned(C) >> 1;
+ }
+ BT.put(BitTracker::RegisterRef(NewR), NewRC);
+ return true;
+ };
+
+ auto IsNonZero = [] (const MachineOperand &Op) {
+ if (Op.isGlobal() || Op.isBlockAddress())
+ return true;
+ if (Op.isImm())
+ return Op.getImm() != 0;
+ if (Op.isCImm())
+ return !Op.getCImm()->isZero();
+ if (Op.isFPImm())
+ return !Op.getFPImm()->isZero();
+ return false;
+ };
+
+ auto IsZero = [] (const MachineOperand &Op) {
+ if (Op.isGlobal() || Op.isBlockAddress())
+ return false;
+ if (Op.isImm())
+ return Op.getImm() == 0;
+ if (Op.isCImm())
+ return Op.getCImm()->isZero();
+ if (Op.isFPImm())
+ return Op.getFPImm()->isZero();
+ return false;
+ };
+
+ // If the source register is known to be 0 or non-0, the comparison can
+ // be folded to a load of a constant.
+ if (KnownZ || KnownNZ) {
+ assert(KnownZ != KnownNZ && "Register cannot be both 0 and non-0");
+ return ReplaceWithConst(KnownZ == (Opc == Hexagon::A4_rcmpeqi));
+ }
+
+ // Special case: if the compare comes from a C2_muxii, then we know the
+ // two possible constants that can be the source value.
+ MachineInstr *InpDef = MRI.getVRegDef(SR.Reg);
+ if (!InpDef)
+ return false;
+ if (SR.Sub == 0 && InpDef->getOpcode() == Hexagon::C2_muxii) {
+ MachineOperand &Src1 = InpDef->getOperand(2);
+ MachineOperand &Src2 = InpDef->getOperand(3);
+ // Check if both are non-zero.
+ bool KnownNZ1 = IsNonZero(Src1), KnownNZ2 = IsNonZero(Src2);
+ if (KnownNZ1 && KnownNZ2)
+ return ReplaceWithConst(Opc == Hexagon::A4_rcmpneqi);
+ // Check if both are zero.
+ bool KnownZ1 = IsZero(Src1), KnownZ2 = IsZero(Src2);
+ if (KnownZ1 && KnownZ2)
+ return ReplaceWithConst(Opc == Hexagon::A4_rcmpeqi);
+
+ // If for both operands we know that they are either 0 or non-0,
+ // replace the comparison with a C2_muxii, using the same predicate
+ // register, but with operands substituted with 0/1 accordingly.
+ if ((KnownZ1 || KnownNZ1) && (KnownZ2 || KnownNZ2)) {
+ unsigned NewR = MRI.createVirtualRegister(FRC);
+ BuildMI(B, At, DL, HII.get(Hexagon::C2_muxii), NewR)
+ .addReg(InpDef->getOperand(1).getReg())
+ .addImm(KnownZ1 == (Opc == Hexagon::A4_rcmpeqi))
+ .addImm(KnownZ2 == (Opc == Hexagon::A4_rcmpeqi));
+ HBS::replaceReg(RD.Reg, NewR, MRI);
+ // Create a new cell with only the least significant bit unknown.
+ BitTracker::RegisterCell NewRC(W);
+ NewRC[0] = BitTracker::BitValue::self();
+ NewRC.fill(1, W, BitTracker::BitValue::Zero);
+ BT.put(BitTracker::RegisterRef(NewR), NewRC);
+ return true;
+ }
+ }
+
+ return false;
+}
+
bool BitSimplification::processBlock(MachineBasicBlock &B,
const RegisterSet &AVs) {
if (!BT.reached(&B))
@@ -2615,6 +2737,7 @@ bool BitSimplification::processBlock(MachineBasicBlock &B,
T = T || genExtractHalf(MI, RD, RC);
T = T || genCombineHalf(MI, RD, RC);
T = T || genExtractLow(MI, RD, RC);
+ T = T || simplifyRCmp0(MI, RD);
Changed |= T;
continue;
}
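The new simplifyRCmp0 above folds A4_rcmpeqi/A4_rcmpneqi against 0 whenever bit tracking already knows whether the source register is zero, and it also handles a C2_muxii source whose two operands are each provably zero or non-zero, rewriting the compare as a constant or as another mux over 0/1. The core fold reduces to this sketch (hypothetical enum and helper; the real pass operates on BitTracker register cells):

#include <optional>

enum class Known { Zero, NonZero, Unknown };

// r = rcmp.eq(src, #0) yields 1 when src is known zero and 0 when it is
// known non-zero; rcmp.neq is the inverse. Unknown sources cannot be folded.
std::optional<int> foldRCmp0(bool IsEq, Known Src) {
  if (Src == Known::Zero)
    return IsEq ? 1 : 0;
  if (Src == Known::NonZero)
    return IsEq ? 0 : 1;
  return std::nullopt;
}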
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
index e13cfd3f655a..94aacbed6af6 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
@@ -347,9 +347,11 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
return rr0(RC, Outputs);
}
case C2_tfrrp: {
- RegisterCell RC = RegisterCell::self(Reg[0].Reg, W0);
- W0 = 8; // XXX Pred size
- return rr0(eINS(RC, eXTR(rc(1), 0, W0), 0), Outputs);
+ uint16_t RW = W0;
+ uint16_t PW = 8; // XXX Pred size: getRegBitWidth(Reg[1]);
+ RegisterCell RC = RegisterCell::self(Reg[0].Reg, RW);
+ RC.fill(PW, RW, BT::BitValue::Zero);
+ return rr0(eINS(RC, eXTR(rc(1), 0, PW), 0), Outputs);
}
// Arithmetic:
@@ -950,6 +952,19 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
}
default:
+ // For instructions that define a single predicate registers, store
+ // the low 8 bits of the register only.
+ if (unsigned DefR = getUniqueDefVReg(MI)) {
+ if (MRI.getRegClass(DefR) == &Hexagon::PredRegsRegClass) {
+ BT::RegisterRef PD(DefR, 0);
+ uint16_t RW = getRegBitWidth(PD);
+ uint16_t PW = 8; // XXX Pred size: getRegBitWidth(Reg[1]);
+ RegisterCell RC = RegisterCell::self(DefR, RW);
+ RC.fill(PW, RW, BT::BitValue::Zero);
+ putCell(PD, RC, Outputs);
+ return true;
+ }
+ }
return MachineEvaluator::evaluate(MI, Inputs, Outputs);
}
#undef im
@@ -1016,6 +1031,21 @@ bool HexagonEvaluator::evaluate(const MachineInstr &BI,
return true;
}
+unsigned HexagonEvaluator::getUniqueDefVReg(const MachineInstr &MI) const {
+ unsigned DefReg = 0;
+ for (const MachineOperand &Op : MI.operands()) {
+ if (!Op.isReg() || !Op.isDef())
+ continue;
+ unsigned R = Op.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ continue;
+ if (DefReg != 0)
+ return 0;
+ DefReg = R;
+ }
+ return DefReg;
+}
+
bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI,
const CellMapType &Inputs,
CellMapType &Outputs) const {
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.h b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.h
index d9dd04e1b088..f0b7c9d91950 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.h
@@ -49,6 +49,7 @@ struct HexagonEvaluator : public BitTracker::MachineEvaluator {
const HexagonInstrInfo &TII;
private:
+ unsigned getUniqueDefVReg(const MachineInstr &MI) const;
bool evaluateLoad(const MachineInstr &MI, const CellMapType &Inputs,
CellMapType &Outputs) const;
bool evaluateFormalCopy(const MachineInstr &MI, const CellMapType &Inputs,
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
index 183dee36a047..de486ec4b7bd 100644
--- a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
@@ -2,7 +2,7 @@
//
// The LLVM Compiler Infrastructure
//
-// This file is distributed under the University of Illinois Open Source
+// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 2acf701b43cb..ce7db657f5e9 100644
--- a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -7371,7 +7371,7 @@ bool MipsAsmParser::parseDirectiveGpWord() {
getParser().getStreamer().EmitGPRel32Value(Value);
if (getLexer().isNot(AsmToken::EndOfStatement))
- return Error(getLexer().getLoc(),
+ return Error(getLexer().getLoc(),
"unexpected token, expected end of statement");
Parser.Lex(); // Eat EndOfStatement token.
return false;
@@ -7506,7 +7506,7 @@ bool MipsAsmParser::parseDirectiveOption() {
}
// Unknown option.
- Warning(Parser.getTok().getLoc(),
+ Warning(Parser.getTok().getLoc(),
"unknown option, expected 'pic0' or 'pic2'");
Parser.eatToEndOfStatement();
return false;
@@ -8193,7 +8193,7 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".abicalls") {
getTargetStreamer().emitDirectiveAbiCalls();
if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
- Error(Parser.getTok().getLoc(),
+ Error(Parser.getTok().getLoc(),
"unexpected token, expected end of statement");
}
return false;
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
index fdb560f3c72f..d7f6cf91db73 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -114,7 +114,7 @@ namespace Mips {
// resulting in - R_MIPS_GOT_DISP
fixup_Mips_GOT_DISP,
- // resulting in - R_MIPS_HIGHER/R_MICROMIPS_HIGHER
+ // resulting in - R_MIPS_HIGHER/R_MICROMIPS_HIGHER
fixup_Mips_HIGHER,
fixup_MICROMIPS_HIGHER,
diff --git a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
index 8ffc0731abcb..2e0c25de2bc8 100644
--- a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -1094,7 +1094,7 @@ void MipsAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) {
// ALIGN
// B .tmpN
// 11 NOP instructions (44 bytes)
- // ADDIU T9, T9, 52
+ // ADDIU T9, T9, 52
// .tmpN
//
// We need the 44 bytes (11 instructions) because at runtime, we'd
diff --git a/contrib/llvm/lib/Target/Mips/MipsCallLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsCallLowering.cpp
index e82f62260b3f..a705ebb6b193 100644
--- a/contrib/llvm/lib/Target/Mips/MipsCallLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsCallLowering.cpp
@@ -418,7 +418,8 @@ void MipsCallLowering::subTargetRegTypeForCallingConv(
for (auto &Arg : Args) {
EVT VT = TLI.getValueType(DL, Arg.Ty);
- MVT RegisterVT = TLI.getRegisterTypeForCallingConv(F.getContext(), VT);
+ MVT RegisterVT = TLI.getRegisterTypeForCallingConv(F.getContext(),
+ F.getCallingConv(), VT);
ISD::ArgFlagsTy Flags = Arg.Flags;
Flags.setOrigAlign(TLI.getABIAlignmentForCallingConv(Arg.Ty, DL));
diff --git a/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp b/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
index 9eb13a68e561..744523cc6cb9 100644
--- a/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//
// This pass is used to make Pc relative loads of constants.
-// For now, only Mips16 will use this.
+// For now, only Mips16 will use this.
//
// Loading constants inline is expensive on Mips16 and it's in general better
// to place the constant nearby in code space and then it can be loaded with a
@@ -1171,7 +1171,7 @@ static inline unsigned getUnconditionalBrDisp(int Opc) {
/// findAvailableWater - Look for an existing entry in the WaterList in which
/// we can place the CPE referenced from U so it's within range of U's MI.
/// Returns true if found, false if not. If it returns true, WaterIter
-/// is set to the WaterList entry.
+/// is set to the WaterList entry.
/// To ensure that this pass
/// terminates, the CPE location for a particular CPUser is only allowed to
/// move to a lower address, so search backward from the end of the list and
@@ -1231,7 +1231,7 @@ void MipsConstantIslands::createNewWater(unsigned CPUserIndex,
const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()];
// If the block does not end in an unconditional branch already, and if the
- // end of the block is within range, make new water there.
+ // end of the block is within range, make new water there.
if (BBHasFallthrough(UserMBB)) {
// Size of branch to insert.
unsigned Delta = 2;
@@ -1258,7 +1258,7 @@ void MipsConstantIslands::createNewWater(unsigned CPUserIndex,
}
}
- // What a big block. Find a place within the block to split it.
+ // What a big block. Find a place within the block to split it.
// Try to split the block so it's fully aligned. Compute the latest split
// point where we can add a 4-byte branch instruction, and then align to
@@ -1582,7 +1582,7 @@ MipsConstantIslands::fixupConditionalBr(ImmBranch &Br) {
MachineInstr *BMI = &MBB->back();
bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
unsigned OppositeBranchOpcode = TII->getOppositeBranchOpc(Opcode);
-
+
++NumCBrFixed;
if (BMI != MI) {
if (std::next(MachineBasicBlock::iterator(MI)) == std::prev(MBB->end()) &&
@@ -1595,7 +1595,7 @@ MipsConstantIslands::fixupConditionalBr(ImmBranch &Br) {
// bnez L2
// b L1
unsigned BMITargetOperand = branchTargetOperand(BMI);
- MachineBasicBlock *NewDest =
+ MachineBasicBlock *NewDest =
BMI->getOperand(BMITargetOperand).getMBB();
if (isBBInRange(MI, NewDest, Br.MaxDisp)) {
LLVM_DEBUG(
diff --git a/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp b/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp
index 7b39507812ed..19b30a44e86a 100644
--- a/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp
@@ -1662,7 +1662,7 @@ bool MipsFastISel::selectRet(const Instruction *I) {
return false;
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
+ GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 9ffc38356b76..0677d378a115 100644
--- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -111,6 +111,7 @@ static bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
// The MIPS MSA ABI passes vector arguments in the integer register set.
// The number of integer registers used is dependant on the ABI used.
MVT MipsTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
EVT VT) const {
if (VT.isVector()) {
if (Subtarget.isABI_O32()) {
@@ -123,6 +124,7 @@ MVT MipsTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
}
unsigned MipsTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
EVT VT) const {
if (VT.isVector())
return std::max((VT.getSizeInBits() / (Subtarget.isABI_O32() ? 32 : 64)),
@@ -131,10 +133,10 @@ unsigned MipsTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
}
unsigned MipsTargetLowering::getVectorTypeBreakdownForCallingConv(
- LLVMContext &Context, EVT VT, EVT &IntermediateVT,
+ LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
unsigned &NumIntermediates, MVT &RegisterVT) const {
// Break down vector types to either 2 i64s or 4 i32s.
- RegisterVT = getRegisterTypeForCallingConv(Context, VT) ;
+ RegisterVT = getRegisterTypeForCallingConv(Context, CC, VT);
IntermediateVT = RegisterVT;
NumIntermediates = VT.getSizeInBits() < RegisterVT.getSizeInBits()
? VT.getVectorNumElements()
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
index b58d92c370d8..5a0de45c44f3 100644
--- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
+++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -288,17 +288,18 @@ class TargetRegisterClass;
/// Return the register type for a given MVT, ensuring vectors are treated
/// as a series of gpr sized integers.
- MVT getRegisterTypeForCallingConv(LLVMContext &Context,
+ MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
EVT VT) const override;
/// Return the number of registers for a given MVT, ensuring vectors are
/// treated as a series of gpr sized integers.
unsigned getNumRegistersForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
EVT VT) const override;
/// Break down vectors to the correct number of gpr sized integers.
unsigned getVectorTypeBreakdownForCallingConv(
- LLVMContext &Context, EVT VT, EVT &IntermediateVT,
+ LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
unsigned &NumIntermediates, MVT &RegisterVT) const override;
/// Return the correct alignment for the current calling convention.
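The Mips hunks above only thread the calling-convention ID through these hooks; the MSA splitting rule itself is unchanged. As a standalone sketch of that rule (hypothetical helper, assuming the same size-over-GPR-width computation shown in the .cpp hunk):

// v4i32 (128 bits) splits into 4 GPR-sized pieces under O32 and 2 under N64.
unsigned numGPRPiecesForVector(unsigned VectorBits, bool IsABI_O32) {
  unsigned GPRBits = IsABI_O32 ? 32 : 64;
  unsigned Pieces = VectorBits / GPRBits;
  return Pieces ? Pieces : 1;
}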
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstructionSelector.cpp b/contrib/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
index af0ac006bc9e..6c5b83021f74 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
@@ -166,6 +166,33 @@ bool MipsInstructionSelector::select(MachineInstr &I,
I.eraseFromParent();
return true;
}
+ case G_GLOBAL_VALUE: {
+ if (MF.getTarget().isPositionIndependent())
+ return false;
+
+ const llvm::GlobalValue *GVal = I.getOperand(1).getGlobal();
+ unsigned LUiReg = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ MachineInstr *LUi, *ADDiu;
+
+ LUi = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::LUi))
+ .addDef(LUiReg)
+ .addGlobalAddress(GVal);
+ LUi->getOperand(1).setTargetFlags(MipsII::MO_ABS_HI);
+
+ ADDiu = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::ADDiu))
+ .addDef(I.getOperand(0).getReg())
+ .addUse(LUiReg)
+ .addGlobalAddress(GVal);
+ ADDiu->getOperand(2).setTargetFlags(MipsII::MO_ABS_LO);
+
+ if (!constrainSelectedInstRegOperands(*LUi, TII, TRI, RBI))
+ return false;
+ if (!constrainSelectedInstRegOperands(*ADDiu, TII, TRI, RBI))
+ return false;
+
+ I.eraseFromParent();
+ return true;
+ }
default:
return false;
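For non-PIC code the new G_GLOBAL_VALUE case above selects an LUi of the %hi part followed by an ADDiu of the %lo part (MO_ABS_HI/MO_ABS_LO); the actual values are filled in by the fixup machinery, not by the selector. A small self-check of how the two halves rebuild a 32-bit address, given that ADDiu sign-extends its 16-bit immediate (illustrative address only):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Addr = 0x1234abcd;                    // assumed symbol address
  uint16_t Lo = uint16_t(Addr);                  // %lo(sym)
  uint16_t Hi = uint16_t((Addr + 0x8000) >> 16); // %hi(sym), carry-adjusted
  // LUi places Hi in the upper halfword; ADDiu adds the sign-extended Lo.
  uint32_t Rebuilt = (uint32_t(Hi) << 16) + uint32_t(int32_t(int16_t(Lo)));
  assert(Rebuilt == Addr);
  return 0;
}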
diff --git a/contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
index da6f9dabdaaf..fb259516be09 100644
--- a/contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
@@ -36,6 +36,9 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
getActionDefinitionsBuilder(G_FRAME_INDEX)
.legalFor({p0});
+ getActionDefinitionsBuilder(G_GLOBAL_VALUE)
+ .legalFor({p0});
+
computeTables();
verify(*ST.getInstrInfo());
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
index cef21f447205..351135079217 100644
--- a/contrib/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
@@ -88,6 +88,7 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
case G_CONSTANT:
case G_FRAME_INDEX:
+ case G_GLOBAL_VALUE:
OperandsMapping =
getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx], nullptr});
break;
diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
index 676d702ba63e..896dd0eb0a5e 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
+++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
@@ -163,7 +163,7 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
// HasEVA -- supports EVA ASE.
bool HasEVA;
-
+
// nomadd4 - disables generation of 4-operand madd.s, madd.d and
// related instructions.
bool DisableMadd4;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 3b042c74b26c..efe98003b1c8 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -248,7 +248,7 @@ protected:
private:
bool GlobalsEmitted;
-
+
// This is specific per MachineFunction.
const MachineRegisterInfo *MRI;
// The contents are specific for each
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
index f12ed81b6d9f..ad1d7cbb52fc 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
@@ -2,7 +2,7 @@
//
// The LLVM Compiler Infrastructure
//
-// This file is distributed under the University of Illinois Open Source
+// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h b/contrib/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
index 10f1135ad841..5a9115f6f7f1 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
@@ -2,7 +2,7 @@
//
// The LLVM Compiler Infrastructure
//
-// This file is distributed under the University of Illinois Open Source
+// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index ea709a73ebf2..fd7f81591426 100644
--- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -175,7 +175,7 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O,
+ raw_ostream &O,
const char *Modifier) {
unsigned Code = MI->getOperand(OpNo).getImm();
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index f000fbb98110..351ccefa2da2 100644
--- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -35,11 +35,11 @@ public:
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
const MCSubtargetInfo &STI) override;
-
+
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
-
+
bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
unsigned PrintMethodIdx,
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index 8ac461b96b88..fb7bf23509c7 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -61,7 +61,7 @@ PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) {
CommentString = "#";
// Uses '.section' before '.bss' directive
- UsesELFSectionDirectiveForBSS = true;
+ UsesELFSectionDirectiveForBSS = true;
// Debug Information
SupportsDebugInformation = true;
@@ -73,7 +73,7 @@ PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) {
// Exceptions handling
ExceptionsType = ExceptionHandling::DwarfCFI;
-
+
ZeroDirective = "\t.space\t";
Data64bitsDirective = is64Bit ? "\t.quad\t" : nullptr;
AssemblerDialect = 1; // New-Style mnemonics.
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 2b948ca60028..57bda1403c62 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -102,7 +102,7 @@ public:
unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
-
+
// getBinaryCodeForInstr - TableGen'erated function for getting the
// binary encoding for an instruction.
uint64_t getBinaryCodeForInstr(const MCInst &MI,
@@ -138,7 +138,7 @@ public:
default:
llvm_unreachable("Invalid instruction size");
}
-
+
++MCNumEmitted; // Keep track of the # of mi's emitted.
}
@@ -147,7 +147,7 @@ private:
void verifyInstructionPredicates(const MCInst &MI,
uint64_t AvailableFeatures) const;
};
-
+
} // end anonymous namespace
MCCodeEmitter *llvm::createPPCMCCodeEmitter(const MCInstrInfo &MCII,
@@ -162,7 +162,7 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
-
+
// Add a fixup for the branch target.
Fixups.push_back(MCFixup::create(0, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_br24));
@@ -212,7 +212,7 @@ unsigned PPCMCCodeEmitter::getImm16Encoding(const MCInst &MI, unsigned OpNo,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
-
+
// Add a fixup for the immediate field.
Fixups.push_back(MCFixup::create(IsLittleEndian? 0 : 2, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_half16));
@@ -226,11 +226,11 @@ unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo,
// displacement and the next 5 bits as the register #.
assert(MI.getOperand(OpNo+1).isReg());
unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 16;
-
+
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isImm())
return (getMachineOpValue(MI, MO, Fixups, STI) & 0xFFFF) | RegBits;
-
+
// Add a fixup for the displacement field.
Fixups.push_back(MCFixup::create(IsLittleEndian? 0 : 2, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_half16));
@@ -244,11 +244,11 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
// displacement and the next 5 bits as the register #.
assert(MI.getOperand(OpNo+1).isReg());
unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 14;
-
+
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isImm())
return ((getMachineOpValue(MI, MO, Fixups, STI) >> 2) & 0x3FFF) | RegBits;
-
+
// Add a fixup for the displacement field.
Fixups.push_back(MCFixup::create(IsLittleEndian? 0 : 2, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_half16ds));
@@ -320,7 +320,7 @@ unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isReg()) return getMachineOpValue(MI, MO, Fixups, STI);
-
+
// Add a fixup for the TLS register, which simply provides a relocation
// hint to the linker that this statement is part of a relocation sequence.
// Return the thread-pointer register's encoding.
@@ -373,7 +373,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
return Encode;
}
-
+
assert(MO.isImm() &&
"Relocation required in an instruction that we cannot encode!");
return MO.getImm();
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
index fe7e7aeeb182..481ba3f09cc7 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -58,7 +58,7 @@ namespace PPC {
PRED_BIT_SET = 1024,
PRED_BIT_UNSET = 1025
};
-
+
// Bit for branch taken (plus) or not-taken (minus) hint
enum BranchHintBit {
BR_NO_HINT = 0x0,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.h b/contrib/llvm/lib/Target/PowerPC/PPC.h
index dfdec246e868..bfc613af3dc0 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPC.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.h
@@ -66,7 +66,7 @@ namespace llvm {
extern char &PPCVSXFMAMutateID;
namespace PPCII {
-
+
/// Target Operand Flag enum.
enum TOF {
//===------------------------------------------------------------------===//
@@ -111,7 +111,7 @@ namespace llvm {
MO_TLS = 8 << 4
};
} // end namespace PPCII
-
+
} // end namespace llvm;
#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
index 64b8f1168beb..0d1bb9297bcb 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -130,7 +130,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
BlockSizes[MBB->getNumber()].first = BlockSize;
FuncSize += BlockSize;
}
-
+
// If the entire function is smaller than the displacement of a branch field,
// we know we don't need to shrink any branches in this function. This is a
// common case.
@@ -138,7 +138,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
BlockSizes.clear();
return false;
}
-
+
// For each conditional branch, if the offset to its destination is larger
// than the offset field allows, transform it into a long branch sequence
// like this:
@@ -153,7 +153,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
while (MadeChange) {
// Iteratively expand branches until we reach a fixed point.
MadeChange = false;
-
+
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
++MFI) {
MachineBasicBlock &MBB = *MFI;
@@ -175,7 +175,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
MBBStartOffset += TII->getInstSizeInBytes(*I);
continue;
}
-
+
// Determine the offset from the current branch to the destination
// block.
int BranchSize;
@@ -184,7 +184,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
// start of this block to this branch, plus the sizes of all blocks
// from this block to the dest.
BranchSize = MBBStartOffset;
-
+
for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i)
BranchSize += BlockSizes[i].first;
} else {
@@ -213,7 +213,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
// 2. Target MBB
PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm();
unsigned CRReg = I->getOperand(1).getReg();
-
+
// Jump over the uncond branch inst (i.e. $PC+8) on opposite condition.
BuildMI(MBB, I, dl, TII->get(PPC::BCC))
.addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2);
@@ -234,7 +234,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
} else {
llvm_unreachable("Unhandled branch type!");
}
-
+
// Uncond branch to the real destination.
I = BuildMI(MBB, I, dl, TII->get(PPC::B)).addMBB(Dest);
@@ -277,7 +277,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
EverMadeChange |= MadeChange;
}
-
+
BlockSizes.clear();
return true;
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp b/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
index ed5e496b32fd..ac931f7d0ec0 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
@@ -73,7 +73,7 @@ protected:
if ((*PI)->empty())
continue;
-
+
for (MachineBasicBlock::iterator J = (*PI)->getLastNonDebugInstr();;) {
if (J == (*PI)->end())
break;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
index b00655b50229..f212894035db 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -1697,7 +1697,7 @@ bool PPCFastISel::SelectRet(const Instruction *I) {
if (Ret->getNumOperands() > 0) {
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
+ GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index f0000c5bafd7..84dacf396462 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -174,7 +174,7 @@ const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
{PPC::V22, -160},
{PPC::V21, -176},
{PPC::V20, -192},
-
+
// SPE register save area (overlaps Vector save area).
{PPC::S31, -8},
{PPC::S30, -16},
@@ -1229,7 +1229,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
if (MBBI != MBB.end())
dl = MBBI->getDebugLoc();
-
+
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
@@ -1315,7 +1315,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
}
bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
-
+
if (IsReturnBlock) {
unsigned RetOpcode = MBBI->getOpcode();
bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 551220466901..793a4dd7f624 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -50,7 +50,7 @@ bool PPCDispatchGroupSBHazardRecognizer::isLoadAfterStore(SUnit *SU) {
return true;
}
- return false;
+ return false;
}
bool PPCDispatchGroupSBHazardRecognizer::isBCTRAfterSet(SUnit *SU) {
@@ -76,7 +76,7 @@ bool PPCDispatchGroupSBHazardRecognizer::isBCTRAfterSet(SUnit *SU) {
return true;
}
- return false;
+ return false;
}
// FIXME: Remove this when we don't need this:
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 1e3e14c71144..51ff8a5cf77e 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1224,6 +1224,7 @@ unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
}
unsigned PPCTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
+ CallingConv:: ID CC,
EVT VT) const {
if (Subtarget.hasSPE() && VT == MVT::f64)
return 2;
@@ -1231,6 +1232,7 @@ unsigned PPCTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
}
MVT PPCTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
+ CallingConv:: ID CC,
EVT VT) const {
if (Subtarget.hasSPE() && VT == MVT::f64)
return MVT::i32;
@@ -13102,8 +13104,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
SDValue
PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
- SelectionDAG &DAG,
- std::vector<SDNode *> *Created) const {
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const {
// fold (sdiv X, pow2)
EVT VT = N->getValueType(0);
if (VT == MVT::i64 && !Subtarget.isPPC64())
@@ -13120,13 +13122,11 @@ PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
- if (Created)
- Created->push_back(Op.getNode());
+ Created.push_back(Op.getNode());
if (IsNegPow2) {
Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
- if (Created)
- Created->push_back(Op.getNode());
+ Created.push_back(Op.getNode());
}
return Op;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 9b8d6435515b..f174943a8004 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -665,7 +665,7 @@ namespace llvm {
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
- std::vector<SDNode *> *Created) const override;
+ SmallVectorImpl<SDNode *> &Created) const override;
unsigned getRegisterByName(const char* RegName, EVT VT,
SelectionDAG &DAG) const override;
@@ -872,9 +872,11 @@ namespace llvm {
MCContext &Ctx) const override;
unsigned getNumRegistersForCallingConv(LLVMContext &Context,
+ CallingConv:: ID CC,
EVT VT) const override;
MVT getRegisterTypeForCallingConv(LLVMContext &Context,
+ CallingConv:: ID CC,
EVT VT) const override;
private:
@@ -1141,7 +1143,7 @@ namespace llvm {
ISD::ArgFlagsTy &ArgFlags,
CCState &State);
- bool
+ bool
CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT,
MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 4669719744bc..0930f7d3b8d7 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -316,11 +316,11 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
}
// For opcodes with the ReMaterializable flag set, this function is called to
-// verify the instruction is really rematable.
+// verify the instruction is really rematable.
bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
AliasAnalysis *AA) const {
switch (MI.getOpcode()) {
- default:
+ default:
// This function should only be called for opcodes with the ReMaterializable
// flag set.
llvm_unreachable("Unknown rematerializable operation!");
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
index 2217fa4693ce..0b57dd9b618d 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
@@ -360,7 +360,7 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
// generate direct offsets from both the pre-incremented and
// post-incremented pointer values. Thus, we'll pick the first non-prefetch
// instruction in each bucket, and adjust the recurrence and other offsets
- // accordingly.
+ // accordingly.
for (int j = 0, je = Buckets[i].Elements.size(); j != je; ++j) {
if (auto *II = dyn_cast<IntrinsicInst>(Buckets[i].Elements[j].Instr))
if (II->getIntrinsicID() == Intrinsic::prefetch)
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
index 62a612feb55c..e731c0bc0c23 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -75,7 +75,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO,
}
return Sym;
}
-
+
return Sym;
}
@@ -130,7 +130,7 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
// Subtract off the PIC base if required.
if (MO.getTargetFlags() & PPCII::MO_PIC_FLAG) {
const MachineFunction *MF = MO.getParent()->getParent()->getParent();
-
+
const MCExpr *PB = MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
Expr = MCBinaryExpr::createSub(Expr, PB, Ctx);
}
@@ -151,7 +151,7 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP, bool isDarwin) {
OutMI.setOpcode(MI->getOpcode());
-
+
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MCOperand MCOp;
if (LowerPPCMachineOperandToMCOperand(MI->getOperand(i), MCOp, AP,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index dbe1fe37ddf8..0068df19f0c8 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -891,7 +891,7 @@ static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
auto BII = BB.getFirstInstrTerminator();
// We optimize BBs ending with a conditional branch.
// We check only for BCC here, not BCCLR, because BCCLR
- // will be formed only later in the pipeline.
+ // will be formed only later in the pipeline.
if (BB.succ_size() == 2 &&
BII != BB.instr_end() &&
(*BII).getOpcode() == PPC::BCC &&
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index b14bbad2039a..8a3f50aa9565 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -29,7 +29,7 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// stored. Also used as an anchor for instructions that need to be altered
/// when using frame pointers (dyna_add, dyna_sub.)
int FramePointerSaveIndex = 0;
-
+
/// ReturnAddrSaveIndex - Frame index of where the return address is stored.
///
int ReturnAddrSaveIndex = 0;
@@ -128,7 +128,7 @@ public:
int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
-
+
int getReturnAddrSaveIndex() const { return ReturnAddrSaveIndex; }
void setReturnAddrSaveIndex(int idx) { ReturnAddrSaveIndex = idx; }
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 6647ceace5eb..96923a97a82c 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -979,7 +979,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
SReg = MF.getRegInfo().createVirtualRegister(RC);
// Insert a set of rA with the full offset value before the ld, st, or add
- if (isInt<16>(Offset))
+ if (isInt<16>(Offset))
BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LI8 : PPC::LI), SReg)
.addImm(Offset);
else {
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 226c75f704f4..b0da9b5a6d70 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -201,7 +201,7 @@ unsigned PPCTTIImpl::getUserCost(const User *U,
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, U->getType());
return LT.first * BaseT::getUserCost(U, Operands);
}
-
+
return BaseT::getUserCost(U, Operands);
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index 1e8a1750ec3b..1be193e08c01 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -443,7 +443,7 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
// We can handle STXSDX and STXSSPX similarly to LXSDX and LXSSPX,
// by adding special handling for narrowing copies as well as
// widening ones. However, I've experimented with this, and in
- // practice we currently do not appear to use STXSDX fed by
+ // practice we currently do not appear to use STXSDX fed by
// a narrowing copy from a full vector register. Since I can't
// generate any useful test cases, I've left this alone for now.
case PPC::STXSDX:
diff --git a/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index c7a5a1e8e6ee..35f52f7d279b 100644
--- a/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -190,7 +190,7 @@ public:
Sparc::C8_C9, Sparc::C10_C11, Sparc::C12_C13, Sparc::C14_C15,
Sparc::C16_C17, Sparc::C18_C19, Sparc::C20_C21, Sparc::C22_C23,
Sparc::C24_C25, Sparc::C26_C27, Sparc::C28_C29, Sparc::C30_C31};
-
+
namespace {
/// SparcOperand - Instances of this class represent a parsed Sparc machine
@@ -459,7 +459,7 @@ public:
Op.Reg.Kind = rk_CoprocPairReg;
return true;
}
-
+
static std::unique_ptr<SparcOperand>
MorphToMEMrr(unsigned Base, std::unique_ptr<SparcOperand> Op) {
unsigned offsetReg = Op->getReg();
@@ -1000,7 +1000,7 @@ bool SparcAsmParser::matchRegisterName(const AsmToken &Tok, unsigned &RegNo,
RegKind = SparcOperand::rk_Special;
return true;
}
-
+
if (name.equals("wim")) {
RegNo = Sparc::WIM;
RegKind = SparcOperand::rk_Special;
@@ -1093,7 +1093,7 @@ bool SparcAsmParser::matchRegisterName(const AsmToken &Tok, unsigned &RegNo,
RegKind = SparcOperand::rk_CoprocReg;
return true;
}
-
+
if (name.equals("tpc")) {
RegNo = Sparc::TPC;
RegKind = SparcOperand::rk_Special;
diff --git a/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
index 8e298e8316da..3e30dae1537f 100644
--- a/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
+++ b/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
@@ -350,18 +350,18 @@ DecodeStatus SparcDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
return MCDisassembler::Fail;
// Calling the auto-generated decoder function.
-
+
if (STI.getFeatureBits()[Sparc::FeatureV9])
{
Result = decodeInstruction(DecoderTableSparcV932, Instr, Insn, Address, this, STI);
}
else
{
- Result = decodeInstruction(DecoderTableSparcV832, Instr, Insn, Address, this, STI);
+ Result = decodeInstruction(DecoderTableSparcV832, Instr, Insn, Address, this, STI);
}
if (Result != MCDisassembler::Fail)
return Result;
-
+
Result =
decodeInstruction(DecoderTableSparc32, Instr, Insn, Address, this, STI);
@@ -662,7 +662,7 @@ static DecodeStatus DecodeTRAP(MCInst &MI, unsigned insn, uint64_t Address,
if (status != MCDisassembler::Success)
return status;
}
-
+
// Decode CC
MI.addOperand(MCOperand::createImm(cc));
diff --git a/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp b/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
index 4981deae6af6..c1512cbdc44f 100644
--- a/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
@@ -118,9 +118,9 @@ void SparcInstPrinter::printOperand(const MCInst *MI, int opNum,
if (MO.isImm()) {
switch (MI->getOpcode()) {
default:
- O << (int)MO.getImm();
+ O << (int)MO.getImm();
return;
-
+
case SP::TICCri: // Fall through
case SP::TICCrr: // Fall through
case SP::TRAPri: // Fall through
@@ -128,7 +128,7 @@ void SparcInstPrinter::printOperand(const MCInst *MI, int opNum,
case SP::TXCCri: // Fall through
case SP::TXCCrr: // Fall through
// Only seven-bit values up to 127.
- O << ((int) MO.getImm() & 0x7f);
+ O << ((int) MO.getImm() & 0x7f);
return;
}
}
diff --git a/contrib/llvm/lib/Target/Sparc/Sparc.h b/contrib/llvm/lib/Target/Sparc/Sparc.h
index 4135e4e1b61d..0cea53b359eb 100644
--- a/contrib/llvm/lib/Target/Sparc/Sparc.h
+++ b/contrib/llvm/lib/Target/Sparc/Sparc.h
@@ -73,7 +73,7 @@ namespace llvm {
FCC_LE = 13+16, // Less or Equal
FCC_ULE = 14+16, // Unordered or Less or Equal
FCC_O = 15+16, // Ordered
-
+
CPCC_A = 8+32, // Always
CPCC_N = 0+32, // Never
CPCC_3 = 7+32,
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
index bf700d6a99d8..0cbbda787881 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
@@ -59,9 +59,9 @@ namespace llvm {
public:
SparcTargetLowering(const TargetMachine &TM, const SparcSubtarget &STI);
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
-
+
bool useSoftFloat() const override;
-
+
/// computeKnownBitsForTargetNode - Determine which of the bits specified
/// in Mask are known to be either zero or one and return them in the
/// KnownZero/KnownOne bitsets.
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
index 6750763d8ee5..47b42444b94d 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -115,7 +115,7 @@ static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC)
case SPCC::FCC_UE: return SPCC::FCC_LG;
case SPCC::FCC_NE: return SPCC::FCC_E;
case SPCC::FCC_E: return SPCC::FCC_NE;
-
+
case SPCC::CPCC_A: return SPCC::CPCC_N;
case SPCC::CPCC_N: return SPCC::CPCC_A;
case SPCC::CPCC_3: LLVM_FALLTHROUGH;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
index a0d40653fd9b..07f9e7250bd9 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -100,7 +100,7 @@ SparcTargetMachine::SparcTargetMachine(
SparcTargetMachine::~SparcTargetMachine() {}
-const SparcSubtarget *
+const SparcSubtarget *
SparcTargetMachine::getSubtargetImpl(const Function &F) const {
Attribute CPUAttr = F.getFnAttribute("target-cpu");
Attribute FSAttr = F.getFnAttribute("target-features");
@@ -119,7 +119,7 @@ SparcTargetMachine::getSubtargetImpl(const Function &F) const {
F.hasFnAttribute("use-soft-float") &&
F.getFnAttribute("use-soft-float").getValueAsString() == "true";
- if (softFloat)
+ if (softFloat)
FS += FS.empty() ? "+soft-float" : ",+soft-float";
auto &I = SubtargetMap[CPU + FS];
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
index d300d1d88abc..b9e5788cf018 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
@@ -55,7 +55,7 @@ getNumDecoderSlots(SUnit *SU) const {
else
return 3; // Expanded/group-alone instruction
}
-
+
return 1; // Normal instruction
}
@@ -81,6 +81,7 @@ getHazardType(SUnit *m, int Stalls) {
void SystemZHazardRecognizer::Reset() {
CurrGroupSize = 0;
+ CurrGroupHas4RegOps = false;
clearProcResCounters();
GrpCount = 0;
LastFPdOpCycleIdx = UINT_MAX;
@@ -99,6 +100,12 @@ SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
if (SC->BeginGroup)
return (CurrGroupSize == 0);
+ // An instruction with 4 register operands will not fit in the last slot.
+ assert ((CurrGroupSize < 2 || !CurrGroupHas4RegOps) &&
+ "Current decoder group is already full!");
+ if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
+ return false;
+
// Since a full group is handled immediately in EmitInstruction(),
// SU should fit into current group. NumSlots should be 1 or 0,
// since it is not a cracked or expanded instruction.
@@ -108,6 +115,23 @@ SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
return true;
}
+bool SystemZHazardRecognizer::has4RegOps(const MachineInstr *MI) const {
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
+ const MCInstrDesc &MID = MI->getDesc();
+ unsigned Count = 0;
+ for (unsigned OpIdx = 0; OpIdx < MID.getNumOperands(); OpIdx++) {
+ const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx, TRI, MF);
+ if (RC == nullptr)
+ continue;
+ if (OpIdx >= MID.getNumDefs() &&
+ MID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+ continue;
+ Count++;
+ }
+ return Count >= 4;
+}
+
void SystemZHazardRecognizer::nextGroup() {
if (CurrGroupSize == 0)
return;
@@ -119,6 +143,7 @@ void SystemZHazardRecognizer::nextGroup() {
// Reset counter for next group.
CurrGroupSize = 0;
+ CurrGroupHas4RegOps = false;
// Decrease counters for execution units by one.
for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
@@ -142,7 +167,7 @@ void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const {
const MCSchedClassDesc *SC = getSchedClass(SU);
if (!SC->isValid())
return;
-
+
for (TargetSchedModel::ProcResIter
PI = SchedModel->getWriteProcResBegin(SC),
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
@@ -172,6 +197,8 @@ void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const {
OS << "/EndsGroup";
if (SU->isUnbuffered)
OS << "/Unbuffered";
+ if (has4RegOps(SU->getInstr()))
+ OS << "/4RegOps";
}
void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const {
@@ -184,6 +211,7 @@ void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const {
dbgs() << "{ " << CurGroupDbg << " }";
dbgs() << " (" << CurrGroupSize << " decoder slot"
<< (CurrGroupSize > 1 ? "s":"")
+ << (CurrGroupHas4RegOps ? ", 4RegOps" : "")
<< ")\n";
}
}
@@ -294,11 +322,14 @@ EmitInstruction(SUnit *SU) {
// Insert SU into current group by increasing number of slots used
// in current group.
CurrGroupSize += getNumDecoderSlots(SU);
- assert (CurrGroupSize <= 3);
+ CurrGroupHas4RegOps |= has4RegOps(SU->getInstr());
+ unsigned GroupLim =
+ ((CurrGroupHas4RegOps && getNumDecoderSlots(SU) < 3) ? 2 : 3);
+ assert (CurrGroupSize <= GroupLim && "SU does not fit into decoder group!");
// Check if current group is now full/ended. If so, move on to next
// group to be ready to evaluate more candidates.
- if (CurrGroupSize == 3 || SC->EndGroup)
+ if (CurrGroupSize == GroupLim || SC->EndGroup)
nextGroup();
}
@@ -306,7 +337,7 @@ int SystemZHazardRecognizer::groupingCost(SUnit *SU) const {
const MCSchedClassDesc *SC = getSchedClass(SU);
if (!SC->isValid())
return 0;
-
+
// If SU begins new group, it can either break a current group early
// or fit naturally if current group is empty (negative cost).
if (SC->BeginGroup) {
@@ -325,6 +356,10 @@ int SystemZHazardRecognizer::groupingCost(SUnit *SU) const {
return -1;
}
+ // An instruction with 4 register operands will not fit in the last slot.
+ if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
+ return 1;
+
// Most instructions can be placed in any decoder slot.
return 0;
}
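The decoder-group bookkeeping above is easier to follow as a scalar model: z13 decodes up to three instructions per group, and an instruction that uses four distinct (non-tied) register operands may not take the last slot, so once one is present the effective group limit drops to two. Below is a minimal standalone sketch of that accounting; Insn and DecoderGroup are illustrative stand-ins, not LLVM types, and the rules are paraphrased from the hunks above rather than a drop-in implementation.

#include <cassert>
#include <iostream>

// Simplified stand-in for the scheduler's view of an instruction.
struct Insn {
  unsigned DecoderSlots;  // 1 for normal, 2 for cracked, 3 for expanded.
  bool Has4RegOps;        // Uses four distinct (non-tied) register operands.
};

struct DecoderGroup {
  unsigned Size = 0;
  bool Has4RegOps = false;

  // Mirrors fitsIntoCurrentGroup(): a 4-reg-op instruction may not take
  // the last (third) slot of a group.
  bool fits(const Insn &I) const {
    if (Size == 2 && I.Has4RegOps)
      return false;
    return Size + I.DecoderSlots <= 3;
  }

  // Mirrors EmitInstruction(): the group limit drops to 2 once a 4-reg-op
  // instruction is present, unless the instruction fills the group itself.
  void emit(const Insn &I) {
    assert(fits(I) && "instruction does not fit into decoder group");
    Size += I.DecoderSlots;
    Has4RegOps |= I.Has4RegOps;
    unsigned Limit = (Has4RegOps && I.DecoderSlots < 3) ? 2 : 3;
    if (Size >= Limit) {  // group is full, start the next one
      Size = 0;
      Has4RegOps = false;
    }
  }
};

int main() {
  DecoderGroup G;
  Insn Normal{1, false}, FourOps{1, true};
  G.emit(Normal);                        // slot 0
  G.emit(Normal);                        // slot 1
  std::cout << G.fits(FourOps) << "\n";  // 0: not allowed in the last slot
  G.emit(Normal);                        // fills the group
  std::cout << G.fits(FourOps) << "\n";  // 1: fits at the start of a group
  return 0;
}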
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h b/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h
index 40cb3acc7009..6292feefbfea 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h
@@ -45,15 +45,17 @@ namespace llvm {
/// SystemZHazardRecognizer maintains the state for one MBB during scheduling.
class SystemZHazardRecognizer : public ScheduleHazardRecognizer {
-#ifndef NDEBUG
const SystemZInstrInfo *TII;
-#endif
const TargetSchedModel *SchedModel;
/// Keep track of the number of decoder slots used in the current
/// decoder group.
unsigned CurrGroupSize;
+ /// True if an instruction with four reg operands has been scheduled into
+ /// the current decoder group.
+ bool CurrGroupHas4RegOps;
+
/// The tracking of resources here is quite similar to the common
/// code use of a critical resource. However, z13 differs in the way
/// that it has two processor sides which may be interesting to
@@ -73,6 +75,9 @@ class SystemZHazardRecognizer : public ScheduleHazardRecognizer {
/// Return true if MI fits into current decoder group.
bool fitsIntoCurrentGroup(SUnit *SU) const;
+ /// Return true if this instruction has four register operands.
+ bool has4RegOps(const MachineInstr *MI) const;
+
/// Two decoder groups per cycle are formed (for z13), meaning 2x3
/// instructions. This function returns a number between 0 and 5,
/// representing the current decoder slot of the current cycle. If an SU
@@ -105,11 +110,7 @@ class SystemZHazardRecognizer : public ScheduleHazardRecognizer {
public:
SystemZHazardRecognizer(const SystemZInstrInfo *tii,
const TargetSchedModel *SM)
- :
-#ifndef NDEBUG
- TII(tii),
-#endif
- SchedModel(SM) {
+ : TII(tii), SchedModel(SM) {
Reset();
}
@@ -134,7 +135,7 @@ public:
/// new decoder group, this is negative if this fits the schedule or
/// positive if it would mean ending a group prematurely. For normal
/// instructions this returns 0.
- int groupingCost(SUnit *SU) const;
+ int groupingCost(SUnit *SU) const;
/// Return the cost of SU in regards to processor resources usage.
/// A positive value means it would be better to wait with SU, while
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 302c7883f97b..e76fa71dacd7 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -527,10 +527,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::FP_ROUND);
setTargetDAGCombine(ISD::BSWAP);
- setTargetDAGCombine(ISD::SHL);
- setTargetDAGCombine(ISD::SRA);
- setTargetDAGCombine(ISD::SRL);
- setTargetDAGCombine(ISD::ROTL);
// Handle intrinsics.
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
@@ -609,7 +605,7 @@ struct AddressingMode {
// True if use of index register is supported.
bool IndexReg;
-
+
AddressingMode(bool LongDispl, bool IdxReg) :
LongDisplacement(LongDispl), IndexReg(IdxReg) {}
};
@@ -5524,76 +5520,6 @@ SDValue SystemZTargetLowering::combineBSWAP(
return SDValue();
}
-SDValue SystemZTargetLowering::combineSHIFTROT(
- SDNode *N, DAGCombinerInfo &DCI) const {
-
- SelectionDAG &DAG = DCI.DAG;
-
- // Shift/rotate instructions only use the last 6 bits of the second operand
- // register. If the second operand is the result of an AND with an immediate
- // value that has its last 6 bits set, we can safely remove the AND operation.
- //
- // If the AND operation doesn't have the last 6 bits set, we can't remove it
- // entirely, but we can still truncate it to a 16-bit value. This prevents
- // us from ending up with a NILL with a signed operand, which will cause the
- // instruction printer to abort.
- SDValue N1 = N->getOperand(1);
- if (N1.getOpcode() == ISD::AND) {
- SDValue AndMaskOp = N1->getOperand(1);
- auto *AndMask = dyn_cast<ConstantSDNode>(AndMaskOp);
-
- // The AND mask is constant
- if (AndMask) {
- auto AmtVal = AndMask->getZExtValue();
-
- // Bottom 6 bits are set
- if ((AmtVal & 0x3f) == 0x3f) {
- SDValue AndOp = N1->getOperand(0);
-
- // This is the only use, so remove the node
- if (N1.hasOneUse()) {
- // Combine the AND away
- DCI.CombineTo(N1.getNode(), AndOp);
-
- // Return N so it isn't rechecked
- return SDValue(N, 0);
-
- // The node will be reused, so create a new node for this one use
- } else {
- SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N),
- N->getValueType(0), N->getOperand(0),
- AndOp);
- DCI.AddToWorklist(Replace.getNode());
-
- return Replace;
- }
-
- // We can't remove the AND, but we can use NILL here (normally we would
- // use NILF). Only keep the last 16 bits of the mask. The actual
- // transformation will be handled by .td definitions.
- } else if (AmtVal >> 16 != 0) {
- SDValue AndOp = N1->getOperand(0);
-
- auto NewMask = DAG.getConstant(AndMask->getZExtValue() & 0x0000ffff,
- SDLoc(AndMaskOp),
- AndMaskOp.getValueType());
-
- auto NewAnd = DAG.getNode(N1.getOpcode(), SDLoc(N1), N1.getValueType(),
- AndOp, NewMask);
-
- SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N),
- N->getValueType(0), N->getOperand(0),
- NewAnd);
- DCI.AddToWorklist(Replace.getNode());
-
- return Replace;
- }
- }
- }
-
- return SDValue();
-}
-
static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
// We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
// set by the CCReg instruction using the CCValid / CCMask masks,
@@ -5752,10 +5678,6 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
case ISD::BSWAP: return combineBSWAP(N, DCI);
- case ISD::SHL:
- case ISD::SRA:
- case ISD::SRL:
- case ISD::ROTL: return combineSHIFTROT(N, DCI);
case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 0ca93a38a016..267e31a85216 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -602,7 +602,6 @@ private:
SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const;
- SDValue combineSHIFTROT(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineBR_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSELECT_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineGET_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 9d7312269957..bb5b7aae883b 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -1352,8 +1352,8 @@ def : Pat<(z_udivrem GR64:$src1, (i64 (load bdxaddr20only:$src2))),
//===----------------------------------------------------------------------===//
// Logical shift left.
-defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>;
-def SLLG : BinaryRSY<"sllg", 0xEB0D, shl, GR64>;
+defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shiftop<shl>, GR32>;
+def SLLG : BinaryRSY<"sllg", 0xEB0D, shiftop<shl>, GR64>;
def SLDL : BinaryRS<"sldl", 0x8D, null_frag, GR128>;
// Arithmetic shift left.
@@ -1364,20 +1364,20 @@ let Defs = [CC] in {
}
// Logical shift right.
-defm SRL : BinaryRSAndK<"srl", 0x88, 0xEBDE, srl, GR32>;
-def SRLG : BinaryRSY<"srlg", 0xEB0C, srl, GR64>;
+defm SRL : BinaryRSAndK<"srl", 0x88, 0xEBDE, shiftop<srl>, GR32>;
+def SRLG : BinaryRSY<"srlg", 0xEB0C, shiftop<srl>, GR64>;
def SRDL : BinaryRS<"srdl", 0x8C, null_frag, GR128>;
// Arithmetic shift right.
let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in {
- defm SRA : BinaryRSAndK<"sra", 0x8A, 0xEBDC, sra, GR32>;
- def SRAG : BinaryRSY<"srag", 0xEB0A, sra, GR64>;
+ defm SRA : BinaryRSAndK<"sra", 0x8A, 0xEBDC, shiftop<sra>, GR32>;
+ def SRAG : BinaryRSY<"srag", 0xEB0A, shiftop<sra>, GR64>;
def SRDA : BinaryRS<"srda", 0x8E, null_frag, GR128>;
}
// Rotate left.
-def RLL : BinaryRSY<"rll", 0xEB1D, rotl, GR32>;
-def RLLG : BinaryRSY<"rllg", 0xEB1C, rotl, GR64>;
+def RLL : BinaryRSY<"rll", 0xEB1D, shiftop<rotl>, GR32>;
+def RLLG : BinaryRSY<"rllg", 0xEB1C, shiftop<rotl>, GR64>;
// Rotate second operand left and insert selected bits into first operand.
// These can act like 32-bit operands provided that the constant start and
@@ -2162,29 +2162,29 @@ def : Pat<(and (xor GR64:$x, (i64 -1)), GR64:$y),
// Complexity is added so that we match this before we match NILF on the AND
// operation alone.
let AddedComplexity = 4 in {
- def : Pat<(shl GR32:$val, (and GR32:$shift, uimm32:$imm)),
- (SLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+ def : Pat<(shl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+ (SLL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
- def : Pat<(sra GR32:$val, (and GR32:$shift, uimm32:$imm)),
- (SRA GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+ def : Pat<(sra GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+ (SRA GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
- def : Pat<(srl GR32:$val, (and GR32:$shift, uimm32:$imm)),
- (SRL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+ def : Pat<(srl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+ (SRL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
- def : Pat<(shl GR64:$val, (and GR32:$shift, uimm32:$imm)),
- (SLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+ def : Pat<(shl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+ (SLLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
- def : Pat<(sra GR64:$val, (and GR32:$shift, uimm32:$imm)),
- (SRAG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+ def : Pat<(sra GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+ (SRAG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
- def : Pat<(srl GR64:$val, (and GR32:$shift, uimm32:$imm)),
- (SRLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+ def : Pat<(srl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+ (SRLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
- def : Pat<(rotl GR32:$val, (and GR32:$shift, uimm32:$imm)),
- (RLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+ def : Pat<(rotl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+ (RLL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
- def : Pat<(rotl GR64:$val, (and GR32:$shift, uimm32:$imm)),
- (RLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+ def : Pat<(rotl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+ (RLLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
}
// Peepholes for turning scalar operations into block operations.
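The new patterns above rewrite the AND of the shift count with NILL, keeping only the low 16 bits of the mask, because the shift and rotate instructions read only the low 6 bits of the count register; truncating the mask to 16 bits cannot change those bits. A hedged scalar check of that identity, independent of the TableGen machinery:

#include <cassert>
#include <cstdint>

int main() {
  // Only bits 0-5 of the AND mask can influence the shift count the
  // hardware sees, and they survive truncation of the mask to 16 bits
  // (which is what the NILL form applies).
  for (uint32_t mask : {0xffffffc0u, 0x12345fffu, 0x0000003fu, 0xdeadbeefu})
    for (uint32_t count = 0; count < 256; ++count)
      assert(((count & mask) & 0x3f) == ((count & (mask & 0xffffu)) & 0x3f));
  return 0;
}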
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp
index fcbf4c4b5fe4..98e761ef87fe 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp
@@ -129,7 +129,7 @@ SystemZPostRASchedStrategy::
SystemZPostRASchedStrategy(const MachineSchedContext *C)
: MLI(C->MLI),
TII(static_cast<const SystemZInstrInfo *>
- (C->MF->getSubtarget().getInstrInfo())),
+ (C->MF->getSubtarget().getInstrInfo())),
MBB(nullptr), HazardRec(nullptr) {
const TargetSubtargetInfo *ST = &C->MF->getSubtarget();
SchedModel.init(ST);
@@ -169,8 +169,7 @@ SUnit *SystemZPostRASchedStrategy::pickNode(bool &IsTopNode) {
return *Available.begin();
}
- // All nodes that are possible to schedule are stored by in the
- // Available set.
+ // All nodes that are possible to schedule are stored in the Available set.
LLVM_DEBUG(dbgs() << "** Available: "; Available.dump(*HazardRec););
Candidate Best;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h b/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h
index cb0304825966..ab820e5d3e63 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h
@@ -26,7 +26,7 @@
using namespace llvm;
namespace llvm {
-
+
/// A MachineSchedStrategy implementation for SystemZ post RA scheduling.
class SystemZPostRASchedStrategy : public MachineSchedStrategy {
@@ -37,7 +37,7 @@ class SystemZPostRASchedStrategy : public MachineSchedStrategy {
// non-scheduled instructions, so it would not always be possible to call
// DAG->getSchedClass(SU).
TargetSchedModel SchedModel;
-
+
/// A candidate during instruction evaluation.
struct Candidate {
SUnit *SU = nullptr;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td b/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td
index da682cb4e5ab..7bf32bf19a4a 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td
@@ -357,6 +357,7 @@ def imm32zx16 : Immediate<i32, [{
}], UIMM16, "U16Imm">;
def imm32sx16trunc : Immediate<i32, [{}], SIMM16, "S16Imm">;
+def imm32zx16trunc : Immediate<i32, [{}], UIMM16, "U16Imm">;
// Full 32-bit immediates. We need both signed and unsigned versions
// because the assembler is picky. E.g. AFI requires signed operands
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td b/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td
index 3cfe23aec417..5103867e2d9a 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -697,6 +697,16 @@ class storei<SDPatternOperator operator, SDPatternOperator store = store>
: PatFrag<(ops node:$addr),
(store (operator), node:$addr)>;
+// Create a shift operator that optionally ignores an AND of the
+// shift count with an immediate if the bottom 6 bits are all set.
+def imm32bottom6set : PatLeaf<(i32 imm), [{
+ return (N->getZExtValue() & 0x3f) == 0x3f;
+}]>;
+class shiftop<SDPatternOperator operator>
+ : PatFrags<(ops node:$val, node:$count),
+ [(operator node:$val, node:$count),
+ (operator node:$val, (and node:$count, imm32bottom6set))]>;
+
// Vector representation of all-zeros and all-ones.
def z_vzero : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 0))))>;
def z_vones : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 65535))))>;
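The shiftop PatFrags above lets a shift or rotate match whether or not its count was ANDed with a constant whose low 6 bits are all ones: since the instruction only reads the low 6 bits of the count, such an AND is redundant. A small scalar check of the equivalence being relied on (plain C++, not TableGen; the values are arbitrary):

#include <cassert>
#include <cstdint>

// The hardware computes x << (n & 63).  If the source ANDs the count with
// a mask whose low 6 bits are all set, the AND cannot change the result.
uint64_t shiftWithAnd(uint64_t x, uint64_t n, uint64_t mask) {
  return x << ((n & mask) & 63);
}

uint64_t shiftWithoutAnd(uint64_t x, uint64_t n) {
  return x << (n & 63);
}

int main() {
  const uint64_t x = 0x123456789abcdef0ull;
  for (uint64_t mask : {0xffffffffffffffffull, 0x3full, 0xf03full})
    for (uint64_t n = 0; n < 128; ++n)
      assert(shiftWithAnd(x, n, mask) == shiftWithoutAnd(x, n));
  return 0;
}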
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index e2a3efda5c5e..c5cdc22f2099 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -329,7 +329,7 @@ bool SystemZTTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) {
}
int SystemZTTIImpl::getArithmeticInstrCost(
- unsigned Opcode, Type *Ty,
+ unsigned Opcode, Type *Ty,
TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info,
TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo,
@@ -469,7 +469,7 @@ int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
assert (Tp->isVectorTy());
assert (ST->hasVector() && "getShuffleCost() called.");
unsigned NumVectors = getNumberOfParts(Tp);
-
+
// TODO: Since fp32 is expanded, the shuffle cost should always be 0.
// FP128 values are always in scalar registers, so there is no work
@@ -647,7 +647,7 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
return Cost;
}
}
-
+
if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP ||
Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI) {
// TODO: Fix base implementation which could simplify things a bit here
@@ -704,7 +704,7 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP)
return (SrcScalarBits >= 32 ? 1 : 2 /*i8/i16 extend*/);
-
+
if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
Src->isIntegerTy(1)) {
// This should be extension of a compare i1 result, which is done with
diff --git a/contrib/llvm/lib/Target/Target.cpp b/contrib/llvm/lib/Target/Target.cpp
index 42d92622d6c8..f23ea72eb513 100644
--- a/contrib/llvm/lib/Target/Target.cpp
+++ b/contrib/llvm/lib/Target/Target.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the common infrastructure (including C bindings) for
+// This file implements the common infrastructure (including C bindings) for
// libLLVMTarget.a, which implements target information.
//
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
index 907ecf46e8ff..6bcf60fafc3e 100644
--- a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
+++ b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
@@ -92,10 +92,10 @@ static bool IsNullTerminatedString(const Constant *C) {
if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(C)) {
unsigned NumElts = CDS->getNumElements();
assert(NumElts != 0 && "Can't have an empty CDS");
-
+
if (CDS->getElementAsInteger(NumElts-1) != 0)
return false; // Not null terminated.
-
+
// Verify that the null doesn't occur anywhere else in the string.
for (unsigned i = 0; i != NumElts-1; ++i)
if (CDS->getElementAsInteger(i) == 0)
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index b84c2d31a63e..fafbed0bd935 100644
--- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -2603,11 +2603,11 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
bool HadVerifyError = false;
// Append default arguments to "ins[bwld]"
- if (Name.startswith("ins") &&
+ if (Name.startswith("ins") &&
(Operands.size() == 1 || Operands.size() == 3) &&
(Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
Name == "ins")) {
-
+
AddDefaultSrcDestOperands(TmpOperands,
X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
DefaultMemDIOperand(NameLoc));
@@ -2615,7 +2615,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
}
// Append default arguments to "outs[bwld]"
- if (Name.startswith("outs") &&
+ if (Name.startswith("outs") &&
(Operands.size() == 1 || Operands.size() == 3) &&
(Name == "outsb" || Name == "outsw" || Name == "outsl" ||
Name == "outsd" || Name == "outs")) {
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index 82e82fe1efd9..0e861d5ddbc9 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -92,7 +92,7 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
// the hex value of the immediate operand when it isn't in the range
// [-256,255].
if (CommentStream && !HasCustomInstComment && (Imm > 255 || Imm < -256)) {
- // Don't print unnecessary hex sign bits.
+ // Don't print unnecessary hex sign bits.
if (Imm == (int16_t)(Imm))
*CommentStream << format("imm = 0x%" PRIX16 "\n", (uint16_t)Imm);
else if (Imm == (int32_t)(Imm))
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index d030f26d98de..f1d15e66918b 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -307,10 +307,84 @@ class X86MCInstrAnalysis : public MCInstrAnalysis {
public:
X86MCInstrAnalysis(const MCInstrInfo *MCII) : MCInstrAnalysis(MCII) {}
+ bool isDependencyBreaking(const MCSubtargetInfo &STI,
+ const MCInst &Inst) const override;
bool clearsSuperRegisters(const MCRegisterInfo &MRI, const MCInst &Inst,
APInt &Mask) const override;
};
+bool X86MCInstrAnalysis::isDependencyBreaking(const MCSubtargetInfo &STI,
+ const MCInst &Inst) const {
+ if (STI.getCPU() == "btver2") {
+ // Reference: Agner Fog's microarchitecture.pdf - Section 20 "AMD Bobcat and
+ // Jaguar pipeline", subsection 8 "Dependency-breaking instructions".
+ switch (Inst.getOpcode()) {
+ default:
+ return false;
+ case X86::SUB32rr:
+ case X86::SUB64rr:
+ case X86::SBB32rr:
+ case X86::SBB64rr:
+ case X86::XOR32rr:
+ case X86::XOR64rr:
+ case X86::XORPSrr:
+ case X86::XORPDrr:
+ case X86::VXORPSrr:
+ case X86::VXORPDrr:
+ case X86::ANDNPSrr:
+ case X86::VANDNPSrr:
+ case X86::ANDNPDrr:
+ case X86::VANDNPDrr:
+ case X86::PXORrr:
+ case X86::VPXORrr:
+ case X86::PANDNrr:
+ case X86::VPANDNrr:
+ case X86::PSUBBrr:
+ case X86::PSUBWrr:
+ case X86::PSUBDrr:
+ case X86::PSUBQrr:
+ case X86::VPSUBBrr:
+ case X86::VPSUBWrr:
+ case X86::VPSUBDrr:
+ case X86::VPSUBQrr:
+ case X86::PCMPEQBrr:
+ case X86::PCMPEQWrr:
+ case X86::PCMPEQDrr:
+ case X86::PCMPEQQrr:
+ case X86::VPCMPEQBrr:
+ case X86::VPCMPEQWrr:
+ case X86::VPCMPEQDrr:
+ case X86::VPCMPEQQrr:
+ case X86::PCMPGTBrr:
+ case X86::PCMPGTWrr:
+ case X86::PCMPGTDrr:
+ case X86::PCMPGTQrr:
+ case X86::VPCMPGTBrr:
+ case X86::VPCMPGTWrr:
+ case X86::VPCMPGTDrr:
+ case X86::VPCMPGTQrr:
+ case X86::MMX_PXORirr:
+ case X86::MMX_PANDNirr:
+ case X86::MMX_PSUBBirr:
+ case X86::MMX_PSUBDirr:
+ case X86::MMX_PSUBQirr:
+ case X86::MMX_PSUBWirr:
+ case X86::MMX_PCMPGTBirr:
+ case X86::MMX_PCMPGTDirr:
+ case X86::MMX_PCMPGTWirr:
+ case X86::MMX_PCMPEQBirr:
+ case X86::MMX_PCMPEQDirr:
+ case X86::MMX_PCMPEQWirr:
+ return Inst.getOperand(1).getReg() == Inst.getOperand(2).getReg();
+ case X86::CMP32rr:
+ case X86::CMP64rr:
+ return Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg();
+ }
+ }
+
+ return false;
+}
+
bool X86MCInstrAnalysis::clearsSuperRegisters(const MCRegisterInfo &MRI,
const MCInst &Inst,
APInt &Mask) const {
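A dependency-breaking idiom is an instruction whose result does not depend on the old value of its sources when both sources are the same register, for example xor or pxor of a register with itself, so the core can dispatch it without waiting for that register. The hook above reports the btver2 idioms by comparing the two source operands (or, for the compares, the two inputs). A simplified standalone model of that check; Inst and the opcode strings here are illustrative stand-ins, not the MC layer:

#include <iostream>
#include <string>
#include <vector>

// Illustrative stand-in for an MCInst: an opcode name plus register operands.
// For the rr ALU forms, Ops[0] is the destination and Ops[1]/Ops[2] the sources.
struct Inst {
  std::string Opcode;
  std::vector<std::string> Ops;
};

// Zeroing/all-ones idioms break the dependency only when both sources are
// the same register; the result is then a constant regardless of its value.
bool isDependencyBreaking(const Inst &I) {
  if (I.Opcode == "XOR32rr" || I.Opcode == "SUB32rr" || I.Opcode == "PXORrr" ||
      I.Opcode == "PCMPEQDrr" || I.Opcode == "VPSUBQrr")
    return I.Ops.size() == 3 && I.Ops[1] == I.Ops[2];
  if (I.Opcode == "CMP32rr")  // compares write only flags, no GPR result
    return I.Ops.size() == 2 && I.Ops[0] == I.Ops[1];
  return false;
}

int main() {
  std::cout << isDependencyBreaking({"XOR32rr", {"eax", "eax", "eax"}}) << "\n";  // 1
  std::cout << isDependencyBreaking({"XOR32rr", {"eax", "eax", "ebx"}}) << "\n";  // 0
  std::cout << isDependencyBreaking({"CMP32rr", {"ecx", "ecx"}}) << "\n";         // 1
  return 0;
}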
diff --git a/contrib/llvm/lib/Target/X86/X86CallingConv.h b/contrib/llvm/lib/Target/X86/X86CallingConv.h
index c49a6838fa44..d0fcbd313312 100644
--- a/contrib/llvm/lib/Target/X86/X86CallingConv.h
+++ b/contrib/llvm/lib/Target/X86/X86CallingConv.h
@@ -66,7 +66,7 @@ inline bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT,
// not to split i64 and double between a register and stack
static const MCPhysReg RegList[] = {X86::EAX, X86::EDX, X86::ECX};
static const unsigned NumRegs = sizeof(RegList)/sizeof(RegList[0]);
-
+
SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
// If this is the first part of an double/i64/i128, or if we're already
diff --git a/contrib/llvm/lib/Target/X86/X86CmovConversion.cpp b/contrib/llvm/lib/Target/X86/X86CmovConversion.cpp
index f73455cc31b8..1c5f110d8c60 100644
--- a/contrib/llvm/lib/Target/X86/X86CmovConversion.cpp
+++ b/contrib/llvm/lib/Target/X86/X86CmovConversion.cpp
@@ -622,7 +622,7 @@ void X86CmovConverterPass::convertCmovInstsToBranches(
// If the CMOV group is not packed, e.g., there are debug instructions between
// first CMOV and last CMOV, then pack the group and make the CMOV instruction
- // consecutive by moving the debug instructions to after the last CMOV.
+ // consecutive by moving the debug instructions to after the last CMOV.
packCmovGroup(Group.front(), Group.back());
// To convert a CMOVcc instruction, we actually have to insert the diamond
diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
index de8b40f28a86..35a15577fe09 100644
--- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
@@ -1195,7 +1195,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
if (Ret->getNumOperands() > 0) {
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
+ GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
@@ -2649,7 +2649,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(X86::VMOVPDI2DIrr), ResultReg)
.addReg(InputReg, RegState::Kill);
-
+
// The result value is in the lower 16-bits of ResultReg.
unsigned RegIdx = X86::sub_16bit;
ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, true, RegIdx);
@@ -3687,7 +3687,7 @@ X86FastISel::fastSelectInstruction(const Instruction *I) {
unsigned Reg = getRegForValue(I->getOperand(0));
if (Reg == 0)
return false;
-
+
// No instruction is needed for conversion. Reuse the register used by
// the fist operand.
updateValueMap(I, Reg);
diff --git a/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp b/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp
index d85389a0a7f1..f3f7f6a37360 100644
--- a/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp
@@ -578,7 +578,7 @@ bool FixupLEAPass::processBasicBlock(MachineFunction &MF,
continue;
if (OptLEA) {
- if (MF.getSubtarget<X86Subtarget>().isSLM())
+ if (MF.getSubtarget<X86Subtarget>().slowLEA())
processInstructionForSLM(I, MFI);
else {
diff --git a/contrib/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp b/contrib/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
index 1ba08d39c595..c17c51a7aeac 100644
--- a/contrib/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
@@ -730,9 +730,12 @@ CondRegArray X86FlagsCopyLoweringPass::collectCondsInRegs(
for (MachineInstr &MI :
llvm::reverse(llvm::make_range(MBB.begin(), TestPos))) {
X86::CondCode Cond = X86::getCondFromSETOpc(MI.getOpcode());
- if (Cond != X86::COND_INVALID && MI.getOperand(0).isReg() &&
- TRI->isVirtualRegister(MI.getOperand(0).getReg()))
+ if (Cond != X86::COND_INVALID && !MI.mayStore() && MI.getOperand(0).isReg() &&
+ TRI->isVirtualRegister(MI.getOperand(0).getReg())) {
+ assert(MI.getOperand(0).isDef() &&
+ "A non-storing SETcc should always define a register!");
CondRegs[Cond] = MI.getOperand(0).getReg();
+ }
// Stop scanning when we see the first definition of the EFLAGS as prior to
// this we would potentially capture the wrong flag state.
diff --git a/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp
index ae748901164a..f330acff61a1 100644
--- a/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp
@@ -347,12 +347,12 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
LiveBundle &Bundle =
LiveBundles[Bundles->getBundle(Entry->getNumber(), false)];
-
+
// In regcall convention, some FP registers may not be passed through
// the stack, so they will need to be assigned to the stack first
if ((Entry->getParent()->getFunction().getCallingConv() ==
CallingConv::X86_RegCall) && (Bundle.Mask && !Bundle.FixCount)) {
- // In the register calling convention, up to one FP argument could be
+ // In the register calling convention, up to one FP argument could be
// saved in the first FP register.
// If bundle.mask is non-zero and Bundle.FixCount is zero, it means
// that the FP registers contain arguments.
@@ -991,7 +991,7 @@ void FPS::handleCall(MachineBasicBlock::iterator &I) {
assert(STReturns == 0 || (isMask_32(STReturns) && N <= 2));
// Reset the FP Stack - It is required because of possible leftovers from
- // passed arguments. The caller should assume that the FP stack is
+ // passed arguments. The caller should assume that the FP stack is
// returned empty (unless the callee returns values on FP stack).
while (StackTop > 0)
popReg();
diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
index a257ec41f75b..e207c343fac8 100644
--- a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -68,7 +68,7 @@ X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
// needsFrameIndexResolution - Do we need to perform FI resolution for
// this function. Normally, this is required only when the function
// has any stack objects. However, FI resolution actually has another job,
-// not apparent from the title - it resolves callframesetup/destroy
+// not apparent from the title - it resolves callframesetup/destroy
// that were not simplified earlier.
// So, this is required for x86 functions that have push sequences even
// when there are no stack objects.
@@ -607,8 +607,7 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
int64_t RCXShadowSlot = 0;
int64_t RDXShadowSlot = 0;
- // If inlining in the prolog, save RCX and RDX.
- // Future optimization: don't save or restore if not live in.
+ // If inlining in the prolog, save RCX and RDX.
if (InProlog) {
// Compute the offsets. We need to account for things already
// pushed onto the stack at this point: return address, frame
@@ -616,15 +615,30 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize();
const bool HasFP = hasFP(MF);
- RCXShadowSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
- RDXShadowSlot = RCXShadowSlot + 8;
- // Emit the saves.
- addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
- RCXShadowSlot)
- .addReg(X86::RCX);
- addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
- RDXShadowSlot)
- .addReg(X86::RDX);
+
+ // Check if we need to spill RCX and/or RDX.
+ // Here we assume that no earlier prologue instruction changes RCX and/or
+ // RDX, so checking the block live-ins is enough.
+ const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX);
+ const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX);
+ int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
+ // Assign the initial slot to both registers, then change RDX's slot if both
+ // need to be spilled.
+ if (IsRCXLiveIn)
+ RCXShadowSlot = InitSlot;
+ if (IsRDXLiveIn)
+ RDXShadowSlot = InitSlot;
+ if (IsRDXLiveIn && IsRCXLiveIn)
+ RDXShadowSlot += 8;
+ // Emit the saves if needed.
+ if (IsRCXLiveIn)
+ addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
+ RCXShadowSlot)
+ .addReg(X86::RCX);
+ if (IsRDXLiveIn)
+ addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
+ RDXShadowSlot)
+ .addReg(X86::RDX);
} else {
// Not in the prolog. Copy RAX to a virtual reg.
BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX);
@@ -661,6 +675,7 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
BuildMI(&MBB, DL, TII.get(X86::JAE_1)).addMBB(ContinueMBB);
// Add code to roundMBB to round the final stack pointer to a page boundary.
+ RoundMBB->addLiveIn(FinalReg);
BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg)
.addReg(FinalReg)
.addImm(PageMask);
@@ -677,6 +692,7 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
.addMBB(LoopMBB);
}
+ LoopMBB->addLiveIn(JoinReg);
addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg,
false, -PageSize);
@@ -688,6 +704,8 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
.addImm(0)
.addReg(0)
.addImm(0);
+
+ LoopMBB->addLiveIn(RoundedReg);
BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))
.addReg(RoundedReg)
.addReg(ProbeReg);
@@ -697,16 +715,19 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
// If in prolog, restore RDX and RCX.
if (InProlog) {
- addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::MOV64rm),
- X86::RCX),
- X86::RSP, false, RCXShadowSlot);
- addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::MOV64rm),
- X86::RDX),
- X86::RSP, false, RDXShadowSlot);
+ if (RCXShadowSlot) // It means we spilled RCX in the prologue.
+ addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
+ TII.get(X86::MOV64rm), X86::RCX),
+ X86::RSP, false, RCXShadowSlot);
+ if (RDXShadowSlot) // It means we spilled RDX in the prologue.
+ addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
+ TII.get(X86::MOV64rm), X86::RDX),
+ X86::RSP, false, RDXShadowSlot);
}
// Now that the probing is done, add code to continueMBB to update
// the stack pointer for real.
+ ContinueMBB->addLiveIn(SizeReg);
BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
.addReg(X86::RSP)
.addReg(SizeReg);
@@ -734,8 +755,6 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
CMBBI->setFlag(MachineInstr::FrameSetup);
}
}
-
- // Possible TODO: physreg liveness for InProlog case.
}
void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
@@ -2694,7 +2713,7 @@ bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
Regs[FoundRegs++] = Regs[0];
for (int i = 0; i < NumPops; ++i)
- BuildMI(MBB, MBBI, DL,
+ BuildMI(MBB, MBBI, DL,
TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r), Regs[i]);
return true;
@@ -2984,7 +3003,7 @@ struct X86FrameSortingComparator {
// in general. Something to keep in mind, though.
if (DensityAScaled == DensityBScaled)
return A.ObjectAlignment < B.ObjectAlignment;
-
+
return DensityAScaled < DensityBScaled;
}
};
@@ -3020,7 +3039,7 @@ void X86FrameLowering::orderFrameObjects(
if (ObjectSize == 0)
// Variable size. Just use 4.
SortingObjects[Obj].ObjectSize = 4;
- else
+ else
SortingObjects[Obj].ObjectSize = ObjectSize;
}
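The reworked probe prologue above spills RCX and RDX to home (shadow) slots only when they are live into the block, and the restore side treats a non-zero slot offset as the spilled marker; that is sound because the smallest possible slot is 8 plus the callee-save size (plus 8 more with a frame pointer), never 0. A small sketch of the slot assignment, with hypothetical names, just to make the offset arithmetic concrete:

#include <cstdint>
#include <iostream>

struct ShadowSlots {
  int64_t RCX = 0;  // 0 means "not spilled".
  int64_t RDX = 0;
};

// Mirrors the prologue logic: both registers start at the same initial
// slot; RDX moves up by 8 only when both need to be spilled.
ShadowSlots assignSlots(bool RCXLiveIn, bool RDXLiveIn,
                        int64_t CalleeSaveSize, bool HasFP) {
  ShadowSlots S;
  const int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
  if (RCXLiveIn)
    S.RCX = InitSlot;
  if (RDXLiveIn)
    S.RDX = InitSlot;
  if (RCXLiveIn && RDXLiveIn)
    S.RDX += 8;
  return S;
}

int main() {
  ShadowSlots A = assignSlots(true, true, 16, false);
  ShadowSlots B = assignSlots(false, true, 16, true);
  std::cout << A.RCX << " " << A.RDX << "\n";  // 24 32: both spilled
  std::cout << B.RCX << " " << B.RDX << "\n";  // 0 32: only RDX spilled
  return 0;
}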
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7dcdb7967058..2820004cfc6d 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1800,17 +1800,19 @@ X86TargetLowering::getPreferredVectorAction(EVT VT) const {
}
MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
EVT VT) const {
if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
return MVT::v32i8;
- return TargetLowering::getRegisterTypeForCallingConv(Context, VT);
+ return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
}
unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
EVT VT) const {
if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
return 1;
- return TargetLowering::getNumRegistersForCallingConv(Context, VT);
+ return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}
EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
@@ -23366,7 +23368,7 @@ static SDValue convertShiftLeftToScale(SDValue Amt, const SDLoc &dl,
return DAG.getBuildVector(VT, dl, Elts);
}
- // If the target doesn't support variable shifts, use either FP conversion
+ // If the target doesn't support variable shifts, use either FP conversion
// or integer multiplication to avoid shifting each element individually.
if (VT == MVT::v4i32) {
Amt = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, dl, VT));
@@ -23509,6 +23511,24 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
if (SDValue Scale = convertShiftLeftToScale(Amt, dl, Subtarget, DAG))
return DAG.getNode(ISD::MUL, dl, VT, R, Scale);
+ // Constant ISD::SRL can be performed efficiently on vXi8/vXi16 vectors as we
+ // can replace with ISD::MULHU, creating scale factor from (NumEltBits - Amt).
+ // TODO: Improve support for the shift by zero special case.
+ if (Op.getOpcode() == ISD::SRL && ConstantAmt &&
+ ((Subtarget.hasSSE41() && VT == MVT::v8i16) ||
+ DAG.isKnownNeverZero(Amt)) &&
+ (VT == MVT::v16i8 || VT == MVT::v8i16 ||
+ ((VT == MVT::v32i8 || VT == MVT::v16i16) && Subtarget.hasInt256()))) {
+ SDValue EltBits = DAG.getConstant(VT.getScalarSizeInBits(), dl, VT);
+ SDValue RAmt = DAG.getNode(ISD::SUB, dl, VT, EltBits, Amt);
+ if (SDValue Scale = convertShiftLeftToScale(RAmt, dl, Subtarget, DAG)) {
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue ZAmt = DAG.getSetCC(dl, VT, Amt, Zero, ISD::SETEQ);
+ SDValue Res = DAG.getNode(ISD::MULHU, dl, VT, R, Scale);
+ return DAG.getSelect(dl, VT, ZAmt, R, Res);
+ }
+ }
+
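The SRL-to-MULHU rewrite above rests on the identity that, for a lane width of W bits and a shift amount s with 1 <= s < W, a logical right shift equals the high half of an unsigned multiply by 2^(W-s); the zero-shift lanes are handled by the SETEQ/select. A scalar check of that identity for 16-bit lanes, as a sketch:

#include <cassert>
#include <cstdint>

// For W = 16 and 1 <= s < 16:  x >> s  ==  mulhu(x, 1 << (16 - s)),
// i.e. the high 16 bits of the 32-bit product.  s == 0 needs the select in
// the combine because 1 << 16 does not fit in a 16-bit lane element.
uint16_t mulhu16(uint16_t a, uint16_t b) {
  return static_cast<uint16_t>((uint32_t(a) * uint32_t(b)) >> 16);
}

int main() {
  for (uint32_t x = 0; x <= 0xffff; x += 257)
    for (unsigned s = 1; s < 16; ++s)
      assert(uint16_t(x >> s) ==
             mulhu16(uint16_t(x), uint16_t(1u << (16 - s))));
  return 0;
}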
// v4i32 Non Uniform Shifts.
// If the shift amount is constant we can shift each lane using the SSE2
// immediate shifts, else we need to zero-extend each lane to the lower i64
@@ -33425,33 +33445,32 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
}
}
- // Handle (CMOV C-1, (ADD (CTTZ X), C), (X != 0)) ->
- // (ADD (CMOV (CTTZ X), -1, (X != 0)), C) or
- // (CMOV (ADD (CTTZ X), C), C-1, (X == 0)) ->
- // (ADD (CMOV C-1, (CTTZ X), (X == 0)), C)
- if (CC == X86::COND_NE || CC == X86::COND_E) {
- auto *Cnst = CC == X86::COND_E ? dyn_cast<ConstantSDNode>(TrueOp)
- : dyn_cast<ConstantSDNode>(FalseOp);
- SDValue Add = CC == X86::COND_E ? FalseOp : TrueOp;
-
- if (Cnst && Add.getOpcode() == ISD::ADD && Add.hasOneUse()) {
- auto *AddOp1 = dyn_cast<ConstantSDNode>(Add.getOperand(1));
- SDValue AddOp2 = Add.getOperand(0);
- if (AddOp1 && (AddOp2.getOpcode() == ISD::CTTZ_ZERO_UNDEF ||
- AddOp2.getOpcode() == ISD::CTTZ)) {
- APInt Diff = Cnst->getAPIntValue() - AddOp1->getAPIntValue();
- if (CC == X86::COND_E) {
- Add = DAG.getNode(X86ISD::CMOV, DL, Add.getValueType(), AddOp2,
- DAG.getConstant(Diff, DL, Add.getValueType()),
- DAG.getConstant(CC, DL, MVT::i8), Cond);
- } else {
- Add = DAG.getNode(X86ISD::CMOV, DL, Add.getValueType(),
- DAG.getConstant(Diff, DL, Add.getValueType()),
- AddOp2, DAG.getConstant(CC, DL, MVT::i8), Cond);
- }
- return DAG.getNode(X86ISD::ADD, DL, Add.getValueType(), Add,
- SDValue(AddOp1, 0));
- }
+ // Fold (CMOV C1, (ADD (CTTZ X), C2), (X != 0)) ->
+ // (ADD (CMOV C1-C2, (CTTZ X), (X != 0)), C2)
+ // Or (CMOV (ADD (CTTZ X), C2), C1, (X == 0)) ->
+ // (ADD (CMOV (CTTZ X), C1-C2, (X == 0)), C2)
+ if ((CC == X86::COND_NE || CC == X86::COND_E) &&
+ Cond.getOpcode() == X86ISD::CMP && isNullConstant(Cond.getOperand(1))) {
+ SDValue Add = TrueOp;
+ SDValue Const = FalseOp;
+ // Canonicalize the condition code for easier matching and output.
+ if (CC == X86::COND_E) {
+ std::swap(Add, Const);
+ CC = X86::COND_NE;
+ }
+
+ // Ok, now make sure that Add is (add (cttz X), C2) and Const is a constant.
+ if (isa<ConstantSDNode>(Const) && Add.getOpcode() == ISD::ADD &&
+ Add.hasOneUse() && isa<ConstantSDNode>(Add.getOperand(1)) &&
+ (Add.getOperand(0).getOpcode() == ISD::CTTZ_ZERO_UNDEF ||
+ Add.getOperand(0).getOpcode() == ISD::CTTZ) &&
+ Add.getOperand(0).getOperand(0) == Cond.getOperand(0)) {
+ EVT VT = N->getValueType(0);
+ // This should constant fold.
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, VT, Const, Add.getOperand(1));
+ SDValue CMov = DAG.getNode(X86ISD::CMOV, DL, VT, Diff, Add.getOperand(0),
+ DAG.getConstant(CC, DL, MVT::i8), Cond);
+ return DAG.getNode(ISD::ADD, DL, VT, CMov, Add.getOperand(1));
}
}
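In scalar terms the rewritten fold turns X != 0 ? cttz(X) + C2 : C1 into (X != 0 ? cttz(X) : C1 - C2) + C2, so the CMOV only has to select between the count and a single constant before the add. A quick check of the algebra, as a sketch; cttz is modelled with an explicit zero case standing in for the X != 0 guard around CTTZ_ZERO_UNDEF:

#include <cassert>
#include <cstdint>

// Count of trailing zeros with a defined value for zero input.
unsigned cttz32(uint32_t x) {
  if (x == 0)
    return 32;
  unsigned n = 0;
  while (!(x & 1)) {
    x >>= 1;
    ++n;
  }
  return n;
}

// Original form: (X != 0) ? cttz(X) + C2 : C1
int before(uint32_t x, int c1, int c2) {
  return x != 0 ? int(cttz32(x)) + c2 : c1;
}

// Combined form: ((X != 0) ? cttz(X) : C1 - C2) + C2
int after(uint32_t x, int c1, int c2) {
  return (x != 0 ? int(cttz32(x)) : c1 - c2) + c2;
}

int main() {
  for (uint32_t x : {0u, 1u, 8u, 0x80000000u, 12345u})
    for (int c1 : {-1, 0, 7, 32})
      for (int c2 : {-5, 0, 3})
        assert(before(x, c1, c2) == after(x, c1, c2));
  return 0;
}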
@@ -33873,31 +33892,42 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!C)
return SDValue();
- uint64_t MulAmt = C->getZExtValue();
- if (isPowerOf2_64(MulAmt))
+ if (isPowerOf2_64(C->getZExtValue()))
return SDValue();
+ int64_t SignMulAmt = C->getSExtValue();
+ assert(SignMulAmt != INT64_MIN && "Int min should have been handled!");
+ uint64_t AbsMulAmt = SignMulAmt < 0 ? -SignMulAmt : SignMulAmt;
+
SDLoc DL(N);
- if (MulAmt == 3 || MulAmt == 5 || MulAmt == 9)
- return DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
- N->getOperand(1));
+ if (AbsMulAmt == 3 || AbsMulAmt == 5 || AbsMulAmt == 9) {
+ SDValue NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
+ DAG.getConstant(AbsMulAmt, DL, VT));
+ if (SignMulAmt < 0)
+ NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ NewMul);
+
+ return NewMul;
+ }
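For a negative constant, the combine above multiplies by the absolute value (3, 5 and 9 map onto a single LEA-style base plus scaled index) and then subtracts the product from zero. A scalar check of the decomposition, as a sketch:

#include <cassert>
#include <cstdint>

// x * -9 becomes 0 - (x * 9), where x * 9 is base + index*8, i.e. one LEA.
int64_t mulByNeg9(int64_t x) {
  int64_t by9 = x + x * 8;
  return 0 - by9;
}

int main() {
  for (int64_t x : {int64_t(0), int64_t(1), int64_t(-7), int64_t(123456789),
                    int64_t(-987654321)})
    assert(mulByNeg9(x) == x * -9);
  return 0;
}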
uint64_t MulAmt1 = 0;
uint64_t MulAmt2 = 0;
- if ((MulAmt % 9) == 0) {
+ if ((AbsMulAmt % 9) == 0) {
MulAmt1 = 9;
- MulAmt2 = MulAmt / 9;
- } else if ((MulAmt % 5) == 0) {
+ MulAmt2 = AbsMulAmt / 9;
+ } else if ((AbsMulAmt % 5) == 0) {
MulAmt1 = 5;
- MulAmt2 = MulAmt / 5;
- } else if ((MulAmt % 3) == 0) {
+ MulAmt2 = AbsMulAmt / 5;
+ } else if ((AbsMulAmt % 3) == 0) {
MulAmt1 = 3;
- MulAmt2 = MulAmt / 3;
+ MulAmt2 = AbsMulAmt / 3;
}
SDValue NewMul;
+ // For negative multiply amounts, only allow MulAmt2 to be a power of 2.
if (MulAmt2 &&
- (isPowerOf2_64(MulAmt2) || MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)){
+ (isPowerOf2_64(MulAmt2) ||
+ (SignMulAmt >= 0 && (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)))) {
if (isPowerOf2_64(MulAmt2) &&
!(N->hasOneUse() && N->use_begin()->getOpcode() == ISD::ADD))
@@ -33919,17 +33949,19 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
else
NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,
DAG.getConstant(MulAmt2, DL, VT));
+
+ // Negate the result.
+ if (SignMulAmt < 0)
+ NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ NewMul);
} else if (!Subtarget.slowLEA())
- NewMul = combineMulSpecial(MulAmt, N, DAG, VT, DL);
+ NewMul = combineMulSpecial(C->getZExtValue(), N, DAG, VT, DL);
if (!NewMul) {
- assert(MulAmt != 0 &&
- MulAmt != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX) &&
+ assert(C->getZExtValue() != 0 &&
+ C->getZExtValue() != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX) &&
"Both cases that could cause potential overflows should have "
"already been handled.");
- int64_t SignMulAmt = C->getSExtValue();
- assert(SignMulAmt != INT64_MIN && "Int min should have been handled!");
- uint64_t AbsMulAmt = SignMulAmt < 0 ? -SignMulAmt : SignMulAmt;
if (isPowerOf2_64(AbsMulAmt - 1)) {
// (mul x, 2^N + 1) => (add (shl x, N), x)
NewMul = DAG.getNode(
@@ -36738,6 +36770,145 @@ static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL,
return DAG.getNode(Opc, DL, VT, LHS, RHS);
}
+// Attempt to match PMADDUBSW, which multiplies corresponding unsigned bytes
+// from one vector with signed bytes from another vector, adds together
+// adjacent pairs of 16-bit products, and saturates the result before
+// truncating to 16-bits.
+//
+// Which looks something like this:
+// (i16 (ssat (add (mul (zext (even elts (i8 A))), (sext (even elts (i8 B)))),
+// (mul (zext (odd elts (i8 A)), (sext (odd elts (i8 B))))))))
+static SDValue detectPMADDUBSW(SDValue In, EVT VT, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget,
+ const SDLoc &DL) {
+ if (!VT.isVector() || !Subtarget.hasSSSE3())
+ return SDValue();
+
+ unsigned NumElems = VT.getVectorNumElements();
+ EVT ScalarVT = VT.getVectorElementType();
+ if (ScalarVT != MVT::i16 || NumElems < 8 || !isPowerOf2_32(NumElems))
+ return SDValue();
+
+ SDValue SSatVal = detectSSatPattern(In, VT);
+ if (!SSatVal || SSatVal.getOpcode() != ISD::ADD)
+ return SDValue();
+
+ // Ok this is a signed saturation of an ADD. See if this ADD is adding pairs
+ // of multiplies from even/odd elements.
+ SDValue N0 = SSatVal.getOperand(0);
+ SDValue N1 = SSatVal.getOperand(1);
+
+ if (N0.getOpcode() != ISD::MUL || N1.getOpcode() != ISD::MUL)
+ return SDValue();
+
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ SDValue N10 = N1.getOperand(0);
+ SDValue N11 = N1.getOperand(1);
+
+ // TODO: Handle constant vectors and use knownbits/computenumsignbits?
+ // Canonicalize zero_extend to LHS.
+ if (N01.getOpcode() == ISD::ZERO_EXTEND)
+ std::swap(N00, N01);
+ if (N11.getOpcode() == ISD::ZERO_EXTEND)
+ std::swap(N10, N11);
+
+ // Ensure we have a zero_extend and a sign_extend.
+ if (N00.getOpcode() != ISD::ZERO_EXTEND ||
+ N01.getOpcode() != ISD::SIGN_EXTEND ||
+ N10.getOpcode() != ISD::ZERO_EXTEND ||
+ N11.getOpcode() != ISD::SIGN_EXTEND)
+ return SDValue();
+
+ // Peek through the extends.
+ N00 = N00.getOperand(0);
+ N01 = N01.getOperand(0);
+ N10 = N10.getOperand(0);
+ N11 = N11.getOperand(0);
+
+ // Ensure the extend is from vXi8.
+ if (N00.getValueType().getVectorElementType() != MVT::i8 ||
+ N01.getValueType().getVectorElementType() != MVT::i8 ||
+ N10.getValueType().getVectorElementType() != MVT::i8 ||
+ N11.getValueType().getVectorElementType() != MVT::i8)
+ return SDValue();
+
+ // All inputs should be build_vectors.
+ if (N00.getOpcode() != ISD::BUILD_VECTOR ||
+ N01.getOpcode() != ISD::BUILD_VECTOR ||
+ N10.getOpcode() != ISD::BUILD_VECTOR ||
+ N11.getOpcode() != ISD::BUILD_VECTOR)
+ return SDValue();
+
+ // N00/N10 are zero extended. N01/N11 are sign extended.
+
+ // For each element, we need to ensure we have an odd element from one vector
+ // multiplied by the odd element of another vector and the even element from
+ // one of the same vectors being multiplied by the even element from the
+ // other vector. So we need to make sure for each element i, this operator
+ // is being performed:
+ // A[2 * i] * B[2 * i] + A[2 * i + 1] * B[2 * i + 1]
+ SDValue ZExtIn, SExtIn;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ SDValue N00Elt = N00.getOperand(i);
+ SDValue N01Elt = N01.getOperand(i);
+ SDValue N10Elt = N10.getOperand(i);
+ SDValue N11Elt = N11.getOperand(i);
+ // TODO: Be more tolerant to undefs.
+ if (N00Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ N01Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ N10Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ N11Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+ auto *ConstN00Elt = dyn_cast<ConstantSDNode>(N00Elt.getOperand(1));
+ auto *ConstN01Elt = dyn_cast<ConstantSDNode>(N01Elt.getOperand(1));
+ auto *ConstN10Elt = dyn_cast<ConstantSDNode>(N10Elt.getOperand(1));
+ auto *ConstN11Elt = dyn_cast<ConstantSDNode>(N11Elt.getOperand(1));
+ if (!ConstN00Elt || !ConstN01Elt || !ConstN10Elt || !ConstN11Elt)
+ return SDValue();
+ unsigned IdxN00 = ConstN00Elt->getZExtValue();
+ unsigned IdxN01 = ConstN01Elt->getZExtValue();
+ unsigned IdxN10 = ConstN10Elt->getZExtValue();
+ unsigned IdxN11 = ConstN11Elt->getZExtValue();
+ // Add is commutative so indices can be reordered.
+ if (IdxN00 > IdxN10) {
+ std::swap(IdxN00, IdxN10);
+ std::swap(IdxN01, IdxN11);
+ }
+    // N0 indices must be the even element. N1 indices must be the next odd element.
+ if (IdxN00 != 2 * i || IdxN10 != 2 * i + 1 ||
+ IdxN01 != 2 * i || IdxN11 != 2 * i + 1)
+ return SDValue();
+ SDValue N00In = N00Elt.getOperand(0);
+ SDValue N01In = N01Elt.getOperand(0);
+ SDValue N10In = N10Elt.getOperand(0);
+ SDValue N11In = N11Elt.getOperand(0);
+    // The first time we find an input, capture it.
+ if (!ZExtIn) {
+ ZExtIn = N00In;
+ SExtIn = N01In;
+ }
+ if (ZExtIn != N00In || SExtIn != N01In ||
+ ZExtIn != N10In || SExtIn != N11In)
+ return SDValue();
+ }
+
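+  // SplitOpsAndApply breaks the vXi8 inputs into legal-width pieces, calls the
+  // builder below on each piece, and concatenates the results.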
+ auto PMADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
+ ArrayRef<SDValue> Ops) {
+    // Build the VPMADDUBSW node directly; the result type has half as many
+    // elements as the vXi8 inputs, widened to i16.
+ EVT InVT = Ops[0].getValueType();
+ assert(InVT.getScalarType() == MVT::i8 &&
+ "Unexpected scalar element type");
+ assert(InVT == Ops[1].getValueType() && "Operands' types mismatch");
+ EVT ResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
+ InVT.getVectorNumElements() / 2);
+ return DAG.getNode(X86ISD::VPMADDUBSW, DL, ResVT, Ops[0], Ops[1]);
+ };
+ return SplitOpsAndApply(DAG, Subtarget, DL, VT, { ZExtIn, SExtIn },
+ PMADDBuilder);
+}
+
static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
@@ -36752,6 +36923,10 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
return Avg;
+  // Try to detect PMADDUBSW.
+ if (SDValue PMAdd = detectPMADDUBSW(Src, VT, DAG, Subtarget, DL))
+ return PMAdd;
+
// Try to combine truncation with signed/unsigned saturation.
if (SDValue Val = combineTruncateWithSat(Src, VT, DL, DAG, Subtarget))
return Val;
@@ -36793,38 +36968,14 @@ static SDValue isFNEG(SDNode *N) {
if (!Op1.getValueType().isFloatingPoint())
return SDValue();
- SDValue Op0 = peekThroughBitcasts(Op.getOperand(0));
-
- unsigned EltBits = Op1.getScalarValueSizeInBits();
- auto isSignMask = [&](const ConstantFP *C) {
- return C->getValueAPF().bitcastToAPInt() == APInt::getSignMask(EltBits);
- };
-
- // There is more than one way to represent the same constant on
- // the different X86 targets. The type of the node may also depend on size.
- // - load scalar value and broadcast
- // - BUILD_VECTOR node
- // - load from a constant pool.
- // We check all variants here.
- if (Op1.getOpcode() == X86ISD::VBROADCAST) {
- if (auto *C = getTargetConstantFromNode(Op1.getOperand(0)))
- if (isSignMask(cast<ConstantFP>(C)))
- return Op0;
-
- } else if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op1)) {
- if (ConstantFPSDNode *CN = BV->getConstantFPSplatNode())
- if (isSignMask(CN->getConstantFPValue()))
- return Op0;
+ // Extract constant bits and see if they are all sign bit masks.
+ APInt UndefElts;
+ SmallVector<APInt, 16> EltBits;
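+  // Undef elements are rejected here (both trailing bool arguments are false),
+  // so every lane must be a known constant equal to the sign mask.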
+ if (getTargetConstantBitsFromNode(Op1, Op1.getScalarValueSizeInBits(),
+ UndefElts, EltBits, false, false))
+ if (llvm::all_of(EltBits, [](APInt &I) { return I.isSignMask(); }))
+ return peekThroughBitcasts(Op.getOperand(0));
- } else if (auto *C = getTargetConstantFromNode(Op1)) {
- if (C->getType()->isVectorTy()) {
- if (auto *SplatV = C->getSplatValue())
- if (isSignMask(cast<ConstantFP>(SplatV)))
- return Op0;
- } else if (auto *FPConst = dyn_cast<ConstantFP>(C))
- if (isSignMask(FPConst))
- return Op0;
- }
return SDValue();
}
@@ -37777,8 +37928,7 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
// Look through extract_vector_elts. If it comes from an FNEG, create a
// new extract from the FNEG input.
if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- isa<ConstantSDNode>(V.getOperand(1)) &&
- cast<ConstantSDNode>(V.getOperand(1))->getZExtValue() == 0) {
+ isNullConstant(V.getOperand(1))) {
if (SDValue NegVal = isFNEG(V.getOperand(0).getNode())) {
NegVal = DAG.getBitcast(V.getOperand(0).getValueType(), NegVal);
V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V), V.getValueType(),
@@ -38896,7 +39046,7 @@ static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDValue N0, SDValue N1,
std::swap(IdxN00, IdxN10);
std::swap(IdxN01, IdxN11);
}
- // N0 indices be the even elemtn. N1 indices must be the next odd element.
+    // N0 indices must be the even element. N1 indices must be the next odd element.
if (IdxN00 != 2 * i || IdxN10 != 2 * i + 1 ||
IdxN01 != 2 * i || IdxN11 != 2 * i + 1)
return SDValue();
@@ -39322,8 +39472,7 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
if ((IdxVal == OpVT.getVectorNumElements() / 2) &&
Vec.getOpcode() == ISD::INSERT_SUBVECTOR &&
OpVT.getSizeInBits() == SubVecVT.getSizeInBits() * 2) {
- auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2));
- if (Idx2 && Idx2->getZExtValue() == 0) {
+ if (isNullConstant(Vec.getOperand(2))) {
SDValue SubVec2 = Vec.getOperand(1);
// If needed, look through bitcasts to get to the load.
if (auto *FirstLd = dyn_cast<LoadSDNode>(peekThroughBitcasts(SubVec2))) {
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
index 32215b170a8c..ff5006d208e5 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1097,10 +1097,11 @@ namespace llvm {
/// Customize the preferred legalization strategy for certain types.
LegalizeTypeAction getPreferredVectorAction(EVT VT) const override;
- MVT getRegisterTypeForCallingConv(LLVMContext &Context,
+ MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
EVT VT) const override;
unsigned getNumRegistersForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
EVT VT) const override;
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
@@ -1125,8 +1126,8 @@ namespace llvm {
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
unsigned Factor) const override;
- SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
- SDValue Addr, SelectionDAG &DAG)
+ SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
+ SDValue Addr, SelectionDAG &DAG)
const override;
protected:
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFoldTables.cpp b/contrib/llvm/lib/Target/X86/X86InstrFoldTables.cpp
index 5d8400595bfa..7d31cfab4137 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFoldTables.cpp
+++ b/contrib/llvm/lib/Target/X86/X86InstrFoldTables.cpp
@@ -1576,7 +1576,7 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::SUBSDrr_Int, X86::SUBSDrm_Int, TB_NO_REVERSE },
{ X86::SUBSSrr, X86::SUBSSrm, 0 },
{ X86::SUBSSrr_Int, X86::SUBSSrm_Int, TB_NO_REVERSE },
- // FIXME: TEST*rr -> swapped operand of TEST *mr.
+ // FIXME: TEST*rr -> swapped operand of TEST *mr.
{ X86::UNPCKHPDrr, X86::UNPCKHPDrm, TB_ALIGN_16 },
{ X86::UNPCKHPSrr, X86::UNPCKHPSrm, TB_ALIGN_16 },
{ X86::UNPCKLPDrr, X86::UNPCKLPDrm, TB_ALIGN_16 },
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
index 1b61accfb42b..96db8b4e7585 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -7725,7 +7725,7 @@ X86InstrInfo::insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
if (C.CallConstructionID == MachineOutlinerTailCall) {
// Yes, just insert a JMP.
It = MBB.insert(It,
- BuildMI(MF, DebugLoc(), get(X86::JMP_1))
+ BuildMI(MF, DebugLoc(), get(X86::TAILJMPd64))
.addGlobalAddress(M.getNamedValue(MF.getName())));
} else {
// No, insert a call.
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
index 7509b312c100..bc7afd32d494 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
@@ -1750,7 +1750,7 @@ def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>, // AH = flags
// Bit tests instructions: BT, BTS, BTR, BTC.
let Defs = [EFLAGS] in {
-let SchedRW = [WriteALU] in {
+let SchedRW = [WriteBitTest] in {
def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))]>,
@@ -1783,7 +1783,7 @@ let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteALULd] in {
[]>, TB, NotMemoryFoldable;
}
-let SchedRW = [WriteALU] in {
+let SchedRW = [WriteBitTest] in {
def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))]>,
@@ -1818,7 +1818,7 @@ def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
} // SchedRW
let hasSideEffects = 0 in {
-let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
def BTC16rr : I<0xBB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB, NotMemoryFoldable;
@@ -1842,7 +1842,7 @@ def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
NotMemoryFoldable;
}
-let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
def BTC16ri8 : Ii8<0xBA, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
def BTC32ri8 : Ii8<0xBA, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
@@ -1861,7 +1861,7 @@ def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
Requires<[In64BitMode]>;
}
-let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
def BTR16rr : I<0xB3, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB, NotMemoryFoldable;
@@ -1885,7 +1885,7 @@ def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
NotMemoryFoldable;
}
-let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
def BTR16ri8 : Ii8<0xBA, MRM6r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB;
@@ -1908,7 +1908,7 @@ def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
Requires<[In64BitMode]>;
}
-let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
def BTS16rr : I<0xAB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB, NotMemoryFoldable;
@@ -1932,7 +1932,7 @@ def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
NotMemoryFoldable;
}
-let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
def BTS16ri8 : Ii8<0xBA, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
def BTS32ri8 : Ii8<0xBA, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
diff --git a/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
index ee3b01159174..023137634df1 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
@@ -650,9 +650,9 @@ def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst),
// Double shift instructions (generalizations of rotate)
//===----------------------------------------------------------------------===//
-let Constraints = "$src1 = $dst", SchedRW = [WriteShiftDouble] in {
+let Constraints = "$src1 = $dst" in {
-let Uses = [CL] in {
+let Uses = [CL], SchedRW = [WriteSHDrrcl] in {
def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst),
(ins GR16:$src1, GR16:$src2),
"shld{w}\t{%cl, $src2, $dst|$dst, $src2, cl}",
@@ -683,9 +683,9 @@ def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst),
"shrd{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
[(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))]>,
TB;
-}
+} // SchedRW
-let isCommutable = 1 in { // These instructions commute to each other.
+let isCommutable = 1, SchedRW = [WriteSHDrri] in { // These instructions commute to each other.
def SHLD16rri8 : Ii8<0xA4, MRMDestReg,
(outs GR16:$dst),
(ins GR16:$src1, GR16:$src2, u8imm:$src3),
@@ -728,11 +728,10 @@ def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
[(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2,
(i8 imm:$src3)))]>,
TB;
-}
-} // Constraints = "$src = $dst", SchedRW
+} // SchedRW
+} // Constraints = "$src = $dst"
-let SchedRW = [WriteShiftDoubleLd, WriteRMW] in {
-let Uses = [CL] in {
+let Uses = [CL], SchedRW = [WriteSHDmrcl] in {
def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
"shld{w}\t{%cl, $src2, $dst|$dst, $src2, cl}",
[(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL),
@@ -759,8 +758,9 @@ def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
"shrd{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
[(store (X86shrd (loadi64 addr:$dst), GR64:$src2, CL),
addr:$dst)]>, TB;
-}
+} // SchedRW
+let SchedRW = [WriteSHDmri] in {
def SHLD16mri8 : Ii8<0xA4, MRMDestMem,
(outs), (ins i16mem:$dst, GR16:$src2, u8imm:$src3),
"shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
diff --git a/contrib/llvm/lib/Target/X86/X86SchedBroadwell.td b/contrib/llvm/lib/Target/X86/X86SchedBroadwell.td
index c7713fea70fa..6334d9e89a60 100755
--- a/contrib/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/contrib/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -119,8 +119,8 @@ defm : BWWriteResPair<WriteIDiv16, [BWPort0, BWDivider], 25, [1, 10]>;
defm : BWWriteResPair<WriteIDiv32, [BWPort0, BWDivider], 25, [1, 10]>;
defm : BWWriteResPair<WriteIDiv64, [BWPort0, BWDivider], 25, [1, 10]>;
-defm : BWWriteResPair<WriteBSWAP32,[BWPort15], 1>; //
-defm : BWWriteResPair<WriteBSWAP64,[BWPort06, BWPort15], 2, [1, 1], 2>; //
+defm : X86WriteRes<WriteBSWAP32, [BWPort15], 1, [1], 1>;
+defm : X86WriteRes<WriteBSWAP64, [BWPort06, BWPort15], 2, [1, 1], 2>;
defm : BWWriteResPair<WriteCRC32, [BWPort1], 3>;
def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part.
@@ -137,6 +137,7 @@ def : WriteRes<WriteSETCCStore, [BWPort06,BWPort4,BWPort237]> {
let NumMicroOps = 3;
}
def : WriteRes<WriteLAHFSAHF, [BWPort06]>;
+def : WriteRes<WriteBitTest,[BWPort06]>; // Bit Test instrs
// Bit counts.
defm : BWWriteResPair<WriteBSF, [BWPort1], 3>;
@@ -148,8 +149,11 @@ defm : BWWriteResPair<WritePOPCNT, [BWPort1], 3>;
// Integer shifts and rotates.
defm : BWWriteResPair<WriteShift, [BWPort06], 1>;
-// Double shift instructions.
-defm : BWWriteResPair<WriteShiftDouble, [BWPort06], 1>;
+// SHLD/SHRD.
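+// X86WriteRes operands: write type, ports, latency, resource cycles per port,
+// and micro-op count (mirroring the SchedWriteRes groups removed further down).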
+defm : X86WriteRes<WriteSHDrri, [BWPort1], 3, [1], 1>;
+defm : X86WriteRes<WriteSHDrrcl,[BWPort1,BWPort06,BWPort0156], 6, [1, 1, 2], 4>;
+defm : X86WriteRes<WriteSHDmri, [BWPort1,BWPort23,BWPort237,BWPort0156], 9, [1, 1, 1, 1], 4>;
+defm : X86WriteRes<WriteSHDmrcl,[BWPort1,BWPort23,BWPort237,BWPort06,BWPort0156], 11, [1, 1, 1, 1, 2], 6>;
// BMI1 BEXTR, BMI2 BZHI
defm : BWWriteResPair<WriteBEXTR, [BWPort06,BWPort15], 2, [1,1], 2>;
@@ -600,14 +604,6 @@ def BWWriteResGroup6 : SchedWriteRes<[BWPort06]> {
let ResourceCycles = [1];
}
def: InstRW<[BWWriteResGroup6], (instrs CDQ, CQO)>;
-def: InstRW<[BWWriteResGroup6], (instregex "BT(16|32|64)ri8",
- "BT(16|32|64)rr",
- "BTC(16|32|64)ri8",
- "BTC(16|32|64)rr",
- "BTR(16|32|64)ri8",
- "BTR(16|32|64)rr",
- "BTS(16|32|64)ri8",
- "BTS(16|32|64)rr")>;
def BWWriteResGroup7 : SchedWriteRes<[BWPort15]> {
let Latency = 1;
@@ -746,8 +742,6 @@ def BWWriteResGroup27 : SchedWriteRes<[BWPort1]> {
def: InstRW<[BWWriteResGroup27], (instregex "MMX_CVTPI2PSirr",
"PDEP(32|64)rr",
"PEXT(32|64)rr",
- "SHLD(16|32|64)rri8",
- "SHRD(16|32|64)rri8",
"(V?)CVTDQ2PS(Y?)rr")>;
def BWWriteResGroup27_16 : SchedWriteRes<[BWPort1, BWPort0156]> {
@@ -1055,14 +1049,6 @@ def BWWriteResGroup66 : SchedWriteRes<[BWPort23,BWPort0156]> {
def: InstRW<[BWWriteResGroup66], (instrs POP16r, POP32r, POP64r)>;
def: InstRW<[BWWriteResGroup66], (instregex "POP(16|32|64)rmr")>;
-def BWWriteResGroup67 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> {
- let Latency = 6;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[BWWriteResGroup67], (instregex "SHLD(16|32|64)rrCL",
- "SHRD(16|32|64)rrCL")>;
-
def BWWriteResGroup68 : SchedWriteRes<[BWPort1,BWPort6,BWPort06,BWPort0156]> {
let Latency = 6;
let NumMicroOps = 4;
@@ -1307,14 +1293,6 @@ def BWWriteResGroup108 : SchedWriteRes<[BWPort5,BWPort23,BWPort015]> {
def: InstRW<[BWWriteResGroup108], (instregex "VPBROADCASTB(Y?)rm",
"VPBROADCASTW(Y?)rm")>;
-def BWWriteResGroup111 : SchedWriteRes<[BWPort1,BWPort23,BWPort237,BWPort0156]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[BWWriteResGroup111], (instregex "SHLD(16|32|64)mri8",
- "SHRD(16|32|64)mri8")>;
-
def BWWriteResGroup112 : SchedWriteRes<[BWPort23,BWPort06,BWPort0156]> {
let Latency = 9;
let NumMicroOps = 5;
@@ -1380,14 +1358,6 @@ def BWWriteResGroup128 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> {
}
def: InstRW<[BWWriteResGroup128], (instregex "VCVTDQ2PDYrm")>;
-def BWWriteResGroup130 : SchedWriteRes<[BWPort1,BWPort23,BWPort237,BWPort06,BWPort0156]> {
- let Latency = 11;
- let NumMicroOps = 6;
- let ResourceCycles = [1,1,1,1,2];
-}
-def: InstRW<[BWWriteResGroup130], (instregex "SHLD(16|32|64)mrCL",
- "SHRD(16|32|64)mrCL")>;
-
def BWWriteResGroup131 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> {
let Latency = 11;
let NumMicroOps = 7;
diff --git a/contrib/llvm/lib/Target/X86/X86SchedHaswell.td b/contrib/llvm/lib/Target/X86/X86SchedHaswell.td
index 189dd4183839..876c3e4162cf 100644
--- a/contrib/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/contrib/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -118,17 +118,26 @@ defm : X86WriteRes<WriteLoad, [HWPort23], 5, [1], 1>;
defm : X86WriteRes<WriteMove, [HWPort0156], 1, [1], 1>;
def : WriteRes<WriteZero, []>;
+// Arithmetic.
defm : HWWriteResPair<WriteALU, [HWPort0156], 1>;
-defm : HWWriteResPair<WriteADC, [HWPort06,HWPort0156], 2, [1,1], 2>;
+defm : HWWriteResPair<WriteADC, [HWPort06, HWPort0156], 2, [1,1], 2>;
defm : HWWriteResPair<WriteIMul, [HWPort1], 3>;
defm : HWWriteResPair<WriteIMul64, [HWPort1], 3>;
-defm : HWWriteResPair<WriteBSWAP32,[HWPort15], 1>;
-defm : HWWriteResPair<WriteBSWAP64,[HWPort06, HWPort15], 2, [1,1], 2>;
+defm : X86WriteRes<WriteBSWAP32, [HWPort15], 1, [1], 1>;
+defm : X86WriteRes<WriteBSWAP64, [HWPort06, HWPort15], 2, [1,1], 2>;
def : WriteRes<WriteIMulH, []> { let Latency = 3; }
+
+// Integer shifts and rotates.
defm : HWWriteResPair<WriteShift, [HWPort06], 1>;
-defm : HWWriteResPair<WriteShiftDouble, [HWPort06], 1>;
+
+// SHLD/SHRD.
+defm : X86WriteRes<WriteSHDrri, [HWPort1], 3, [1], 1>;
+defm : X86WriteRes<WriteSHDrrcl,[HWPort1, HWPort06, HWPort0156], 6, [1, 1, 2], 4>;
+defm : X86WriteRes<WriteSHDmri, [HWPort1, HWPort23, HWPort237, HWPort0156], 10, [1, 1, 1, 1], 4>;
+defm : X86WriteRes<WriteSHDmrcl,[HWPort1, HWPort23, HWPort237, HWPort06, HWPort0156], 12, [1, 1, 1, 1, 2], 6>;
+
defm : HWWriteResPair<WriteJump, [HWPort06], 1>;
defm : HWWriteResPair<WriteCRC32, [HWPort1], 3>;
@@ -141,6 +150,7 @@ def : WriteRes<WriteSETCCStore, [HWPort06,HWPort4,HWPort237]> {
let NumMicroOps = 3;
}
def : WriteRes<WriteLAHFSAHF, [HWPort06]>;
+def : WriteRes<WriteBitTest,[HWPort06]>;
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
@@ -886,14 +896,6 @@ def HWWriteResGroup7 : SchedWriteRes<[HWPort06]> {
let ResourceCycles = [1];
}
def: InstRW<[HWWriteResGroup7], (instrs CDQ, CQO)>;
-def: InstRW<[HWWriteResGroup7], (instregex "BT(16|32|64)ri8",
- "BT(16|32|64)rr",
- "BTC(16|32|64)ri8",
- "BTC(16|32|64)rr",
- "BTR(16|32|64)ri8",
- "BTR(16|32|64)rr",
- "BTS(16|32|64)ri8",
- "BTS(16|32|64)rr")>;
def HWWriteResGroup8 : SchedWriteRes<[HWPort15]> {
let Latency = 1;
@@ -1240,8 +1242,6 @@ def HWWriteResGroup50 : SchedWriteRes<[HWPort1]> {
def: InstRW<[HWWriteResGroup50], (instregex "MMX_CVTPI2PSirr",
"PDEP(32|64)rr",
"PEXT(32|64)rr",
- "SHLD(16|32|64)rri8",
- "SHRD(16|32|64)rri8",
"(V?)CVTDQ2PS(Y?)rr")>;
def HWWriteResGroup50_16i : SchedWriteRes<[HWPort1, HWPort0156]> {
@@ -1513,14 +1513,6 @@ def HWWriteResGroup83 : SchedWriteRes<[HWPort1,HWPort6,HWPort0156]> {
}
def: InstRW<[HWWriteResGroup83], (instregex "LAR(16|32|64)rr")>;
-def HWWriteResGroup86 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort0156]> {
- let Latency = 10;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[HWWriteResGroup86], (instregex "SHLD(16|32|64)mri8",
- "SHRD(16|32|64)mri8")>;
-
def HWWriteResGroup87 : SchedWriteRes<[HWPort1,HWPort6,HWPort23,HWPort0156]> {
let Latency = 9;
let NumMicroOps = 5;
@@ -1638,14 +1630,6 @@ def HWWriteResGroup104 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
}
def: InstRW<[HWWriteResGroup104], (instregex "VCVTDQ2PDYrm")>;
-def HWWriteResGroup105 : SchedWriteRes<[HWPort1,HWPort06,HWPort0156]> {
- let Latency = 6;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[HWWriteResGroup105], (instregex "SHLD(16|32|64)rrCL",
- "SHRD(16|32|64)rrCL")>;
-
def HWWriteResGroup107 : SchedWriteRes<[HWPort1,HWPort6,HWPort06,HWPort0156]> {
let Latency = 6;
let NumMicroOps = 4;
@@ -1660,14 +1644,6 @@ def HWWriteResGroup108 : SchedWriteRes<[HWPort6,HWPort0156]> {
}
def: InstRW<[HWWriteResGroup108], (instrs STD)>;
-def HWWriteResGroup109 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort06,HWPort0156]> {
- let Latency = 12;
- let NumMicroOps = 6;
- let ResourceCycles = [1,1,1,1,2];
-}
-def: InstRW<[HWWriteResGroup109], (instregex "SHLD(16|32|64)mrCL",
- "SHRD(16|32|64)mrCL")>;
-
def HWWriteResGroup114 : SchedWriteRes<[HWPort6,HWPort06,HWPort15,HWPort0156]> {
let Latency = 7;
let NumMicroOps = 7;
diff --git a/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td b/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td
index 3b543c680ef4..6b7bbdea860a 100644
--- a/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td
+++ b/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -106,13 +106,14 @@ def : WriteRes<WriteLoad, [SBPort23]> { let Latency = 5; }
def : WriteRes<WriteMove, [SBPort015]>;
def : WriteRes<WriteZero, []>;
+// Arithmetic.
defm : SBWriteResPair<WriteALU, [SBPort015], 1>;
defm : SBWriteResPair<WriteADC, [SBPort05,SBPort015], 2, [1,1], 2>;
defm : SBWriteResPair<WriteIMul, [SBPort1], 3>;
defm : SBWriteResPair<WriteIMul64, [SBPort1], 3>;
-defm : SBWriteResPair<WriteBSWAP32,[SBPort1], 1>;
-defm : SBWriteResPair<WriteBSWAP64,[SBPort1,SBPort05], 2, [1,1], 2>;
+defm : X86WriteRes<WriteBSWAP32, [SBPort1], 1, [1], 1>;
+defm : X86WriteRes<WriteBSWAP64, [SBPort1,SBPort05], 2, [1,1], 2>;
defm : SBWriteResPair<WriteDiv8, [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteDiv16, [SBPort0, SBDivider], 25, [1, 10]>;
@@ -125,8 +126,13 @@ defm : SBWriteResPair<WriteIDiv64, [SBPort0, SBDivider], 25, [1, 10]>;
def : WriteRes<WriteIMulH, []> { let Latency = 3; }
+// SHLD/SHRD.
+defm : X86WriteRes<WriteSHDrri, [SBPort05, SBPort015], 2, [1, 1], 2>;
+defm : X86WriteRes<WriteSHDrrcl,[SBPort05, SBPort015], 4, [3, 1], 4>;
+defm : X86WriteRes<WriteSHDmri, [SBPort4,SBPort23,SBPort05,SBPort015], 8, [1, 2, 1, 1], 5>;
+defm : X86WriteRes<WriteSHDmrcl,[SBPort4,SBPort23,SBPort05,SBPort015], 10, [1, 2, 3, 1], 7>;
+
defm : SBWriteResPair<WriteShift, [SBPort05], 1>;
-defm : SBWriteResPair<WriteShiftDouble, [SBPort05], 1>;
defm : SBWriteResPair<WriteJump, [SBPort5], 1>;
defm : SBWriteResPair<WriteCRC32, [SBPort1], 3, [1], 1, 5>;
@@ -139,6 +145,7 @@ def : WriteRes<WriteSETCCStore, [SBPort05,SBPort4,SBPort23]> {
let NumMicroOps = 3;
}
def : WriteRes<WriteLAHFSAHF, [SBPort05]>;
+def : WriteRes<WriteBitTest,[SBPort05]>;
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
@@ -564,14 +571,6 @@ def SBWriteResGroup4 : SchedWriteRes<[SBPort05]> {
let ResourceCycles = [1];
}
def: InstRW<[SBWriteResGroup4], (instrs CDQ, CQO)>;
-def: InstRW<[SBWriteResGroup4], (instregex "BT(16|32|64)ri8",
- "BT(16|32|64)rr",
- "BTC(16|32|64)ri8",
- "BTC(16|32|64)rr",
- "BTR(16|32|64)ri8",
- "BTR(16|32|64)rr",
- "BTS(16|32|64)ri8",
- "BTS(16|32|64)rr")>;
def SBWriteResGroup5 : SchedWriteRes<[SBPort15]> {
let Latency = 1;
@@ -630,14 +629,6 @@ def SBWriteResGroup18 : SchedWriteRes<[SBPort5,SBPort015]> {
def: InstRW<[SBWriteResGroup18], (instrs JCXZ, JECXZ, JRCXZ)>;
def: InstRW<[SBWriteResGroup18], (instregex "MMX_MOVDQ2Qrr")>;
-def SBWriteResGroup19 : SchedWriteRes<[SBPort05,SBPort015]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup19], (instregex "SHLD(16|32|64)rri8",
- "SHRD(16|32|64)rri8")>;
-
def SBWriteResGroup21 : SchedWriteRes<[SBPort1]> {
let Latency = 3;
let NumMicroOps = 1;
@@ -728,14 +719,6 @@ def SBWriteResGroup29_2 : SchedWriteRes<[SBPort5,SBPort015]> {
}
def: InstRW<[SBWriteResGroup29_2], (instrs PAUSE)>;
-def SBWriteResGroup29_3 : SchedWriteRes<[SBPort05,SBPort015]> {
- let Latency = 4;
- let NumMicroOps = 4;
- let ResourceCycles = [3,1];
-}
-def: InstRW<[SBWriteResGroup29_3], (instregex "SHLD(16|32|64)rrCL",
- "SHRD(16|32|64)rrCL")>;
-
def SBWriteResGroup30 : SchedWriteRes<[SBPort0]> {
let Latency = 5;
let NumMicroOps = 1;
@@ -1027,14 +1010,6 @@ def SBWriteResGroup87 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> {
}
def: InstRW<[SBWriteResGroup87], (instrs FARCALL64)>;
-def SBWriteResGroup88 : SchedWriteRes<[SBPort4,SBPort23,SBPort05,SBPort015]> {
- let Latency = 8;
- let NumMicroOps = 5;
- let ResourceCycles = [1,2,1,1];
-}
-def: InstRW<[SBWriteResGroup88], (instregex "SHLD(16|32|64)mri8",
- "SHRD(16|32|64)mri8")>;
-
def SBWriteResGroup93 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
let Latency = 9;
let NumMicroOps = 3;
@@ -1130,14 +1105,6 @@ def SBWriteResGroup101 : SchedWriteRes<[SBPort1,SBPort23]> {
def: InstRW<[SBWriteResGroup101], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"ILD_F(16|32|64)m")>;
-def SBWriteResGroup103_2 : SchedWriteRes<[SBPort4,SBPort23,SBPort05,SBPort015]> {
- let Latency = 10;
- let NumMicroOps = 7;
- let ResourceCycles = [1,2,3,1];
-}
-def: InstRW<[SBWriteResGroup103_2], (instregex "SHLD(16|32|64)mrCL",
- "SHRD(16|32|64)mrCL")>;
-
def SBWriteResGroup104 : SchedWriteRes<[SBPort0,SBPort23]> {
let Latency = 11;
let NumMicroOps = 2;
diff --git a/contrib/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/contrib/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index 1417799d76be..bda088e1512f 100644
--- a/contrib/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/contrib/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -110,8 +110,8 @@ defm : SKLWriteResPair<WriteADC, [SKLPort06], 1>; // Integer ALU + flags op
defm : SKLWriteResPair<WriteIMul, [SKLPort1], 3>; // Integer multiplication.
defm : SKLWriteResPair<WriteIMul64, [SKLPort1], 3>; // Integer 64-bit multiplication.
-defm : SKLWriteResPair<WriteBSWAP32,[SKLPort15], 1>; //
-defm : SKLWriteResPair<WriteBSWAP64,[SKLPort06, SKLPort15], 2, [1,1], 2>; //
+defm : X86WriteRes<WriteBSWAP32, [SKLPort15], 1, [1], 1>;
+defm : X86WriteRes<WriteBSWAP64, [SKLPort06, SKLPort15], 2, [1,1], 2>;
defm : SKLWriteResPair<WriteDiv8, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
defm : SKLWriteResPair<WriteDiv16, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
@@ -136,6 +136,7 @@ def : WriteRes<WriteSETCCStore, [SKLPort06,SKLPort4,SKLPort237]> {
let NumMicroOps = 3;
}
def : WriteRes<WriteLAHFSAHF, [SKLPort06]>;
+def : WriteRes<WriteBitTest,[SKLPort06]>; // Bit Test instrs
// Bit counts.
defm : SKLWriteResPair<WriteBSF, [SKLPort1], 3>;
@@ -147,8 +148,11 @@ defm : SKLWriteResPair<WritePOPCNT, [SKLPort1], 3>;
// Integer shifts and rotates.
defm : SKLWriteResPair<WriteShift, [SKLPort06], 1>;
-// Double shift instructions.
-defm : SKLWriteResPair<WriteShiftDouble, [SKLPort06], 1>;
+// SHLD/SHRD.
+defm : X86WriteRes<WriteSHDrri, [SKLPort1], 3, [1], 1>;
+defm : X86WriteRes<WriteSHDrrcl,[SKLPort1,SKLPort06,SKLPort0156], 6, [1, 2, 1], 4>;
+defm : X86WriteRes<WriteSHDmri, [SKLPort1,SKLPort23,SKLPort237,SKLPort0156], 9, [1, 1, 1, 1], 4>;
+defm : X86WriteRes<WriteSHDmrcl,[SKLPort1,SKLPort23,SKLPort237,SKLPort06,SKLPort0156], 11, [1, 1, 1, 2, 1], 6>;
// BMI1 BEXTR, BMI2 BZHI
defm : SKLWriteResPair<WriteBEXTR, [SKLPort06,SKLPort15], 2, [1,1], 2>;
@@ -602,14 +606,6 @@ def SKLWriteResGroup7 : SchedWriteRes<[SKLPort06]> {
let ResourceCycles = [1];
}
def: InstRW<[SKLWriteResGroup7], (instrs CDQ, CQO, CLAC, STAC)>;
-def: InstRW<[SKLWriteResGroup7], (instregex "BT(16|32|64)ri8",
- "BT(16|32|64)rr",
- "BTC(16|32|64)ri8",
- "BTC(16|32|64)rr",
- "BTR(16|32|64)ri8",
- "BTR(16|32|64)rr",
- "BTS(16|32|64)ri8",
- "BTS(16|32|64)rr")>;
def SKLWriteResGroup8 : SchedWriteRes<[SKLPort15]> {
let Latency = 1;
@@ -743,9 +739,7 @@ def SKLWriteResGroup29 : SchedWriteRes<[SKLPort1]> {
let ResourceCycles = [1];
}
def: InstRW<[SKLWriteResGroup29], (instregex "PDEP(32|64)rr",
- "PEXT(32|64)rr",
- "SHLD(16|32|64)rri8",
- "SHRD(16|32|64)rri8")>;
+ "PEXT(32|64)rr")>;
def SKLWriteResGroup29_16i : SchedWriteRes<[SKLPort1, SKLPort0156]> {
let Latency = 4;
@@ -1096,14 +1090,6 @@ def SKLWriteResGroup78 : SchedWriteRes<[SKLPort5,SKLPort01]> {
}
def: InstRW<[SKLWriteResGroup78], (instregex "(V?)CVTSI642SSrr")>;
-def SKLWriteResGroup79 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> {
- let Latency = 6;
- let NumMicroOps = 4;
- let ResourceCycles = [1,2,1];
-}
-def: InstRW<[SKLWriteResGroup79], (instregex "SHLD(16|32|64)rrCL",
- "SHRD(16|32|64)rrCL")>;
-
def SKLWriteResGroup80 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort06,SKLPort0156]> {
let Latency = 6;
let NumMicroOps = 4;
@@ -1392,14 +1378,6 @@ def SKLWriteResGroup128 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> {
def: InstRW<[SKLWriteResGroup128], (instregex "(V?)PHADDSWrm",
"(V?)PHSUBSWrm")>;
-def SKLWriteResGroup130 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort0156]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SKLWriteResGroup130], (instregex "SHLD(16|32|64)mri8",
- "SHRD(16|32|64)mri8")>;
-
def SKLWriteResGroup131 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort23,SKLPort0156]> {
let Latency = 9;
let NumMicroOps = 5;
@@ -1519,14 +1497,6 @@ def: InstRW<[SKLWriteResGroup152], (instregex "CVTPD2PSrm",
"CVT(T?)PD2DQrm",
"MMX_CVT(T?)PD2PIirm")>;
-def SKLWriteResGroup153 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> {
- let Latency = 11;
- let NumMicroOps = 6;
- let ResourceCycles = [1,1,1,2,1];
-}
-def: InstRW<[SKLWriteResGroup153], (instregex "SHLD(16|32|64)mrCL",
- "SHRD(16|32|64)mrCL")>;
-
def SKLWriteResGroup154 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> {
let Latency = 11;
let NumMicroOps = 7;
diff --git a/contrib/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/contrib/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index 7095ec081bd9..9d5f8555c505 100755
--- a/contrib/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/contrib/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -110,8 +110,8 @@ defm : SKXWriteResPair<WriteADC, [SKXPort06], 1>; // Integer ALU + flags op
defm : SKXWriteResPair<WriteIMul, [SKXPort1], 3>; // Integer multiplication.
defm : SKXWriteResPair<WriteIMul64, [SKXPort1], 3>; // Integer 64-bit multiplication.
-defm : SKXWriteResPair<WriteBSWAP32,[SKXPort15], 1>; //
-defm : SKXWriteResPair<WriteBSWAP64,[SKXPort06, SKXPort15], 2, [1,1], 2>; //
+defm : X86WriteRes<WriteBSWAP32, [SKXPort15], 1, [1], 1>;
+defm : X86WriteRes<WriteBSWAP64, [SKXPort06, SKXPort15], 2, [1,1], 2>;
defm : SKXWriteResPair<WriteDiv8, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
defm : SKXWriteResPair<WriteDiv16, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
@@ -136,12 +136,16 @@ def : WriteRes<WriteSETCCStore, [SKXPort06,SKXPort4,SKXPort237]> {
let NumMicroOps = 3;
}
def : WriteRes<WriteLAHFSAHF, [SKXPort06]>;
+def : WriteRes<WriteBitTest,[SKXPort06]>; // Bit Test instrs
// Integer shifts and rotates.
defm : SKXWriteResPair<WriteShift, [SKXPort06], 1>;
-// Double shift instructions.
-defm : SKXWriteResPair<WriteShiftDouble, [SKXPort06], 1>;
+// SHLD/SHRD.
+defm : X86WriteRes<WriteSHDrri, [SKXPort1], 3, [1], 1>;
+defm : X86WriteRes<WriteSHDrrcl,[SKXPort1,SKXPort06,SKXPort0156], 6, [1, 2, 1], 4>;
+defm : X86WriteRes<WriteSHDmri, [SKXPort1,SKXPort23,SKXPort237,SKXPort0156], 9, [1, 1, 1, 1], 4>;
+defm : X86WriteRes<WriteSHDmrcl,[SKXPort1,SKXPort23,SKXPort237,SKXPort06,SKXPort0156], 11, [1, 1, 1, 2, 1], 6>;
// Bit counts.
defm : SKXWriteResPair<WriteBSF, [SKXPort1], 3>;
@@ -615,14 +619,6 @@ def SKXWriteResGroup7 : SchedWriteRes<[SKXPort06]> {
let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup7], (instrs CDQ, CQO, CLAC, STAC)>;
-def: InstRW<[SKXWriteResGroup7], (instregex "BT(16|32|64)ri8",
- "BT(16|32|64)rr",
- "BTC(16|32|64)ri8",
- "BTC(16|32|64)rr",
- "BTR(16|32|64)ri8",
- "BTR(16|32|64)rr",
- "BTS(16|32|64)ri8",
- "BTS(16|32|64)rr")>;
def SKXWriteResGroup8 : SchedWriteRes<[SKXPort15]> {
let Latency = 1;
@@ -783,9 +779,7 @@ def SKXWriteResGroup31 : SchedWriteRes<[SKXPort1]> {
let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup31], (instregex "PDEP(32|64)rr",
- "PEXT(32|64)rr",
- "SHLD(16|32|64)rri8",
- "SHRD(16|32|64)rri8")>;
+ "PEXT(32|64)rr")>;
def SKXWriteResGroup31_16i : SchedWriteRes<[SKXPort1, SKXPort0156]> {
let Latency = 4;
@@ -1270,14 +1264,6 @@ def: InstRW<[SKXWriteResGroup82], (instregex "(V?)CVTSI642SSrr",
"VCVTSI642SSZrr",
"VCVTUSI642SSZrr")>;
-def SKXWriteResGroup83 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> {
- let Latency = 6;
- let NumMicroOps = 4;
- let ResourceCycles = [1,2,1];
-}
-def: InstRW<[SKXWriteResGroup83], (instregex "SHLD(16|32|64)rrCL",
- "SHRD(16|32|64)rrCL")>;
-
def SKXWriteResGroup84 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort06,SKXPort0156]> {
let Latency = 6;
let NumMicroOps = 4;
@@ -1830,14 +1816,6 @@ def SKXWriteResGroup143 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> {
def: InstRW<[SKXWriteResGroup143], (instregex "(V?)PHADDSWrm",
"(V?)PHSUBSWrm")>;
-def SKXWriteResGroup145 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort0156]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SKXWriteResGroup145], (instregex "SHLD(16|32|64)mri8",
- "SHRD(16|32|64)mri8")>;
-
def SKXWriteResGroup146 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort23,SKXPort0156]> {
let Latency = 9;
let NumMicroOps = 5;
@@ -2033,14 +2011,6 @@ def SKXWriteResGroup167 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
}
def: InstRW<[SKXWriteResGroup167], (instregex "VPCONFLICTQZ128rm(b?)")>;
-def SKXWriteResGroup168 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
- let Latency = 11;
- let NumMicroOps = 6;
- let ResourceCycles = [1,1,1,2,1];
-}
-def: InstRW<[SKXWriteResGroup168], (instregex "SHLD(16|32|64)mrCL",
- "SHRD(16|32|64)mrCL")>;
-
def SKXWriteResGroup169 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> {
let Latency = 11;
let NumMicroOps = 7;
diff --git a/contrib/llvm/lib/Target/X86/X86Schedule.td b/contrib/llvm/lib/Target/X86/X86Schedule.td
index d0167753ccd4..ef9ce94706df 100644
--- a/contrib/llvm/lib/Target/X86/X86Schedule.td
+++ b/contrib/llvm/lib/Target/X86/X86Schedule.td
@@ -118,8 +118,8 @@ defm WriteIMul64 : X86SchedWritePair; // Integer 64-bit multiplication.
def WriteIMulH : SchedWrite; // Integer multiplication, high part.
def WriteLEA : SchedWrite; // LEA instructions can't fold loads.
-defm WriteBSWAP32: X86SchedWritePair; // Byte Order (Endiannes) Swap
-defm WriteBSWAP64: X86SchedWritePair; // Byte Order (Endiannes) Swap
+def WriteBSWAP32 : SchedWrite; // Byte Order (Endianness) 32-bit Swap.
+def WriteBSWAP64 : SchedWrite; // Byte Order (Endianness) 64-bit Swap.
// Integer division.
defm WriteDiv8 : X86SchedWritePair;
@@ -142,11 +142,15 @@ def WriteFCMOV : SchedWrite; // X87 conditional move.
def WriteSETCC : SchedWrite; // Set register based on condition code.
def WriteSETCCStore : SchedWrite;
def WriteLAHFSAHF : SchedWrite; // Load/Store flags in AH.
+def WriteBitTest : SchedWrite; // Bit Test - TODO add memory folding support
// Integer shifts and rotates.
defm WriteShift : X86SchedWritePair;
// Double shift instructions.
-defm WriteShiftDouble : X86SchedWritePair;
+def WriteSHDrri : SchedWrite;
+def WriteSHDrrcl : SchedWrite;
+def WriteSHDmri : SchedWrite;
+def WriteSHDmrcl : SchedWrite;
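+// Suffixes: rri = reg,reg,imm8; rrcl = reg,reg,CL; mri = mem,reg,imm8;
+// mrcl = mem,reg,CL.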
// BMI1 BEXTR, BMI2 BZHI
defm WriteBEXTR : X86SchedWritePair;
diff --git a/contrib/llvm/lib/Target/X86/X86ScheduleAtom.td b/contrib/llvm/lib/Target/X86/X86ScheduleAtom.td
index d1e902e6c43f..a7f461c456bd 100644
--- a/contrib/llvm/lib/Target/X86/X86ScheduleAtom.td
+++ b/contrib/llvm/lib/Target/X86/X86ScheduleAtom.td
@@ -81,8 +81,8 @@ defm : AtomWriteResPair<WriteADC, [AtomPort01], [AtomPort0]>;
defm : AtomWriteResPair<WriteIMul, [AtomPort01], [AtomPort01], 7, 7, [7], [7]>;
defm : AtomWriteResPair<WriteIMul64, [AtomPort01], [AtomPort01], 12, 12, [12], [12]>;
-defm : AtomWriteResPair<WriteBSWAP32, [AtomPort0], [AtomPort0]>;
-defm : AtomWriteResPair<WriteBSWAP64, [AtomPort0], [AtomPort0]>;
+defm : X86WriteRes<WriteBSWAP32, [AtomPort0], 1, [1], 1>;
+defm : X86WriteRes<WriteBSWAP64, [AtomPort0], 1, [1], 1>;
defm : AtomWriteResPair<WriteDiv8, [AtomPort01], [AtomPort01], 50, 68, [50], [68]>;
defm : AtomWriteResPair<WriteDiv16, [AtomPort01], [AtomPort01], 50, 50, [50], [50]>;
@@ -108,6 +108,7 @@ def : WriteRes<WriteLAHFSAHF, [AtomPort01]> {
let Latency = 2;
let ResourceCycles = [2];
}
+def : WriteRes<WriteBitTest,[AtomPort01]>;
defm : X86WriteResUnsupported<WriteIMulH>;
@@ -150,11 +151,10 @@ defm : X86WriteResPairUnsupported<WriteBZHI>;
defm : AtomWriteResPair<WriteShift, [AtomPort0], [AtomPort0]>;
-////////////////////////////////////////////////////////////////////////////////
-// Double shift instructions.
-////////////////////////////////////////////////////////////////////////////////
-
-defm : AtomWriteResPair<WriteShiftDouble, [AtomPort0], [AtomPort0]>;
+defm : X86WriteRes<WriteSHDrri, [AtomPort01], 2, [2], 1>;
+defm : X86WriteRes<WriteSHDrrcl,[AtomPort01], 2, [2], 1>;
+defm : X86WriteRes<WriteSHDmri, [AtomPort01], 4, [4], 1>;
+defm : X86WriteRes<WriteSHDmrcl,[AtomPort01], 4, [4], 1>;
////////////////////////////////////////////////////////////////////////////////
// Loads, stores, and moves, not folded with other operations.
@@ -562,9 +562,7 @@ def AtomWrite01_2 : SchedWriteRes<[AtomPort01]> {
def : InstRW<[AtomWrite01_2], (instrs LEAVE, LEAVE64, POP16r,
PUSH16rmm, PUSH32rmm, PUSH64rmm,
LODSB, LODSL, LODSQ, LODSW,
- SCASB, SCASL, SCASQ, SCASW,
- SHLD32rrCL, SHRD32rrCL,
- SHLD32rri8, SHRD32rri8)>;
+ SCASB, SCASL, SCASQ, SCASW)>;
def : InstRW<[AtomWrite01_2], (instregex "BT(C|R|S)(16|32|64)mi8",
"PUSH(CS|DS|ES|FS|GS|SS)(16|32|64)",
"XADD(8|16|32|64)rr",
@@ -598,8 +596,6 @@ def AtomWrite01_4 : SchedWriteRes<[AtomPort01]> {
}
def : InstRW<[AtomWrite01_4], (instrs CBW, CWD, CWDE, CDQ, CDQE, CQO,
JCXZ, JECXZ, JRCXZ,
- SHLD32mrCL, SHRD32mrCL,
- SHLD32mri8, SHRD32mri8,
LD_F80m)>;
def : InstRW<[AtomWrite01_4], (instregex "PH(ADD|SUB)Drm",
"(MMX_)?PEXTRWrr(_REV)?")>;
diff --git a/contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td
index d78c343ebd5c..719e71cd25e5 100644
--- a/contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -168,8 +168,8 @@ defm : JWriteResIntPair<WriteIMul, [JALU1, JMul], 3, [1, 1], 2>; // i8/i16/i32
defm : JWriteResIntPair<WriteIMul64, [JALU1, JMul], 6, [1, 4], 2>; // i64 multiplication
defm : X86WriteRes<WriteIMulH, [JALU1], 6, [4], 1>;
-defm : JWriteResIntPair<WriteBSWAP32,[JALU01], 1>;
-defm : JWriteResIntPair<WriteBSWAP64,[JALU01], 1>;
+defm : X86WriteRes<WriteBSWAP32, [JALU01], 1, [1], 1>;
+defm : X86WriteRes<WriteBSWAP64, [JALU01], 1, [1], 1>;
defm : JWriteResIntPair<WriteDiv8, [JALU1, JDiv], 12, [1, 12], 1>;
defm : JWriteResIntPair<WriteDiv16, [JALU1, JDiv], 17, [1, 17], 2>;
@@ -188,6 +188,7 @@ defm : X86WriteRes<WriteFCMOV, [JFPU0, JFPA], 3, [1,1], 1>; // x87 conditional m
def : WriteRes<WriteSETCC, [JALU01]>; // Setcc.
def : WriteRes<WriteSETCCStore, [JALU01,JSAGU]>;
def : WriteRes<WriteLAHFSAHF, [JALU01]>;
+def : WriteRes<WriteBitTest,[JALU01]>;
// This is for simple LEAs with one or two input operands.
def : WriteRes<WriteLEA, [JALU01]>;
@@ -209,33 +210,11 @@ defm : X86WriteResPairUnsupported<WriteBZHI>;
defm : JWriteResIntPair<WriteShift, [JALU01], 1>;
-defm : JWriteResIntPair<WriteShiftDouble, [JALU01], 1>;
-
-def JWriteSHLDrri : SchedWriteRes<[JALU01]> {
- let Latency = 3;
- let ResourceCycles = [6];
- let NumMicroOps = 6;
-}
-def: InstRW<[JWriteSHLDrri], (instrs SHLD16rri8, SHLD32rri8, SHLD64rri8,
- SHRD16rri8, SHRD32rri8, SHRD64rri8)>;
-
-def JWriteSHLDrrCL : SchedWriteRes<[JALU01]> {
- let Latency = 4;
- let ResourceCycles = [8];
- let NumMicroOps = 7;
-}
-def: InstRW<[JWriteSHLDrrCL], (instrs SHLD16rrCL, SHLD32rrCL, SHLD64rrCL,
- SHRD16rrCL, SHRD32rrCL, SHRD64rrCL)>;
-
-def JWriteSHLDm : SchedWriteRes<[JLAGU, JALU01]> {
- let Latency = 9;
- let ResourceCycles = [1, 22];
- let NumMicroOps = 8;
-}
-def: InstRW<[JWriteSHLDm],(instrs SHLD16mri8, SHLD32mri8, SHLD64mri8,
- SHLD16mrCL, SHLD32mrCL, SHLD64mrCL,
- SHRD16mri8, SHRD32mri8, SHRD64mri8,
- SHRD16mrCL, SHRD32mrCL, SHRD64mrCL)>;
+// SHLD/SHRD.
+defm : X86WriteRes<WriteSHDrri, [JALU01], 3, [6], 6>;
+defm : X86WriteRes<WriteSHDrrcl,[JALU01], 4, [8], 7>;
+defm : X86WriteRes<WriteSHDmri, [JLAGU, JALU01], 9, [1, 22], 8>;
+defm : X86WriteRes<WriteSHDmrcl,[JLAGU, JALU01], 9, [1, 22], 8>;
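+// Jaguar previously modeled all memory forms with one entry (JWriteSHLDm), so
+// the imm and CL variants share identical numbers here.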
////////////////////////////////////////////////////////////////////////////////
// Loads, stores, and moves, not folded with other operations.
diff --git a/contrib/llvm/lib/Target/X86/X86ScheduleSLM.td b/contrib/llvm/lib/Target/X86/X86ScheduleSLM.td
index c938a4a8939e..b1e843013707 100644
--- a/contrib/llvm/lib/Target/X86/X86ScheduleSLM.td
+++ b/contrib/llvm/lib/Target/X86/X86ScheduleSLM.td
@@ -98,11 +98,16 @@ defm : SLMWriteResPair<WriteADC, [SLM_IEC_RSV01], 1>;
defm : SLMWriteResPair<WriteIMul, [SLM_IEC_RSV1], 3>;
defm : SLMWriteResPair<WriteIMul64, [SLM_IEC_RSV1], 3>;
-defm : SLMWriteResPair<WriteBSWAP32,[SLM_IEC_RSV01], 1>;
-defm : SLMWriteResPair<WriteBSWAP64,[SLM_IEC_RSV01], 1>;
+defm : X86WriteRes<WriteBSWAP32, [SLM_IEC_RSV01], 1, [1], 1>;
+defm : X86WriteRes<WriteBSWAP64, [SLM_IEC_RSV01], 1, [1], 1>;
defm : SLMWriteResPair<WriteShift, [SLM_IEC_RSV0], 1>;
-defm : SLMWriteResPair<WriteShiftDouble, [SLM_IEC_RSV0], 1>;
+
+defm : X86WriteRes<WriteSHDrri, [SLM_IEC_RSV0], 1, [1], 1>;
+defm : X86WriteRes<WriteSHDrrcl,[SLM_IEC_RSV0], 1, [1], 1>;
+defm : X86WriteRes<WriteSHDmri, [SLM_MEC_RSV, SLM_IEC_RSV0], 4, [2, 1], 2>;
+defm : X86WriteRes<WriteSHDmrcl,[SLM_MEC_RSV, SLM_IEC_RSV0], 4, [2, 1], 2>;
+
defm : SLMWriteResPair<WriteJump, [SLM_IEC_RSV1], 1>;
defm : SLMWriteResPair<WriteCRC32, [SLM_IEC_RSV1], 3>;
@@ -115,6 +120,7 @@ def : WriteRes<WriteSETCCStore, [SLM_IEC_RSV01, SLM_MEC_RSV]> {
let ResourceCycles = [2,1];
}
def : WriteRes<WriteLAHFSAHF, [SLM_IEC_RSV01]>;
+def : WriteRes<WriteBitTest,[SLM_IEC_RSV01]>;
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
diff --git a/contrib/llvm/lib/Target/X86/X86ScheduleZnver1.td b/contrib/llvm/lib/Target/X86/X86ScheduleZnver1.td
index d28d58580752..7184b850a195 100644
--- a/contrib/llvm/lib/Target/X86/X86ScheduleZnver1.td
+++ b/contrib/llvm/lib/Target/X86/X86ScheduleZnver1.td
@@ -180,11 +180,16 @@ defm : ZnWriteResPair<WriteADC, [ZnALU], 1>;
defm : ZnWriteResPair<WriteIMul, [ZnALU1, ZnMultiplier], 4>;
defm : ZnWriteResPair<WriteIMul64, [ZnALU1, ZnMultiplier], 4, [1,1], 2>;
-defm : ZnWriteResPair<WriteBSWAP32,[ZnALU], 1, [4]>;
-defm : ZnWriteResPair<WriteBSWAP64,[ZnALU], 1, [4]>;
+defm : X86WriteRes<WriteBSWAP32, [ZnALU], 1, [4], 1>;
+defm : X86WriteRes<WriteBSWAP64, [ZnALU], 1, [4], 1>;
defm : ZnWriteResPair<WriteShift, [ZnALU], 1>;
-defm : ZnWriteResPair<WriteShiftDouble, [ZnALU], 1>;
+
+defm : X86WriteRes<WriteSHDrri, [ZnALU], 1, [1], 1>;
+defm : X86WriteResUnsupported<WriteSHDrrcl>;
+defm : X86WriteResUnsupported<WriteSHDmri>;
+defm : X86WriteResUnsupported<WriteSHDmrcl>;
+
defm : ZnWriteResPair<WriteJump, [ZnALU], 1>;
defm : ZnWriteResFpuPair<WriteCRC32, [ZnFPU0], 3>;
@@ -193,6 +198,7 @@ defm : ZnWriteResPair<WriteCMOV2, [ZnALU], 1>;
def : WriteRes<WriteSETCC, [ZnALU]>;
def : WriteRes<WriteSETCCStore, [ZnALU, ZnAGU]>;
defm : X86WriteRes<WriteLAHFSAHF, [ZnALU], 2, [1], 2>;
+def : WriteRes<WriteBitTest,[ZnALU]>;
// Bit counts.
defm : ZnWriteResPair<WriteBSF, [ZnALU], 3>;
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm/lib/Target/X86/X86Subtarget.h
index fedb13f89e19..85e8256a6e94 100644
--- a/contrib/llvm/lib/Target/X86/X86Subtarget.h
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.h
@@ -51,7 +51,7 @@ enum Style {
} // end namespace PICStyles
class X86Subtarget final : public X86GenSubtargetInfo {
-public:
+public:
enum X86ProcFamilyEnum {
Others,
IntelAtom,
diff --git a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index bae2ef80c365..865462622627 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2274,8 +2274,8 @@ int X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
// Sign-extend all constants to a multiple of 64-bit.
APInt ImmVal = Imm;
- if (BitSize & 0x3f)
- ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
+ if (BitSize % 64 != 0)
+ ImmVal = Imm.sext(alignTo(BitSize, 64));
// Split the constant into 64-bit chunks and calculate the cost for each
// chunk.
@@ -2332,9 +2332,15 @@ int X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
// immediates here as the normal path expects bit 31 to be sign extended.
if (Idx == 1 && Imm.getBitWidth() == 64 && isUInt<32>(Imm.getZExtValue()))
return TTI::TCC_Free;
- LLVM_FALLTHROUGH;
+ ImmIdx = 1;
+ break;
case Instruction::Add:
case Instruction::Sub:
+ // For add/sub, we can use the opposite instruction for INT32_MIN.
+ if (Idx == 1 && Imm.getBitWidth() == 64 && Imm.getZExtValue() == 0x80000000)
+ return TTI::TCC_Free;
+ ImmIdx = 1;
+ break;
case Instruction::Mul:
case Instruction::UDiv:
case Instruction::SDiv:
@@ -2366,7 +2372,7 @@ int X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
}
if (Idx == ImmIdx) {
- int NumConstants = (BitSize + 63) / 64;
+ int NumConstants = divideCeil(BitSize, 64);
int Cost = X86TTIImpl::getIntImmCost(Imm, Ty);
return (Cost <= NumConstants * TTI::TCC_Basic)
? static_cast<int>(TTI::TCC_Free)
diff --git a/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
index 8f7c8a82380a..916bca6392de 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -146,7 +146,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
}
EmitAlignment(Align > 2 ? Align : 2, GV);
-
+
if (GV->isThreadLocal()) {
report_fatal_error("TLS is not supported by this target!");
}
@@ -162,7 +162,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// are padded to 32 bits.
if (Size < 4)
OutStreamer->EmitZeros(4 - Size);
-
+
// Mark the end of the global
getTargetStreamer().emitCCBottomData(GVSym->getName());
}
@@ -295,6 +295,6 @@ void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
// Force static initialization.
-extern "C" void LLVMInitializeXCoreAsmPrinter() {
+extern "C" void LLVMInitializeXCoreAsmPrinter() {
RegisterAsmPrinter<XCoreAsmPrinter> X(getTheXCoreTarget());
}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
index d5e276788f71..b0de048672df 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -63,7 +63,7 @@ static bool isZeroImm(const MachineOperand &op) {
unsigned XCoreInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
int Opcode = MI.getOpcode();
- if (Opcode == XCore::LDWFI)
+ if (Opcode == XCore::LDWFI)
{
if ((MI.getOperand(1).isFI()) && // is a stack slot
(MI.getOperand(2).isImm()) && // the imm is zero
@@ -74,7 +74,7 @@ unsigned XCoreInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
}
return 0;
}
-
+
/// isStoreToStackSlot - If the specified machine instruction is a direct
/// store to a stack slot, return the virtual or physical register number of
/// the source reg along with the FrameIndex of the loaded stack slot. If
@@ -129,9 +129,9 @@ static inline bool IsBR_JT(unsigned BrOpc) {
|| BrOpc == XCore::BR_JT32;
}
-/// GetCondFromBranchOpc - Return the XCore CC that matches
+/// GetCondFromBranchOpc - Return the XCore CC that matches
/// the correspondent Branch instruction opcode.
-static XCore::CondCode GetCondFromBranchOpc(unsigned BrOpc)
+static XCore::CondCode GetCondFromBranchOpc(unsigned BrOpc)
{
if (IsBRT(BrOpc)) {
return XCore::COND_TRUE;
@@ -144,7 +144,7 @@ static XCore::CondCode GetCondFromBranchOpc(unsigned BrOpc)
/// GetCondBranchFromCond - Return the Branch instruction
/// opcode that matches the cc.
-static inline unsigned GetCondBranchFromCond(XCore::CondCode CC)
+static inline unsigned GetCondBranchFromCond(XCore::CondCode CC)
{
switch (CC) {
default: llvm_unreachable("Illegal condition code!");
@@ -153,7 +153,7 @@ static inline unsigned GetCondBranchFromCond(XCore::CondCode CC)
}
}
-/// GetOppositeBranchCondition - Return the inverse of the specified
+/// GetOppositeBranchCondition - Return the inverse of the specified
/// condition, e.g. turning COND_E to COND_NE.
static inline XCore::CondCode GetOppositeBranchCondition(XCore::CondCode CC)
{
@@ -209,11 +209,11 @@ bool XCoreInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
TBB = LastInst->getOperand(0).getMBB();
return false;
}
-
+
XCore::CondCode BranchCode = GetCondFromBranchOpc(LastInst->getOpcode());
if (BranchCode == XCore::COND_INVALID)
return true; // Can't handle indirect branch.
-
+
// Conditional branch
// Block ends with fall-through condbranch.
@@ -222,17 +222,17 @@ bool XCoreInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
Cond.push_back(LastInst->getOperand(0));
return false;
}
-
+
// Get the instruction before it if it's a terminator.
MachineInstr *SecondLastInst = &*I;
// If there are three terminators, we don't know what sort of block this is.
if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
return true;
-
+
unsigned SecondLastOpc = SecondLastInst->getOpcode();
XCore::CondCode BranchCode = GetCondFromBranchOpc(SecondLastOpc);
-
+
// If the block ends with conditional branch followed by unconditional,
// handle it.
if (BranchCode != XCore::COND_INVALID
@@ -245,10 +245,10 @@ bool XCoreInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
FBB = LastInst->getOperand(0).getMBB();
return false;
}
-
+
// If the block ends with two unconditional branches, handle it. The second
// one is not executed, so remove it.
- if (IsBRU(SecondLastInst->getOpcode()) &&
+ if (IsBRU(SecondLastInst->getOpcode()) &&
IsBRU(LastInst->getOpcode())) {
TBB = SecondLastInst->getOperand(0).getMBB();
I = LastInst;
@@ -293,7 +293,7 @@ unsigned XCoreInstrInfo::insertBranch(MachineBasicBlock &MBB,
}
return 1;
}
-
+
// Two-way Conditional branch.
assert(Cond.size() == 2 && "Unexpected number of components!");
unsigned Opc = GetCondBranchFromCond((XCore::CondCode)Cond[0].getImm());
@@ -313,17 +313,17 @@ XCoreInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved) const {
if (!IsBRU(I->getOpcode()) && !IsCondBranch(I->getOpcode()))
return 0;
-
+
// Remove the branch.
I->eraseFromParent();
-
+
I = MBB.end();
if (I == MBB.begin()) return 1;
--I;
if (!IsCondBranch(I->getOpcode()))
return 1;
-
+
// Remove the branch.
I->eraseFromParent();
return 2;
@@ -342,7 +342,7 @@ void XCoreInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addImm(0);
return;
}
-
+
if (GRDest && SrcReg == XCore::SP) {
BuildMI(MBB, I, DL, get(XCore::LDAWSP_ru6), DestReg).addImm(0);
return;
diff --git a/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h b/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h
index cf469ec3cf1a..6c05ab3f10df 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h
@@ -43,11 +43,11 @@ class XCoreFunctionInfo : public MachineFunctionInfo {
public:
XCoreFunctionInfo() = default;
-
+
explicit XCoreFunctionInfo(MachineFunction &MF) {}
-
+
~XCoreFunctionInfo() override = default;
-
+
void setVarArgsFrameIndex(int off) { VarArgsFrameIndex = off; }
int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
diff --git a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
index 1915aaedc35d..e119d9555f9d 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -296,12 +296,12 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// fold constant into offset.
Offset += MI.getOperand(FIOperandNum + 1).getImm();
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
-
+
assert(Offset%4 == 0 && "Misaligned stack offset");
LLVM_DEBUG(errs() << "Offset : " << Offset << "\n"
<< "<--------->\n");
Offset/=4;
-
+
unsigned Reg = MI.getOperand(0).getReg();
assert(XCore::GRRegsRegClass.contains(Reg) && "Unexpected register operand");
diff --git a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h
index c31f5d5a7c44..9451a05d8d58 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h
@@ -32,7 +32,7 @@ public:
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
-
+
bool enableMultipleCopyHints() const override { return true; }
bool requiresRegisterScavenging(const MachineFunction &MF) const override;
diff --git a/contrib/llvm/lib/Target/XCore/XCoreSubtarget.h b/contrib/llvm/lib/Target/XCore/XCoreSubtarget.h
index 140ddba68aab..ed9936ebf2b8 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreSubtarget.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreSubtarget.h
@@ -43,7 +43,7 @@ public:
XCoreSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM);
- /// ParseSubtargetFeatures - Parses features string setting specified
+ /// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);