Commit e6d24789 by Jim Stichnoth

Subzero: Randomize register assignment.

Randomize the order in which registers appear in the free list. Only fully "equivalent" registers are randomized, to ensure no extra spills. This adds the -randomize-regalloc option. This is a continuation of https://codereview.chromium.org/456033003/ which Matt owns.

BUG= none
R=jfb@chromium.org

Review URL: https://codereview.chromium.org/807293003
parent 4019f084
......@@ -122,6 +122,7 @@ enum VerboseItem {
IceV_LinearScan = 1 << 8,
IceV_Frame = 1 << 9,
IceV_AddrOpt = 1 << 10,
IceV_Random = 1 << 11,
IceV_All = ~IceV_None,
IceV_Most = IceV_All & ~IceV_LinearScan
};
......
......@@ -42,7 +42,7 @@ class RandomNumberGeneratorWrapper {
operator=(const RandomNumberGeneratorWrapper &) = delete;
public:
uint64_t next(uint64_t Max) { return RNG.next(Max); }
uint64_t operator()(uint64_t Max) { return RNG.next(Max); }
bool getTrueWithProbability(float Probability);
RandomNumberGeneratorWrapper(RandomNumberGenerator &RNG) : RNG(RNG) {}
......
......@@ -141,7 +141,7 @@ void LinearScan::initForGlobal() {
//
// Some properties we aren't (yet) taking advantage of:
//
// * Because live ranges are a single segment, the Unhandled set will
// * Because live ranges are a single segment, the Inactive set will
// always be empty, and the live range trimming operation is
// unnecessary.
//
......@@ -258,7 +258,8 @@ void LinearScan::init(RegAllocKind Kind) {
// Requires running Cfg::liveness(Liveness_Intervals) in
// preparation. Results are assigned to Variable::RegNum for each
// Variable.
void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull,
bool Randomized) {
TimerMarker T(TimerStack::TT_linearScan, Func);
assert(RegMaskFull.any()); // Sanity check
Ostream &Str = Func->getContext()->getStrDump();
......@@ -266,6 +267,13 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
ALLOW_DUMP && Func->getContext()->isVerbose(IceV_LinearScan);
Func->resetCurrentNode();
VariablesMetadata *VMetadata = Func->getVMetadata();
const size_t NumRegisters = RegMaskFull.size();
llvm::SmallBitVector PreDefinedRegisters(NumRegisters);
if (Randomized) {
for (Variable *Var : UnhandledPrecolored) {
PreDefinedRegisters[Var->getRegNum()] = true;
}
}
// Build a LiveRange representing the Kills list.
LiveRange KillsRange(Kills);
......@@ -274,7 +282,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
// RegUses[I] is the number of live ranges (variables) that register
// I is currently assigned to. It can be greater than 1 as a result
// of AllowOverlap inference below.
std::vector<int> RegUses(RegMaskFull.size());
std::vector<int> RegUses(NumRegisters);
// Unhandled is already set to all ranges in increasing order of
// start points.
assert(Active.empty());
......@@ -662,23 +670,39 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
Inactive.clear();
dump(Func);
// Finish up by assigning RegNumTmp->RegNum for each Variable.
// TODO(stichnot): Statically choose the size based on the target
// being compiled.
const size_t REGS_SIZE = 32;
llvm::SmallVector<int32_t, REGS_SIZE> Permutation(NumRegisters);
if (Randomized) {
Func->getTarget()->makeRandomRegisterPermutation(
Permutation, PreDefinedRegisters | ~RegMaskFull);
}
// Finish up by assigning RegNumTmp->RegNum (or a random permutation
// thereof) for each Variable.
for (Variable *Item : Handled) {
int32_t RegNum = Item->getRegNumTmp();
int32_t AssignedRegNum = RegNum;
if (Randomized && Item->hasRegTmp() && !Item->hasReg()) {
AssignedRegNum = Permutation[RegNum];
}
if (Verbose) {
if (!Item->hasRegTmp()) {
Str << "Not assigning ";
Item->dump(Func);
Str << "\n";
} else {
Str << (RegNum == Item->getRegNum() ? "Reassigning " : "Assigning ")
<< Func->getTarget()->getRegName(RegNum, IceType_i32) << "(r"
<< RegNum << ") to ";
Str << (AssignedRegNum == Item->getRegNum() ? "Reassigning "
: "Assigning ")
<< Func->getTarget()->getRegName(AssignedRegNum, IceType_i32)
<< "(r" << AssignedRegNum << ") to ";
Item->dump(Func);
Str << "\n";
}
}
Item->setRegNum(Item->getRegNumTmp());
Item->setRegNum(AssignedRegNum);
}
// TODO: Consider running register allocation one more time, with
......
......@@ -29,7 +29,7 @@ public:
LinearScan(Cfg *Func)
: Func(Func), FindPreference(false), FindOverlap(false) {}
void init(RegAllocKind Kind);
void scan(const llvm::SmallBitVector &RegMask);
void scan(const llvm::SmallBitVector &RegMask, bool Randomized);
void dump(Cfg *Func) const;
private:
......
......@@ -29,6 +29,7 @@ namespace Ice {
namespace {
// TODO(stichnot): Move this machinery into llvm2ice.cpp.
namespace cl = llvm::cl;
cl::opt<bool> DoNopInsertion("nop-insertion", cl::desc("Randomly insert NOPs"),
cl::init(false));
......@@ -40,6 +41,11 @@ cl::opt<int> MaxNopsPerInstruction(
cl::opt<int> NopProbabilityAsPercentage(
"nop-insertion-percentage",
cl::desc("Nop insertion probability as percentage"), cl::init(10));
cl::opt<bool>
CLRandomizeRegisterAllocation("randomize-regalloc",
cl::desc("Randomize register allocation"),
cl::init(false));
} // end of anonymous namespace
void LoweringContext::init(CfgNode *N) {
......@@ -95,6 +101,12 @@ TargetLowering *TargetLowering::createLowering(TargetArch Target, Cfg *Func) {
return NULL;
}
// TargetLowering constructor.  Captures the owning Cfg and its
// GlobalContext, and latches the -randomize-regalloc command-line flag
// (CLRandomizeRegisterAllocation) into the const
// RandomizeRegisterAllocation member so the setting is fixed for the
// lifetime of this lowering object.  Frame/stack bookkeeping members
// start out false/zero; Context is default-initialized.
TargetLowering::TargetLowering(Cfg *Func)
: Func(Func), Ctx(Func->getContext()),
RandomizeRegisterAllocation(CLRandomizeRegisterAllocation),
HasComputedFrame(false), CallsReturnsTwice(false), StackAdjustment(0),
Context() {}
Assembler *TargetLowering::createAssembler(TargetArch Target, Cfg *Func) {
// These statements can be #ifdef'd to specialize the assembler
// to a subset of the available targets. TODO: use CRTP.
......@@ -236,7 +248,7 @@ void TargetLowering::regAlloc(RegAllocKind Kind) {
RegExclude |= RegSet_FramePointer;
LinearScan.init(Kind);
llvm::SmallBitVector RegMask = getRegisterSet(RegInclude, RegExclude);
LinearScan.scan(RegMask);
LinearScan.scan(RegMask, RandomizeRegisterAllocation);
}
TargetGlobalInitLowering *
......
......@@ -191,6 +191,10 @@ public:
virtual const llvm::SmallBitVector &getRegisterSetForType(Type Ty) const = 0;
void regAlloc(RegAllocKind Kind);
virtual void makeRandomRegisterPermutation(
llvm::SmallVectorImpl<int32_t> &Permutation,
const llvm::SmallBitVector &ExcludeRegisters) const = 0;
virtual void emitVariable(const Variable *Var) const = 0;
// Performs target-specific argument lowering.
......@@ -204,9 +208,7 @@ public:
virtual ~TargetLowering() {}
protected:
TargetLowering(Cfg *Func)
: Func(Func), Ctx(Func->getContext()), HasComputedFrame(false),
CallsReturnsTwice(false), StackAdjustment(0) {}
TargetLowering(Cfg *Func);
virtual void lowerAlloca(const InstAlloca *Inst) = 0;
virtual void lowerArithmetic(const InstArithmetic *Inst) = 0;
virtual void lowerAssign(const InstAssign *Inst) = 0;
......@@ -235,6 +237,7 @@ protected:
Cfg *Func;
GlobalContext *Ctx;
const bool RandomizeRegisterAllocation;
bool HasComputedFrame;
bool CallsReturnsTwice;
// StackAdjustment keeps track of the current stack offset from its
......
......@@ -3869,7 +3869,7 @@ void TargetX8632::doAddressOptLoad() {
// With probability Probability, emit a single nop instruction whose
// variant is chosen uniformly at random from the X86_NUM_NOP_VARIANTS
// available encodings.  Called per-instruction when -nop-insertion is
// enabled.
//
// NOTE(review): the displayed block contained both the pre-change
// _nop(RNG.next(...)) line and its replacement _nop(RNG(...)) — diff
// residue that would emit two nops and advance the RNG twice.  Only the
// operator() form is kept.
void TargetX8632::randomlyInsertNop(float Probability) {
  RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
  if (RNG.getTrueWithProbability(Probability)) {
    _nop(RNG(X86_NUM_NOP_VARIANTS));
  }
}
......@@ -4530,6 +4530,72 @@ void TargetX8632::postLower() {
}
}
// Builds a permutation of the register file for randomized register
// allocation.  Registers listed in ExcludeRegisters map to themselves;
// every other register is grouped into an equivalence class keyed by its
// properties (scratch/preserved/isI8/isInt/isFP) and shuffled only within
// its class, so the permutation can never introduce extra spills.
// Permutation[i] is the randomized replacement for register i.
void TargetX8632::makeRandomRegisterPermutation(
    llvm::SmallVectorImpl<int32_t> &Permutation,
    const llvm::SmallBitVector &ExcludeRegisters) const {
  // TODO(stichnot): Declaring Permutation this way loses type/size
  // information.  Fix this in conjunction with the caller-side TODO.
  assert(Permutation.size() >= RegX8632::Reg_NUM);
  // Expected upper bound on the number of registers in a single
  // equivalence class.  For x86-32, this would comprise the 8 XMM
  // registers.  This is for performance, not correctness.
  static const unsigned MaxEquivalenceClassSize = 8;
  typedef llvm::SmallVector<int32_t, MaxEquivalenceClassSize> RegisterList;
  typedef std::map<uint32_t, RegisterList> EquivalenceClassMap;
  EquivalenceClassMap EquivalenceClasses;
  SizeT NumShuffled = 0, NumPreserved = 0;

  // Build up the equivalence classes of registers by looking at the
  // register properties as well as whether the registers should be
  // explicitly excluded from shuffling.
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
          frameptr, isI8, isInt, isFP)                                         \
  if (ExcludeRegisters[RegX8632::val]) {                                       \
    /* val stays the same in the resulting permutation. */                     \
    Permutation[RegX8632::val] = RegX8632::val;                                \
    ++NumPreserved;                                                            \
  } else {                                                                     \
    const uint32_t Index = (scratch << 0) | (preserved << 1) | (isI8 << 2) |   \
                           (isInt << 3) | (isFP << 4);                         \
    /* val is assigned to an equivalence class based on its properties. */     \
    EquivalenceClasses[Index].push_back(RegX8632::val);                        \
  }
  REGX8632_TABLE
#undef X

  RandomNumberGeneratorWrapper RNG(Ctx->getRNG());

  // Shuffle the resulting equivalence classes.  Iterate by const
  // reference: the mapped type is a SmallVector, so iterating by value
  // ("auto I") would deep-copy every register list on each iteration
  // (clang-tidy performance-for-range-copy).
  for (const auto &I : EquivalenceClasses) {
    const RegisterList &List = I.second;
    RegisterList Shuffled(List);
    std::random_shuffle(Shuffled.begin(), Shuffled.end(), RNG);
    for (size_t SI = 0, SE = Shuffled.size(); SI < SE; ++SI) {
      Permutation[List[SI]] = Shuffled[SI];
      ++NumShuffled;
    }
  }

  // Every register is either preserved or shuffled, exactly once.
  assert(NumShuffled + NumPreserved == RegX8632::Reg_NUM);

  // Under -verbose=random, dump the equivalence classes for debugging.
  if (Func->getContext()->isVerbose(IceV_Random)) {
    Ostream &Str = Func->getContext()->getStrDump();
    Str << "Register equivalence classes:\n";
    for (const auto &I : EquivalenceClasses) {
      Str << "{";
      const RegisterList &List = I.second;
      bool First = true;
      for (int32_t Register : List) {
        if (!First)
          Str << " ";
        First = false;
        Str << getRegName(Register, IceType_i32);
      }
      Str << "}\n";
    }
  }
}
template <> void ConstantInteger32::emit(GlobalContext *Ctx) const {
if (!ALLOW_DUMP)
return;
......
......@@ -180,6 +180,10 @@ protected:
OperandX8632Mem *getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
uint32_t Offset = 0);
void makeRandomRegisterPermutation(
llvm::SmallVectorImpl<int32_t> &Permutation,
const llvm::SmallBitVector &ExcludeRegisters) const;
// The following are helpers that insert lowered x86 instructions
// with minimal syntactic overhead, so that the lowering code can
// look as close to assembly as practical.
......
......@@ -47,6 +47,7 @@ static cl::list<Ice::VerboseItem> VerboseList(
clEnumValN(Ice::IceV_LinearScan, "regalloc", "Linear scan details"),
clEnumValN(Ice::IceV_Frame, "frame", "Stack frame layout details"),
clEnumValN(Ice::IceV_AddrOpt, "addropt", "Address mode optimization"),
clEnumValN(Ice::IceV_Random, "random", "Randomization details"),
clEnumValN(Ice::IceV_All, "all", "Use all verbose options"),
clEnumValN(Ice::IceV_Most, "most",
"Use all verbose options except 'regalloc' and 'time'"),
......
; This is a smoke test of randomized register allocation. The output
; of this test will change with changes to the random number generator
; implementation.
; RUN: %p2i -i %s --args -O2 -sz-seed=1 -randomize-regalloc \
; RUN: | llvm-mc -triple=i686-none-nacl -filetype=obj \
; RUN: | llvm-objdump -d --symbolize -x86-asm-syntax=intel - \
; RUN: | FileCheck %s --check-prefix=CHECK_1
; RUN: %p2i -i %s --args -Om1 -sz-seed=1 -randomize-regalloc \
; RUN: | llvm-mc -triple=i686-none-nacl -filetype=obj \
; RUN: | llvm-objdump -d --symbolize -x86-asm-syntax=intel - \
; RUN: | FileCheck %s --check-prefix=OPTM1_1
; Same tests but with a different seed, just to verify randomness.
; RUN: %p2i -i %s --args -O2 -sz-seed=123 -randomize-regalloc \
; RUN: | llvm-mc -triple=i686-none-nacl -filetype=obj \
; RUN: | llvm-objdump -d --symbolize -x86-asm-syntax=intel - \
; RUN: | FileCheck %s --check-prefix=CHECK_123
; RUN: %p2i -i %s --args -Om1 -sz-seed=123 -randomize-regalloc \
; RUN: | llvm-mc -triple=i686-none-nacl -filetype=obj \
; RUN: | llvm-objdump -d --symbolize -x86-asm-syntax=intel - \
; RUN: | FileCheck %s --check-prefix=OPTM1_123
; Multiplies two <4 x i32> vectors.  SSE2 has no packed 32-bit multiply,
; so the lowering uses pmuludq on the even lanes, pshufd to bring the odd
; lanes into position, a second pmuludq, and shufps/pshufd to interleave
; the results.  The specific xmm registers checked below are whatever the
; seeded RNG produced; presumably they must be regenerated whenever the
; RNG implementation changes (see the note at the top of this file).  The
; two seeds yield different register choices, demonstrating that
; -randomize-regalloc actually perturbs the assignment.
define <4 x i32> @mul_v4i32(<4 x i32> %a, <4 x i32> %b) {
entry:
%res = mul <4 x i32> %a, %b
ret <4 x i32> %res
; OPTM1_1-LABEL: mul_v4i32:
; OPTM1_1: sub esp, 60
; OPTM1_1-NEXT: movups xmmword ptr [esp + 32], xmm0
; OPTM1_1-NEXT: movups xmmword ptr [esp + 16], xmm1
; OPTM1_1-NEXT: movups xmm0, xmmword ptr [esp + 32]
; OPTM1_1-NEXT: pshufd xmm7, xmmword ptr [esp + 32], 49
; OPTM1_1-NEXT: pshufd xmm4, xmmword ptr [esp + 16], 49
; OPTM1_1-NEXT: pmuludq xmm0, xmmword ptr [esp + 16]
; OPTM1_1-NEXT: pmuludq xmm7, xmm4
; OPTM1_1-NEXT: shufps xmm0, xmm7, -120
; OPTM1_1-NEXT: pshufd xmm0, xmm0, -40
; OPTM1_1-NEXT: movups xmmword ptr [esp], xmm0
; OPTM1_1-NEXT: movups xmm0, xmmword ptr [esp]
; OPTM1_1-NEXT: add esp, 60
; OPTM1_1-NEXT: ret
; CHECK_1-LABEL: mul_v4i32:
; CHECK_1: movups xmm6, xmm0
; CHECK_1-NEXT: pshufd xmm0, xmm0, 49
; CHECK_1-NEXT: pshufd xmm5, xmm1, 49
; CHECK_1-NEXT: pmuludq xmm6, xmm1
; CHECK_1-NEXT: pmuludq xmm0, xmm5
; CHECK_1-NEXT: shufps xmm6, xmm0, -120
; CHECK_1-NEXT: pshufd xmm6, xmm6, -40
; CHECK_1-NEXT: movups xmm0, xmm6
; CHECK_1-NEXT: ret
; OPTM1_123-LABEL: mul_v4i32:
; OPTM1_123: sub esp, 60
; OPTM1_123-NEXT: movups xmmword ptr [esp + 32], xmm0
; OPTM1_123-NEXT: movups xmmword ptr [esp + 16], xmm1
; OPTM1_123-NEXT: movups xmm0, xmmword ptr [esp + 32]
; OPTM1_123-NEXT: pshufd xmm2, xmmword ptr [esp + 32], 49
; OPTM1_123-NEXT: pshufd xmm6, xmmword ptr [esp + 16], 49
; OPTM1_123-NEXT: pmuludq xmm0, xmmword ptr [esp + 16]
; OPTM1_123-NEXT: pmuludq xmm2, xmm6
; OPTM1_123-NEXT: shufps xmm0, xmm2, -120
; OPTM1_123-NEXT: pshufd xmm0, xmm0, -40
; OPTM1_123-NEXT: movups xmmword ptr [esp], xmm0
; OPTM1_123-NEXT: movups xmm0, xmmword ptr [esp]
; OPTM1_123-NEXT: add esp, 60
; OPTM1_123-NEXT: ret
; CHECK_123-LABEL: mul_v4i32:
; CHECK_123: movups xmm3, xmm0
; CHECK_123-NEXT: pshufd xmm0, xmm0, 49
; CHECK_123-NEXT: pshufd xmm7, xmm1, 49
; CHECK_123-NEXT: pmuludq xmm3, xmm1
; CHECK_123-NEXT: pmuludq xmm0, xmm7
; CHECK_123-NEXT: shufps xmm3, xmm0, -120
; CHECK_123-NEXT: pshufd xmm3, xmm3, -40
; CHECK_123-NEXT: movups xmm0, xmm3
; CHECK_123-NEXT: ret
}
; ERRORS-NOT: ICE translation error
; DUMP-NOT: SZ
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment