Commit 0b506e1d by Nicolas Capens Committed by Nicolas Capens

Propagate stores of stack addresses into other local variables

Pointers to the base address of arrays often get stored and then loaded multiple time. This change adds an optimization pass to replace these loads with the result of the alloca of the array, and eliminates the allocas of the pointer variables. The PropagateAlloca unit test without this optimization produces: sub rsp,38h mov dword ptr [rsp+8],16h cmp ecx,0 je 000002543C4E407E lea rax,[rsp+8] mov qword ptr [rsp],rax mov rax,qword ptr [rsp] mov eax,dword ptr [rax] add rsp,38h ret With the optimization pass it becomes: sub rsp,38h mov dword ptr [rsp],16h cmp ecx,0 jne 0000015DC3B33074 mov eax,dword ptr [rsp] add rsp,38h ret Also add a couple of unit tests for corner cases where the propagation is not safe to perform. Bug: b/179279298 Change-Id: I784899319bf5360f47c6fbc22fb82134d135dbfc Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/52468 Presubmit-Ready: Nicolas Capens <nicolascapens@google.com> Tested-by: 's avatarNicolas Capens <nicolascapens@google.com> Reviewed-by: 's avatarAntonio Maiorano <amaiorano@google.com>
parent 8603b126
......@@ -28,7 +28,9 @@ public:
private:
void analyzeUses(Ice::Cfg *function);
void eliminateDeadCode();
void propagateAlloca();
void eliminateUnitializedLoads();
void eliminateLoadsFollowingSingleStore();
void optimizeStoresInSingleBasicBlock();
......@@ -95,7 +97,13 @@ void Optimizer::run(Ice::Cfg *function)
analyzeUses(function);
// Start by eliminating any dead code, to avoid redundant work for the
// subsequent optimization passes.
eliminateDeadCode();
// Eliminate allocas which store the address of other allocas.
propagateAlloca();
eliminateUnitializedLoads();
eliminateLoadsFollowingSingleStore();
optimizeStoresInSingleBasicBlock();
......@@ -109,6 +117,63 @@ void Optimizer::run(Ice::Cfg *function)
operandsWithUses.clear();
}
// Eliminates allocas which store the address of other allocas.
void Optimizer::propagateAlloca()
{
Ice::CfgNode *entryBlock = function->getEntryNode();
Ice::InstList &instList = entryBlock->getInsts();
for(Ice::Inst &inst : instList)
{
if(inst.isDeleted())
{
continue;
}
auto *alloca = llvm::dyn_cast<Ice::InstAlloca>(&inst);
if(!alloca)
{
break; // Allocas are all at the top
}
// Look for stores of this alloca's address.
Ice::Operand *address = alloca->getDest();
Uses uses = *getUses(address); // Hard copy
for(auto *store : uses)
{
if(isStore(*store) && store->getData() == address)
{
Ice::Operand *dest = store->getStoreAddress();
Ice::Variable *destVar = llvm::dyn_cast<Ice::Variable>(dest);
Ice::Inst *def = destVar ? getDefinition(destVar) : nullptr;
// If the address is stored into another stack variable, eliminate the latter.
if(def && def->getKind() == Ice::Inst::Alloca)
{
Uses destUses = *getUses(dest); // Hard copy
// Make sure the only store into the stack variable is this address, and that all of its other uses are loads.
// This prevents dynamic array loads/stores to be replaced by a scalar.
if((destUses.stores.size() == 1) && (destUses.loads.size() == destUses.size() - 1))
{
for(auto *load : destUses.loads)
{
replace(load, address);
}
// The address is now only stored, never loaded, so the store can be eliminated, together with its alloca.
assert(getUses(dest)->size() == 1);
deleteInstruction(store);
assert(def->isDeleted());
}
}
}
}
}
}
void Optimizer::eliminateDeadCode()
{
bool modified;
......
......@@ -376,6 +376,83 @@ TEST(ReactorUnitTests, LoopAfterReturn)
EXPECT_EQ(result, 7);
}
// This test excercises the Optimizer::propagateAlloca() optimization pass.
// The pointer variable should not get stored to / loaded from memory.
// TODO(b/180665600): Check that the optimization took place.
TEST(ReactorUnitTests, PropagateAlloca)
{
FunctionT<int(int)> function;
{
Int b = function.Arg<0>();
Int a = 22;
Pointer<Int> p;
// This branch materializes both `a` and `p`, and ensures single basic block
// optimizations don't also eliminate the pointer store and load.
If(b != 0) // TODO(b/179922668): Support If(b)
{
p = &a;
}
Return(Int(*p)); // TODO(b/179694472): Support Return(*p)
}
auto routine = function(testName().c_str());
int result = routine(true);
EXPECT_EQ(result, 22);
}
// Corner case for Optimizer::propagateAlloca(). It should not replace loading of `p`
// with the addres of `a`, since it also got the address of `b` assigned.
TEST(ReactorUnitTests, PointerToPointer)
{
FunctionT<int()> function;
{
Int a = 444;
Int b = 555;
Int c = 666;
Pointer<Int> p = &a;
Pointer<Pointer<Int>> pp = &p;
p = &b;
Return(Int(*Pointer<Int>(*pp))); // TODO(b/179694472): Support **pp
}
auto routine = function(testName().c_str());
int result = routine();
EXPECT_EQ(result, 555);
}
// Corner case for Optimizer::propagateAlloca(). It should not replace loading of `p[i]`
// with any of the addresses of the `a`, `b`, or `c`.
TEST(ReactorUnitTests, ArrayOfPointersToLocals)
{
FunctionT<int(int)> function;
{
Int i = function.Arg<0>();
Int a = 111;
Int b = 222;
Int c = 333;
Array<Pointer<Int>, 3> p;
p[0] = &a;
p[1] = &b;
p[2] = &c;
Return(Int(*Pointer<Int>(p[i]))); // TODO(b/179694472): Support *p[i]
}
auto routine = function(testName().c_str());
int result = routine(1);
EXPECT_EQ(result, 222);
}
TEST(ReactorUnitTests, SubVectorLoadStore)
{
FunctionT<int(void *, void *)> function;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment