Commit 3ffbd62b by Nicolas Capens Committed by Nicolas Capens

Implement propagation of stores to loads in single basic blocks

For loads following stores in the same basic block, replace their result with the data that was stored. It transforms the output of the StoresInMultipleBlocks test from: sub rsp,38h mov dword ptr [rsp],0Dh cmp ecx,0 je a mov dword ptr [rsp],4 add dword ptr [rsp],3 b: mov eax,dword ptr [rsp] add rsp,38h ret a: mov dword ptr [rsp],6 add dword ptr [rsp],5 jmp b Into: sub rsp,38h mov dword ptr [rsp],0Dh cmp ecx,0 je a mov dword ptr [rsp],4 mov eax,4 add eax,3 mov dword ptr [rsp],eax b: mov eax,dword ptr [rsp] add rsp,38h ret a: mov dword ptr [rsp],6 mov eax,6 add eax,5 mov dword ptr [rsp],eax jmp b While at first this might seem like a regression, note that `add [rsp],3` performs both a load and a store. The optimization pass eliminated two load operations from this test. The redundant stores will be eliminated by a subsequent change. Also adds a unit test for the case where store-to-load propagation should not be performed due to an indirect store through a pointer happening in between. Bug: b/179668593 Change-Id: I6ca133ac4e77bbbc3efd517dff6e8dee6d2dc147 Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/52533 Kokoro-Result: kokoro <noreply+kokoro@google.com> Tested-by: 's avatarNicolas Capens <nicolascapens@google.com> Presubmit-Ready: Nicolas Capens <nicolascapens@google.com> Reviewed-by: 's avatarAntonio Maiorano <amaiorano@google.com>
parent d85a7a27
......@@ -17,6 +17,7 @@
#include "src/IceCfg.h"
#include "src/IceCfgNode.h"
#include <unordered_map>
#include <vector>
namespace {
......@@ -37,6 +38,7 @@ private:
void eliminateDeadCode();
void propagateAlloca();
void performScalarReplacementOfAggregates();
void optimizeSingleBasicBlockLoadsStores();
void eliminateUnitializedLoads();
void eliminateLoadsFollowingSingleStore();
void optimizeStoresInSingleBasicBlock();
......@@ -45,6 +47,7 @@ private:
void deleteInstruction(Ice::Inst *instruction);
bool isDead(Ice::Inst *instruction);
bool isStaticallyIndexedArray(Ice::Operand *allocaAddress);
Ice::InstAlloca *allocaOf(Ice::Operand *address);
static const Ice::InstIntrinsic *asLoadSubVector(const Ice::Inst *instruction);
static const Ice::InstIntrinsic *asStoreSubVector(const Ice::Inst *instruction);
......@@ -118,6 +121,9 @@ void Optimizer::run(Ice::Cfg *function)
// Replace arrays with individual elements if only statically indexed.
performScalarReplacementOfAggregates();
// Iterate through basic blocks to propagate loads following stores.
optimizeSingleBasicBlockLoadsStores();
eliminateUnitializedLoads();
eliminateLoadsFollowingSingleStore();
optimizeStoresInSingleBasicBlock();
......@@ -621,6 +627,61 @@ void Optimizer::optimizeStoresInSingleBasicBlock()
}
}
// Iterate through basic blocks to propagate stores to subsequent loads.
void Optimizer::optimizeSingleBasicBlockLoadsStores()
{
for(Ice::CfgNode *block : function->getNodes())
{
// For each stack variable keep track of the last store instruction.
std::unordered_map<const Ice::InstAlloca *, const Ice::Inst *> lastStoreTo;
for(Ice::Inst &inst : block->getInsts())
{
if(inst.isDeleted())
{
continue;
}
if(isStore(inst))
{
Ice::Operand *address = inst.getStoreAddress();
if(Ice::InstAlloca *alloca = allocaOf(address))
{
// Only consider this store for propagation if its address is not used as
// a pointer which could be used for indirect stores.
if(getUses(address)->areOnlyLoadStore())
{
lastStoreTo[alloca] = &inst;
}
}
}
else if(isLoad(inst))
{
if(Ice::InstAlloca *alloca = allocaOf(inst.getLoadAddress()))
{
auto entry = lastStoreTo.find(alloca);
if(entry != lastStoreTo.end())
{
const Ice::Inst *store = entry->second;
// Conservatively check that the types match.
// TODO(b/179668593): Also valid to propagate if the load is smaller or equal in size to the store.
if(loadTypeMatchesStore(&inst, store))
{
replace(&inst, store->getData());
}
}
}
}
}
}
// This can leave some dead instructions. Specifically stores.
// TODO((b/179668593): Eliminate stores superseded by subsequent stores?
eliminateDeadCode();
}
void Optimizer::analyzeUses(Ice::Cfg *function)
{
for(Ice::CfgNode *basicBlock : function->getNodes())
......@@ -802,6 +863,15 @@ bool Optimizer::isStaticallyIndexedArray(Ice::Operand *allocaAddress)
return true;
}
Ice::InstAlloca *Optimizer::allocaOf(Ice::Operand *address)
{
Ice::Variable *addressVar = llvm::dyn_cast<Ice::Variable>(address);
Ice::Inst *def = addressVar ? getDefinition(addressVar) : nullptr;
Ice::InstAlloca *alloca = def ? llvm::dyn_cast<Ice::InstAlloca>(def) : nullptr;
return alloca;
}
const Ice::InstIntrinsic *Optimizer::asLoadSubVector(const Ice::Inst *instruction)
{
if(auto *instrinsic = llvm::dyn_cast<Ice::InstIntrinsic>(instruction))
......
......@@ -596,6 +596,76 @@ TEST(ReactorUnitTests, ReactorArray)
EXPECT_EQ(result, 3);
}
// Excercises the optimizeSingleBasicBlockLoadsStores optimization pass.
TEST(ReactorUnitTests, StoresInMultipleBlocks)
{
FunctionT<int(int)> function;
{
Int b = function.Arg<0>();
Int a = 13;
If(b != 0) // TODO(b/179922668): Support If(b)
{
a = 4;
a = a + 3;
}
Else
{
a = 6;
a = a + 5;
}
Return(a);
}
Nucleus::setOptimizerCallback([](const Nucleus::OptimizerReport *report) {
EXPECT_EQ(report->allocas, 1);
EXPECT_EQ(report->loads, 1);
EXPECT_EQ(report->stores, 5);
});
auto routine = function(testName().c_str());
int result = routine(true);
EXPECT_EQ(result, 7);
}
// This is similar to the LoadAfterIndirectStore test except that the indirect
// store is preceded by a direct store. The subsequent load should not be replaced
// by the value written by the direct store.
TEST(ReactorUnitTests, StoreBeforeIndirectStore)
{
FunctionT<int(int)> function;
{
//Int b = function.Arg<0>();
Int b;
Pointer<Int> p = &b;
Int a = 13;
For(Int i = 0, i < 2, i++)
{
a = 10;
*p = 4;
// This load of `a` should not be replaced by the 10 written above, since
// in the second iteration `p` points to `a` and writes 4.
b = a;
p = &a;
}
Return(b);
}
auto routine = function(testName().c_str());
int result = routine(true);
EXPECT_EQ(result, 4);
}
TEST(ReactorUnitTests, SubVectorLoadStore)
{
FunctionT<int(void *, void *)> function;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment