Commit d951f198 by Ben Clayton

GLES: Remove hardcoded shader limits

Perform whole-shader analysis to determine usage limits. Use these limits to allocate compile time and runtime arrays. Removes the constants: MAX_SHADER_NESTED_LOOPS MAX_SHADER_NESTED_IFS MAX_SHADER_CALL_STACK_SIZE Also switched to using dynamic containers to remove the MAX_SHADER_CALL_SITES limit, which I believe to have been buggy and broken. Bug: b/123587120 Change-Id: I89be80072183ac2aac28124df236888309e7207c Reviewed-on: https://swiftshader-review.googlesource.com/c/24668Reviewed-by: 's avatarNicolas Capens <nicolascapens@google.com> Kokoro-Presubmit: kokoro <noreply+kokoro@google.com> Tested-by: 's avatarBen Clayton <bclayton@google.com>
parent 6397ed0f
......@@ -152,11 +152,13 @@ namespace sw
return v;
}
template <int limit> class BoundedIndex
class BoundedIndex
{
public:
BoundedIndex(int index) : index(index) {}
inline void setLimit(int limit) { this->limit = limit; }
inline int operator++(int) { return index++; }
inline int operator--(int) { return index--; }
inline void operator=(int i) { index = i; }
......@@ -190,6 +192,7 @@ namespace sw
private:
int index = 0;
int limit = 0;
};
// The OFFSET macro is a generalization of the offsetof() macro defined in <cstddef>.
......
......@@ -97,9 +97,6 @@ namespace sw
MAX_TEXTURE_LOD = MIPMAP_LEVELS - 2, // Trilinear accesses lod+1
RENDERTARGETS = 8,
NUM_TEMPORARY_REGISTERS = 4096,
MAX_SHADER_CALL_SITES = 2048,
MAX_SHADER_NESTED_LOOPS = 4,
MAX_SHADER_NESTED_IFS = 24 + 24,
MAX_SHADER_CALL_STACK_SIZE = 64,
MAX_SHADER_ENABLE_STACK_SIZE = 1 + 24,
};
......
......@@ -25,6 +25,39 @@ namespace sw
extern bool halfIntegerCoordinates; // Pixel centers are not at integer coordinates
extern bool fullPixelPositionRegister;
PixelProgram::PixelProgram(const PixelProcessor::State &state, const PixelShader *shader) :
PixelRoutine(state, shader),
r(shader->indirectAddressableTemporaries),
aL(shader->getLimits().loops),
increment(shader->getLimits().loops),
iteration(shader->getLimits().loops),
callStack(shader->getLimits().stack)
{
auto limits = shader->getLimits();
ifDepth.setLimit(limits.ifs);
loopRepDepth.setLimit(limits.loops);
currentLabel.setLimit(limits.functions);
ifFalseBlock.resize(limits.ifs);
loopRepTestBlock.resize(limits.loops);
loopRepEndBlock.resize(limits.loops);
labelBlock.resize(limits.functions);
isConditionalIf.resize(limits.ifs);
loopDepth = -1;
enableStack[0] = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
if(shader->containsBreakInstruction())
{
enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
}
if(shader->containsContinueInstruction())
{
enableContinue = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
}
}
void PixelProgram::setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w)
{
if(shader->getShaderModel() >= 0x0300)
......
......@@ -18,33 +18,14 @@
#include "PixelRoutine.hpp"
#include "SamplerCore.hpp"
#include <unordered_map>
namespace sw
{
class PixelProgram : public PixelRoutine
{
public:
PixelProgram(const PixelProcessor::State &state, const PixelShader *shader) :
PixelRoutine(state, shader), r(shader->indirectAddressableTemporaries)
{
for(int i = 0; i < MAX_SHADER_CALL_SITES; ++i)
{
labelBlock[i] = 0;
}
loopDepth = -1;
enableStack[0] = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
if(shader->containsBreakInstruction())
{
enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
}
if(shader->containsContinueInstruction())
{
enableContinue = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
}
}
PixelProgram(const PixelProcessor::State &state, const PixelShader *shader);
virtual ~PixelProgram() {}
protected:
......@@ -67,13 +48,13 @@ namespace sw
// DX9 specific variables
Vector4f p0;
Array<Int, MAX_SHADER_NESTED_LOOPS> aL;
Array<Int, MAX_SHADER_NESTED_LOOPS> increment;
Array<Int, MAX_SHADER_NESTED_LOOPS> iteration;
Array<Int> aL; // loop counter register
Array<Int> increment; // increment value per loop
Array<Int> iteration; // iteration count
Int loopDepth; // FIXME: Add support for switch
Int stackIndex; // FIXME: Inc/decrement callStack
Array<UInt, MAX_SHADER_CALL_STACK_SIZE> callStack;
Array<UInt> callStack;
// Per pixel based on conditions reached
Int enableIndex;
......@@ -153,18 +134,18 @@ namespace sw
void RET();
void LEAVE();
BoundedIndex<MAX_SHADER_NESTED_IFS> ifDepth = 0;
BoundedIndex<MAX_SHADER_NESTED_LOOPS> loopRepDepth = 0;
BoundedIndex<MAX_SHADER_CALL_SITES> currentLabel = -1;
BoundedIndex ifDepth = 0;
BoundedIndex loopRepDepth = 0;
BoundedIndex currentLabel = -1;
bool scalar = false;
BasicBlock *ifFalseBlock[MAX_SHADER_NESTED_IFS];
BasicBlock *loopRepTestBlock[MAX_SHADER_NESTED_LOOPS];
BasicBlock *loopRepEndBlock[MAX_SHADER_NESTED_LOOPS];
BasicBlock *labelBlock[MAX_SHADER_CALL_SITES];
std::vector<BasicBlock*> callRetBlock[MAX_SHADER_CALL_SITES];
std::vector<BasicBlock*> ifFalseBlock;
std::vector<BasicBlock*> loopRepTestBlock;
std::vector<BasicBlock*> loopRepEndBlock;
std::vector<BasicBlock*> labelBlock;
std::unordered_map<unsigned int, std::vector<BasicBlock*>> callRetBlock; // label -> list of call sites
BasicBlock *returnBlock;
bool isConditionalIf[MAX_SHADER_NESTED_IFS];
std::vector<bool> isConditionalIf;
std::vector<Int4> restoreContinue;
};
}
......
......@@ -161,6 +161,7 @@ namespace sw
analyzeSamplers();
analyzeCallSites();
analyzeIndirectAddressing();
analyzeLimits();
}
void PixelShader::analyzeZOverride()
......
......@@ -19,10 +19,14 @@
#include "Common/Math.hpp"
#include "Common/Debug.hpp"
#include <algorithm>
#include <set>
#include <fstream>
#include <functional>
#include <sstream>
#include <stdarg.h>
#include <unordered_map>
#include <unordered_set>
namespace sw
{
......@@ -1877,15 +1881,13 @@ namespace sw
// This is used to know what basic block to return to.
void Shader::analyzeCallSites()
{
int callSiteIndex[MAX_SHADER_CALL_SITES] = {0};
std::unordered_map<int, int> callSiteIndices;
for(auto &inst : instruction)
{
if(inst->opcode == OPCODE_CALL || inst->opcode == OPCODE_CALLNZ)
{
int label = sw::min(inst->dst.label, static_cast<unsigned int>(MAX_SHADER_CALL_SITES));
inst->dst.callSite = callSiteIndex[label]++;
inst->dst.callSite = callSiteIndices[inst->dst.label]++;
}
}
}
......@@ -1924,4 +1926,177 @@ namespace sw
}
}
}
// analyzeLimits analyzes the whole shader program to determine the deepest
// nesting of control flow blocks and function calls. These calculations
// are stored into the limits member, and is used by the programs to
// allocate stack storage variables.
void Shader::analyzeLimits()
{
typedef unsigned int FunctionID;
// Identifier of the function with the main entry point.
constexpr FunctionID MAIN_ID = 0xF0000000;
// Invalid function identifier.
constexpr FunctionID INVALID_ID = ~0U;
// Limits on a single function.
struct FunctionLimits
{
uint32_t loops = 0; // maximum nested loop and reps.
uint32_t ifs = 0; // maximum nested if statements.
uint32_t stack = 0; // maximum call depth.
};
// Information about a single function in the shader.
struct FunctionInfo
{
FunctionLimits limits;
std::unordered_set<FunctionID> calls; // What this function calls.
bool reachable; // Is this function reachable?
};
// Begin by doing a pass over the instructions to identify all the
// labels that denote the start of a function.
std::unordered_map<FunctionID, FunctionInfo> functions;
for(auto &inst : instruction)
{
if (inst->isCall())
{
FunctionID id = inst->dst.label;
ASSERT(id != MAIN_ID); // If this fires, we're going to have to represent main with something else.
functions[id] = FunctionInfo();
}
}
// Add a definition for the main entry point.
// This starts at the beginning of the instructions and does not have
// its own label.
functions[MAIN_ID] = FunctionInfo();
functions[MAIN_ID].reachable = true;
FunctionID currentFunc = MAIN_ID;
// Limits for the currently analyzed function.
FunctionLimits currentLimits;
// Now loop over the instructions gathering the limits of each of the
// functions.
for(size_t i = 0; i < instruction.size(); i++)
{
const auto& inst = instruction[i];
switch (inst->opcode)
{
case OPCODE_LABEL:
{
FunctionID id = inst->dst.label;
auto it = functions.find(id);
if (it == functions.end())
{
// Label does not start a new function.
continue;
}
ASSERT(currentFunc == INVALID_ID); // Functions overlap
currentFunc = id;
break;
}
case OPCODE_CALL:
case OPCODE_CALLNZ:
{
ASSERT(currentFunc != INVALID_ID);
FunctionID id = inst->dst.label;
functions[currentFunc].calls.emplace(id);
functions[id].reachable = true;
break;
}
case OPCODE_LOOP:
case OPCODE_REP:
case OPCODE_WHILE:
case OPCODE_SWITCH: // Not a mistake - switches share loopReps.
{
ASSERT(currentFunc != INVALID_ID);
++currentLimits.loops;
auto& func = functions[currentFunc];
func.limits.loops = std::max(func.limits.loops, currentLimits.loops);
break;
}
case OPCODE_ENDLOOP:
case OPCODE_ENDREP:
case OPCODE_ENDWHILE:
case OPCODE_ENDSWITCH:
{
ASSERT(currentLimits.loops > 0);
--currentLimits.loops;
break;
}
case OPCODE_IF:
case OPCODE_IFC:
{
ASSERT(currentFunc != INVALID_ID);
++currentLimits.ifs;
auto& func = functions[currentFunc];
func.limits.ifs = std::max(func.limits.ifs, currentLimits.ifs);
break;
}
case OPCODE_ENDIF:
{
ASSERT(currentLimits.ifs > 0);
currentLimits.ifs--;
break;
}
case OPCODE_RET:
{
// Must be in a function to return.
ASSERT(currentFunc != INVALID_ID);
// All stacks should be popped before returning.
ASSERT(currentLimits.ifs == 0);
ASSERT(currentLimits.loops == 0);
currentFunc = INVALID_ID;
currentLimits = FunctionLimits();
break;
}
default:
break;
}
}
// Assert that every function is reachable (these should have been
// stripped in earlier stages). Unreachable functions may be code
// generated, but their own limits are not considered below, potentially
// causing OOB indexing in later stages.
for (auto it : functions) { ASSERT(it.second.reachable); }
// We have now gathered all the information about each of the functions
// in the shader. Traverse these functions starting from the main
// function to calculate the maximum limits across the entire shader.
std::unordered_set<FunctionID> visited;
std::function<Limits(FunctionID)> traverse;
traverse = [&](FunctionID id) -> Limits
{
const auto& func = functions[id];
ASSERT(visited.count(id) == 0); // Sanity check: Recursive functions are not allowed.
visited.insert(id);
Limits limits;
limits.stack = 1;
for (auto callee : func.calls)
{
auto calleeLimits = traverse(callee);
limits.loops = std::max(limits.loops, calleeLimits.loops);
limits.ifs = std::max(limits.ifs, calleeLimits.ifs);
limits.stack = std::max(limits.stack, calleeLimits.stack + 1);
}
visited.erase(id);
limits.loops += func.limits.loops;
limits.ifs += func.limits.ifs;
return limits;
};
limits = traverse(MAIN_ID);
limits.functions = (uint32_t)functions.size();
}
}
......@@ -35,6 +35,7 @@ namespace sw
enum Opcode
{
// Matches order in d3d9types.h
// See https://docs.microsoft.com/en-us/windows-hardware/drivers/ddi/content/d3d9types/ne-d3d9types-_d3dshader_instruction_opcode_type
OPCODE_NOP = 0,
OPCODE_MOV,
OPCODE_ADD,
......@@ -554,6 +555,15 @@ namespace sw
};
};
// Limits holds the maximum nested counts for the shader.
struct Limits
{
uint32_t loops = 0; // maximum nested loop and reps.
uint32_t ifs = 0; // maximum nested if statements.
uint32_t stack = 0; // maximum call depth.
uint32_t functions = 0; // total number of functions.
};
Shader();
virtual ~Shader();
......@@ -562,6 +572,7 @@ namespace sw
size_t getLength() const;
ShaderType getShaderType() const;
unsigned short getShaderModel() const;
inline const Limits& getLimits() const { return limits; }
void append(Instruction *instruction);
void declareSampler(int i);
......@@ -629,8 +640,11 @@ namespace sw
void analyzeSamplers();
void analyzeCallSites();
void analyzeIndirectAddressing();
void analyzeLimits();
void markFunctionAnalysis(unsigned int functionLabel, Analysis flag);
Limits limits; // Calculated in analyzeLimits().
ShaderType shaderType;
union
......
......@@ -24,12 +24,24 @@
namespace sw
{
VertexProgram::VertexProgram(const VertexProcessor::State &state, const VertexShader *shader)
: VertexRoutine(state, shader), shader(shader), r(shader->indirectAddressableTemporaries)
: VertexRoutine(state, shader),
shader(shader),
r(shader->indirectAddressableTemporaries),
aL(shader->getLimits().loops),
increment(shader->getLimits().loops),
iteration(shader->getLimits().loops),
callStack(shader->getLimits().stack)
{
for(int i = 0; i < MAX_SHADER_CALL_SITES; i++)
{
labelBlock[i] = 0;
}
auto limits = shader->getLimits();
ifDepth.setLimit(limits.ifs);
loopRepDepth.setLimit(limits.loops);
currentLabel.setLimit(limits.functions);
ifFalseBlock.resize(limits.ifs);
loopRepTestBlock.resize(limits.loops);
loopRepEndBlock.resize(limits.loops);
labelBlock.resize(limits.functions);
isConditionalIf.resize(limits.ifs);
loopDepth = -1;
enableStack[0] = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
......
......@@ -22,6 +22,8 @@
#include "Renderer/Stream.hpp"
#include "Common/Types.hpp"
#include <unordered_map>
namespace sw
{
struct Stream;
......@@ -39,15 +41,15 @@ namespace sw
RegisterArray<NUM_TEMPORARY_REGISTERS> r; // Temporary registers
Vector4f a0;
Array<Int, MAX_SHADER_NESTED_LOOPS> aL;
Array<Int> aL; // loop counter register
Vector4f p0;
Array<Int, MAX_SHADER_NESTED_LOOPS> increment;
Array<Int, MAX_SHADER_NESTED_LOOPS> iteration;
Array<Int> increment;
Array<Int> iteration;
Int loopDepth;
Int stackIndex; // FIXME: Inc/decrement callStack
Array<UInt, MAX_SHADER_CALL_STACK_SIZE> callStack;
Array<UInt> callStack;
Int enableIndex;
Array<Int4, MAX_SHADER_ENABLE_STACK_SIZE> enableStack;
......@@ -122,18 +124,18 @@ namespace sw
Vector4f sampleTexture(const Src &s, Vector4f &uvwq, Float4 &lod, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function);
Vector4f sampleTexture(int sampler, Vector4f &uvwq, Float4 &lod, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function);
BoundedIndex<MAX_SHADER_NESTED_IFS> ifDepth = 0;
BoundedIndex<MAX_SHADER_NESTED_LOOPS> loopRepDepth = 0;
BoundedIndex<MAX_SHADER_CALL_SITES> currentLabel = -1;
BoundedIndex ifDepth = 0;
BoundedIndex loopRepDepth = 0;
BoundedIndex currentLabel = -1;
bool scalar = false;
BasicBlock *ifFalseBlock[MAX_SHADER_NESTED_IFS];
BasicBlock *loopRepTestBlock[MAX_SHADER_NESTED_LOOPS];
BasicBlock *loopRepEndBlock[MAX_SHADER_NESTED_LOOPS];
BasicBlock *labelBlock[MAX_SHADER_CALL_SITES];
std::vector<BasicBlock*> callRetBlock[MAX_SHADER_CALL_SITES];
std::vector<BasicBlock*> ifFalseBlock;
std::vector<BasicBlock*> loopRepTestBlock;
std::vector<BasicBlock*> loopRepEndBlock;
std::vector<BasicBlock*> labelBlock;
std::unordered_map<unsigned int, std::vector<BasicBlock*>> callRetBlock; // label -> list of call sites
BasicBlock *returnBlock;
bool isConditionalIf[MAX_SHADER_NESTED_IFS];
std::vector<bool> isConditionalIf;
std::vector<Int4> restoreContinue;
};
}
......
......@@ -208,6 +208,7 @@ namespace sw
analyzeSamplers();
analyzeCallSites();
analyzeIndirectAddressing();
analyzeLimits();
}
void VertexShader::analyzeInput()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment