Commit efe061bd by jchen10 Committed by Commit Bot

Optimize HLSL zero initializer

Currently we initialize a variable using a zero initializer. Take the variable below as an example: uint var[4]; We translate it to: uint var[4] = { 0, 0, 0, 0}; If the array size is large, we have to emit a very long zero initializer, which is very slow for D3D drivers to compile. This CL uses the 'static' trick below to solve the problem: static uint _ANGLE_ZEROS_[256]; ... uint var[516] = {_ANGLE_ZEROS_, _ANGLE_ZEROS_, 0, 0, 0, 0}; For 'static', if the declaration does not include an initializer, the value is set to zero. https://docs.microsoft.com/en-us/windows/desktop/direct3dhlsl/dx-graphics-hlsl-variable-syntax Bug: chromium:898030 Change-Id: Ia3f6574b5ddaffa94bf971140eba95835ee105ee Reviewed-on: https://chromium-review.googlesource.com/c/1332805 Reviewed-by: Shahbaz Youssefi <syoussefi@chromium.org> Reviewed-by: Geoff Lang <geofflang@chromium.org> Commit-Queue: Jie A Chen <jie.a.chen@intel.com>
parent 610640fa
......@@ -116,6 +116,44 @@ bool IsAtomicFunctionDirectAssign(const TIntermBinary &node)
IsAtomicFunction(node.getRight()->getAsAggregate()->getOp());
}
// Name of the zero-filled HLSL array that long zero initializers refer to. Declared constexpr so
// the pointer itself cannot be reassigned.
constexpr const char *kZeros = "_ANGLE_ZEROS_";
// Number of elements in the zero-filled HLSL array.
constexpr int kZeroCount = 256;

// Returns the HLSL declaration of the zero array (kZeros with kZeroCount uint elements),
// terminated by a newline.
std::string DefineZeroArray()
{
    std::stringstream ss;
    // For 'static', if the declaration does not include an initializer, the value is set to zero.
    // https://docs.microsoft.com/en-us/windows/desktop/direct3dhlsl/dx-graphics-hlsl-variable-syntax
    ss << "static uint " << kZeros << "[" << kZeroCount << "];\n";
    return ss.str();
}
// Builds the comma-separated element list of an HLSL initializer that zeroes `size` components:
// one kZeros reference for every full run of kZeroCount components, followed by a literal "0" for
// each leftover component. Returns an empty string when size is 0. No braces are added here.
std::string GetZeroInitializer(size_t size)
{
    const size_t fullArrays = size / kZeroCount;
    const size_t looseZeros = size % kZeroCount;

    std::stringstream list;
    bool first = true;
    // Writes one element, preceded by ", " for every element except the very first.
    const auto append = [&list, &first](const char *element) {
        if (!first)
        {
            list << ", ";
        }
        list << element;
        first = false;
    };

    for (size_t n = fullArrays; n > 0; --n)
    {
        append(kZeros);
    }
    for (size_t n = looseZeros; n > 0; --n)
    {
        append("0");
    }
    return list.str();
}
} // anonymous namespace
TReferencedBlock::TReferencedBlock(const TInterfaceBlock *aBlock,
......@@ -225,6 +263,7 @@ OutputHLSL::OutputHLSL(sh::GLenum shaderType,
mUsesDiscardRewriting = false;
mUsesNestedBreak = false;
mRequiresIEEEStrictCompiling = false;
mUseZeroArray = false;
mUniqueIndex = 0;
......@@ -553,6 +592,11 @@ void OutputHLSL::header(TInfoSinkBase &out,
// https://github.com/KhronosGroup/OpenGL-API/issues/5
out << "\n#define ATOMIC_COUNTER_ARRAY_STRIDE 4\n\n";
if (mUseZeroArray)
{
out << DefineZeroArray() << "\n";
}
if (mShaderType == GL_FRAGMENT_SHADER)
{
const bool usingMRTExtension =
......@@ -2028,9 +2072,6 @@ bool OutputHLSL::visitDeclaration(Visit visit, TIntermDeclaration *node)
{
symbol->traverse(this);
out << ArrayString(symbol->getType());
// Add initializer only when requested. It is very slow for D3D11 drivers to
// compile a compute shader if we add code to initialize a groupshared array
// variable with a large array size.
if (declarator->getQualifier() != EvqShared ||
mCompileOptions & SH_INIT_SHARED_VARIABLES)
{
......@@ -2965,20 +3006,16 @@ void OutputHLSL::writeParameter(const TVariable *param, TInfoSinkBase &out)
}
}
// Produces the brace-enclosed HLSL initializer list that zero-fills every component of `type`.
// NOTE(review): this hunk shows removed and added lines together without +/- markers; the
// per-component loop appears to be the removed implementation and the GetZeroInitializer() call
// its replacement - confirm against the actual file.
TString OutputHLSL::zeroInitializer(const TType &type)
TString OutputHLSL::zeroInitializer(const TType &type) const
{
TString string;
size_t size = type.getObjectSize();
for (size_t component = 0; component < size; component++)
// From kZeroCount components upwards GetZeroInitializer() emits references to the zero array,
// so record that header() has to define it.
if (size >= kZeroCount)
{
string += "0";
if (component + 1 < size)
{
string += ", ";
}
mUseZeroArray = true;
}
string = GetZeroInitializer(size).c_str();
return "{" + string + "}";
}
......
......@@ -57,8 +57,6 @@ class OutputHLSL : public TIntermTraverser
const std::map<std::string, unsigned int> &getUniformBlockRegisterMap() const;
const std::map<std::string, unsigned int> &getUniformRegisterMap() const;
static TString zeroInitializer(const TType &type);
TInfoSinkBase &getInfoSink()
{
ASSERT(!mInfoSinkStack.empty());
......@@ -68,6 +66,8 @@ class OutputHLSL : public TIntermTraverser
protected:
friend class ShaderStorageBlockOutputHLSL;
TString zeroInitializer(const TType &type) const;
void writeReferencedAttributes(TInfoSinkBase &out) const;
void writeReferencedVaryings(TInfoSinkBase &out) const;
void header(TInfoSinkBase &out,
......@@ -202,6 +202,7 @@ class OutputHLSL : public TIntermTraverser
bool mUsesDiscardRewriting;
bool mUsesNestedBreak;
bool mRequiresIEEEStrictCompiling;
mutable bool mUseZeroArray;
int mNumRenderTargets;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment