Commit 378653f8 by Xinghua Cao Committed by Commit Bot

D3D: throw a perf warning for uniform block

Previously, on the D3D backend, we translated a uniform block containing only a large array member into a StructuredBuffer instead of a cbuffer, to work around slow fxc compile performance with dynamic uniform indexing. This patch throws a warning if a uniform block containing a large array member fails to hit that optimization. Bug: angleproject:3682 Change-Id: I33459b559923f16a8dfb70c6f46ec52f68d96e06 Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2552365 Commit-Queue: Jamie Madill <jmadill@chromium.org> Reviewed-by: Jamie Madill <jmadill@chromium.org> Reviewed-by: Jiajia Qin <jiajia.qin@intel.com>
parent f0f79e08
......@@ -26,7 +26,7 @@
// Version number for shader translation API.
// It is incremented every time the API changes.
#define ANGLE_SH_VERSION 245
#define ANGLE_SH_VERSION 246
enum ShShaderSpec
{
......@@ -759,6 +759,7 @@ bool GetUniformBlockRegister(const ShHandle handle,
bool ShouldUniformBlockUseStructuredBuffer(const ShHandle handle,
const std::string &uniformBlockName);
// Returns a pointer to the HLSL translator's set of uniform blocks that were recorded as slow
// to compile (blocks with a large array member that did not hit the StructuredBuffer
// optimization). Returns nullptr when ANGLE is built without ANGLE_ENABLE_HLSL.
// |handle| must refer to an HLSL translator; this is only checked with ASSERT.
const std::set<std::string> *GetSlowCompilingUniformBlockSet(const ShHandle handle);
// Gives a map from uniform names to compiler-assigned registers in the default uniform block.
// Note that the map contains also registers of samplers that have been extracted from structs.
......
......@@ -150,8 +150,8 @@ angle_translator_sources = [
"src/compiler/translator/tree_ops/PruneNoOps.h",
"src/compiler/translator/tree_ops/RecordConstantPrecision.cpp",
"src/compiler/translator/tree_ops/RecordConstantPrecision.h",
"src/compiler/translator/tree_ops/RecordUniformBlocksTranslatedToStructuredBuffers.cpp",
"src/compiler/translator/tree_ops/RecordUniformBlocksTranslatedToStructuredBuffers.h",
"src/compiler/translator/tree_ops/RecordUniformBlocksWithLargeArrayMember.cpp",
"src/compiler/translator/tree_ops/RecordUniformBlocksWithLargeArrayMember.h",
"src/compiler/translator/tree_ops/RegenerateStructNames.cpp",
"src/compiler/translator/tree_ops/RegenerateStructNames.h",
"src/compiler/translator/tree_ops/RemoveArrayLengthMethod.cpp",
......
......@@ -297,22 +297,21 @@ const TConstantUnion *OutputHLSL::writeConstantUnionArray(TInfoSinkBase &out,
return constUnionIterated;
}
OutputHLSL::OutputHLSL(
sh::GLenum shaderType,
ShShaderSpec shaderSpec,
int shaderVersion,
const TExtensionBehavior &extensionBehavior,
const char *sourcePath,
ShShaderOutput outputType,
int numRenderTargets,
int maxDualSourceDrawBuffers,
const std::vector<ShaderVariable> &uniforms,
ShCompileOptions compileOptions,
sh::WorkGroupSize workGroupSize,
TSymbolTable *symbolTable,
PerformanceDiagnostics *perfDiagnostics,
const std::map<int, const TInterfaceBlock *> &uniformBlocksTranslatedToStructuredBuffers,
const std::vector<InterfaceBlock> &shaderStorageBlocks)
OutputHLSL::OutputHLSL(sh::GLenum shaderType,
ShShaderSpec shaderSpec,
int shaderVersion,
const TExtensionBehavior &extensionBehavior,
const char *sourcePath,
ShShaderOutput outputType,
int numRenderTargets,
int maxDualSourceDrawBuffers,
const std::vector<ShaderVariable> &uniforms,
ShCompileOptions compileOptions,
sh::WorkGroupSize workGroupSize,
TSymbolTable *symbolTable,
PerformanceDiagnostics *perfDiagnostics,
const std::map<int, const TInterfaceBlock *> &uniformBlockOptimizedMap,
const std::vector<InterfaceBlock> &shaderStorageBlocks)
: TIntermTraverser(true, true, true, symbolTable),
mShaderType(shaderType),
mShaderSpec(shaderSpec),
......@@ -323,7 +322,7 @@ OutputHLSL::OutputHLSL(
mCompileOptions(compileOptions),
mInsideFunction(false),
mInsideMain(false),
mUniformBlocksTranslatedToStructuredBuffers(uniformBlocksTranslatedToStructuredBuffers),
mUniformBlockOptimizedMap(uniformBlockOptimizedMap),
mNumRenderTargets(numRenderTargets),
mMaxDualSourceDrawBuffers(maxDualSourceDrawBuffers),
mCurrentFunctionMetadata(nullptr),
......@@ -661,8 +660,7 @@ void OutputHLSL::header(TInfoSinkBase &out,
out << mStructureHLSL->structsHeader();
mResourcesHLSL->uniformsHeader(out, mOutputType, mReferencedUniforms, mSymbolTable);
out << mResourcesHLSL->uniformBlocksHeader(mReferencedUniformBlocks,
mUniformBlocksTranslatedToStructuredBuffers);
out << mResourcesHLSL->uniformBlocksHeader(mReferencedUniformBlocks, mUniformBlockOptimizedMap);
mSSBOOutputHLSL->writeShaderStorageBlocksHeader(out);
if (!mEqualityFunctions.empty())
......@@ -1650,8 +1648,8 @@ bool OutputHLSL::visitBinary(Visit visit, TIntermBinary *node)
{
const TInterfaceBlock *interfaceBlock =
GetInterfaceBlockOfUniformBlockNearestIndexOperator(node->getLeft());
if (interfaceBlock && mUniformBlocksTranslatedToStructuredBuffers.count(
interfaceBlock->uniqueId().get()) != 0)
if (interfaceBlock &&
mUniformBlockOptimizedMap.count(interfaceBlock->uniqueId().get()) != 0)
{
// If the uniform block member's type is not structure, we had explicitly
// packed the member into a structure, so need to add an operator of field
......@@ -1685,8 +1683,8 @@ bool OutputHLSL::visitBinary(Visit visit, TIntermBinary *node)
{
const TInterfaceBlock *interfaceBlock =
GetInterfaceBlockOfUniformBlockNearestIndexOperator(node->getLeft());
if (interfaceBlock && mUniformBlocksTranslatedToStructuredBuffers.count(
interfaceBlock->uniqueId().get()) != 0)
if (interfaceBlock &&
mUniformBlockOptimizedMap.count(interfaceBlock->uniqueId().get()) != 0)
{
// If the uniform block member's type is not structure, we had explicitly
// packed the member into a structure, so need to add an operator of field
......@@ -1757,8 +1755,8 @@ bool OutputHLSL::visitBinary(Visit visit, TIntermBinary *node)
node->getLeft()->getType().getInterfaceBlock();
const TIntermConstantUnion *index = node->getRight()->getAsConstantUnion();
const TField *field = interfaceBlock->fields()[index->getIConst(0)];
if (structInStd140UniformBlock || mUniformBlocksTranslatedToStructuredBuffers.count(
interfaceBlock->uniqueId().get()) != 0)
if (structInStd140UniformBlock ||
mUniformBlockOptimizedMap.count(interfaceBlock->uniqueId().get()) != 0)
{
out << "_";
}
......
......@@ -37,22 +37,21 @@ using ReferencedVariables = std::map<int, const TVariable *>;
class OutputHLSL : public TIntermTraverser
{
public:
OutputHLSL(
sh::GLenum shaderType,
ShShaderSpec shaderSpec,
int shaderVersion,
const TExtensionBehavior &extensionBehavior,
const char *sourcePath,
ShShaderOutput outputType,
int numRenderTargets,
int maxDualSourceDrawBuffers,
const std::vector<ShaderVariable> &uniforms,
ShCompileOptions compileOptions,
sh::WorkGroupSize workGroupSize,
TSymbolTable *symbolTable,
PerformanceDiagnostics *perfDiagnostics,
const std::map<int, const TInterfaceBlock *> &uniformBlocksTranslatedToStructuredBuffers,
const std::vector<InterfaceBlock> &shaderStorageBlocks);
OutputHLSL(sh::GLenum shaderType,
ShShaderSpec shaderSpec,
int shaderVersion,
const TExtensionBehavior &extensionBehavior,
const char *sourcePath,
ShShaderOutput outputType,
int numRenderTargets,
int maxDualSourceDrawBuffers,
const std::vector<ShaderVariable> &uniforms,
ShCompileOptions compileOptions,
sh::WorkGroupSize workGroupSize,
TSymbolTable *symbolTable,
PerformanceDiagnostics *perfDiagnostics,
const std::map<int, const TInterfaceBlock *> &uniformBlockOptimizedMap,
const std::vector<InterfaceBlock> &shaderStorageBlocks);
~OutputHLSL() override;
......@@ -181,7 +180,7 @@ class OutputHLSL : public TIntermTraverser
// Indexed by block id, not instance id.
ReferencedInterfaceBlocks mReferencedUniformBlocks;
std::map<int, const TInterfaceBlock *> mUniformBlocksTranslatedToStructuredBuffers;
std::map<int, const TInterfaceBlock *> mUniformBlockOptimizedMap;
ReferencedVariables mReferencedAttributes;
ReferencedVariables mReferencedVaryings;
......
......@@ -697,7 +697,7 @@ void ResourcesHLSL::imageMetadataUniforms(TInfoSinkBase &out, unsigned int regIn
TString ResourcesHLSL::uniformBlocksHeader(
const ReferencedInterfaceBlocks &referencedInterfaceBlocks,
const std::map<int, const TInterfaceBlock *> &uniformBlockTranslatedToStructuredBuffer)
const std::map<int, const TInterfaceBlock *> &uniformBlockOptimizedMap)
{
TString interfaceBlocks;
......@@ -712,7 +712,7 @@ TString ResourcesHLSL::uniformBlocksHeader(
// To avoid a compile performance issue, translate the uniform block to a structured
// buffer. See anglebug.com/3682.
if (uniformBlockTranslatedToStructuredBuffer.count(interfaceBlock.uniqueId().get()) != 0)
if (uniformBlockOptimizedMap.count(interfaceBlock.uniqueId().get()) != 0)
{
unsigned int structuredBufferRegister = mSRVRegister;
if (instanceVariable != nullptr && instanceVariable->getType().isArray())
......
......@@ -40,7 +40,7 @@ class ResourcesHLSL : angle::NonCopyable
void imageMetadataUniforms(TInfoSinkBase &out, unsigned int regIndex);
TString uniformBlocksHeader(
const ReferencedInterfaceBlocks &referencedInterfaceBlocks,
const std::map<int, const TInterfaceBlock *> &uniformBlockTranslatedToStructuredBuffer);
const std::map<int, const TInterfaceBlock *> &uniformBlockOptimizedMap);
TString shaderStorageBlocksHeader(const ReferencedInterfaceBlocks &referencedInterfaceBlocks);
// Used for direct index references
......
......@@ -629,6 +629,18 @@ const std::map<std::string, unsigned int> *GetUniformRegisterMap(const ShHandle
#endif // ANGLE_ENABLE_HLSL
}
// Exposes the HLSL translator's record of slow-compiling uniform blocks through the shader
// translation API. Builds without ANGLE_ENABLE_HLSL have no HLSL translator, so they
// always yield nullptr.
const std::set<std::string> *GetSlowCompilingUniformBlockSet(const ShHandle handle)
{
#ifndef ANGLE_ENABLE_HLSL
    return nullptr;
#else
    // The handle is expected to wrap an HLSL translator; anything else is a caller bug.
    TranslatorHLSL *const hlslTranslator = GetTranslatorHLSLFromHandle(handle);
    ASSERT(hlslTranslator);
    return hlslTranslator->getSlowCompilingUniformBlockSet();
#endif  // ANGLE_ENABLE_HLSL
}
unsigned int GetReadonlyImage2DRegisterIndex(const ShHandle handle)
{
#ifdef ANGLE_ENABLE_HLSL
......
......@@ -12,7 +12,7 @@
#include "compiler/translator/tree_ops/BreakVariableAliasingInInnerLoops.h"
#include "compiler/translator/tree_ops/ExpandIntegerPowExpressions.h"
#include "compiler/translator/tree_ops/PruneEmptyCases.h"
#include "compiler/translator/tree_ops/RecordUniformBlocksTranslatedToStructuredBuffers.h"
#include "compiler/translator/tree_ops/RecordUniformBlocksWithLargeArrayMember.h"
#include "compiler/translator/tree_ops/RemoveDynamicIndexing.h"
#include "compiler/translator/tree_ops/RewriteAtomicFunctionExpressions.h"
#include "compiler/translator/tree_ops/RewriteElseBlocks.h"
......@@ -184,25 +184,26 @@ bool TranslatorHLSL::translate(TIntermBlock *root,
}
}
mUniformBlocksTranslatedToStructuredBuffers.clear();
mUniformBlockOptimizedMap.clear();
mSlowCompilingUniformBlockSet.clear();
// In order to know the exact maximum number of slots available for shader resources that
// would be bound as StructuredBuffers, we only translate a uniform block with a large array
// member into a StructuredBuffer when the shader version is 300.
if (getShaderVersion() == 300 &&
(compileOptions & SH_ALLOW_TRANSLATE_UNIFORM_BLOCK_TO_STRUCTUREDBUFFER) != 0)
{
if (!sh::RecordUniformBlocksTranslatedToStructuredBuffers(
root, mUniformBlocksTranslatedToStructuredBuffers))
if (!sh::RecordUniformBlocksWithLargeArrayMember(root, mUniformBlockOptimizedMap,
mSlowCompilingUniformBlockSet))
{
return false;
}
}
sh::OutputHLSL outputHLSL(
getShaderType(), getShaderSpec(), getShaderVersion(), getExtensionBehavior(),
getSourcePath(), getOutputType(), numRenderTargets, maxDualSourceDrawBuffers, getUniforms(),
compileOptions, getComputeShaderLocalSize(), &getSymbolTable(), perfDiagnostics,
mUniformBlocksTranslatedToStructuredBuffers, mShaderStorageBlocks);
sh::OutputHLSL outputHLSL(getShaderType(), getShaderSpec(), getShaderVersion(),
getExtensionBehavior(), getSourcePath(), getOutputType(),
numRenderTargets, maxDualSourceDrawBuffers, getUniforms(),
compileOptions, getComputeShaderLocalSize(), &getSymbolTable(),
perfDiagnostics, mUniformBlockOptimizedMap, mShaderStorageBlocks);
outputHLSL.output(root, getInfoSink().obj);
......@@ -251,6 +252,11 @@ const std::map<std::string, unsigned int> *TranslatorHLSL::getUniformRegisterMap
return &mUniformRegisterMap;
}
// Returns the address of mSlowCompilingUniformBlockSet (never null). The set is cleared at
// the start of translate() and refilled by RecordUniformBlocksWithLargeArrayMember when the
// StructuredBuffer translation path is enabled, so it reflects the most recent translation.
const std::set<std::string> *TranslatorHLSL::getSlowCompilingUniformBlockSet() const
{
return &mSlowCompilingUniformBlockSet;
}
unsigned int TranslatorHLSL::getReadonlyImage2DRegisterIndex() const
{
return mReadonlyImage2DRegisterIndex;
......
......@@ -24,6 +24,7 @@ class TranslatorHLSL : public TCompiler
bool hasUniformBlock(const std::string &interfaceBlockName) const;
unsigned int getUniformBlockRegister(const std::string &interfaceBlockName) const;
bool shouldUniformBlockUseStructuredBuffer(const std::string &uniformBlockName) const;
const std::set<std::string> *getSlowCompilingUniformBlockSet() const;
const std::map<std::string, unsigned int> *getUniformRegisterMap() const;
unsigned int getReadonlyImage2DRegisterIndex() const;
......@@ -46,7 +47,8 @@ class TranslatorHLSL : public TCompiler
unsigned int mReadonlyImage2DRegisterIndex;
unsigned int mImage2DRegisterIndex;
std::set<std::string> mUsedImage2DFunctionNames;
std::map<int, const TInterfaceBlock *> mUniformBlocksTranslatedToStructuredBuffers;
std::map<int, const TInterfaceBlock *> mUniformBlockOptimizedMap;
std::set<std::string> mSlowCompilingUniformBlockSet;
};
} // namespace sh
......
//
// Copyright 2020 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// RecordUniformBlocksTranslatedToStructuredBuffers.h:
// Collect all uniform blocks which will be translated to StructuredBuffers on the Direct3D
// backend.
//
#ifndef COMPILER_TRANSLATOR_TREEOPS_RECORDUNIFORMBLOCKSTRANSLATEDTOSTRUCTUREDBUFFERS_H_
#define COMPILER_TRANSLATOR_TREEOPS_RECORDUNIFORMBLOCKSTRANSLATEDTOSTRUCTUREDBUFFERS_H_
#include "compiler/translator/IntermNode.h"
namespace sh
{
class TIntermNode;
ANGLE_NO_DISCARD bool RecordUniformBlocksTranslatedToStructuredBuffers(
TIntermNode *root,
std::map<int, const TInterfaceBlock *> &uniformBlockTranslatedToStructuredBuffer);
} // namespace sh
#endif // COMPILER_TRANSLATOR_TREEOPS_RECORDUNIFORMBLOCKSTRANSLATEDTOSTRUCTUREDBUFFERS_H_
//
// Copyright 2020 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// RecordUniformBlocksWithLargeArrayMember.h:
// Collect all uniform blocks which have one or more large array members,
// i.e. arrays whose sizes are greater than or equal to 50. If some of them
// satisfy certain conditions, we will translate them to StructuredBuffers
// on the Direct3D backend.
//
#ifndef COMPILER_TRANSLATOR_TREEOPS_RECORDUNIFORMBLOCKSWITHLARGEARRAYMEMBER_H_
#define COMPILER_TRANSLATOR_TREEOPS_RECORDUNIFORMBLOCKSWITHLARGEARRAYMEMBER_H_
#include "compiler/translator/IntermNode.h"
namespace sh
{
class TIntermNode;
// Traverses the tree rooted at |root| and records uniform blocks with large array members.
//   uniformBlockOptimizedMap:     output; consumers look blocks up in it by the interface
//                                 block's unique id to decide whether the block is emitted
//                                 as a StructuredBuffer.
//   slowCompilingUniformBlockSet: output; blocks with a large array member that were not
//                                 eligible for the StructuredBuffer translation
//                                 (presumably keyed by block name - confirm in the .cpp).
// Returns false on failure; the result must not be ignored (ANGLE_NO_DISCARD).
ANGLE_NO_DISCARD bool RecordUniformBlocksWithLargeArrayMember(
TIntermNode *root,
std::map<int, const TInterfaceBlock *> &uniformBlockOptimizedMap,
std::set<std::string> &slowCompilingUniformBlockSet);
} // namespace sh
#endif // COMPILER_TRANSLATOR_TREEOPS_RECORDUNIFORMBLOCKSWITHLARGEARRAYMEMBER_H_
......@@ -2078,6 +2078,27 @@ std::unique_ptr<LinkEvent> ProgramD3D::link(const gl::Context *context,
shadersD3D[shaderType]->generateWorkarounds(&mShaderWorkarounds[shaderType]);
mShaderUniformsDirty.set(shaderType);
const std::set<std::string> &slowCompilingUniformBlockSet =
shadersD3D[shaderType]->getSlowCompilingUniformBlockSet();
if (slowCompilingUniformBlockSet.size() > 0)
{
std::ostringstream stream;
stream << "You could get a better shader compiling performance if you re-write"
<< " the uniform block(s)\n[ ";
for (const std::string &str : slowCompilingUniformBlockSet)
{
stream << str << " ";
}
stream << "]\nin the " << gl::GetShaderTypeString(shaderType) << " shader.\n";
stream << "You could get more details from "
"https://chromium.googlesource.com/angle/angle/+/refs/heads/master/"
"src/libANGLE/renderer/d3d/d3d11/"
"UniformBlockToStructuredBufferTranslation.md\n";
ANGLE_PERF_WARNING(context->getState().getDebug(), GL_DEBUG_SEVERITY_MEDIUM,
stream.str().c_str());
}
}
}
......
......@@ -240,6 +240,11 @@ bool ShaderD3D::useImage2DFunction(const std::string &functionName) const
return mUsedImage2DFunctionNames.find(functionName) != mUsedImage2DFunctionNames.end();
}
// Returns the slow-compiling uniform block set that ShaderD3D::compile copied from the
// shader translator. ProgramD3D::link reads it to build the performance warning message.
const std::set<std::string> &ShaderD3D::getSlowCompilingUniformBlockSet() const
{
return mSlowCompilingUniformBlockSet;
}
const std::map<std::string, unsigned int> &GetUniformRegisterMap(
const std::map<std::string, unsigned int> *uniformRegisterMap)
{
......@@ -247,6 +252,13 @@ const std::map<std::string, unsigned int> &GetUniformRegisterMap(
return *uniformRegisterMap;
}
// Converts the pointer returned by the translator API into a reference.
// The pointer must be non-null; this is only enforced by ASSERT, so a null pointer is
// dereferenced in builds where ASSERT compiles to nothing.
const std::set<std::string> &GetSlowCompilingUniformBlockSet(
const std::set<std::string> *slowCompilingUniformBlockSet)
{
ASSERT(slowCompilingUniformBlockSet);
return *slowCompilingUniformBlockSet;
}
const std::set<std::string> &GetUsedImage2DFunctionNames(
const std::set<std::string> *usedImage2DFunctionNames)
{
......@@ -330,6 +342,9 @@ std::shared_ptr<WaitableCompileEvent> ShaderD3D::compile(const gl::Context *cont
}
}
mSlowCompilingUniformBlockSet =
GetSlowCompilingUniformBlockSet(sh::GetSlowCompilingUniformBlockSet(compilerHandle));
for (const sh::InterfaceBlock &interfaceBlock : mState.getShaderStorageBlocks())
{
if (interfaceBlock.active)
......
......@@ -59,6 +59,7 @@ class ShaderD3D : public ShaderImpl
unsigned int getReadonlyImage2DRegisterIndex() const { return mReadonlyImage2DRegisterIndex; }
unsigned int getImage2DRegisterIndex() const { return mImage2DRegisterIndex; }
bool useImage2DFunction(const std::string &functionName) const;
const std::set<std::string> &getSlowCompilingUniformBlockSet() const;
void appendDebugInfo(const std::string &info) const { mDebugInfo += info; }
void generateWorkarounds(angle::CompilerWorkaroundsD3D *workarounds) const;
......@@ -104,6 +105,7 @@ class ShaderD3D : public ShaderImpl
std::map<std::string, unsigned int> mUniformRegisterMap;
std::map<std::string, unsigned int> mUniformBlockRegisterMap;
std::map<std::string, bool> mUniformBlockUseStructuredBufferMap;
std::set<std::string> mSlowCompilingUniformBlockSet;
std::map<std::string, unsigned int> mShaderStorageBlockRegisterMap;
unsigned int mReadonlyImage2DRegisterIndex;
unsigned int mImage2DRegisterIndex;
......
......@@ -3093,6 +3093,51 @@ void main(void){
EXPECT_GL_NO_ERROR();
}
// Test that a performance warning is thrown on the D3D backend when a uniform block with a
// large array member fails to hit the StructuredBuffer optimization.
// The fragment shader declares four std140/row_major uniform blocks, each holding a single
// 128-element array (matrix or struct elements; with and without instance names, one as an
// instance array).
// NOTE(review): the test only checks that no GL error is generated; it does not inspect the
// debug message log, so the warning itself is not verified here.
TEST_P(UniformBlockWithOneLargeArrayMemberTest, ThrowPerfWarningInD3D)
{
constexpr char kFS[] = R"(#version 300 es
precision highp float;
struct S1 {
vec2 a[2];
};
struct S2 {
mat2x4 b;
};
layout(std140, row_major) uniform UBO1{
mat3x2 buf1[128];
};
layout(std140, row_major) uniform UBO2{
mat4x3 buf2[128];
} instance1;
layout(std140, row_major) uniform UBO3{
S1 buf3[128];
};
layout(std140, row_major) uniform UBO4{
S2 buf4[128];
} instance2[2];
out vec4 my_FragColor;
void main(void){
uvec2 coord = uvec2(floor(gl_FragCoord.xy));
uint x = coord.x % 64u;
uint y = coord.y;
my_FragColor = vec4(buf1[y]*instance1.buf2[y]*instance2[0].buf4[y].b*buf3[y].a[x], 0.0f, 1.0);
})";
// NOTE(review): in the shader above `x` can reach 63 while `S1::a` has only 2 elements, and
// `y` is not clamped to the 128-element arrays - confirm the out-of-range dynamic indexing
// is intentional.
// Build the program from the simple vertex shader and the fragment shader above; no draw
// call is issued.
ANGLE_GL_PROGRAM(program, essl3_shaders::vs::Simple(), kFS);
EXPECT_GL_NO_ERROR();
}
// Use this to select which configurations (e.g. which renderer, which GLES major version) these
// tests should be run against.
ANGLE_INSTANTIATE_TEST_ES3(UniformBufferTest);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment