Commit 378653f8 by Xinghua Cao Committed by Commit Bot

D3D: throw a perf warning for uniform block

On the D3D backend, a uniform block that contains only a large array member is translated into a StructuredBuffer instead of a cbuffer, to work around the slow fxc compile performance issue with dynamic uniform indexing. This patch throws a performance warning if a uniform block containing a large array member fails to hit that optimization. Bug: angleproject:3682 Change-Id: I33459b559923f16a8dfb70c6f46ec52f68d96e06 Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2552365 Commit-Queue: Jamie Madill <jmadill@chromium.org> Reviewed-by: Jamie Madill <jmadill@chromium.org> Reviewed-by: Jiajia Qin <jiajia.qin@intel.com>
parent f0f79e08
...@@ -26,7 +26,7 @@ ...@@ -26,7 +26,7 @@
// Version number for shader translation API. // Version number for shader translation API.
// It is incremented every time the API changes. // It is incremented every time the API changes.
#define ANGLE_SH_VERSION 245 #define ANGLE_SH_VERSION 246
enum ShShaderSpec enum ShShaderSpec
{ {
...@@ -759,6 +759,7 @@ bool GetUniformBlockRegister(const ShHandle handle, ...@@ -759,6 +759,7 @@ bool GetUniformBlockRegister(const ShHandle handle,
bool ShouldUniformBlockUseStructuredBuffer(const ShHandle handle, bool ShouldUniformBlockUseStructuredBuffer(const ShHandle handle,
const std::string &uniformBlockName); const std::string &uniformBlockName);
const std::set<std::string> *GetSlowCompilingUniformBlockSet(const ShHandle handle);
// Gives a map from uniform names to compiler-assigned registers in the default uniform block. // Gives a map from uniform names to compiler-assigned registers in the default uniform block.
// Note that the map contains also registers of samplers that have been extracted from structs. // Note that the map contains also registers of samplers that have been extracted from structs.
......
...@@ -150,8 +150,8 @@ angle_translator_sources = [ ...@@ -150,8 +150,8 @@ angle_translator_sources = [
"src/compiler/translator/tree_ops/PruneNoOps.h", "src/compiler/translator/tree_ops/PruneNoOps.h",
"src/compiler/translator/tree_ops/RecordConstantPrecision.cpp", "src/compiler/translator/tree_ops/RecordConstantPrecision.cpp",
"src/compiler/translator/tree_ops/RecordConstantPrecision.h", "src/compiler/translator/tree_ops/RecordConstantPrecision.h",
"src/compiler/translator/tree_ops/RecordUniformBlocksTranslatedToStructuredBuffers.cpp", "src/compiler/translator/tree_ops/RecordUniformBlocksWithLargeArrayMember.cpp",
"src/compiler/translator/tree_ops/RecordUniformBlocksTranslatedToStructuredBuffers.h", "src/compiler/translator/tree_ops/RecordUniformBlocksWithLargeArrayMember.h",
"src/compiler/translator/tree_ops/RegenerateStructNames.cpp", "src/compiler/translator/tree_ops/RegenerateStructNames.cpp",
"src/compiler/translator/tree_ops/RegenerateStructNames.h", "src/compiler/translator/tree_ops/RegenerateStructNames.h",
"src/compiler/translator/tree_ops/RemoveArrayLengthMethod.cpp", "src/compiler/translator/tree_ops/RemoveArrayLengthMethod.cpp",
......
...@@ -297,22 +297,21 @@ const TConstantUnion *OutputHLSL::writeConstantUnionArray(TInfoSinkBase &out, ...@@ -297,22 +297,21 @@ const TConstantUnion *OutputHLSL::writeConstantUnionArray(TInfoSinkBase &out,
return constUnionIterated; return constUnionIterated;
} }
OutputHLSL::OutputHLSL( OutputHLSL::OutputHLSL(sh::GLenum shaderType,
sh::GLenum shaderType, ShShaderSpec shaderSpec,
ShShaderSpec shaderSpec, int shaderVersion,
int shaderVersion, const TExtensionBehavior &extensionBehavior,
const TExtensionBehavior &extensionBehavior, const char *sourcePath,
const char *sourcePath, ShShaderOutput outputType,
ShShaderOutput outputType, int numRenderTargets,
int numRenderTargets, int maxDualSourceDrawBuffers,
int maxDualSourceDrawBuffers, const std::vector<ShaderVariable> &uniforms,
const std::vector<ShaderVariable> &uniforms, ShCompileOptions compileOptions,
ShCompileOptions compileOptions, sh::WorkGroupSize workGroupSize,
sh::WorkGroupSize workGroupSize, TSymbolTable *symbolTable,
TSymbolTable *symbolTable, PerformanceDiagnostics *perfDiagnostics,
PerformanceDiagnostics *perfDiagnostics, const std::map<int, const TInterfaceBlock *> &uniformBlockOptimizedMap,
const std::map<int, const TInterfaceBlock *> &uniformBlocksTranslatedToStructuredBuffers, const std::vector<InterfaceBlock> &shaderStorageBlocks)
const std::vector<InterfaceBlock> &shaderStorageBlocks)
: TIntermTraverser(true, true, true, symbolTable), : TIntermTraverser(true, true, true, symbolTable),
mShaderType(shaderType), mShaderType(shaderType),
mShaderSpec(shaderSpec), mShaderSpec(shaderSpec),
...@@ -323,7 +322,7 @@ OutputHLSL::OutputHLSL( ...@@ -323,7 +322,7 @@ OutputHLSL::OutputHLSL(
mCompileOptions(compileOptions), mCompileOptions(compileOptions),
mInsideFunction(false), mInsideFunction(false),
mInsideMain(false), mInsideMain(false),
mUniformBlocksTranslatedToStructuredBuffers(uniformBlocksTranslatedToStructuredBuffers), mUniformBlockOptimizedMap(uniformBlockOptimizedMap),
mNumRenderTargets(numRenderTargets), mNumRenderTargets(numRenderTargets),
mMaxDualSourceDrawBuffers(maxDualSourceDrawBuffers), mMaxDualSourceDrawBuffers(maxDualSourceDrawBuffers),
mCurrentFunctionMetadata(nullptr), mCurrentFunctionMetadata(nullptr),
...@@ -661,8 +660,7 @@ void OutputHLSL::header(TInfoSinkBase &out, ...@@ -661,8 +660,7 @@ void OutputHLSL::header(TInfoSinkBase &out,
out << mStructureHLSL->structsHeader(); out << mStructureHLSL->structsHeader();
mResourcesHLSL->uniformsHeader(out, mOutputType, mReferencedUniforms, mSymbolTable); mResourcesHLSL->uniformsHeader(out, mOutputType, mReferencedUniforms, mSymbolTable);
out << mResourcesHLSL->uniformBlocksHeader(mReferencedUniformBlocks, out << mResourcesHLSL->uniformBlocksHeader(mReferencedUniformBlocks, mUniformBlockOptimizedMap);
mUniformBlocksTranslatedToStructuredBuffers);
mSSBOOutputHLSL->writeShaderStorageBlocksHeader(out); mSSBOOutputHLSL->writeShaderStorageBlocksHeader(out);
if (!mEqualityFunctions.empty()) if (!mEqualityFunctions.empty())
...@@ -1650,8 +1648,8 @@ bool OutputHLSL::visitBinary(Visit visit, TIntermBinary *node) ...@@ -1650,8 +1648,8 @@ bool OutputHLSL::visitBinary(Visit visit, TIntermBinary *node)
{ {
const TInterfaceBlock *interfaceBlock = const TInterfaceBlock *interfaceBlock =
GetInterfaceBlockOfUniformBlockNearestIndexOperator(node->getLeft()); GetInterfaceBlockOfUniformBlockNearestIndexOperator(node->getLeft());
if (interfaceBlock && mUniformBlocksTranslatedToStructuredBuffers.count( if (interfaceBlock &&
interfaceBlock->uniqueId().get()) != 0) mUniformBlockOptimizedMap.count(interfaceBlock->uniqueId().get()) != 0)
{ {
// If the uniform block member's type is not structure, we had explicitly // If the uniform block member's type is not structure, we had explicitly
// packed the member into a structure, so need to add an operator of field // packed the member into a structure, so need to add an operator of field
...@@ -1685,8 +1683,8 @@ bool OutputHLSL::visitBinary(Visit visit, TIntermBinary *node) ...@@ -1685,8 +1683,8 @@ bool OutputHLSL::visitBinary(Visit visit, TIntermBinary *node)
{ {
const TInterfaceBlock *interfaceBlock = const TInterfaceBlock *interfaceBlock =
GetInterfaceBlockOfUniformBlockNearestIndexOperator(node->getLeft()); GetInterfaceBlockOfUniformBlockNearestIndexOperator(node->getLeft());
if (interfaceBlock && mUniformBlocksTranslatedToStructuredBuffers.count( if (interfaceBlock &&
interfaceBlock->uniqueId().get()) != 0) mUniformBlockOptimizedMap.count(interfaceBlock->uniqueId().get()) != 0)
{ {
// If the uniform block member's type is not structure, we had explicitly // If the uniform block member's type is not structure, we had explicitly
// packed the member into a structure, so need to add an operator of field // packed the member into a structure, so need to add an operator of field
...@@ -1757,8 +1755,8 @@ bool OutputHLSL::visitBinary(Visit visit, TIntermBinary *node) ...@@ -1757,8 +1755,8 @@ bool OutputHLSL::visitBinary(Visit visit, TIntermBinary *node)
node->getLeft()->getType().getInterfaceBlock(); node->getLeft()->getType().getInterfaceBlock();
const TIntermConstantUnion *index = node->getRight()->getAsConstantUnion(); const TIntermConstantUnion *index = node->getRight()->getAsConstantUnion();
const TField *field = interfaceBlock->fields()[index->getIConst(0)]; const TField *field = interfaceBlock->fields()[index->getIConst(0)];
if (structInStd140UniformBlock || mUniformBlocksTranslatedToStructuredBuffers.count( if (structInStd140UniformBlock ||
interfaceBlock->uniqueId().get()) != 0) mUniformBlockOptimizedMap.count(interfaceBlock->uniqueId().get()) != 0)
{ {
out << "_"; out << "_";
} }
......
...@@ -37,22 +37,21 @@ using ReferencedVariables = std::map<int, const TVariable *>; ...@@ -37,22 +37,21 @@ using ReferencedVariables = std::map<int, const TVariable *>;
class OutputHLSL : public TIntermTraverser class OutputHLSL : public TIntermTraverser
{ {
public: public:
OutputHLSL( OutputHLSL(sh::GLenum shaderType,
sh::GLenum shaderType, ShShaderSpec shaderSpec,
ShShaderSpec shaderSpec, int shaderVersion,
int shaderVersion, const TExtensionBehavior &extensionBehavior,
const TExtensionBehavior &extensionBehavior, const char *sourcePath,
const char *sourcePath, ShShaderOutput outputType,
ShShaderOutput outputType, int numRenderTargets,
int numRenderTargets, int maxDualSourceDrawBuffers,
int maxDualSourceDrawBuffers, const std::vector<ShaderVariable> &uniforms,
const std::vector<ShaderVariable> &uniforms, ShCompileOptions compileOptions,
ShCompileOptions compileOptions, sh::WorkGroupSize workGroupSize,
sh::WorkGroupSize workGroupSize, TSymbolTable *symbolTable,
TSymbolTable *symbolTable, PerformanceDiagnostics *perfDiagnostics,
PerformanceDiagnostics *perfDiagnostics, const std::map<int, const TInterfaceBlock *> &uniformBlockOptimizedMap,
const std::map<int, const TInterfaceBlock *> &uniformBlocksTranslatedToStructuredBuffers, const std::vector<InterfaceBlock> &shaderStorageBlocks);
const std::vector<InterfaceBlock> &shaderStorageBlocks);
~OutputHLSL() override; ~OutputHLSL() override;
...@@ -181,7 +180,7 @@ class OutputHLSL : public TIntermTraverser ...@@ -181,7 +180,7 @@ class OutputHLSL : public TIntermTraverser
// Indexed by block id, not instance id. // Indexed by block id, not instance id.
ReferencedInterfaceBlocks mReferencedUniformBlocks; ReferencedInterfaceBlocks mReferencedUniformBlocks;
std::map<int, const TInterfaceBlock *> mUniformBlocksTranslatedToStructuredBuffers; std::map<int, const TInterfaceBlock *> mUniformBlockOptimizedMap;
ReferencedVariables mReferencedAttributes; ReferencedVariables mReferencedAttributes;
ReferencedVariables mReferencedVaryings; ReferencedVariables mReferencedVaryings;
......
...@@ -697,7 +697,7 @@ void ResourcesHLSL::imageMetadataUniforms(TInfoSinkBase &out, unsigned int regIn ...@@ -697,7 +697,7 @@ void ResourcesHLSL::imageMetadataUniforms(TInfoSinkBase &out, unsigned int regIn
TString ResourcesHLSL::uniformBlocksHeader( TString ResourcesHLSL::uniformBlocksHeader(
const ReferencedInterfaceBlocks &referencedInterfaceBlocks, const ReferencedInterfaceBlocks &referencedInterfaceBlocks,
const std::map<int, const TInterfaceBlock *> &uniformBlockTranslatedToStructuredBuffer) const std::map<int, const TInterfaceBlock *> &uniformBlockOptimizedMap)
{ {
TString interfaceBlocks; TString interfaceBlocks;
...@@ -712,7 +712,7 @@ TString ResourcesHLSL::uniformBlocksHeader( ...@@ -712,7 +712,7 @@ TString ResourcesHLSL::uniformBlocksHeader(
// In order to avoid compile performance issue, translate uniform block to structured // In order to avoid compile performance issue, translate uniform block to structured
// buffer. anglebug.com/3682. // buffer. anglebug.com/3682.
if (uniformBlockTranslatedToStructuredBuffer.count(interfaceBlock.uniqueId().get()) != 0) if (uniformBlockOptimizedMap.count(interfaceBlock.uniqueId().get()) != 0)
{ {
unsigned int structuredBufferRegister = mSRVRegister; unsigned int structuredBufferRegister = mSRVRegister;
if (instanceVariable != nullptr && instanceVariable->getType().isArray()) if (instanceVariable != nullptr && instanceVariable->getType().isArray())
......
...@@ -40,7 +40,7 @@ class ResourcesHLSL : angle::NonCopyable ...@@ -40,7 +40,7 @@ class ResourcesHLSL : angle::NonCopyable
void imageMetadataUniforms(TInfoSinkBase &out, unsigned int regIndex); void imageMetadataUniforms(TInfoSinkBase &out, unsigned int regIndex);
TString uniformBlocksHeader( TString uniformBlocksHeader(
const ReferencedInterfaceBlocks &referencedInterfaceBlocks, const ReferencedInterfaceBlocks &referencedInterfaceBlocks,
const std::map<int, const TInterfaceBlock *> &uniformBlockTranslatedToStructuredBuffer); const std::map<int, const TInterfaceBlock *> &uniformBlockOptimizedMap);
TString shaderStorageBlocksHeader(const ReferencedInterfaceBlocks &referencedInterfaceBlocks); TString shaderStorageBlocksHeader(const ReferencedInterfaceBlocks &referencedInterfaceBlocks);
// Used for direct index references // Used for direct index references
......
...@@ -629,6 +629,18 @@ const std::map<std::string, unsigned int> *GetUniformRegisterMap(const ShHandle ...@@ -629,6 +629,18 @@ const std::map<std::string, unsigned int> *GetUniformRegisterMap(const ShHandle
#endif // ANGLE_ENABLE_HLSL #endif // ANGLE_ENABLE_HLSL
} }
// Returns the set exposed by the HLSL translator behind |handle| through
// TranslatorHLSL::getSlowCompilingUniformBlockSet().
// When ANGLE_ENABLE_HLSL is not defined there is no HLSL translator to query and
// nullptr is returned instead, so callers must be prepared for a null result in
// that configuration.
const std::set<std::string> *GetSlowCompilingUniformBlockSet(const ShHandle handle)
{
#ifdef ANGLE_ENABLE_HLSL
TranslatorHLSL *translator = GetTranslatorHLSLFromHandle(handle);
// |handle| is expected to wrap an HLSL translator; the pointer is dereferenced below.
ASSERT(translator);
return translator->getSlowCompilingUniformBlockSet();
#else
return nullptr;
#endif // ANGLE_ENABLE_HLSL
}
unsigned int GetReadonlyImage2DRegisterIndex(const ShHandle handle) unsigned int GetReadonlyImage2DRegisterIndex(const ShHandle handle)
{ {
#ifdef ANGLE_ENABLE_HLSL #ifdef ANGLE_ENABLE_HLSL
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
#include "compiler/translator/tree_ops/BreakVariableAliasingInInnerLoops.h" #include "compiler/translator/tree_ops/BreakVariableAliasingInInnerLoops.h"
#include "compiler/translator/tree_ops/ExpandIntegerPowExpressions.h" #include "compiler/translator/tree_ops/ExpandIntegerPowExpressions.h"
#include "compiler/translator/tree_ops/PruneEmptyCases.h" #include "compiler/translator/tree_ops/PruneEmptyCases.h"
#include "compiler/translator/tree_ops/RecordUniformBlocksTranslatedToStructuredBuffers.h" #include "compiler/translator/tree_ops/RecordUniformBlocksWithLargeArrayMember.h"
#include "compiler/translator/tree_ops/RemoveDynamicIndexing.h" #include "compiler/translator/tree_ops/RemoveDynamicIndexing.h"
#include "compiler/translator/tree_ops/RewriteAtomicFunctionExpressions.h" #include "compiler/translator/tree_ops/RewriteAtomicFunctionExpressions.h"
#include "compiler/translator/tree_ops/RewriteElseBlocks.h" #include "compiler/translator/tree_ops/RewriteElseBlocks.h"
...@@ -184,25 +184,26 @@ bool TranslatorHLSL::translate(TIntermBlock *root, ...@@ -184,25 +184,26 @@ bool TranslatorHLSL::translate(TIntermBlock *root,
} }
} }
mUniformBlocksTranslatedToStructuredBuffers.clear(); mUniformBlockOptimizedMap.clear();
mSlowCompilingUniformBlockSet.clear();
// In order to get the exact maximum of slots are available for shader resources, which would // In order to get the exact maximum of slots are available for shader resources, which would
// been bound with StructuredBuffer, we only translate uniform block with a large array member // been bound with StructuredBuffer, we only translate uniform block with a large array member
// into StructuredBuffer when shader version is 300. // into StructuredBuffer when shader version is 300.
if (getShaderVersion() == 300 && if (getShaderVersion() == 300 &&
(compileOptions & SH_ALLOW_TRANSLATE_UNIFORM_BLOCK_TO_STRUCTUREDBUFFER) != 0) (compileOptions & SH_ALLOW_TRANSLATE_UNIFORM_BLOCK_TO_STRUCTUREDBUFFER) != 0)
{ {
if (!sh::RecordUniformBlocksTranslatedToStructuredBuffers( if (!sh::RecordUniformBlocksWithLargeArrayMember(root, mUniformBlockOptimizedMap,
root, mUniformBlocksTranslatedToStructuredBuffers)) mSlowCompilingUniformBlockSet))
{ {
return false; return false;
} }
} }
sh::OutputHLSL outputHLSL( sh::OutputHLSL outputHLSL(getShaderType(), getShaderSpec(), getShaderVersion(),
getShaderType(), getShaderSpec(), getShaderVersion(), getExtensionBehavior(), getExtensionBehavior(), getSourcePath(), getOutputType(),
getSourcePath(), getOutputType(), numRenderTargets, maxDualSourceDrawBuffers, getUniforms(), numRenderTargets, maxDualSourceDrawBuffers, getUniforms(),
compileOptions, getComputeShaderLocalSize(), &getSymbolTable(), perfDiagnostics, compileOptions, getComputeShaderLocalSize(), &getSymbolTable(),
mUniformBlocksTranslatedToStructuredBuffers, mShaderStorageBlocks); perfDiagnostics, mUniformBlockOptimizedMap, mShaderStorageBlocks);
outputHLSL.output(root, getInfoSink().obj); outputHLSL.output(root, getInfoSink().obj);
...@@ -251,6 +252,11 @@ const std::map<std::string, unsigned int> *TranslatorHLSL::getUniformRegisterMap ...@@ -251,6 +252,11 @@ const std::map<std::string, unsigned int> *TranslatorHLSL::getUniformRegisterMap
return &mUniformRegisterMap; return &mUniformRegisterMap;
} }
// Exposes mSlowCompilingUniformBlockSet, which translate() clears and then passes to
// sh::RecordUniformBlocksWithLargeArrayMember() to be filled in.
// The result is the address of a member, so it is never null; it refers to storage
// owned by this translator.
const std::set<std::string> *TranslatorHLSL::getSlowCompilingUniformBlockSet() const
{
return &mSlowCompilingUniformBlockSet;
}
unsigned int TranslatorHLSL::getReadonlyImage2DRegisterIndex() const unsigned int TranslatorHLSL::getReadonlyImage2DRegisterIndex() const
{ {
return mReadonlyImage2DRegisterIndex; return mReadonlyImage2DRegisterIndex;
......
...@@ -24,6 +24,7 @@ class TranslatorHLSL : public TCompiler ...@@ -24,6 +24,7 @@ class TranslatorHLSL : public TCompiler
bool hasUniformBlock(const std::string &interfaceBlockName) const; bool hasUniformBlock(const std::string &interfaceBlockName) const;
unsigned int getUniformBlockRegister(const std::string &interfaceBlockName) const; unsigned int getUniformBlockRegister(const std::string &interfaceBlockName) const;
bool shouldUniformBlockUseStructuredBuffer(const std::string &uniformBlockName) const; bool shouldUniformBlockUseStructuredBuffer(const std::string &uniformBlockName) const;
const std::set<std::string> *getSlowCompilingUniformBlockSet() const;
const std::map<std::string, unsigned int> *getUniformRegisterMap() const; const std::map<std::string, unsigned int> *getUniformRegisterMap() const;
unsigned int getReadonlyImage2DRegisterIndex() const; unsigned int getReadonlyImage2DRegisterIndex() const;
...@@ -46,7 +47,8 @@ class TranslatorHLSL : public TCompiler ...@@ -46,7 +47,8 @@ class TranslatorHLSL : public TCompiler
unsigned int mReadonlyImage2DRegisterIndex; unsigned int mReadonlyImage2DRegisterIndex;
unsigned int mImage2DRegisterIndex; unsigned int mImage2DRegisterIndex;
std::set<std::string> mUsedImage2DFunctionNames; std::set<std::string> mUsedImage2DFunctionNames;
std::map<int, const TInterfaceBlock *> mUniformBlocksTranslatedToStructuredBuffers; std::map<int, const TInterfaceBlock *> mUniformBlockOptimizedMap;
std::set<std::string> mSlowCompilingUniformBlockSet;
}; };
} // namespace sh } // namespace sh
......
//
// Copyright 2020 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// RecordUniformBlocksTranslatedToStructuredBuffers.h:
// Collect all uniform blocks which will be translated to StructuredBuffers on the Direct3D
// backend.
//
#ifndef COMPILER_TRANSLATOR_TREEOPS_RECORDUNIFORMBLOCKSTRANSLATEDTOSTRUCTUREDBUFFERS_H_
#define COMPILER_TRANSLATOR_TREEOPS_RECORDUNIFORMBLOCKSTRANSLATEDTOSTRUCTUREDBUFFERS_H_
#include "compiler/translator/IntermNode.h"
namespace sh
{
class TIntermNode;
// Records, for the tree rooted at |root|, the uniform blocks that will be translated to
// StructuredBuffers on the Direct3D backend.
//   uniformBlockTranslatedToStructuredBuffer: output map that receives the recorded blocks.
//     NOTE(review): presumably keyed by the interface block's unique id, since OutputHLSL
//     looks entries up with interfaceBlock->uniqueId().get() -- confirm in the .cpp.
// The result must not be ignored (ANGLE_NO_DISCARD); TranslatorHLSL::translate() aborts the
// translation when this returns false.
ANGLE_NO_DISCARD bool RecordUniformBlocksTranslatedToStructuredBuffers(
TIntermNode *root,
std::map<int, const TInterfaceBlock *> &uniformBlockTranslatedToStructuredBuffer);
} // namespace sh
#endif // COMPILER_TRANSLATOR_TREEOPS_RECORDUNIFORMBLOCKSTRANSLATEDTOSTRUCTUREDBUFFERS_H_
//
// Copyright 2020 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// RecordUniformBlocksWithLargeArrayMember.h:
// Collect all uniform blocks that have one or more large array members,
// i.e. arrays whose size is greater than or equal to 50. The blocks among
// them that satisfy certain conditions are translated to StructuredBuffers
// on the Direct3D backend.
//
#ifndef COMPILER_TRANSLATOR_TREEOPS_RECORDUNIFORMBLOCKSWITHLARGEARRAYMEMBER_H_
#define COMPILER_TRANSLATOR_TREEOPS_RECORDUNIFORMBLOCKSWITHLARGEARRAYMEMBER_H_
#include "compiler/translator/IntermNode.h"
namespace sh
{
class TIntermNode;
// Records, for the tree rooted at |root|, the uniform blocks that have large array members.
//   uniformBlockOptimizedMap: output map of the blocks that the HLSL output declares as
//     StructuredBuffers (ResourcesHLSL::uniformBlocksHeader and OutputHLSL consult it).
//     NOTE(review): presumably keyed by the interface block's unique id, since lookups use
//     interfaceBlock->uniqueId().get() -- confirm in the .cpp.
//   slowCompilingUniformBlockSet: output set of strings that ProgramD3D::link later lists
//     in its "re-write the uniform block(s)" performance warning.
//     NOTE(review): presumably the names of large-array blocks that missed the
//     optimization -- confirm in the .cpp.
// The result must not be ignored (ANGLE_NO_DISCARD); TranslatorHLSL::translate() aborts the
// translation when this returns false.
ANGLE_NO_DISCARD bool RecordUniformBlocksWithLargeArrayMember(
TIntermNode *root,
std::map<int, const TInterfaceBlock *> &uniformBlockOptimizedMap,
std::set<std::string> &slowCompilingUniformBlockSet);
} // namespace sh
#endif // COMPILER_TRANSLATOR_TREEOPS_RECORDUNIFORMBLOCKSWITHLARGEARRAYMEMBER_H_
...@@ -2078,6 +2078,27 @@ std::unique_ptr<LinkEvent> ProgramD3D::link(const gl::Context *context, ...@@ -2078,6 +2078,27 @@ std::unique_ptr<LinkEvent> ProgramD3D::link(const gl::Context *context,
shadersD3D[shaderType]->generateWorkarounds(&mShaderWorkarounds[shaderType]); shadersD3D[shaderType]->generateWorkarounds(&mShaderWorkarounds[shaderType]);
mShaderUniformsDirty.set(shaderType); mShaderUniformsDirty.set(shaderType);
// Emit a performance warning that lists every uniform block this shader stage reported
// through getSlowCompilingUniformBlockSet(); nothing is emitted when the set is empty.
const std::set<std::string> &slowBlockNames =
    shadersD3D[shaderType]->getSlowCompilingUniformBlockSet();
if (!slowBlockNames.empty())
{
    std::ostringstream warning;

    // Header line followed by the space-separated, bracketed list of block names.
    warning << "You could get a better shader compiling performance if you re-write"
            << " the uniform block(s)\n[ ";
    for (const std::string &blockName : slowBlockNames)
    {
        warning << blockName << " ";
    }

    // Name the shader stage and point at the design document for the optimization.
    warning << "]\nin the " << gl::GetShaderTypeString(shaderType) << " shader.\n"
            << "You could get more details from "
               "https://chromium.googlesource.com/angle/angle/+/refs/heads/master/"
               "src/libANGLE/renderer/d3d/d3d11/"
               "UniformBlockToStructuredBufferTranslation.md\n";

    ANGLE_PERF_WARNING(context->getState().getDebug(), GL_DEBUG_SEVERITY_MEDIUM,
                       warning.str().c_str());
}
} }
} }
......
...@@ -240,6 +240,11 @@ bool ShaderD3D::useImage2DFunction(const std::string &functionName) const ...@@ -240,6 +240,11 @@ bool ShaderD3D::useImage2DFunction(const std::string &functionName) const
return mUsedImage2DFunctionNames.find(functionName) != mUsedImage2DFunctionNames.end(); return mUsedImage2DFunctionNames.find(functionName) != mUsedImage2DFunctionNames.end();
} }
// Returns the slow-compiling uniform block set cached on this shader.
// ShaderD3D::compile() stores it from sh::GetSlowCompilingUniformBlockSet(), and
// ProgramD3D::link() reads it to build its performance warning.
const std::set<std::string> &ShaderD3D::getSlowCompilingUniformBlockSet() const
{
return mSlowCompilingUniformBlockSet;
}
const std::map<std::string, unsigned int> &GetUniformRegisterMap( const std::map<std::string, unsigned int> &GetUniformRegisterMap(
const std::map<std::string, unsigned int> *uniformRegisterMap) const std::map<std::string, unsigned int> *uniformRegisterMap)
{ {
...@@ -247,6 +252,13 @@ const std::map<std::string, unsigned int> &GetUniformRegisterMap( ...@@ -247,6 +252,13 @@ const std::map<std::string, unsigned int> &GetUniformRegisterMap(
return *uniformRegisterMap; return *uniformRegisterMap;
} }
// Converts the pointer handed back by the translator API into a reference.
// |slowCompilingUniformBlockSet| must not be null: it is only guarded by ASSERT and is
// dereferenced unconditionally.
const std::set<std::string> &GetSlowCompilingUniformBlockSet(
const std::set<std::string> *slowCompilingUniformBlockSet)
{
ASSERT(slowCompilingUniformBlockSet);
return *slowCompilingUniformBlockSet;
}
const std::set<std::string> &GetUsedImage2DFunctionNames( const std::set<std::string> &GetUsedImage2DFunctionNames(
const std::set<std::string> *usedImage2DFunctionNames) const std::set<std::string> *usedImage2DFunctionNames)
{ {
...@@ -330,6 +342,9 @@ std::shared_ptr<WaitableCompileEvent> ShaderD3D::compile(const gl::Context *cont ...@@ -330,6 +342,9 @@ std::shared_ptr<WaitableCompileEvent> ShaderD3D::compile(const gl::Context *cont
} }
} }
mSlowCompilingUniformBlockSet =
GetSlowCompilingUniformBlockSet(sh::GetSlowCompilingUniformBlockSet(compilerHandle));
for (const sh::InterfaceBlock &interfaceBlock : mState.getShaderStorageBlocks()) for (const sh::InterfaceBlock &interfaceBlock : mState.getShaderStorageBlocks())
{ {
if (interfaceBlock.active) if (interfaceBlock.active)
......
...@@ -59,6 +59,7 @@ class ShaderD3D : public ShaderImpl ...@@ -59,6 +59,7 @@ class ShaderD3D : public ShaderImpl
unsigned int getReadonlyImage2DRegisterIndex() const { return mReadonlyImage2DRegisterIndex; } unsigned int getReadonlyImage2DRegisterIndex() const { return mReadonlyImage2DRegisterIndex; }
unsigned int getImage2DRegisterIndex() const { return mImage2DRegisterIndex; } unsigned int getImage2DRegisterIndex() const { return mImage2DRegisterIndex; }
bool useImage2DFunction(const std::string &functionName) const; bool useImage2DFunction(const std::string &functionName) const;
const std::set<std::string> &getSlowCompilingUniformBlockSet() const;
void appendDebugInfo(const std::string &info) const { mDebugInfo += info; } void appendDebugInfo(const std::string &info) const { mDebugInfo += info; }
void generateWorkarounds(angle::CompilerWorkaroundsD3D *workarounds) const; void generateWorkarounds(angle::CompilerWorkaroundsD3D *workarounds) const;
...@@ -104,6 +105,7 @@ class ShaderD3D : public ShaderImpl ...@@ -104,6 +105,7 @@ class ShaderD3D : public ShaderImpl
std::map<std::string, unsigned int> mUniformRegisterMap; std::map<std::string, unsigned int> mUniformRegisterMap;
std::map<std::string, unsigned int> mUniformBlockRegisterMap; std::map<std::string, unsigned int> mUniformBlockRegisterMap;
std::map<std::string, bool> mUniformBlockUseStructuredBufferMap; std::map<std::string, bool> mUniformBlockUseStructuredBufferMap;
std::set<std::string> mSlowCompilingUniformBlockSet;
std::map<std::string, unsigned int> mShaderStorageBlockRegisterMap; std::map<std::string, unsigned int> mShaderStorageBlockRegisterMap;
unsigned int mReadonlyImage2DRegisterIndex; unsigned int mReadonlyImage2DRegisterIndex;
unsigned int mImage2DRegisterIndex; unsigned int mImage2DRegisterIndex;
......
...@@ -3093,6 +3093,51 @@ void main(void){ ...@@ -3093,6 +3093,51 @@ void main(void){
EXPECT_GL_NO_ERROR(); EXPECT_GL_NO_ERROR();
} }
// Test that a performance warning is thrown when a uniform block with a large array
// member fails to hit the StructuredBuffer optimization on the D3D backend.
// The fragment shader declares four std140, row_major uniform blocks, each holding a
// single 128-element array: matrix arrays in UBO1 (no instance name) and UBO2 (named
// instance), struct arrays in UBO3 (no instance name) and UBO4 (instance array).
// NOTE(review): the test only checks that building the program leaves no GL error; it
// does not itself assert that the warning was emitted.
TEST_P(UniformBlockWithOneLargeArrayMemberTest, ThrowPerfWarningInD3D)
{
constexpr char kFS[] = R"(#version 300 es
precision highp float;
struct S1 {
vec2 a[2];
};
struct S2 {
mat2x4 b;
};
layout(std140, row_major) uniform UBO1{
mat3x2 buf1[128];
};
layout(std140, row_major) uniform UBO2{
mat4x3 buf2[128];
} instance1;
layout(std140, row_major) uniform UBO3{
S1 buf3[128];
};
layout(std140, row_major) uniform UBO4{
S2 buf4[128];
} instance2[2];
out vec4 my_FragColor;
void main(void){
uvec2 coord = uvec2(floor(gl_FragCoord.xy));
uint x = coord.x % 64u;
uint y = coord.y;
my_FragColor = vec4(buf1[y]*instance1.buf2[y]*instance2[0].buf4[y].b*buf3[y].a[x], 0.0f, 1.0);
})";
// Build the program from the simple ES3 vertex shader and the fragment shader above.
ANGLE_GL_PROGRAM(program, essl3_shaders::vs::Simple(), kFS);
EXPECT_GL_NO_ERROR();
}
// Use this to select which configurations (e.g. which renderer, which GLES major version) these // Use this to select which configurations (e.g. which renderer, which GLES major version) these
// tests should be run against. // tests should be run against.
ANGLE_INSTANTIATE_TEST_ES3(UniformBufferTest); ANGLE_INSTANTIATE_TEST_ES3(UniformBufferTest);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment