Commit 378653f8 by Xinghua Cao Committed by Commit Bot

D3D: throw a perf warning for uniform block

Previously, a uniform block that contains only a large array member was translated into a StructuredBuffer instead of a cbuffer on the D3D backend, to work around slow fxc compile performance with dynamic uniform indexing. This patch throws a warning if a uniform block containing a large array member fails to hit that optimization. Bug: angleproject:3682 Change-Id: I33459b559923f16a8dfb70c6f46ec52f68d96e06 Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2552365 Commit-Queue: Jamie Madill <jmadill@chromium.org> Reviewed-by: Jamie Madill <jmadill@chromium.org> Reviewed-by: Jiajia Qin <jiajia.qin@intel.com>
parent f0f79e08
......@@ -26,7 +26,7 @@
// Version number for shader translation API.
// It is incremented every time the API changes.
#define ANGLE_SH_VERSION 245
#define ANGLE_SH_VERSION 246
enum ShShaderSpec
{
......@@ -759,6 +759,7 @@ bool GetUniformBlockRegister(const ShHandle handle,
bool ShouldUniformBlockUseStructuredBuffer(const ShHandle handle,
const std::string &uniformBlockName);
const std::set<std::string> *GetSlowCompilingUniformBlockSet(const ShHandle handle);
// Gives a map from uniform names to compiler-assigned registers in the default uniform block.
// Note that the map contains also registers of samplers that have been extracted from structs.
......
......@@ -150,8 +150,8 @@ angle_translator_sources = [
"src/compiler/translator/tree_ops/PruneNoOps.h",
"src/compiler/translator/tree_ops/RecordConstantPrecision.cpp",
"src/compiler/translator/tree_ops/RecordConstantPrecision.h",
"src/compiler/translator/tree_ops/RecordUniformBlocksTranslatedToStructuredBuffers.cpp",
"src/compiler/translator/tree_ops/RecordUniformBlocksTranslatedToStructuredBuffers.h",
"src/compiler/translator/tree_ops/RecordUniformBlocksWithLargeArrayMember.cpp",
"src/compiler/translator/tree_ops/RecordUniformBlocksWithLargeArrayMember.h",
"src/compiler/translator/tree_ops/RegenerateStructNames.cpp",
"src/compiler/translator/tree_ops/RegenerateStructNames.h",
"src/compiler/translator/tree_ops/RemoveArrayLengthMethod.cpp",
......
......@@ -297,22 +297,21 @@ const TConstantUnion *OutputHLSL::writeConstantUnionArray(TInfoSinkBase &out,
return constUnionIterated;
}
OutputHLSL::OutputHLSL(
sh::GLenum shaderType,
ShShaderSpec shaderSpec,
int shaderVersion,
const TExtensionBehavior &extensionBehavior,
const char *sourcePath,
ShShaderOutput outputType,
int numRenderTargets,
int maxDualSourceDrawBuffers,
const std::vector<ShaderVariable> &uniforms,
ShCompileOptions compileOptions,
sh::WorkGroupSize workGroupSize,
TSymbolTable *symbolTable,
PerformanceDiagnostics *perfDiagnostics,
const std::map<int, const TInterfaceBlock *> &uniformBlocksTranslatedToStructuredBuffers,
const std::vector<InterfaceBlock> &shaderStorageBlocks)
OutputHLSL::OutputHLSL(sh::GLenum shaderType,
ShShaderSpec shaderSpec,
int shaderVersion,
const TExtensionBehavior &extensionBehavior,
const char *sourcePath,
ShShaderOutput outputType,
int numRenderTargets,
int maxDualSourceDrawBuffers,
const std::vector<ShaderVariable> &uniforms,
ShCompileOptions compileOptions,
sh::WorkGroupSize workGroupSize,
TSymbolTable *symbolTable,
PerformanceDiagnostics *perfDiagnostics,
const std::map<int, const TInterfaceBlock *> &uniformBlockOptimizedMap,
const std::vector<InterfaceBlock> &shaderStorageBlocks)
: TIntermTraverser(true, true, true, symbolTable),
mShaderType(shaderType),
mShaderSpec(shaderSpec),
......@@ -323,7 +322,7 @@ OutputHLSL::OutputHLSL(
mCompileOptions(compileOptions),
mInsideFunction(false),
mInsideMain(false),
mUniformBlocksTranslatedToStructuredBuffers(uniformBlocksTranslatedToStructuredBuffers),
mUniformBlockOptimizedMap(uniformBlockOptimizedMap),
mNumRenderTargets(numRenderTargets),
mMaxDualSourceDrawBuffers(maxDualSourceDrawBuffers),
mCurrentFunctionMetadata(nullptr),
......@@ -661,8 +660,7 @@ void OutputHLSL::header(TInfoSinkBase &out,
out << mStructureHLSL->structsHeader();
mResourcesHLSL->uniformsHeader(out, mOutputType, mReferencedUniforms, mSymbolTable);
out << mResourcesHLSL->uniformBlocksHeader(mReferencedUniformBlocks,
mUniformBlocksTranslatedToStructuredBuffers);
out << mResourcesHLSL->uniformBlocksHeader(mReferencedUniformBlocks, mUniformBlockOptimizedMap);
mSSBOOutputHLSL->writeShaderStorageBlocksHeader(out);
if (!mEqualityFunctions.empty())
......@@ -1650,8 +1648,8 @@ bool OutputHLSL::visitBinary(Visit visit, TIntermBinary *node)
{
const TInterfaceBlock *interfaceBlock =
GetInterfaceBlockOfUniformBlockNearestIndexOperator(node->getLeft());
if (interfaceBlock && mUniformBlocksTranslatedToStructuredBuffers.count(
interfaceBlock->uniqueId().get()) != 0)
if (interfaceBlock &&
mUniformBlockOptimizedMap.count(interfaceBlock->uniqueId().get()) != 0)
{
// If the uniform block member's type is not structure, we had explicitly
// packed the member into a structure, so need to add an operator of field
......@@ -1685,8 +1683,8 @@ bool OutputHLSL::visitBinary(Visit visit, TIntermBinary *node)
{
const TInterfaceBlock *interfaceBlock =
GetInterfaceBlockOfUniformBlockNearestIndexOperator(node->getLeft());
if (interfaceBlock && mUniformBlocksTranslatedToStructuredBuffers.count(
interfaceBlock->uniqueId().get()) != 0)
if (interfaceBlock &&
mUniformBlockOptimizedMap.count(interfaceBlock->uniqueId().get()) != 0)
{
// If the uniform block member's type is not structure, we had explicitly
// packed the member into a structure, so need to add an operator of field
......@@ -1757,8 +1755,8 @@ bool OutputHLSL::visitBinary(Visit visit, TIntermBinary *node)
node->getLeft()->getType().getInterfaceBlock();
const TIntermConstantUnion *index = node->getRight()->getAsConstantUnion();
const TField *field = interfaceBlock->fields()[index->getIConst(0)];
if (structInStd140UniformBlock || mUniformBlocksTranslatedToStructuredBuffers.count(
interfaceBlock->uniqueId().get()) != 0)
if (structInStd140UniformBlock ||
mUniformBlockOptimizedMap.count(interfaceBlock->uniqueId().get()) != 0)
{
out << "_";
}
......
......@@ -37,22 +37,21 @@ using ReferencedVariables = std::map<int, const TVariable *>;
class OutputHLSL : public TIntermTraverser
{
public:
OutputHLSL(
sh::GLenum shaderType,
ShShaderSpec shaderSpec,
int shaderVersion,
const TExtensionBehavior &extensionBehavior,
const char *sourcePath,
ShShaderOutput outputType,
int numRenderTargets,
int maxDualSourceDrawBuffers,
const std::vector<ShaderVariable> &uniforms,
ShCompileOptions compileOptions,
sh::WorkGroupSize workGroupSize,
TSymbolTable *symbolTable,
PerformanceDiagnostics *perfDiagnostics,
const std::map<int, const TInterfaceBlock *> &uniformBlocksTranslatedToStructuredBuffers,
const std::vector<InterfaceBlock> &shaderStorageBlocks);
OutputHLSL(sh::GLenum shaderType,
ShShaderSpec shaderSpec,
int shaderVersion,
const TExtensionBehavior &extensionBehavior,
const char *sourcePath,
ShShaderOutput outputType,
int numRenderTargets,
int maxDualSourceDrawBuffers,
const std::vector<ShaderVariable> &uniforms,
ShCompileOptions compileOptions,
sh::WorkGroupSize workGroupSize,
TSymbolTable *symbolTable,
PerformanceDiagnostics *perfDiagnostics,
const std::map<int, const TInterfaceBlock *> &uniformBlockOptimizedMap,
const std::vector<InterfaceBlock> &shaderStorageBlocks);
~OutputHLSL() override;
......@@ -181,7 +180,7 @@ class OutputHLSL : public TIntermTraverser
// Indexed by block id, not instance id.
ReferencedInterfaceBlocks mReferencedUniformBlocks;
std::map<int, const TInterfaceBlock *> mUniformBlocksTranslatedToStructuredBuffers;
std::map<int, const TInterfaceBlock *> mUniformBlockOptimizedMap;
ReferencedVariables mReferencedAttributes;
ReferencedVariables mReferencedVaryings;
......
......@@ -697,7 +697,7 @@ void ResourcesHLSL::imageMetadataUniforms(TInfoSinkBase &out, unsigned int regIn
TString ResourcesHLSL::uniformBlocksHeader(
const ReferencedInterfaceBlocks &referencedInterfaceBlocks,
const std::map<int, const TInterfaceBlock *> &uniformBlockTranslatedToStructuredBuffer)
const std::map<int, const TInterfaceBlock *> &uniformBlockOptimizedMap)
{
TString interfaceBlocks;
......@@ -712,7 +712,7 @@ TString ResourcesHLSL::uniformBlocksHeader(
// In order to avoid compile performance issue, translate uniform block to structured
// buffer. anglebug.com/3682.
if (uniformBlockTranslatedToStructuredBuffer.count(interfaceBlock.uniqueId().get()) != 0)
if (uniformBlockOptimizedMap.count(interfaceBlock.uniqueId().get()) != 0)
{
unsigned int structuredBufferRegister = mSRVRegister;
if (instanceVariable != nullptr && instanceVariable->getType().isArray())
......
......@@ -40,7 +40,7 @@ class ResourcesHLSL : angle::NonCopyable
void imageMetadataUniforms(TInfoSinkBase &out, unsigned int regIndex);
TString uniformBlocksHeader(
const ReferencedInterfaceBlocks &referencedInterfaceBlocks,
const std::map<int, const TInterfaceBlock *> &uniformBlockTranslatedToStructuredBuffer);
const std::map<int, const TInterfaceBlock *> &uniformBlockOptimizedMap);
TString shaderStorageBlocksHeader(const ReferencedInterfaceBlocks &referencedInterfaceBlocks);
// Used for direct index references
......
......@@ -629,6 +629,18 @@ const std::map<std::string, unsigned int> *GetUniformRegisterMap(const ShHandle
#endif // ANGLE_ENABLE_HLSL
}
// Returns a pointer to the set of uniform block names that the HLSL translator behind |handle|
// recorded as slow to compile. When ANGLE is built without ANGLE_ENABLE_HLSL there is no HLSL
// translator to query, so nullptr is returned instead.
const std::set<std::string> *GetSlowCompilingUniformBlockSet(const ShHandle handle)
{
#ifdef ANGLE_ENABLE_HLSL
TranslatorHLSL *translator = GetTranslatorHLSLFromHandle(handle);
// The handle is expected to wrap an HLSL translator; anything else is a caller bug.
ASSERT(translator);
return translator->getSlowCompilingUniformBlockSet();
#else
return nullptr;
#endif // ANGLE_ENABLE_HLSL
}
unsigned int GetReadonlyImage2DRegisterIndex(const ShHandle handle)
{
#ifdef ANGLE_ENABLE_HLSL
......
......@@ -12,7 +12,7 @@
#include "compiler/translator/tree_ops/BreakVariableAliasingInInnerLoops.h"
#include "compiler/translator/tree_ops/ExpandIntegerPowExpressions.h"
#include "compiler/translator/tree_ops/PruneEmptyCases.h"
#include "compiler/translator/tree_ops/RecordUniformBlocksTranslatedToStructuredBuffers.h"
#include "compiler/translator/tree_ops/RecordUniformBlocksWithLargeArrayMember.h"
#include "compiler/translator/tree_ops/RemoveDynamicIndexing.h"
#include "compiler/translator/tree_ops/RewriteAtomicFunctionExpressions.h"
#include "compiler/translator/tree_ops/RewriteElseBlocks.h"
......@@ -184,25 +184,26 @@ bool TranslatorHLSL::translate(TIntermBlock *root,
}
}
mUniformBlocksTranslatedToStructuredBuffers.clear();
mUniformBlockOptimizedMap.clear();
mSlowCompilingUniformBlockSet.clear();
// In order to know exactly how many slots are available for the shader resources that will be
// bound as StructuredBuffers, we only translate uniform blocks with a large array member into
// StructuredBuffers when the shader version is 300.
if (getShaderVersion() == 300 &&
(compileOptions & SH_ALLOW_TRANSLATE_UNIFORM_BLOCK_TO_STRUCTUREDBUFFER) != 0)
{
if (!sh::RecordUniformBlocksTranslatedToStructuredBuffers(
root, mUniformBlocksTranslatedToStructuredBuffers))
if (!sh::RecordUniformBlocksWithLargeArrayMember(root, mUniformBlockOptimizedMap,
mSlowCompilingUniformBlockSet))
{
return false;
}
}
sh::OutputHLSL outputHLSL(
getShaderType(), getShaderSpec(), getShaderVersion(), getExtensionBehavior(),
getSourcePath(), getOutputType(), numRenderTargets, maxDualSourceDrawBuffers, getUniforms(),
compileOptions, getComputeShaderLocalSize(), &getSymbolTable(), perfDiagnostics,
mUniformBlocksTranslatedToStructuredBuffers, mShaderStorageBlocks);
sh::OutputHLSL outputHLSL(getShaderType(), getShaderSpec(), getShaderVersion(),
getExtensionBehavior(), getSourcePath(), getOutputType(),
numRenderTargets, maxDualSourceDrawBuffers, getUniforms(),
compileOptions, getComputeShaderLocalSize(), &getSymbolTable(),
perfDiagnostics, mUniformBlockOptimizedMap, mShaderStorageBlocks);
outputHLSL.output(root, getInfoSink().obj);
......@@ -251,6 +252,11 @@ const std::map<std::string, unsigned int> *TranslatorHLSL::getUniformRegisterMap
return &mUniformRegisterMap;
}
// Returns the address of the translator-owned set of slow-compiling uniform block names. The set
// is cleared and refilled by translate(), so the contents reflect the most recent translation.
const std::set<std::string> *TranslatorHLSL::getSlowCompilingUniformBlockSet() const
{
return &mSlowCompilingUniformBlockSet;
}
unsigned int TranslatorHLSL::getReadonlyImage2DRegisterIndex() const
{
return mReadonlyImage2DRegisterIndex;
......
......@@ -24,6 +24,7 @@ class TranslatorHLSL : public TCompiler
bool hasUniformBlock(const std::string &interfaceBlockName) const;
unsigned int getUniformBlockRegister(const std::string &interfaceBlockName) const;
bool shouldUniformBlockUseStructuredBuffer(const std::string &uniformBlockName) const;
const std::set<std::string> *getSlowCompilingUniformBlockSet() const;
const std::map<std::string, unsigned int> *getUniformRegisterMap() const;
unsigned int getReadonlyImage2DRegisterIndex() const;
......@@ -46,7 +47,8 @@ class TranslatorHLSL : public TCompiler
unsigned int mReadonlyImage2DRegisterIndex;
unsigned int mImage2DRegisterIndex;
std::set<std::string> mUsedImage2DFunctionNames;
std::map<int, const TInterfaceBlock *> mUniformBlocksTranslatedToStructuredBuffers;
std::map<int, const TInterfaceBlock *> mUniformBlockOptimizedMap;
std::set<std::string> mSlowCompilingUniformBlockSet;
};
} // namespace sh
......
//
// Copyright 2020 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// RecordUniformBlocksTranslatedToStructuredBuffers.h:
// Collect all uniform blocks which will been translated to StructuredBuffers on Direct3D
// backend.
//
#ifndef COMPILER_TRANSLATOR_TREEOPS_RECORDUNIFORMBLOCKSTRANSLATEDTOSTRUCTUREDBUFFERS_H_
#define COMPILER_TRANSLATOR_TREEOPS_RECORDUNIFORMBLOCKSTRANSLATEDTOSTRUCTUREDBUFFERS_H_
#include "compiler/translator/IntermNode.h"
namespace sh
{
class TIntermNode;
ANGLE_NO_DISCARD bool RecordUniformBlocksTranslatedToStructuredBuffers(
TIntermNode *root,
std::map<int, const TInterfaceBlock *> &uniformBlockTranslatedToStructuredBuffer);
} // namespace sh
#endif // COMPILER_TRANSLATOR_TREEOPS_RECORDACCESSUNIFORMBLOCKENTIREARRAYMEMBER_H_
......@@ -3,12 +3,14 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// RecordUniformBlocksTranslatedToStructuredBuffers.cpp:
// Collect all uniform blocks which will been translated to StructuredBuffers on Direct3D
// backend.
// RecordUniformBlocksWithLargeArrayMember.cpp:
// Collect all uniform blocks which have one or more large array members,
// and the array sizes are greater than or equal to 50. If some of them
// satisfy some conditions, we will translate them to StructuredBuffers
// on Direct3D backend.
//
#include "compiler/translator/tree_ops/RecordUniformBlocksTranslatedToStructuredBuffers.h"
#include "compiler/translator/tree_ops/RecordUniformBlocksWithLargeArrayMember.h"
#include "compiler/translator/Compiler.h"
#include "compiler/translator/tree_util/IntermNode_util.h"
......@@ -24,18 +26,18 @@ namespace
// to a StructuredBuffer on Direct3D backend.
const unsigned int kMinArraySizeUseStructuredBuffer = 50u;
// There is a maximum of D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT(128) slots that are available
// for shader resources on Direct3D 11. When shader version is 300, we only use
// D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT(16) slots for texture units. We allow StructuredBuffer to
// use the maximum of 60 slots, that is enough here.
// There is a maximum of D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT(128) slots that are
// available for shader resources on Direct3D 11. When shader version is 300, we only use
// D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT(16) slots for texture units. We allow StructuredBuffer
// to use the maximum of 60 slots, that is enough here.
const unsigned int kMaxAllowToUseRegisterCount = 60u;
// Traverser that collects all uniform blocks which will been translated to StructuredBuffer on
// Direct3D backend.
class UniformBlockTranslatedToStructuredBufferTraverser : public TIntermTraverser
// Traverser that collects all uniform blocks which have one or more large array members, where
// the array sizes are greater than or equal to 50.
class UniformBlocksWithLargeArrayMemberTraverser : public TIntermTraverser
{
public:
UniformBlockTranslatedToStructuredBufferTraverser();
UniformBlocksWithLargeArrayMemberTraverser();
void visitSymbol(TIntermSymbol *node) override;
bool visitBinary(Visit visit, TIntermBinary *node) override;
......@@ -51,42 +53,50 @@ class UniformBlockTranslatedToStructuredBufferTraverser : public TIntermTraverse
{
return mUniformBlockUsedRegisterCount;
}
std::map<int, const TInterfaceBlock *> &getUniformBlockWithLargeArrayMember()
{
return mUniformBlockWithLargeArrayMember;
}
private:
void parseAccessWholeUniformBlock(TIntermTyped *node);
std::map<int, const TInterfaceBlock *> mUniformBlockMayTranslation;
std::map<int, const TInterfaceBlock *> mUniformBlockNotAllowTranslation;
std::map<int, unsigned int> mUniformBlockUsedRegisterCount;
std::map<int, const TInterfaceBlock *> mUniformBlockWithLargeArrayMember;
};
UniformBlockTranslatedToStructuredBufferTraverser::
UniformBlockTranslatedToStructuredBufferTraverser()
UniformBlocksWithLargeArrayMemberTraverser::UniformBlocksWithLargeArrayMemberTraverser()
: TIntermTraverser(true, true, false)
{}
static bool IsSupportedTypeForStructuredBuffer(const TType &type)
{
const TStructure *structure = type.getStruct();
const TStructure *structure = type.getStruct();
const TLayoutMatrixPacking matrixPacking = type.getLayoutQualifier().matrixPacking;
if (structure)
{
const TFieldList &fields = structure->fields();
for (size_t i = 0; i < fields.size(); i++)
{
const TType &fieldType = *fields[i]->type();
// Do not allow the structure's member is array or structure.
if (fields[i]->type()->isArray() || fields[i]->type()->getStruct() ||
!IsSupportedTypeForStructuredBuffer(*fields[i]->type()))
if (!fieldType.isArray() && !fieldType.getStruct() &&
(fieldType.isScalar() || fieldType.isVector() ||
(fieldType.isMatrix() &&
((matrixPacking != EmpRowMajor && fieldType.getRows() == 4) ||
(matrixPacking == EmpRowMajor && fieldType.getCols() == 4)))))
{
return false;
return true;
}
}
return true;
return false;
}
else if (type.isMatrix())
{
// Only supports the matrix types that we do not need to pad in a structure or an array
// explicitly.
return (type.getLayoutQualifier().matrixPacking != EmpRowMajor && type.getRows() == 4) ||
(type.getLayoutQualifier().matrixPacking == EmpRowMajor && type.getCols() == 4);
return (matrixPacking != EmpRowMajor && type.getRows() == 4) ||
(matrixPacking == EmpRowMajor && type.getCols() == 4);
}
else
{
......@@ -112,7 +122,50 @@ static bool CanTranslateUniformBlockToStructuredBuffer(const TInterfaceBlock &in
return false;
}
void UniformBlockTranslatedToStructuredBufferTraverser::visitSymbol(TIntermSymbol *node)
static bool FieldIsOrHasLargeArrayField(const TField &field)
{
const TType *type = field.type();
if (type->getArraySizeProduct() >= kMinArraySizeUseStructuredBuffer)
{
return true;
}
const TStructure *structure = type->getStruct();
if (structure)
{
const TFieldList &fields = structure->fields();
bool hasLargeArrayField = false;
for (size_t i = 0; i < fields.size(); i++)
{
hasLargeArrayField = FieldIsOrHasLargeArrayField(*fields[i]);
if (hasLargeArrayField)
{
break;
}
}
return hasLargeArrayField;
}
return false;
}
static bool IsInterfaceBlockWithLargeArrayField(const TInterfaceBlock &interfaceBlock)
{
const TFieldList &fields = interfaceBlock.fields();
bool isLargeArrayField = false;
for (size_t i = 0; i < fields.size(); i++)
{
isLargeArrayField = FieldIsOrHasLargeArrayField(*fields[i]);
if (isLargeArrayField)
{
break;
}
}
return isLargeArrayField;
}
void UniformBlocksWithLargeArrayMemberTraverser::visitSymbol(TIntermSymbol *node)
{
const TVariable &variable = node->variable();
const TType &variableType = variable.getType();
......@@ -121,56 +174,80 @@ void UniformBlockTranslatedToStructuredBufferTraverser::visitSymbol(TIntermSymbo
if (qualifier == EvqUniform)
{
const TInterfaceBlock *interfaceBlock = variableType.getInterfaceBlock();
if (interfaceBlock && CanTranslateUniformBlockToStructuredBuffer(*interfaceBlock))
if (interfaceBlock)
{
if (mUniformBlockMayTranslation.count(interfaceBlock->uniqueId().get()) == 0)
if (CanTranslateUniformBlockToStructuredBuffer(*interfaceBlock))
{
mUniformBlockMayTranslation[interfaceBlock->uniqueId().get()] = interfaceBlock;
}
if (mUniformBlockMayTranslation.count(interfaceBlock->uniqueId().get()) == 0)
{
mUniformBlockMayTranslation[interfaceBlock->uniqueId().get()] = interfaceBlock;
}
if (!variableType.isInterfaceBlock())
{
TIntermNode *accessor = getAncestorNode(0);
TIntermBinary *accessorAsBinary = accessor->getAsBinaryNode();
// The uniform block variable is array type, only indexing operator is allowed to
// operate on the variable, otherwise do not translate the uniform block to HLSL
// StructuredBuffer.
if (!accessorAsBinary ||
!(accessorAsBinary && (accessorAsBinary->getOp() == EOpIndexDirect ||
accessorAsBinary->getOp() == EOpIndexIndirect)))
if (!variableType.isInterfaceBlock())
{
if (mUniformBlockNotAllowTranslation.count(interfaceBlock->uniqueId().get()) ==
0)
TIntermNode *accessor = getAncestorNode(0);
TIntermBinary *accessorAsBinary = accessor->getAsBinaryNode();
// The uniform block variable is array type, only indexing operator is allowed
// to operate on the variable, otherwise do not translate the uniform block to
// HLSL StructuredBuffer.
if (!accessorAsBinary ||
!(accessorAsBinary && (accessorAsBinary->getOp() == EOpIndexDirect ||
accessorAsBinary->getOp() == EOpIndexIndirect)))
{
mUniformBlockNotAllowTranslation[interfaceBlock->uniqueId().get()] =
interfaceBlock;
if (mUniformBlockNotAllowTranslation.count(
interfaceBlock->uniqueId().get()) == 0)
{
mUniformBlockNotAllowTranslation[interfaceBlock->uniqueId().get()] =
interfaceBlock;
}
}
else
{
if (mUniformBlockUsedRegisterCount.count(
interfaceBlock->uniqueId().get()) == 0)
{
// The uniform block is not an instanced one, so it only uses one
// register.
mUniformBlockUsedRegisterCount[interfaceBlock->uniqueId().get()] = 1;
}
}
}
else
{
if (mUniformBlockUsedRegisterCount.count(interfaceBlock->uniqueId().get()) == 0)
{
// The uniform block is not an instanced one, so it only uses one register.
mUniformBlockUsedRegisterCount[interfaceBlock->uniqueId().get()] = 1;
// The uniform block is an instanced one, the count of used registers
// depends on the array size of variable.
mUniformBlockUsedRegisterCount[interfaceBlock->uniqueId().get()] =
variableType.isArray() ? variableType.getOutermostArraySize() : 1;
}
}
}
else
if (interfaceBlock->blockStorage() == EbsStd140 &&
IsInterfaceBlockWithLargeArrayField(*interfaceBlock))
{
if (mUniformBlockUsedRegisterCount.count(interfaceBlock->uniqueId().get()) == 0)
if (!variableType.isInterfaceBlock())
{
// The uniform block is an instanced one, the count of used registers depends on
// the array size of variable.
mUniformBlockUsedRegisterCount[interfaceBlock->uniqueId().get()] =
variableType.isArray() ? variableType.getOutermostArraySize() : 1;
TIntermNode *accessor = getAncestorNode(0);
TIntermBinary *accessorAsBinary = accessor->getAsBinaryNode();
if (accessorAsBinary && (accessorAsBinary->getOp() == EOpIndexDirect ||
accessorAsBinary->getOp() == EOpIndexIndirect))
{
if (mUniformBlockWithLargeArrayMember.count(
interfaceBlock->uniqueId().get()) == 0)
{
mUniformBlockWithLargeArrayMember[interfaceBlock->uniqueId().get()] =
interfaceBlock;
}
}
}
}
}
}
}
bool UniformBlockTranslatedToStructuredBufferTraverser::visitBinary(Visit visit,
TIntermBinary *node)
bool UniformBlocksWithLargeArrayMemberTraverser::visitBinary(Visit visit, TIntermBinary *node)
{
switch (node->getOp())
{
......@@ -197,6 +274,17 @@ bool UniformBlockTranslatedToStructuredBufferTraverser::visitBinary(Visit visit,
}
return false;
}
if (interfaceBlock->blockStorage() == EbsStd140 &&
IsInterfaceBlockWithLargeArrayField(*interfaceBlock))
{
if (mUniformBlockWithLargeArrayMember.count(
interfaceBlock->uniqueId().get()) == 0)
{
mUniformBlockWithLargeArrayMember[interfaceBlock->uniqueId().get()] =
interfaceBlock;
}
}
}
}
break;
......@@ -212,9 +300,8 @@ bool UniformBlockTranslatedToStructuredBufferTraverser::visitBinary(Visit visit,
TIntermNode *accessor = getAncestorNode(0);
TIntermBinary *accessorAsBinary = accessor->getAsBinaryNode();
// The uniform block variable is array type, only indexing operator is allowed
// to operate on the
// variable, otherwise do not translate the uniform block to HLSL
// StructuredBuffer.
// to operate on the variable, otherwise do not translate the uniform block to
// HLSL StructuredBuffer.
if ((!accessorAsBinary ||
!(accessorAsBinary && (accessorAsBinary->getOp() == EOpIndexDirect ||
accessorAsBinary->getOp() == EOpIndexIndirect))) &&
......@@ -226,6 +313,23 @@ bool UniformBlockTranslatedToStructuredBufferTraverser::visitBinary(Visit visit,
return false;
}
}
if (interfaceBlock->blockStorage() == EbsStd140 &&
IsInterfaceBlockWithLargeArrayField(*interfaceBlock))
{
TIntermNode *accessor = getAncestorNode(0);
TIntermBinary *accessorAsBinary = accessor->getAsBinaryNode();
if (accessorAsBinary && (accessorAsBinary->getOp() == EOpIndexDirect ||
accessorAsBinary->getOp() == EOpIndexIndirect))
{
if (mUniformBlockWithLargeArrayMember.count(
interfaceBlock->uniqueId().get()) == 0)
{
mUniformBlockWithLargeArrayMember[interfaceBlock->uniqueId().get()] =
interfaceBlock;
}
}
}
}
break;
}
......@@ -237,11 +341,12 @@ bool UniformBlockTranslatedToStructuredBufferTraverser::visitBinary(Visit visit,
}
} // namespace
bool RecordUniformBlocksTranslatedToStructuredBuffers(
bool RecordUniformBlocksWithLargeArrayMember(
TIntermNode *root,
std::map<int, const TInterfaceBlock *> &uniformBlockTranslatedToStructuredBuffer)
std::map<int, const TInterfaceBlock *> &uniformBlockOptimizedMap,
std::set<std::string> &slowCompilingUniformBlockSet)
{
UniformBlockTranslatedToStructuredBufferTraverser traverser;
UniformBlocksWithLargeArrayMemberTraverser traverser;
root->traverse(&traverser);
std::map<int, const TInterfaceBlock *> &uniformBlockMayTranslation =
traverser.getUniformBlockMayTranslation();
......@@ -249,6 +354,8 @@ bool RecordUniformBlocksTranslatedToStructuredBuffers(
traverser.getUniformBlockNotAllowTranslation();
std::map<int, unsigned int> &uniformBlockUsedRegisterCount =
traverser.getUniformBlockUsedRegisterCount();
std::map<int, const TInterfaceBlock *> &uniformBlockWithLargeArrayMember =
traverser.getUniformBlockWithLargeArrayMember();
unsigned int usedRegisterCount = 0;
for (auto &uniformBlock : uniformBlockMayTranslation)
......@@ -260,9 +367,18 @@ bool RecordUniformBlocksTranslatedToStructuredBuffers(
{
break;
}
uniformBlockTranslatedToStructuredBuffer[uniformBlock.first] = uniformBlock.second;
uniformBlockOptimizedMap[uniformBlock.first] = uniformBlock.second;
}
}
for (auto &uniformBlock : uniformBlockWithLargeArrayMember)
{
if (uniformBlockOptimizedMap.count(uniformBlock.first) == 0)
{
slowCompilingUniformBlockSet.insert(uniformBlock.second->name().data());
}
}
return true;
}
......
//
// Copyright 2020 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// RecordUniformBlocksWithLargeArrayMember.h:
// Collect all uniform blocks which have one or more large array members,
// and the array sizes are greater than or equal to 50. If some of them
// satisfy some conditions, we will translate them to StructuredBuffers
// on Direct3D backend.
//
#ifndef COMPILER_TRANSLATOR_TREEOPS_RECORDUNIFORMBLOCKSWITHLARGEARRAYMEMBER_H_
#define COMPILER_TRANSLATOR_TREEOPS_RECORDUNIFORMBLOCKSWITHLARGEARRAYMEMBER_H_
#include "compiler/translator/IntermNode.h"
namespace sh
{
class TIntermNode;
// Traverses the tree under |root| and reports two results through the out-parameters:
//  - |uniformBlockOptimizedMap| receives, keyed by interface block unique id, the uniform blocks
//    that will be translated to StructuredBuffers.
//  - |slowCompilingUniformBlockSet| receives the names of uniform blocks that have a large array
//    member but did not end up in |uniformBlockOptimizedMap|.
// Neither container is cleared here; entries are only added.
// NOTE(review): the visible implementation ends with `return true`; confirm whether any failure
// path exists before relying on the return value.
ANGLE_NO_DISCARD bool RecordUniformBlocksWithLargeArrayMember(
TIntermNode *root,
std::map<int, const TInterfaceBlock *> &uniformBlockOptimizedMap,
std::set<std::string> &slowCompilingUniformBlockSet);
} // namespace sh
#endif // COMPILER_TRANSLATOR_TREEOPS_RECORDUNIFORMBLOCKSWITHLARGEARRAYMEMBER_H_
......@@ -2078,6 +2078,27 @@ std::unique_ptr<LinkEvent> ProgramD3D::link(const gl::Context *context,
shadersD3D[shaderType]->generateWorkarounds(&mShaderWorkarounds[shaderType]);
mShaderUniformsDirty.set(shaderType);
const std::set<std::string> &slowCompilingUniformBlockSet =
shadersD3D[shaderType]->getSlowCompilingUniformBlockSet();
if (slowCompilingUniformBlockSet.size() > 0)
{
std::ostringstream stream;
stream << "You could get a better shader compiling performance if you re-write"
<< " the uniform block(s)\n[ ";
for (const std::string &str : slowCompilingUniformBlockSet)
{
stream << str << " ";
}
stream << "]\nin the " << gl::GetShaderTypeString(shaderType) << " shader.\n";
stream << "You could get more details from "
"https://chromium.googlesource.com/angle/angle/+/refs/heads/master/"
"src/libANGLE/renderer/d3d/d3d11/"
"UniformBlockToStructuredBufferTranslation.md\n";
ANGLE_PERF_WARNING(context->getState().getDebug(), GL_DEBUG_SEVERITY_MEDIUM,
stream.str().c_str());
}
}
}
......
......@@ -240,6 +240,11 @@ bool ShaderD3D::useImage2DFunction(const std::string &functionName) const
return mUsedImage2DFunctionNames.find(functionName) != mUsedImage2DFunctionNames.end();
}
// Returns the names of the uniform blocks that the translator reported as slow to compile for
// this shader. The set is assigned in compile() from the translator's result.
const std::set<std::string> &ShaderD3D::getSlowCompilingUniformBlockSet() const
{
return mSlowCompilingUniformBlockSet;
}
const std::map<std::string, unsigned int> &GetUniformRegisterMap(
const std::map<std::string, unsigned int> *uniformRegisterMap)
{
......@@ -247,6 +252,13 @@ const std::map<std::string, unsigned int> &GetUniformRegisterMap(
return *uniformRegisterMap;
}
// Converts the pointer handed back by the translator API into a reference. A null pointer is
// treated as a programming error and trips the ASSERT.
const std::set<std::string> &GetSlowCompilingUniformBlockSet(
const std::set<std::string> *slowCompilingUniformBlockSet)
{
ASSERT(slowCompilingUniformBlockSet);
return *slowCompilingUniformBlockSet;
}
const std::set<std::string> &GetUsedImage2DFunctionNames(
const std::set<std::string> *usedImage2DFunctionNames)
{
......@@ -330,6 +342,9 @@ std::shared_ptr<WaitableCompileEvent> ShaderD3D::compile(const gl::Context *cont
}
}
mSlowCompilingUniformBlockSet =
GetSlowCompilingUniformBlockSet(sh::GetSlowCompilingUniformBlockSet(compilerHandle));
for (const sh::InterfaceBlock &interfaceBlock : mState.getShaderStorageBlocks())
{
if (interfaceBlock.active)
......
......@@ -59,6 +59,7 @@ class ShaderD3D : public ShaderImpl
unsigned int getReadonlyImage2DRegisterIndex() const { return mReadonlyImage2DRegisterIndex; }
unsigned int getImage2DRegisterIndex() const { return mImage2DRegisterIndex; }
bool useImage2DFunction(const std::string &functionName) const;
const std::set<std::string> &getSlowCompilingUniformBlockSet() const;
void appendDebugInfo(const std::string &info) const { mDebugInfo += info; }
void generateWorkarounds(angle::CompilerWorkaroundsD3D *workarounds) const;
......@@ -104,6 +105,7 @@ class ShaderD3D : public ShaderImpl
std::map<std::string, unsigned int> mUniformRegisterMap;
std::map<std::string, unsigned int> mUniformBlockRegisterMap;
std::map<std::string, bool> mUniformBlockUseStructuredBufferMap;
std::set<std::string> mSlowCompilingUniformBlockSet;
std::map<std::string, unsigned int> mShaderStorageBlockRegisterMap;
unsigned int mReadonlyImage2DRegisterIndex;
unsigned int mImage2DRegisterIndex;
......
......@@ -3093,6 +3093,51 @@ void main(void){
EXPECT_GL_NO_ERROR();
}
// Exercises the path where uniform blocks with a large array member fail to hit the
// StructuredBuffer optimization on the D3D backend, which is expected to raise a performance
// warning. The shader declares four std140, row_major uniform blocks (plain, instanced, and
// instanced-array forms), each holding a 128-element array.
// NOTE(review): the test only checks that the program builds without a GL error; it does not
// inspect the warning itself. The member types are presumably chosen so that the blocks are not
// eligible for translation - confirm against IsSupportedTypeForStructuredBuffer().
TEST_P(UniformBlockWithOneLargeArrayMemberTest, ThrowPerfWarningInD3D)
{
constexpr char kFS[] = R"(#version 300 es
precision highp float;
struct S1 {
vec2 a[2];
};
struct S2 {
mat2x4 b;
};
layout(std140, row_major) uniform UBO1{
mat3x2 buf1[128];
};
layout(std140, row_major) uniform UBO2{
mat4x3 buf2[128];
} instance1;
layout(std140, row_major) uniform UBO3{
S1 buf3[128];
};
layout(std140, row_major) uniform UBO4{
S2 buf4[128];
} instance2[2];
out vec4 my_FragColor;
void main(void){
uvec2 coord = uvec2(floor(gl_FragCoord.xy));
uint x = coord.x % 64u;
uint y = coord.y;
my_FragColor = vec4(buf1[y]*instance1.buf2[y]*instance2[0].buf4[y].b*buf3[y].a[x], 0.0f, 1.0);
})";
// Compile and link with the stock simple vertex shader.
ANGLE_GL_PROGRAM(program, essl3_shaders::vs::Simple(), kFS);
EXPECT_GL_NO_ERROR();
}
// Use this to select which configurations (e.g. which renderer, which GLES major version) these
// tests should be run against.
ANGLE_INSTANTIATE_TEST_ES3(UniformBufferTest);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment