Commit d8724a94 by Olli Etuaho Committed by Commit Bot

Start D3D constant register allocations from 1 on NVIDIA

Recent NVIDIA drivers have a bug where a specific optimized path inside the driver doesn't handle constant register 0 correctly. Work around this by starting constant register allocations from 1.

This should make sure that the bug doesn't trigger if the ordering of uniforms is changed on the D3D backend. The repro case seems to require some specific driver state to be set that's used inside Chromium. Because of this we have not been able to develop a standalone test case so far.

The maximum number of available uniform slots is reduced accordingly. This should not take them below required minimums in the spec.

BUG=angleproject:2294
TEST=WebGL tests on passthrough command buffer, angle_end2end_tests --gtest_filter=*GLSLTest*Uniform*

Change-Id: I92fff71efe5432ea7f15a7e90d497492514c65dc
Reviewed-on: https://chromium-review.googlesource.com/847481
Commit-Queue: Olli Etuaho <oetuaho@nvidia.com>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Reviewed-by: Jamie Madill <jmadill@chromium.org>
parent 910a3daf
...@@ -253,6 +253,11 @@ const ShCompileOptions SH_REWRITE_VECTOR_SCALAR_ARITHMETIC = UINT64_C(1) << 35; ...@@ -253,6 +253,11 @@ const ShCompileOptions SH_REWRITE_VECTOR_SCALAR_ARITHMETIC = UINT64_C(1) << 35;
// variable initialization is turned on. // variable initialization is turned on.
const ShCompileOptions SH_DONT_USE_LOOPS_TO_INITIALIZE_VARIABLES = UINT64_C(1) << 36; const ShCompileOptions SH_DONT_USE_LOOPS_TO_INITIALIZE_VARIABLES = UINT64_C(1) << 36;
// Don't use D3D constant register zero when allocating space for uniforms. This is targeted to work
// around a bug in NVIDIA D3D driver version 388.59 where in very specific cases the driver would
// not handle constant register zero correctly. Only has an effect on HLSL translation.
const ShCompileOptions SH_SKIP_D3D_CONSTANT_REGISTER_ZERO = UINT64_C(1) << 37;
// Defines alternate strategies for implementing array index clamping. // Defines alternate strategies for implementing array index clamping.
enum ShArrayIndexClampingStrategy enum ShArrayIndexClampingStrategy
{ {
......
...@@ -128,6 +128,11 @@ struct WorkaroundsD3D ...@@ -128,6 +128,11 @@ struct WorkaroundsD3D
// set viewport a large size on Intel windows platforms. So we enable depth buffer if stencil // set viewport a large size on Intel windows platforms. So we enable depth buffer if stencil
// buffer is enabled to workaround this issue. See http://crbug.com/782317 // buffer is enabled to workaround this issue. See http://crbug.com/782317
bool enableDepthBufferWhenStencilBufferEnabled = false; bool enableDepthBufferWhenStencilBufferEnabled = false;
// Don't use D3D constant register zero when allocating space for uniforms. This is targeted to
// work around a bug in NVIDIA D3D driver version 388.59 where in very specific cases the driver
// would not handle constant register zero correctly.
bool skipConstantRegisterZero = false;
}; };
} // namespace angle } // namespace angle
......
...@@ -191,10 +191,14 @@ OutputHLSL::OutputHLSL(sh::GLenum shaderType, ...@@ -191,10 +191,14 @@ OutputHLSL::OutputHLSL(sh::GLenum shaderType,
mExcessiveLoopIndex = nullptr; mExcessiveLoopIndex = nullptr;
mStructureHLSL = new StructureHLSL; mStructureHLSL = new StructureHLSL;
mUniformHLSL = new UniformHLSL(shaderType, mStructureHLSL, outputType, uniforms);
mTextureFunctionHLSL = new TextureFunctionHLSL; mTextureFunctionHLSL = new TextureFunctionHLSL;
mImageFunctionHLSL = new ImageFunctionHLSL; mImageFunctionHLSL = new ImageFunctionHLSL;
unsigned int firstUniformRegister =
((compileOptions & SH_SKIP_D3D_CONSTANT_REGISTER_ZERO) != 0) ? 1u : 0u;
mUniformHLSL =
new UniformHLSL(shaderType, mStructureHLSL, outputType, uniforms, firstUniformRegister);
if (mOutputType == SH_HLSL_3_0_OUTPUT) if (mOutputType == SH_HLSL_3_0_OUTPUT)
{ {
// Fragment shaders need dx_DepthRange, dx_ViewCoords and dx_DepthFront. // Fragment shaders need dx_DepthRange, dx_ViewCoords and dx_DepthFront.
......
...@@ -95,8 +95,9 @@ void OutputSamplerIndexArrayInitializer(TInfoSinkBase &out, ...@@ -95,8 +95,9 @@ void OutputSamplerIndexArrayInitializer(TInfoSinkBase &out,
UniformHLSL::UniformHLSL(sh::GLenum shaderType, UniformHLSL::UniformHLSL(sh::GLenum shaderType,
StructureHLSL *structureHLSL, StructureHLSL *structureHLSL,
ShShaderOutput outputType, ShShaderOutput outputType,
const std::vector<Uniform> &uniforms) const std::vector<Uniform> &uniforms,
: mUniformRegister(0), unsigned int firstUniformRegister)
: mUniformRegister(firstUniformRegister),
mUniformBlockRegister(0), mUniformBlockRegister(0),
mTextureRegister(0), mTextureRegister(0),
mRWTextureRegister(0), mRWTextureRegister(0),
......
...@@ -24,7 +24,8 @@ class UniformHLSL : angle::NonCopyable ...@@ -24,7 +24,8 @@ class UniformHLSL : angle::NonCopyable
UniformHLSL(sh::GLenum shaderType, UniformHLSL(sh::GLenum shaderType,
StructureHLSL *structureHLSL, StructureHLSL *structureHLSL,
ShShaderOutput outputType, ShShaderOutput outputType,
const std::vector<Uniform> &uniforms); const std::vector<Uniform> &uniforms,
unsigned int firstUniformRegister);
void reserveUniformRegisters(unsigned int registerCount); void reserveUniformRegisters(unsigned int registerCount);
void reserveUniformBlockRegisters(unsigned int registerCount); void reserveUniformBlockRegisters(unsigned int registerCount);
......
...@@ -73,6 +73,10 @@ ShaderD3D::ShaderD3D(const gl::ShaderState &data, ...@@ -73,6 +73,10 @@ ShaderD3D::ShaderD3D(const gl::ShaderState &data,
{ {
mAdditionalOptions |= SH_EMULATE_ISNAN_FLOAT_FUNCTION; mAdditionalOptions |= SH_EMULATE_ISNAN_FLOAT_FUNCTION;
} }
if (workarounds.skipConstantRegisterZero)
{
mAdditionalOptions |= SH_SKIP_D3D_CONSTANT_REGISTER_ZERO;
}
if (extensions.multiview) if (extensions.multiview)
{ {
mAdditionalOptions |= SH_INITIALIZE_BUILTINS_FOR_INSTANCED_MULTIVIEW; mAdditionalOptions |= SH_INITIALIZE_BUILTINS_FOR_INSTANCED_MULTIVIEW;
......
...@@ -3799,8 +3799,8 @@ void Renderer11::generateCaps(gl::Caps *outCaps, ...@@ -3799,8 +3799,8 @@ void Renderer11::generateCaps(gl::Caps *outCaps,
gl::Extensions *outExtensions, gl::Extensions *outExtensions,
gl::Limitations *outLimitations) const gl::Limitations *outLimitations) const
{ {
d3d11_gl::GenerateCaps(mDevice, mDeviceContext, mRenderer11DeviceCaps, outCaps, outTextureCaps, d3d11_gl::GenerateCaps(mDevice, mDeviceContext, mRenderer11DeviceCaps, getWorkarounds(),
outExtensions, outLimitations); outCaps, outTextureCaps, outExtensions, outLimitations);
} }
angle::WorkaroundsD3D Renderer11::generateWorkarounds() const angle::WorkaroundsD3D Renderer11::generateWorkarounds() const
......
...@@ -1268,8 +1268,14 @@ unsigned int GetMaxSampleMaskWords(D3D_FEATURE_LEVEL featureLevel) ...@@ -1268,8 +1268,14 @@ unsigned int GetMaxSampleMaskWords(D3D_FEATURE_LEVEL featureLevel)
} }
} }
void GenerateCaps(ID3D11Device *device, ID3D11DeviceContext *deviceContext, const Renderer11DeviceCaps &renderer11DeviceCaps, gl::Caps *caps, void GenerateCaps(ID3D11Device *device,
gl::TextureCapsMap *textureCapsMap, gl::Extensions *extensions, gl::Limitations *limitations) ID3D11DeviceContext *deviceContext,
const Renderer11DeviceCaps &renderer11DeviceCaps,
const angle::WorkaroundsD3D &workarounds,
gl::Caps *caps,
gl::TextureCapsMap *textureCapsMap,
gl::Extensions *extensions,
gl::Limitations *limitations)
{ {
D3D_FEATURE_LEVEL featureLevel = renderer11DeviceCaps.featureLevel; D3D_FEATURE_LEVEL featureLevel = renderer11DeviceCaps.featureLevel;
const gl::FormatSet &allFormats = gl::GetAllSizedInternalFormats(); const gl::FormatSet &allFormats = gl::GetAllSizedInternalFormats();
...@@ -1344,10 +1350,13 @@ void GenerateCaps(ID3D11Device *device, ID3D11DeviceContext *deviceContext, cons ...@@ -1344,10 +1350,13 @@ void GenerateCaps(ID3D11Device *device, ID3D11DeviceContext *deviceContext, cons
// Vertex shader limits // Vertex shader limits
caps->maxVertexAttributes = static_cast<GLuint>(GetMaximumVertexInputSlots(featureLevel)); caps->maxVertexAttributes = static_cast<GLuint>(GetMaximumVertexInputSlots(featureLevel));
caps->maxVertexUniformComponents =
static_cast<GLuint>(GetMaximumVertexUniformVectors(featureLevel)) * 4;
caps->maxVertexUniformVectors = caps->maxVertexUniformVectors =
static_cast<GLuint>(GetMaximumVertexUniformVectors(featureLevel)); static_cast<GLuint>(GetMaximumVertexUniformVectors(featureLevel));
if (workarounds.skipConstantRegisterZero)
{
caps->maxVertexUniformVectors -= 1;
}
caps->maxVertexUniformComponents = caps->maxVertexUniformVectors * 4;
caps->maxVertexUniformBlocks = static_cast<GLuint>(GetMaximumVertexUniformBlocks(featureLevel)); caps->maxVertexUniformBlocks = static_cast<GLuint>(GetMaximumVertexUniformBlocks(featureLevel));
caps->maxVertexOutputComponents = caps->maxVertexOutputComponents =
static_cast<GLuint>(GetMaximumVertexOutputVectors(featureLevel)) * 4; static_cast<GLuint>(GetMaximumVertexOutputVectors(featureLevel)) * 4;
...@@ -1363,10 +1372,13 @@ void GenerateCaps(ID3D11Device *device, ID3D11DeviceContext *deviceContext, cons ...@@ -1363,10 +1372,13 @@ void GenerateCaps(ID3D11Device *device, ID3D11DeviceContext *deviceContext, cons
caps->maxVertexAttribStride = 2048; caps->maxVertexAttribStride = 2048;
// Fragment shader limits // Fragment shader limits
caps->maxFragmentUniformComponents =
static_cast<GLuint>(GetMaximumPixelUniformVectors(featureLevel)) * 4;
caps->maxFragmentUniformVectors = caps->maxFragmentUniformVectors =
static_cast<GLuint>(GetMaximumPixelUniformVectors(featureLevel)); static_cast<GLuint>(GetMaximumPixelUniformVectors(featureLevel));
if (workarounds.skipConstantRegisterZero)
{
caps->maxFragmentUniformVectors -= 1;
}
caps->maxFragmentUniformComponents = caps->maxFragmentUniformVectors * 4;
caps->maxFragmentUniformBlocks = caps->maxFragmentUniformBlocks =
static_cast<GLuint>(GetMaximumPixelUniformBlocks(featureLevel)); static_cast<GLuint>(GetMaximumPixelUniformBlocks(featureLevel));
caps->maxFragmentInputComponents = caps->maxFragmentInputComponents =
...@@ -1382,6 +1394,10 @@ void GenerateCaps(ID3D11Device *device, ID3D11DeviceContext *deviceContext, cons ...@@ -1382,6 +1394,10 @@ void GenerateCaps(ID3D11Device *device, ID3D11DeviceContext *deviceContext, cons
static_cast<GLuint>(GetMaxComputeWorkGroupInvocations(featureLevel)); static_cast<GLuint>(GetMaxComputeWorkGroupInvocations(featureLevel));
caps->maxComputeUniformComponents = caps->maxComputeUniformComponents =
static_cast<GLuint>(GetMaximumComputeUniformVectors(featureLevel)) * 4; static_cast<GLuint>(GetMaximumComputeUniformVectors(featureLevel)) * 4;
if (workarounds.skipConstantRegisterZero)
{
caps->maxComputeUniformComponents -= 4;
}
caps->maxComputeUniformBlocks = caps->maxComputeUniformBlocks =
static_cast<GLuint>(GetMaximumComputeUniformBlocks(featureLevel)); static_cast<GLuint>(GetMaximumComputeUniformBlocks(featureLevel));
caps->maxComputeTextureImageUnits = caps->maxComputeTextureImageUnits =
...@@ -2198,6 +2214,7 @@ angle::WorkaroundsD3D GenerateWorkarounds(const Renderer11DeviceCaps &deviceCaps ...@@ -2198,6 +2214,7 @@ angle::WorkaroundsD3D GenerateWorkarounds(const Renderer11DeviceCaps &deviceCaps
workarounds.flushAfterEndingTransformFeedback = IsNvidia(adapterDesc.VendorId); workarounds.flushAfterEndingTransformFeedback = IsNvidia(adapterDesc.VendorId);
workarounds.getDimensionsIgnoresBaseLevel = IsNvidia(adapterDesc.VendorId); workarounds.getDimensionsIgnoresBaseLevel = IsNvidia(adapterDesc.VendorId);
workarounds.skipConstantRegisterZero = IsNvidia(adapterDesc.VendorId);
if (IsIntel(adapterDesc.VendorId)) if (IsIntel(adapterDesc.VendorId))
{ {
......
...@@ -68,8 +68,14 @@ unsigned int GetReservedVertexUniformVectors(D3D_FEATURE_LEVEL featureLevel); ...@@ -68,8 +68,14 @@ unsigned int GetReservedVertexUniformVectors(D3D_FEATURE_LEVEL featureLevel);
unsigned int GetReservedFragmentUniformVectors(D3D_FEATURE_LEVEL featureLevel); unsigned int GetReservedFragmentUniformVectors(D3D_FEATURE_LEVEL featureLevel);
gl::Version GetMaximumClientVersion(D3D_FEATURE_LEVEL featureLevel); gl::Version GetMaximumClientVersion(D3D_FEATURE_LEVEL featureLevel);
void GenerateCaps(ID3D11Device *device, ID3D11DeviceContext *deviceContext, const Renderer11DeviceCaps &renderer11DeviceCaps, gl::Caps *caps, void GenerateCaps(ID3D11Device *device,
gl::TextureCapsMap *textureCapsMap, gl::Extensions *extensions, gl::Limitations *limitations); ID3D11DeviceContext *deviceContext,
const Renderer11DeviceCaps &renderer11DeviceCaps,
const angle::WorkaroundsD3D &workarounds,
gl::Caps *caps,
gl::TextureCapsMap *textureCapsMap,
gl::Extensions *extensions,
gl::Limitations *limitations);
void GetSamplePosition(GLsizei sampleCount, size_t index, GLfloat *xy); void GetSamplePosition(GLsizei sampleCount, size_t index, GLfloat *xy);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment