Commit d8724a94 by Olli Etuaho Committed by Commit Bot

Start D3D constant register allocations from 1 on NVIDIA

Recent NVIDIA drivers have a bug where a specific optimized path inside the driver doesn't handle constant register 0 correctly. Work around this by starting constant register allocations from 1. This should make sure that the bug doesn't trigger if the ordering of uniforms is changed on the D3D backend.

The repro case seems to require some specific driver state to be set that's used inside Chromium. Because of this we have not been able to develop a standalone test case so far.

The maximum number of available uniform slots is reduced accordingly. This should not take them below required minimums in the spec.

BUG=angleproject:2294
TEST=WebGL tests on passthrough command buffer, angle_end2end_tests --gtest_filter=*GLSLTest*Uniform*

Change-Id: I92fff71efe5432ea7f15a7e90d497492514c65dc
Reviewed-on: https://chromium-review.googlesource.com/847481
Commit-Queue: Olli Etuaho <oetuaho@nvidia.com>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Reviewed-by: Jamie Madill <jmadill@chromium.org>
parent 910a3daf
......@@ -253,6 +253,11 @@ const ShCompileOptions SH_REWRITE_VECTOR_SCALAR_ARITHMETIC = UINT64_C(1) << 35;
// variable initialization is turned on.
const ShCompileOptions SH_DONT_USE_LOOPS_TO_INITIALIZE_VARIABLES = UINT64_C(1) << 36;
// Don't use D3D constant register zero when allocating space for uniforms. This is targeted to work
// around a bug in NVIDIA D3D driver version 388.59 where in very specific cases the driver would
// not handle constant register zero correctly. Only has an effect on HLSL translation.
const ShCompileOptions SH_SKIP_D3D_CONSTANT_REGISTER_ZERO = UINT64_C(1) << 37;
// Defines alternate strategies for implementing array index clamping.
enum ShArrayIndexClampingStrategy
{
......
......@@ -128,6 +128,11 @@ struct WorkaroundsD3D
// set viewport a large size on Intel windows platforms. So we enable depth buffer if stencil
// buffer is enabled to workaround this issue. See http://crbug.com/782317
bool enableDepthBufferWhenStencilBufferEnabled = false;
// Don't use D3D constant register zero when allocating space for uniforms. This is targeted to
// work around a bug in NVIDIA D3D driver version 388.59 where in very specific cases the driver
// would not handle constant register zero correctly.
bool skipConstantRegisterZero = false;
};
} // namespace angle
......
......@@ -191,10 +191,14 @@ OutputHLSL::OutputHLSL(sh::GLenum shaderType,
mExcessiveLoopIndex = nullptr;
mStructureHLSL = new StructureHLSL;
mUniformHLSL = new UniformHLSL(shaderType, mStructureHLSL, outputType, uniforms);
mTextureFunctionHLSL = new TextureFunctionHLSL;
mImageFunctionHLSL = new ImageFunctionHLSL;
unsigned int firstUniformRegister =
((compileOptions & SH_SKIP_D3D_CONSTANT_REGISTER_ZERO) != 0) ? 1u : 0u;
mUniformHLSL =
new UniformHLSL(shaderType, mStructureHLSL, outputType, uniforms, firstUniformRegister);
if (mOutputType == SH_HLSL_3_0_OUTPUT)
{
// Fragment shaders need dx_DepthRange, dx_ViewCoords and dx_DepthFront.
......
......@@ -95,8 +95,9 @@ void OutputSamplerIndexArrayInitializer(TInfoSinkBase &out,
UniformHLSL::UniformHLSL(sh::GLenum shaderType,
StructureHLSL *structureHLSL,
ShShaderOutput outputType,
const std::vector<Uniform> &uniforms)
: mUniformRegister(0),
const std::vector<Uniform> &uniforms,
unsigned int firstUniformRegister)
: mUniformRegister(firstUniformRegister),
mUniformBlockRegister(0),
mTextureRegister(0),
mRWTextureRegister(0),
......
......@@ -24,7 +24,8 @@ class UniformHLSL : angle::NonCopyable
UniformHLSL(sh::GLenum shaderType,
StructureHLSL *structureHLSL,
ShShaderOutput outputType,
const std::vector<Uniform> &uniforms);
const std::vector<Uniform> &uniforms,
unsigned int firstUniformRegister);
void reserveUniformRegisters(unsigned int registerCount);
void reserveUniformBlockRegisters(unsigned int registerCount);
......
......@@ -73,6 +73,10 @@ ShaderD3D::ShaderD3D(const gl::ShaderState &data,
{
mAdditionalOptions |= SH_EMULATE_ISNAN_FLOAT_FUNCTION;
}
if (workarounds.skipConstantRegisterZero)
{
mAdditionalOptions |= SH_SKIP_D3D_CONSTANT_REGISTER_ZERO;
}
if (extensions.multiview)
{
mAdditionalOptions |= SH_INITIALIZE_BUILTINS_FOR_INSTANCED_MULTIVIEW;
......
......@@ -3799,8 +3799,8 @@ void Renderer11::generateCaps(gl::Caps *outCaps,
gl::Extensions *outExtensions,
gl::Limitations *outLimitations) const
{
d3d11_gl::GenerateCaps(mDevice, mDeviceContext, mRenderer11DeviceCaps, outCaps, outTextureCaps,
outExtensions, outLimitations);
d3d11_gl::GenerateCaps(mDevice, mDeviceContext, mRenderer11DeviceCaps, getWorkarounds(),
outCaps, outTextureCaps, outExtensions, outLimitations);
}
angle::WorkaroundsD3D Renderer11::generateWorkarounds() const
......
......@@ -1268,8 +1268,14 @@ unsigned int GetMaxSampleMaskWords(D3D_FEATURE_LEVEL featureLevel)
}
}
void GenerateCaps(ID3D11Device *device, ID3D11DeviceContext *deviceContext, const Renderer11DeviceCaps &renderer11DeviceCaps, gl::Caps *caps,
gl::TextureCapsMap *textureCapsMap, gl::Extensions *extensions, gl::Limitations *limitations)
void GenerateCaps(ID3D11Device *device,
ID3D11DeviceContext *deviceContext,
const Renderer11DeviceCaps &renderer11DeviceCaps,
const angle::WorkaroundsD3D &workarounds,
gl::Caps *caps,
gl::TextureCapsMap *textureCapsMap,
gl::Extensions *extensions,
gl::Limitations *limitations)
{
D3D_FEATURE_LEVEL featureLevel = renderer11DeviceCaps.featureLevel;
const gl::FormatSet &allFormats = gl::GetAllSizedInternalFormats();
......@@ -1344,10 +1350,13 @@ void GenerateCaps(ID3D11Device *device, ID3D11DeviceContext *deviceContext, cons
// Vertex shader limits
caps->maxVertexAttributes = static_cast<GLuint>(GetMaximumVertexInputSlots(featureLevel));
caps->maxVertexUniformComponents =
static_cast<GLuint>(GetMaximumVertexUniformVectors(featureLevel)) * 4;
caps->maxVertexUniformVectors =
static_cast<GLuint>(GetMaximumVertexUniformVectors(featureLevel));
if (workarounds.skipConstantRegisterZero)
{
caps->maxVertexUniformVectors -= 1;
}
caps->maxVertexUniformComponents = caps->maxVertexUniformVectors * 4;
caps->maxVertexUniformBlocks = static_cast<GLuint>(GetMaximumVertexUniformBlocks(featureLevel));
caps->maxVertexOutputComponents =
static_cast<GLuint>(GetMaximumVertexOutputVectors(featureLevel)) * 4;
......@@ -1363,10 +1372,13 @@ void GenerateCaps(ID3D11Device *device, ID3D11DeviceContext *deviceContext, cons
caps->maxVertexAttribStride = 2048;
// Fragment shader limits
caps->maxFragmentUniformComponents =
static_cast<GLuint>(GetMaximumPixelUniformVectors(featureLevel)) * 4;
caps->maxFragmentUniformVectors =
static_cast<GLuint>(GetMaximumPixelUniformVectors(featureLevel));
if (workarounds.skipConstantRegisterZero)
{
caps->maxFragmentUniformVectors -= 1;
}
caps->maxFragmentUniformComponents = caps->maxFragmentUniformVectors * 4;
caps->maxFragmentUniformBlocks =
static_cast<GLuint>(GetMaximumPixelUniformBlocks(featureLevel));
caps->maxFragmentInputComponents =
......@@ -1382,6 +1394,10 @@ void GenerateCaps(ID3D11Device *device, ID3D11DeviceContext *deviceContext, cons
static_cast<GLuint>(GetMaxComputeWorkGroupInvocations(featureLevel));
caps->maxComputeUniformComponents =
static_cast<GLuint>(GetMaximumComputeUniformVectors(featureLevel)) * 4;
if (workarounds.skipConstantRegisterZero)
{
caps->maxComputeUniformComponents -= 4;
}
caps->maxComputeUniformBlocks =
static_cast<GLuint>(GetMaximumComputeUniformBlocks(featureLevel));
caps->maxComputeTextureImageUnits =
......@@ -2198,6 +2214,7 @@ angle::WorkaroundsD3D GenerateWorkarounds(const Renderer11DeviceCaps &deviceCaps
workarounds.flushAfterEndingTransformFeedback = IsNvidia(adapterDesc.VendorId);
workarounds.getDimensionsIgnoresBaseLevel = IsNvidia(adapterDesc.VendorId);
workarounds.skipConstantRegisterZero = IsNvidia(adapterDesc.VendorId);
if (IsIntel(adapterDesc.VendorId))
{
......
......@@ -68,8 +68,14 @@ unsigned int GetReservedVertexUniformVectors(D3D_FEATURE_LEVEL featureLevel);
unsigned int GetReservedFragmentUniformVectors(D3D_FEATURE_LEVEL featureLevel);
gl::Version GetMaximumClientVersion(D3D_FEATURE_LEVEL featureLevel);
void GenerateCaps(ID3D11Device *device, ID3D11DeviceContext *deviceContext, const Renderer11DeviceCaps &renderer11DeviceCaps, gl::Caps *caps,
gl::TextureCapsMap *textureCapsMap, gl::Extensions *extensions, gl::Limitations *limitations);
void GenerateCaps(ID3D11Device *device,
ID3D11DeviceContext *deviceContext,
const Renderer11DeviceCaps &renderer11DeviceCaps,
const angle::WorkaroundsD3D &workarounds,
gl::Caps *caps,
gl::TextureCapsMap *textureCapsMap,
gl::Extensions *extensions,
gl::Limitations *limitations);
void GetSamplePosition(GLsizei sampleCount, size_t index, GLfloat *xy);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment