Commit d4b5054d by Corentin Wallez

compiler: Rewrite do-while loops as while loops

This works around a Mac driver shader compiler bug that makes many do-while loops cause GPU hangs when run. BUG=angleproject:891 Change-Id: I29828d6ea9e887ad0ed0c577f1deb41fb632a900 Reviewed-on: https://chromium-review.googlesource.com/302465 Reviewed-by: Jamie Madill <jmadill@chromium.org> Tested-by: Corentin Wallez <cwallez@chromium.org>
parent 7b43c9b0
......@@ -199,6 +199,10 @@ typedef enum {
// This flag works around a bug in NVIDIA 331 series drivers related
// to pow(x, y) where y is a constant vector.
SH_REMOVE_POW_WITH_CONSTANT_EXPONENT = 0x200000,
// This flag works around bugs in Mac drivers related to do-while loops by
// transforming them into another construct.
SH_REWRITE_DO_WHILE_LOOPS = 0x400000,
} ShCompileOptions;
// Defines alternate strategies for implementing array index clamping.
......
......@@ -89,6 +89,8 @@
'compiler/translator/RegenerateStructNames.h',
'compiler/translator/RemovePow.cpp',
'compiler/translator/RemovePow.h',
'compiler/translator/RewriteDoWhile.cpp',
'compiler/translator/RewriteDoWhile.h',
'compiler/translator/RenameFunction.h',
'compiler/translator/ScalarizeVecAndMatConstructorArgs.cpp',
'compiler/translator/ScalarizeVecAndMatConstructorArgs.h',
......
......@@ -16,6 +16,7 @@
#include "compiler/translator/RegenerateStructNames.h"
#include "compiler/translator/RemovePow.h"
#include "compiler/translator/RenameFunction.h"
#include "compiler/translator/RewriteDoWhile.h"
#include "compiler/translator/ScalarizeVecAndMatConstructorArgs.h"
#include "compiler/translator/UnfoldShortCircuitAST.h"
#include "compiler/translator/ValidateLimitations.h"
......@@ -143,7 +144,8 @@ TCompiler::TCompiler(sh::GLenum type, ShShaderSpec spec, ShShaderOutput output)
fragmentPrecisionHigh(false),
clampingStrategy(SH_CLAMP_WITH_CLAMP_INTRINSIC),
builtInFunctionEmulator(),
mSourcePath(NULL)
mSourcePath(NULL),
mTemporaryIndex(0)
{
}
......@@ -320,6 +322,10 @@ TIntermNode *TCompiler::compileTreeImpl(const char *const shaderStrings[],
if (success && shaderType == GL_VERTEX_SHADER && (compileOptions & SH_INIT_GL_POSITION))
initializeGLPosition(root);
// This pass might emit short circuits so keep it before the short circuit unfolding
if (success && (compileOptions & SH_REWRITE_DO_WHILE_LOOPS))
RewriteDoWhile(root, getTemporaryIndex());
if (success && (compileOptions & SH_UNFOLD_SHORT_CIRCUIT))
{
UnfoldShortCircuitAST unfoldShortCircuit;
......@@ -503,6 +509,7 @@ void TCompiler::clearResults()
nameMap.clear();
mSourcePath = NULL;
mTemporaryIndex = 0;
}
bool TCompiler::initCallDag(TIntermNode *root)
......
......@@ -154,6 +154,7 @@ class TCompiler : public TShHandleBase
const char *getSourcePath() const;
const TPragma& getPragma() const { return mPragma; }
void writePragma();
unsigned int *getTemporaryIndex() { return &mTemporaryIndex; }
const ArrayBoundsClamper& getArrayBoundsClamper() const;
ShArrayIndexClampingStrategy getArrayIndexClampingStrategy() const;
......@@ -230,6 +231,8 @@ class TCompiler : public TShHandleBase
NameMap nameMap;
TPragma mPragma;
unsigned int mTemporaryIndex;
};
//
......
//
// Copyright (c) 2015 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// RewriteDoWhile.cpp: rewrites do-while loops using another equivalent
// construct.
#include "compiler/translator/RewriteDoWhile.h"
#include "compiler/translator/IntermNode.h"
namespace
{
// An AST traverser that rewrites loops of the form
// do {
// CODE;
// } while (CONDITION)
//
// to loops of the form
// bool temp = false;
// while (true) {
// if (temp) {
// if (!CONDITION) {
// break;
// }
// }
// temp = true;
// CODE;
// }
//
// The reason we don't use a simpler form, with for example just (temp && !CONDITION) in the
// while condition, is that short-circuiting is often badly supported by driver shader compilers.
// The double if has the same effect, but forces shader compilers to behave.
//
// TODO(cwallez) when UnfoldShortCircuitIntoIf handles loops correctly, revisit this as we might
// be able to use while (temp || CONDITION) with temp initially set to true then run
// UnfoldShortCircuitIntoIf
class DoWhileRewriter : public TIntermTraverser
{
public:
DoWhileRewriter() : TIntermTraverser(true, false, false) {}
bool visitAggregate(Visit, TIntermAggregate *node) override
{
// A well-formed AST can only have do-while in EOpSequence which represent lists of
// statements. By doing a prefix traversal we are able to replace the do-while in the
// sequence directly as the content of the do-while will be traversed later.
if (node->getOp() != EOpSequence)
{
return true;
}
TIntermSequence *statements = node->getSequence();
// The statements vector will have new statements inserted when we encounter a do-while,
// which prevents us from using a range-based for loop. Using the usual i++ works, as
// the (two) new statements inserted replace the statement at the current position.
for (size_t i = 0; i < statements->size(); i++)
{
TIntermNode *statement = (*statements)[i];
TIntermLoop *loop = statement->getAsLoopNode();
if (loop == nullptr || loop->getType() != ELoopDoWhile)
{
continue;
}
TType boolType = TType(EbtBool);
// bool temp = false;
TIntermAggregate *tempDeclaration = nullptr;
{
TConstantUnion *falseConstant = new TConstantUnion();
falseConstant->setBConst(false);
TIntermTyped *falseValue = new TIntermConstantUnion(falseConstant, boolType);
tempDeclaration = createTempInitDeclaration(falseValue);
}
// temp = true;
TIntermBinary *assignTrue = nullptr;
{
TConstantUnion *trueConstant = new TConstantUnion();
trueConstant->setBConst(true);
TIntermTyped *trueValue = new TIntermConstantUnion(trueConstant, boolType);
assignTrue = createTempAssignment(trueValue);
}
// if (temp) {
// if (!CONDITION) {
// break;
// }
// }
TIntermSelection *breakIf = nullptr;
{
TIntermBranch *breakStatement = new TIntermBranch(EOpBreak, nullptr);
TIntermAggregate *breakBlock = new TIntermAggregate(EOpSequence);
breakBlock->getSequence()->push_back(breakStatement);
TIntermUnary *negatedCondition = new TIntermUnary(EOpLogicalNot);
negatedCondition->setOperand(loop->getCondition());
TIntermSelection *innerIf =
new TIntermSelection(negatedCondition, breakBlock, nullptr);
TIntermAggregate *innerIfBlock = new TIntermAggregate(EOpSequence);
innerIfBlock->getSequence()->push_back(innerIf);
breakIf = new TIntermSelection(createTempSymbol(boolType), innerIfBlock, nullptr);
}
// Assemble the replacement loops, reusing the do-while loop's body and inserting our
// statements at the front.
TIntermLoop *newLoop = nullptr;
{
TConstantUnion *trueConstant = new TConstantUnion();
trueConstant->setBConst(true);
TIntermTyped *trueValue = new TIntermConstantUnion(trueConstant, boolType);
TIntermAggregate *body = nullptr;
if (loop->getBody() != nullptr)
{
body = loop->getBody()->getAsAggregate();
}
else
{
body = new TIntermAggregate(EOpSequence);
}
auto sequence = body->getSequence();
sequence->insert(sequence->begin(), assignTrue);
sequence->insert(sequence->begin(), breakIf);
newLoop = new TIntermLoop(ELoopWhile, nullptr, trueValue, nullptr, body);
}
TIntermSequence replacement;
replacement.push_back(tempDeclaration);
replacement.push_back(newLoop);
node->replaceChildNodeWithMultiple(loop, replacement);
nextTemporaryIndex();
}
return true;
}
};
} // anonymous namespace
// Entry point of the pass: traverses the tree rooted at |root| with a DoWhileRewriter, which
// replaces do-while loops by while loops. |temporaryIndex| must not be null; it is handed to the
// traverser through useTemporaryIndex() before the traversal starts.
void RewriteDoWhile(TIntermNode *root, unsigned int *temporaryIndex)
{
    ASSERT(temporaryIndex != nullptr);

    DoWhileRewriter doWhileRewriter;
    doWhileRewriter.useTemporaryIndex(temporaryIndex);
    root->traverse(&doWhileRewriter);
}
//
// Copyright (c) 2015 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// RewriteDoWhile.h: rewrite do-while loops as while loops to work around
// driver bugs
#ifndef COMPILER_TRANSLATOR_REWRITEDOWHILE_H_
#define COMPILER_TRANSLATOR_REWRITEDOWHILE_H_
// Forward declaration only; this header does not need the full node definition.
class TIntermNode;
// Rewrites the do-while loops in the tree rooted at |root| as while loops.
// |temporaryIndex| must be non-null (the implementation asserts on it); it is passed on to the
// tree traverser that performs the rewrite.
void RewriteDoWhile(TIntermNode *root, unsigned int *temporaryIndex);
#endif // COMPILER_TRANSLATOR_REWRITEDOWHILE_H_
......@@ -49,9 +49,7 @@ void TranslatorESSL::translate(TIntermNode *root, int) {
emulatePrecision.writeEmulationHelpers(sink, SH_ESSL_OUTPUT);
}
unsigned int temporaryIndex = 0;
RecordConstantPrecision(root, &temporaryIndex);
RecordConstantPrecision(root, getTemporaryIndex());
// Write emulated built-in functions if needed.
if (!getBuiltInFunctionEmulator().IsOutputEmpty())
......
......@@ -29,7 +29,8 @@ void TranslatorGLSL::initBuiltInFunctionEmulator(BuiltInFunctionEmulator *emu, i
InitBuiltInFunctionEmulatorForGLSLMissingFunctions(emu, getShaderType(), targetGLSLVersion);
}
void TranslatorGLSL::translate(TIntermNode *root, int) {
void TranslatorGLSL::translate(TIntermNode *root, int compileOptions)
{
TInfoSinkBase& sink = getInfoSink().obj;
// Write GLSL version.
......
......@@ -27,31 +27,29 @@ void TranslatorHLSL::translate(TIntermNode *root, int compileOptions)
SeparateDeclarations(root);
unsigned int temporaryIndex = 0;
// Note that SeparateDeclarations needs to be run before UnfoldShortCircuitToIf.
UnfoldShortCircuitToIf(root, &temporaryIndex);
UnfoldShortCircuitToIf(root, getTemporaryIndex());
SeparateExpressionsReturningArrays(root, &temporaryIndex);
SeparateExpressionsReturningArrays(root, getTemporaryIndex());
// Note that SeparateDeclarations needs to be run before SeparateArrayInitialization.
SeparateArrayInitialization(root);
// HLSL doesn't support arrays as return values, we'll need to make functions that have an array
// as a return value to use an out parameter to transfer the array data instead.
ArrayReturnValueToOutParameter(root, &temporaryIndex);
ArrayReturnValueToOutParameter(root, getTemporaryIndex());
if (!shouldRunLoopAndIndexingValidation(compileOptions))
{
// HLSL doesn't support dynamic indexing of vectors and matrices.
RemoveDynamicIndexing(root, &temporaryIndex, getSymbolTable(), getShaderVersion());
RemoveDynamicIndexing(root, getTemporaryIndex(), getSymbolTable(), getShaderVersion());
}
// Work around D3D9 bug that would manifest in vertex shaders with selection blocks which
// use a vertex attribute as a condition, and some related computation in the else block.
if (getOutputType() == SH_HLSL9_OUTPUT && getShaderType() == GL_VERTEX_SHADER)
{
sh::RewriteElseBlocks(root, &temporaryIndex);
sh::RewriteElseBlocks(root, getTemporaryIndex());
}
sh::OutputHLSL outputHLSL(getShaderType(), getShaderVersion(), getExtensionBehavior(),
......
......@@ -245,7 +245,7 @@ CompilerImpl *RendererGL::createCompiler()
ShaderImpl *RendererGL::createShader(const gl::Shader::Data &data)
{
return new ShaderGL(data, mFunctions);
return new ShaderGL(data, mFunctions, mWorkarounds);
}
ProgramImpl *RendererGL::createProgram(const gl::Program::Data &data)
......
......@@ -12,12 +12,17 @@
#include "libANGLE/Compiler.h"
#include "libANGLE/renderer/gl/FunctionsGL.h"
#include "libANGLE/renderer/gl/RendererGL.h"
#include "libANGLE/renderer/gl/WorkaroundsGL.h"
#include <iostream>
namespace rx
{
ShaderGL::ShaderGL(const gl::Shader::Data &data, const FunctionsGL *functions)
: ShaderImpl(data), mFunctions(functions), mShaderID(0)
ShaderGL::ShaderGL(const gl::Shader::Data &data,
const FunctionsGL *functions,
const WorkaroundsGL &workarounds)
: ShaderImpl(data), mFunctions(functions), mWorkarounds(workarounds), mShaderID(0)
{
ASSERT(mFunctions);
}
......@@ -42,7 +47,14 @@ int ShaderGL::prepareSourceAndReturnOptions(std::stringstream *sourceStream)
*sourceStream << mData.getSource();
return SH_INIT_GL_POSITION;
int options = SH_INIT_GL_POSITION;
if (mWorkarounds.doWhileGLSLCausesGPUHang)
{
options |= SH_REWRITE_DO_WHILE_LOOPS;
}
return options;
}
bool ShaderGL::postTranslateCompile(gl::Compiler *compiler, std::string *infoLog)
......
......@@ -14,11 +14,14 @@
namespace rx
{
class FunctionsGL;
struct WorkaroundsGL;
class ShaderGL : public ShaderImpl
{
public:
ShaderGL(const gl::Shader::Data &data, const FunctionsGL *functions);
ShaderGL(const gl::Shader::Data &data,
const FunctionsGL *functions,
const WorkaroundsGL &workarounds);
~ShaderGL() override;
// ShaderImpl implementation
......@@ -30,6 +33,7 @@ class ShaderGL : public ShaderImpl
private:
const FunctionsGL *mFunctions;
const WorkaroundsGL &mWorkarounds;
GLuint mShaderID;
};
......
......@@ -17,7 +17,8 @@ struct WorkaroundsGL
WorkaroundsGL()
: avoid1BitAlphaTextureFormats(false),
rgba4IsNotSupportedForColorRendering(false),
doesSRGBClearsOnLinearFramebufferAttachments(false)
doesSRGBClearsOnLinearFramebufferAttachments(false),
doWhileGLSLCausesGPUHang(false)
{
}
......@@ -39,7 +40,16 @@ struct WorkaroundsGL
// driver clears to the linearized clear color despite the framebuffer not supporting SRGB
// blending. It only seems to do this when the framebuffer has only linear attachments, mixed
// attachments appear to get the correct clear color.
bool doesSRGBClearsOnLinearFramebufferAttachments;
// On Mac some GLSL constructs involving do-while loops cause GPU hangs, such as the following:
// int i = 1;
// do {
// i --;
// continue;
// } while (i > 0)
// Work around this by rewriting the do-while to use another GLSL construct (block + while)
bool doWhileGLSLCausesGPUHang;
};
}
......
......@@ -561,6 +561,10 @@ void GenerateWorkarounds(const FunctionsGL *functions, WorkaroundsGL *workaround
workarounds->doesSRGBClearsOnLinearFramebufferAttachments =
functions->standard == STANDARD_GL_DESKTOP &&
(vendor == VENDOR_ID_INTEL || vendor == VENDOR_ID_AMD);
#if ANGLE_PLATFORM_APPLE
workarounds->doWhileGLSLCausesGPUHang = true;
#endif
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment