Commit 1239ee94 by Corentin Wallez

Use the AST analyses to narrow the usage of [[loop]] and [[unroll]]

These attributes are now used exactly in the loops and ifs that require them, limiting the number of failed compilations due to excessive unrolling and flattening. Also output Lod0 functions only when needed. Adds unit tests for LOOP, FLATTEN and Lod0 generation.

The patch was tested against the WebGL CTS 1.0.4, for which all the failures existed prior to this patch and seem to be unrelated to this change. It also works correctly on the following sites that had trouble with [[loop]] and [[unroll]]:
* dev.miaumiau.cat/rayTracer "Skull Demo"
* The turbulenz engine particle demo
* Lots of ShaderToy samples (including "Volcanic" and "Metropolis")
* Google Maps Earth mode
* Lots of Chrome Experiments
* Lagoa
* madebyevan.com/webgl-water
* SketchFab
* Unit Tests

BUG=angleproject:937
BUG=395048
Change-Id: I856de9025f10b79781929ec212dbffc2064a940e
Reviewed-on: https://chromium-review.googlesource.com/264791
Reviewed-by: Jamie Madill <jmadill@chromium.org>
Tested-by: Corentin Wallez <cwallez@chromium.org>
parent 794e0009
...@@ -14,7 +14,6 @@ ...@@ -14,7 +14,6 @@
#include "common/utilities.h" #include "common/utilities.h"
#include "compiler/translator/BuiltInFunctionEmulator.h" #include "compiler/translator/BuiltInFunctionEmulator.h"
#include "compiler/translator/BuiltInFunctionEmulatorHLSL.h" #include "compiler/translator/BuiltInFunctionEmulatorHLSL.h"
#include "compiler/translator/DetectDiscontinuity.h"
#include "compiler/translator/FlagStd140Structs.h" #include "compiler/translator/FlagStd140Structs.h"
#include "compiler/translator/InfoSink.h" #include "compiler/translator/InfoSink.h"
#include "compiler/translator/NodeSearch.h" #include "compiler/translator/NodeSearch.h"
...@@ -109,7 +108,8 @@ OutputHLSL::OutputHLSL(sh::GLenum shaderType, int shaderVersion, ...@@ -109,7 +108,8 @@ OutputHLSL::OutputHLSL(sh::GLenum shaderType, int shaderVersion,
mSourcePath(sourcePath), mSourcePath(sourcePath),
mOutputType(outputType), mOutputType(outputType),
mNumRenderTargets(numRenderTargets), mNumRenderTargets(numRenderTargets),
mCompileOptions(compileOptions) mCompileOptions(compileOptions),
mCurrentFunctionMetadata(nullptr)
{ {
mUnfoldShortCircuit = new UnfoldShortCircuit(this); mUnfoldShortCircuit = new UnfoldShortCircuit(this);
mInsideFunction = false; mInsideFunction = false;
...@@ -130,8 +130,6 @@ OutputHLSL::OutputHLSL(sh::GLenum shaderType, int shaderVersion, ...@@ -130,8 +130,6 @@ OutputHLSL::OutputHLSL(sh::GLenum shaderType, int shaderVersion,
mUniqueIndex = 0; mUniqueIndex = 0;
mContainsLoopDiscontinuity = false;
mContainsAnyLoop = false;
mOutputLod0Function = false; mOutputLod0Function = false;
mInsideDiscontinuousLoop = false; mInsideDiscontinuousLoop = false;
mNestedLoopDepth = 0; mNestedLoopDepth = 0;
...@@ -170,9 +168,6 @@ OutputHLSL::~OutputHLSL() ...@@ -170,9 +168,6 @@ OutputHLSL::~OutputHLSL()
void OutputHLSL::output(TIntermNode *treeRoot, TInfoSinkBase &objSink) void OutputHLSL::output(TIntermNode *treeRoot, TInfoSinkBase &objSink)
{ {
mContainsLoopDiscontinuity = mShaderType == GL_FRAGMENT_SHADER && containsLoopDiscontinuity(treeRoot);
mContainsAnyLoop = containsAnyLoop(treeRoot);
const std::vector<TIntermTyped*> &flaggedStructs = FlagStd140ValueStructs(treeRoot); const std::vector<TIntermTyped*> &flaggedStructs = FlagStd140ValueStructs(treeRoot);
makeFlaggedStructMaps(flaggedStructs); makeFlaggedStructMaps(flaggedStructs);
...@@ -188,12 +183,9 @@ void OutputHLSL::output(TIntermNode *treeRoot, TInfoSinkBase &objSink) ...@@ -188,12 +183,9 @@ void OutputHLSL::output(TIntermNode *treeRoot, TInfoSinkBase &objSink)
builtInFunctionEmulator.MarkBuiltInFunctionsForEmulation(treeRoot); builtInFunctionEmulator.MarkBuiltInFunctionsForEmulation(treeRoot);
// Now that we are done changing the AST, do the analyses need for HLSL generation // Now that we are done changing the AST, do the analyses need for HLSL generation
{ CallDAG::InitResult success = mCallDag.init(treeRoot, &objSink);
CallDAG dag; ASSERT(success == CallDAG::INITDAG_SUCCESS);
CallDAG::InitResult success = dag.init(treeRoot, &objSink); mASTMetadataList = CreateASTMetadataHLSL(treeRoot, mCallDag);
ASSERT(success == CallDAG::INITDAG_SUCCESS);
mASTAnalyses = CreateASTMetadataHLSL(treeRoot, dag);
}
// Output the body and footer first to determine what has to go in the header // Output the body and footer first to determine what has to go in the header
mInfoSinkStack.push(&mBody); mInfoSinkStack.push(&mBody);
...@@ -1963,6 +1955,13 @@ bool OutputHLSL::visitAggregate(Visit visit, TIntermAggregate *node) ...@@ -1963,6 +1955,13 @@ bool OutputHLSL::visitAggregate(Visit visit, TIntermAggregate *node)
case EOpPrototype: case EOpPrototype:
if (visit == PreVisit) if (visit == PreVisit)
{ {
size_t index = mCallDag.findIndex(node);
// Skip the prototype if it is not implemented (and thus not used)
if (index == CallDAG::InvalidIndex)
{
return false;
}
out << TypeString(node->getType()) << " " << Decorate(TFunction::unmangleName(node->getName())) << (mOutputLod0Function ? "Lod0(" : "("); out << TypeString(node->getType()) << " " << Decorate(TFunction::unmangleName(node->getName())) << (mOutputLod0Function ? "Lod0(" : "(");
TIntermSequence *arguments = node->getSequence(); TIntermSequence *arguments = node->getSequence();
...@@ -1986,7 +1985,8 @@ bool OutputHLSL::visitAggregate(Visit visit, TIntermAggregate *node) ...@@ -1986,7 +1985,8 @@ bool OutputHLSL::visitAggregate(Visit visit, TIntermAggregate *node)
out << ");\n"; out << ");\n";
// Also prototype the Lod0 variant if needed // Also prototype the Lod0 variant if needed
if (mContainsLoopDiscontinuity && !mOutputLod0Function) bool needsLod0 = mASTMetadataList[index].mNeedsLod0;
if (needsLod0 && !mOutputLod0Function && mShaderType == GL_FRAGMENT_SHADER)
{ {
mOutputLod0Function = true; mOutputLod0Function = true;
node->traverse(this); node->traverse(this);
...@@ -1999,8 +1999,13 @@ bool OutputHLSL::visitAggregate(Visit visit, TIntermAggregate *node) ...@@ -1999,8 +1999,13 @@ bool OutputHLSL::visitAggregate(Visit visit, TIntermAggregate *node)
case EOpComma: outputTriplet(visit, "(", ", ", ")"); break; case EOpComma: outputTriplet(visit, "(", ", ", ")"); break;
case EOpFunction: case EOpFunction:
{ {
ASSERT(mCurrentFunctionMetadata == nullptr);
TString name = TFunction::unmangleName(node->getName()); TString name = TFunction::unmangleName(node->getName());
size_t index = mCallDag.findIndex(node);
ASSERT(index != CallDAG::InvalidIndex);
mCurrentFunctionMetadata = &mASTMetadataList[index];
out << TypeString(node->getType()) << " "; out << TypeString(node->getType()) << " ";
if (name == "main") if (name == "main")
...@@ -2050,14 +2055,15 @@ bool OutputHLSL::visitAggregate(Visit visit, TIntermAggregate *node) ...@@ -2050,14 +2055,15 @@ bool OutputHLSL::visitAggregate(Visit visit, TIntermAggregate *node)
out << "}\n"; out << "}\n";
if (mContainsLoopDiscontinuity && !mOutputLod0Function) mCurrentFunctionMetadata = nullptr;
bool needsLod0 = mASTMetadataList[index].mNeedsLod0;
if (needsLod0 && !mOutputLod0Function && mShaderType == GL_FRAGMENT_SHADER)
{ {
if (name != "main") ASSERT(name != "main");
{ mOutputLod0Function = true;
mOutputLod0Function = true; node->traverse(this);
node->traverse(this); mOutputLod0Function = false;
mOutputLod0Function = false;
}
} }
return false; return false;
...@@ -2066,11 +2072,15 @@ bool OutputHLSL::visitAggregate(Visit visit, TIntermAggregate *node) ...@@ -2066,11 +2072,15 @@ bool OutputHLSL::visitAggregate(Visit visit, TIntermAggregate *node)
case EOpFunctionCall: case EOpFunctionCall:
{ {
TString name = TFunction::unmangleName(node->getName()); TString name = TFunction::unmangleName(node->getName());
bool lod0 = mInsideDiscontinuousLoop || mOutputLod0Function;
TIntermSequence *arguments = node->getSequence(); TIntermSequence *arguments = node->getSequence();
bool lod0 = mInsideDiscontinuousLoop || mOutputLod0Function;
if (node->isUserDefined()) if (node->isUserDefined())
{ {
size_t index = mCallDag.findIndex(node);
ASSERT(index != CallDAG::InvalidIndex);
lod0 &= mASTMetadataList[index].mNeedsLod0;
out << Decorate(name) << (lod0 ? "Lod0(" : "("); out << Decorate(name) << (lod0 ? "Lod0(" : "(");
} }
else else
...@@ -2301,11 +2311,10 @@ bool OutputHLSL::visitSelection(Visit visit, TIntermSelection *node) ...@@ -2301,11 +2311,10 @@ bool OutputHLSL::visitSelection(Visit visit, TIntermSelection *node)
{ {
mUnfoldShortCircuit->traverse(node->getCondition()); mUnfoldShortCircuit->traverse(node->getCondition());
// D3D errors when there is a gradient operation in a loop in an unflattened if // D3D errors when there is a gradient operation in a loop in an unflattened if.
// however flattening all the ifs in branch heavy shaders made D3D error too. if (mShaderType == GL_FRAGMENT_SHADER
// As a temporary workaround we flatten the ifs only if there is at least a loop && mCurrentFunctionMetadata->hasDiscontinuousLoop(node)
// present somewhere in the shader. && mCurrentFunctionMetadata->hasGradientInCallGraph(node))
if (mShaderType == GL_FRAGMENT_SHADER && mContainsAnyLoop)
{ {
out << "FLATTEN "; out << "FLATTEN ";
} }
...@@ -2400,11 +2409,8 @@ bool OutputHLSL::visitLoop(Visit visit, TIntermLoop *node) ...@@ -2400,11 +2409,8 @@ bool OutputHLSL::visitLoop(Visit visit, TIntermLoop *node)
mNestedLoopDepth++; mNestedLoopDepth++;
bool wasDiscontinuous = mInsideDiscontinuousLoop; bool wasDiscontinuous = mInsideDiscontinuousLoop;
mInsideDiscontinuousLoop = mInsideDiscontinuousLoop ||
if (mContainsLoopDiscontinuity && !mInsideDiscontinuousLoop) mCurrentFunctionMetadata->mDiscontinuousLoops.count(node) >= 0;
{
mInsideDiscontinuousLoop = containsLoopDiscontinuity(node);
}
if (mOutputType == SH_HLSL9_OUTPUT) if (mOutputType == SH_HLSL9_OUTPUT)
{ {
...@@ -2419,16 +2425,17 @@ bool OutputHLSL::visitLoop(Visit visit, TIntermLoop *node) ...@@ -2419,16 +2425,17 @@ bool OutputHLSL::visitLoop(Visit visit, TIntermLoop *node)
TInfoSinkBase &out = getInfoSink(); TInfoSinkBase &out = getInfoSink();
const char *unroll = mCurrentFunctionMetadata->hasGradientInCallGraph(node) ? "LOOP" : "";
if (node->getType() == ELoopDoWhile) if (node->getType() == ELoopDoWhile)
{ {
out << "{LOOP do\n"; out << "{" << unroll << " do\n";
outputLineDirective(node->getLine().first_line); outputLineDirective(node->getLine().first_line);
out << "{\n"; out << "{\n";
} }
else else
{ {
out << "{LOOP for("; out << "{" << unroll << " for(";
if (node->getInit()) if (node->getInit())
{ {
...@@ -2734,8 +2741,9 @@ bool OutputHLSL::handleExcessiveLoop(TIntermLoop *node) ...@@ -2734,8 +2741,9 @@ bool OutputHLSL::handleExcessiveLoop(TIntermLoop *node)
} }
// for(int index = initial; index < clampedLimit; index += increment) // for(int index = initial; index < clampedLimit; index += increment)
const char *unroll = mCurrentFunctionMetadata->hasGradientInCallGraph(node) ? "LOOP" : "";
out << "LOOP for("; out << unroll << " for(";
index->traverse(this); index->traverse(this);
out << " = "; out << " = ";
out << initial; out << initial;
......
...@@ -170,9 +170,9 @@ class OutputHLSL : public TIntermTraverser ...@@ -170,9 +170,9 @@ class OutputHLSL : public TIntermTraverser
int mUniqueIndex; // For creating unique names int mUniqueIndex; // For creating unique names
std::vector<ASTMetadataHLSL> mASTAnalyses; CallDAG mCallDag;
bool mContainsLoopDiscontinuity; MetadataList mASTMetadataList;
bool mContainsAnyLoop; ASTMetadataHLSL *mCurrentFunctionMetadata;
bool mOutputLod0Function; bool mOutputLod0Function;
bool mInsideDiscontinuousLoop; bool mInsideDiscontinuousLoop;
int mNestedLoopDepth; int mNestedLoopDepth;
......
...@@ -98,5 +98,13 @@ ...@@ -98,5 +98,13 @@
}, },
}, },
}], }],
['OS=="win"',
{
# TODO(cwallez): make this angle_enable_hlsl instead (requires gyp file refactoring)
'sources':
[
'<(angle_path)/src/tests/compiler_tests/UnrollFlatten_test.cpp',
],
}],
], ],
} }
//
// Copyright (c) 2015 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// UnrollFlatten_test.cpp:
// Tests that the HLSL translator outputs the LOOP and FLATTEN markers, and the Lod0
// function variants, only where they are needed for the D3D compiler
//
#include "angle_gl.h"
#include "gtest/gtest.h"
#include "GLSLANG/ShaderLang.h"
#include "compiler/translator/TranslatorHLSL.h"
namespace
{
// Fixture that translates a GLSL ES fragment shader to HLSL and lets a test check that a
// list of patterns appears in order in the translated source, with no LOOP or FLATTEN
// marker showing up anywhere it was not explicitly expected.
class UnrollFlattenTest : public testing::Test
{
  public:
    UnrollFlattenTest() : mTranslator(nullptr), mCurrentPosition(0) {}

  protected:
    // Creates and initializes a fragment shader translator with HLSL11 output.
    void SetUp() override
    {
        ShBuiltInResources resources;
        ShInitBuiltInResources(&resources);
        resources.FragmentPrecisionHigh = 1;

        mTranslator = new TranslatorHLSL(GL_FRAGMENT_SHADER, SH_GLES2_SPEC, SH_HLSL11_OUTPUT);
        ASSERT_TRUE(mTranslator->Init(resources));
    }

    void TearDown() override
    {
        SafeDelete(mTranslator);
    }

    // Translates shaderString and positions the search cursor used by expect(). Records a
    // fatal test failure if the translation fails or if the start marker cannot be found.
    void compile(const std::string &shaderString)
    {
        const char *shaderStrings[] = { shaderString.c_str() };
        bool compilationSuccess = mTranslator->compile(shaderStrings, 1, SH_VARIABLES | SH_OBJECT_CODE);
        TInfoSink &infoSink = mTranslator->getInfoSink();
        if (!compilationSuccess)
        {
            FAIL() << "Shader compilation failed " << infoSink.info.str();
        }
        mTranslatedSource = infoSink.obj.str();

        // The expectation list only describes matches made against the current translation.
        mExpectationList.clear();

        // Ignore the beginning of the shader to avoid the definitions of LOOP and FLATTEN
        mCurrentPosition = mTranslatedSource.find("GL_USES_FRAG_COLOR");
        if (mCurrentPosition == std::string::npos)
        {
            // Report the missing marker here; otherwise every later expect() would fail
            // with a message blaming its first pattern instead.
            FAIL() << "Couldn't find 'GL_USES_FRAG_COLOR' in translated source:\n" << mTranslatedSource;
        }
    }

    // Checks that the `count` strings in `patterns` appear in that order in the translated
    // source, starting from the current cursor. Between two consecutive matches, neither
    // LOOP nor FLATTEN may appear unless it is itself the pattern being matched.
    void expect(const char *patterns[], size_t count)
    {
        const char *badPatterns[] = { UNROLL, FLATTEN };

        for (size_t i = 0; i < count; i++)
        {
            const char *pattern = patterns[i];
            const size_t position = mTranslatedSource.find(pattern, mCurrentPosition);
            if (position == std::string::npos)
            {
                FAIL() << "Couldn't find '" << pattern << "' after expectations '"
                       << mExpectationList << "' in translated source:\n" << mTranslatedSource;
            }

            for (size_t j = 0; j < ArraySize(badPatterns); j++)
            {
                const char *badPattern = badPatterns[j];
                // Compare contents rather than pointers: identical string literals are not
                // guaranteed to share an address.
                if (std::string(pattern) != badPattern &&
                    mTranslatedSource.find(badPattern, mCurrentPosition) < position)
                {
                    FAIL() << "Found '" << badPattern << "' before '" << pattern << "' after expectations '"
                           << mExpectationList << "' in translated source:\n" << mTranslatedSource;
                }
            }

            mExpectationList += " - " + std::string(pattern);
            // Resume just past the start of this match so the same occurrence is not
            // matched twice.
            mCurrentPosition = position + 1;
        }
    }

    // Markers searched for in the translated source (see the definitions below the class).
    static const char *UNROLL;
    static const char *FLATTEN;

  private:
    TranslatorHLSL *mTranslator;
    std::string mTranslatedSource;
    // Offset in mTranslatedSource from which the next pattern is searched. Kept as size_t
    // so that std::string::find results, including npos, are stored without narrowing.
    size_t mCurrentPosition;
    // Patterns matched so far, used only to give context in failure messages.
    std::string mExpectationList;
};

// Despite its name, UNROLL holds the "LOOP" marker text.
const char *UnrollFlattenTest::UNROLL = "LOOP";
const char *UnrollFlattenTest::FLATTEN = "FLATTEN";
// Check that nothing is added when there is no gradient operation,
// even when there are ifs and discontinuous loops.
TEST_F(UnrollFlattenTest, NoGradient)
{
    const std::string fragmentSource(
        "precision mediump float;\n"
        "uniform float f;\n"
        "float fun(float a){\n" // 1
        " if (a > 1.0) {return f;}\n" // 2
        " else {return a + 1.0;}\n"
        "}\n"
        "float fun2(float a){\n" // 3
        " for (int i = 0; i < 10; i++) {\n" // 4
        " if (a > 1.0) {break;}\n" // 5
        " a = fun(a);\n" // 6
        " }\n"
        " return a;\n"
        "}\n"
        "void main() {\n"
        " float accum = 0.0;\n"
        " if (f < 5.0) {accum = fun2(accum);}\n" // 7
        " gl_FragColor = vec4(accum);\n"
        "}\n");

    compile(fragmentSource);

    // Intent of the numbered shader lines above:
    // 1 - fun should not get a Lod0 version
    // 2 - no FLATTEN: the if does not contain a discontinuous loop
    // 3 - fun2 should not get a Lod0 version
    // 4 - no LOOP: the loop is discontinuous and contains no gradient
    // 5 - no FLATTEN: the if does not contain a discontinuous loop
    // 6 - the non-Lod0 version of fun is called
    // 7 - no FLATTEN
    const char *orderedPatterns[] =
    {
        // fun
        "fun(", "if",
        // fun2
        "fun2(", "for", "if", "break", "fun(",
        // main
        "main(", "if", "fun2("
    };
    expect(orderedPatterns, ArraySize(orderedPatterns));
}
// Check that a gradient inside a non-discontinuous loop keeps the regular
// (non-Lod0) versions of the functions, and that LOOP is generated for the
// loop containing the gradient.
TEST_F(UnrollFlattenTest, GradientNotInDiscont)
{
    const std::string fragmentSource(
        "precision mediump float;\n"
        "uniform float f;\n"
        "uniform sampler2D tex;"
        "float fun(float a){\n" // 1
        " return texture2D(tex, vec2(0.5, f)).x;\n" // 2
        "}\n"
        "float fun2(float a){\n" // 3
        " for (int i = 0; i < 10; i++) {\n" // 4
        " if (a > 1.0) {}\n" // 5
        " a = fun(a);\n" // 6
        " }\n"
        " return a;\n"
        "}\n"
        "void main() {\n"
        " float accum = 0.0;\n"
        " if (f < 5.0) {accum = fun2(accum);}\n" // 7
        " gl_FragColor = vec4(accum);\n"
        "}\n");

    compile(fragmentSource);

    // Intent of the numbered shader lines above:
    // 1 - fun should not get a Lod0 version
    // 2 - the regular texture2D is used, not a Lod0 variant
    // 3 - fun2 should not get a Lod0 version (not called in a discontinuous loop)
    // 4 - LOOP is expected: the loop contains a gradient operation (through fun)
    // 5 - no FLATTEN: the if does not contain a discontinuous loop
    // 6 - the regular (non-Lod0) version of fun is called
    // 7 - no FLATTEN
    const char *orderedPatterns[] =
    {
        // fun
        "fun(", "texture2D(",
        // fun2
        "fun2(", "LOOP", "for", "if", "fun(",
        // main
        "main(", "if", "fun2("
    };
    expect(orderedPatterns, ArraySize(orderedPatterns));
}
// Check that a gradient inside a discontinuous loop makes the translator
// use the Lod0 versions of the functions.
TEST_F(UnrollFlattenTest, GradientInDiscont)
{
    const std::string fragmentSource(
        "precision mediump float;\n"
        "uniform float f;\n"
        "uniform sampler2D tex;"
        "float fun(float a){\n" // 1
        " return texture2D(tex, vec2(0.5, f)).x;\n" // 2
        "}\n"
        "float fun2(float a){\n" // 3
        " for (int i = 0; i < 10; i++) {\n" // 4
        " if (a > 1.0) {break;}\n" // 5
        " a = fun(a);\n" // 6
        " }\n"
        " return a;\n"
        "}\n"
        "void main() {\n"
        " float accum = 0.0;\n"
        " if (f < 5.0) {accum = fun2(accum);}\n" // 7
        " gl_FragColor = vec4(accum);\n"
        "}\n");

    compile(fragmentSource);

    // Intent of the numbered shader lines above:
    // 1 - fun should get a Lod0 version (gradient + called in a discontinuous loop)
    // 2 - texture2DLod0 is used inside funLod0
    // 3 - fun2 should not get a Lod0 version (not called in a discontinuous loop)
    // 4 - LOOP is expected: the loop contains a gradient operation (even if Lod0)
    // 5 - no FLATTEN: the if does not contain a discontinuous loop
    // 6 - the Lod0 version of fun is called
    // 7 - FLATTEN is expected: the if contains a discontinuous loop and a gradient
    const char *orderedPatterns[] =
    {
        // fun and its Lod0 variant
        "fun(", "texture2D(",
        "funLod0(", "texture2DLod0(",
        // fun2
        "fun2(", "LOOP", "for", "if", "break", "funLod0(",
        // main
        "main(", "FLATTEN", "if", "fun2("
    };
    expect(orderedPatterns, ArraySize(orderedPatterns));
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment