Commit a75aa3b2 by Jiawei Shao Committed by Commit Bot

ES31: Support compute shader shared variables in HLSL

This patch implements 'shared' variables in compute shaders on the D3D11 back-ends. GLSL shared variables are translated into 'groupshared' variables in HLSL. Note that although HLSL allows initializing variables declared with the 'groupshared' qualifier, we currently do not initialize them because: 1. It is very slow for D3D11 drivers to compile the compute shader if we add code to initialize a shared variable with a large array size. 2. It seems unnecessary to do so, and GLSL does not allow initializing a shared variable in its declaration (ESSL 3.1, Chapter 4.3.8). BUG=angleproject:2682 TEST=angle_end2end_tests Change-Id: Ica8247e1b98059968612a36e369718ef113a598c Reviewed-on: https://chromium-review.googlesource.com/1109587 Reviewed-by: Jiajia Qin <jiajia.qin@intel.com> Reviewed-by: Jamie Madill <jmadill@chromium.org> Reviewed-by: Geoff Lang <geofflang@chromium.org> Commit-Queue: Jiawei Shao <jiawei.shao@intel.com>
parent f5fd5c6c
...@@ -6,9 +6,9 @@ ...@@ -6,9 +6,9 @@
#include "compiler/translator/OutputHLSL.h" #include "compiler/translator/OutputHLSL.h"
#include <stdio.h>
#include <algorithm> #include <algorithm>
#include <cfloat> #include <cfloat>
#include <stdio.h>
#include "common/angleutils.h" #include "common/angleutils.h"
#include "common/debug.h" #include "common/debug.h"
...@@ -56,7 +56,7 @@ bool IsDeclarationWrittenOut(TIntermDeclaration *node) ...@@ -56,7 +56,7 @@ bool IsDeclarationWrittenOut(TIntermDeclaration *node)
ASSERT(sequence->size() == 1); ASSERT(sequence->size() == 1);
ASSERT(variable); ASSERT(variable);
return (variable->getQualifier() == EvqTemporary || variable->getQualifier() == EvqGlobal || return (variable->getQualifier() == EvqTemporary || variable->getQualifier() == EvqGlobal ||
variable->getQualifier() == EvqConst); variable->getQualifier() == EvqConst || variable->getQualifier() == EvqShared);
} }
bool IsInStd140InterfaceBlock(TIntermTyped *node) bool IsInStd140InterfaceBlock(TIntermTyped *node)
...@@ -165,14 +165,14 @@ OutputHLSL::OutputHLSL(sh::GLenum shaderType, ...@@ -165,14 +165,14 @@ OutputHLSL::OutputHLSL(sh::GLenum shaderType,
{ {
mInsideFunction = false; mInsideFunction = false;
mUsesFragColor = false; mUsesFragColor = false;
mUsesFragData = false; mUsesFragData = false;
mUsesDepthRange = false; mUsesDepthRange = false;
mUsesFragCoord = false; mUsesFragCoord = false;
mUsesPointCoord = false; mUsesPointCoord = false;
mUsesFrontFacing = false; mUsesFrontFacing = false;
mUsesPointSize = false; mUsesPointSize = false;
mUsesInstanceID = false; mUsesInstanceID = false;
mHasMultiviewExtensionEnabled = mHasMultiviewExtensionEnabled =
IsExtensionEnabled(mExtensionBehavior, TExtension::OVR_multiview); IsExtensionEnabled(mExtensionBehavior, TExtension::OVR_multiview);
mUsesViewID = false; mUsesViewID = false;
...@@ -414,18 +414,18 @@ void OutputHLSL::header(TInfoSinkBase &out, ...@@ -414,18 +414,18 @@ void OutputHLSL::header(TInfoSinkBase &out,
for (const auto &varying : mReferencedVaryings) for (const auto &varying : mReferencedVaryings)
{ {
const TType &type = varying.second->getType(); const TType &type = varying.second->getType();
const ImmutableString &name = varying.second->name(); const ImmutableString &name = varying.second->name();
// Program linking depends on this exact format // Program linking depends on this exact format
varyings += TString("static ") + InterpolationString(type.getQualifier()) + " " + TypeString(type) + varyings += TString("static ") + InterpolationString(type.getQualifier()) + " " +
" " + Decorate(name) + ArrayString(type) + " = " + zeroInitializer(type) + TypeString(type) + " " + Decorate(name) + ArrayString(type) + " = " +
";\n"; zeroInitializer(type) + ";\n";
} }
for (const auto &attribute : mReferencedAttributes) for (const auto &attribute : mReferencedAttributes)
{ {
const TType &type = attribute.second->getType(); const TType &type = attribute.second->getType();
const ImmutableString &name = attribute.second->name(); const ImmutableString &name = attribute.second->name();
attributes += "static " + TypeString(type) + " " + Decorate(name) + ArrayString(type) + attributes += "static " + TypeString(type) + " " + Decorate(name) + ArrayString(type) +
...@@ -499,7 +499,7 @@ void OutputHLSL::header(TInfoSinkBase &out, ...@@ -499,7 +499,7 @@ void OutputHLSL::header(TInfoSinkBase &out,
for (const auto &outputVariable : mReferencedOutputVariables) for (const auto &outputVariable : mReferencedOutputVariables)
{ {
const ImmutableString &variableName = outputVariable.second->name(); const ImmutableString &variableName = outputVariable.second->name();
const TType &variableType = outputVariable.second->getType(); const TType &variableType = outputVariable.second->getType();
out << "static " << TypeString(variableType) << " out_" << variableName out << "static " << TypeString(variableType) << " out_" << variableName
<< ArrayString(variableType) << " = " << zeroInitializer(variableType) << ";\n"; << ArrayString(variableType) << " = " << zeroInitializer(variableType) << ";\n";
...@@ -509,8 +509,9 @@ void OutputHLSL::header(TInfoSinkBase &out, ...@@ -509,8 +509,9 @@ void OutputHLSL::header(TInfoSinkBase &out,
{ {
const unsigned int numColorValues = usingMRTExtension ? mNumRenderTargets : 1; const unsigned int numColorValues = usingMRTExtension ? mNumRenderTargets : 1;
out << "static float4 gl_Color[" << numColorValues << "] =\n" out << "static float4 gl_Color[" << numColorValues
"{\n"; << "] =\n"
"{\n";
for (unsigned int i = 0; i < numColorValues; i++) for (unsigned int i = 0; i < numColorValues; i++)
{ {
out << " float4(0, 0, 0, 0)"; out << " float4(0, 0, 0, 0)";
...@@ -1244,7 +1245,7 @@ bool OutputHLSL::visitBinary(Visit visit, TIntermBinary *node) ...@@ -1244,7 +1245,7 @@ bool OutputHLSL::visitBinary(Visit visit, TIntermBinary *node)
{ {
if (visit == PreVisit) if (visit == PreVisit)
{ {
TIntermSymbol *instanceArraySymbol = node->getLeft()->getAsSymbolNode(); TIntermSymbol *instanceArraySymbol = node->getLeft()->getAsSymbolNode();
const TInterfaceBlock *interfaceBlock = leftType.getInterfaceBlock(); const TInterfaceBlock *interfaceBlock = leftType.getInterfaceBlock();
if (mReferencedUniformBlocks.count(interfaceBlock->uniqueId().get()) == 0) if (mReferencedUniformBlocks.count(interfaceBlock->uniqueId().get()) == 0)
{ {
...@@ -1807,7 +1808,11 @@ bool OutputHLSL::visitDeclaration(Visit visit, TIntermDeclaration *node) ...@@ -1807,7 +1808,11 @@ bool OutputHLSL::visitDeclaration(Visit visit, TIntermDeclaration *node)
declarator->getAsSymbolNode()->variable().symbolType() != declarator->getAsSymbolNode()->variable().symbolType() !=
SymbolType::Empty) // Variable declaration SymbolType::Empty) // Variable declaration
{ {
if (!mInsideFunction) if (declarator->getQualifier() == EvqShared)
{
out << "groupshared ";
}
else if (!mInsideFunction)
{ {
out << "static "; out << "static ";
} }
...@@ -1820,7 +1825,15 @@ bool OutputHLSL::visitDeclaration(Visit visit, TIntermDeclaration *node) ...@@ -1820,7 +1825,15 @@ bool OutputHLSL::visitDeclaration(Visit visit, TIntermDeclaration *node)
{ {
symbol->traverse(this); symbol->traverse(this);
out << ArrayString(symbol->getType()); out << ArrayString(symbol->getType());
out << " = " + zeroInitializer(symbol->getType()); // We don't initialize shared variables because:
// 1. It is very slow for D3D11 drivers to compile a compute shader if we add
// code to initialize a groupshared array variable with a large array size.
// 2. It is unnecessary to initialize shared variables, as GLSL even does not
// allow initializing shared variables at all.
if (declarator->getQualifier() != EvqShared)
{
out << " = " + zeroInitializer(symbol->getType());
}
} }
else else
{ {
...@@ -1928,7 +1941,7 @@ bool OutputHLSL::visitAggregate(Visit visit, TIntermAggregate *node) ...@@ -1928,7 +1941,7 @@ bool OutputHLSL::visitAggregate(Visit visit, TIntermAggregate *node)
else if (node->getFunction()->isImageFunction()) else if (node->getFunction()->isImageFunction())
{ {
const ImmutableString &name = node->getFunction()->name(); const ImmutableString &name = node->getFunction()->name();
TType type = (*arguments)[0]->getAsTyped()->getType(); TType type = (*arguments)[0]->getAsTyped()->getType();
TString imageFunctionName = mImageFunctionHLSL->useImageFunction( TString imageFunctionName = mImageFunctionHLSL->useImageFunction(
name, type.getBasicType(), type.getLayoutQualifier().imageInternalFormat, name, type.getBasicType(), type.getLayoutQualifier().imageInternalFormat,
type.getMemoryQualifier().readonly); type.getMemoryQualifier().readonly);
......
...@@ -19,6 +19,57 @@ class ComputeShaderTest : public ANGLETest ...@@ -19,6 +19,57 @@ class ComputeShaderTest : public ANGLETest
{ {
protected: protected:
ComputeShaderTest() {} ComputeShaderTest() {}
template <GLint kWidth, GLint kHeight>
void runSharedMemoryTest(const char *csSource,
const std::array<GLuint, kWidth * kHeight> &inputData,
const std::array<GLuint, kWidth * kHeight> &expectedValues)
{
GLTexture texture[2];
GLFramebuffer framebuffer;
glBindTexture(GL_TEXTURE_2D, texture[0]);
glTexStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, kWidth, kHeight);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, kWidth, kHeight, GL_RED_INTEGER, GL_UNSIGNED_INT,
inputData.data());
EXPECT_GL_NO_ERROR();
constexpr GLuint initData[kWidth * kHeight] = {};
glBindTexture(GL_TEXTURE_2D, texture[1]);
glTexStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, kWidth, kHeight);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, kWidth, kHeight, GL_RED_INTEGER, GL_UNSIGNED_INT,
initData);
EXPECT_GL_NO_ERROR();
ANGLE_GL_COMPUTE_PROGRAM(program, csSource);
glUseProgram(program.get());
glBindImageTexture(0, texture[0], 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32UI);
EXPECT_GL_NO_ERROR();
glBindImageTexture(1, texture[1], 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32UI);
EXPECT_GL_NO_ERROR();
glDispatchCompute(1, 1, 1);
EXPECT_GL_NO_ERROR();
glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
GLuint outputValues[kWidth * kHeight];
glUseProgram(0);
glBindFramebuffer(GL_READ_FRAMEBUFFER, framebuffer);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture[1],
0);
EXPECT_GL_NO_ERROR();
glReadPixels(0, 0, kWidth, kHeight, GL_RED_INTEGER, GL_UNSIGNED_INT, outputValues);
EXPECT_GL_NO_ERROR();
for (int i = 0; i < kWidth * kHeight; i++)
{
EXPECT_EQ(expectedValues[i], outputValues[i]);
}
}
}; };
class ComputeShaderTestES3 : public ANGLETest class ComputeShaderTestES3 : public ANGLETest
...@@ -1305,6 +1356,91 @@ TEST_P(ComputeShaderTest, UniformBlockWithStructMember) ...@@ -1305,6 +1356,91 @@ TEST_P(ComputeShaderTest, UniformBlockWithStructMember)
EXPECT_GL_NO_ERROR(); EXPECT_GL_NO_ERROR();
} }
// A scalar 'shared' variable must be visible to the other invocations of its work group:
// invocation (0, 0) stores its source texel in the variable and, after the barrier, invocation
// (1, 1) writes that value out. Every other invocation simply copies its own source texel.
TEST_P(ComputeShaderTest, NonArraySharedVariable)
{
    constexpr char kComputeSource[] =
        R"(#version 310 es
layout (local_size_x = 2, local_size_y = 2, local_size_z = 1) in;
layout (r32ui, binding = 0) readonly uniform highp uimage2D srcImage;
layout (r32ui, binding = 1) writeonly uniform highp uimage2D dstImage;
shared uint temp;
void main()
{
if (gl_LocalInvocationID == uvec3(0, 0, 0))
{
temp = imageLoad(srcImage, ivec2(gl_LocalInvocationID.xy)).x;
}
groupMemoryBarrier();
barrier();
if (gl_LocalInvocationID == uvec3(1, 1, 0))
{
imageStore(dstImage, ivec2(gl_LocalInvocationID.xy), uvec4(temp));
}
else
{
uint inputValue = imageLoad(srcImage, ivec2(gl_LocalInvocationID.xy)).x;
imageStore(dstImage, ivec2(gl_LocalInvocationID.xy), uvec4(inputValue));
}
})";

    // Only the last texel is expected to change: it receives the value of the first one.
    const std::array<GLuint, 4> sourceTexels = {{250, 200, 150, 100}};
    const std::array<GLuint, 4> wantedTexels = {{250, 200, 150, 250}};

    runSharedMemoryTest<2, 2>(kComputeSource, sourceTexels, wantedTexels);
}
// A 'shared' array of a non-struct type must work: each invocation stores its source texel at
// [x][y] and, after the barrier, writes out the element at [y][x], so the 2x2 image ends up
// with its two off-diagonal texels exchanged.
TEST_P(ComputeShaderTest, NonStructArrayAsSharedVariable)
{
    constexpr char kComputeSource[] =
        R"(#version 310 es
layout (local_size_x = 2, local_size_y = 2, local_size_z = 1) in;
layout (r32ui, binding = 0) readonly uniform highp uimage2D srcImage;
layout (r32ui, binding = 1) writeonly uniform highp uimage2D dstImage;
shared uint sharedData[2][2];
void main()
{
uint inputData = imageLoad(srcImage, ivec2(gl_LocalInvocationID.xy)).x;
sharedData[gl_LocalInvocationID.x][gl_LocalInvocationID.y] = inputData;
groupMemoryBarrier();
barrier();
imageStore(dstImage, ivec2(gl_LocalInvocationID.xy),
uvec4(sharedData[gl_LocalInvocationID.y][gl_LocalInvocationID.x]));
})";

    // The two middle values trade places; the first and last stay put.
    const std::array<GLuint, 4> sourceTexels = {{250, 200, 150, 100}};
    const std::array<GLuint, 4> wantedTexels = {{250, 150, 200, 100}};

    runSharedMemoryTest<2, 2>(kComputeSource, sourceTexels, wantedTexels);
}
// A 'shared' array of structs must work: each invocation stores its source texel in the 'data'
// member of element [x][y] and, after the barrier, writes out the member of element [y][x], so
// the 2x2 image ends up with its two off-diagonal texels exchanged.
TEST_P(ComputeShaderTest, StructArrayAsSharedVariable)
{
    constexpr char kComputeSource[] =
        R"(#version 310 es
layout (local_size_x = 2, local_size_y = 2, local_size_z = 1) in;
layout (r32ui, binding = 0) readonly uniform highp uimage2D srcImage;
layout (r32ui, binding = 1) writeonly uniform highp uimage2D dstImage;
struct SharedStruct
{
uint data;
};
shared SharedStruct sharedData[2][2];
void main()
{
uint inputData = imageLoad(srcImage, ivec2(gl_LocalInvocationID.xy)).x;
sharedData[gl_LocalInvocationID.x][gl_LocalInvocationID.y].data = inputData;
groupMemoryBarrier();
barrier();
imageStore(dstImage, ivec2(gl_LocalInvocationID.xy),
uvec4(sharedData[gl_LocalInvocationID.y][gl_LocalInvocationID.x].data));
})";

    // The two middle values trade places; the first and last stay put.
    const std::array<GLuint, 4> sourceTexels = {{250, 200, 150, 100}};
    const std::array<GLuint, 4> wantedTexels = {{250, 150, 200, 100}};

    runSharedMemoryTest<2, 2>(kComputeSource, sourceTexels, wantedTexels);
}
// Check that it is not possible to create a compute shader when the context does not support ES // Check that it is not possible to create a compute shader when the context does not support ES
// 3.10 // 3.10
TEST_P(ComputeShaderTestES3, NotSupported) TEST_P(ComputeShaderTestES3, NotSupported)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment