Commit a75aa3b2 by Jiawei Shao Committed by Commit Bot

ES31: Support compute shader shared variables in HLSL

This patch implements 'shared' variables in compute shaders on D3D11 back-ends. GLSL shared variables are translated into 'groupshared' ones in HLSL.

Note that although HLSL allows initializing variables with the 'groupshared' qualifier, currently we do not initialize them because:
1. It is very slow for D3D11 drivers to compile the compute shader if we add the code to initialize a shared variable with a large array size.
2. It seems unnecessary to do so, and in GLSL it is not allowed to initialize a shared variable in its declaration (ESSL 3.1, Chapter 4.3.8).

BUG=angleproject:2682
TEST=angle_end2end_tests
Change-Id: Ica8247e1b98059968612a36e369718ef113a598c
Reviewed-on: https://chromium-review.googlesource.com/1109587
Reviewed-by: Jiajia Qin <jiajia.qin@intel.com>
Reviewed-by: Jamie Madill <jmadill@chromium.org>
Reviewed-by: Geoff Lang <geofflang@chromium.org>
Commit-Queue: Jiawei Shao <jiawei.shao@intel.com>
parent f5fd5c6c
......@@ -6,9 +6,9 @@
#include "compiler/translator/OutputHLSL.h"
#include <stdio.h>
#include <algorithm>
#include <cfloat>
#include <stdio.h>
#include "common/angleutils.h"
#include "common/debug.h"
......@@ -56,7 +56,7 @@ bool IsDeclarationWrittenOut(TIntermDeclaration *node)
ASSERT(sequence->size() == 1);
ASSERT(variable);
return (variable->getQualifier() == EvqTemporary || variable->getQualifier() == EvqGlobal ||
variable->getQualifier() == EvqConst);
variable->getQualifier() == EvqConst || variable->getQualifier() == EvqShared);
}
bool IsInStd140InterfaceBlock(TIntermTyped *node)
......@@ -165,14 +165,14 @@ OutputHLSL::OutputHLSL(sh::GLenum shaderType,
{
mInsideFunction = false;
mUsesFragColor = false;
mUsesFragData = false;
mUsesDepthRange = false;
mUsesFragCoord = false;
mUsesPointCoord = false;
mUsesFrontFacing = false;
mUsesPointSize = false;
mUsesInstanceID = false;
mUsesFragColor = false;
mUsesFragData = false;
mUsesDepthRange = false;
mUsesFragCoord = false;
mUsesPointCoord = false;
mUsesFrontFacing = false;
mUsesPointSize = false;
mUsesInstanceID = false;
mHasMultiviewExtensionEnabled =
IsExtensionEnabled(mExtensionBehavior, TExtension::OVR_multiview);
mUsesViewID = false;
......@@ -414,18 +414,18 @@ void OutputHLSL::header(TInfoSinkBase &out,
for (const auto &varying : mReferencedVaryings)
{
const TType &type = varying.second->getType();
const TType &type = varying.second->getType();
const ImmutableString &name = varying.second->name();
// Program linking depends on this exact format
varyings += TString("static ") + InterpolationString(type.getQualifier()) + " " + TypeString(type) +
" " + Decorate(name) + ArrayString(type) + " = " + zeroInitializer(type) +
";\n";
varyings += TString("static ") + InterpolationString(type.getQualifier()) + " " +
TypeString(type) + " " + Decorate(name) + ArrayString(type) + " = " +
zeroInitializer(type) + ";\n";
}
for (const auto &attribute : mReferencedAttributes)
{
const TType &type = attribute.second->getType();
const TType &type = attribute.second->getType();
const ImmutableString &name = attribute.second->name();
attributes += "static " + TypeString(type) + " " + Decorate(name) + ArrayString(type) +
......@@ -499,7 +499,7 @@ void OutputHLSL::header(TInfoSinkBase &out,
for (const auto &outputVariable : mReferencedOutputVariables)
{
const ImmutableString &variableName = outputVariable.second->name();
const TType &variableType = outputVariable.second->getType();
const TType &variableType = outputVariable.second->getType();
out << "static " << TypeString(variableType) << " out_" << variableName
<< ArrayString(variableType) << " = " << zeroInitializer(variableType) << ";\n";
......@@ -509,8 +509,9 @@ void OutputHLSL::header(TInfoSinkBase &out,
{
const unsigned int numColorValues = usingMRTExtension ? mNumRenderTargets : 1;
out << "static float4 gl_Color[" << numColorValues << "] =\n"
"{\n";
out << "static float4 gl_Color[" << numColorValues
<< "] =\n"
"{\n";
for (unsigned int i = 0; i < numColorValues; i++)
{
out << " float4(0, 0, 0, 0)";
......@@ -1244,7 +1245,7 @@ bool OutputHLSL::visitBinary(Visit visit, TIntermBinary *node)
{
if (visit == PreVisit)
{
TIntermSymbol *instanceArraySymbol = node->getLeft()->getAsSymbolNode();
TIntermSymbol *instanceArraySymbol = node->getLeft()->getAsSymbolNode();
const TInterfaceBlock *interfaceBlock = leftType.getInterfaceBlock();
if (mReferencedUniformBlocks.count(interfaceBlock->uniqueId().get()) == 0)
{
......@@ -1807,7 +1808,11 @@ bool OutputHLSL::visitDeclaration(Visit visit, TIntermDeclaration *node)
declarator->getAsSymbolNode()->variable().symbolType() !=
SymbolType::Empty) // Variable declaration
{
if (!mInsideFunction)
if (declarator->getQualifier() == EvqShared)
{
out << "groupshared ";
}
else if (!mInsideFunction)
{
out << "static ";
}
......@@ -1820,7 +1825,15 @@ bool OutputHLSL::visitDeclaration(Visit visit, TIntermDeclaration *node)
{
symbol->traverse(this);
out << ArrayString(symbol->getType());
out << " = " + zeroInitializer(symbol->getType());
// We don't initialize shared variables because:
// 1. It is very slow for D3D11 drivers to compile a compute shader if we add
// code to initialize a groupshared array variable with a large array size.
// 2. It is unnecessary to initialize shared variables, as GLSL does not even
// allow initializing shared variables at all.
if (declarator->getQualifier() != EvqShared)
{
out << " = " + zeroInitializer(symbol->getType());
}
}
else
{
......@@ -1928,7 +1941,7 @@ bool OutputHLSL::visitAggregate(Visit visit, TIntermAggregate *node)
else if (node->getFunction()->isImageFunction())
{
const ImmutableString &name = node->getFunction()->name();
TType type = (*arguments)[0]->getAsTyped()->getType();
TType type = (*arguments)[0]->getAsTyped()->getType();
TString imageFunctionName = mImageFunctionHLSL->useImageFunction(
name, type.getBasicType(), type.getLayoutQualifier().imageInternalFormat,
type.getMemoryQualifier().readonly);
......
......@@ -19,6 +19,57 @@ class ComputeShaderTest : public ANGLETest
{
protected:
ComputeShaderTest() {}
template <GLint kWidth, GLint kHeight>
void runSharedMemoryTest(const char *csSource,
const std::array<GLuint, kWidth * kHeight> &inputData,
const std::array<GLuint, kWidth * kHeight> &expectedValues)
{
GLTexture texture[2];
GLFramebuffer framebuffer;
glBindTexture(GL_TEXTURE_2D, texture[0]);
glTexStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, kWidth, kHeight);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, kWidth, kHeight, GL_RED_INTEGER, GL_UNSIGNED_INT,
inputData.data());
EXPECT_GL_NO_ERROR();
constexpr GLuint initData[kWidth * kHeight] = {};
glBindTexture(GL_TEXTURE_2D, texture[1]);
glTexStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, kWidth, kHeight);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, kWidth, kHeight, GL_RED_INTEGER, GL_UNSIGNED_INT,
initData);
EXPECT_GL_NO_ERROR();
ANGLE_GL_COMPUTE_PROGRAM(program, csSource);
glUseProgram(program.get());
glBindImageTexture(0, texture[0], 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32UI);
EXPECT_GL_NO_ERROR();
glBindImageTexture(1, texture[1], 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32UI);
EXPECT_GL_NO_ERROR();
glDispatchCompute(1, 1, 1);
EXPECT_GL_NO_ERROR();
glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
GLuint outputValues[kWidth * kHeight];
glUseProgram(0);
glBindFramebuffer(GL_READ_FRAMEBUFFER, framebuffer);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture[1],
0);
EXPECT_GL_NO_ERROR();
glReadPixels(0, 0, kWidth, kHeight, GL_RED_INTEGER, GL_UNSIGNED_INT, outputValues);
EXPECT_GL_NO_ERROR();
for (int i = 0; i < kWidth * kHeight; i++)
{
EXPECT_EQ(expectedValues[i], outputValues[i]);
}
}
};
class ComputeShaderTestES3 : public ANGLETest
......@@ -1305,6 +1356,91 @@ TEST_P(ComputeShaderTest, UniformBlockWithStructMember)
EXPECT_GL_NO_ERROR();
}
// Checks that a scalar (non-array) shared variable written by one invocation can be read by
// another invocation of the same work group once both have passed the barrier.
TEST_P(ComputeShaderTest, NonArraySharedVariable)
{
    constexpr char kComputeSource[] =
        R"(#version 310 es
layout (local_size_x = 2, local_size_y = 2, local_size_z = 1) in;
layout (r32ui, binding = 0) readonly uniform highp uimage2D srcImage;
layout (r32ui, binding = 1) writeonly uniform highp uimage2D dstImage;
shared uint temp;
void main()
{
if (gl_LocalInvocationID == uvec3(0, 0, 0))
{
temp = imageLoad(srcImage, ivec2(gl_LocalInvocationID.xy)).x;
}
groupMemoryBarrier();
barrier();
if (gl_LocalInvocationID == uvec3(1, 1, 0))
{
imageStore(dstImage, ivec2(gl_LocalInvocationID.xy), uvec4(temp));
}
else
{
uint inputValue = imageLoad(srcImage, ivec2(gl_LocalInvocationID.xy)).x;
imageStore(dstImage, ivec2(gl_LocalInvocationID.xy), uvec4(inputValue));
}
})";

    // Invocation (0, 0) publishes its texel through |temp| and invocation (1, 1) stores it;
    // every other invocation just copies its own texel, so only the last element changes.
    constexpr std::array<GLuint, 4> kSourceTexels   = {{250, 200, 150, 100}};
    constexpr std::array<GLuint, 4> kExpectedTexels = {{250, 200, 150, 250}};

    runSharedMemoryTest<2, 2>(kComputeSource, kSourceTexels, kExpectedTexels);
}
// Checks that a shared two-dimensional array of a basic type (uint) can be written by every
// invocation and read back with swapped indices after the barrier.
TEST_P(ComputeShaderTest, NonStructArrayAsSharedVariable)
{
    constexpr char kComputeSource[] =
        R"(#version 310 es
layout (local_size_x = 2, local_size_y = 2, local_size_z = 1) in;
layout (r32ui, binding = 0) readonly uniform highp uimage2D srcImage;
layout (r32ui, binding = 1) writeonly uniform highp uimage2D dstImage;
shared uint sharedData[2][2];
void main()
{
uint inputData = imageLoad(srcImage, ivec2(gl_LocalInvocationID.xy)).x;
sharedData[gl_LocalInvocationID.x][gl_LocalInvocationID.y] = inputData;
groupMemoryBarrier();
barrier();
imageStore(dstImage, ivec2(gl_LocalInvocationID.xy),
uvec4(sharedData[gl_LocalInvocationID.y][gl_LocalInvocationID.x]));
})";

    // Each invocation stores its texel at [x][y] and then reads [y][x], so the destination
    // image is the transpose of the source image.
    constexpr std::array<GLuint, 4> kSourceTexels   = {{250, 200, 150, 100}};
    constexpr std::array<GLuint, 4> kExpectedTexels = {{250, 150, 200, 100}};

    runSharedMemoryTest<2, 2>(kComputeSource, kSourceTexels, kExpectedTexels);
}
// Checks that a shared two-dimensional array of structs can be written by every invocation
// and read back with swapped indices after the barrier.
TEST_P(ComputeShaderTest, StructArrayAsSharedVariable)
{
    constexpr char kComputeSource[] =
        R"(#version 310 es
layout (local_size_x = 2, local_size_y = 2, local_size_z = 1) in;
layout (r32ui, binding = 0) readonly uniform highp uimage2D srcImage;
layout (r32ui, binding = 1) writeonly uniform highp uimage2D dstImage;
struct SharedStruct
{
uint data;
};
shared SharedStruct sharedData[2][2];
void main()
{
uint inputData = imageLoad(srcImage, ivec2(gl_LocalInvocationID.xy)).x;
sharedData[gl_LocalInvocationID.x][gl_LocalInvocationID.y].data = inputData;
groupMemoryBarrier();
barrier();
imageStore(dstImage, ivec2(gl_LocalInvocationID.xy),
uvec4(sharedData[gl_LocalInvocationID.y][gl_LocalInvocationID.x].data));
})";

    // Each invocation stores its texel in the |data| member at [x][y] and then reads the
    // member at [y][x], so the destination image is the transpose of the source image.
    constexpr std::array<GLuint, 4> kSourceTexels   = {{250, 200, 150, 100}};
    constexpr std::array<GLuint, 4> kExpectedTexels = {{250, 150, 200, 100}};

    runSharedMemoryTest<2, 2>(kComputeSource, kSourceTexels, kExpectedTexels);
}
// Check that it is not possible to create a compute shader when the context does not support ES
// 3.10
TEST_P(ComputeShaderTestES3, NotSupported)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment