Commit c1c9fb1b by Enrico Galli Committed by Commit Bot

ES31: Add atomic counter buffer support to D3D11 renderer

Adds support for atomic counters to the D3D11 renderer using UAVs (unordered access views). Bug: angleproject:1729 Test: angle_end2end_tests Change-Id: I2904ba62644685b7d91f7475bd80a81ae414993b Reviewed-on: https://chromium-review.googlesource.com/c/1451259 Commit-Queue: Geoff Lang <geofflang@chromium.org> Reviewed-by: Geoff Lang <geofflang@chromium.org>
parent aead8edf
......@@ -61,6 +61,9 @@ enum
IMPLEMENTATION_MAX_SHADER_TEXTURES = 32,
IMPLEMENTATION_MAX_ACTIVE_TEXTURES = IMPLEMENTATION_MAX_SHADER_TEXTURES * 2,
IMPLEMENTATION_MAX_IMAGE_UNITS = IMPLEMENTATION_MAX_ACTIVE_TEXTURES,
// Maximum number of slots allocated for atomic counter buffers.
IMPLEMENTATION_MAX_ATOMIC_COUNTER_BUFFERS = 8,
};
} // namespace gl
......
......@@ -3278,6 +3278,9 @@ void Context::initCaps()
LimitCap(&mState.mCaps.maxImageUnits, IMPLEMENTATION_MAX_IMAGE_UNITS);
LimitCap(&mState.mCaps.maxCombinedAtomicCounterBuffers,
IMPLEMENTATION_MAX_ATOMIC_COUNTER_BUFFERS);
mState.mCaps.maxSampleMaskWords =
std::min<GLuint>(mState.mCaps.maxSampleMaskWords, MAX_SAMPLE_MASK_WORDS);
......
......@@ -668,6 +668,7 @@ ProgramD3D::ProgramD3D(const gl::ProgramState &state, RendererD3D *renderer)
mDirtySamplerMapping(true),
mUsedComputeImageRange(0, 0),
mUsedComputeReadonlyImageRange(0, 0),
mUsedComputeAtomicCounterRange(0, 0),
mSerial(issueSerial())
{
mDynamicHLSL = new DynamicHLSL(renderer);
......@@ -925,6 +926,11 @@ angle::Result ProgramD3D::load(const gl::Context *context,
mUsedComputeReadonlyImageRange =
gl::RangeUI(computeReadonlyImageRangeLow, computeReadonlyImageRangeHigh);
unsigned int atomicCounterRangeLow, atomicCounterRangeHigh;
stream->readInt(&atomicCounterRangeLow);
stream->readInt(&atomicCounterRangeHigh);
mUsedComputeAtomicCounterRange = gl::RangeUI(atomicCounterRangeLow, atomicCounterRangeHigh);
const unsigned int shaderStorageBlockCount = stream->readInt<unsigned int>();
if (stream->error())
{
......@@ -943,6 +949,12 @@ angle::Result ProgramD3D::load(const gl::Context *context,
mD3DShaderStorageBlocks.push_back(shaderStorageBlock);
}
for (unsigned int ii = 0; ii < gl::IMPLEMENTATION_MAX_ATOMIC_COUNTER_BUFFERS; ++ii)
{
unsigned int index = stream->readInt<unsigned int>();
mComputeAtomicCounterBufferRegisterIndices[ii] = index;
}
const unsigned int uniformCount = stream->readInt<unsigned int>();
if (stream->error())
{
......@@ -1225,6 +1237,8 @@ void ProgramD3D::save(const gl::Context *context, gl::BinaryOutputStream *stream
stream->writeInt(mUsedComputeImageRange.high());
stream->writeInt(mUsedComputeReadonlyImageRange.low());
stream->writeInt(mUsedComputeReadonlyImageRange.high());
stream->writeInt(mUsedComputeAtomicCounterRange.low());
stream->writeInt(mUsedComputeAtomicCounterRange.high());
stream->writeInt(mD3DShaderStorageBlocks.size());
for (const D3DInterfaceBlock &shaderStorageBlock : mD3DShaderStorageBlocks)
......@@ -1235,6 +1249,11 @@ void ProgramD3D::save(const gl::Context *context, gl::BinaryOutputStream *stream
}
}
for (unsigned int ii = 0; ii < gl::IMPLEMENTATION_MAX_ATOMIC_COUNTER_BUFFERS; ++ii)
{
stream->writeInt(mComputeAtomicCounterBufferRegisterIndices[ii]);
}
stream->writeInt(mD3DUniforms.size());
for (const D3DUniform *uniform : mD3DUniforms)
{
......@@ -2111,6 +2130,18 @@ void ProgramD3D::updateUniformBufferCache(
}
}
// Looks up the register index recorded for the atomic counter buffer bound at
// |binding|. Only the compute stage has a table; any other |shaderType| hits
// UNIMPLEMENTED() and then still reads from the compute table.
unsigned int ProgramD3D::getAtomicCounterBufferRegisterIndex(GLuint binding,
                                                             gl::ShaderType shaderType) const
{
    const bool isComputeStage = (shaderType == gl::ShaderType::Compute);
    if (!isComputeStage)
    {
        // Implement atomic counters for non-compute shaders
        // http://anglebug.com/1729
        UNIMPLEMENTED();
    }

    const auto &computeRegisters = mComputeAtomicCounterBufferRegisterIndices;
    return computeRegisters[binding];
}
unsigned int ProgramD3D::getShaderStorageBufferRegisterIndex(GLuint blockIndex,
gl::ShaderType shaderType) const
{
......@@ -2308,11 +2339,15 @@ void ProgramD3D::defineUniformsAndAssignRegisters()
}
assignAllSamplerRegisters();
assignAllAtomicCounterRegisters();
// Samplers and readonly images share shader input resource slot, adjust low value of
// readonly image range.
mUsedComputeReadonlyImageRange =
gl::RangeUI(mUsedShaderSamplerRanges[gl::ShaderType::Compute].high(),
mUsedShaderSamplerRanges[gl::ShaderType::Compute].high());
// Atomic counter buffers and non-readonly images share input resource slots
mUsedComputeImageRange =
gl::RangeUI(mUsedComputeAtomicCounterRange.high(), mUsedComputeAtomicCounterRange.high());
assignAllImageRegisters();
initializeUniformStorage(attachedShaders);
}
......@@ -2359,6 +2394,14 @@ void ProgramD3D::defineUniformBase(const gl::Shader *shader,
sh::TraverseShaderVariable(uniform, false, &visitor);
return;
}
else if (gl::IsAtomicCounterType(uniform.type))
{
UniformEncodingVisitorD3D visitor(shader->getType(), HLSLRegisterType::UnorderedAccessView,
&dummyEncoder, uniformMap);
sh::TraverseShaderVariable(uniform, false, &visitor);
mAtomicBindingMap[uniform.name] = uniform.binding;
return;
}
const ShaderD3D *shaderD3D = GetImplAs<ShaderD3D>(shader);
unsigned int startRegister = shaderD3D->getUniformRegister(uniform.name);
......@@ -2568,6 +2611,45 @@ void ProgramD3D::assignAllImageRegisters()
}
}
// Records, for every atomic counter uniform collected in mAtomicBindingMap, the
// register the compute shader assigned to it (indexed by the uniform's binding),
// and stores the half-open register range [first, last + 1) covered by those
// counters in mUsedComputeAtomicCounterRange. Does nothing when the program has
// no atomic counters. Only compute shaders are handled.
void ProgramD3D::assignAllAtomicCounterRegisters()
{
    if (mAtomicBindingMap.empty())
    {
        return;
    }

    const gl::Shader *computeShader = mState.getAttachedShader(gl::ShaderType::Compute);
    if (!computeShader)
    {
        // Implement atomic counters for non-compute shaders
        // http://anglebug.com/1729
        UNIMPLEMENTED();
        return;
    }

    const ShaderD3D *computeShaderD3D = GetImplAs<ShaderD3D>(computeShader);
    auto &registerIndices             = mComputeAtomicCounterBufferRegisterIndices;

    // GL_INVALID_INDEX is the "no register" value checked below, so it doubles as
    // the starting point of the running minimum; an error enum such as
    // GL_INVALID_VALUE is a small number and is not a safe upper bound.
    unsigned int firstRegister = GL_INVALID_INDEX;
    unsigned int lastRegister  = 0;
    for (const auto &atomicBinding : mAtomicBindingMap)
    {
        ASSERT(computeShaderD3D->hasUniform(atomicBinding.first));
        const unsigned int currentRegister =
            computeShaderD3D->getUniformRegister(atomicBinding.first);
        ASSERT(currentRegister != GL_INVALID_INDEX);

        // The binding indexes a fixed-size array; catch out-of-range values in
        // debug builds instead of silently writing past the end.
        const int binding = atomicBinding.second;
        ASSERT(binding >= 0 && static_cast<size_t>(binding) < registerIndices.size());
        registerIndices[binding] = currentRegister;

        firstRegister = std::min(firstRegister, currentRegister);
        lastRegister  = std::max(lastRegister, currentRegister);
    }

    // The map is non-empty, so at least one valid register must have been seen.
    ASSERT(firstRegister != GL_INVALID_INDEX);
    mUsedComputeAtomicCounterRange = gl::RangeUI(firstRegister, lastRegister + 1);
}
void ProgramD3D::assignImageRegisters(size_t uniformIndex)
{
D3DUniform *d3dUniform = mD3DUniforms[uniformIndex];
......@@ -2700,6 +2782,7 @@ void ProgramD3D::reset()
SafeDeleteContainer(mD3DUniforms);
mD3DUniformBlocks.clear();
mD3DShaderStorageBlocks.clear();
mComputeAtomicCounterBufferRegisterIndices.fill({});
for (gl::ShaderType shaderType : gl::AllShaderTypes())
{
......@@ -2711,6 +2794,7 @@ void ProgramD3D::reset()
mReadonlyImagesCS.clear();
mUsedShaderSamplerRanges.fill({0, 0});
mUsedComputeAtomicCounterRange = {0, 0};
mDirtySamplerMapping = true;
mUsedComputeImageRange = {0, 0};
mUsedComputeReadonlyImageRange = {0, 0};
......
......@@ -217,6 +217,9 @@ class ProgramD3D : public ProgramImpl
void updateUniformBufferCache(const gl::Caps &caps,
const gl::ShaderMap<unsigned int> &reservedShaderRegisterIndexes);
unsigned int getAtomicCounterBufferRegisterIndex(GLuint binding,
gl::ShaderType shaderType) const;
unsigned int getShaderStorageBufferRegisterIndex(GLuint blockIndex,
gl::ShaderType shaderType) const;
const std::vector<GLint> &getShaderUniformBufferCache(gl::ShaderType shaderType) const;
......@@ -435,6 +438,7 @@ class ProgramD3D : public ProgramImpl
gl::RangeUI *outUsedRange);
void assignAllImageRegisters();
void assignAllAtomicCounterRegisters();
void assignImageRegisters(size_t uniformIndex);
static void AssignImages(unsigned int startImageIndex,
int startLogicalImageUnit,
......@@ -522,6 +526,7 @@ class ProgramD3D : public ProgramImpl
std::vector<Image> mReadonlyImagesCS;
gl::RangeUI mUsedComputeImageRange;
gl::RangeUI mUsedComputeReadonlyImageRange;
gl::RangeUI mUsedComputeAtomicCounterRange;
// Cache for pixel shader output layout to save reallocations.
std::vector<GLenum> mPixelShaderOutputLayoutCache;
......@@ -539,8 +544,11 @@ class ProgramD3D : public ProgramImpl
std::vector<D3DVarying> mStreamOutVaryings;
std::vector<D3DUniform *> mD3DUniforms;
std::map<std::string, int> mImageBindingMap;
std::map<std::string, int> mAtomicBindingMap;
std::vector<D3DInterfaceBlock> mD3DUniformBlocks;
std::vector<D3DInterfaceBlock> mD3DShaderStorageBlocks;
std::array<unsigned int, gl::IMPLEMENTATION_MAX_ATOMIC_COUNTER_BUFFERS>
mComputeAtomicCounterBufferRegisterIndices;
std::vector<sh::Uniform> mImage2DUniforms;
gl::ImageUnitTextureTypeMap mComputeShaderImage2DBindLayoutCache;
......
......@@ -3686,7 +3686,63 @@ angle::Result StateManager11::syncUniformBuffers(const gl::Context *context)
// Synchronizes atomic counter buffer bindings for the current program. Only the
// compute stage is synchronized here; programs without a compute stage are a
// no-op.
angle::Result StateManager11::syncAtomicCounterBuffers(const gl::Context *context)
{
    // TODO(jie.a.chen@intel.com): http://anglebug.com/1729
    const bool hasComputeStage = mProgramD3D->hasShaderStage(gl::ShaderType::Compute);
    if (!hasComputeStage)
    {
        return angle::Result::Continue;
    }

    ANGLE_TRY(syncAtomicCounterBuffersForShader(context, gl::ShaderType::Compute));
    return angle::Result::Continue;
}
// For each atomic counter buffer used by the current program, obtains a raw
// unordered access view (UAV) over the buffer bound at that binding point and
// sets it on the register the program assigned to the binding. Binding points
// with no buffer attached are skipped. Only the compute stage is implemented;
// any other |shaderType| reaches UNIMPLEMENTED().
angle::Result StateManager11::syncAtomicCounterBuffersForShader(const gl::Context *context,
                                                                gl::ShaderType shaderType)
{
    const gl::State &glState   = context->getState();
    const gl::Program *program = glState.getProgram();
    for (const auto &atomicCounterBuffer : program->getState().getAtomicCounterBuffers())
    {
        const GLuint binding = atomicCounterBuffer.binding;
        const auto &buffer   = glState.getIndexedAtomicCounterBuffer(binding);

        if (buffer.get() == nullptr)
        {
            continue;
        }

        Buffer11 *bufferStorage = GetImplAs<Buffer11>(buffer.get());

        // TODO(enrico.galli@intel.com): Check to make sure that we aren't binding the same buffer
        // multiple times, as this is unsupported by D3D11. http://anglebug.com/3141

        // Bindings only have a valid size if bound using glBindBufferRange. Therefore, we use the
        // buffer size for glBindBufferBase
        GLsizeiptr viewSize = (buffer.getSize() > 0) ? buffer.getSize() : bufferStorage->getSize();
        d3d11::UnorderedAccessView *uavPtr = nullptr;
        ANGLE_TRY(bufferStorage->getRawUAVRange(context, buffer.getOffset(), viewSize, &uavPtr));

        // We need to make sure that resource being set to UnorderedAccessView slot |registerIndex|
        // is not bound on SRV.
        if (uavPtr && unsetConflictingView(uavPtr->get()))
        {
            mInternalDirtyBits.set(DIRTY_BIT_TEXTURE_AND_SAMPLER_STATE);
        }

        const unsigned int registerIndex =
            mProgramD3D->getAtomicCounterBufferRegisterIndex(binding, shaderType);

        if (shaderType == gl::ShaderType::Compute)
        {
            // |uavPtr| is treated as nullable above, so do not dereference it blindly here.
            // A null view is passed through to D3D11 instead, which leaves the slot without
            // a view rather than crashing.
            ID3D11UnorderedAccessView *uav     = uavPtr ? uavPtr->get() : nullptr;
            ID3D11DeviceContext *deviceContext = mRenderer->getDeviceContext();
            deviceContext->CSSetUnorderedAccessViews(registerIndex, 1, &uav, nullptr);
        }
        else
        {
            // Atomic counters on non-compute shaders are currently unimplemented
            // http://anglebug.com/1729
            UNIMPLEMENTED();
        }
    }

    return angle::Result::Continue;
}
......
......@@ -370,6 +370,8 @@ class StateManager11 final : angle::NonCopyable
angle::Result syncUniformBuffersForShader(const gl::Context *context,
gl::ShaderType shaderType);
angle::Result syncAtomicCounterBuffers(const gl::Context *context);
angle::Result syncAtomicCounterBuffersForShader(const gl::Context *context,
gl::ShaderType shaderType);
angle::Result syncShaderStorageBuffers(const gl::Context *context);
angle::Result syncTransformFeedbackBuffers(const gl::Context *context);
......
......@@ -177,10 +177,6 @@ TEST_P(AtomicCounterBufferTest31, AtomicCounterRead)
// Test atomic counter increment and decrement.
TEST_P(AtomicCounterBufferTest31, AtomicCounterIncrementAndDecrement)
{
// Skipping test while we work on enabling atomic counter buffer support in th D3D renderer.
// http://anglebug.com/1729
ANGLE_SKIP_TEST_IF(IsD3D11());
constexpr char kCS[] =
"#version 310 es\n"
"layout(local_size_x=1, local_size_y=1, local_size_z=1) in;\n"
......@@ -206,6 +202,8 @@ TEST_P(AtomicCounterBufferTest31, AtomicCounterIncrementAndDecrement)
glDispatchCompute(1, 1, 1);
EXPECT_GL_NO_ERROR();
glMemoryBarrier(GL_ATOMIC_COUNTER_BARRIER_BIT);
glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, atomicCounterBuffer);
void *mappedBuffer =
glMapBufferRange(GL_ATOMIC_COUNTER_BUFFER, 0, sizeof(GLuint) * 3, GL_MAP_READ_BIT);
......@@ -217,6 +215,58 @@ TEST_P(AtomicCounterBufferTest31, AtomicCounterIncrementAndDecrement)
EXPECT_EQ(0u, bufferData[2]);
}
// Tests multiple atomic counter buffers: three counters, each backed by its own
// buffer at bindings 0..2, are incremented once by a compute shader and read back.
TEST_P(AtomicCounterBufferTest31, AtomicCounterMultipleBuffers)
{
    GLint maxAtomicCounterBuffers = 0;
    glGetIntegerv(GL_MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS, &maxAtomicCounterBuffers);
    constexpr unsigned int kBufferCount = 3;
    // ES 3.1 table 20.45 only guarantees 1 atomic counter buffer
    ANGLE_SKIP_TEST_IF(maxAtomicCounterBuffers < static_cast<int>(kBufferCount));

    constexpr char kComputeShaderSource[] = R"(#version 310 es
layout(local_size_x=1, local_size_y=1, local_size_z=1) in;
layout(binding = 0) uniform atomic_uint ac1;
layout(binding = 1) uniform atomic_uint ac2;
layout(binding = 2) uniform atomic_uint ac3;
void main()
{
atomicCounterIncrement(ac1);
atomicCounterIncrement(ac2);
atomicCounterIncrement(ac3);
})";

    ANGLE_GL_COMPUTE_PROGRAM(program, kComputeShaderSource);
    glUseProgram(program);

    // Buffer |ii| starts with the value |ii| and is bound to binding point |ii|.
    GLBuffer atomicCounterBuffers[kBufferCount];
    for (unsigned int ii = 0; ii < kBufferCount; ++ii)
    {
        GLuint initialData[1] = {ii};
        glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, atomicCounterBuffers[ii]);
        glBufferData(GL_ATOMIC_COUNTER_BUFFER, sizeof(initialData), initialData, GL_STATIC_DRAW);
        glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, ii, atomicCounterBuffers[ii]);
    }

    glDispatchCompute(1, 1, 1);
    EXPECT_GL_NO_ERROR();
    glMemoryBarrier(GL_ATOMIC_COUNTER_BARRIER_BIT);

    // Each counter was incremented exactly once, so buffer |ii| must now hold |ii| + 1.
    for (unsigned int ii = 0; ii < kBufferCount; ++ii)
    {
        glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, atomicCounterBuffers[ii]);
        GLuint *mappedBuffer = static_cast<GLuint *>(
            glMapBufferRange(GL_ATOMIC_COUNTER_BUFFER, 0, sizeof(GLuint), GL_MAP_READ_BIT));
        // Mapping can fail and return null; stop here rather than dereference it.
        ASSERT_NE(nullptr, mappedBuffer);
        EXPECT_EQ(ii + 1, mappedBuffer[0]);
        glUnmapBuffer(GL_ATOMIC_COUNTER_BUFFER);
    }
}
ANGLE_INSTANTIATE_TEST(AtomicCounterBufferTest,
ES3_OPENGL(),
ES3_OPENGLES(),
......
......@@ -416,6 +416,69 @@ TEST_P(ProgramBinaryES31Test, ProgramBinaryWithComputeShader)
ASSERT_GL_NO_ERROR();
}
// Tests saving and loading the binary of a program whose compute shader uses
// atomic counters, then checks that the reloaded program updates the counters.
TEST_P(ProgramBinaryES31Test, ProgramBinaryWithAtomicCounterComputeShader)
{
    // We can't run the test if no program binary formats are supported.
    GLint binaryFormatCount = 0;
    glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &binaryFormatCount);
    ANGLE_SKIP_TEST_IF(binaryFormatCount == 0);

    constexpr char kComputeShader[] = R"(#version 310 es
layout(local_size_x=1, local_size_y=1, local_size_z=1) in;
layout(binding = 0, offset = 4) uniform atomic_uint ac[2];
void main() {
atomicCounterIncrement(ac[0]);
atomicCounterDecrement(ac[1]);
})";

    ANGLE_GL_COMPUTE_PROGRAM(program, kComputeShader);

    // Read back the binary.
    GLint programLength = 0;
    glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &programLength);
    ASSERT_GL_NO_ERROR();

    GLsizei readLength  = 0;
    GLenum binaryFormat = GL_NONE;
    std::vector<uint8_t> binary(programLength);
    glGetProgramBinary(program, programLength, &readLength, &binaryFormat, binary.data());
    ASSERT_GL_NO_ERROR();
    EXPECT_EQ(static_cast<GLsizei>(programLength), readLength);

    // Load a new program with the binary.
    ANGLE_GL_BINARY_ES3_PROGRAM(binaryProgram, binary, binaryFormat);
    ASSERT_GL_NO_ERROR();

    // Dispatch compute with the loaded binary program
    glUseProgram(binaryProgram);

    // The counters start at byte offset 4, so element 0 is untouched padding.
    // The initial value of 'ac[0]' is 3u, 'ac[1]' is 1u.
    unsigned int bufferData[3] = {11u, 3u, 1u};
    GLBuffer atomicCounterBuffer;
    glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, atomicCounterBuffer);
    glBufferData(GL_ATOMIC_COUNTER_BUFFER, sizeof(bufferData), bufferData, GL_STATIC_DRAW);
    glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, atomicCounterBuffer);

    glDispatchCompute(1, 1, 1);
    EXPECT_GL_NO_ERROR();
    glMemoryBarrier(GL_ATOMIC_COUNTER_BARRIER_BIT);

    glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, atomicCounterBuffer);
    void *mappedBuffer =
        glMapBufferRange(GL_ATOMIC_COUNTER_BUFFER, 0, sizeof(GLuint) * 3, GL_MAP_READ_BIT);
    // Mapping can fail and return null; stop here rather than memcpy from it.
    ASSERT_NE(nullptr, mappedBuffer);
    memcpy(bufferData, mappedBuffer, sizeof(bufferData));
    glUnmapBuffer(GL_ATOMIC_COUNTER_BUFFER);

    EXPECT_EQ(11u, bufferData[0]);
    EXPECT_EQ(4u, bufferData[1]);
    EXPECT_EQ(0u, bufferData[2]);
    ASSERT_GL_NO_ERROR();
}
// Tests that image texture works correctly when loading a program from binary.
TEST_P(ProgramBinaryES31Test, ImageTextureBinding)
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment