Commit 73badc07 by Xinghua Cao Committed by Commit Bot

ES31: Implement glDispatchCompute for D3D backend

BUG=angleproject:1955
TESTCASE=angle_end2end_tests

Change-Id: I69b4df83d67017d39df67753d6d17fc15ececebf
Reviewed-on: https://chromium-review.googlesource.com/462067
Commit-Queue: Jamie Madill <jmadill@chromium.org>
Reviewed-by: Jamie Madill <jmadill@chromium.org>
parent 7854d861
......@@ -639,13 +639,16 @@ void OutputHLSL::header(TInfoSinkBase &out, const BuiltInFunctionEmulator *built
else // Compute shader
{
ASSERT(mShaderType == GL_COMPUTE_SHADER);
out << "cbuffer DriverConstants : register(b1)\n"
"{\n";
if (mUsesNumWorkGroups)
{
out << "cbuffer DriverConstants : register(b1)\n"
"{\n";
out << " uint3 gl_NumWorkGroups : packoffset(c0);\n";
out << "};\n";
}
ASSERT(mOutputType == SH_HLSL_4_1_OUTPUT);
mUniformHLSL->samplerMetadataUniforms(out, "c1");
out << "};\n";
// Follow built-in variables would be initialized in
// DynamicHLSL::generateComputeShaderLinkHLSL, if they
......
......@@ -1384,6 +1384,16 @@ class ProgramD3D::GetGeometryExecutableTask : public ProgramD3D::GetExecutableTa
const gl::ContextState &mContextState;
};
// Hands back the program's compute shader executable through |outExecutable|.
// A null |outExecutable| is tolerated: nothing is written and the call still
// succeeds. The returned pointer is non-owning; mComputeExecutable keeps
// ownership of the executable.
gl::Error ProgramD3D::getComputeExecutable(ShaderExecutableD3D **outExecutable)
{
    if (outExecutable == nullptr)
    {
        return gl::NoError();
    }

    *outExecutable = mComputeExecutable.get();
    return gl::NoError();
}
LinkResult ProgramD3D::compileProgramExecutables(const gl::ContextState &contextState,
gl::InfoLog &infoLog)
{
......@@ -1695,6 +1705,19 @@ gl::Error ProgramD3D::applyUniforms(GLenum drawMode)
return gl::NoError();
}
// Pushes this program's uniforms to the renderer for compute dispatch, then
// marks every uniform clean. If the renderer reports an error, ANGLE_TRY
// returns early and the dirty flags are left untouched.
gl::Error ProgramD3D::applyComputeUniforms()
{
    // The sampler mapping must already be up to date when this is called.
    ASSERT(!mDirtySamplerMapping);

    ANGLE_TRY(mRenderer->applyComputeUniforms(*this, mD3DUniforms));

    // Everything has been handed to the renderer; clear the dirty flags.
    for (D3DUniform *uploadedUniform : mD3DUniforms)
    {
        uploadedUniform->dirty = false;
    }

    return gl::NoError();
}
gl::Error ProgramD3D::applyUniformBuffers(const gl::ContextState &data)
{
if (mState.getUniformBlocks().empty())
......
......@@ -177,10 +177,7 @@ class ProgramD3D : public ProgramImpl
GLenum drawMode,
ShaderExecutableD3D **outExecutable,
gl::InfoLog *infoLog);
gl::Error getComputeExecutable(const gl::ContextState &data,
ShaderExecutableD3D **outExecutable,
gl::InfoLog *infoLog);
gl::Error getComputeExecutable(ShaderExecutableD3D **outExecutable);
LinkResult link(ContextImpl *contextImpl,
const gl::VaryingPacking &packing,
gl::InfoLog &infoLog) override;
......@@ -196,6 +193,7 @@ class ProgramD3D : public ProgramImpl
void initializeUniformStorage();
gl::Error applyUniforms(GLenum drawMode);
gl::Error applyComputeUniforms();
gl::Error applyUniformBuffers(const gl::ContextState &data);
void dirtyAllUniforms();
......
......@@ -272,6 +272,9 @@ class RendererD3D : public BufferFactoryD3D
angle::WorkerThreadPool *getWorkerThreadPool();
virtual gl::Error applyComputeUniforms(const ProgramD3D &programD3D,
const std::vector<D3DUniform *> &uniformArray) = 0;
protected:
virtual bool getLUID(LUID *adapterLuid) const = 0;
virtual void generateCaps(gl::Caps *outCaps,
......
......@@ -282,8 +282,7 @@ const gl::Limitations &Context11::getNativeLimitations() const
// Implements glDispatchCompute for the D3D11 backend by forwarding the work
// group counts to the renderer, passing this context along.
//
// The previous placeholder body (UNIMPLEMENTED() followed by an unconditional
// gl::NoError() return) is removed: left in place ahead of the forwarding
// call it made the real dispatch unreachable.
gl::Error Context11::dispatchCompute(GLuint numGroupsX, GLuint numGroupsY, GLuint numGroupsZ)
{
    return mRenderer->dispatchCompute(this, numGroupsX, numGroupsY, numGroupsZ);
}
} // namespace rx
......@@ -427,10 +427,12 @@ Renderer11::Renderer11(egl::Display *display)
mDriverConstantBufferVS = nullptr;
mDriverConstantBufferPS = nullptr;
mDriverConstantBufferCS = nullptr;
mAppliedVertexShader = NULL;
mAppliedGeometryShader = NULL;
mAppliedPixelShader = NULL;
mAppliedVertexShader = angle::DirtyPointer;
mAppliedGeometryShader = angle::DirtyPointer;
mAppliedPixelShader = angle::DirtyPointer;
mAppliedComputeShader = angle::DirtyPointer;
mAppliedTFObject = angle::DirtyPointer;
......@@ -835,6 +837,10 @@ void Renderer11::initializeDevice()
mCurPixelSamplerStates.resize(rendererCaps.maxTextureImageUnits);
mSamplerMetadataPS.initData(rendererCaps.maxTextureImageUnits);
mForceSetComputeSamplerStates.resize(rendererCaps.maxComputeTextureImageUnits);
mCurComputeSamplerStates.resize(rendererCaps.maxComputeTextureImageUnits);
mSamplerMetadataCS.initData(rendererCaps.maxComputeTextureImageUnits);
mStateManager.initialize(rendererCaps);
markAllStateDirty();
......@@ -1476,6 +1482,26 @@ gl::Error Renderer11::setSamplerState(gl::SamplerType type,
metadata = &mSamplerMetadataVS;
}
else if (type == gl::SAMPLER_COMPUTE)
{
ASSERT(static_cast<unsigned int>(index) < getNativeCaps().maxComputeTextureImageUnits);
if (mForceSetComputeSamplerStates[index] ||
memcmp(&samplerState, &mCurComputeSamplerStates[index], sizeof(gl::SamplerState)) != 0)
{
ID3D11SamplerState *dxSamplerState = nullptr;
ANGLE_TRY(mStateCache.getSamplerState(samplerState, &dxSamplerState));
ASSERT(dxSamplerState != nullptr);
mDeviceContext->CSSetSamplers(index, 1, &dxSamplerState);
mCurComputeSamplerStates[index] = samplerState;
}
mForceSetComputeSamplerStates[index] = false;
metadata = &mSamplerMetadataCS;
}
else
UNREACHABLE();
......@@ -2782,6 +2808,12 @@ template void Renderer11::applyDriverConstantsIfNeeded<dx_PixelConstants11>(
SamplerMetadataD3D11 *samplerMetadata,
size_t samplerMetadataReferencedBytes,
ID3D11Buffer *driverConstantBuffer);
template void Renderer11::applyDriverConstantsIfNeeded<dx_ComputeConstants11>(
dx_ComputeConstants11 *appliedConstants,
const dx_ComputeConstants11 &constants,
SamplerMetadataD3D11 *samplerMetadata,
size_t samplerMetadataReferencedBytes,
ID3D11Buffer *driverConstantBuffer);
void Renderer11::markAllStateDirty()
{
......@@ -2797,6 +2829,11 @@ void Renderer11::markAllStateDirty()
mForceSetPixelSamplerStates[fsamplerId] = true;
}
for (size_t csamplerId = 0; csamplerId < mForceSetComputeSamplerStates.size(); ++csamplerId)
{
mForceSetComputeSamplerStates[csamplerId] = true;
}
mStateManager.invalidateEverything();
mAppliedIB = nullptr;
......@@ -2806,6 +2843,7 @@ void Renderer11::markAllStateDirty()
mAppliedVertexShader = angle::DirtyPointer;
mAppliedGeometryShader = angle::DirtyPointer;
mAppliedPixelShader = angle::DirtyPointer;
mAppliedComputeShader = angle::DirtyPointer;
mAppliedTFObject = angle::DirtyPointer;
......@@ -2827,6 +2865,7 @@ void Renderer11::markAllStateDirty()
mCurrentVertexConstantBuffer = nullptr;
mCurrentPixelConstantBuffer = nullptr;
mCurrentGeometryConstantBuffer = nullptr;
mCurrentComputeConstantBuffer = nullptr;
mCurrentPrimitiveTopology = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED;
}
......@@ -2848,6 +2887,7 @@ void Renderer11::releaseDeviceResources()
SafeRelease(mDriverConstantBufferVS);
SafeRelease(mDriverConstantBufferPS);
SafeRelease(mDriverConstantBufferCS);
SafeRelease(mSyncQuery);
}
......@@ -4845,4 +4885,141 @@ gl::DebugAnnotator *Renderer11::getAnnotator()
return mAnnotator;
}
// Binds the compute shader of the currently bound program to the device
// context (only when it differs from the shader last applied) and then
// applies the program's compute uniforms.
gl::Error Renderer11::applyComputeShader(const gl::ContextState &data)
{
    ANGLE_TRY(ensureHLSLCompilerInitialized());

    ProgramD3D *programD3D = GetImplAs<ProgramD3D>(data.getState().getProgram());

    ShaderExecutableD3D *executable = nullptr;
    ANGLE_TRY(programD3D->getComputeExecutable(&executable));
    ASSERT(executable != nullptr);

    ID3D11ComputeShader *shader  = GetAs<ShaderExecutable11>(executable)->getComputeShader();
    const uintptr_t shaderHandle = reinterpret_cast<uintptr_t>(shader);

    if (shaderHandle != mAppliedComputeShader)
    {
        mDeviceContext->CSSetShader(shader, nullptr, 0);
        mAppliedComputeShader = shaderHandle;

        // A different shader was bound, so every uniform has to be re-sent.
        programD3D->dirtyAllUniforms();
    }

    return programD3D->applyComputeUniforms();
}
// Executes a compute dispatch of numGroupsX x numGroupsY x numGroupsZ work
// groups using the program currently bound in |context|.
//
// Order of operations: record the work group counts as compute driver
// constants, refresh the program's sampler mapping, generate swizzles and
// apply textures, bind the compute shader (which also applies its uniforms),
// and finally issue the D3D11 Dispatch call. Any failing step returns its
// error before Dispatch is reached.
gl::Error Renderer11::dispatchCompute(Context11 *context,
                                      GLuint numGroupsX,
                                      GLuint numGroupsY,
                                      GLuint numGroupsZ)
{
    const auto &contextState = context->getContextState();

    gl::Program *boundProgram = contextState.getState().getProgram();
    ASSERT(boundProgram != nullptr);
    ProgramD3D *boundProgramD3D = GetImplAs<ProgramD3D>(boundProgram);

    mStateManager.setComputeConstants(numGroupsX, numGroupsY, numGroupsZ);

    boundProgramD3D->updateSamplerMapping();

    ANGLE_TRY(generateSwizzles(contextState, gl::SAMPLER_COMPUTE));
    ANGLE_TRY(applyTextures(context, contextState));
    ANGLE_TRY(applyComputeShader(contextState));

    // TODO(Xinghua): applyUniformBuffers for compute shader.
    mDeviceContext->Dispatch(numGroupsX, numGroupsY, numGroupsZ);

    return gl::NoError();
}
// Uploads the compute program's default-block uniforms and the compute driver
// constants to the device context.
//
// Steps:
//   1. Count the registers used by non-sampler uniforms and check whether any
//      of them is dirty.
//   2. If there is dirty data, map the program's compute constant buffer with
//      WRITE_DISCARD and copy every non-sampler uniform into it.
//   3. Bind that constant buffer to the default-uniform-block slot if it is
//      not the one currently bound.
//   4. Lazily create and bind the compute driver constant buffer, then update
//      it (work group counts plus sampler metadata) if needed.
//
// |programD3D|   program whose compute uniform storage and sampler range are used.
// |uniformArray| the program's uniforms; each must be referenced by the compute
//                shader.
// Returns gl::NoError() on success, or an out-of-memory error when the constant
// buffer cannot be mapped or the driver constant buffer cannot be created.
gl::Error Renderer11::applyComputeUniforms(const ProgramD3D &programD3D,
                                           const std::vector<D3DUniform *> &uniformArray)
{
    unsigned int totalRegisterCountCS = 0;
    bool computeUniformsDirty         = false;

    for (const D3DUniform *uniform : uniformArray)
    {
        ASSERT(uniform->isReferencedByComputeShader());

        // TODO(Xinghua): add isImage() and isAtomicCounter().
        // Only non-sampler uniforms live in the constant buffer (the upload
        // loop below skips samplers), so only they may contribute to the
        // register count and to the decision whether an upload is needed.
        if (!uniform->isSampler())
        {
            totalRegisterCountCS += uniform->registerCount;
            computeUniformsDirty = (computeUniformsDirty || uniform->dirty);
        }
    }

    const UniformStorage11 *computeUniformStorage =
        GetAs<UniformStorage11>(&programD3D.getComputeUniformStorage());
    ASSERT(computeUniformStorage);

    ID3D11Buffer *computeConstantBuffer = computeUniformStorage->getConstantBuffer();

    if (totalRegisterCountCS > 0 && computeUniformsDirty)
    {
        D3D11_MAPPED_SUBRESOURCE map = {0};
        HRESULT result =
            mDeviceContext->Map(computeConstantBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
        ASSERT(SUCCEEDED(result));
        if (FAILED(result))
        {
            // Without this check a failed Map would fall through to a write
            // through a null pointer in builds where ASSERT compiles away.
            return gl::OutOfMemory()
                   << "Failed to map compute shader constant buffer, " << result;
        }

        // View the mapped memory as an array of 4-component registers.
        auto *mapCS = static_cast<float(*)[4]>(map.pData);
        ASSERT(mapCS);

        for (const D3DUniform *uniform : uniformArray)
        {
            ASSERT(uniform->isReferencedByComputeShader());

            if (uniform->isSampler())
            {
                continue;
            }

            // The copy starts at component |registerElement| of the first
            // register, so (4 - registerElement) components remain in each
            // register from that offset. Deriving this from registerCount
            // would copy nothing at all for a uniform spanning 4 registers.
            unsigned int componentCount = (4 - uniform->registerElement);
            memcpy(&mapCS[uniform->csRegisterIndex][uniform->registerElement], uniform->data,
                   uniform->registerCount * sizeof(float) * componentCount);
        }

        mDeviceContext->Unmap(computeConstantBuffer, 0);
    }

    if (mCurrentComputeConstantBuffer != computeConstantBuffer)
    {
        mDeviceContext->CSSetConstantBuffers(
            d3d11::RESERVED_CONSTANT_BUFFER_SLOT_DEFAULT_UNIFORM_BLOCK, 1, &computeConstantBuffer);
        mCurrentComputeConstantBuffer = computeConstantBuffer;
    }

    if (!mDriverConstantBufferCS)
    {
        // The driver constant buffer holds dx_ComputeConstants11 followed by
        // the compute sampler metadata.
        D3D11_BUFFER_DESC constantBufferDescription = {0};
        d3d11::InitConstantBufferDesc(
            &constantBufferDescription,
            sizeof(dx_ComputeConstants11) + mSamplerMetadataCS.sizeBytes());
        HRESULT result =
            mDevice->CreateBuffer(&constantBufferDescription, nullptr, &mDriverConstantBufferCS);
        ASSERT(SUCCEEDED(result));
        if (FAILED(result))
        {
            return gl::OutOfMemory()
                   << "Failed to create compute shader constant buffer, " << result;
        }
        mDeviceContext->CSSetConstantBuffers(d3d11::RESERVED_CONSTANT_BUFFER_SLOT_DRIVER, 1,
                                             &mDriverConstantBufferCS);
    }

    const dx_ComputeConstants11 &computeConstants = mStateManager.getComputeConstants();
    size_t samplerMetadataReferencedBytesCS = sizeof(SamplerMetadataD3D11::dx_SamplerMetadata) *
                                              programD3D.getUsedSamplerRange(gl::SAMPLER_COMPUTE);
    applyDriverConstantsIfNeeded(&mAppliedComputeConstants, computeConstants, &mSamplerMetadataCS,
                                 samplerMetadataReferencedBytesCS, mDriverConstantBufferCS);

    return gl::NoError();
}
} // namespace rx
......@@ -388,6 +388,14 @@ class Renderer11 : public RendererD3D
gl::Version getMaxSupportedESVersion() const override;
gl::Error dispatchCompute(Context11 *context,
GLuint numGroupsX,
GLuint numGroupsY,
GLuint numGroupsZ);
gl::Error applyComputeUniforms(const ProgramD3D &programD3D,
const std::vector<D3DUniform *> &uniformArray) override;
gl::Error applyComputeShader(const gl::ContextState &data);
protected:
gl::Error clearTextures(gl::SamplerType samplerType, size_t rangeStart, size_t rangeEnd) override;
......@@ -520,6 +528,9 @@ class Renderer11 : public RendererD3D
std::vector<bool> mForceSetPixelSamplerStates;
std::vector<gl::SamplerState> mCurPixelSamplerStates;
std::vector<bool> mForceSetComputeSamplerStates;
std::vector<gl::SamplerState> mCurComputeSamplerStates;
StateManager11 mStateManager;
// Currently applied primitive topology
......@@ -538,6 +549,7 @@ class Renderer11 : public RendererD3D
uintptr_t mAppliedVertexShader;
uintptr_t mAppliedGeometryShader;
uintptr_t mAppliedPixelShader;
uintptr_t mAppliedComputeShader;
dx_VertexConstants11 mAppliedVertexConstants;
ID3D11Buffer *mDriverConstantBufferVS;
......@@ -555,6 +567,11 @@ class Renderer11 : public RendererD3D
GLintptr mCurrentConstantBufferPSOffset[gl::IMPLEMENTATION_MAX_FRAGMENT_SHADER_UNIFORM_BUFFERS];
GLsizeiptr mCurrentConstantBufferPSSize[gl::IMPLEMENTATION_MAX_FRAGMENT_SHADER_UNIFORM_BUFFERS];
dx_ComputeConstants11 mAppliedComputeConstants;
ID3D11Buffer *mDriverConstantBufferCS;
SamplerMetadataD3D11 mSamplerMetadataCS;
ID3D11Buffer *mCurrentComputeConstantBuffer;
ID3D11Buffer *mCurrentGeometryConstantBuffer;
// Vertex, index and input layouts
......
......@@ -246,6 +246,13 @@ void StateManager11::updatePresentPath(bool presentPathFastActive,
}
}
// Records the work group counts of the upcoming dispatch in the compute
// driver constants: X, Y and Z are stored at numWorkGroups[0], [1] and [2].
void StateManager11::setComputeConstants(GLuint numGroupsX, GLuint numGroupsY, GLuint numGroupsZ)
{
    const GLuint groupCounts[3] = {numGroupsX, numGroupsY, numGroupsZ};

    for (size_t axis = 0; axis < 3; ++axis)
    {
        mComputeConstants.numWorkGroups[axis] = groupCounts[axis];
    }
}
void StateManager11::syncState(const gl::State &state, const gl::State::DirtyBits &dirtyBits)
{
if (!dirtyBits.any())
......
......@@ -41,6 +41,12 @@ struct dx_PixelConstants11
float viewScale[4];
};
// CPU-side copy of the driver constants supplied to compute shaders.
// numWorkGroups holds the X, Y and Z work group counts of the dispatch (see
// StateManager11::setComputeConstants). The translated HLSL declares the
// matching value as "uint3 gl_NumWorkGroups : packoffset(c0)" in the
// DriverConstants cbuffer, so field order and size must not change.
// The driver constant buffer is allocated as sizeof(dx_ComputeConstants11)
// plus the sampler metadata size, i.e. sampler metadata follows this struct.
struct dx_ComputeConstants11
{
// Work group counts: [0] = X, [1] = Y, [2] = Z.
unsigned int numWorkGroups[3];
unsigned int padding; // This just pads the struct to 16 bytes
};
class StateManager11 final : angle::NonCopyable
{
public:
......@@ -69,6 +75,9 @@ class StateManager11 final : angle::NonCopyable
const dx_VertexConstants11 &getVertexConstants() const { return mVertexConstants; }
const dx_PixelConstants11 &getPixelConstants() const { return mPixelConstants; }
const dx_ComputeConstants11 &getComputeConstants() const { return mComputeConstants; }
void setComputeConstants(GLuint numGroupsX, GLuint numGroupsY, GLuint numGroupsZ);
void updateStencilSizeIfChanged(bool depthStencilInitialized, unsigned int stencilSize);
......@@ -143,6 +152,8 @@ class StateManager11 final : angle::NonCopyable
dx_VertexConstants11 mVertexConstants;
dx_PixelConstants11 mPixelConstants;
dx_ComputeConstants11 mComputeConstants;
// Render target variables
gl::Extents mViewportBounds;
......
......@@ -3082,4 +3082,11 @@ gl::Version Renderer9::getMaxSupportedESVersion() const
return gl::Version(2, 0);
}
// D3D9 stub for the RendererD3D::applyComputeUniforms interface.
// Both parameters are ignored; the call flags itself as unimplemented and
// always returns an internal error.
gl::Error Renderer9::applyComputeUniforms(const ProgramD3D &programD3D,
const std::vector<D3DUniform *> &uniformArray)
{
// Marks the unimplemented path, then reports the failure to the caller.
UNIMPLEMENTED();
return gl::InternalError() << "Compute shader is not implemented on D3D9";
}
} // namespace rx
......@@ -354,6 +354,9 @@ class Renderer9 : public RendererD3D
gl::Version getMaxSupportedESVersion() const override;
gl::Error applyComputeUniforms(const ProgramD3D &programD3D,
const std::vector<D3DUniform *> &uniformArray) override;
protected:
gl::Error clearTextures(gl::SamplerType samplerType, size_t rangeStart, size_t rangeEnd) override;
......
......@@ -21,12 +21,6 @@ class ComputeShaderTest : public ANGLETest
ComputeShaderTest() {}
};
class DispatchComputeTest : public ANGLETest
{
protected:
DispatchComputeTest() {}
};
class ComputeShaderTestES3 : public ANGLETest
{
protected:
......@@ -231,16 +225,15 @@ TEST_P(ComputeShaderTest, AccessPartSpecialVariables)
ANGLE_GL_COMPUTE_PROGRAM(program, csSource);
}
// TODO(Xinghua): A temporary test for glDispatchCompute, remove and merge it
// ComputeShaderTest after implementing this API on D3D backend.
TEST_P(DispatchComputeTest, DispatchCompute)
// Use glDispatchCompute to define work group count.
TEST_P(ComputeShaderTest, DispatchCompute)
{
const std::string csSource =
"#version 310 es\n"
"layout(local_size_x=4, local_size_y=3, local_size_z=2) in;\n"
"void main()\n"
"{\n"
" uvec3 temp1 = gl_NumWorkGroups;\n"
" uvec3 temp = gl_NumWorkGroups;\n"
"}\n";
ANGLE_GL_COMPUTE_PROGRAM(program, csSource);
......@@ -260,7 +253,6 @@ TEST_P(ComputeShaderTestES3, NotSupported)
}
ANGLE_INSTANTIATE_TEST(ComputeShaderTest, ES31_OPENGL(), ES31_OPENGLES(), ES31_D3D11());
ANGLE_INSTANTIATE_TEST(DispatchComputeTest, ES31_OPENGL(), ES31_OPENGLES());
ANGLE_INSTANTIATE_TEST(ComputeShaderTestES3, ES3_OPENGL(), ES3_OPENGLES());
} // namespace
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment