Commit be5e2ec3 by Jamie Madill Committed by Commit Bot

Remove uniform memory copy from GL front-end.

This moves the uniform query to the back-end. In D3D, this requires a bit more redesign, especially for matrix uniforms. Gives about a 10% speed improvement in the GL/NULL uniforms stress test on Windows (UniformsBenchmark.Run/gl_null_400_vec4).

BUG=angleproject:1390
Change-Id: Idac22a77118e9e94d2f28c585e31ff0bc785ba94
Reviewed-on: https://chromium-review.googlesource.com/623929
Commit-Queue: Jamie Madill <jmadill@chromium.org>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
parent 6ee26d7a
...@@ -618,17 +618,16 @@ class Program final : angle::NonCopyable, public LabeledObject ...@@ -618,17 +618,16 @@ class Program final : angle::NonCopyable, public LabeledObject
// Both of these functions update the cached uniform values and return a modified "count" // Both of these functions update the cached uniform values and return a modified "count"
// so that the uniform update doesn't overflow the uniform. // so that the uniform update doesn't overflow the uniform.
template <typename T> template <typename T>
GLsizei setUniformInternal(GLint location, GLsizei count, int vectorSize, const T *v); GLsizei clampUniformCount(const VariableLocation &locationInfo,
GLsizei count,
int vectorSize,
const T *v);
template <size_t cols, size_t rows, typename T> template <size_t cols, size_t rows, typename T>
GLsizei setMatrixUniformInternal(GLint location, GLsizei clampMatrixUniformCount(GLint location, GLsizei count, GLboolean transpose, const T *v);
GLsizei count,
GLboolean transpose,
const T *v);
template <typename T>
void updateSamplerUniform(const VariableLocation &locationInfo, void updateSamplerUniform(const VariableLocation &locationInfo,
const uint8_t *destPointer,
GLsizei clampedCount, GLsizei clampedCount,
const T *v); const GLint *v);
template <typename DestT> template <typename DestT>
void getUniformInternal(const Context *context, void getUniformInternal(const Context *context,
......
...@@ -46,15 +46,10 @@ LinkedUniform::LinkedUniform(const sh::Uniform &uniform) ...@@ -46,15 +46,10 @@ LinkedUniform::LinkedUniform(const sh::Uniform &uniform)
LinkedUniform::LinkedUniform(const LinkedUniform &uniform) LinkedUniform::LinkedUniform(const LinkedUniform &uniform)
: sh::Uniform(uniform), bufferIndex(uniform.bufferIndex), blockInfo(uniform.blockInfo) : sh::Uniform(uniform), bufferIndex(uniform.bufferIndex), blockInfo(uniform.blockInfo)
{ {
// This function is not intended to be called during runtime.
ASSERT(uniform.mLazyData.empty());
} }
LinkedUniform &LinkedUniform::operator=(const LinkedUniform &uniform) LinkedUniform &LinkedUniform::operator=(const LinkedUniform &uniform)
{ {
// This function is not intended to be called during runtime.
ASSERT(uniform.mLazyData.empty());
sh::Uniform::operator=(uniform); sh::Uniform::operator=(uniform);
bufferIndex = uniform.bufferIndex; bufferIndex = uniform.bufferIndex;
blockInfo = uniform.blockInfo; blockInfo = uniform.blockInfo;
...@@ -71,23 +66,6 @@ bool LinkedUniform::isInDefaultBlock() const ...@@ -71,23 +66,6 @@ bool LinkedUniform::isInDefaultBlock() const
return bufferIndex == -1; return bufferIndex == -1;
} }
size_t LinkedUniform::dataSize() const
{
ASSERT(type != GL_STRUCT_ANGLEX);
if (mLazyData.empty())
{
mLazyData.resize(VariableExternalSize(type) * elementCount());
ASSERT(!mLazyData.empty());
}
return mLazyData.size();
}
const uint8_t *LinkedUniform::data() const
{
return const_cast<LinkedUniform *>(this)->data();
}
bool LinkedUniform::isSampler() const bool LinkedUniform::isSampler() const
{ {
return IsSamplerType(type); return IsSamplerType(type);
...@@ -118,17 +96,6 @@ size_t LinkedUniform::getElementComponents() const ...@@ -118,17 +96,6 @@ size_t LinkedUniform::getElementComponents() const
return VariableComponentCount(type); return VariableComponentCount(type);
} }
uint8_t *LinkedUniform::getDataPtrToElement(size_t elementIndex)
{
ASSERT((!isArray() && elementIndex == 0) || (isArray() && elementIndex < arraySize));
return data() + (elementIndex > 0 ? (getElementSize() * elementIndex) : 0u);
}
const uint8_t *LinkedUniform::getDataPtrToElement(size_t elementIndex) const
{
return const_cast<LinkedUniform *>(this)->getDataPtrToElement(elementIndex);
}
ShaderVariableBuffer::ShaderVariableBuffer() ShaderVariableBuffer::ShaderVariableBuffer()
: binding(0), : binding(0),
dataSize(0), dataSize(0),
......
...@@ -37,19 +37,6 @@ struct LinkedUniform : public sh::Uniform ...@@ -37,19 +37,6 @@ struct LinkedUniform : public sh::Uniform
LinkedUniform &operator=(const LinkedUniform &uniform); LinkedUniform &operator=(const LinkedUniform &uniform);
~LinkedUniform(); ~LinkedUniform();
size_t dataSize() const;
uint8_t *data()
{
if (mLazyData.empty())
{
// dataSize() will init the data store.
size_t size = dataSize();
memset(mLazyData.data(), 0, size);
}
return mLazyData.data();
}
const uint8_t *data() const;
bool isSampler() const; bool isSampler() const;
bool isImage() const; bool isImage() const;
bool isAtomicCounter() const; bool isAtomicCounter() const;
...@@ -57,15 +44,10 @@ struct LinkedUniform : public sh::Uniform ...@@ -57,15 +44,10 @@ struct LinkedUniform : public sh::Uniform
bool isField() const; bool isField() const;
size_t getElementSize() const; size_t getElementSize() const;
size_t getElementComponents() const; size_t getElementComponents() const;
uint8_t *getDataPtrToElement(size_t elementIndex);
const uint8_t *getDataPtrToElement(size_t elementIndex) const;
// Identifies the containing buffer backed resource -- interface block or atomic counter buffer. // Identifies the containing buffer backed resource -- interface block or atomic counter buffer.
int bufferIndex; int bufferIndex;
sh::BlockMemberInfo blockInfo; sh::BlockMemberInfo blockInfo;
private:
mutable angle::MemoryBuffer mLazyData;
}; };
// Parent struct for atomic counter, uniform block, and shader storage block buffer, which all // Parent struct for atomic counter, uniform block, and shader storage block buffer, which all
......
...@@ -32,6 +32,8 @@ class ShaderExecutableD3D; ...@@ -32,6 +32,8 @@ class ShaderExecutableD3D;
#endif #endif
// Helper struct representing a single shader uniform // Helper struct representing a single shader uniform
// TODO(jmadill): Make uniform blocks shared between all programs, so we don't need separate
// register indices.
struct D3DUniform : private angle::NonCopyable struct D3DUniform : private angle::NonCopyable
{ {
D3DUniform(GLenum typeIn, D3DUniform(GLenum typeIn,
...@@ -46,14 +48,18 @@ struct D3DUniform : private angle::NonCopyable ...@@ -46,14 +48,18 @@ struct D3DUniform : private angle::NonCopyable
bool isReferencedByFragmentShader() const; bool isReferencedByFragmentShader() const;
bool isReferencedByComputeShader() const; bool isReferencedByComputeShader() const;
const uint8_t *firstNonNullData() const;
const uint8_t *getDataPtrToElement(size_t elementIndex) const;
// Duplicated from the GL layer // Duplicated from the GL layer
GLenum type; GLenum type;
std::string name; std::string name;
unsigned int arraySize; unsigned int arraySize;
// Pointer to a system copy of the data. // Pointers to system copies of the data. Separate pointers for each uniform storage type.
// TODO(jmadill): remove this in favor of gl::LinkedUniform::data(). uint8_t *vsData;
uint8_t *data; uint8_t *psData;
uint8_t *csData;
// Has the data been updated since the last sync? // Has the data been updated since the last sync?
bool dirty; bool dirty;
...@@ -68,6 +74,9 @@ struct D3DUniform : private angle::NonCopyable ...@@ -68,6 +74,9 @@ struct D3DUniform : private angle::NonCopyable
// uniforms // uniforms
// inside aggregate types, which are packed according C-like structure rules. // inside aggregate types, which are packed according C-like structure rules.
unsigned int registerElement; unsigned int registerElement;
// Special buffer for sampler values.
std::vector<GLint> mSamplerData;
}; };
struct D3DUniformBlock struct D3DUniformBlock
...@@ -371,14 +380,29 @@ class ProgramD3D : public ProgramImpl ...@@ -371,14 +380,29 @@ class ProgramD3D : public ProgramImpl
void getUniformInternal(GLint location, DestT *dataOut) const; void getUniformInternal(GLint location, DestT *dataOut) const;
template <typename T> template <typename T>
void setUniform(GLint location, GLsizei count, const T *v, GLenum targetUniformType); void setUniformImpl(const gl::VariableLocation &locationInfo,
GLsizei count,
const T *v,
uint8_t *targetData,
GLenum targetUniformType);
template <typename T>
void setUniformInternal(GLint location, GLsizei count, const T *v, GLenum targetUniformType);
template <int cols, int rows>
void setUniformMatrixfvImpl(GLint location,
GLsizei count,
GLboolean transpose,
const GLfloat *value,
uint8_t *targetData,
GLenum targetUniformType);
template <int cols, int rows> template <int cols, int rows>
void setUniformMatrixfv(GLint location, void setUniformMatrixfvInternal(GLint location,
GLsizei count, GLsizei count,
GLboolean transpose, GLboolean transpose,
const GLfloat *value, const GLfloat *value,
GLenum targetUniformType); GLenum targetUniformType);
gl::LinkResult compileProgramExecutables(const gl::Context *context, gl::InfoLog &infoLog); gl::LinkResult compileProgramExecutables(const gl::Context *context, gl::InfoLog &infoLog);
gl::LinkResult compileComputeExecutable(const gl::Context *context, gl::InfoLog &infoLog); gl::LinkResult compileComputeExecutable(const gl::Context *context, gl::InfoLog &infoLog);
...@@ -387,6 +411,7 @@ class ProgramD3D : public ProgramImpl ...@@ -387,6 +411,7 @@ class ProgramD3D : public ProgramImpl
const BuiltinInfo &builtins); const BuiltinInfo &builtins);
D3DUniform *getD3DUniformByName(const std::string &name); D3DUniform *getD3DUniformByName(const std::string &name);
D3DUniform *getD3DUniformFromLocation(GLint location); D3DUniform *getD3DUniformFromLocation(GLint location);
const D3DUniform *getD3DUniformFromLocation(GLint location) const;
void initAttribLocationsToD3DSemantic(const gl::Context *context); void initAttribLocationsToD3DSemantic(const gl::Context *context);
......
...@@ -44,9 +44,13 @@ void ShaderExecutableD3D::appendDebugInfo(const std::string &info) ...@@ -44,9 +44,13 @@ void ShaderExecutableD3D::appendDebugInfo(const std::string &info)
mDebugInfo += info; mDebugInfo += info;
} }
UniformStorageD3D::UniformStorageD3D(size_t initialSize) : mUniformData()
UniformStorageD3D::UniformStorageD3D(size_t initialSize) : mSize(initialSize)
{ {
bool result = mUniformData.resize(initialSize);
ASSERT(result);
// Uniform data is zero-initialized by default.
mUniformData.fill(0);
} }
UniformStorageD3D::~UniformStorageD3D() UniformStorageD3D::~UniformStorageD3D()
...@@ -55,7 +59,13 @@ UniformStorageD3D::~UniformStorageD3D() ...@@ -55,7 +59,13 @@ UniformStorageD3D::~UniformStorageD3D()
size_t UniformStorageD3D::size() const size_t UniformStorageD3D::size() const
{ {
return mSize; return mUniformData.size();
} }
uint8_t *UniformStorageD3D::getDataPointer(unsigned int registerIndex, unsigned int registerElement)
{
size_t offset = ((registerIndex * 4 + registerElement) * sizeof(float));
return mUniformData.data() + offset;
} }
} // namespace rx
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#ifndef LIBANGLE_RENDERER_D3D_SHADEREXECUTABLED3D_H_ #ifndef LIBANGLE_RENDERER_D3D_SHADEREXECUTABLED3D_H_
#define LIBANGLE_RENDERER_D3D_SHADEREXECUTABLED3D_H_ #define LIBANGLE_RENDERER_D3D_SHADEREXECUTABLED3D_H_
#include "common/MemoryBuffer.h"
#include "common/debug.h" #include "common/debug.h"
#include <vector> #include <vector>
...@@ -45,10 +46,12 @@ class UniformStorageD3D : angle::NonCopyable ...@@ -45,10 +46,12 @@ class UniformStorageD3D : angle::NonCopyable
size_t size() const; size_t size() const;
uint8_t *getDataPointer(unsigned int registerIndex, unsigned int registerElement);
private: private:
size_t mSize; angle::MemoryBuffer mUniformData;
}; };
} } // namespace rx
#endif // LIBANGLE_RENDERER_D3D_SHADEREXECUTABLED3D_H_ #endif // LIBANGLE_RENDERER_D3D_SHADEREXECUTABLED3D_H_
...@@ -2171,16 +2171,14 @@ gl::Error Renderer11::applyUniforms(const ProgramD3D &programD3D, ...@@ -2171,16 +2171,14 @@ gl::Error Renderer11::applyUniforms(const ProgramD3D &programD3D,
const d3d11::Buffer *pixelConstantBuffer = nullptr; const d3d11::Buffer *pixelConstantBuffer = nullptr;
ANGLE_TRY(fragmentUniformStorage->getConstantBuffer(this, &pixelConstantBuffer)); ANGLE_TRY(fragmentUniformStorage->getConstantBuffer(this, &pixelConstantBuffer));
float(*mapVS)[4] = nullptr;
float(*mapPS)[4] = nullptr;
if (totalRegisterCountVS > 0 && vertexUniformsDirty) if (totalRegisterCountVS > 0 && vertexUniformsDirty)
{ {
D3D11_MAPPED_SUBRESOURCE map = {0}; D3D11_MAPPED_SUBRESOURCE map = {0};
HRESULT result = HRESULT result =
mDeviceContext->Map(vertexConstantBuffer->get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &map); mDeviceContext->Map(vertexConstantBuffer->get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
ASSERT(SUCCEEDED(result)); ASSERT(SUCCEEDED(result));
mapVS = (float(*)[4])map.pData; memcpy(map.pData, vertexUniformStorage->getDataPointer(0, 0), vertexUniformStorage->size());
mDeviceContext->Unmap(vertexConstantBuffer->get(), 0);
} }
if (totalRegisterCountPS > 0 && pixelUniformsDirty) if (totalRegisterCountPS > 0 && pixelUniformsDirty)
...@@ -2189,39 +2187,8 @@ gl::Error Renderer11::applyUniforms(const ProgramD3D &programD3D, ...@@ -2189,39 +2187,8 @@ gl::Error Renderer11::applyUniforms(const ProgramD3D &programD3D,
HRESULT result = HRESULT result =
mDeviceContext->Map(pixelConstantBuffer->get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &map); mDeviceContext->Map(pixelConstantBuffer->get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
ASSERT(SUCCEEDED(result)); ASSERT(SUCCEEDED(result));
mapPS = (float(*)[4])map.pData; memcpy(map.pData, fragmentUniformStorage->getDataPointer(0, 0),
} fragmentUniformStorage->size());
for (const D3DUniform *uniform : uniformArray)
{
if (uniform->isSampler())
continue;
unsigned int componentCount = (4 - uniform->registerElement);
// we assume that uniforms from structs are arranged in struct order in our uniforms list.
// otherwise we would overwrite previously written regions of memory.
if (uniform->isReferencedByVertexShader() && mapVS)
{
memcpy(&mapVS[uniform->vsRegisterIndex][uniform->registerElement], uniform->data,
uniform->registerCount * sizeof(float) * componentCount);
}
if (uniform->isReferencedByFragmentShader() && mapPS)
{
memcpy(&mapPS[uniform->psRegisterIndex][uniform->registerElement], uniform->data,
uniform->registerCount * sizeof(float) * componentCount);
}
}
if (mapVS)
{
mDeviceContext->Unmap(vertexConstantBuffer->get(), 0);
}
if (mapPS)
{
mDeviceContext->Unmap(pixelConstantBuffer->get(), 0); mDeviceContext->Unmap(pixelConstantBuffer->get(), 0);
} }
...@@ -4369,23 +4336,8 @@ gl::Error Renderer11::applyComputeUniforms(const ProgramD3D &programD3D, ...@@ -4369,23 +4336,8 @@ gl::Error Renderer11::applyComputeUniforms(const ProgramD3D &programD3D,
HRESULT result = HRESULT result =
mDeviceContext->Map(computeConstantBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &map); mDeviceContext->Map(computeConstantBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
ASSERT(SUCCEEDED(result)); ASSERT(SUCCEEDED(result));
auto *mapCS = static_cast<float(*)[4]>(map.pData); memcpy(map.pData, computeUniformStorage->getDataPointer(0, 0),
computeUniformStorage->size());
ASSERT(mapCS);
for (const D3DUniform *uniform : uniformArray)
{
ASSERT(uniform->isReferencedByComputeShader());
if (uniform->isSampler())
{
continue;
}
unsigned int componentCount = (4 - uniform->registerCount);
memcpy(&mapCS[uniform->csRegisterIndex][uniform->registerElement], uniform->data,
uniform->registerCount * sizeof(float) * componentCount);
}
mDeviceContext->Unmap(computeConstantBuffer, 0); mDeviceContext->Unmap(computeConstantBuffer, 0);
} }
......
...@@ -1854,8 +1854,13 @@ gl::Error Renderer9::applyUniforms(const ProgramD3D &programD3D, ...@@ -1854,8 +1854,13 @@ gl::Error Renderer9::applyUniforms(const ProgramD3D &programD3D,
if (!targetUniform->dirty) if (!targetUniform->dirty)
continue; continue;
GLfloat *f = (GLfloat *)targetUniform->data; // Built-in uniforms must be skipped.
GLint *i = (GLint *)targetUniform->data; if (!targetUniform->isReferencedByFragmentShader() &&
!targetUniform->isReferencedByVertexShader())
continue;
const GLfloat *f = reinterpret_cast<const GLfloat *>(targetUniform->firstNonNullData());
const GLint *i = reinterpret_cast<const GLint *>(targetUniform->firstNonNullData());
switch (targetUniform->type) switch (targetUniform->type)
{ {
......
...@@ -416,7 +416,7 @@ class UniformTestES3 : public ANGLETest ...@@ -416,7 +416,7 @@ class UniformTestES3 : public ANGLETest
}; };
// Test queries for transposed arrays of non-square matrix uniforms. // Test queries for transposed arrays of non-square matrix uniforms.
TEST_P(UniformTestES3, TranposedMatrixArrayUniformStateQuery) TEST_P(UniformTestES3, TransposedMatrixArrayUniformStateQuery)
{ {
const std::string &vertexShader = const std::string &vertexShader =
"#version 300 es\n" "#version 300 es\n"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment