Commit 580961fb by Doug Horn Committed by Commit Bot

Micro-optimizations for setUniform*

This CL improves the setUniform* call for the case where the uniform component size is 4. In that case, we can issue a single memcpy. This reduces the average wall time of setUniform4fv in our test app by ~20%. Test: Run the dEQP shader tests. Bug: b/179160884 Change-Id: I9352f6188bc87449719aac522d1a2323adf7fca5 Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2667592 Reviewed-by: Jamie Madill <jmadill@chromium.org> Reviewed-by: Geoff Lang <geofflang@chromium.org> Commit-Queue: Doug Horn <doughorn@google.com>
parent d820ec7b
...@@ -2662,24 +2662,34 @@ bool ProgramD3D::hasNamedUniform(const std::string &name) ...@@ -2662,24 +2662,34 @@ bool ProgramD3D::hasNamedUniform(const std::string &name)
// Assume count is already clamped. // Assume count is already clamped.
template <typename T> template <typename T>
void ProgramD3D::setUniformImpl(const gl::VariableLocation &locationInfo, void ProgramD3D::setUniformImpl(D3DUniform *targetUniform,
const gl::VariableLocation &locationInfo,
GLsizei count, GLsizei count,
const T *v, const T *v,
uint8_t *targetState, uint8_t *targetState,
GLenum uniformType) GLenum uniformType)
{ {
D3DUniform *targetUniform = mD3DUniforms[locationInfo.index];
const int components = targetUniform->typeInfo.componentCount; const int components = targetUniform->typeInfo.componentCount;
const unsigned int arrayElementOffset = locationInfo.arrayIndex; const unsigned int arrayElementOffset = locationInfo.arrayIndex;
const int blockSize = 4;
if (targetUniform->typeInfo.type == uniformType) if (targetUniform->typeInfo.type == uniformType)
{ {
T *dest = reinterpret_cast<T *>(targetState) + arrayElementOffset * 4; T *dest = reinterpret_cast<T *>(targetState) + arrayElementOffset * blockSize;
const T *source = v; const T *source = v;
for (GLint i = 0; i < count; i++, dest += 4, source += components) // If the component is equal to the block size, we can optimize to a single memcpy.
// Otherwise, we have to do partial block writes.
if (components == blockSize)
{ {
memcpy(dest, source, components * sizeof(T)); memcpy(dest, source, components * count * sizeof(T));
}
else
{
for (GLint i = 0; i < count; i++, dest += blockSize, source += components)
{
memcpy(dest, source, components * sizeof(T));
}
} }
} }
else else
...@@ -2721,10 +2731,10 @@ void ProgramD3D::setUniformInternal(GLint location, GLsizei count, const T *v, G ...@@ -2721,10 +2731,10 @@ void ProgramD3D::setUniformInternal(GLint location, GLsizei count, const T *v, G
for (gl::ShaderType shaderType : gl::AllShaderTypes()) for (gl::ShaderType shaderType : gl::AllShaderTypes())
{ {
if (targetUniform->mShaderData[shaderType]) uint8_t *targetState = targetUniform->mShaderData[shaderType];
if (targetState)
{ {
setUniformImpl(locationInfo, count, v, targetUniform->mShaderData[shaderType], setUniformImpl(targetUniform, locationInfo, count, v, targetState, uniformType);
uniformType);
mShaderUniformsDirty.set(shaderType); mShaderUniformsDirty.set(shaderType);
} }
} }
......
...@@ -481,7 +481,8 @@ class ProgramD3D : public ProgramImpl ...@@ -481,7 +481,8 @@ class ProgramD3D : public ProgramImpl
void getUniformInternal(GLint location, DestT *dataOut) const; void getUniformInternal(GLint location, DestT *dataOut) const;
template <typename T> template <typename T>
void setUniformImpl(const gl::VariableLocation &locationInfo, void setUniformImpl(D3DUniform *targetUniform,
const gl::VariableLocation &locationInfo,
GLsizei count, GLsizei count,
const T *v, const T *v,
uint8_t *targetData, uint8_t *targetData,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment