Commit be5e2ec3 by Jamie Madill Committed by Commit Bot

Remove uniform memory copy from GL front-end.

This moves the uniform query to the back-end. In D3D, this requires a bit more redesign, especially for matrix uniforms.

This gives about a 10% speed improvement in the GL/NULL uniforms stress test on Windows (UniformsBenchmark.Run/gl_null_400_vec4).

BUG=angleproject:1390
Change-Id: Idac22a77118e9e94d2f28c585e31ff0bc785ba94
Reviewed-on: https://chromium-review.googlesource.com/623929
Commit-Queue: Jamie Madill <jmadill@chromium.org>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
parent 6ee26d7a
......@@ -618,17 +618,16 @@ class Program final : angle::NonCopyable, public LabeledObject
// Both of these functions update the cached uniform values and return a modified "count"
// so that the uniform update doesn't overflow the uniform.
template <typename T>
GLsizei setUniformInternal(GLint location, GLsizei count, int vectorSize, const T *v);
GLsizei clampUniformCount(const VariableLocation &locationInfo,
GLsizei count,
int vectorSize,
const T *v);
template <size_t cols, size_t rows, typename T>
GLsizei setMatrixUniformInternal(GLint location,
GLsizei count,
GLboolean transpose,
const T *v);
template <typename T>
GLsizei clampMatrixUniformCount(GLint location, GLsizei count, GLboolean transpose, const T *v);
void updateSamplerUniform(const VariableLocation &locationInfo,
const uint8_t *destPointer,
GLsizei clampedCount,
const T *v);
const GLint *v);
template <typename DestT>
void getUniformInternal(const Context *context,
......
......@@ -46,15 +46,10 @@ LinkedUniform::LinkedUniform(const sh::Uniform &uniform)
// Copy constructor: copies the sh::Uniform base along with bufferIndex and blockInfo.
// mLazyData is not listed in the initializer list, so it is not copied from the source.
LinkedUniform::LinkedUniform(const LinkedUniform &uniform)
: sh::Uniform(uniform), bufferIndex(uniform.bufferIndex), blockInfo(uniform.blockInfo)
{
// This function is not intended to be called during runtime.
// The source must not have allocated its lazy data store yet, since it would be dropped here.
ASSERT(uniform.mLazyData.empty());
}
LinkedUniform &LinkedUniform::operator=(const LinkedUniform &uniform)
{
// This function is not intended to be called during runtime.
ASSERT(uniform.mLazyData.empty());
sh::Uniform::operator=(uniform);
bufferIndex = uniform.bufferIndex;
blockInfo = uniform.blockInfo;
......@@ -71,23 +66,6 @@ bool LinkedUniform::isInDefaultBlock() const
return bufferIndex == -1;
}
// Reports the size of the lazily created data store (mLazyData), creating it on first use.
// Struct-typed uniforms are not expected here.
size_t LinkedUniform::dataSize() const
{
    ASSERT(type != GL_STRUCT_ANGLEX);

    // Fast path: the store already exists, so just report its size.
    if (!mLazyData.empty())
    {
        return mLazyData.size();
    }

    // First use: size the store from the external size of the type times the element count.
    const auto requiredSize = VariableExternalSize(type) * elementCount();
    mLazyData.resize(requiredSize);
    ASSERT(!mLazyData.empty());

    return mLazyData.size();
}
const uint8_t *LinkedUniform::data() const
{
return const_cast<LinkedUniform *>(this)->data();
}
bool LinkedUniform::isSampler() const
{
return IsSamplerType(type);
......@@ -118,17 +96,6 @@ size_t LinkedUniform::getElementComponents() const
return VariableComponentCount(type);
}
// Returns a pointer into the uniform's data store at the start of the given element.
// Non-array uniforms only accept index 0; array uniforms require an index below arraySize.
uint8_t *LinkedUniform::getDataPtrToElement(size_t elementIndex)
{
    ASSERT(isArray() ? (elementIndex < arraySize) : (elementIndex == 0));

    uint8_t *storeStart = data();
    if (elementIndex == 0)
    {
        // The first element sits at the start of the store; no offset is needed.
        return storeStart;
    }

    return storeStart + (getElementSize() * elementIndex);
}
const uint8_t *LinkedUniform::getDataPtrToElement(size_t elementIndex) const
{
return const_cast<LinkedUniform *>(this)->getDataPtrToElement(elementIndex);
}
ShaderVariableBuffer::ShaderVariableBuffer()
: binding(0),
dataSize(0),
......
......@@ -37,19 +37,6 @@ struct LinkedUniform : public sh::Uniform
LinkedUniform &operator=(const LinkedUniform &uniform);
~LinkedUniform();
size_t dataSize() const;
uint8_t *data()
{
if (mLazyData.empty())
{
// dataSize() will init the data store.
size_t size = dataSize();
memset(mLazyData.data(), 0, size);
}
return mLazyData.data();
}
const uint8_t *data() const;
bool isSampler() const;
bool isImage() const;
bool isAtomicCounter() const;
......@@ -57,15 +44,10 @@ struct LinkedUniform : public sh::Uniform
bool isField() const;
size_t getElementSize() const;
size_t getElementComponents() const;
uint8_t *getDataPtrToElement(size_t elementIndex);
const uint8_t *getDataPtrToElement(size_t elementIndex) const;
// Identifies the containing buffer backed resource -- interface block or atomic counter buffer.
int bufferIndex;
sh::BlockMemberInfo blockInfo;
private:
mutable angle::MemoryBuffer mLazyData;
};
// Parent struct for atomic counter, uniform block, and shader storage block buffer, which all
......
......@@ -32,6 +32,8 @@ class ShaderExecutableD3D;
#endif
// Helper struct representing a single shader uniform
// TODO(jmadill): Make uniform blocks shared between all programs, so we don't need separate
// register indices.
struct D3DUniform : private angle::NonCopyable
{
D3DUniform(GLenum typeIn,
......@@ -46,14 +48,18 @@ struct D3DUniform : private angle::NonCopyable
bool isReferencedByFragmentShader() const;
bool isReferencedByComputeShader() const;
const uint8_t *firstNonNullData() const;
const uint8_t *getDataPtrToElement(size_t elementIndex) const;
// Duplicated from the GL layer
GLenum type;
std::string name;
unsigned int arraySize;
// Pointer to a system copy of the data.
// TODO(jmadill): remove this in favor of gl::LinkedUniform::data().
uint8_t *data;
// Pointers to system copies of the data. Separate pointers for each uniform storage type.
uint8_t *vsData;
uint8_t *psData;
uint8_t *csData;
// Has the data been updated since the last sync?
bool dirty;
......@@ -68,6 +74,9 @@ struct D3DUniform : private angle::NonCopyable
// uniforms
// inside aggregate types, which are packed according to C-like structure rules.
unsigned int registerElement;
// Special buffer for sampler values.
std::vector<GLint> mSamplerData;
};
struct D3DUniformBlock
......@@ -371,14 +380,29 @@ class ProgramD3D : public ProgramImpl
void getUniformInternal(GLint location, DestT *dataOut) const;
template <typename T>
void setUniform(GLint location, GLsizei count, const T *v, GLenum targetUniformType);
void setUniformImpl(const gl::VariableLocation &locationInfo,
GLsizei count,
const T *v,
uint8_t *targetData,
GLenum targetUniformType);
template <typename T>
void setUniformInternal(GLint location, GLsizei count, const T *v, GLenum targetUniformType);
template <int cols, int rows>
void setUniformMatrixfvImpl(GLint location,
GLsizei count,
GLboolean transpose,
const GLfloat *value,
uint8_t *targetData,
GLenum targetUniformType);
template <int cols, int rows>
void setUniformMatrixfv(GLint location,
GLsizei count,
GLboolean transpose,
const GLfloat *value,
GLenum targetUniformType);
void setUniformMatrixfvInternal(GLint location,
GLsizei count,
GLboolean transpose,
const GLfloat *value,
GLenum targetUniformType);
gl::LinkResult compileProgramExecutables(const gl::Context *context, gl::InfoLog &infoLog);
gl::LinkResult compileComputeExecutable(const gl::Context *context, gl::InfoLog &infoLog);
......@@ -387,6 +411,7 @@ class ProgramD3D : public ProgramImpl
const BuiltinInfo &builtins);
D3DUniform *getD3DUniformByName(const std::string &name);
D3DUniform *getD3DUniformFromLocation(GLint location);
const D3DUniform *getD3DUniformFromLocation(GLint location) const;
void initAttribLocationsToD3DSemantic(const gl::Context *context);
......
......@@ -44,9 +44,13 @@ void ShaderExecutableD3D::appendDebugInfo(const std::string &info)
mDebugInfo += info;
}
UniformStorageD3D::UniformStorageD3D(size_t initialSize) : mSize(initialSize)
UniformStorageD3D::UniformStorageD3D(size_t initialSize) : mUniformData()
{
bool result = mUniformData.resize(initialSize);
ASSERT(result);
// Uniform data is zero-initialized by default.
mUniformData.fill(0);
}
UniformStorageD3D::~UniformStorageD3D()
......@@ -55,7 +59,13 @@ UniformStorageD3D::~UniformStorageD3D()
size_t UniformStorageD3D::size() const
{
return mSize;
return mUniformData.size();
}
// Returns a pointer into mUniformData for the given register and element within that register.
// The byte offset is (registerIndex * 4 + registerElement) float-sized slots from the start.
uint8_t *UniformStorageD3D::getDataPointer(unsigned int registerIndex, unsigned int registerElement)
{
    // Slot index is computed in unsigned int arithmetic before being scaled to bytes.
    const unsigned int slotIndex = registerIndex * 4 + registerElement;
    const size_t byteOffset      = slotIndex * sizeof(float);

    uint8_t *storageStart = mUniformData.data();
    return storageStart + byteOffset;
}
} // namespace rx
......@@ -10,6 +10,7 @@
#ifndef LIBANGLE_RENDERER_D3D_SHADEREXECUTABLED3D_H_
#define LIBANGLE_RENDERER_D3D_SHADEREXECUTABLED3D_H_
#include "common/MemoryBuffer.h"
#include "common/debug.h"
#include <vector>
......@@ -45,10 +46,12 @@ class UniformStorageD3D : angle::NonCopyable
size_t size() const;
uint8_t *getDataPointer(unsigned int registerIndex, unsigned int registerElement);
private:
size_t mSize;
angle::MemoryBuffer mUniformData;
};
}
} // namespace rx
#endif // LIBANGLE_RENDERER_D3D_SHADEREXECUTABLED3D_H_
......@@ -2171,16 +2171,14 @@ gl::Error Renderer11::applyUniforms(const ProgramD3D &programD3D,
const d3d11::Buffer *pixelConstantBuffer = nullptr;
ANGLE_TRY(fragmentUniformStorage->getConstantBuffer(this, &pixelConstantBuffer));
float(*mapVS)[4] = nullptr;
float(*mapPS)[4] = nullptr;
if (totalRegisterCountVS > 0 && vertexUniformsDirty)
{
D3D11_MAPPED_SUBRESOURCE map = {0};
HRESULT result =
mDeviceContext->Map(vertexConstantBuffer->get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
ASSERT(SUCCEEDED(result));
mapVS = (float(*)[4])map.pData;
memcpy(map.pData, vertexUniformStorage->getDataPointer(0, 0), vertexUniformStorage->size());
mDeviceContext->Unmap(vertexConstantBuffer->get(), 0);
}
if (totalRegisterCountPS > 0 && pixelUniformsDirty)
......@@ -2189,39 +2187,8 @@ gl::Error Renderer11::applyUniforms(const ProgramD3D &programD3D,
HRESULT result =
mDeviceContext->Map(pixelConstantBuffer->get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
ASSERT(SUCCEEDED(result));
mapPS = (float(*)[4])map.pData;
}
for (const D3DUniform *uniform : uniformArray)
{
if (uniform->isSampler())
continue;
unsigned int componentCount = (4 - uniform->registerElement);
// we assume that uniforms from structs are arranged in struct order in our uniforms list.
// otherwise we would overwrite previously written regions of memory.
if (uniform->isReferencedByVertexShader() && mapVS)
{
memcpy(&mapVS[uniform->vsRegisterIndex][uniform->registerElement], uniform->data,
uniform->registerCount * sizeof(float) * componentCount);
}
if (uniform->isReferencedByFragmentShader() && mapPS)
{
memcpy(&mapPS[uniform->psRegisterIndex][uniform->registerElement], uniform->data,
uniform->registerCount * sizeof(float) * componentCount);
}
}
if (mapVS)
{
mDeviceContext->Unmap(vertexConstantBuffer->get(), 0);
}
if (mapPS)
{
memcpy(map.pData, fragmentUniformStorage->getDataPointer(0, 0),
fragmentUniformStorage->size());
mDeviceContext->Unmap(pixelConstantBuffer->get(), 0);
}
......@@ -4369,23 +4336,8 @@ gl::Error Renderer11::applyComputeUniforms(const ProgramD3D &programD3D,
HRESULT result =
mDeviceContext->Map(computeConstantBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
ASSERT(SUCCEEDED(result));
auto *mapCS = static_cast<float(*)[4]>(map.pData);
ASSERT(mapCS);
for (const D3DUniform *uniform : uniformArray)
{
ASSERT(uniform->isReferencedByComputeShader());
if (uniform->isSampler())
{
continue;
}
unsigned int componentCount = (4 - uniform->registerCount);
memcpy(&mapCS[uniform->csRegisterIndex][uniform->registerElement], uniform->data,
uniform->registerCount * sizeof(float) * componentCount);
}
memcpy(map.pData, computeUniformStorage->getDataPointer(0, 0),
computeUniformStorage->size());
mDeviceContext->Unmap(computeConstantBuffer, 0);
}
......
......@@ -1854,8 +1854,13 @@ gl::Error Renderer9::applyUniforms(const ProgramD3D &programD3D,
if (!targetUniform->dirty)
continue;
GLfloat *f = (GLfloat *)targetUniform->data;
GLint *i = (GLint *)targetUniform->data;
// Built-in uniforms must be skipped.
if (!targetUniform->isReferencedByFragmentShader() &&
!targetUniform->isReferencedByVertexShader())
continue;
const GLfloat *f = reinterpret_cast<const GLfloat *>(targetUniform->firstNonNullData());
const GLint *i = reinterpret_cast<const GLint *>(targetUniform->firstNonNullData());
switch (targetUniform->type)
{
......
......@@ -416,7 +416,7 @@ class UniformTestES3 : public ANGLETest
};
// Test queries for transposed arrays of non-square matrix uniforms.
TEST_P(UniformTestES3, TranposedMatrixArrayUniformStateQuery)
TEST_P(UniformTestES3, TransposedMatrixArrayUniformStateQuery)
{
const std::string &vertexShader =
"#version 300 es\n"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment