Commit c674abeb by Jamie Madill

Refactor InputLayoutCache::applyVertexBuffers.

The redesigned code places the internal buffers and input elements for instance point sprite emulation (FL9_3) at the start of the arrays, instead of swapping with the first non-instanced element. This makes the tracking logic of the caching somewhat cleaner. This facilitates the work of implementing instancing-on-instancing for FL9_3 conformance.

BUG=angleproject:1279
Change-Id: Ifb030816a313b1e8b916c57ef05915914443312a
Reviewed-on: https://chromium-review.googlesource.com/325090
Reviewed-by: Geoff Lang <geofflang@chromium.org>
Commit-Queue: Jamie Madill <jmadill@chromium.org>
parent 3a913264
...@@ -27,9 +27,7 @@ namespace rx ...@@ -27,9 +27,7 @@ namespace rx
namespace namespace
{ {
gl::InputLayout GetInputLayout( gl::InputLayout GetInputLayout(const SortedAttribArray &translatedAttributes, size_t attributeCount)
const TranslatedAttribute *translatedAttributes[gl::MAX_VERTEX_ATTRIBS],
size_t attributeCount)
{ {
gl::InputLayout inputLayout(attributeCount, gl::VERTEX_FORMAT_INVALID); gl::InputLayout inputLayout(attributeCount, gl::VERTEX_FORMAT_INVALID);
...@@ -79,6 +77,19 @@ struct PackedAttribute ...@@ -79,6 +77,19 @@ struct PackedAttribute
uint8_t divisor; uint8_t divisor;
}; };
// Scans the first |maxIndex| entries of |sortedAttributes| and reports the position of the
// first entry whose divisor is zero. Yields an invalid Optional when every scanned entry has a
// non-zero divisor (or when |maxIndex| is zero). Entries are dereferenced without a null check,
// so each of the first |maxIndex| slots must point at a valid attribute.
Optional<size_t> FindFirstNonInstanced(const SortedAttribArray &sortedAttributes, size_t maxIndex)
{
    size_t candidate = 0;
    while (candidate < maxIndex)
    {
        const bool isInstanced = (sortedAttributes[candidate]->divisor != 0);
        if (!isInstanced)
        {
            return Optional<size_t>(candidate);
        }
        ++candidate;
    }

    // Nothing in the scanned range was non-instanced.
    return Optional<size_t>::Invalid();
}
} // anonymous namespace } // anonymous namespace
void InputLayoutCache::PackedAttributeLayout::addAttributeData( void InputLayoutCache::PackedAttributeLayout::addAttributeData(
...@@ -176,175 +187,62 @@ void InputLayoutCache::markDirty() ...@@ -176,175 +187,62 @@ void InputLayoutCache::markDirty()
gl::Error InputLayoutCache::applyVertexBuffers(const std::vector<TranslatedAttribute> &unsortedAttributes, gl::Error InputLayoutCache::applyVertexBuffers(const std::vector<TranslatedAttribute> &unsortedAttributes,
GLenum mode, gl::Program *program, SourceIndexData *sourceInfo) GLenum mode, gl::Program *program, SourceIndexData *sourceInfo)
{ {
ASSERT(mDevice && mDeviceContext);
ProgramD3D *programD3D = GetImplAs<ProgramD3D>(program); ProgramD3D *programD3D = GetImplAs<ProgramD3D>(program);
int sortedSemanticIndices[gl::MAX_VERTEX_ATTRIBS];
const TranslatedAttribute *sortedAttributes[gl::MAX_VERTEX_ATTRIBS] = { nullptr };
programD3D->sortAttributesByLayout(unsortedAttributes, sortedSemanticIndices, sortedAttributes);
bool programUsesInstancedPointSprites = programD3D->usesPointSize() && programD3D->usesInstancedPointSpriteEmulation(); bool programUsesInstancedPointSprites = programD3D->usesPointSize() && programD3D->usesInstancedPointSpriteEmulation();
bool instancedPointSpritesActive = programUsesInstancedPointSprites && (mode == GL_POINTS); bool instancedPointSpritesActive = programUsesInstancedPointSprites && (mode == GL_POINTS);
bool indexedPointSpriteEmulationActive = instancedPointSpritesActive && (sourceInfo != nullptr);
const auto &semanticToLocation = programD3D->getAttributesByLayout(); SortedIndexArray sortedSemanticIndices;
SortedAttribArray sortedAttributes;
sortedAttributes.fill(nullptr);
programD3D->sortAttributesByLayout(unsortedAttributes, sortedSemanticIndices.data(),
sortedAttributes.data());
if (!mDevice || !mDeviceContext) // If we are using FL 9_3, make sure the first attribute is not instanced
if (mFeatureLevel <= D3D_FEATURE_LEVEL_9_3 && !unsortedAttributes.empty())
{ {
return gl::Error(GL_OUT_OF_MEMORY, "Internal input layout cache is not initialized."); if (sortedAttributes[0]->divisor > 0)
}
unsigned int inputElementCount = 0;
D3D11_INPUT_ELEMENT_DESC inputElements[gl::MAX_VERTEX_ATTRIBS];
PackedAttributeLayout layout;
static const char* semanticName = "TEXCOORD";
unsigned int firstIndexedElement = gl::MAX_VERTEX_ATTRIBS;
unsigned int firstInstancedElement = gl::MAX_VERTEX_ATTRIBS;
unsigned int nextAvailableInputSlot = 0;
const std::vector<sh::Attribute> &shaderAttributes = program->getAttributes();
for (unsigned int i = 0; i < unsortedAttributes.size(); i++)
{
if (sortedAttributes[i]->active)
{ {
D3D11_INPUT_CLASSIFICATION inputClass = sortedAttributes[i]->divisor > 0 ? D3D11_INPUT_PER_INSTANCE_DATA : D3D11_INPUT_PER_VERTEX_DATA; Optional<size_t> firstNonInstancedIndex =
// If rendering points and instanced pointsprite emulation is being used, the inputClass is required to be configured as per instance data FindFirstNonInstanced(sortedAttributes, unsortedAttributes.size());
inputClass = instancedPointSpritesActive ? D3D11_INPUT_PER_INSTANCE_DATA : inputClass; if (firstNonInstancedIndex.valid())
gl::VertexFormatType vertexFormatType = gl::GetVertexFormatType(*sortedAttributes[i]->attribute, sortedAttributes[i]->currentValueType);
const d3d11::VertexFormat &vertexFormatInfo = d3d11::GetVertexFormatInfo(vertexFormatType, mFeatureLevel);
inputElements[inputElementCount].SemanticName = semanticName;
inputElements[inputElementCount].SemanticIndex = sortedSemanticIndices[i];
inputElements[inputElementCount].Format = vertexFormatInfo.nativeFormat;
inputElements[inputElementCount].InputSlot = i;
inputElements[inputElementCount].AlignedByteOffset = 0;
inputElements[inputElementCount].InputSlotClass = inputClass;
inputElements[inputElementCount].InstanceDataStepRate = instancedPointSpritesActive ? 1 : sortedAttributes[i]->divisor;
if (inputClass == D3D11_INPUT_PER_VERTEX_DATA && firstIndexedElement == gl::MAX_VERTEX_ATTRIBS)
{
firstIndexedElement = inputElementCount;
}
else if (inputClass == D3D11_INPUT_PER_INSTANCE_DATA && firstInstancedElement == gl::MAX_VERTEX_ATTRIBS)
{ {
firstInstancedElement = inputElementCount; size_t index = firstNonInstancedIndex.value();
std::swap(sortedAttributes[0], sortedAttributes[index]);
std::swap(sortedSemanticIndices[0], sortedSemanticIndices[index]);
} }
// Record the type of the associated vertex shader vector in our key
// This will prevent mismatched vertex shaders from using the same input layout
GLenum glslElementType = GetGLSLAttributeType(
shaderAttributes, semanticToLocation[sortedSemanticIndices[i]]);
layout.addAttributeData(glslElementType,
sortedSemanticIndices[i],
vertexFormatType,
sortedAttributes[i]->divisor);
inputElementCount++;
nextAvailableInputSlot = i + 1;
} }
} }
// Instanced PointSprite emulation requires additional entries in the gl::Error error = updateInputLayout(program, mode, sortedAttributes, sortedSemanticIndices,
// inputlayout to support the vertices that make up the pointsprite quad. unsortedAttributes.size());
// We do this even if mode != GL_POINTS, since the shader signature has these inputs, and the input layout must match the shader
if (programUsesInstancedPointSprites)
{
inputElements[inputElementCount].SemanticName = "SPRITEPOSITION";
inputElements[inputElementCount].SemanticIndex = 0;
inputElements[inputElementCount].Format = DXGI_FORMAT_R32G32B32_FLOAT;
inputElements[inputElementCount].InputSlot = nextAvailableInputSlot;
inputElements[inputElementCount].AlignedByteOffset = 0;
inputElements[inputElementCount].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
inputElements[inputElementCount].InstanceDataStepRate = 0;
// The new elements are D3D11_INPUT_PER_VERTEX_DATA data so the indexed element
// tracking must be applied. This ensures that the instancing specific
// buffer swapping logic continues to work.
if (firstIndexedElement == gl::MAX_VERTEX_ATTRIBS)
{
firstIndexedElement = inputElementCount;
}
inputElementCount++;
inputElements[inputElementCount].SemanticName = "SPRITETEXCOORD";
inputElements[inputElementCount].SemanticIndex = 0;
inputElements[inputElementCount].Format = DXGI_FORMAT_R32G32_FLOAT;
inputElements[inputElementCount].InputSlot = nextAvailableInputSlot;
inputElements[inputElementCount].AlignedByteOffset = sizeof(float) * 3;
inputElements[inputElementCount].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
inputElements[inputElementCount].InstanceDataStepRate = 0;
inputElementCount++;
}
// On 9_3, we must ensure that slot 0 contains non-instanced data.
// If slot 0 currently contains instanced data then we swap it with a non-instanced element.
// Note that instancing is only available on 9_3 via ANGLE_instanced_arrays, since 9_3 doesn't support OpenGL ES 3.0.
// As per the spec for ANGLE_instanced_arrays, not all attributes can be instanced simultaneously, so a non-instanced element must exist.
ASSERT(!(mFeatureLevel <= D3D_FEATURE_LEVEL_9_3 && firstIndexedElement == gl::MAX_VERTEX_ATTRIBS));
bool moveFirstIndexedIntoSlotZero = mFeatureLevel <= D3D_FEATURE_LEVEL_9_3 && firstInstancedElement == 0 && firstIndexedElement != gl::MAX_VERTEX_ATTRIBS;
if (moveFirstIndexedIntoSlotZero)
{
inputElements[firstInstancedElement].InputSlot = inputElements[firstIndexedElement].InputSlot;
inputElements[firstIndexedElement].InputSlot = 0;
// Instanced PointSprite emulation uses multiple layout entries across a single vertex buffer.
// If an index swap is performed, we need to ensure that all elements get the proper InputSlot.
if (programUsesInstancedPointSprites)
{
inputElements[firstIndexedElement + 1].InputSlot = 0;
}
}
if (programUsesInstancedPointSprites)
{
layout.flags |= PackedAttributeLayout::FLAG_USES_INSTANCED_SPRITES;
}
if (moveFirstIndexedIntoSlotZero)
{
layout.flags |= PackedAttributeLayout::FLAG_MOVE_FIRST_INDEXED;
}
if (instancedPointSpritesActive)
{
layout.flags |= PackedAttributeLayout::FLAG_INSTANCED_SPRITES_ACTIVE;
}
ID3D11InputLayout *inputLayout = nullptr;
gl::Error error = findInputLayout(layout, inputElementCount, inputElements, programD3D,
sortedAttributes, unsortedAttributes.size(), &inputLayout);
if (error.isError()) if (error.isError())
{ {
return error; return error;
} }
if (inputLayout != mCurrentIL)
{
mDeviceContext->IASetInputLayout(inputLayout);
mCurrentIL = inputLayout;
}
bool dirtyBuffers = false; bool dirtyBuffers = false;
unsigned int minDiff = gl::MAX_VERTEX_ATTRIBS; size_t minDiff = gl::MAX_VERTEX_ATTRIBS;
unsigned int maxDiff = 0; size_t maxDiff = 0;
unsigned int nextAvailableIndex = 0;
for (unsigned int i = 0; i < gl::MAX_VERTEX_ATTRIBS; i++) // Note that if we use instance emulation, we reserve the first buffer slot.
size_t reservedBuffers = programUsesInstancedPointSprites ? 1 : 0;
for (size_t attribIndex = 0; attribIndex < (gl::MAX_VERTEX_ATTRIBS - reservedBuffers);
++attribIndex)
{ {
ID3D11Buffer *buffer = NULL; ID3D11Buffer *buffer = NULL;
UINT vertexStride = 0; UINT vertexStride = 0;
UINT vertexOffset = 0; UINT vertexOffset = 0;
if (i < unsortedAttributes.size() && sortedAttributes[i]->active) const auto &attrib = *sortedAttributes[attribIndex];
if (attribIndex < unsortedAttributes.size() && attrib.active)
{ {
VertexBuffer11 *vertexBuffer = GetAs<VertexBuffer11>(sortedAttributes[i]->vertexBuffer); VertexBuffer11 *vertexBuffer = GetAs<VertexBuffer11>(attrib.vertexBuffer);
Buffer11 *bufferStorage = sortedAttributes[i]->storage ? GetAs<Buffer11>(sortedAttributes[i]->storage) : NULL; Buffer11 *bufferStorage = attrib.storage ? GetAs<Buffer11>(attrib.storage) : nullptr;
// If indexed pointsprite emulation is active, then we need to take a less efficient code path. // If indexed pointsprite emulation is active, then we need to take a less efficient code path.
// Emulated indexed pointsprite rendering requires that the vertex buffers match exactly to // Emulated indexed pointsprite rendering requires that the vertex buffers match exactly to
...@@ -354,7 +252,7 @@ gl::Error InputLayoutCache::applyVertexBuffers(const std::vector<TranslatedAttri ...@@ -354,7 +252,7 @@ gl::Error InputLayoutCache::applyVertexBuffers(const std::vector<TranslatedAttri
{ {
buffer = vertexBuffer->getBuffer(); buffer = vertexBuffer->getBuffer();
} }
else if (indexedPointSpriteEmulationActive) else if (instancedPointSpritesActive && (sourceInfo != nullptr))
{ {
if (sourceInfo->srcBuffer != nullptr) if (sourceInfo->srcBuffer != nullptr)
{ {
...@@ -371,47 +269,40 @@ gl::Error InputLayoutCache::applyVertexBuffers(const std::vector<TranslatedAttri ...@@ -371,47 +269,40 @@ gl::Error InputLayoutCache::applyVertexBuffers(const std::vector<TranslatedAttri
sourceInfo->srcIndices = bufferData + offset; sourceInfo->srcIndices = bufferData + offset;
} }
buffer = bufferStorage->getEmulatedIndexedBuffer(sourceInfo, sortedAttributes[i]); buffer = bufferStorage->getEmulatedIndexedBuffer(sourceInfo, &attrib);
} }
else else
{ {
buffer = bufferStorage->getBuffer(BUFFER_USAGE_VERTEX_OR_TRANSFORM_FEEDBACK); buffer = bufferStorage->getBuffer(BUFFER_USAGE_VERTEX_OR_TRANSFORM_FEEDBACK);
} }
vertexStride = sortedAttributes[i]->stride; vertexStride = attrib.stride;
vertexOffset = sortedAttributes[i]->offset; vertexOffset = attrib.offset;
} }
if (buffer != mCurrentBuffers[i] || vertexStride != mCurrentVertexStrides[i] || size_t bufferIndex = reservedBuffers + attribIndex;
vertexOffset != mCurrentVertexOffsets[i])
if (buffer != mCurrentBuffers[bufferIndex] ||
vertexStride != mCurrentVertexStrides[bufferIndex] ||
vertexOffset != mCurrentVertexOffsets[bufferIndex])
{ {
dirtyBuffers = true; dirtyBuffers = true;
minDiff = std::min(minDiff, i); minDiff = std::min(minDiff, bufferIndex);
maxDiff = std::max(maxDiff, i); maxDiff = std::max(maxDiff, bufferIndex);
mCurrentBuffers[i] = buffer;
mCurrentVertexStrides[i] = vertexStride;
mCurrentVertexOffsets[i] = vertexOffset;
}
// If a non null ID3D11Buffer is being assigned to mCurrentBuffers, mCurrentBuffers[bufferIndex] = buffer;
// then the next available index needs to be tracked to ensure mCurrentVertexStrides[bufferIndex] = vertexStride;
// that any instanced pointsprite emulation buffers will be properly packed. mCurrentVertexOffsets[bufferIndex] = vertexOffset;
if (buffer)
{
nextAvailableIndex = i + 1;
} }
} }
// Instanced PointSprite emulation requires two additional ID3D11Buffers. // Instanced PointSprite emulation requires two additional ID3D11Buffers. A vertex buffer needs
// A vertex buffer needs to be created and added to the list of current buffers, // to be created and added to the list of current buffers, strides and offsets collections.
// strides and offsets collections. This buffer contains the vertices for a single // This buffer contains the vertices for a single PointSprite quad.
// PointSprite quad. // An index buffer also needs to be created and applied because rendering instanced data on
// An index buffer also needs to be created and applied because rendering instanced // D3D11 FL9_3 requires DrawIndexedInstanced() to be used. Shaders that contain gl_PointSize and
// data on D3D11 FL9_3 requires DrawIndexedInstanced() to be used. // used without the GL_POINTS rendering mode require a vertex buffer because some drivers cannot
// Shaders that contain gl_PointSize and used without the GL_POINTS rendering mode // handle missing vertex data and will TDR the system.
// require a vertex buffer because some drivers cannot handle missing vertex data
// and will TDR the system.
if (programUsesInstancedPointSprites) if (programUsesInstancedPointSprites)
{ {
HRESULT result = S_OK; HRESULT result = S_OK;
...@@ -446,16 +337,16 @@ gl::Error InputLayoutCache::applyVertexBuffers(const std::vector<TranslatedAttri ...@@ -446,16 +337,16 @@ gl::Error InputLayoutCache::applyVertexBuffers(const std::vector<TranslatedAttri
} }
} }
mCurrentBuffers[nextAvailableIndex] = mPointSpriteVertexBuffer; mCurrentBuffers[0] = mPointSpriteVertexBuffer;
// Set the stride to 0 if GL_POINTS mode is not being used to instruct the driver // Set the stride to 0 if GL_POINTS mode is not being used to instruct the driver to avoid
// to avoid indexing into the vertex buffer. // indexing into the vertex buffer.
mCurrentVertexStrides[nextAvailableIndex] = mCurrentVertexStrides[0] = instancedPointSpritesActive ? pointSpriteVertexStride : 0;
instancedPointSpritesActive ? pointSpriteVertexStride : 0; mCurrentVertexOffsets[0] = 0;
mCurrentVertexOffsets[nextAvailableIndex] = 0;
// Update maxDiff to include the additional point sprite vertex buffer // Update maxDiff to include the additional point sprite vertex buffer
// to ensure that IASetVertexBuffers uses the correct buffer count. // to ensure that IASetVertexBuffers uses the correct buffer count.
maxDiff = std::max(maxDiff, nextAvailableIndex); minDiff = 0;
maxDiff = std::max(maxDiff, static_cast<size_t>(0));
if (!mPointSpriteIndexBuffer) if (!mPointSpriteIndexBuffer)
{ {
...@@ -484,59 +375,209 @@ gl::Error InputLayoutCache::applyVertexBuffers(const std::vector<TranslatedAttri ...@@ -484,59 +375,209 @@ gl::Error InputLayoutCache::applyVertexBuffers(const std::vector<TranslatedAttri
if (instancedPointSpritesActive) if (instancedPointSpritesActive)
{ {
// The index buffer is applied here because Instanced PointSprite emulation uses // The index buffer is applied here because Instanced PointSprite emulation uses the a
// the a non-indexed rendering path in ANGLE (DrawArrays). This means that // non-indexed rendering path in ANGLE (DrawArrays). This means that applyIndexBuffer()
// applyIndexBuffer()
// on the renderer will not be called and setting this buffer here ensures that the // on the renderer will not be called and setting this buffer here ensures that the
// rendering // rendering path will contain the correct index buffers.
// path will contain the correct index buffers.
mDeviceContext->IASetIndexBuffer(mPointSpriteIndexBuffer, DXGI_FORMAT_R16_UINT, 0); mDeviceContext->IASetIndexBuffer(mPointSpriteIndexBuffer, DXGI_FORMAT_R16_UINT, 0);
} }
} }
if (moveFirstIndexedIntoSlotZero) if (dirtyBuffers)
{ {
// In this case, we swapped the slots of the first instanced element and the first indexed element, to ensure ASSERT(minDiff <= maxDiff && maxDiff < gl::MAX_VERTEX_ATTRIBS);
// that the first slot contains non-instanced data (required by Feature Level 9_3). mDeviceContext->IASetVertexBuffers(
// We must also swap the corresponding buffers sent to IASetVertexBuffers so that the correct data is sent to each slot. static_cast<UINT>(minDiff), static_cast<UINT>(maxDiff - minDiff + 1),
std::swap(mCurrentBuffers[firstIndexedElement], mCurrentBuffers[firstInstancedElement]); mCurrentBuffers + minDiff, mCurrentVertexStrides + minDiff,
std::swap(mCurrentVertexStrides[firstIndexedElement], mCurrentVertexStrides[firstInstancedElement]); mCurrentVertexOffsets + minDiff);
std::swap(mCurrentVertexOffsets[firstIndexedElement], mCurrentVertexOffsets[firstInstancedElement]);
} }
if (dirtyBuffers) return gl::Error(GL_NO_ERROR);
}
gl::Error InputLayoutCache::updateInputLayout(gl::Program *program,
GLenum mode,
const SortedAttribArray &sortedAttributes,
const SortedIndexArray &sortedSemanticIndices,
size_t attribCount)
{
const std::vector<sh::Attribute> &shaderAttributes = program->getAttributes();
PackedAttributeLayout layout;
ProgramD3D *programD3D = GetImplAs<ProgramD3D>(program);
bool programUsesInstancedPointSprites =
programD3D->usesPointSize() && programD3D->usesInstancedPointSpriteEmulation();
bool instancedPointSpritesActive = programUsesInstancedPointSprites && (mode == GL_POINTS);
if (programUsesInstancedPointSprites)
{ {
ASSERT(minDiff <= maxDiff && maxDiff < gl::MAX_VERTEX_ATTRIBS); layout.flags |= PackedAttributeLayout::FLAG_USES_INSTANCED_SPRITES;
mDeviceContext->IASetVertexBuffers(minDiff, maxDiff - minDiff + 1, mCurrentBuffers + minDiff, }
mCurrentVertexStrides + minDiff, mCurrentVertexOffsets + minDiff);
if (instancedPointSpritesActive)
{
layout.flags |= PackedAttributeLayout::FLAG_INSTANCED_SPRITES_ACTIVE;
}
const auto &semanticToLocation = programD3D->getAttributesByLayout();
for (size_t attribIndex = 0; attribIndex < attribCount; ++attribIndex)
{
const auto &attrib = *sortedAttributes[attribIndex];
int sortedIndex = sortedSemanticIndices[attribIndex];
if (!attrib.active)
continue;
gl::VertexFormatType vertexFormatType =
gl::GetVertexFormatType(*attrib.attribute, attrib.currentValueType);
// Record the type of the associated vertex shader vector in our key
// This will prevent mismatched vertex shaders from using the same input layout
GLenum glslElementType =
GetGLSLAttributeType(shaderAttributes, semanticToLocation[sortedIndex]);
layout.addAttributeData(glslElementType, sortedIndex, vertexFormatType, attrib.divisor);
}
ID3D11InputLayout *inputLayout = nullptr;
if (layout.numAttributes > 0 || layout.flags != 0)
{
auto layoutMapIt = mLayoutMap.find(layout);
if (layoutMapIt != mLayoutMap.end())
{
inputLayout = layoutMapIt->second;
}
else
{
gl::Error error = createInputLayout(sortedAttributes, sortedSemanticIndices,
attribCount, mode, program, &inputLayout);
if (error.isError())
{
return error;
}
if (mLayoutMap.size() >= mCacheSize)
{
TRACE("Overflowed the limit of %u input layouts, purging half the cache.",
mCacheSize);
// Randomly release every second element
auto it = mLayoutMap.begin();
while (it != mLayoutMap.end())
{
it++;
if (it != mLayoutMap.end())
{
// c++11 erase allows us to easily delete the current iterator.
SafeRelease(it->second);
it = mLayoutMap.erase(it);
}
}
}
mLayoutMap[layout] = inputLayout;
}
}
if (inputLayout != mCurrentIL)
{
mDeviceContext->IASetInputLayout(inputLayout);
mCurrentIL = inputLayout;
} }
return gl::Error(GL_NO_ERROR); return gl::Error(GL_NO_ERROR);
} }
gl::Error InputLayoutCache::findInputLayout( gl::Error InputLayoutCache::createInputLayout(const SortedAttribArray &sortedAttributes,
const PackedAttributeLayout &layout, const SortedIndexArray &sortedSemanticIndices,
unsigned int inputElementCount, size_t attribCount,
const D3D11_INPUT_ELEMENT_DESC inputElements[gl::MAX_VERTEX_ATTRIBS], GLenum mode,
ProgramD3D *programD3D, gl::Program *program,
const TranslatedAttribute *sortedAttributes[gl::MAX_VERTEX_ATTRIBS], ID3D11InputLayout **inputLayoutOut)
size_t attributeCount,
ID3D11InputLayout **inputLayout)
{ {
if (inputElementCount == 0) ProgramD3D *programD3D = GetImplAs<ProgramD3D>(program);
bool programUsesInstancedPointSprites =
programD3D->usesPointSize() && programD3D->usesInstancedPointSpriteEmulation();
unsigned int inputElementCount = 0;
std::array<D3D11_INPUT_ELEMENT_DESC, gl::MAX_VERTEX_ATTRIBS> inputElements;
for (size_t attribIndex = 0; attribIndex < attribCount; ++attribIndex)
{ {
*inputLayout = nullptr; const auto &attrib = *sortedAttributes[attribIndex];
return gl::Error(GL_NO_ERROR); const int sortedIndex = sortedSemanticIndices[attribIndex];
if (!attrib.active)
continue;
D3D11_INPUT_CLASSIFICATION inputClass =
attrib.divisor > 0 ? D3D11_INPUT_PER_INSTANCE_DATA : D3D11_INPUT_PER_VERTEX_DATA;
const auto &vertexFormatType =
gl::GetVertexFormatType(*attrib.attribute, attrib.currentValueType);
const auto &vertexFormatInfo = d3d11::GetVertexFormatInfo(vertexFormatType, mFeatureLevel);
auto *inputElement = &inputElements[inputElementCount];
inputElement->SemanticName = "TEXCOORD";
inputElement->SemanticIndex = sortedIndex;
inputElement->Format = vertexFormatInfo.nativeFormat;
inputElement->InputSlot = static_cast<UINT>(attribIndex);
inputElement->AlignedByteOffset = 0;
inputElement->InputSlotClass = inputClass;
inputElement->InstanceDataStepRate = attrib.divisor;
inputElementCount++;
} }
auto layoutMapIt = mLayoutMap.find(layout); // Instanced PointSprite emulation requires additional entries in the
if (layoutMapIt != mLayoutMap.end()) // inputlayout to support the vertices that make up the pointsprite quad.
// We do this even if mode != GL_POINTS, since the shader signature has these inputs, and the
// input layout must match the shader
if (programUsesInstancedPointSprites)
{ {
*inputLayout = layoutMapIt->second; // On 9_3, we must ensure that slot 0 contains non-instanced data.
return gl::Error(GL_NO_ERROR); // If slot 0 currently contains instanced data then we swap it with a non-instanced element.
// Note that instancing is only available on 9_3 via ANGLE_instanced_arrays, since 9_3
// doesn't support OpenGL ES 3.0.
// As per the spec for ANGLE_instanced_arrays, not all attributes can be instanced
// simultaneously, so a non-instanced element must exist.
for (size_t elementIndex = 0; elementIndex < inputElementCount; ++elementIndex)
{
if (sortedAttributes[elementIndex]->active)
{
// If rendering points and instanced pointsprite emulation is being used, the
// inputClass is required to be configured as per instance data
if (mode == GL_POINTS)
{
inputElements[elementIndex].InputSlotClass = D3D11_INPUT_PER_INSTANCE_DATA;
inputElements[elementIndex].InstanceDataStepRate = 1;
}
inputElements[elementIndex].InputSlot++;
}
}
inputElements[inputElementCount].SemanticName = "SPRITEPOSITION";
inputElements[inputElementCount].SemanticIndex = 0;
inputElements[inputElementCount].Format = DXGI_FORMAT_R32G32B32_FLOAT;
inputElements[inputElementCount].InputSlot = 0;
inputElements[inputElementCount].AlignedByteOffset = 0;
inputElements[inputElementCount].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
inputElements[inputElementCount].InstanceDataStepRate = 0;
inputElementCount++;
inputElements[inputElementCount].SemanticName = "SPRITETEXCOORD";
inputElements[inputElementCount].SemanticIndex = 0;
inputElements[inputElementCount].Format = DXGI_FORMAT_R32G32_FLOAT;
inputElements[inputElementCount].InputSlot = 0;
inputElements[inputElementCount].AlignedByteOffset = sizeof(float) * 3;
inputElements[inputElementCount].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
inputElements[inputElementCount].InstanceDataStepRate = 0;
inputElementCount++;
} }
const gl::InputLayout &shaderInputLayout = GetInputLayout(sortedAttributes, attributeCount); const gl::InputLayout &shaderInputLayout = GetInputLayout(sortedAttributes, attribCount);
ShaderExecutableD3D *shader = nullptr; ShaderExecutableD3D *shader = nullptr;
gl::Error error = gl::Error error =
...@@ -549,34 +590,14 @@ gl::Error InputLayoutCache::findInputLayout( ...@@ -549,34 +590,14 @@ gl::Error InputLayoutCache::findInputLayout(
ShaderExecutableD3D *shader11 = GetAs<ShaderExecutable11>(shader); ShaderExecutableD3D *shader11 = GetAs<ShaderExecutable11>(shader);
HRESULT result = HRESULT result =
mDevice->CreateInputLayout(inputElements, inputElementCount, shader11->getFunction(), mDevice->CreateInputLayout(inputElements.data(), inputElementCount, shader11->getFunction(),
shader11->getLength(), inputLayout); shader11->getLength(), inputLayoutOut);
if (FAILED(result)) if (FAILED(result))
{ {
return gl::Error(GL_OUT_OF_MEMORY, return gl::Error(GL_OUT_OF_MEMORY,
"Failed to create internal input layout, HRESULT: 0x%08x", result); "Failed to create internal input layout, HRESULT: 0x%08x", result);
} }
if (mLayoutMap.size() >= mCacheSize)
{
TRACE("Overflowed the limit of %u input layouts, purging half the cache.", mCacheSize);
// Randomly release every second element
auto it = mLayoutMap.begin();
while (it != mLayoutMap.end())
{
it++;
if (it != mLayoutMap.end())
{
// Calling std::map::erase invalidates the current iterator, so make a copy.
auto eraseIt = it++;
SafeRelease(eraseIt->second);
mLayoutMap.erase(eraseIt);
}
}
}
mLayoutMap[layout] = *inputLayout;
return gl::Error(GL_NO_ERROR); return gl::Error(GL_NO_ERROR);
} }
......
...@@ -13,8 +13,9 @@ ...@@ -13,8 +13,9 @@
#include <GLES2/gl2.h> #include <GLES2/gl2.h>
#include <cstddef> #include <cstddef>
#include <array>
#include <map> #include <map>
#include <unordered_map>
#include "common/angleutils.h" #include "common/angleutils.h"
#include "libANGLE/Constants.h" #include "libANGLE/Constants.h"
...@@ -33,6 +34,9 @@ struct TranslatedIndexData; ...@@ -33,6 +34,9 @@ struct TranslatedIndexData;
struct SourceIndexData; struct SourceIndexData;
class ProgramD3D; class ProgramD3D;
using SortedAttribArray = std::array<const TranslatedAttribute *, gl::MAX_VERTEX_ATTRIBS>;
using SortedIndexArray = std::array<int, gl::MAX_VERTEX_ATTRIBS>;
class InputLayoutCache : angle::NonCopyable class InputLayoutCache : angle::NonCopyable
{ {
public: public:
...@@ -67,9 +71,8 @@ class InputLayoutCache : angle::NonCopyable ...@@ -67,9 +71,8 @@ class InputLayoutCache : angle::NonCopyable
enum Flags enum Flags
{ {
FLAG_USES_INSTANCED_SPRITES = 0x1, FLAG_USES_INSTANCED_SPRITES = 0x1,
FLAG_MOVE_FIRST_INDEXED = 0x2, FLAG_INSTANCED_SPRITES_ACTIVE = 0x2,
FLAG_INSTANCED_SPRITES_ACTIVE = 0x4,
}; };
size_t numAttributes; size_t numAttributes;
...@@ -77,13 +80,17 @@ class InputLayoutCache : angle::NonCopyable ...@@ -77,13 +80,17 @@ class InputLayoutCache : angle::NonCopyable
uint32_t attributeData[gl::MAX_VERTEX_ATTRIBS]; uint32_t attributeData[gl::MAX_VERTEX_ATTRIBS];
}; };
gl::Error findInputLayout(const PackedAttributeLayout &layout, gl::Error updateInputLayout(gl::Program *program,
unsigned int inputElementCount, GLenum mode,
const D3D11_INPUT_ELEMENT_DESC inputElements[gl::MAX_VERTEX_ATTRIBS], const SortedAttribArray &sortedAttributes,
ProgramD3D *programD3D, const SortedIndexArray &sortedSemanticIndices,
const TranslatedAttribute *sortedAttributes[gl::MAX_VERTEX_ATTRIBS], size_t attribCount);
size_t attributeCount, gl::Error createInputLayout(const SortedAttribArray &sortedAttributes,
ID3D11InputLayout **inputLayout); const SortedIndexArray &sortedSemanticIndices,
size_t attribCount,
GLenum mode,
gl::Program *program,
ID3D11InputLayout **inputLayoutOut);
std::map<PackedAttributeLayout, ID3D11InputLayout *> mLayoutMap; std::map<PackedAttributeLayout, ID3D11InputLayout *> mLayoutMap;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment