Commit 11ffe1b8 by Olli Etuaho

Micro-optimize math in IndexDataManager

Use bitwise operations instead of division, which is expensive on multiple CPU architectures.

BUG=angleproject:956
TEST=angle_end2end_tests
Change-Id: I57ab540d447c03dae5a96bafb4975fc37e310261
Reviewed-on: https://chromium-review.googlesource.com/262181
Tested-by: Olli Etuaho <oetuaho@nvidia.com>
Reviewed-by: Nicolas Capens <capn@chromium.org>
Tested-by: Jamie Madill <jmadill@chromium.org>
parent 5e5c826c
...@@ -132,6 +132,7 @@ FormatMap BuildFormatMap() ...@@ -132,6 +132,7 @@ FormatMap BuildFormatMap()
Type::Type() Type::Type()
: bytes(0), : bytes(0),
bytesShift(0),
specialInterpretation(false) specialInterpretation(false)
{ {
} }
...@@ -140,6 +141,13 @@ static Type GenTypeInfo(GLuint bytes, bool specialInterpretation) ...@@ -140,6 +141,13 @@ static Type GenTypeInfo(GLuint bytes, bool specialInterpretation)
{ {
Type info; Type info;
info.bytes = bytes; info.bytes = bytes;
GLuint i = 0;
while ((1u << i) < bytes)
{
++i;
}
info.bytesShift = i;
ASSERT((1u << info.bytesShift) == bytes);
info.specialInterpretation = specialInterpretation; info.specialInterpretation = specialInterpretation;
return info; return info;
} }
......
...@@ -25,6 +25,7 @@ struct Type ...@@ -25,6 +25,7 @@ struct Type
Type(); Type();
GLuint bytes; GLuint bytes;
GLuint bytesShift; // Bit shift by this value to effectively divide/multiply by "bytes" in a more optimal way
bool specialInterpretation; bool specialInterpretation;
}; };
const Type &GetTypeInfo(GLenum type); const Type &GetTypeInfo(GLenum type);
......
...@@ -86,6 +86,8 @@ gl::Error IndexDataManager::prepareIndexData(GLenum type, GLsizei count, gl::Buf ...@@ -86,6 +86,8 @@ gl::Error IndexDataManager::prepareIndexData(GLenum type, GLsizei count, gl::Buf
storage = GetImplAs<BufferD3D>(buffer); storage = GetImplAs<BufferD3D>(buffer);
// We'll trust that the compiler will optimize the % below:
// the operands are unsigned and the divisor is a constant.
switch (type) switch (type)
{ {
case GL_UNSIGNED_BYTE: alignedOffset = (offset % sizeof(GLubyte) == 0); break; case GL_UNSIGNED_BYTE: alignedOffset = (offset % sizeof(GLubyte) == 0); break;
...@@ -127,7 +129,8 @@ gl::Error IndexDataManager::prepareIndexData(GLenum type, GLsizei count, gl::Buf ...@@ -127,7 +129,8 @@ gl::Error IndexDataManager::prepareIndexData(GLenum type, GLsizei count, gl::Buf
if (!staticBuffer->getIndexRangeCache()->findRange(type, offset, count, NULL, &streamOffset)) if (!staticBuffer->getIndexRangeCache()->findRange(type, offset, count, NULL, &streamOffset))
{ {
streamOffset = (offset / typeInfo.bytes) * gl::GetTypeInfo(destinationIndexType).bytes; // Using bit-shift here is faster than using division.
streamOffset = (offset >> typeInfo.bytesShift) << gl::GetTypeInfo(destinationIndexType).bytesShift;
staticBuffer->getIndexRangeCache()->addRange(type, offset, count, translated->indexRange, streamOffset); staticBuffer->getIndexRangeCache()->addRange(type, offset, count, translated->indexRange, streamOffset);
} }
if (!buffer->getIndexRangeCache()->findRange(type, offset, count, nullptr, nullptr)) if (!buffer->getIndexRangeCache()->findRange(type, offset, count, nullptr, nullptr))
...@@ -162,7 +165,8 @@ gl::Error IndexDataManager::prepareIndexData(GLenum type, GLsizei count, gl::Buf ...@@ -162,7 +165,8 @@ gl::Error IndexDataManager::prepareIndexData(GLenum type, GLsizei count, gl::Buf
if (staticBuffer->getBufferSize() == 0 && alignedOffset) if (staticBuffer->getBufferSize() == 0 && alignedOffset)
{ {
indexBuffer = staticBuffer; indexBuffer = staticBuffer;
convertCount = storage->getSize() / typeInfo.bytes; // Using bit-shift here is faster than using division.
convertCount = storage->getSize() >> typeInfo.bytesShift;
} }
else else
{ {
...@@ -173,13 +177,14 @@ gl::Error IndexDataManager::prepareIndexData(GLenum type, GLsizei count, gl::Buf ...@@ -173,13 +177,14 @@ gl::Error IndexDataManager::prepareIndexData(GLenum type, GLsizei count, gl::Buf
ASSERT(indexBuffer); ASSERT(indexBuffer);
if (convertCount > std::numeric_limits<unsigned int>::max() / destTypeInfo.bytes) // Using bit-shift here is faster than using division.
if (convertCount > (std::numeric_limits<unsigned int>::max() >> destTypeInfo.bytesShift))
{ {
return gl::Error(GL_OUT_OF_MEMORY, "Reserving %u indices of %u bytes each exceeds the maximum buffer size.", return gl::Error(GL_OUT_OF_MEMORY, "Reserving %u indices of %u bytes each exceeds the maximum buffer size.",
convertCount, destTypeInfo.bytes); convertCount, destTypeInfo.bytes);
} }
unsigned int bufferSizeRequired = convertCount * destTypeInfo.bytes; unsigned int bufferSizeRequired = convertCount << destTypeInfo.bytesShift;
error = indexBuffer->reserveBufferSpace(bufferSizeRequired, type); error = indexBuffer->reserveBufferSpace(bufferSizeRequired, type);
if (error.isError()) if (error.isError())
{ {
...@@ -212,7 +217,8 @@ gl::Error IndexDataManager::prepareIndexData(GLenum type, GLsizei count, gl::Buf ...@@ -212,7 +217,8 @@ gl::Error IndexDataManager::prepareIndexData(GLenum type, GLsizei count, gl::Buf
if (staticBuffer) if (staticBuffer)
{ {
streamOffset = (offset / typeInfo.bytes) * destTypeInfo.bytes; // Using bit-shift here is faster than using division.
streamOffset = (offset >> typeInfo.bytesShift) << destTypeInfo.bytesShift;
staticBuffer->getIndexRangeCache()->addRange(type, offset, count, translated->indexRange, streamOffset); staticBuffer->getIndexRangeCache()->addRange(type, offset, count, translated->indexRange, streamOffset);
} }
} }
...@@ -220,13 +226,14 @@ gl::Error IndexDataManager::prepareIndexData(GLenum type, GLsizei count, gl::Buf ...@@ -220,13 +226,14 @@ gl::Error IndexDataManager::prepareIndexData(GLenum type, GLsizei count, gl::Buf
translated->storage = directStorage ? storage : NULL; translated->storage = directStorage ? storage : NULL;
translated->indexBuffer = indexBuffer ? indexBuffer->getIndexBuffer() : NULL; translated->indexBuffer = indexBuffer ? indexBuffer->getIndexBuffer() : NULL;
translated->serial = directStorage ? storage->getSerial() : indexBuffer->getSerial(); translated->serial = directStorage ? storage->getSerial() : indexBuffer->getSerial();
translated->startIndex = streamOffset / destTypeInfo.bytes; // Using bit-shift here is faster than using division.
translated->startIndex = (streamOffset >> destTypeInfo.bytesShift);
translated->startOffset = streamOffset; translated->startOffset = streamOffset;
translated->indexType = destinationIndexType; translated->indexType = destinationIndexType;
if (storage) if (storage)
{ {
storage->promoteStaticUsage(count * typeInfo.bytes); storage->promoteStaticUsage(count << typeInfo.bytesShift);
} }
return gl::Error(GL_NO_ERROR); return gl::Error(GL_NO_ERROR);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment