Commit 11ffe1b8 by Olli Etuaho

Micro-optimize math in IndexDataManager

Use bitwise operations instead of division, which is expensive on multiple CPU architectures. BUG=angleproject:956 TEST=angle_end2end_tests Change-Id: I57ab540d447c03dae5a96bafb4975fc37e310261 Reviewed-on: https://chromium-review.googlesource.com/262181Tested-by: 's avatarOlli Etuaho <oetuaho@nvidia.com> Reviewed-by: 's avatarNicolas Capens <capn@chromium.org> Tested-by: 's avatarJamie Madill <jmadill@chromium.org>
parent 5e5c826c
......@@ -132,6 +132,7 @@ FormatMap BuildFormatMap()
Type::Type()
: bytes(0),
bytesShift(0),
specialInterpretation(false)
{
}
......@@ -140,6 +141,13 @@ static Type GenTypeInfo(GLuint bytes, bool specialInterpretation)
{
Type info;
info.bytes = bytes;
GLuint i = 0;
while ((1u << i) < bytes)
{
++i;
}
info.bytesShift = i;
ASSERT((1u << info.bytesShift) == bytes);
info.specialInterpretation = specialInterpretation;
return info;
}
......
......@@ -25,6 +25,7 @@ struct Type
Type();
GLuint bytes;
GLuint bytesShift; // Bit shift by this value to effectively divide/multiply by "bytes" in a more optimal way
bool specialInterpretation;
};
const Type &GetTypeInfo(GLenum type);
......
......@@ -86,6 +86,8 @@ gl::Error IndexDataManager::prepareIndexData(GLenum type, GLsizei count, gl::Buf
storage = GetImplAs<BufferD3D>(buffer);
// We'll trust that the compiler will optimize the % below:
// the operands are unsigned and the divisor is a constant.
switch (type)
{
case GL_UNSIGNED_BYTE: alignedOffset = (offset % sizeof(GLubyte) == 0); break;
......@@ -127,7 +129,8 @@ gl::Error IndexDataManager::prepareIndexData(GLenum type, GLsizei count, gl::Buf
if (!staticBuffer->getIndexRangeCache()->findRange(type, offset, count, NULL, &streamOffset))
{
streamOffset = (offset / typeInfo.bytes) * gl::GetTypeInfo(destinationIndexType).bytes;
// Using bit-shift here is faster than using division.
streamOffset = (offset >> typeInfo.bytesShift) << gl::GetTypeInfo(destinationIndexType).bytesShift;
staticBuffer->getIndexRangeCache()->addRange(type, offset, count, translated->indexRange, streamOffset);
}
if (!buffer->getIndexRangeCache()->findRange(type, offset, count, nullptr, nullptr))
......@@ -162,7 +165,8 @@ gl::Error IndexDataManager::prepareIndexData(GLenum type, GLsizei count, gl::Buf
if (staticBuffer->getBufferSize() == 0 && alignedOffset)
{
indexBuffer = staticBuffer;
convertCount = storage->getSize() / typeInfo.bytes;
// Using bit-shift here is faster than using division.
convertCount = storage->getSize() >> typeInfo.bytesShift;
}
else
{
......@@ -173,13 +177,14 @@ gl::Error IndexDataManager::prepareIndexData(GLenum type, GLsizei count, gl::Buf
ASSERT(indexBuffer);
if (convertCount > std::numeric_limits<unsigned int>::max() / destTypeInfo.bytes)
// Using bit-shift here is faster than using division.
if (convertCount > (std::numeric_limits<unsigned int>::max() >> destTypeInfo.bytesShift))
{
return gl::Error(GL_OUT_OF_MEMORY, "Reserving %u indices of %u bytes each exceeds the maximum buffer size.",
convertCount, destTypeInfo.bytes);
}
unsigned int bufferSizeRequired = convertCount * destTypeInfo.bytes;
unsigned int bufferSizeRequired = convertCount << destTypeInfo.bytesShift;
error = indexBuffer->reserveBufferSpace(bufferSizeRequired, type);
if (error.isError())
{
......@@ -212,7 +217,8 @@ gl::Error IndexDataManager::prepareIndexData(GLenum type, GLsizei count, gl::Buf
if (staticBuffer)
{
streamOffset = (offset / typeInfo.bytes) * destTypeInfo.bytes;
// Using bit-shift here is faster than using division.
streamOffset = (offset >> typeInfo.bytesShift) << destTypeInfo.bytesShift;
staticBuffer->getIndexRangeCache()->addRange(type, offset, count, translated->indexRange, streamOffset);
}
}
......@@ -220,13 +226,14 @@ gl::Error IndexDataManager::prepareIndexData(GLenum type, GLsizei count, gl::Buf
translated->storage = directStorage ? storage : NULL;
translated->indexBuffer = indexBuffer ? indexBuffer->getIndexBuffer() : NULL;
translated->serial = directStorage ? storage->getSerial() : indexBuffer->getSerial();
translated->startIndex = streamOffset / destTypeInfo.bytes;
// Using bit-shift here is faster than using division.
translated->startIndex = (streamOffset >> destTypeInfo.bytesShift);
translated->startOffset = streamOffset;
translated->indexType = destinationIndexType;
if (storage)
{
storage->promoteStaticUsage(count * typeInfo.bytes);
storage->promoteStaticUsage(count << typeInfo.bytesShift);
}
return gl::Error(GL_NO_ERROR);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment