Commit eaef7c17 by Russ Lind Committed by Commit Bot

floating point perf improvement in CopyNativeVertexData

In the CopyNativeVertexData template function, in the else clause, use memcpy/memset rather than for loops. This is a performance improvement for floating point versions of CopyNativeVertexData, particularly on 32-bit Firefox, where the -arch:IA32 compiler option in use causes the compiler to generate fld/fstp operations inside the for loops. Update AUTHORS & CONTRIBUTORS to include AMD. BUG=angleproject:1317 Change-Id: I7133fc476028284f56783f4e2f9e2395f6227514 Reviewed-on: https://chromium-review.googlesource.com/327590 Reviewed-by: Corentin Wallez <cwallez@chromium.org> Reviewed-by: Jamie Madill <jmadill@chromium.org> Reviewed-by: Shannon Woods <shannonwoods@chromium.org> Commit-Queue: Jamie Madill <jmadill@chromium.org>
parent d2f02c2a
......@@ -26,6 +26,7 @@ Microsoft Open Technologies, Inc.
NVIDIA Corporation
Opera Software ASA
The Qt Company Ltd.
Advanced Micro Devices, Inc.
Jacek Caban
Mark Callow
......
......@@ -105,3 +105,6 @@ NVIDIA Corporation
Opera Software ASA
Daniel Bratell
Tomasz Moniuszko
Advanced Micro Devices, Inc.
Russ Lind
......@@ -15,33 +15,42 @@ inline void CopyNativeVertexData(const uint8_t *input, size_t stride, size_t cou
if (attribSize == stride && inputComponentCount == outputComponentCount)
{
memcpy(output, input, count * attribSize);
return;
}
else
{
const T defaultAlphaValue = gl::bitCast<T>(alphaDefaultValueBits);
const size_t lastNonAlphaOutputComponent = std::min<size_t>(outputComponentCount, 3);
if (inputComponentCount == outputComponentCount)
{
for (size_t i = 0; i < count; i++)
{
const T *offsetInput = reinterpret_cast<const T*>(input + (i * stride));
T *offsetOutput = reinterpret_cast<T*>(output) + i * outputComponentCount;
for (size_t j = 0; j < inputComponentCount; j++)
{
offsetOutput[j] = offsetInput[j];
}
memcpy(offsetOutput, offsetInput, attribSize);
}
return;
}
for (size_t j = inputComponentCount; j < lastNonAlphaOutputComponent; j++)
{
// Set the remaining G/B channels to 0.
offsetOutput[j] = 0;
}
const T defaultAlphaValue = gl::bitCast<T>(alphaDefaultValueBits);
const size_t lastNonAlphaOutputComponent = std::min<size_t>(outputComponentCount, 3);
if (inputComponentCount < outputComponentCount && outputComponentCount == 4)
{
// Set the remaining alpha channel to the defaultAlphaValue.
offsetOutput[3] = defaultAlphaValue;
}
for (size_t i = 0; i < count; i++)
{
const T *offsetInput = reinterpret_cast<const T*>(input + (i * stride));
T *offsetOutput = reinterpret_cast<T*>(output) + i * outputComponentCount;
memcpy(offsetOutput, offsetInput, attribSize);
if (inputComponentCount < lastNonAlphaOutputComponent)
{
// Set the remaining G/B channels to 0.
size_t numComponents = (lastNonAlphaOutputComponent - inputComponentCount);
memset(&offsetOutput[inputComponentCount], 0, numComponents * sizeof(T));
}
if (inputComponentCount < outputComponentCount && outputComponentCount == 4)
{
// Set the remaining alpha channel to the defaultAlphaValue.
offsetOutput[3] = defaultAlphaValue;
}
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment