Commit eaef7c17 by Russ Lind Committed by Commit Bot

floating point perf improvement in CopyNativeVertexData

In the CopyNativeVertexData template function, use memcpy/memset in the else clause rather than for loops. This is a performance improvement for the floating-point versions of CopyNativeVertexData, particularly on 32-bit Firefox, where the -arch:IA32 compiler option causes the compiler to generate fld/fstp operations inside the for loops. Update AUTHORS & CONTRIBUTORS to include AMD.

BUG=angleproject:1317
Change-Id: I7133fc476028284f56783f4e2f9e2395f6227514
Reviewed-on: https://chromium-review.googlesource.com/327590
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Reviewed-by: Jamie Madill <jmadill@chromium.org>
Reviewed-by: Shannon Woods <shannonwoods@chromium.org>
Commit-Queue: Jamie Madill <jmadill@chromium.org>
parent d2f02c2a
...@@ -26,6 +26,7 @@ Microsoft Open Technologies, Inc. ...@@ -26,6 +26,7 @@ Microsoft Open Technologies, Inc.
NVIDIA Corporation NVIDIA Corporation
Opera Software ASA Opera Software ASA
The Qt Company Ltd. The Qt Company Ltd.
Advanced Micro Devices, Inc.
Jacek Caban Jacek Caban
Mark Callow Mark Callow
......
...@@ -105,3 +105,6 @@ NVIDIA Corporation ...@@ -105,3 +105,6 @@ NVIDIA Corporation
Opera Software ASA Opera Software ASA
Daniel Bratell Daniel Bratell
Tomasz Moniuszko Tomasz Moniuszko
Advanced Micro Devices, Inc.
Russ Lind
...@@ -15,33 +15,42 @@ inline void CopyNativeVertexData(const uint8_t *input, size_t stride, size_t cou ...@@ -15,33 +15,42 @@ inline void CopyNativeVertexData(const uint8_t *input, size_t stride, size_t cou
if (attribSize == stride && inputComponentCount == outputComponentCount) if (attribSize == stride && inputComponentCount == outputComponentCount)
{ {
memcpy(output, input, count * attribSize); memcpy(output, input, count * attribSize);
return;
} }
else
{
const T defaultAlphaValue = gl::bitCast<T>(alphaDefaultValueBits);
const size_t lastNonAlphaOutputComponent = std::min<size_t>(outputComponentCount, 3);
if (inputComponentCount == outputComponentCount)
{
for (size_t i = 0; i < count; i++) for (size_t i = 0; i < count; i++)
{ {
const T *offsetInput = reinterpret_cast<const T*>(input + (i * stride)); const T *offsetInput = reinterpret_cast<const T*>(input + (i * stride));
T *offsetOutput = reinterpret_cast<T*>(output) + i * outputComponentCount; T *offsetOutput = reinterpret_cast<T*>(output) + i * outputComponentCount;
for (size_t j = 0; j < inputComponentCount; j++) memcpy(offsetOutput, offsetInput, attribSize);
{ }
offsetOutput[j] = offsetInput[j]; return;
} }
for (size_t j = inputComponentCount; j < lastNonAlphaOutputComponent; j++) const T defaultAlphaValue = gl::bitCast<T>(alphaDefaultValueBits);
{ const size_t lastNonAlphaOutputComponent = std::min<size_t>(outputComponentCount, 3);
// Set the remaining G/B channels to 0.
offsetOutput[j] = 0;
}
if (inputComponentCount < outputComponentCount && outputComponentCount == 4) for (size_t i = 0; i < count; i++)
{ {
// Set the remaining alpha channel to the defaultAlphaValue. const T *offsetInput = reinterpret_cast<const T*>(input + (i * stride));
offsetOutput[3] = defaultAlphaValue; T *offsetOutput = reinterpret_cast<T*>(output) + i * outputComponentCount;
}
memcpy(offsetOutput, offsetInput, attribSize);
if (inputComponentCount < lastNonAlphaOutputComponent)
{
// Set the remaining G/B channels to 0.
size_t numComponents = (lastNonAlphaOutputComponent - inputComponentCount);
memset(&offsetOutput[inputComponentCount], 0, numComponents * sizeof(T));
}
if (inputComponentCount < outputComponentCount && outputComponentCount == 4)
{
// Set the remaining alpha channel to the defaultAlphaValue.
offsetOutput[3] = defaultAlphaValue;
} }
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment