Commit eaef7c17 by Russ Lind Committed by Commit Bot

floating point perf improvement in CopyNativeVertexData

In the CopyNativeVertexData template function, in the else clause, use memcpy/memset rather than for loops. This is a performance improvement for floating point versions of CopyNativeVertexData, particularly on 32-bit Firefox, where the -arch:IA32 compiler option causes the compiler to generate fld/fstp operations inside the for loops. Update AUTHORS & CONTRIBUTORS to include AMD. BUG=angleproject:1317 Change-Id: I7133fc476028284f56783f4e2f9e2395f6227514 Reviewed-on: https://chromium-review.googlesource.com/327590 Reviewed-by: Corentin Wallez <cwallez@chromium.org> Reviewed-by: Jamie Madill <jmadill@chromium.org> Reviewed-by: Shannon Woods <shannonwoods@chromium.org> Commit-Queue: Jamie Madill <jmadill@chromium.org>
parent d2f02c2a
...@@ -26,6 +26,7 @@ Microsoft Open Technologies, Inc. ...@@ -26,6 +26,7 @@ Microsoft Open Technologies, Inc.
NVIDIA Corporation NVIDIA Corporation
Opera Software ASA Opera Software ASA
The Qt Company Ltd. The Qt Company Ltd.
Advanced Micro Devices, Inc.
Jacek Caban Jacek Caban
Mark Callow Mark Callow
......
...@@ -105,3 +105,6 @@ NVIDIA Corporation ...@@ -105,3 +105,6 @@ NVIDIA Corporation
Opera Software ASA Opera Software ASA
Daniel Bratell Daniel Bratell
Tomasz Moniuszko Tomasz Moniuszko
Advanced Micro Devices, Inc.
Russ Lind
...@@ -15,9 +15,21 @@ inline void CopyNativeVertexData(const uint8_t *input, size_t stride, size_t cou ...@@ -15,9 +15,21 @@ inline void CopyNativeVertexData(const uint8_t *input, size_t stride, size_t cou
if (attribSize == stride && inputComponentCount == outputComponentCount) if (attribSize == stride && inputComponentCount == outputComponentCount)
{ {
memcpy(output, input, count * attribSize); memcpy(output, input, count * attribSize);
return;
} }
else
if (inputComponentCount == outputComponentCount)
{
for (size_t i = 0; i < count; i++)
{ {
const T *offsetInput = reinterpret_cast<const T*>(input + (i * stride));
T *offsetOutput = reinterpret_cast<T*>(output) + i * outputComponentCount;
memcpy(offsetOutput, offsetInput, attribSize);
}
return;
}
const T defaultAlphaValue = gl::bitCast<T>(alphaDefaultValueBits); const T defaultAlphaValue = gl::bitCast<T>(alphaDefaultValueBits);
const size_t lastNonAlphaOutputComponent = std::min<size_t>(outputComponentCount, 3); const size_t lastNonAlphaOutputComponent = std::min<size_t>(outputComponentCount, 3);
...@@ -26,15 +38,13 @@ inline void CopyNativeVertexData(const uint8_t *input, size_t stride, size_t cou ...@@ -26,15 +38,13 @@ inline void CopyNativeVertexData(const uint8_t *input, size_t stride, size_t cou
const T *offsetInput = reinterpret_cast<const T*>(input + (i * stride)); const T *offsetInput = reinterpret_cast<const T*>(input + (i * stride));
T *offsetOutput = reinterpret_cast<T*>(output) + i * outputComponentCount; T *offsetOutput = reinterpret_cast<T*>(output) + i * outputComponentCount;
for (size_t j = 0; j < inputComponentCount; j++) memcpy(offsetOutput, offsetInput, attribSize);
{
offsetOutput[j] = offsetInput[j];
}
for (size_t j = inputComponentCount; j < lastNonAlphaOutputComponent; j++) if (inputComponentCount < lastNonAlphaOutputComponent)
{ {
// Set the remaining G/B channels to 0. // Set the remaining G/B channels to 0.
offsetOutput[j] = 0; size_t numComponents = (lastNonAlphaOutputComponent - inputComponentCount);
memset(&offsetOutput[inputComponentCount], 0, numComponents * sizeof(T));
} }
if (inputComponentCount < outputComponentCount && outputComponentCount == 4) if (inputComponentCount < outputComponentCount && outputComponentCount == 4)
...@@ -43,7 +53,6 @@ inline void CopyNativeVertexData(const uint8_t *input, size_t stride, size_t cou ...@@ -43,7 +53,6 @@ inline void CopyNativeVertexData(const uint8_t *input, size_t stride, size_t cou
offsetOutput[3] = defaultAlphaValue; offsetOutput[3] = defaultAlphaValue;
} }
} }
}
} }
template <size_t inputComponentCount, size_t outputComponentCount> template <size_t inputComponentCount, size_t outputComponentCount>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment