floating point perf improvement in CopyNativeVertexData

In the CopyNativeVertexData template function, in the else clause, use memcpy/memset rather than for loops. This is a performance improvement for floating point versions of CopyNativeVertexData, particularly on 32-bit Firefox, where the use of the -arch:IA32 compiler option causes the compiler to generate fld/fstp operations inside the for loops. Update AUTHORS & CONTRIBUTORS to include AMD. BUG=angleproject:1317 Change-Id: I7133fc476028284f56783f4e2f9e2395f6227514 Reviewed-on: https://chromium-review.googlesource.com/327590 Reviewed-by: Corentin Wallez <cwallez@chromium.org> Reviewed-by: Jamie Madill <jmadill@chromium.org> Reviewed-by: Shannon Woods <shannonwoods@chromium.org> Commit-Queue: Jamie Madill <jmadill@chromium.org>

floating point perf improvement in CopyNativeVertexData
eaef7c17 · Russ Lind · Commit Bot · d2f02c2a · eaef7c17 · eaef7c17
Commit eaef7c17 authored Feb 15, 2016 by Russ Lind Committed by Commit Bot Feb 29, 2016
Hide whitespace changes
Inline Side-by-side

Showing with 31 additions and 18 deletions

AUTHORS AUTHORS +1 -0

CONTRIBUTORS CONTRIBUTORS +3 -0

copyvertex.inl src/libANGLE/renderer/d3d/d3d11/copyvertex.inl +27 -18

No files found.
--- a/AUTHORS
+++ b/AUTHORS
@@ -26,6 +26,7 @@ Microsoft Open Technologies, Inc.
 NVIDIA Corporation
 Opera Software ASA
 The Qt Company Ltd.
+Advanced Micro Devices, Inc.

 Jacek Caban
 Mark Callow

--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -105,3 +105,6 @@ NVIDIA Corporation
 Opera Software ASA
 Daniel Bratell
 Tomasz Moniuszko
+
+Advanced Micro Devices, Inc.
+ Russ Lind
--- a/src/libANGLE/renderer/d3d/d3d11/copyvertex.inl
+++ b/src/libANGLE/renderer/d3d/d3d11/copyvertex.inl
@@ -15,33 +15,42 @@ inline void CopyNativeVertexData(const uint8_t *input, size_t stride, size_t cou
    if (attribSize == stride && inputComponentCount == outputComponentCount)
    {
        memcpy(output, input, count * attribSize);
+        return;
    }
-    else
-    {
-        const T defaultAlphaValue = gl::bitCast<T>(alphaDefaultValueBits);
-        const size_t lastNonAlphaOutputComponent = std::min<size_t>(outputComponentCount, 3);

+    if (inputComponentCount == outputComponentCount)
+    {
        for (size_t i = 0; i < count; i++)
        {
            const T *offsetInput = reinterpret_cast<const T*>(input + (i * stride));
            T *offsetOutput = reinterpret_cast<T*>(output) + i * outputComponentCount;

-            for (size_t j = 0; j < inputComponentCount; j++)
-            {
-                offsetOutput[j] = offsetInput[j];
-            }
+            memcpy(offsetOutput, offsetInput, attribSize);
+        }
+        return;
+    }

-            for (size_t j = inputComponentCount; j < lastNonAlphaOutputComponent; j++)
-            {
-                // Set the remaining G/B channels to 0.
-                offsetOutput[j] = 0;
-            }
+    const T defaultAlphaValue = gl::bitCast<T>(alphaDefaultValueBits);
+    const size_t lastNonAlphaOutputComponent = std::min<size_t>(outputComponentCount, 3);

-            if (inputComponentCount < outputComponentCount && outputComponentCount == 4)
-            {
-                // Set the remaining alpha channel to the defaultAlphaValue.
-                offsetOutput[3] = defaultAlphaValue;
-            }
+    for (size_t i = 0; i < count; i++)
+    {
+        const T *offsetInput = reinterpret_cast<const T*>(input + (i * stride));
+        T *offsetOutput = reinterpret_cast<T*>(output) + i * outputComponentCount;
+
+        memcpy(offsetOutput, offsetInput, attribSize);
+
+        if (inputComponentCount < lastNonAlphaOutputComponent)
+        {
+            // Set the remaining G/B channels to 0.
+            size_t numComponents = (lastNonAlphaOutputComponent - inputComponentCount);
+            memset(&offsetOutput[inputComponentCount], 0, numComponents * sizeof(T));
+        }
+
+        if (inputComponentCount < outputComponentCount && outputComponentCount == 4)
+        {
+            // Set the remaining alpha channel to the defaultAlphaValue.
+            offsetOutput[3] = defaultAlphaValue;
        }
    }
 }