D3D11: Improvements on ETC1 to BC1 transcode.

ETC1 data doesn't need to be fully decoded before encoding to BC1. Saves ~50% computation. BUG=angleproject:1285 Change-Id: I0cb479a08159ccd0472e5fdf40dd84323151883c Reviewed-on: https://chromium-review.googlesource.com/361553 Commit-Queue: Jamie Madill <jmadill@chromium.org> Reviewed-by: Jamie Madill <jmadill@chromium.org>

D3D11: Improvements on ETC1 to BC1 transcode.
41ee2024 · Minmin Gong · Commit Bot · 8cf70d55 · 41ee2024
Commit 41ee2024 authored Jul 18, 2016 by Minmin Gong Committed by Commit Bot Aug 12, 2016
Hide whitespace changes
Inline Side-by-side

Showing with 209 additions and 138 deletions

loadimage_etc.cpp src/image_util/loadimage_etc.cpp +209 -138

No files found.
--- a/src/image_util/loadimage_etc.cpp
+++ b/src/image_util/loadimage_etc.cpp
@@ -46,6 +46,8 @@ static const int intensityModifierNonOpaque[][4] =
 };
 // clang-format on

+static const int kNumPixelsInBlock = 16;
+
 struct ETC2Block
 {
    // Decodes unsigned single or dual channel block to bytes
@@ -629,10 +631,13 @@ struct ETC2Block
               (static_cast<uint16_t>(rgba.B >> 3) << 0);
    }

-    uint32_t matchBC1Bits(const R8G8B8A8 *rgba,
+    uint32_t matchBC1Bits(const int *pixelIndices,
+                          const int *pixelIndexCounts,
+                          const R8G8B8A8 *subblockColors,
+                          size_t numColors,
                          const R8G8B8A8 &minColor,
                          const R8G8B8A8 &maxColor,
-                          bool opaque) const
+                          bool nonOpaquePunchThroughAlpha) const
    {
        // Project each pixel on the (maxColor, minColor) line to decide which
        // BC1 code to assign to it.
@@ -653,102 +658,127 @@ struct ETC2Block
                       decodedColors[i][2] * direction[2];
        }

-        uint32_t bits = 0;
-        if (opaque)
+        ASSERT(numColors <= kNumPixelsInBlock);
+
+        int encodedColors[kNumPixelsInBlock];
+        if (nonOpaquePunchThroughAlpha)
        {
-            for (int i = 15; i >= 0; i--)
+            for (size_t i = 0; i < numColors; i++)
            {
-                // In opaque mode, the code is from 0 to 3.
-
-                bits <<= 2;
-                const int dot =
-                    rgba[i].R * direction[0] + rgba[i].G * direction[1] + rgba[i].B * direction[2];
-                const int factor = gl::clamp(
-                    static_cast<int>(
-                        (static_cast<float>(dot - stops[1]) / (stops[0] - stops[1])) * 3 + 0.5f),
-                    0, 3);
-                switch (factor)
+                const int count = pixelIndexCounts[i];
+                if (count > 0)
                {
-                    case 0:
-                        bits |= 1;
-                        break;
-                    case 1:
-                        bits |= 3;
-                        break;
-                    case 2:
-                        bits |= 2;
-                        break;
-                    case 3:
-                    default:
-                        bits |= 0;
-                        break;
+                    // In non-opaque mode, 3 is for tranparent pixels.
+
+                    if (0 == subblockColors[i].A)
+                    {
+                        encodedColors[i] = 3;
+                    }
+                    else
+                    {
+                        const R8G8B8A8 &pixel = subblockColors[i];
+                        const int dot         = pixel.R * direction[0] + pixel.G * direction[1] +
+                                        pixel.B * direction[2];
+                        const int factor = gl::clamp(
+                            static_cast<int>(
+                                (static_cast<float>(dot - stops[1]) / (stops[0] - stops[1])) * 2 +
+                                0.5f),
+                            0, 2);
+                        switch (factor)
+                        {
+                            case 0:
+                                encodedColors[i] = 0;
+                                break;
+                            case 1:
+                                encodedColors[i] = 2;
+                                break;
+                            case 2:
+                            default:
+                                encodedColors[i] = 1;
+                                break;
+                        }
+                    }
                }
            }
        }
        else
        {
-            for (int i = 15; i >= 0; i--)
+            for (size_t i = 0; i < numColors; i++)
            {
-                // In non-opaque mode, 3 is for tranparent pixels.
-
-                bits <<= 2;
-                if (0 == rgba[i].A)
+                const int count = pixelIndexCounts[i];
+                if (count > 0)
                {
-                    bits |= 3;
-                }
-                else
-                {
-                    const int dot = rgba[i].R * direction[0] + rgba[i].G * direction[1] +
-                                    rgba[i].B * direction[2];
+                    // In opaque mode, the code is from 0 to 3.
+
+                    const R8G8B8A8 &pixel = subblockColors[i];
+                    const int dot =
+                        pixel.R * direction[0] + pixel.G * direction[1] + pixel.B * direction[2];
                    const int factor = gl::clamp(
                        static_cast<int>(
-                            (static_cast<float>(dot - stops[1]) / (stops[0] - stops[1])) * 2 +
+                            (static_cast<float>(dot - stops[1]) / (stops[0] - stops[1])) * 3 +
                            0.5f),
-                        0, 2);
+                        0, 3);
                    switch (factor)
                    {
                        case 0:
-                            bits |= 0;
+                            encodedColors[i] = 1;
                            break;
                        case 1:
-                            bits |= 2;
+                            encodedColors[i] = 3;
                            break;
                        case 2:
+                            encodedColors[i] = 2;
+                            break;
+                        case 3:
                        default:
-                            bits |= 1;
+                            encodedColors[i] = 0;
                            break;
                    }
                }
            }
        }

+        uint32_t bits = 0;
+        for (int i = kNumPixelsInBlock - 1; i >= 0; i--)
+        {
+            bits <<= 2;
+            bits |= encodedColors[pixelIndices[i]];
+        }
+
        return bits;
    }

    void packBC1(void *bc1,
-                 const R8G8B8A8 *rgba,
-                 const R8G8B8A8 &minColor,
-                 const R8G8B8A8 &maxColor,
-                 bool opaque) const
+                 const int *pixelIndices,
+                 const int *pixelIndexCounts,
+                 const R8G8B8A8 *subblockColors,
+                 size_t numColors,
+                 int minColorIndex,
+                 int maxColorIndex,
+                 bool nonOpaquePunchThroughAlpha) const
    {
+        const R8G8B8A8 &minColor = subblockColors[minColorIndex];
+        const R8G8B8A8 &maxColor = subblockColors[maxColorIndex];
+
        uint32_t bits;
        uint16_t max16 = RGB8ToRGB565(maxColor);
        uint16_t min16 = RGB8ToRGB565(minColor);
        if (max16 != min16)
        {
            // Find the best BC1 code for each pixel
-            bits = matchBC1Bits(rgba, minColor, maxColor, opaque);
+            bits = matchBC1Bits(pixelIndices, pixelIndexCounts, subblockColors, numColors, minColor,
+                                maxColor, nonOpaquePunchThroughAlpha);
        }
        else
        {
            // Same colors, BC1 index 0 is the color in both opaque and transparent mode
            bits = 0;
            // BC1 index 3 is transparent
-            if (!opaque)
+            if (nonOpaquePunchThroughAlpha)
            {
-                for (int i = 0; i < 16; i++)
+                for (int i = 0; i < kNumPixelsInBlock; i++)
                {
-                    if (0 == rgba[i].A)
+                    if (0 == subblockColors[pixelIndices[i]].A)
                    {
                        bits |= (3 << (i * 2));
                    }
@@ -761,15 +791,7 @@ struct ETC2Block
            std::swap(max16, min16);

            uint32_t xorMask = 0;
-            if (opaque)
-            {
-                // In opaque mode switching the two colors is doing the
-                // following code swaps: 0 <-> 1 and 2 <-> 3. This is
-                // equivalent to flipping the first bit of each code
-                // (5 = 0b0101)
-                xorMask = 0x55555555;
-            }
-            else
+            if (nonOpaquePunchThroughAlpha)
            {
                // In transparent mode switching the colors is doing the
                // following code swap: 0 <-> 1. 0xA selects the second bit of
@@ -779,6 +801,14 @@ struct ETC2Block
                // 0 or 1.
                xorMask = ~((bits >> 1) | 0xAAAAAAAA);
            }
+            else
+            {
+                // In opaque mode switching the two colors is doing the
+                // following code swaps: 0 <-> 1 and 2 <-> 3. This is
+                // equivalent to flipping the first bit of each code
+                // (5 = 0b0101)
+                xorMask = 0x55555555;
+            }
            bits ^= xorMask;
        }

@@ -791,15 +821,15 @@ struct ETC2Block

        // Encode the opaqueness in the order of the two BC1 colors
        BC1Block *dest = reinterpret_cast<BC1Block *>(bc1);
-        if (opaque)
+        if (nonOpaquePunchThroughAlpha)
        {
-            dest->color0 = max16;
-            dest->color1 = min16;
+            dest->color0 = min16;
+            dest->color1 = max16;
        }
        else
        {
-            dest->color0 = min16;
-            dest->color1 = max16;
+            dest->color0 = max16;
+            dest->color1 = min16;
        }
        dest->bits = bits;
    }
@@ -842,15 +872,14 @@ struct ETC2Block
                                                    alphaValues, nonOpaquePunchThroughAlpha);
    }

-    void decodeSubblock(R8G8B8A8 *rgbaBlock,
-                        size_t x,
-                        size_t y,
-                        size_t w,
-                        size_t h,
-                        const uint8_t alphaValues[4][4],
-                        bool flipbit,
-                        size_t subblockIdx,
-                        const R8G8B8A8 subblockColors[2][4]) const
+    void extractPixelIndices(int *pixelIndices,
+                             int *pixelIndicesCounts,
+                             size_t x,
+                             size_t y,
+                             size_t w,
+                             size_t h,
+                             bool flipbit,
+                             size_t subblockIdx) const
    {
        size_t dxBegin = 0;
        size_t dxEnd   = 4;
@@ -864,53 +893,72 @@ struct ETC2Block

        for (size_t j = dyBegin; j < dyEnd && (y + j) < h; j++)
        {
-            R8G8B8A8 *row = &rgbaBlock[j * 4];
+            int *row = &pixelIndices[j * 4];
            for (size_t i = dxBegin; i < dxEnd && (x + i) < w; i++)
            {
-                const size_t pixelIndex = getIndex(i, j);
-                row[i]                  = subblockColors[subblockIdx][pixelIndex];
-                row[i].A                = alphaValues[j][i];
+                const size_t pixelIndex = subblockIdx * 4 + getIndex(i, j);
+                row[i]                  = static_cast<int>(pixelIndex);
+                pixelIndicesCounts[pixelIndex]++;
            }
        }
    }

-    void selectEndPointPCA(const R8G8B8A8 *pixels, R8G8B8A8 *minColor, R8G8B8A8 *maxColor) const
+    void selectEndPointPCA(const int *pixelIndexCounts,
+                           const R8G8B8A8 *subblockColors,
+                           size_t numColors,
+                           int *minColorIndex,
+                           int *maxColorIndex) const
    {
-        static const int kNumPixels = 16;
-
        // determine color distribution
        int mu[3], min[3], max[3];
        for (int ch = 0; ch < 3; ch++)
        {
-            int muv, minv, maxv;
-
-            muv = minv = maxv = (&pixels[0].R)[ch];
-            for (size_t i = 1; i < kNumPixels; i++)
+            int muv  = 0;
+            int minv = 255;
+            int maxv = 0;
+            for (size_t i = 0; i < numColors; i++)
            {
-                muv += (&pixels[i].R)[ch];
-                minv = std::min<int>(minv, (&pixels[i].R)[ch]);
-                maxv = std::max<int>(maxv, (&pixels[i].R)[ch]);
+                const int count = pixelIndexCounts[i];
+                if (count > 0)
+                {
+                    const auto &pixel = subblockColors[i];
+                    if (pixel.A > 0)
+                    {
+                        // Non-transparent pixels
+                        muv += (&pixel.R)[ch] * count;
+                        minv = std::min<int>(minv, (&pixel.R)[ch]);
+                        maxv = std::max<int>(maxv, (&pixel.R)[ch]);
+                    }
+                }
            }

-            mu[ch]  = (muv + kNumPixels / 2) / kNumPixels;
+            mu[ch]  = (muv + kNumPixelsInBlock / 2) / kNumPixelsInBlock;
            min[ch] = minv;
            max[ch] = maxv;
        }

        // determine covariance matrix
        int cov[6] = {0, 0, 0, 0, 0, 0};
-        for (size_t i = 0; i < kNumPixels; i++)
+        for (size_t i = 0; i < numColors; i++)
        {
-            int r = pixels[i].R - mu[0];
-            int g = pixels[i].G - mu[1];
-            int b = pixels[i].B - mu[2];
-
-            cov[0] += r * r;
-            cov[1] += r * g;
-            cov[2] += r * b;
-            cov[3] += g * g;
-            cov[4] += g * b;
-            cov[5] += b * b;
+            const int count = pixelIndexCounts[i];
+            if (count > 0)
+            {
+                const auto &pixel = subblockColors[i];
+                if (pixel.A > 0)
+                {
+                    int r = pixel.R - mu[0];
+                    int g = pixel.G - mu[1];
+                    int b = pixel.B - mu[2];
+
+                    cov[0] += r * r * count;
+                    cov[1] += r * g * count;
+                    cov[2] += r * b * count;
+                    cov[3] += g * g * count;
+                    cov[4] += g * b * count;
+                    cov[5] += b * b * count;
+                }
+            }
        }

        // Power iteration algorithm to get the eigenvalues and eigenvector
@@ -966,27 +1014,35 @@ struct ETC2Block
        }

        // Pick colors at extreme points
-        int minD        = pixels[0].R * vr + pixels[0].G * vg + pixels[0].B * vb;
-        int maxD        = minD;
+        int minD        = INT_MAX;
+        int maxD        = 0;
        size_t minIndex = 0;
        size_t maxIndex = 0;
-        for (size_t i = 1; i < kNumPixels; i++)
+        for (size_t i = 0; i < numColors; i++)
        {
-            int dot = pixels[i].R * vr + pixels[i].G * vg + pixels[i].B * vb;
-            if (dot < minD)
-            {
-                minD     = dot;
-                minIndex = i;
-            }
-            if (dot > maxD)
+            const int count = pixelIndexCounts[i];
+            if (count > 0)
            {
-                maxD     = dot;
-                maxIndex = i;
+                const auto &pixel = subblockColors[i];
+                if (pixel.A > 0)
+                {
+                    int dot = pixel.R * vr + pixel.G * vg + pixel.B * vb;
+                    if (dot < minD)
+                    {
+                        minD     = dot;
+                        minIndex = i;
+                    }
+                    if (dot > maxD)
+                    {
+                        maxD     = dot;
+                        maxIndex = i;
+                    }
+                }
            }
        }

-        *minColor = pixels[minIndex];
-        *maxColor = pixels[maxIndex];
+        *minColorIndex = static_cast<int>(minIndex);
+        *maxColorIndex = static_cast<int>(maxIndex);
    }

    void transcodeIndividualOrDifferentialBlockToBC1(uint8_t *dest,
@@ -1009,45 +1065,60 @@ struct ETC2Block
        // select axis by principal component analysis (PCA) to use as
        // our two BC1 endpoints and then map pixels to BC1 by projecting on the
        // line between the two endpoints and choosing the right fraction.
-        //
-        // In the future, we have a potential improvements to this algorithm.
-        // 1. We don't actually need to decode ETC blocks to RGBs. Instead,
-        //    the subblock colors and pixel indices alreay contains enough
-        //    information for transcode. A direct mapping would be more
-        //    efficient here.
+
+        // The goal of this algorithm is make it faster than decode ETC to RGBs
+        //   and then encode to BC. To achieve this, we only extract subblock
+        //   colors, pixel indices, and counts of each pixel indices from ETC.
+        //   With those information, we can only encode used subblock colors
+        //   to BC1, and copy the bits to the right pixels.
+        // Fully decode and encode need to process 16 RGBA pixels. With this
+        //   algorithm, it's 8 pixels at maximum for a individual or
+        //   differential block. Saves us bandwidth and computations.
+
+        static const size_t kNumColors = 8;

        const auto intensityModifier =
            nonOpaquePunchThroughAlpha ? intensityModifierNonOpaque : intensityModifierDefault;

        // Compute the colors that pixels can have in each subblock both for
        // the decoding of the RGBA data and BC1 encoding
-        R8G8B8A8 subblockColors[2][4];
+        R8G8B8A8 subblockColors[kNumColors];
        for (size_t modifierIdx = 0; modifierIdx < 4; modifierIdx++)
        {
-            const int i1                   = intensityModifier[u.idht.mode.idm.cw1][modifierIdx];
-            subblockColors[0][modifierIdx] = createRGBA(r1 + i1, g1 + i1, b1 + i1);
+            if (nonOpaquePunchThroughAlpha && (modifierIdx == 2))
+            {
+                // In ETC opaque punch through formats, individual and
+                // differential blocks take index 2 as transparent pixel.
+                // Thus we don't need to compute its color, just assign it
+                // as black.
+                subblockColors[modifierIdx]     = createRGBA(0, 0, 0, 0);
+                subblockColors[4 + modifierIdx] = createRGBA(0, 0, 0, 0);
+            }
+            else
+            {
+                const int i1                = intensityModifier[u.idht.mode.idm.cw1][modifierIdx];
+                subblockColors[modifierIdx] = createRGBA(r1 + i1, g1 + i1, b1 + i1);

-            const int i2                   = intensityModifier[u.idht.mode.idm.cw2][modifierIdx];
-            subblockColors[1][modifierIdx] = createRGBA(r2 + i2, g2 + i2, b2 + i2);
+                const int i2 = intensityModifier[u.idht.mode.idm.cw2][modifierIdx];
+                subblockColors[4 + modifierIdx] = createRGBA(r2 + i2, g2 + i2, b2 + i2);
+            }
        }

-        R8G8B8A8 rgbaBlock[16];
-        // Decode the block in rgbaBlock.
+        int pixelIndices[kNumPixelsInBlock];
+        int pixelIndexCounts[kNumColors] = {0};
+        // Extract pixel indices from a ETC block.
        for (size_t blockIdx = 0; blockIdx < 2; blockIdx++)
        {
-            decodeSubblock(rgbaBlock, x, y, w, h, alphaValues, u.idht.mode.idm.flipbit, blockIdx,
-                           subblockColors);
-        }
-        if (nonOpaquePunchThroughAlpha)
-        {
-            decodePunchThroughAlphaBlock(reinterpret_cast<uint8_t *>(rgbaBlock), x, y, w, h,
-                                         sizeof(R8G8B8A8) * 4);
+            extractPixelIndices(pixelIndices, pixelIndexCounts, x, y, w, h, u.idht.mode.idm.flipbit,
+                                blockIdx);
        }

-        R8G8B8A8 minColor, maxColor;
-        selectEndPointPCA(rgbaBlock, &minColor, &maxColor);
+        int minColorIndex, maxColorIndex;
+        selectEndPointPCA(pixelIndexCounts, subblockColors, kNumColors, &minColorIndex,
+                          &maxColorIndex);

-        packBC1(dest, rgbaBlock, minColor, maxColor, !nonOpaquePunchThroughAlpha);
+        packBC1(dest, pixelIndices, pixelIndexCounts, subblockColors, kNumColors, minColorIndex,
+                maxColorIndex, nonOpaquePunchThroughAlpha);
    }

    void transcodeTBlockToBC1(uint8_t *dest,