D3D9: Perform the MAD operation AFTER pre/unmultiply logic.

The multiply/add uniforms are used to mask the output of the shader based on the destination format. The pre/unmultiply logic should be performed on the source data without masking applied.

TEST=conformance/textures/canvas/tex-2d-rgb-rgb-unsigned_byte.html
BUG=angleproject:2192
Change-Id: I6e5f29a7d56e18bd1a81ff56500edb4c6b55eb55
Reviewed-on: https://chromium-review.googlesource.com/730494
Commit-Queue: Geoff Lang <geofflang@chromium.org>
Reviewed-by: Jamie Madill <jmadill@chromium.org>

D3D9: Perform the MAD operation AFTER pre/unmultiply logic.
8df71735 · Geoff Lang · Commit Bot · 56ff3ce2 · 8df71735 · 8df71735
Commit 8df71735 authored Oct 20, 2017 by Geoff Lang Committed by Commit Bot Oct 23, 2017
5 changed files
--- a/src/libANGLE/renderer/d3d/d3d9/shaders/Blit.ps
+++ b/src/libANGLE/renderer/d3d/d3d9/shaders/Blit.ps
@@ -26,19 +26,19 @@ float4 luminanceps(float4 texcoord : TEXCOORD0) : COLOR

 float4 luminancepremultps(float4 texcoord : TEXCOORD0) : COLOR
 {
-    float4 luma = (tex2D(tex, texcoord.xy).xw * mult.xw + add.xw).xxxy;
+    float4 luma = tex2D(tex, texcoord.xy).xxxw;
    luma.rgb *= luma.a;
-    return luma;
+    return luma * mult + add;
 };

 float4 luminanceunmultps(float4 texcoord : TEXCOORD0) : COLOR
 {
-    float4 luma = (tex2D(tex, texcoord.xy).xw * mult.xw + add.xw).xxxy;
+    float4 luma = tex2D(tex, texcoord.xy).xxxw;
    if (luma.a > 0.0f)
    {
        luma.rgb /= luma.a;
    }
-    return luma;
+    return luma * mult + add;
 };

 // RGB/A Component Mask Pixel Shader
@@ -51,17 +51,17 @@ float4 componentmaskps(float4 texcoord : TEXCOORD0) : COLOR

 float4 componentmaskpremultps(float4 texcoord : TEXCOORD0) : COLOR
 {
-    float4 color = tex2D(tex, texcoord.xy) * mult + add;
+    float4 color = tex2D(tex, texcoord.xy);
    color.rgb *= color.a;
-    return color;
+    return color * mult + add;
 };

 float4 componentmaskunmultps(float4 texcoord : TEXCOORD0) : COLOR
 {
-    float4 color =  tex2D(tex, texcoord.xy) * mult + add;
+    float4 color =  tex2D(tex, texcoord.xy);
    if (color.a > 0.0f)
    {
        color.rgb /= color.a;
    }
-    return color;
+    return color * mult + add;
 };
--- a/src/libANGLE/renderer/d3d/d3d9/shaders/compiled/componentmaskpremultps.h
+++ b/src/libANGLE/renderer/d3d/d3d9/shaders/compiled/componentmaskpremultps.h
@@ -22,9 +22,9 @@
    dcl t0.xy
    dcl_2d s0
    texld r0, t0, s0
+    mul r0.xyz, r0.w, r0
    mov r1, c0
    mad r0, r0, r1, c1
-    mul r0.xyz, r0.w, r0
    mov oC0, r0

 // approximately 5 instruction slots used (1 texture, 4 arithmetic)
@@ -44,7 +44,7 @@ const BYTE g_ps20_componentmaskpremultps[] = {
    54,  46,  51,  46,  57,  54,  48,  48,  46,  49,  54,  51,  56,  52,  0,   171, 171, 171, 31,
    0,   0,   2,   0,   0,   0,   128, 0,   0,   3,   176, 31,  0,   0,   2,   0,   0,   0,   144,
    0,   8,   15,  160, 66,  0,   0,   3,   0,   0,   15,  128, 0,   0,   228, 176, 0,   8,   228,
-    160, 1,   0,   0,   2,   1,   0,   15,  128, 0,   0,   228, 160, 4,   0,   0,   4,   0,   0,
-    15,  128, 0,   0,   228, 128, 1,   0,   228, 128, 1,   0,   228, 160, 5,   0,   0,   3,   0,
-    0,   7,   128, 0,   0,   255, 128, 0,   0,   228, 128, 1,   0,   0,   2,   0,   8,   15,  128,
+    160, 5,   0,   0,   3,   0,   0,   7,   128, 0,   0,   255, 128, 0,   0,   228, 128, 1,   0,
+    0,   2,   1,   0,   15,  128, 0,   0,   228, 160, 4,   0,   0,   4,   0,   0,   15,  128, 0,
+    0,   228, 128, 1,   0,   228, 128, 1,   0,   228, 160, 1,   0,   0,   2,   0,   8,   15,  128,
    0,   0,   228, 128, 255, 255, 0,   0};
--- a/src/libANGLE/renderer/d3d/d3d9/shaders/compiled/componentmaskunmultps.h
+++ b/src/libANGLE/renderer/d3d/d3d9/shaders/compiled/componentmaskunmultps.h
@@ -22,11 +22,11 @@
    dcl t0.xy
    dcl_2d s0
    texld r0, t0, s0
+    rcp r1.w, r0.w
+    mul r1.xyz, r0, r1.w
+    cmp r0.xyz, -r0.w, r0, r1
    mov r1, c0
    mad r0, r0, r1, c1
-    rcp r1.x, r0.w
-    mul r1.xyz, r0, r1.x
-    cmp r0.xyz, -r0.w, r0, r1
    mov oC0, r0

 // approximately 7 instruction slots used (1 texture, 6 arithmetic)
@@ -46,9 +46,9 @@ const BYTE g_ps20_componentmaskunmultps[] = {
    54,  46,  51,  46,  57,  54,  48,  48,  46,  49,  54,  51,  56,  52,  0,   171, 171, 171, 31,
    0,   0,   2,   0,   0,   0,   128, 0,   0,   3,   176, 31,  0,   0,   2,   0,   0,   0,   144,
    0,   8,   15,  160, 66,  0,   0,   3,   0,   0,   15,  128, 0,   0,   228, 176, 0,   8,   228,
-    160, 1,   0,   0,   2,   1,   0,   15,  128, 0,   0,   228, 160, 4,   0,   0,   4,   0,   0,
-    15,  128, 0,   0,   228, 128, 1,   0,   228, 128, 1,   0,   228, 160, 6,   0,   0,   2,   1,
-    0,   1,   128, 0,   0,   255, 128, 5,   0,   0,   3,   1,   0,   7,   128, 0,   0,   228, 128,
-    1,   0,   0,   128, 88,  0,   0,   4,   0,   0,   7,   128, 0,   0,   255, 129, 0,   0,   228,
-    128, 1,   0,   228, 128, 1,   0,   0,   2,   0,   8,   15,  128, 0,   0,   228, 128, 255, 255,
+    160, 6,   0,   0,   2,   1,   0,   8,   128, 0,   0,   255, 128, 5,   0,   0,   3,   1,   0,
+    7,   128, 0,   0,   228, 128, 1,   0,   255, 128, 88,  0,   0,   4,   0,   0,   7,   128, 0,
+    0,   255, 129, 0,   0,   228, 128, 1,   0,   228, 128, 1,   0,   0,   2,   1,   0,   15,  128,
+    0,   0,   228, 160, 4,   0,   0,   4,   0,   0,   15,  128, 0,   0,   228, 128, 1,   0,   228,
+    128, 1,   0,   228, 160, 1,   0,   0,   2,   0,   8,   15,  128, 0,   0,   228, 128, 255, 255,
    0,   0};
--- a/src/libANGLE/renderer/d3d/d3d9/shaders/compiled/luminancepremultps.h
+++ b/src/libANGLE/renderer/d3d/d3d9/shaders/compiled/luminancepremultps.h
@@ -22,14 +22,12 @@
    dcl t0.xy
    dcl_2d s0
    texld r0, t0, s0
-    mov r1.xw, c0
-    mad r0.x, r0.x, r1.x, c1.x
-    mad r0.y, r0.w, r1.w, c1.w
-    mul r1.xyz, r0.y, r0.x
-    mov r1.w, r0.y
-    mov oC0, r1
+    mul r0.xyz, r0.w, r0.x
+    mov r1, c0
+    mad r0, r0, r1, c1
+    mov oC0, r0

-// approximately 7 instruction slots used (1 texture, 6 arithmetic)
+// approximately 5 instruction slots used (1 texture, 4 arithmetic)
 #endif

 const BYTE g_ps20_luminancepremultps[] = {
@@ -46,9 +44,7 @@ const BYTE g_ps20_luminancepremultps[] = {
    54,  46,  51,  46,  57,  54,  48,  48,  46,  49,  54,  51,  56,  52,  0,   171, 171, 171, 31,
    0,   0,   2,   0,   0,   0,   128, 0,   0,   3,   176, 31,  0,   0,   2,   0,   0,   0,   144,
    0,   8,   15,  160, 66,  0,   0,   3,   0,   0,   15,  128, 0,   0,   228, 176, 0,   8,   228,
-    160, 1,   0,   0,   2,   1,   0,   9,   128, 0,   0,   228, 160, 4,   0,   0,   4,   0,   0,
-    1,   128, 0,   0,   0,   128, 1,   0,   0,   128, 1,   0,   0,   160, 4,   0,   0,   4,   0,
-    0,   2,   128, 0,   0,   255, 128, 1,   0,   255, 128, 1,   0,   255, 160, 5,   0,   0,   3,
-    1,   0,   7,   128, 0,   0,   85,  128, 0,   0,   0,   128, 1,   0,   0,   2,   1,   0,   8,
-    128, 0,   0,   85,  128, 1,   0,   0,   2,   0,   8,   15,  128, 1,   0,   228, 128, 255, 255,
-    0,   0};
+    160, 5,   0,   0,   3,   0,   0,   7,   128, 0,   0,   255, 128, 0,   0,   0,   128, 1,   0,
+    0,   2,   1,   0,   15,  128, 0,   0,   228, 160, 4,   0,   0,   4,   0,   0,   15,  128, 0,
+    0,   228, 128, 1,   0,   228, 128, 1,   0,   228, 160, 1,   0,   0,   2,   0,   8,   15,  128,
+    0,   0,   228, 128, 255, 255, 0,   0};
--- a/src/libANGLE/renderer/d3d/d3d9/shaders/compiled/luminanceunmultps.h
+++ b/src/libANGLE/renderer/d3d/d3d9/shaders/compiled/luminanceunmultps.h
@@ -22,16 +22,14 @@
    dcl t0.xy
    dcl_2d s0
    texld r0, t0, s0
-    mov r1.xw, c0
-    mad r0.x, r0.x, r1.x, c1.x
-    mad r0.y, r0.w, r1.w, c1.w
-    rcp r0.z, r0.y
-    mul r0.z, r0.z, r0.x
-    cmp r1.xyz, -r0.y, r0.x, r0.z
-    mov r1.w, r0.y
-    mov oC0, r1
+    rcp r1.w, r0.w
+    mul r1.x, r0.x, r1.w
+    cmp r0.xyz, -r0.w, r0.x, r1.x
+    mov r1, c0
+    mad r0, r0, r1, c1
+    mov oC0, r0

-// approximately 9 instruction slots used (1 texture, 8 arithmetic)
+// approximately 7 instruction slots used (1 texture, 6 arithmetic)
 #endif

 const BYTE g_ps20_luminanceunmultps[] = {
@@ -48,10 +46,9 @@ const BYTE g_ps20_luminanceunmultps[] = {
    54,  46,  51,  46,  57,  54,  48,  48,  46,  49,  54,  51,  56,  52,  0,   171, 171, 171, 31,
    0,   0,   2,   0,   0,   0,   128, 0,   0,   3,   176, 31,  0,   0,   2,   0,   0,   0,   144,
    0,   8,   15,  160, 66,  0,   0,   3,   0,   0,   15,  128, 0,   0,   228, 176, 0,   8,   228,
-    160, 1,   0,   0,   2,   1,   0,   9,   128, 0,   0,   228, 160, 4,   0,   0,   4,   0,   0,
-    1,   128, 0,   0,   0,   128, 1,   0,   0,   128, 1,   0,   0,   160, 4,   0,   0,   4,   0,
-    0,   2,   128, 0,   0,   255, 128, 1,   0,   255, 128, 1,   0,   255, 160, 6,   0,   0,   2,
-    0,   0,   4,   128, 0,   0,   85,  128, 5,   0,   0,   3,   0,   0,   4,   128, 0,   0,   170,
-    128, 0,   0,   0,   128, 88,  0,   0,   4,   1,   0,   7,   128, 0,   0,   85,  129, 0,   0,
-    0,   128, 0,   0,   170, 128, 1,   0,   0,   2,   1,   0,   8,   128, 0,   0,   85,  128, 1,
-    0,   0,   2,   0,   8,   15,  128, 1,   0,   228, 128, 255, 255, 0,   0};
+    160, 6,   0,   0,   2,   1,   0,   8,   128, 0,   0,   255, 128, 5,   0,   0,   3,   1,   0,
+    1,   128, 0,   0,   0,   128, 1,   0,   255, 128, 88,  0,   0,   4,   0,   0,   7,   128, 0,
+    0,   255, 129, 0,   0,   0,   128, 1,   0,   0,   128, 1,   0,   0,   2,   1,   0,   15,  128,
+    0,   0,   228, 160, 4,   0,   0,   4,   0,   0,   15,  128, 0,   0,   228, 128, 1,   0,   228,
+    128, 1,   0,   228, 160, 1,   0,   0,   2,   0,   8,   15,  128, 0,   0,   228, 128, 255, 255,
+    0,   0};