Commit 7ce9947d by Le Hoang Quyen Committed by Commit Bot

Metal: autogen for 3D texture's mipmap generating shader.

Bug: angleproject:4921 Bug: angleproject:2634 Change-Id: I5c379d750114e2ca1c5dd0203e94bb63dac1e0bf Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2336125 Commit-Queue: Le Hoang Quyen <le.hoang.q@gmail.com> Reviewed-by: Jamie Madill <jmadill@chromium.org> Reviewed-by: Jonah Ryan-Davis <jonahr@google.com>
parent 09b5e6b1
...@@ -4,37 +4,39 @@ ...@@ -4,37 +4,39 @@
"src/libANGLE/renderer/metal/shaders/clear.metal": "src/libANGLE/renderer/metal/shaders/clear.metal":
"67da9886363c530132e5bc2199bab2db", "67da9886363c530132e5bc2199bab2db",
"src/libANGLE/renderer/metal/shaders/common.h": "src/libANGLE/renderer/metal/shaders/common.h":
"5888cfe052e6e6332e8c7c8949e888fb", "4260719e5f35107db1029d2fe6e1a732",
"src/libANGLE/renderer/metal/shaders/compiled/compiled_default_metallib_2_1_debug_ios_autogen.inc": "src/libANGLE/renderer/metal/shaders/compiled/compiled_default_metallib_2_1_debug_ios_autogen.inc":
"39f2302c254c8490b0f5b4782355a9e4", "56c8d0decc672ce442ec4d179d889a90",
"src/libANGLE/renderer/metal/shaders/compiled/compiled_default_metallib_2_1_debug_ios_sim_autogen.inc": "src/libANGLE/renderer/metal/shaders/compiled/compiled_default_metallib_2_1_debug_ios_sim_autogen.inc":
"ab9be1624a4f15fc12647771727302b0", "4b582bf07c3729a99a64aaf4016ecdd4",
"src/libANGLE/renderer/metal/shaders/compiled/compiled_default_metallib_2_1_debug_mac_autogen.inc": "src/libANGLE/renderer/metal/shaders/compiled/compiled_default_metallib_2_1_debug_mac_autogen.inc":
"35f17267fa21f1f96d8a781d45485e1a", "0088f4968365c1297e67f53732245401",
"src/libANGLE/renderer/metal/shaders/compiled/compiled_default_metallib_2_1_ios_autogen.inc": "src/libANGLE/renderer/metal/shaders/compiled/compiled_default_metallib_2_1_ios_autogen.inc":
"81c4b4d1012dbcb7aa738f49bbed1645", "958a164f2ceeae4e4ff04e0225139b7d",
"src/libANGLE/renderer/metal/shaders/compiled/compiled_default_metallib_2_1_ios_sim_autogen.inc": "src/libANGLE/renderer/metal/shaders/compiled/compiled_default_metallib_2_1_ios_sim_autogen.inc":
"3446979c2d6504cb421e1f0004f5f15f", "bc5cfe5f1d0c27e771965f426cdb3b8d",
"src/libANGLE/renderer/metal/shaders/compiled/compiled_default_metallib_2_1_mac_autogen.inc": "src/libANGLE/renderer/metal/shaders/compiled/compiled_default_metallib_2_1_mac_autogen.inc":
"7839cf16e8e45e6dffa9454dd5d7d3dc", "642c73f07c80a3e776f050bc330de3d9",
"src/libANGLE/renderer/metal/shaders/compiled/compiled_default_metallib_debug_ios_autogen.inc": "src/libANGLE/renderer/metal/shaders/compiled/compiled_default_metallib_debug_ios_autogen.inc":
"82ccf14797364f2c3c5dee14227b2c24", "b37f61dfcbbed927d631a97ed2733382",
"src/libANGLE/renderer/metal/shaders/compiled/compiled_default_metallib_debug_ios_sim_autogen.inc": "src/libANGLE/renderer/metal/shaders/compiled/compiled_default_metallib_debug_ios_sim_autogen.inc":
"28f453c423b4029ab876668e4f1e8b21", "17937de14c1146f972184d67fe17b670",
"src/libANGLE/renderer/metal/shaders/compiled/compiled_default_metallib_debug_mac_autogen.inc": "src/libANGLE/renderer/metal/shaders/compiled/compiled_default_metallib_debug_mac_autogen.inc":
"3f402a605d97be11664ee0bb642e616a", "e819d0a8fb87db32e84946319b394524",
"src/libANGLE/renderer/metal/shaders/compiled/compiled_default_metallib_ios_autogen.inc": "src/libANGLE/renderer/metal/shaders/compiled/compiled_default_metallib_ios_autogen.inc":
"52c9275b8582f9e7c4a6b0b77c857a72", "d7d307d96b4cb4ce970d2b6438dfd3f4",
"src/libANGLE/renderer/metal/shaders/compiled/compiled_default_metallib_ios_sim_autogen.inc": "src/libANGLE/renderer/metal/shaders/compiled/compiled_default_metallib_ios_sim_autogen.inc":
"259bdeb1bc17ab5098e2be4032f5d608", "1943dad6dd5b28630ff1992fcef75da9",
"src/libANGLE/renderer/metal/shaders/compiled/compiled_default_metallib_mac_autogen.inc": "src/libANGLE/renderer/metal/shaders/compiled/compiled_default_metallib_mac_autogen.inc":
"e16bd5da4f614789be9e2545f2af4237", "a46aee7517016919d468bb780fd33f6a",
"src/libANGLE/renderer/metal/shaders/compiled/mtl_default_shaders_autogen.inc": "src/libANGLE/renderer/metal/shaders/compiled/mtl_default_shaders_autogen.inc":
"634a127f4e94f6bc3123e89850d010ee", "634a127f4e94f6bc3123e89850d010ee",
"src/libANGLE/renderer/metal/shaders/constants.h": "src/libANGLE/renderer/metal/shaders/constants.h":
"9bb6e63bf2b48a7a56978c787bde4850", "dad1a869a1095be669b7da5651901d38",
"src/libANGLE/renderer/metal/shaders/gen_indices.metal": "src/libANGLE/renderer/metal/shaders/gen_indices.metal":
"87a76d5e12825111c0595f69e79f5d20", "87a76d5e12825111c0595f69e79f5d20",
"src/libANGLE/renderer/metal/shaders/gen_mipmap.metal":
"54dca94c48bead446624079070b9b309",
"src/libANGLE/renderer/metal/shaders/gen_mtl_internal_shaders.py": "src/libANGLE/renderer/metal/shaders/gen_mtl_internal_shaders.py":
"962d0f3229d91ab71ad555f9fa3fe0c3" "566ddbcfb95ddc845da9ab94eed8640b"
} }
\ No newline at end of file
...@@ -107,6 +107,33 @@ static inline vec<T, 4> resolveTextureMS(texture2d_ms<T> srcTexture, uint2 coord ...@@ -107,6 +107,33 @@ static inline vec<T, 4> resolveTextureMS(texture2d_ms<T> srcTexture, uint2 coord
return output; return output;
} }
// Decodes the r, g and b channels of an sRGB-encoded colour to linear space.
// The alpha channel is copied through unchanged.
static inline float4 sRGBtoLinear(float4 color)
{
    const float3 encoded = color.rgb;

    // Per-channel selector: 1.0 where the channel is <= 0.04045, 0.0 elsewhere.
    const float3 useLowSegment = float3(encoded <= float3(0.04045));

    // Both segments of the piecewise curve are evaluated for every channel ...
    const float3 lowSegment  = encoded / 12.92;
    const float3 highSegment = pow((encoded + float3(0.055)) / 1.055, 2.4);

    // ... and then blended arithmetically with the 0/1 selector.
    const float3 decoded = useLowSegment * lowSegment + float3(1.0 - useLowSegment) * highSegment;
    return float4(decoded, color.a);
}
// Encodes a single linear colour channel to sRGB. The result is always within [0, 1]:
// inputs <= 0 give 0 and inputs >= 1 give 1.
static inline float linearToSRGB(float color)
{
    // At or below zero: clamp to 0.
    if (color <= 0.0f)
    {
        return 0.0f;
    }

    // Small values use the linear segment of the curve.
    if (color < 0.0031308f)
    {
        return 12.92f * color;
    }

    // Remaining values below 1 use the power segment (0.41666f approximates 1 / 2.4).
    if (color < 1.0f)
    {
        return 1.055f * pow(color, 0.41666f) - 0.055f;
    }

    // Everything else (including values that failed all comparisons above) maps to 1.
    return 1.0f;
}
// Encodes a linear colour to sRGB by running the scalar linearToSRGB() on r, g and b.
// The alpha channel is copied through unchanged.
static inline float4 linearToSRGB(float4 color)
{
    const float encodedR = linearToSRGB(color.r);
    const float encodedG = linearToSRGB(color.g);
    const float encodedB = linearToSRGB(color.b);
    return float4(encodedR, encodedG, encodedB, color.a);
}
} // namespace mtl_shader } // namespace mtl_shader
} // namespace rx } // namespace rx
......
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -23,6 +23,9 @@ enum ...@@ -23,6 +23,9 @@ enum
kTextureTypeCount = 5, kTextureTypeCount = 5,
}; };
// Number of threads along each dimension of a mipmap-generation thread group.
// Metal does not allow a constexpr value to be used as an array size, so this has to be a
// preprocessor macro rather than a typed constant.
#define kGenerateMipThreadGroupSizePerDim 8
} // namespace mtl_shader } // namespace mtl_shader
} // namespace rx } // namespace rx
......
//
// Copyright 2020 The ANGLE Project. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
#include "common.h"
using namespace rx::mtl_shader;
// Thread-group extents derived from kGenerateMipThreadGroupSizePerDim:
// total thread count, size of one XY slice, and length of one X row.
#define kThreadGroupXYZ                                                      \
    (kGenerateMipThreadGroupSizePerDim * kGenerateMipThreadGroupSizePerDim * \
     kGenerateMipThreadGroupSizePerDim)
#define kThreadGroupXY (kGenerateMipThreadGroupSizePerDim * kGenerateMipThreadGroupSizePerDim)
#define kThreadGroupX kGenerateMipThreadGroupSizePerDim

// Writes the four channels of `texel` into slot `index` of the sR/sG/sB/sA arrays that must be
// in scope at the point of use. Wrapped in do { } while (0) so the macro behaves as a single
// statement (safe under an unbraced if/else). Note that `texel` is evaluated four times.
#define TEXEL_STORE(index, texel) \
    do                            \
    {                             \
        sR[(index)] = (texel).r;  \
        sG[(index)] = (texel).g;  \
        sB[(index)] = (texel).b;  \
        sA[(index)] = (texel).a;  \
    } while (0)

// Reassembles a float4 from slot `index` of the sR/sG/sB/sA arrays in scope.
#define TEXEL_LOAD(index) float4(sR[(index)], sG[(index)], sB[(index)], sA[(index)])

// Converts `texel` with sRGBtoLinear() when `options.sRGB` is set; otherwise yields it unchanged.
// Requires a variable named `options` in scope.
#define TO_LINEAR(texel) (options.sRGB ? sRGBtoLinear(texel) : (texel))

// Yields `edgeValue` when `condition` is true, otherwise `targetValue`. Only the selected operand
// is evaluated. The whole expansion is parenthesized so it can be embedded in larger expressions.
#define OUT_OF_BOUND_CHECK(edgeValue, targetValue, condition) \
    ((condition) ? (edgeValue) : (targetValue))
// Options read by generate3DMipmaps from buffer(0).
// NOTE(review): the host side presumably fills a struct with the same layout - confirm before
// reordering or retyping any field.
struct GenMipParams
{
// Level of the source texture that is sampled to produce the first generated mip.
uint srcLevel;
// How many levels to write in this dispatch. The kernel returns early when this is 1, 2 or 3;
// any other value lets it write all four destination mips.
uint numMipLevelsToGen;
// When true, texels are passed through linearToSRGB() before being averaged in shared memory
// and through sRGBtoLinear() before levels 2-4 are written.
bool sRGB;
};
// Averages the eight texels of one 2x2x2 cell stored in the shared-memory channel arrays.
//
//   sR/sG/sB/sA : per-channel shared arrays (the names are required by TEXEL_LOAD).
//   lIndex      : linear shared-memory index of the cell's (x, y, z) corner.
//   stride      : spacing between neighbouring samples along one axis, in slots of an X row.
//   atEdge      : per axis, true when the "+1" neighbour along that axis lies outside the mip.
//   texel1      : the caller's own value for the (x, y, z) corner.
//
// A neighbour that lies outside the mip is replaced by the sample obtained by clamping ONLY the
// out-of-range axis back onto the cell corner. E.g. when just x is at the edge, (x+1, y+1, z)
// resolves to (x, y+1, z), so every in-range texel keeps an equal weight in the average.
static inline float4 averageCell2x2x2(threadgroup const float *sR,
                                      threadgroup const float *sG,
                                      threadgroup const float *sB,
                                      threadgroup const float *sA,
                                      uint lIndex,
                                      uint stride,
                                      bool3 atEdge,
                                      float4 texel1)
{
    const uint dx = stride;
    const uint dy = stride * kThreadGroupX;
    const uint dz = stride * kThreadGroupXY;

    // (x+1, y, z)
    float4 texel2 = atEdge.x ? texel1 : TEXEL_LOAD(lIndex + dx);
    // (x, y+1, z)
    float4 texel3 = atEdge.y ? texel1 : TEXEL_LOAD(lIndex + dy);
    // (x, y, z+1)
    float4 texel4 = atEdge.z ? texel1 : TEXEL_LOAD(lIndex + dz);
    // (x+1, y+1, z)
    float4 texel5 = atEdge.x ? texel3 : (atEdge.y ? texel2 : TEXEL_LOAD(lIndex + dx + dy));
    // (x+1, y, z+1)
    float4 texel6 = atEdge.x ? texel4 : (atEdge.z ? texel2 : TEXEL_LOAD(lIndex + dx + dz));
    // (x, y+1, z+1)
    float4 texel7 = atEdge.y ? texel4 : (atEdge.z ? texel3 : TEXEL_LOAD(lIndex + dy + dz));
    // (x+1, y+1, z+1)
    float4 texel8 =
        atEdge.x ? texel7
                 : (atEdge.y ? texel6
                             : (atEdge.z ? texel5 : TEXEL_LOAD(lIndex + dx + dy + dz)));

    return (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0;
}

// Generates up to four consecutive mip levels of a 3D texture in a single dispatch.
//
// Level 1 is produced by sampling `srcTexture` at `options.srcLevel` with a linear filter, one
// thread per destination texel. Levels 2-4 are produced by repeatedly averaging 2x2x2 cells of
// the previous level through threadgroup memory; `options.numMipLevelsToGen` selects how many
// levels are written (the kernel returns after level 1, 2 or 3 when it equals that number).
//
// The bit masks used below (0x49, 0xdb, 0x1ff) select threads whose local x/y/z are multiples of
// 2, 4 and 8 respectively and are written for kGenerateMipThreadGroupSizePerDim == 8.
//
// NOTE(hqle): For numMipLevelsToGen > 1, this function assumes the texture is power of two. If it
// is not, quality will not be good.
kernel void generate3DMipmaps(uint lIndex [[thread_index_in_threadgroup]],
                              ushort3 gIndices [[thread_position_in_grid]],
                              texture3d<float> srcTexture [[texture(0)]],
                              texture3d<float, access::write> dstMip1 [[texture(1)]],
                              texture3d<float, access::write> dstMip2 [[texture(2)]],
                              texture3d<float, access::write> dstMip3 [[texture(3)]],
                              texture3d<float, access::write> dstMip4 [[texture(4)]],
                              constant GenMipParams &options [[buffer(0)]])
{
    ushort3 mipSize  = ushort3(dstMip1.get_width(), dstMip1.get_height(), dstMip1.get_depth());
    bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y && gIndices.z < mipSize.z;

    constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);

    // NOTE(hqle): Use simd_group function whenever available. That could avoid barrier use.

    // Use struct of array style to avoid bank conflict.
    threadgroup float sR[kThreadGroupXYZ];
    threadgroup float sG[kThreadGroupXYZ];
    threadgroup float sB[kThreadGroupXYZ];
    threadgroup float sA[kThreadGroupXYZ];

    // ----- First mip level -------
    // Initialized so that threads outside the mip never carry an indeterminate value.
    float4 texel1 = float4(0.0);
    if (validThread)
    {
        float3 texCoords = (float3(gIndices) + float3(0.5, 0.5, 0.5)) / float3(mipSize);
        texel1           = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel));

        // Write to texture
        dstMip1.write(texel1, gIndices);
    }
    else
    {
        // All bits set: every mask test below fails, so this thread skips the remaining levels.
        // It must still reach the barriers, hence no early return here.
        lIndex = 0xffffffff;
    }

    if (options.numMipLevelsToGen == 1)
    {
        return;
    }

    // ---- Second mip level --------

    // Write to shared memory. Only threads inside the mip own a slot: an out-of-range thread has
    // lIndex == 0xffffffff, which is not a valid index into the shared arrays.
    if (validThread)
    {
        if (options.sRGB)
        {
            texel1 = linearToSRGB(texel1);
        }
        TEXEL_STORE(lIndex, texel1);
    }

    threadgroup_barrier(mem_flags::mem_threadgroup);

    // Index must be even
    if ((lIndex & 0x49) == 0)  // (lIndex & b1001001) == 0
    {
        bool3 atEdge = gIndices == (mipSize - ushort3(1));

        texel1 = averageCell2x2x2(sR, sG, sB, sA, lIndex, 1u, atEdge, texel1);

        dstMip2.write(TO_LINEAR(texel1), gIndices >> 1);

        // Write to shared memory
        TEXEL_STORE(lIndex, texel1);
    }

    if (options.numMipLevelsToGen == 2)
    {
        return;
    }

    // ---- 3rd mip level --------
    threadgroup_barrier(mem_flags::mem_threadgroup);

    // Index must be multiple of 4
    if ((lIndex & 0xdb) == 0)  // (lIndex & b11011011) == 0
    {
        mipSize      = max(mipSize >> 1, ushort3(1));
        bool3 atEdge = (gIndices >> 1) == (mipSize - ushort3(1));

        texel1 = averageCell2x2x2(sR, sG, sB, sA, lIndex, 2u, atEdge, texel1);

        dstMip3.write(TO_LINEAR(texel1), gIndices >> 2);

        // Write to shared memory
        TEXEL_STORE(lIndex, texel1);
    }

    if (options.numMipLevelsToGen == 3)
    {
        return;
    }

    // ---- 4th mip level --------
    threadgroup_barrier(mem_flags::mem_threadgroup);

    // Index must be multiple of 8
    if ((lIndex & 0x1ff) == 0)  // (lIndex & b111111111) == 0
    {
        // A thread that reaches this point also ran the 3rd-level branch, so mipSize already
        // holds the level-2 size and this halves it to the level-3 size.
        mipSize      = max(mipSize >> 1, ushort3(1));
        bool3 atEdge = (gIndices >> 2) == (mipSize - ushort3(1));

        texel1 = averageCell2x2x2(sR, sG, sB, sA, lIndex, 4u, atEdge, texel1);

        dstMip4.write(TO_LINEAR(texel1), gIndices >> 3);
    }
}
...@@ -162,7 +162,7 @@ def gen_precompiled_shaders(mac_version, ios_version, variable_name, additional_ ...@@ -162,7 +162,7 @@ def gen_precompiled_shaders(mac_version, ios_version, variable_name, additional_
def main(): def main():
src_files = ['blit.metal', 'clear.metal', 'gen_indices.metal'] src_files = ['blit.metal', 'clear.metal', 'gen_indices.metal', 'gen_mipmap.metal']
# yapf: disable # yapf: disable
os_specific_autogen_files = [ os_specific_autogen_files = [
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment