Commit 8a50b42b by Le Hoang Quyen Committed by Commit Bot

Metal: Convert index & vertex format on GPU when possible.

- When converting vertex buffer: - if there is no render pass active, use compute shader to convert. - if there is a render pass active and device supports explicit memory barrier then convert the buffer in vertex shader with direct buffer write and insert a memory barrier. - if there is a render pass active and device doesn't support explicit memory barrier then convert the buffer on CPU. Bug: angleproject:2634 Change-Id: I5346e3a2adb855f40e46a3912d9db404a4482e0f Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2434025 Commit-Queue: Le Hoang Quyen <le.hoang.q@gmail.com> Reviewed-by: 's avatarJamie Madill <jmadill@chromium.org> Reviewed-by: 's avatarJonah Ryan-Davis <jonahr@google.com>
parent ed23dc84
...@@ -26,6 +26,14 @@ struct FeaturesMtl : FeatureSetBase ...@@ -26,6 +26,14 @@ struct FeaturesMtl : FeatureSetBase
"has_depth_texture_filtering", FeatureCategory::MetalFeatures, "has_depth_texture_filtering", FeatureCategory::MetalFeatures,
"The renderer supports depth texture's filtering other than nearest", &members}; "The renderer supports depth texture's filtering other than nearest", &members};
// Support explicit memory barrier
Feature hasExplicitMemBarrier = {"has_explicit_mem_barrier_mtl", FeatureCategory::MetalFeatures,
"The renderer supports explicit memory barrier", &members};
// Some renderer can break render pass cheaply, i.e. desktop class GPUs.
Feature hasCheapRenderPass = {"has_cheap_render_pass_mtl", FeatureCategory::MetalFeatures,
"The renderer can cheaply break a render pass.", &members};
// Non-uniform compute shader dispatch support, i.e. Group size is not necessarily to be fixed: // Non-uniform compute shader dispatch support, i.e. Group size is not necessarily to be fixed:
Feature hasNonUniformDispatch = { Feature hasNonUniformDispatch = {
"has_non_uniform_dispatch", FeatureCategory::MetalFeatures, "has_non_uniform_dispatch", FeatureCategory::MetalFeatures,
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
"src/libANGLE/renderer/metal/shaders/constants.h": "src/libANGLE/renderer/metal/shaders/constants.h":
"dad1a869a1095be669b7da5651901d38", "dad1a869a1095be669b7da5651901d38",
"src/libANGLE/renderer/metal/shaders/copy_buffer.metal": "src/libANGLE/renderer/metal/shaders/copy_buffer.metal":
"813e16a38d6e3ba858b62a712b1b316d", "83d33cc789cb5df7b173b98c50770c0f",
"src/libANGLE/renderer/metal/shaders/format_autogen.h": "src/libANGLE/renderer/metal/shaders/format_autogen.h":
"b1d6512b904a7eb151b0095b7898b0e5", "b1d6512b904a7eb151b0095b7898b0e5",
"src/libANGLE/renderer/metal/shaders/gen_indices.metal": "src/libANGLE/renderer/metal/shaders/gen_indices.metal":
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
"src/libANGLE/renderer/metal/shaders/gen_mtl_internal_shaders.py": "src/libANGLE/renderer/metal/shaders/gen_mtl_internal_shaders.py":
"b48af61c8b02dda646b4c8febce50227", "b48af61c8b02dda646b4c8febce50227",
"src/libANGLE/renderer/metal/shaders/mtl_default_shaders_src_autogen.inc": "src/libANGLE/renderer/metal/shaders/mtl_default_shaders_src_autogen.inc":
"72e525145bc8f11993791c0f44e79b33", "a0164451469303a462fd777c289c36ee",
"src/libANGLE/renderer/metal/shaders/visibility.metal": "src/libANGLE/renderer/metal/shaders/visibility.metal":
"b82aa740cf4b0aed606aacef1024beea" "b82aa740cf4b0aed606aacef1024beea"
} }
\ No newline at end of file
...@@ -731,14 +731,19 @@ void DisplayMtl::initializeFeatures() ...@@ -731,14 +731,19 @@ void DisplayMtl::initializeFeatures()
// default values: // default values:
mFeatures.hasBaseVertexInstancedDraw.enabled = true; mFeatures.hasBaseVertexInstancedDraw.enabled = true;
mFeatures.hasDepthTextureFiltering.enabled = false; mFeatures.hasDepthTextureFiltering.enabled = false;
mFeatures.hasExplicitMemBarrier.enabled = false;
mFeatures.hasNonUniformDispatch.enabled = true; mFeatures.hasNonUniformDispatch.enabled = true;
mFeatures.hasStencilOutput.enabled = false; mFeatures.hasStencilOutput.enabled = false;
mFeatures.hasTextureSwizzle.enabled = false; mFeatures.hasTextureSwizzle.enabled = false;
mFeatures.allowSeparatedDepthStencilBuffers.enabled = false; mFeatures.allowSeparatedDepthStencilBuffers.enabled = false;
mFeatures.allowGenMultipleMipsPerPass.enabled = true; mFeatures.allowGenMultipleMipsPerPass.enabled = true;
mFeatures.hasCheapRenderPass.enabled = false;
ANGLE_FEATURE_CONDITION((&mFeatures), hasDepthTextureFiltering, ANGLE_FEATURE_CONDITION((&mFeatures), hasDepthTextureFiltering,
TARGET_OS_OSX || TARGET_OS_MACCATALYST); TARGET_OS_OSX || TARGET_OS_MACCATALYST);
ANGLE_FEATURE_CONDITION(
(&mFeatures), hasExplicitMemBarrier,
isMetal2_1 && (TARGET_OS_OSX || TARGET_OS_MACCATALYST) && !ANGLE_MTL_ARM);
ANGLE_FEATURE_CONDITION((&mFeatures), hasDepthAutoResolve, supportsEitherGPUFamily(3, 2)); ANGLE_FEATURE_CONDITION((&mFeatures), hasDepthAutoResolve, supportsEitherGPUFamily(3, 2));
ANGLE_FEATURE_CONDITION((&mFeatures), hasStencilAutoResolve, supportsEitherGPUFamily(5, 2)); ANGLE_FEATURE_CONDITION((&mFeatures), hasStencilAutoResolve, supportsEitherGPUFamily(5, 2));
ANGLE_FEATURE_CONDITION((&mFeatures), allowMultisampleStoreAndResolve, ANGLE_FEATURE_CONDITION((&mFeatures), allowMultisampleStoreAndResolve,
...@@ -756,6 +761,9 @@ void DisplayMtl::initializeFeatures() ...@@ -756,6 +761,9 @@ void DisplayMtl::initializeFeatures()
// Fence sync is flaky on Nvidia // Fence sync is flaky on Nvidia
ANGLE_FEATURE_CONDITION((&mFeatures), hasEvents, isMetal2_1 && !isNVIDIA()); ANGLE_FEATURE_CONDITION((&mFeatures), hasEvents, isMetal2_1 && !isNVIDIA());
ANGLE_FEATURE_CONDITION((&mFeatures), hasCheapRenderPass,
(TARGET_OS_OSX || TARGET_OS_MACCATALYST) && !ANGLE_MTL_ARM);
#if !TARGET_OS_MACCATALYST && (TARGET_OS_IOS || TARGET_OS_TV) #if !TARGET_OS_MACCATALYST && (TARGET_OS_IOS || TARGET_OS_TV)
// Base Vertex drawing is only supported since GPU family 3. // Base Vertex drawing is only supported since GPU family 3.
ANGLE_FEATURE_CONDITION((&mFeatures), hasBaseVertexInstancedDraw, supportsIOSGPUFamily(3)); ANGLE_FEATURE_CONDITION((&mFeatures), hasBaseVertexInstancedDraw, supportsIOSGPUFamily(3));
......
...@@ -95,11 +95,22 @@ class VertexArrayMtl : public VertexArrayImpl ...@@ -95,11 +95,22 @@ class VertexArrayMtl : public VertexArrayImpl
size_t attribIndex, size_t attribIndex,
const mtl::VertexFormat &vertexFormat); const mtl::VertexFormat &vertexFormat);
angle::Result convertVertexBufferCPU(const gl::Context *glContext, angle::Result convertVertexBufferCPU(ContextMtl *contextMtl,
BufferMtl *srcBuffer, BufferMtl *srcBuffer,
const gl::VertexBinding &binding, const gl::VertexBinding &binding,
size_t attribIndex, size_t attribIndex,
const mtl::VertexFormat &vertexFormat, const mtl::VertexFormat &convertedFormat,
GLuint targetStride,
size_t vertexCount,
ConversionBufferMtl *conversion);
angle::Result convertVertexBufferGPU(const gl::Context *glContext,
BufferMtl *srcBuffer,
const gl::VertexBinding &binding,
size_t attribIndex,
const mtl::VertexFormat &convertedFormat,
GLuint targetStride,
size_t vertexCount,
bool isExpandingComponents,
ConversionBufferMtl *conversion); ConversionBufferMtl *conversion);
// These can point to real BufferMtl or converted buffer in mConvertedArrayBufferHolders // These can point to real BufferMtl or converted buffer in mConvertedArrayBufferHolders
......
...@@ -623,8 +623,20 @@ angle::Result VertexArrayMtl::convertIndexBuffer(const gl::Context *glContext, ...@@ -623,8 +623,20 @@ angle::Result VertexArrayMtl::convertIndexBuffer(const gl::Context *glContext,
size_t indexCount = GetIndexCount(idxBuffer, offsetModulo, indexType); size_t indexCount = GetIndexCount(idxBuffer, offsetModulo, indexType);
if (!contextMtl->getDisplay()->getFeatures().hasCheapRenderPass.enabled &&
contextMtl->getRenderCommandEncoder())
{
// We shouldn't use GPU to convert when we are in a middle of a render pass.
ANGLE_TRY(StreamIndexData(contextMtl, &conversion->data,
idxBuffer->getClientShadowCopyData(contextMtl) + offsetModulo,
indexType, indexCount, &conversion->convertedBuffer,
&conversion->convertedOffset));
}
else
{
ANGLE_TRY(convertIndexBufferGPU(glContext, indexType, idxBuffer, offsetModulo, indexCount, ANGLE_TRY(convertIndexBufferGPU(glContext, indexType, idxBuffer, offsetModulo, indexCount,
conversion)); conversion));
}
*idxBufferOut = conversion->convertedBuffer; *idxBufferOut = conversion->convertedBuffer;
*idxBufferOffsetOut = conversion->convertedOffset + alignedOffset; *idxBufferOffsetOut = conversion->convertedOffset + alignedOffset;
...@@ -688,70 +700,74 @@ angle::Result VertexArrayMtl::convertVertexBuffer(const gl::Context *glContext, ...@@ -688,70 +700,74 @@ angle::Result VertexArrayMtl::convertVertexBuffer(const gl::Context *glContext,
size_t attribIndex, size_t attribIndex,
const mtl::VertexFormat &srcVertexFormat) const mtl::VertexFormat &srcVertexFormat)
{ {
unsigned srcFormatSize = srcVertexFormat.intendedAngleFormat().pixelBytes;
size_t numVertices = GetVertexCount(srcBuffer, binding, srcFormatSize);
if (numVertices == 0)
{
// Out of bound buffer access, can return any values.
// See KHR_robust_buffer_access_behavior
mCurrentArrayBuffers[attribIndex] = srcBuffer;
mCurrentArrayBufferFormats[attribIndex] = &srcVertexFormat;
mCurrentArrayBufferOffsets[attribIndex] = 0;
mCurrentArrayBufferStrides[attribIndex] = 16;
return angle::Result::Continue;
}
ContextMtl *contextMtl = mtl::GetImpl(glContext); ContextMtl *contextMtl = mtl::GetImpl(glContext);
const angle::Format &intendedAngleFormat = srcVertexFormat.intendedAngleFormat(); // Convert to tightly packed format
GLuint stride;
const mtl::VertexFormat &convertedFormat =
GetVertexConversionFormat(contextMtl, srcVertexFormat.intendedFormatId, &stride);
ConversionBufferMtl *conversion = srcBuffer->getVertexConversionBuffer( ConversionBufferMtl *conversion = srcBuffer->getVertexConversionBuffer(
contextMtl, intendedAngleFormat.id, binding.getStride(), binding.getOffset()); contextMtl, srcVertexFormat.intendedFormatId, binding.getStride(), binding.getOffset());
// Has the content of the buffer has changed since last conversion? // Has the content of the buffer has changed since last conversion?
if (!conversion->dirty) if (!conversion->dirty)
{ {
// Buffer's data hasn't been changed. Re-use last converted results // Buffer's data hasn't been changed. Re-use last converted results
GLuint stride;
const mtl::VertexFormat &vertexFormat =
GetVertexConversionFormat(contextMtl, intendedAngleFormat.id, &stride);
mConvertedArrayBufferHolders[attribIndex].set(conversion->convertedBuffer); mConvertedArrayBufferHolders[attribIndex].set(conversion->convertedBuffer);
mCurrentArrayBufferOffsets[attribIndex] = conversion->convertedOffset; mCurrentArrayBufferOffsets[attribIndex] = conversion->convertedOffset;
mCurrentArrayBuffers[attribIndex] = &mConvertedArrayBufferHolders[attribIndex]; mCurrentArrayBuffers[attribIndex] = &mConvertedArrayBufferHolders[attribIndex];
mCurrentArrayBufferFormats[attribIndex] = &vertexFormat; mCurrentArrayBufferFormats[attribIndex] = &convertedFormat;
mCurrentArrayBufferStrides[attribIndex] = stride; mCurrentArrayBufferStrides[attribIndex] = stride;
return angle::Result::Continue; return angle::Result::Continue;
} }
// NOTE(hqle): Do the conversion on GPU. const angle::Format &convertedAngleFormat = convertedFormat.actualAngleFormat();
return convertVertexBufferCPU(glContext, srcBuffer, binding, attribIndex, srcVertexFormat, bool canConvertToFloatOnGPU =
conversion); convertedAngleFormat.isFloat() && !convertedAngleFormat.isVertexTypeHalfFloat();
}
angle::Result VertexArrayMtl::convertVertexBufferCPU(const gl::Context *glContext, bool canExpandComponentsOnGPU = convertedFormat.actualSameGLType;
BufferMtl *srcBuffer,
const gl::VertexBinding &binding,
size_t attribIndex,
const mtl::VertexFormat &srcVertexFormat,
ConversionBufferMtl *conversion)
{
ContextMtl *contextMtl = mtl::GetImpl(glContext);
// Convert to tightly packed format if (contextMtl->getRenderCommandEncoder() &&
GLuint stride; !contextMtl->getDisplay()->getFeatures().hasCheapRenderPass.enabled &&
const mtl::VertexFormat &vertexFormat = !contextMtl->getDisplay()->getFeatures().hasExplicitMemBarrier.enabled)
GetVertexConversionFormat(contextMtl, srcVertexFormat.intendedFormatId, &stride); {
unsigned srcFormatSize = vertexFormat.intendedAngleFormat().pixelBytes; // Cannot use GPU to convert when we are in a middle of a render pass.
canConvertToFloatOnGPU = canExpandComponentsOnGPU = false;
}
conversion->data.releaseInFlightBuffers(contextMtl); conversion->data.releaseInFlightBuffers(contextMtl);
conversion->data.updateAlignment(contextMtl, convertedAngleFormat.pixelBytes);
size_t numVertices = GetVertexCount(srcBuffer, binding, srcFormatSize); if (canConvertToFloatOnGPU || canExpandComponentsOnGPU)
if (numVertices == 0)
{ {
return angle::Result::Continue; ANGLE_TRY(convertVertexBufferGPU(glContext, srcBuffer, binding, attribIndex,
convertedFormat, stride, numVertices,
canExpandComponentsOnGPU, conversion));
}
else
{
ANGLE_TRY(convertVertexBufferCPU(contextMtl, srcBuffer, binding, attribIndex,
convertedFormat, stride, numVertices, conversion));
} }
const uint8_t *srcBytes = srcBuffer->getClientShadowCopyData(contextMtl);
ANGLE_CHECK_GL_ALLOC(contextMtl, srcBytes);
srcBytes += binding.getOffset();
ANGLE_TRY(StreamVertexData(contextMtl, &conversion->data, srcBytes, numVertices * stride, 0,
numVertices, binding.getStride(), vertexFormat.vertexLoadFunction,
&mConvertedArrayBufferHolders[attribIndex],
&mCurrentArrayBufferOffsets[attribIndex]));
mCurrentArrayBuffers[attribIndex] = &mConvertedArrayBufferHolders[attribIndex]; mCurrentArrayBuffers[attribIndex] = &mConvertedArrayBufferHolders[attribIndex];
mCurrentArrayBufferFormats[attribIndex] = &vertexFormat; mCurrentArrayBufferFormats[attribIndex] = &convertedFormat;
mCurrentArrayBufferStrides[attribIndex] = stride; mCurrentArrayBufferStrides[attribIndex] = stride;
// Cache the last converted results to be re-used later if the buffer's content won't ever be // Cache the last converted results to be re-used later if the buffer's content won't ever be
...@@ -773,4 +789,102 @@ angle::Result VertexArrayMtl::convertVertexBufferCPU(const gl::Context *glContex ...@@ -773,4 +789,102 @@ angle::Result VertexArrayMtl::convertVertexBufferCPU(const gl::Context *glContex
return angle::Result::Continue; return angle::Result::Continue;
} }
angle::Result VertexArrayMtl::convertVertexBufferCPU(ContextMtl *contextMtl,
BufferMtl *srcBuffer,
const gl::VertexBinding &binding,
size_t attribIndex,
const mtl::VertexFormat &convertedFormat,
GLuint targetStride,
size_t numVertices,
ConversionBufferMtl *conversion)
{
const uint8_t *srcBytes = srcBuffer->getClientShadowCopyData(contextMtl);
ANGLE_CHECK_GL_ALLOC(contextMtl, srcBytes);
srcBytes += binding.getOffset();
ANGLE_TRY(StreamVertexData(
contextMtl, &conversion->data, srcBytes, numVertices * targetStride, 0, numVertices,
binding.getStride(), convertedFormat.vertexLoadFunction,
&mConvertedArrayBufferHolders[attribIndex], &mCurrentArrayBufferOffsets[attribIndex]));
return angle::Result::Continue;
}
angle::Result VertexArrayMtl::convertVertexBufferGPU(const gl::Context *glContext,
BufferMtl *srcBuffer,
const gl::VertexBinding &binding,
size_t attribIndex,
const mtl::VertexFormat &convertedFormat,
GLuint targetStride,
size_t numVertices,
bool isExpandingComponents,
ConversionBufferMtl *conversion)
{
ContextMtl *contextMtl = mtl::GetImpl(glContext);
mtl::BufferRef newBuffer;
size_t newBufferOffset;
ANGLE_TRY(conversion->data.allocate(contextMtl, numVertices * targetStride, nullptr, &newBuffer,
&newBufferOffset));
ANGLE_CHECK_GL_MATH(contextMtl, binding.getOffset() <= std::numeric_limits<uint32_t>::max());
ANGLE_CHECK_GL_MATH(contextMtl, newBufferOffset <= std::numeric_limits<uint32_t>::max());
ANGLE_CHECK_GL_MATH(contextMtl, numVertices <= std::numeric_limits<uint32_t>::max());
mtl::VertexFormatConvertParams params;
params.srcBuffer = srcBuffer->getCurrentBuffer();
params.srcBufferStartOffset = static_cast<uint32_t>(binding.getOffset());
params.srcStride = binding.getStride();
params.srcDefaultAlphaData = convertedFormat.defaultAlpha;
params.dstBuffer = newBuffer;
params.dstBufferStartOffset = static_cast<uint32_t>(newBufferOffset);
params.dstStride = targetStride;
params.dstComponents = convertedFormat.actualAngleFormat().channelCount;
params.vertexCount = static_cast<uint32_t>(numVertices);
mtl::RenderUtils &utils = contextMtl->getDisplay()->getUtils();
mtl::RenderCommandEncoder *renderEncoder = contextMtl->getRenderCommandEncoder();
if (renderEncoder && contextMtl->getDisplay()->getFeatures().hasExplicitMemBarrier.enabled)
{
// If we are in the middle of a render pass, use vertex shader based buffer conversion to
// avoid breaking the render pass.
if (!isExpandingComponents)
{
ANGLE_TRY(utils.convertVertexFormatToFloatVS(
glContext, renderEncoder, convertedFormat.intendedAngleFormat(), params));
}
else
{
ANGLE_TRY(utils.expandVertexFormatComponentsVS(
glContext, renderEncoder, convertedFormat.intendedAngleFormat(), params));
}
}
else
{
// Compute based buffer conversion.
if (!isExpandingComponents)
{
ANGLE_TRY(utils.convertVertexFormatToFloatCS(
contextMtl, convertedFormat.intendedAngleFormat(), params));
}
else
{
ANGLE_TRY(utils.expandVertexFormatComponentsCS(
contextMtl, convertedFormat.intendedAngleFormat(), params));
}
}
ANGLE_TRY(conversion->data.commit(contextMtl));
mConvertedArrayBufferHolders[attribIndex].set(newBuffer);
mCurrentArrayBufferOffsets[attribIndex] = newBufferOffset;
return angle::Result::Continue;
}
} }
...@@ -441,6 +441,10 @@ class RenderCommandEncoder final : public CommandEncoder ...@@ -441,6 +441,10 @@ class RenderCommandEncoder final : public CommandEncoder
MTLResourceUsage usage, MTLResourceUsage usage,
mtl::RenderStages states); mtl::RenderStages states);
RenderCommandEncoder &memoryBarrierWithResource(const BufferRef &resource,
mtl::RenderStages after,
mtl::RenderStages before);
RenderCommandEncoder &setColorStoreAction(MTLStoreAction action, uint32_t colorAttachmentIndex); RenderCommandEncoder &setColorStoreAction(MTLStoreAction action, uint32_t colorAttachmentIndex);
// Set store action for every color attachment. // Set store action for every color attachment.
RenderCommandEncoder &setColorStoreAction(MTLStoreAction action); RenderCommandEncoder &setColorStoreAction(MTLStoreAction action);
......
...@@ -64,6 +64,7 @@ namespace ...@@ -64,6 +64,7 @@ namespace
PROC(DrawIndexedInstancedBaseVertex) \ PROC(DrawIndexedInstancedBaseVertex) \
PROC(SetVisibilityResultMode) \ PROC(SetVisibilityResultMode) \
PROC(UseResource) \ PROC(UseResource) \
PROC(MemoryBarrierWithResource) \
PROC(PushDebugGroup) \ PROC(PushDebugGroup) \
PROC(PopDebugGroup) PROC(PopDebugGroup)
...@@ -347,6 +348,26 @@ void UseResourceCmd(id<MTLRenderCommandEncoder> encoder, IntermediateCommandStre ...@@ -347,6 +348,26 @@ void UseResourceCmd(id<MTLRenderCommandEncoder> encoder, IntermediateCommandStre
[resource ANGLE_MTL_RELEASE]; [resource ANGLE_MTL_RELEASE];
} }
void MemoryBarrierWithResourceCmd(id<MTLRenderCommandEncoder> encoder,
IntermediateCommandStream *stream)
{
id<MTLResource> resource = stream->fetch<id<MTLResource>>();
mtl::RenderStages after = stream->fetch<mtl::RenderStages>();
mtl::RenderStages before = stream->fetch<mtl::RenderStages>();
ANGLE_UNUSED_VARIABLE(after);
ANGLE_UNUSED_VARIABLE(before);
#if defined(__MAC_10_14) && (TARGET_OS_OSX || TARGET_OS_MACCATALYST)
if (ANGLE_APPLE_AVAILABLE_XC(10.14, 13.0))
{
[encoder memoryBarrierWithResources:&resource
count:1
afterStages:after
beforeStages:before];
}
#endif
[resource ANGLE_MTL_RELEASE];
}
void PushDebugGroupCmd(id<MTLRenderCommandEncoder> encoder, IntermediateCommandStream *stream) void PushDebugGroupCmd(id<MTLRenderCommandEncoder> encoder, IntermediateCommandStream *stream)
{ {
NSString *label = stream->fetch<NSString *>(); NSString *label = stream->fetch<NSString *>();
...@@ -1633,6 +1654,25 @@ RenderCommandEncoder &RenderCommandEncoder::useResource(const BufferRef &resourc ...@@ -1633,6 +1654,25 @@ RenderCommandEncoder &RenderCommandEncoder::useResource(const BufferRef &resourc
return *this; return *this;
} }
RenderCommandEncoder &RenderCommandEncoder::memoryBarrierWithResource(const BufferRef &resource,
mtl::RenderStages after,
mtl::RenderStages before)
{
if (!resource)
{
return *this;
}
cmdBuffer().setWriteDependency(resource);
mCommands.push(CmdType::MemoryBarrierWithResource)
.push([resource->get() ANGLE_MTL_RETAIN])
.push(after)
.push(before);
return *this;
}
void RenderCommandEncoder::pushDebugGroup(NSString *label) void RenderCommandEncoder::pushDebugGroup(NSString *label)
{ {
// Defer the insertion until endEncoding() // Defer the insertion until endEncoding()
......
...@@ -36,6 +36,12 @@ ...@@ -36,6 +36,12 @@
# define TARGET_OS_MACCATALYST 0 # define TARGET_OS_MACCATALYST 0
#endif #endif
#if defined(__ARM_ARCH)
# define ANGLE_MTL_ARM (__ARM_ARCH != 0)
#else
# define ANGLE_MTL_ARM 0
#endif
#define ANGLE_MTL_OBJC_SCOPE @autoreleasepool #define ANGLE_MTL_OBJC_SCOPE @autoreleasepool
#if !__has_feature(objc_arc) #if !__has_feature(objc_arc)
......
...@@ -161,6 +161,21 @@ struct CopyPixelsToBufferParams : CopyPixelsCommonParams ...@@ -161,6 +161,21 @@ struct CopyPixelsToBufferParams : CopyPixelsCommonParams
bool reverseTextureRowOrder; bool reverseTextureRowOrder;
}; };
struct VertexFormatConvertParams
{
BufferRef srcBuffer;
uint32_t srcBufferStartOffset = 0;
uint32_t srcStride = 0;
uint32_t srcDefaultAlphaData = 0; // casted as uint
BufferRef dstBuffer;
uint32_t dstBufferStartOffset = 0;
uint32_t dstStride = 0;
uint32_t dstComponents = 0;
uint32_t vertexCount = 0;
};
// Utils class for clear & blitting // Utils class for clear & blitting
class ClearUtils final : angle::NonCopyable class ClearUtils final : angle::NonCopyable
{ {
...@@ -356,7 +371,7 @@ class IndexGeneratorUtils final : angle::NonCopyable ...@@ -356,7 +371,7 @@ class IndexGeneratorUtils final : angle::NonCopyable
}; };
// Util class for handling visibility query result // Util class for handling visibility query result
class VisibilityResultUtils class VisibilityResultUtils final : angle::NonCopyable
{ {
public: public:
void onDestroy(); void onDestroy();
...@@ -396,7 +411,7 @@ class MipmapUtils final : angle::NonCopyable ...@@ -396,7 +411,7 @@ class MipmapUtils final : angle::NonCopyable
}; };
// Util class for handling pixels copy between buffers and textures // Util class for handling pixels copy between buffers and textures
class CopyPixelsUtils class CopyPixelsUtils final : angle::NonCopyable
{ {
public: public:
CopyPixelsUtils() = default; CopyPixelsUtils() = default;
...@@ -429,6 +444,73 @@ class CopyPixelsUtils ...@@ -429,6 +444,73 @@ class CopyPixelsUtils
const std::string mWriteShaderName; const std::string mWriteShaderName;
}; };
// Util class for handling vertex format conversion on GPU
class VertexFormatConversionUtils final : angle::NonCopyable
{
public:
void onDestroy();
// Convert vertex format to float. Compute shader version.
angle::Result convertVertexFormatToFloatCS(ContextMtl *contextMtl,
const angle::Format &srcAngleFormat,
const VertexFormatConvertParams &params);
// Convert vertex format to float. Vertex shader version. This version should be used if
// a render pass is active and we don't want to break it. Explicit memory barrier must be
// supported.
angle::Result convertVertexFormatToFloatVS(const gl::Context *context,
RenderCommandEncoder *renderEncoder,
const angle::Format &srcAngleFormat,
const VertexFormatConvertParams &params);
// Expand number of components per vertex's attribute (or just simply copy components between
// buffers with different stride and offset)
angle::Result expandVertexFormatComponentsCS(ContextMtl *contextMtl,
const angle::Format &srcAngleFormat,
const VertexFormatConvertParams &params);
angle::Result expandVertexFormatComponentsVS(const gl::Context *context,
RenderCommandEncoder *renderEncoder,
const angle::Format &srcAngleFormat,
const VertexFormatConvertParams &params);
private:
void ensureComponentsExpandComputePipelineCreated(ContextMtl *contextMtl);
AutoObjCPtr<id<MTLRenderPipelineState>> getComponentsExpandRenderPipeline(
ContextMtl *contextMtl,
RenderCommandEncoder *renderEncoder);
AutoObjCPtr<id<MTLComputePipelineState>> getFloatConverstionComputePipeline(
ContextMtl *contextMtl,
const angle::Format &srcAngleFormat);
AutoObjCPtr<id<MTLRenderPipelineState>> getFloatConverstionRenderPipeline(
ContextMtl *contextMtl,
RenderCommandEncoder *renderEncoder,
const angle::Format &srcAngleFormat);
template <typename EncoderType, typename PipelineType>
angle::Result setupCommonConvertVertexFormatToFloat(ContextMtl *contextMtl,
EncoderType cmdEncoder,
const PipelineType &pipeline,
const angle::Format &srcAngleFormat,
const VertexFormatConvertParams &params);
template <typename EncoderType, typename PipelineType>
angle::Result setupCommonExpandVertexFormatComponents(ContextMtl *contextMtl,
EncoderType cmdEncoder,
const PipelineType &pipeline,
const angle::Format &srcAngleFormat,
const VertexFormatConvertParams &params);
using ConvertToFloatCompPipelineArray =
std::array<AutoObjCPtr<id<MTLComputePipelineState>>, angle::kNumANGLEFormats>;
using ConvertToFloatRenderPipelineArray =
std::array<RenderPipelineCache, angle::kNumANGLEFormats>;
ConvertToFloatCompPipelineArray mConvertToFloatCompPipelineCaches;
ConvertToFloatRenderPipelineArray mConvertToFloatRenderPipelineCaches;
AutoObjCPtr<id<MTLComputePipelineState>> mComponentsExpandCompPipeline;
RenderPipelineCache mComponentsExpandRenderPipelineCache;
};
// RenderUtils: container class of various util classes above // RenderUtils: container class of various util classes above
class RenderUtils : public Context, angle::NonCopyable class RenderUtils : public Context, angle::NonCopyable
{ {
...@@ -501,6 +583,25 @@ class RenderUtils : public Context, angle::NonCopyable ...@@ -501,6 +583,25 @@ class RenderUtils : public Context, angle::NonCopyable
const angle::Format &dstAngleFormat, const angle::Format &dstAngleFormat,
const CopyPixelsToBufferParams &params); const CopyPixelsToBufferParams &params);
// See VertexFormatConversionUtils::convertVertexFormatToFloatCS()
angle::Result convertVertexFormatToFloatCS(ContextMtl *contextMtl,
const angle::Format &srcAngleFormat,
const VertexFormatConvertParams &params);
// See VertexFormatConversionUtils::convertVertexFormatToFloatVS()
angle::Result convertVertexFormatToFloatVS(const gl::Context *context,
RenderCommandEncoder *renderEncoder,
const angle::Format &srcAngleFormat,
const VertexFormatConvertParams &params);
// See VertexFormatConversionUtils::expandVertexFormatComponentsCS()
angle::Result expandVertexFormatComponentsCS(ContextMtl *contextMtl,
const angle::Format &srcAngleFormat,
const VertexFormatConvertParams &params);
// See VertexFormatConversionUtils::expandVertexFormatComponentsVS()
angle::Result expandVertexFormatComponentsVS(const gl::Context *context,
RenderCommandEncoder *renderEncoder,
const angle::Format &srcAngleFormat,
const VertexFormatConvertParams &params);
private: private:
// override ErrorHandler // override ErrorHandler
void handleError(GLenum error, void handleError(GLenum error,
...@@ -522,6 +623,7 @@ class RenderUtils : public Context, angle::NonCopyable ...@@ -522,6 +623,7 @@ class RenderUtils : public Context, angle::NonCopyable
VisibilityResultUtils mVisibilityResultUtils; VisibilityResultUtils mVisibilityResultUtils;
MipmapUtils mMipmapUtils; MipmapUtils mMipmapUtils;
std::array<CopyPixelsUtils, angle::EnumSize<PixelType>()> mCopyPixelsUtils; std::array<CopyPixelsUtils, angle::EnumSize<PixelType>()> mCopyPixelsUtils;
VertexFormatConversionUtils mVertexFormatUtils;
}; };
} // namespace mtl } // namespace mtl
......
...@@ -126,6 +126,23 @@ struct WritePixelToBufferUniforms ...@@ -126,6 +126,23 @@ struct WritePixelToBufferUniforms
uint8_t padding[11]; uint8_t padding[11];
}; };
struct CopyVertexUniforms
{
uint32_t srcBufferStartOffset;
uint32_t srcStride;
uint32_t srcComponentBytes;
uint32_t srcComponents;
uint32_t srcDefaultAlphaData;
uint32_t dstBufferStartOffset;
uint32_t dstStride;
uint32_t dstComponents;
uint32_t vertexCount;
uint32_t padding[3];
};
// Class to automatically disable occlusion query upon entering block and re-able it upon // Class to automatically disable occlusion query upon entering block and re-able it upon
// exiting block. // exiting block.
struct ScopedDisableOcclusionQuery struct ScopedDisableOcclusionQuery
...@@ -344,6 +361,87 @@ void EnsureSpecializedComputePipelineInitialized( ...@@ -344,6 +361,87 @@ void EnsureSpecializedComputePipelineInitialized(
} }
} }
// Function to initialize render pipeline cache with only vertex shader.
ANGLE_INLINE
void EnsureVertexShaderOnlyPipelineCacheInitialized(Context *context,
NSString *vertexFunctionName,
RenderPipelineCache *pipelineCacheOut)
{
RenderPipelineCache &pipelineCache = *pipelineCacheOut;
if (pipelineCache.getVertexShader())
{
// Already initialized
return;
}
ANGLE_MTL_OBJC_SCOPE
{
DisplayMtl *display = context->getDisplay();
id<MTLLibrary> shaderLib = display->getDefaultShadersLib();
id<MTLFunction> shader = [shaderLib newFunctionWithName:vertexFunctionName];
ASSERT([shader ANGLE_MTL_AUTORELEASE]);
pipelineCache.setVertexShader(context, shader);
}
}
// Function to initialize specialized render pipeline cache with only vertex shader.
ANGLE_INLINE
void EnsureSpecializedVertexShaderOnlyPipelineCacheInitialized(
Context *context,
NSString *vertexFunctionName,
MTLFunctionConstantValues *funcConstants,
RenderPipelineCache *pipelineCacheOut)
{
if (!funcConstants)
{
// Non specialized constants provided, use default creation function.
EnsureVertexShaderOnlyPipelineCacheInitialized(context, vertexFunctionName,
pipelineCacheOut);
return;
}
RenderPipelineCache &pipelineCache = *pipelineCacheOut;
if (pipelineCache.getVertexShader())
{
// Already initialized
return;
}
ANGLE_MTL_OBJC_SCOPE
{
DisplayMtl *display = context->getDisplay();
id<MTLLibrary> shaderLib = display->getDefaultShadersLib();
NSError *err = nil;
id<MTLFunction> shader = [shaderLib newFunctionWithName:vertexFunctionName
constantValues:funcConstants
error:&err];
if (err && !shader)
{
ERR() << "Internal error: " << err.localizedDescription.UTF8String << "\n";
}
ASSERT([shader ANGLE_MTL_AUTORELEASE]);
pipelineCache.setVertexShader(context, shader);
}
}
// Get pipeline descriptor for render pipeline that contains vertex shader acting as compute shader.
ANGLE_INLINE
RenderPipelineDesc GetComputingVertexShaderOnlyRenderPipelineDesc(RenderCommandEncoder *cmdEncoder)
{
RenderPipelineDesc pipelineDesc;
const RenderPassDesc &renderPassDesc = cmdEncoder->renderPassDesc();
renderPassDesc.populateRenderPipelineOutputDesc(&pipelineDesc.outputDescriptor);
pipelineDesc.rasterizationType = RenderPipelineRasterization::Disabled;
pipelineDesc.inputPrimitiveTopology = kPrimitiveTopologyClassPoint;
return pipelineDesc;
}
template <typename T> template <typename T>
void ClearRenderPipelineCacheArray(T *pipelineCacheArray) void ClearRenderPipelineCacheArray(T *pipelineCacheArray)
{ {
...@@ -512,6 +610,62 @@ void SetupCommonBlitWithDrawStates(const gl::Context *context, ...@@ -512,6 +610,62 @@ void SetupCommonBlitWithDrawStates(const gl::Context *context,
SetupBlitWithDrawUniformData(cmdEncoder, params, isColorBlit); SetupBlitWithDrawUniformData(cmdEncoder, params, isColorBlit);
} }
// Overloaded functions to be used with both compute and render command encoder.
ANGLE_INLINE void SetComputeOrVertexBuffer(RenderCommandEncoder *encoder,
const BufferRef &buffer,
uint32_t offset,
uint32_t index)
{
encoder->setBuffer(gl::ShaderType::Vertex, buffer, offset, index);
}
ANGLE_INLINE void SetComputeOrVertexBufferForWrite(RenderCommandEncoder *encoder,
const BufferRef &buffer,
uint32_t offset,
uint32_t index)
{
encoder->setBufferForWrite(gl::ShaderType::Vertex, buffer, offset, index);
}
ANGLE_INLINE void SetComputeOrVertexBuffer(ComputeCommandEncoder *encoder,
const BufferRef &buffer,
uint32_t offset,
uint32_t index)
{
encoder->setBuffer(buffer, offset, index);
}
ANGLE_INLINE void SetComputeOrVertexBufferForWrite(ComputeCommandEncoder *encoder,
const BufferRef &buffer,
uint32_t offset,
uint32_t index)
{
encoder->setBufferForWrite(buffer, offset, index);
}
template <typename T>
ANGLE_INLINE void SetComputeOrVertexData(RenderCommandEncoder *encoder,
const T &data,
uint32_t index)
{
encoder->setData(gl::ShaderType::Vertex, data, index);
}
template <typename T>
ANGLE_INLINE void SetComputeOrVertexData(ComputeCommandEncoder *encoder,
const T &data,
uint32_t index)
{
encoder->setData(data, index);
}
ANGLE_INLINE void SetPipelineState(RenderCommandEncoder *encoder,
id<MTLRenderPipelineState> pipeline)
{
encoder->setRenderPipelineState(pipeline);
}
ANGLE_INLINE void SetPipelineState(ComputeCommandEncoder *encoder,
id<MTLComputePipelineState> pipeline)
{
encoder->setComputePipelineState(pipeline);
}
} // namespace } // namespace
// StencilBlitViaBufferParams implementation // StencilBlitViaBufferParams implementation
...@@ -559,6 +713,7 @@ void RenderUtils::onDestroy() ...@@ -559,6 +713,7 @@ void RenderUtils::onDestroy()
mIndexUtils.onDestroy(); mIndexUtils.onDestroy();
mVisibilityResultUtils.onDestroy(); mVisibilityResultUtils.onDestroy();
mMipmapUtils.onDestroy(); mMipmapUtils.onDestroy();
mVertexFormatUtils.onDestroy();
mCopyTextureFloatToUIntUtils.onDestroy(); mCopyTextureFloatToUIntUtils.onDestroy();
for (ClearUtils &util : mClearUtils) for (ClearUtils &util : mClearUtils)
...@@ -745,6 +900,39 @@ angle::Result RenderUtils::packPixelsFromTextureToBuffer(ContextMtl *contextMtl, ...@@ -745,6 +900,39 @@ angle::Result RenderUtils::packPixelsFromTextureToBuffer(ContextMtl *contextMtl,
params); params);
} }
angle::Result RenderUtils::convertVertexFormatToFloatCS(ContextMtl *contextMtl,
const angle::Format &srcAngleFormat,
const VertexFormatConvertParams &params)
{
return mVertexFormatUtils.convertVertexFormatToFloatCS(contextMtl, srcAngleFormat, params);
}
angle::Result RenderUtils::convertVertexFormatToFloatVS(const gl::Context *context,
RenderCommandEncoder *encoder,
const angle::Format &srcAngleFormat,
const VertexFormatConvertParams &params)
{
return mVertexFormatUtils.convertVertexFormatToFloatVS(context, encoder, srcAngleFormat,
params);
}
// Expand number of components per vertex's attribute
angle::Result RenderUtils::expandVertexFormatComponentsCS(ContextMtl *contextMtl,
const angle::Format &srcAngleFormat,
const VertexFormatConvertParams &params)
{
return mVertexFormatUtils.expandVertexFormatComponentsCS(contextMtl, srcAngleFormat, params);
}
angle::Result RenderUtils::expandVertexFormatComponentsVS(const gl::Context *context,
RenderCommandEncoder *encoder,
const angle::Format &srcAngleFormat,
const VertexFormatConvertParams &params)
{
return mVertexFormatUtils.expandVertexFormatComponentsVS(context, encoder, srcAngleFormat,
params);
}
// ClearUtils implementation // ClearUtils implementation
ClearUtils::ClearUtils(const std::string &fragmentShaderName) ClearUtils::ClearUtils(const std::string &fragmentShaderName)
: mFragmentShaderName(fragmentShaderName) : mFragmentShaderName(fragmentShaderName)
...@@ -2087,5 +2275,237 @@ angle::Result CopyPixelsUtils::packPixelsFromTextureToBuffer(ContextMtl *context ...@@ -2087,5 +2275,237 @@ angle::Result CopyPixelsUtils::packPixelsFromTextureToBuffer(ContextMtl *context
return angle::Result::Continue; return angle::Result::Continue;
} }
// VertexFormatConversionUtils implementation
void VertexFormatConversionUtils::onDestroy()
{
ClearPipelineStateArray(&mConvertToFloatCompPipelineCaches);
ClearRenderPipelineCacheArray(&mConvertToFloatRenderPipelineCaches);
mComponentsExpandCompPipeline = nil;
mComponentsExpandRenderPipelineCache.clear();
}
angle::Result VertexFormatConversionUtils::convertVertexFormatToFloatCS(
ContextMtl *contextMtl,
const angle::Format &srcAngleFormat,
const VertexFormatConvertParams &params)
{
ComputeCommandEncoder *cmdEncoder = contextMtl->getComputeCommandEncoder();
ASSERT(cmdEncoder);
AutoObjCPtr<id<MTLComputePipelineState>> pipeline =
getFloatConverstionComputePipeline(contextMtl, srcAngleFormat);
ANGLE_TRY(setupCommonConvertVertexFormatToFloat(contextMtl, cmdEncoder, pipeline,
srcAngleFormat, params));
DispatchCompute(contextMtl, cmdEncoder, pipeline, params.vertexCount);
return angle::Result::Continue;
}
angle::Result VertexFormatConversionUtils::convertVertexFormatToFloatVS(
const gl::Context *context,
RenderCommandEncoder *cmdEncoder,
const angle::Format &srcAngleFormat,
const VertexFormatConvertParams &params)
{
ContextMtl *contextMtl = GetImpl(context);
ASSERT(cmdEncoder);
ASSERT(contextMtl->getDisplay()->getFeatures().hasExplicitMemBarrier.enabled);
AutoObjCPtr<id<MTLRenderPipelineState>> pipeline =
getFloatConverstionRenderPipeline(contextMtl, cmdEncoder, srcAngleFormat);
ANGLE_TRY(setupCommonConvertVertexFormatToFloat(contextMtl, cmdEncoder, pipeline,
srcAngleFormat, params));
cmdEncoder->draw(MTLPrimitiveTypePoint, 0, params.vertexCount);
cmdEncoder->memoryBarrierWithResource(params.dstBuffer, kRenderStageVertex, kRenderStageVertex);
// Invalidate current context's state.
// NOTE(hqle): Consider invalidating only affected states.
contextMtl->invalidateState(context);
return angle::Result::Continue;
}
template <typename EncoderType, typename PipelineType>
angle::Result VertexFormatConversionUtils::setupCommonConvertVertexFormatToFloat(
ContextMtl *contextMtl,
EncoderType cmdEncoder,
const PipelineType &pipeline,
const angle::Format &srcAngleFormat,
const VertexFormatConvertParams &params)
{
SetPipelineState(cmdEncoder, pipeline);
SetComputeOrVertexBuffer(cmdEncoder, params.srcBuffer, 0, 1);
SetComputeOrVertexBufferForWrite(cmdEncoder, params.dstBuffer, 0, 2);
CopyVertexUniforms options;
options.srcBufferStartOffset = params.srcBufferStartOffset;
options.srcStride = params.srcStride;
options.dstBufferStartOffset = params.dstBufferStartOffset;
options.dstStride = params.dstStride;
options.dstComponents = params.dstComponents;
options.vertexCount = params.vertexCount;
SetComputeOrVertexData(cmdEncoder, options, 0);
return angle::Result::Continue;
}
// Expand number of components per vertex's attribute
angle::Result VertexFormatConversionUtils::expandVertexFormatComponentsCS(
ContextMtl *contextMtl,
const angle::Format &srcAngleFormat,
const VertexFormatConvertParams &params)
{
ComputeCommandEncoder *cmdEncoder = contextMtl->getComputeCommandEncoder();
ASSERT(cmdEncoder);
ensureComponentsExpandComputePipelineCreated(contextMtl);
ANGLE_TRY(setupCommonExpandVertexFormatComponents(
contextMtl, cmdEncoder, mComponentsExpandCompPipeline, srcAngleFormat, params));
DispatchCompute(contextMtl, cmdEncoder, mComponentsExpandCompPipeline, params.vertexCount);
return angle::Result::Continue;
}
angle::Result VertexFormatConversionUtils::expandVertexFormatComponentsVS(
const gl::Context *context,
RenderCommandEncoder *cmdEncoder,
const angle::Format &srcAngleFormat,
const VertexFormatConvertParams &params)
{
ContextMtl *contextMtl = GetImpl(context);
ASSERT(cmdEncoder);
ASSERT(contextMtl->getDisplay()->getFeatures().hasExplicitMemBarrier.enabled);
AutoObjCPtr<id<MTLRenderPipelineState>> pipeline =
getComponentsExpandRenderPipeline(contextMtl, cmdEncoder);
ANGLE_TRY(setupCommonExpandVertexFormatComponents(contextMtl, cmdEncoder, pipeline,
srcAngleFormat, params));
cmdEncoder->draw(MTLPrimitiveTypePoint, 0, params.vertexCount);
cmdEncoder->memoryBarrierWithResource(params.dstBuffer, kRenderStageVertex, kRenderStageVertex);
// Invalidate current context's state.
// NOTE(hqle): Consider invalidating only affected states.
contextMtl->invalidateState(context);
return angle::Result::Continue;
}
template <typename EncoderType, typename PipelineType>
angle::Result VertexFormatConversionUtils::setupCommonExpandVertexFormatComponents(
ContextMtl *contextMtl,
EncoderType cmdEncoder,
const PipelineType &pipeline,
const angle::Format &srcAngleFormat,
const VertexFormatConvertParams &params)
{
SetPipelineState(cmdEncoder, pipeline);
SetComputeOrVertexBuffer(cmdEncoder, params.srcBuffer, 0, 1);
SetComputeOrVertexBufferForWrite(cmdEncoder, params.dstBuffer, 0, 2);
CopyVertexUniforms options;
options.srcBufferStartOffset = params.srcBufferStartOffset;
options.srcStride = params.srcStride;
options.srcComponentBytes = srcAngleFormat.pixelBytes / srcAngleFormat.channelCount;
options.srcComponents = srcAngleFormat.channelCount;
options.srcDefaultAlphaData = params.srcDefaultAlphaData;
options.dstBufferStartOffset = params.dstBufferStartOffset;
options.dstStride = params.dstStride;
options.dstComponents = params.dstComponents;
options.vertexCount = params.vertexCount;
SetComputeOrVertexData(cmdEncoder, options, 0);
return angle::Result::Continue;
}
void VertexFormatConversionUtils::ensureComponentsExpandComputePipelineCreated(
ContextMtl *contextMtl)
{
EnsureComputePipelineInitialized(contextMtl->getDisplay(), @"expandVertexFormatComponentsCS",
&mComponentsExpandCompPipeline);
}
AutoObjCPtr<id<MTLRenderPipelineState>>
VertexFormatConversionUtils::getComponentsExpandRenderPipeline(ContextMtl *contextMtl,
RenderCommandEncoder *cmdEncoder)
{
EnsureVertexShaderOnlyPipelineCacheInitialized(contextMtl, @"expandVertexFormatComponentsVS",
&mComponentsExpandRenderPipelineCache);
RenderPipelineDesc pipelineDesc = GetComputingVertexShaderOnlyRenderPipelineDesc(cmdEncoder);
return mComponentsExpandRenderPipelineCache.getRenderPipelineState(contextMtl, pipelineDesc);
}
AutoObjCPtr<id<MTLComputePipelineState>>
VertexFormatConversionUtils::getFloatConverstionComputePipeline(ContextMtl *contextMtl,
const angle::Format &srcAngleFormat)
{
int formatIDValue = static_cast<int>(srcAngleFormat.id);
AutoObjCPtr<id<MTLComputePipelineState>> &cache =
mConvertToFloatCompPipelineCaches[formatIDValue];
if (!cache)
{
// Pipeline not cached, create it now:
ANGLE_MTL_OBJC_SCOPE
{
auto funcConstants = [[[MTLFunctionConstantValues alloc] init] ANGLE_MTL_AUTORELEASE];
[funcConstants setConstantValue:&formatIDValue
type:MTLDataTypeInt
withName:COPY_FORMAT_TYPE_CONSTANT_NAME];
EnsureSpecializedComputePipelineInitialized(
contextMtl->getDisplay(), @"convertToFloatVertexFormatCS", funcConstants, &cache);
}
}
return cache;
}
AutoObjCPtr<id<MTLRenderPipelineState>>
VertexFormatConversionUtils::getFloatConverstionRenderPipeline(ContextMtl *contextMtl,
RenderCommandEncoder *cmdEncoder,
const angle::Format &srcAngleFormat)
{
int formatIDValue = static_cast<int>(srcAngleFormat.id);
RenderPipelineCache &cache = mConvertToFloatRenderPipelineCaches[formatIDValue];
if (!cache.getVertexShader())
{
// Pipeline cache not intialized, do it now:
ANGLE_MTL_OBJC_SCOPE
{
auto funcConstants = [[[MTLFunctionConstantValues alloc] init] ANGLE_MTL_AUTORELEASE];
[funcConstants setConstantValue:&formatIDValue
type:MTLDataTypeInt
withName:COPY_FORMAT_TYPE_CONSTANT_NAME];
EnsureSpecializedVertexShaderOnlyPipelineCacheInitialized(
contextMtl, @"convertToFloatVertexFormatVS", funcConstants, &cache);
}
}
RenderPipelineDesc pipelineDesc = GetComputingVertexShaderOnlyRenderPipelineDesc(cmdEncoder);
return cache.getRenderPipelineState(contextMtl, pipelineDesc);
}
} // namespace mtl } // namespace mtl
} // namespace rx } // namespace rx
...@@ -1495,3 +1495,184 @@ kernel void writeFromUIntTextureToBuffer(COMMON_WRITE_KERNEL_PARAMS(uint)) ...@@ -1495,3 +1495,184 @@ kernel void writeFromUIntTextureToBuffer(COMMON_WRITE_KERNEL_PARAMS(uint))
#undef SUPPORTED_FORMATS #undef SUPPORTED_FORMATS
} }
/** ----- vertex format conversion --------*/
struct CopyVertexParams
{
uint srcBufferStartOffset;
uint srcStride;
uint srcComponentBytes; // unused when convert to float
uint srcComponents; // unused when convert to float
// Default source alpha when expanding the number of components.
// if source has less than 32 bits per component, only those bits are usable in
// srcDefaultAlpha
uchar4 srcDefaultAlphaData; // unused when convert to float
uint dstBufferStartOffset;
uint dstStride;
uint dstComponents;
uint vertexCount;
};
#define INT_FORMAT_PROC(FORMAT, PROC) \
PROC(FORMAT##_UNORM) \
PROC(FORMAT##_SNORM) \
PROC(FORMAT##_UINT) \
PROC(FORMAT##_SINT) \
PROC(FORMAT##_USCALED) \
PROC(FORMAT##_SSCALED)
#define PURE_INT_FORMAT_PROC(FORMAT, PROC) \
PROC(FORMAT##_UINT) \
PROC(FORMAT##_SINT)
#define FLOAT_FORMAT_PROC(FORMAT, PROC) PROC(FORMAT##_FLOAT)
#define FIXED_FORMAT_PROC(FORMAT, PROC) PROC(FORMAT##_FIXED)
#define FORMAT_BITS_PROC(BITS, PROC1, PROC2) \
PROC1(R##BITS, PROC2) \
PROC1(R##BITS##G##BITS, PROC2) \
PROC1(R##BITS##G##BITS##B##BITS, PROC2) \
PROC1(R##BITS##G##BITS##B##BITS##A##BITS, PROC2)
template <typename IntType>
static inline void writeFloatVertex(constant CopyVertexParams &options,
uint idx,
vec<IntType, 4> data,
device uchar *dst)
{
uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset;
for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4)
{
floatToBytes(static_cast<float>(data[component]), dstOffset, dst);
}
}
template <>
inline void writeFloatVertex(constant CopyVertexParams &options,
uint idx,
vec<float, 4> data,
device uchar *dst)
{
uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset;
for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4)
{
floatToBytes(data[component], dstOffset, dst);
}
}
// Function to convert from any vertex format to float vertex format
static inline void convertToFloatVertexFormat(uint index,
constant CopyVertexParams &options,
constant uchar *srcBuffer,
device uchar *dstBuffer)
{
#define SUPPORTED_FORMATS(PROC) \
FORMAT_BITS_PROC(8, INT_FORMAT_PROC, PROC) \
FORMAT_BITS_PROC(16, INT_FORMAT_PROC, PROC) \
FORMAT_BITS_PROC(32, INT_FORMAT_PROC, PROC) \
FORMAT_BITS_PROC(16, FLOAT_FORMAT_PROC, PROC) \
FORMAT_BITS_PROC(32, FLOAT_FORMAT_PROC, PROC) \
FORMAT_BITS_PROC(32, FIXED_FORMAT_PROC, PROC) \
PROC(R10G10B10A2_SINT) \
PROC(R10G10B10A2_UINT) \
PROC(R10G10B10A2_SSCALED) \
PROC(R10G10B10A2_USCALED)
uint bufferOffset = options.srcBufferStartOffset + options.srcStride * index;
#define COMVERT_FLOAT_VERTEX_SWITCH_CASE(FORMAT) \
case FormatID::FORMAT: { \
auto data = read##FORMAT(bufferOffset, srcBuffer); \
writeFloatVertex(options, index, data, dstBuffer); \
} \
break;
switch (kCopyFormatType)
{
SUPPORTED_FORMATS(COMVERT_FLOAT_VERTEX_SWITCH_CASE)
}
#undef SUPPORTED_FORMATS
}
// Kernel to convert from any vertex format to float vertex format
kernel void convertToFloatVertexFormatCS(uint index [[thread_position_in_grid]],
constant CopyVertexParams &options [[buffer(0)]],
constant uchar *srcBuffer [[buffer(1)]],
device uchar *dstBuffer [[buffer(2)]])
{
ANGLE_KERNEL_GUARD(index, options.vertexCount);
convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer);
}
// Vertex shader to convert from any vertex format to float vertex format
vertex void convertToFloatVertexFormatVS(uint index [[vertex_id]],
constant CopyVertexParams &options [[buffer(0)]],
constant uchar *srcBuffer [[buffer(1)]],
device uchar *dstBuffer [[buffer(2)]])
{
convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer);
}
// Function to expand (or just simply copy) the components of the vertex
static inline void expandVertexFormatComponents(uint index,
constant CopyVertexParams &options,
constant uchar *srcBuffer,
device uchar *dstBuffer)
{
uint srcOffset = options.srcBufferStartOffset + options.srcStride * index;
uint dstOffset = options.dstBufferStartOffset + options.dstStride * index;
uint dstComponentsBeforeAlpha = min(options.dstComponents, 3u);
uint component;
for (component = 0; component < options.srcComponents; ++component,
srcOffset += options.srcComponentBytes, dstOffset += options.srcComponentBytes)
{
for (uint byte = 0; byte < options.srcComponentBytes; ++byte)
{
dstBuffer[dstOffset + byte] = srcBuffer[srcOffset + byte];
}
}
for (; component < dstComponentsBeforeAlpha;
++component, dstOffset += options.srcComponentBytes)
{
for (uint byte = 0; byte < options.srcComponentBytes; ++byte)
{
dstBuffer[dstOffset + byte] = 0;
}
}
if (component < options.dstComponents)
{
// Last alpha component
for (uint byte = 0; byte < options.srcComponentBytes; ++byte)
{
dstBuffer[dstOffset + byte] = options.srcDefaultAlphaData[byte];
}
}
}
// Kernel to expand (or just simply copy) the components of the vertex
kernel void expandVertexFormatComponentsCS(uint index [[thread_position_in_grid]],
constant CopyVertexParams &options [[buffer(0)]],
constant uchar *srcBuffer [[buffer(1)]],
device uchar *dstBuffer [[buffer(2)]])
{
ANGLE_KERNEL_GUARD(index, options.vertexCount);
expandVertexFormatComponents(index, options, srcBuffer, dstBuffer);
}
// Vertex shader to expand (or just simply copy) the components of the vertex
vertex void expandVertexFormatComponentsVS(uint index [[vertex_id]],
constant CopyVertexParams &options [[buffer(0)]],
constant uchar *srcBuffer [[buffer(1)]],
device uchar *dstBuffer [[buffer(2)]])
{
expandVertexFormatComponents(index, options, srcBuffer, dstBuffer);
}
...@@ -2565,6 +2565,148 @@ kernel void writeFromUIntTextureToBuffer(ushort2 gIndices [[thread_position_in_g ...@@ -2565,6 +2565,148 @@ kernel void writeFromUIntTextureToBuffer(ushort2 gIndices [[thread_position_in_g
} }
struct CopyVertexParams
{
uint srcBufferStartOffset;
uint srcStride;
uint srcComponentBytes;
uint srcComponents;
uchar4 srcDefaultAlphaData;
uint dstBufferStartOffset;
uint dstStride;
uint dstComponents;
uint vertexCount;
};
# 1539 "./copy_buffer.metal"
template <typename IntType>
static inline void writeFloatVertex(constant CopyVertexParams &options,
uint idx,
vec<IntType, 4> data,
device uchar *dst)
{
uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset;
for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4)
{
floatToBytes(static_cast<float>(data[component]), dstOffset, dst);
}
}
template <>
inline void writeFloatVertex(constant CopyVertexParams &options,
uint idx,
vec<float, 4> data,
device uchar *dst)
{
uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset;
for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4)
{
floatToBytes(data[component], dstOffset, dst);
}
}
static inline void convertToFloatVertexFormat(uint index,
constant CopyVertexParams &options,
constant uchar *srcBuffer,
device uchar *dstBuffer)
{
# 1585 "./copy_buffer.metal"
uint bufferOffset = options.srcBufferStartOffset + options.srcStride * index;
# 1594 "./copy_buffer.metal"
switch (kCopyFormatType)
{
case FormatID::R8_UNORM: { auto data = readR8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SNORM: { auto data = readR8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_UINT: { auto data = readR8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SINT: { auto data = readR8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_USCALED: { auto data = readR8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SSCALED: { auto data = readR8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UNORM: { auto data = readR8G8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SNORM: { auto data = readR8G8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UINT: { auto data = readR8G8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SINT: { auto data = readR8G8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_USCALED: { auto data = readR8G8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SSCALED: { auto data = readR8G8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UNORM: { auto data = readR8G8B8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SNORM: { auto data = readR8G8B8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UINT: { auto data = readR8G8B8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SINT: { auto data = readR8G8B8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_USCALED: { auto data = readR8G8B8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SSCALED: { auto data = readR8G8B8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UNORM: { auto data = readR8G8B8A8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SNORM: { auto data = readR8G8B8A8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UINT: { auto data = readR8G8B8A8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SINT: { auto data = readR8G8B8A8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_USCALED: { auto data = readR8G8B8A8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SSCALED: { auto data = readR8G8B8A8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UNORM: { auto data = readR16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SNORM: { auto data = readR16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UINT: { auto data = readR16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SINT: { auto data = readR16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_USCALED: { auto data = readR16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SSCALED: { auto data = readR16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UNORM: { auto data = readR16G16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SNORM: { auto data = readR16G16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UINT: { auto data = readR16G16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SINT: { auto data = readR16G16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_USCALED: { auto data = readR16G16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SSCALED: { auto data = readR16G16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UNORM: { auto data = readR16G16B16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SNORM: { auto data = readR16G16B16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UINT: { auto data = readR16G16B16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SINT: { auto data = readR16G16B16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_USCALED: { auto data = readR16G16B16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SSCALED: { auto data = readR16G16B16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UNORM: { auto data = readR16G16B16A16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SNORM: { auto data = readR16G16B16A16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UINT: { auto data = readR16G16B16A16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SINT: { auto data = readR16G16B16A16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_USCALED: { auto data = readR16G16B16A16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SSCALED: { auto data = readR16G16B16A16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UNORM: { auto data = readR32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SNORM: { auto data = readR32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UINT: { auto data = readR32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SINT: { auto data = readR32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_USCALED: { auto data = readR32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SSCALED: { auto data = readR32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UNORM: { auto data = readR32G32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SNORM: { auto data = readR32G32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UINT: { auto data = readR32G32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SINT: { auto data = readR32G32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_USCALED: { auto data = readR32G32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SSCALED: { auto data = readR32G32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UNORM: { auto data = readR32G32B32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SNORM: { auto data = readR32G32B32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UINT: { auto data = readR32G32B32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SINT: { auto data = readR32G32B32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_USCALED: { auto data = readR32G32B32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SSCALED: { auto data = readR32G32B32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UNORM: { auto data = readR32G32B32A32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SNORM: { auto data = readR32G32B32A32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UINT: { auto data = readR32G32B32A32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SINT: { auto data = readR32G32B32A32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_USCALED: { auto data = readR32G32B32A32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SSCALED: { auto data = readR32G32B32A32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_FLOAT: { auto data = readR16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_FLOAT: { auto data = readR16G16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_FLOAT: { auto data = readR16G16B16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto data = readR16G16B16A16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FLOAT: { auto data = readR32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FLOAT: { auto data = readR32G32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FLOAT: { auto data = readR32G32B32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto data = readR32G32B32A32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FIXED: { auto data = readR32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FIXED: { auto data = readR32G32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FIXED: { auto data = readR32G32B32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FIXED: { auto data = readR32G32B32A32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SINT: { auto data = readR10G10B10A2_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_UINT: { auto data = readR10G10B10A2_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SSCALED: { auto data = readR10G10B10A2_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_USCALED: { auto data = readR10G10B10A2_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break;
}
}
kernel void convertToFloatVertexFormatCS(uint index [[thread_position_in_grid]],
constant CopyVertexParams &options [[buffer(0)]],
constant uchar *srcBuffer [[buffer(1)]],
device uchar *dstBuffer [[buffer(2)]])
{
if (index >= options.vertexCount) { return; };
convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer);
}
vertex void convertToFloatVertexFormatVS(uint index [[vertex_id]],
constant CopyVertexParams &options [[buffer(0)]],
constant uchar *srcBuffer [[buffer(1)]],
device uchar *dstBuffer [[buffer(2)]])
{
convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer);
}
static inline void expandVertexFormatComponents(uint index,
constant CopyVertexParams &options,
constant uchar *srcBuffer,
device uchar *dstBuffer)
{
uint srcOffset = options.srcBufferStartOffset + options.srcStride * index;
uint dstOffset = options.dstBufferStartOffset + options.dstStride * index;
uint dstComponentsBeforeAlpha = min(options.dstComponents, 3u);
uint component;
for (component = 0; component < options.srcComponents; ++component,
srcOffset += options.srcComponentBytes, dstOffset += options.srcComponentBytes)
{
for (uint byte = 0; byte < options.srcComponentBytes; ++byte)
{
dstBuffer[dstOffset + byte] = srcBuffer[srcOffset + byte];
}
}
for (; component < dstComponentsBeforeAlpha;
++component, dstOffset += options.srcComponentBytes)
{
for (uint byte = 0; byte < options.srcComponentBytes; ++byte)
{
dstBuffer[dstOffset + byte] = 0;
}
}
if (component < options.dstComponents)
{
for (uint byte = 0; byte < options.srcComponentBytes; ++byte)
{
dstBuffer[dstOffset + byte] = options.srcDefaultAlphaData[byte];
}
}
}
kernel void expandVertexFormatComponentsCS(uint index [[thread_position_in_grid]],
constant CopyVertexParams &options [[buffer(0)]],
constant uchar *srcBuffer [[buffer(1)]],
device uchar *dstBuffer [[buffer(2)]])
{
if (index >= options.vertexCount) { return; };
expandVertexFormatComponents(index, options, srcBuffer, dstBuffer);
}
vertex void expandVertexFormatComponentsVS(uint index [[vertex_id]],
constant CopyVertexParams &options [[buffer(0)]],
constant uchar *srcBuffer [[buffer(1)]],
device uchar *dstBuffer [[buffer(2)]])
{
expandVertexFormatComponents(index, options, srcBuffer, dstBuffer);
}
# 6 "temp_master_source.metal" 2 # 6 "temp_master_source.metal" 2
# 1 "./visibility.metal" 1 # 1 "./visibility.metal" 1
......
...@@ -169,7 +169,8 @@ class VertexAttributeTest : public ANGLETest ...@@ -169,7 +169,8 @@ class VertexAttributeTest : public ANGLETest
bufferOffset(0), bufferOffset(0),
source(sourceIn), source(sourceIn),
inputData(inputDataIn), inputData(inputDataIn),
expectedData(expectedDataIn) expectedData(expectedDataIn),
clearBeforeDraw(false)
{} {}
GLenum type; GLenum type;
...@@ -179,6 +180,8 @@ class VertexAttributeTest : public ANGLETest ...@@ -179,6 +180,8 @@ class VertexAttributeTest : public ANGLETest
const void *inputData; const void *inputData;
const GLfloat *expectedData; const GLfloat *expectedData;
bool clearBeforeDraw;
}; };
void setupTest(const TestData &test, GLint typeSize) void setupTest(const TestData &test, GLint typeSize)
...@@ -269,6 +272,11 @@ class VertexAttributeTest : public ANGLETest ...@@ -269,6 +272,11 @@ class VertexAttributeTest : public ANGLETest
GLint typeSize = i + 1; GLint typeSize = i + 1;
setupTest(test, typeSize); setupTest(test, typeSize);
if (test.clearBeforeDraw)
{
glClear(GL_COLOR_BUFFER_BIT);
}
drawQuad(mProgram, "position", 0.5f); drawQuad(mProgram, "position", 0.5f);
glDisableVertexAttribArray(mTestAttrib); glDisableVertexAttribArray(mTestAttrib);
...@@ -986,6 +994,48 @@ TEST_P(VertexAttributeTestES3, IntNormalized) ...@@ -986,6 +994,48 @@ TEST_P(VertexAttributeTestES3, IntNormalized)
runTest(data); runTest(data);
} }
// Same as IntUnnormalized but with glClear() before running the test to force
// starting a render pass. This to verify that buffer format conversion within
// an active render pass works as expected in Metal back-end.
TEST_P(VertexAttributeTestES3, IntUnnormalizedWithClear)
{
GLint lo = std::numeric_limits<GLint>::min();
GLint hi = std::numeric_limits<GLint>::max();
std::array<GLint, kVertexCount> inputData = {
{0, 1, 2, 3, -1, -2, -3, -4, -1, hi, hi - 1, lo, lo + 1}};
std::array<GLfloat, kVertexCount> expectedData;
for (size_t i = 0; i < kVertexCount; i++)
{
expectedData[i] = static_cast<GLfloat>(inputData[i]);
}
TestData data(GL_INT, GL_FALSE, Source::BUFFER, inputData.data(), expectedData.data());
data.clearBeforeDraw = true;
runTest(data);
}
// Same as IntNormalized but with glClear() before running the test to force
// starting a render pass. This to verify that buffer format conversion within
// an active render pass works as expected in Metal back-end.
TEST_P(VertexAttributeTestES3, IntNormalizedWithClear)
{
GLint lo = std::numeric_limits<GLint>::min();
GLint hi = std::numeric_limits<GLint>::max();
std::array<GLint, kVertexCount> inputData = {
{0, 1, 2, 3, -1, -2, -3, -4, -1, hi, hi - 1, lo, lo + 1}};
std::array<GLfloat, kVertexCount> expectedData;
for (size_t i = 0; i < kVertexCount; i++)
{
expectedData[i] = Normalize(inputData[i]);
}
TestData data(GL_INT, GL_TRUE, Source::BUFFER, inputData.data(), expectedData.data());
data.clearBeforeDraw = true;
runTest(data);
}
TEST_P(VertexAttributeTestES3, UnsignedIntUnnormalized) TEST_P(VertexAttributeTestES3, UnsignedIntUnnormalized)
{ {
GLuint mid = std::numeric_limits<GLuint>::max() >> 1; GLuint mid = std::numeric_limits<GLuint>::max() >> 1;
...@@ -1018,6 +1068,26 @@ TEST_P(VertexAttributeTestES3, UnsignedIntNormalized) ...@@ -1018,6 +1068,26 @@ TEST_P(VertexAttributeTestES3, UnsignedIntNormalized)
runTest(data); runTest(data);
} }
// Same as UnsignedIntNormalized but with glClear() before running the test to force
// starting a render pass. This to verify that buffer format conversion within
// an active render pass works as expected in Metal back-end.
TEST_P(VertexAttributeTestES3, UnsignedIntNormalizedWithClear)
{
GLuint mid = std::numeric_limits<GLuint>::max() >> 1;
GLuint hi = std::numeric_limits<GLuint>::max();
std::array<GLuint, kVertexCount> inputData = {
{0, 1, 2, 3, 254, 255, 256, mid - 1, mid, mid + 1, hi - 2, hi - 1, hi}};
std::array<GLfloat, kVertexCount> expectedData;
for (size_t i = 0; i < kVertexCount; i++)
{
expectedData[i] = Normalize(inputData[i]);
}
TestData data(GL_UNSIGNED_INT, GL_TRUE, Source::BUFFER, inputData.data(), expectedData.data());
data.clearBeforeDraw = true;
runTest(data);
}
void SetupColorsForUnitQuad(GLint location, const GLColor32F &color, GLenum usage, GLBuffer *vbo) void SetupColorsForUnitQuad(GLint location, const GLColor32F &color, GLenum usage, GLBuffer *vbo)
{ {
glBindBuffer(GL_ARRAY_BUFFER, *vbo); glBindBuffer(GL_ARRAY_BUFFER, *vbo);
...@@ -3553,14 +3623,46 @@ void main() ...@@ -3553,14 +3623,46 @@ void main()
// tests should be run against. // tests should be run against.
// D3D11 Feature Level 9_3 uses different D3D formats for vertex attribs compared to Feature Levels // D3D11 Feature Level 9_3 uses different D3D formats for vertex attribs compared to Feature Levels
// 10_0+, so we should test them separately. // 10_0+, so we should test them separately.
ANGLE_INSTANTIATE_TEST_ES2_AND_ES3(VertexAttributeTest); ANGLE_INSTANTIATE_TEST_ES2_AND_ES3_AND(
VertexAttributeTest,
ANGLE_INSTANTIATE_TEST_ES2_AND_ES3(VertexAttributeOORTest); ES3_METAL(),
WithMetalMemoryBarrierAndCheapRenderPass(ES3_METAL(),
ANGLE_INSTANTIATE_TEST_ES3_AND(VertexAttributeTestES3, ES3_METAL()); /* hasBarrier */ false,
/* cheapRenderPass */ true),
WithMetalMemoryBarrierAndCheapRenderPass(ES3_METAL(),
/* hasBarrier */ false,
/* cheapRenderPass */ false));
ANGLE_INSTANTIATE_TEST_ES2_AND_ES3_AND(
VertexAttributeOORTest,
ES3_METAL(),
WithMetalMemoryBarrierAndCheapRenderPass(ES3_METAL(),
/* hasBarrier */ false,
/* cheapRenderPass */ true),
WithMetalMemoryBarrierAndCheapRenderPass(ES3_METAL(),
/* hasBarrier */ false,
/* cheapRenderPass */ false));
ANGLE_INSTANTIATE_TEST_ES3_AND(
VertexAttributeTestES3,
ES3_METAL(),
WithMetalMemoryBarrierAndCheapRenderPass(ES3_METAL(),
/* hasBarrier */ false,
/* cheapRenderPass */ true),
WithMetalMemoryBarrierAndCheapRenderPass(ES3_METAL(),
/* hasBarrier */ false,
/* cheapRenderPass */ false));
ANGLE_INSTANTIATE_TEST_ES31(VertexAttributeTestES31); ANGLE_INSTANTIATE_TEST_ES31(VertexAttributeTestES31);
ANGLE_INSTANTIATE_TEST_ES2_AND_ES3(VertexAttributeCachingTest); ANGLE_INSTANTIATE_TEST_ES2_AND_ES3_AND(
VertexAttributeCachingTest,
ES3_METAL(),
WithMetalMemoryBarrierAndCheapRenderPass(ES3_METAL(),
/* hasBarrier */ false,
/* cheapRenderPass */ true),
WithMetalMemoryBarrierAndCheapRenderPass(ES3_METAL(),
/* hasBarrier */ false,
/* cheapRenderPass */ false));
} // anonymous namespace } // anonymous namespace
...@@ -234,6 +234,16 @@ std::ostream &operator<<(std::ostream &stream, const PlatformParameters &pp) ...@@ -234,6 +234,16 @@ std::ostream &operator<<(std::ostream &stream, const PlatformParameters &pp)
break; break;
} }
if (pp.eglParameters.hasExplicitMemBarrierFeatureMtl == EGL_FALSE)
{
stream << "_NoExplicitMemoryBarrier";
}
if (pp.eglParameters.hasCheapRenderPassFeatureMtl == EGL_FALSE)
{
stream << "_NoCheapRenderPass";
}
return stream; return stream;
} }
......
...@@ -241,6 +241,16 @@ inline PlatformParameters WithNoGenMultipleMipsPerPass(const PlatformParameters ...@@ -241,6 +241,16 @@ inline PlatformParameters WithNoGenMultipleMipsPerPass(const PlatformParameters
return re; return re;
} }
inline PlatformParameters WithMetalMemoryBarrierAndCheapRenderPass(const PlatformParameters &params,
bool hasBarrier,
bool cheapRenderPass)
{
PlatformParameters re = params;
re.eglParameters.hasExplicitMemBarrierFeatureMtl = hasBarrier ? EGL_TRUE : EGL_FALSE;
re.eglParameters.hasCheapRenderPassFeatureMtl = cheapRenderPass ? EGL_TRUE : EGL_FALSE;
return re;
}
inline PlatformParameters WithRobustness(const PlatformParameters &params) inline PlatformParameters WithRobustness(const PlatformParameters &params)
{ {
PlatformParameters withRobustness = params; PlatformParameters withRobustness = params;
......
...@@ -62,7 +62,8 @@ struct EGLPlatformParameters ...@@ -62,7 +62,8 @@ struct EGLPlatformParameters
debugLayersEnabled, contextVirtualization, transformFeedbackFeature, debugLayersEnabled, contextVirtualization, transformFeedbackFeature,
allocateNonZeroMemoryFeature, emulateCopyTexImage2DFromRenderbuffers, allocateNonZeroMemoryFeature, emulateCopyTexImage2DFromRenderbuffers,
shaderStencilOutputFeature, genMultipleMipsPerPassFeature, platformMethods, shaderStencilOutputFeature, genMultipleMipsPerPassFeature, platformMethods,
robustness, emulatedPrerotation); robustness, emulatedPrerotation, hasExplicitMemBarrierFeatureMtl,
hasCheapRenderPassFeatureMtl);
} }
EGLint renderer = EGL_PLATFORM_ANGLE_TYPE_DEFAULT_ANGLE; EGLint renderer = EGL_PLATFORM_ANGLE_TYPE_DEFAULT_ANGLE;
...@@ -79,6 +80,8 @@ struct EGLPlatformParameters ...@@ -79,6 +80,8 @@ struct EGLPlatformParameters
EGLint shaderStencilOutputFeature = EGL_DONT_CARE; EGLint shaderStencilOutputFeature = EGL_DONT_CARE;
EGLint genMultipleMipsPerPassFeature = EGL_DONT_CARE; EGLint genMultipleMipsPerPassFeature = EGL_DONT_CARE;
uint32_t emulatedPrerotation = 0; // Can be 0, 90, 180 or 270 uint32_t emulatedPrerotation = 0; // Can be 0, 90, 180 or 270
EGLint hasExplicitMemBarrierFeatureMtl = EGL_DONT_CARE;
EGLint hasCheapRenderPassFeatureMtl = EGL_DONT_CARE;
angle::PlatformMethods *platformMethods = nullptr; angle::PlatformMethods *platformMethods = nullptr;
}; };
......
...@@ -221,6 +221,16 @@ bool EGLWindow::initializeDisplay(OSWindow *osWindow, ...@@ -221,6 +221,16 @@ bool EGLWindow::initializeDisplay(OSWindow *osWindow,
break; break;
} }
if (params.hasExplicitMemBarrierFeatureMtl == EGL_FALSE)
{
disabledFeatureOverrides.push_back("has_explicit_mem_barrier_mtl");
}
if (params.hasCheapRenderPassFeatureMtl == EGL_FALSE)
{
disabledFeatureOverrides.push_back("has_cheap_render_pass_mtl");
}
if (!disabledFeatureOverrides.empty()) if (!disabledFeatureOverrides.empty())
{ {
if (strstr(extensionString, "EGL_ANGLE_feature_control") == nullptr) if (strstr(extensionString, "EGL_ANGLE_feature_control") == nullptr)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment