Commit 69da0b92 by Le Hoang Quyen Committed by Commit Bot

Metal: Use shared memory for small dynamic buffers.

- If BufferMtl is static or large size, don't use shadow copy. Use one MTLBuffer and map directly on it. - If BufferMtl is dynamic and small size, use shadow copy and buffer pool of 10 MTLBuffer (s). The MTLBuffer is allocated in shared memory in this case (PCI-E memory for example). MTLBuffer in shared memory region doesn't need to sync content between CPU and GPU. - When copyBuffer, if BufferMtl is being used by GPU use blit command to do the copy on GPU side. - Also implemented GL_MAP_UNSYNCHRONIZED_BIT. Bug: angleproject:2634 Change-Id: I7a5aab309d24c76106a7087358ee5883ee05d250 Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2408592 Commit-Queue: Le Hoang Quyen <le.hoang.q@gmail.com> Reviewed-by: Jonah Ryan-Davis <jonahr@google.com> Reviewed-by: Jamie Madill <jmadill@chromium.org>
parent d59bccb5
......@@ -47,7 +47,7 @@ ConversionBufferMtl::ConversionBufferMtl(ContextMtl *contextMtl,
size_t alignment)
: dirty(true), convertedBuffer(nullptr), convertedOffset(0)
{
data.initialize(contextMtl, initialSize, alignment);
data.initialize(contextMtl, initialSize, alignment, 0);
}
ConversionBufferMtl::~ConversionBufferMtl() = default;
......@@ -123,6 +123,17 @@ angle::Result BufferMtl::copySubData(const gl::Context *context,
ContextMtl *contextMtl = mtl::GetImpl(context);
auto srcMtl = GetAs<BufferMtl>(source);
if (srcMtl->clientShadowCopyDataNeedSync(contextMtl) || mBuffer->isBeingUsedByGPU(contextMtl))
{
// If shadow copy requires a synchronization then use blit command instead.
// It might break a pending render pass, but still faster than synchronization with
// GPU.
mtl::BlitCommandEncoder *blitEncoder = contextMtl->getBlitCommandEncoder();
blitEncoder->copyBuffer(srcMtl->getCurrentBuffer(), sourceOffset, mBuffer, destOffset,
size);
return angle::Result::Continue;
}
return setSubDataImpl(context, srcMtl->getClientShadowCopyData(contextMtl) + sourceOffset, size,
destOffset);
}
......@@ -151,7 +162,16 @@ angle::Result BufferMtl::mapRange(const gl::Context *context,
if (mapPtr)
{
ContextMtl *contextMtl = mtl::GetImpl(context);
*mapPtr = syncAndObtainShadowCopy(contextMtl) + offset;
if (mBufferPool.getMaxBuffers() == 1)
{
*mapPtr = mBuffer->mapWithOpt(contextMtl, (access & GL_MAP_WRITE_BIT) == 0,
access & GL_MAP_UNSYNCHRONIZED_BIT) +
offset;
}
else
{
*mapPtr = syncAndObtainShadowCopy(contextMtl) + offset;
}
}
return angle::Result::Continue;
......@@ -159,11 +179,44 @@ angle::Result BufferMtl::mapRange(const gl::Context *context,
angle::Result BufferMtl::unmap(const gl::Context *context, GLboolean *result)
{
ASSERT(mShadowCopy.size());
ContextMtl *contextMtl = mtl::GetImpl(context);
size_t offset = static_cast<size_t>(mState.getMapOffset());
size_t len = static_cast<size_t>(mState.getMapLength());
markConversionBuffersDirty();
ANGLE_TRY(commitShadowCopy(context));
if (mBufferPool.getMaxBuffers() == 1)
{
ASSERT(mBuffer);
if (mState.getAccessFlags() & GL_MAP_WRITE_BIT)
{
mBuffer->unmapAndFlushSubset(contextMtl, offset, len);
}
else
{
// Buffer is already mapped with readonly flag, so just unmap it, no flushing will
// occur.
mBuffer->unmap(contextMtl);
}
}
else
{
ASSERT(mShadowCopy.size());
if (mState.getAccessFlags() & GL_MAP_UNSYNCHRONIZED_BIT)
{
// Copy the mapped region without synchronization with GPU
uint8_t *ptr =
mBuffer->mapWithOpt(contextMtl, /* readonly */ false, /* noSync */ true) + offset;
std::copy(mShadowCopy.data() + offset, mShadowCopy.data() + offset + len, ptr);
mBuffer->unmapAndFlushSubset(contextMtl, offset, len);
}
else
{
// commit shadow copy data to GPU synchronously
ANGLE_TRY(commitShadowCopy(context));
}
}
return angle::Result::Continue;
}
......@@ -175,8 +228,6 @@ angle::Result BufferMtl::getIndexRange(const gl::Context *context,
bool primitiveRestartEnabled,
gl::IndexRange *outRange)
{
ASSERT(mShadowCopy.size());
const uint8_t *indices = getClientShadowCopyData(mtl::GetImpl(context)) + offset;
*outRange = gl::ComputeIndexRange(type, indices, count, primitiveRestartEnabled);
......@@ -190,8 +241,6 @@ angle::Result BufferMtl::getFirstLastIndices(ContextMtl *contextMtl,
size_t count,
std::pair<uint32_t, uint32_t> *outIndices)
{
ASSERT(mShadowCopy.size());
const uint8_t *indices = getClientShadowCopyData(contextMtl) + offset;
switch (type)
......@@ -220,6 +269,11 @@ void BufferMtl::onDataChanged()
/* public */
// Returns a CPU-readable pointer to the buffer's current contents.
// When the pool holds a single buffer (static/large buffer case) there is no
// shadow copy, so the MTLBuffer is mapped read-only directly.
// NOTE(review): mapReadOnly leaves the buffer mapped; presumably the caller is
// responsible for the matching unmap — confirm against call sites.
const uint8_t *BufferMtl::getClientShadowCopyData(ContextMtl *contextMtl)
{
if (mBufferPool.getMaxBuffers() == 1)
{
// Don't need shadow copy in this case, use the buffer directly
return mBuffer->mapReadOnly(contextMtl);
}
// Multi-buffer (dynamic) case: sync GPU-side changes into the CPU shadow
// copy and return a pointer into it.
return syncAndObtainShadowCopy(contextMtl);
}
......@@ -230,9 +284,8 @@ bool BufferMtl::clientShadowCopyDataNeedSync(ContextMtl *contextMtl)
void BufferMtl::ensureShadowCopySyncedFromGPU(ContextMtl *contextMtl)
{
if (clientShadowCopyDataNeedSync(contextMtl))
if (mBuffer->isCPUReadMemDirty())
{
// Copy data from GPU to shadow copy.
const uint8_t *ptr = mBuffer->mapReadOnly(contextMtl);
memcpy(mShadowCopy.data(), ptr, size());
mBuffer->unmap(contextMtl);
......@@ -334,39 +387,64 @@ angle::Result BufferMtl::setDataImpl(const gl::Context *context,
case gl::BufferUsage::StaticCopy:
case gl::BufferUsage::StaticDraw:
case gl::BufferUsage::StaticRead:
maxBuffers = 1; // static buffer doesn't need high speed data update
// NOTE(hqle): We don't really need buffer pool in this case. Consider disabling shadow
// copy in this case.
case gl::BufferUsage::DynamicRead:
case gl::BufferUsage::StreamRead:
maxBuffers = 1; // static/read buffer doesn't need high speed data update
mBufferPool.setAlwaysUseGPUMem();
break;
default:
// dynamic buffer, allow up to 2 update per frame/encoding without
// dynamic buffer, allow up to 10 update per frame/encoding without
// waiting for GPU.
// NOTE(hqle): If buffer size is too large, we should not use buffer pool, instead a
// single buffer should be used.
maxBuffers = 2;
if (adjustedSize <= mtl::kSharedMemBufferMaxBufSizeHint)
{
maxBuffers = 10;
mBufferPool.setAlwaysUseSharedMem();
}
else
{
maxBuffers = 1;
mBufferPool.setAlwaysUseGPUMem();
}
break;
}
// Re-create the buffer
mBuffer = nullptr;
mBufferPool.initialize(contextMtl, adjustedSize, 1, maxBuffers);
ANGLE_TRY(mBufferPool.reset(contextMtl, adjustedSize, 1, maxBuffers));
// We use shadow copy to maintain consistent data between buffers in pool
ANGLE_MTL_CHECK(contextMtl, mShadowCopy.resize(adjustedSize), GL_OUT_OF_MEMORY);
if (data)
if (maxBuffers > 1)
{
// Transfer data to shadow copy buffer
auto ptr = static_cast<const uint8_t *>(data);
std::copy(ptr, ptr + intendedSize, mShadowCopy.data());
// We use shadow copy to maintain consistent data between buffers in pool
ANGLE_MTL_CHECK(contextMtl, mShadowCopy.resize(adjustedSize), GL_OUT_OF_MEMORY);
if (data)
{
// Transfer data to shadow copy buffer
auto ptr = static_cast<const uint8_t *>(data);
std::copy(ptr, ptr + intendedSize, mShadowCopy.data());
// Transfer data from shadow copy buffer to GPU buffer.
ANGLE_TRY(commitShadowCopy(context, adjustedSize));
// Transfer data from shadow copy buffer to GPU buffer.
ANGLE_TRY(commitShadowCopy(context, adjustedSize));
}
else
{
// This is needed so that first buffer pointer could be available
ANGLE_TRY(commitShadowCopy(context, 0));
}
}
else
{
// This is needed so that first buffer pointer could be available
ANGLE_TRY(commitShadowCopy(context, 0));
// We don't need shadow copy if there will be only one buffer in the pool.
ANGLE_MTL_CHECK(contextMtl, mShadowCopy.resize(0), GL_OUT_OF_MEMORY);
// Allocate one buffer to use
ANGLE_TRY(
mBufferPool.allocate(contextMtl, adjustedSize, nullptr, &mBuffer, nullptr, nullptr));
if (data)
{
ANGLE_TRY(setSubDataImpl(context, data, intendedSize, 0));
}
}
#ifndef NDEBUG
......@@ -400,17 +478,27 @@ angle::Result BufferMtl::setSubDataImpl(const gl::Context *context,
markConversionBuffersDirty();
ASSERT(mShadowCopy.size());
if (mBufferPool.getMaxBuffers() == 1)
{
ASSERT(mBuffer);
uint8_t *ptr = mBuffer->map(contextMtl);
std::copy(srcPtr, srcPtr + sizeToCopy, ptr + offset);
mBuffer->unmapAndFlushSubset(contextMtl, offset, sizeToCopy);
}
else
{
ASSERT(mShadowCopy.size());
// 1. Before copying data from client, we need to synchronize modified data from GPU to shadow
// copy first.
ensureShadowCopySyncedFromGPU(contextMtl);
// 1. Before copying data from client, we need to synchronize modified data from GPU to
// shadow copy first.
ensureShadowCopySyncedFromGPU(contextMtl);
// 2. Copy data from client to shadow copy.
std::copy(srcPtr, srcPtr + sizeToCopy, mShadowCopy.data() + offset);
// 2. Copy data from client to shadow copy.
std::copy(srcPtr, srcPtr + sizeToCopy, mShadowCopy.data() + offset);
// 3. Copy data from shadow copy to GPU.
ANGLE_TRY(commitShadowCopy(context));
// 3. Copy data from shadow copy to GPU.
ANGLE_TRY(commitShadowCopy(context));
}
return angle::Result::Continue;
}
......
......@@ -158,7 +158,8 @@ VertexArrayMtl::VertexArrayMtl(const gl::VertexArrayState &state, ContextMtl *co
mDynamicVertexData.initialize(context, 0, mtl::kVertexAttribBufferStrideAlignment,
mtl::kMaxVertexAttribs);
mDynamicIndexData.initialize(context, kDynamicIndexDataSize, mtl::kIndexBufferOffsetAlignment);
mDynamicIndexData.initialize(context, kDynamicIndexDataSize, mtl::kIndexBufferOffsetAlignment,
0);
}
VertexArrayMtl::~VertexArrayMtl() {}
......
......@@ -12,6 +12,8 @@
#include "libANGLE/renderer/metal/mtl_resources.h"
#include <deque>
namespace rx
{
......@@ -20,6 +22,20 @@ class ContextMtl;
namespace mtl
{
// Memory placement policy for buffers allocated by BufferPool.
enum class BufferPoolMemPolicy
{
// Always allocate buffer in shared memory, useful for dynamic small buffer.
// This translates to MTLResourceStorageModeShared.
AlwaysSharedMem,
// Always allocate buffer in GPU dedicated memory. Note: a CPU side copy is also allocated so
// that buffer can still be mapped on CPU side.
// This translates to MTLResourceStorageModeManaged on macOS or MTLResourceStorageModeShared on
// iOS.
AlwaysGPUMem,
// Auto allocate buffer in shared memory if it is small. GPU otherwise.
Auto,
};
// A buffer pool is conceptually an infinitely long buffer. Each time you write to the buffer,
// you will always write to a previously unused portion. After a series of writes, you must flush
// the buffer data to the device. Buffer lifetime currently assumes that each new allocation will
......@@ -34,16 +50,24 @@ namespace mtl
class BufferPool
{
public:
// alwaysAllocNewBuffer=true will always allocate new buffer or reuse free buffer on allocate(),
// regardless of whether current buffer still has unused portion or not.
BufferPool(bool alwaysAllocNewBuffer = false);
BufferPool();
// - alwaysAllocNewBuffer=true will always allocate new buffer or reuse free buffer on
// allocate(), regardless of whether current buffer still has unused portion or not.
// - memPolicy: indicate the allocated buffers should be in shared memory or not.
// See BufferPoolMemPolicy.
BufferPool(bool alwaysAllocNewBuffer);
BufferPool(bool alwaysAllocNewBuffer, BufferPoolMemPolicy memPolicy);
~BufferPool();
// Init is called after the buffer creation so that the alignment can be specified later.
void initialize(ContextMtl *contextMtl,
size_t initialSize,
size_t alignment,
size_t maxBuffers = 0);
void initialize(Context *context, size_t initialSize, size_t alignment, size_t maxBuffers);
// Calling this without initialize() will have same effect as calling initialize().
// If called after initialize(), the old pending buffers will be flushed and might be re-used if
// their size are big enough for the requested initialSize parameter.
angle::Result reset(ContextMtl *contextMtl,
size_t initialSize,
size_t alignment,
size_t maxBuffers);
// This call will allocate a new region at the end of the buffer. It internally may trigger
// a new buffer to be created (which is returned in the optional parameter
......@@ -56,7 +80,7 @@ class BufferPool
size_t *offsetOut = nullptr,
bool *newBufferAllocatedOut = nullptr);
// After a sequence of writes, call commit to ensure the data is visible to the device.
// After a sequence of CPU writes, call commit to ensure the data is visible to the device.
angle::Result commit(ContextMtl *contextMtl);
// This releases all the buffers that have been allocated since this was last called.
......@@ -68,28 +92,42 @@ class BufferPool
const BufferRef &getCurrentBuffer() { return mBuffer; }
size_t getAlignment() { return mAlignment; }
void updateAlignment(ContextMtl *contextMtl, size_t alignment);
void updateAlignment(Context *context, size_t alignment);
size_t getMaxBuffers() const { return mMaxBuffers; }
// Set whether allocate() will always allocate new buffer or attempting to append to previous
// buffer or not. Default is false.
void setAlwaysAllocateNewBuffer(bool e) { mAlwaysAllocateNewBuffer = e; }
void setMemoryPolicy(BufferPoolMemPolicy policy) { mMemPolicy = policy; }
// Set all subsequent allocated buffers should always use shared memory
void setAlwaysUseSharedMem() { setMemoryPolicy(BufferPoolMemPolicy::AlwaysSharedMem); }
// Set all subsequent allocated buffers should always use GPU memory
void setAlwaysUseGPUMem() { setMemoryPolicy(BufferPoolMemPolicy::AlwaysGPUMem); }
private:
bool shouldAllocateInSharedMem(ContextMtl *contextMtl) const;
void reset();
angle::Result allocateNewBuffer(ContextMtl *contextMtl);
void destroyBufferList(ContextMtl *contextMtl, std::vector<BufferRef> *buffers);
void destroyBufferList(ContextMtl *contextMtl, std::deque<BufferRef> *buffers);
angle::Result finalizePendingBuffer(ContextMtl *contextMtl);
size_t mInitialSize;
BufferRef mBuffer;
uint32_t mNextAllocationOffset;
uint32_t mLastFlushOffset;
size_t mSize;
size_t mAlignment;
std::vector<BufferRef> mInFlightBuffers;
std::vector<BufferRef> mBufferFreeList;
std::deque<BufferRef> mInFlightBuffers;
std::deque<BufferRef> mBufferFreeList;
size_t mBuffersAllocated;
size_t mMaxBuffers;
BufferPoolMemPolicy mMemPolicy;
bool mAlwaysAllocateNewBuffer;
};
......
......@@ -10,6 +10,7 @@
#include "libANGLE/renderer/metal/mtl_buffer_pool.h"
#include "libANGLE/renderer/metal/ContextMtl.h"
#include "libANGLE/renderer/metal/DisplayMtl.h"
namespace rx
{
......@@ -18,35 +19,110 @@ namespace mtl
{
// BufferPool implementation.
// Default: do not force a new buffer per allocate(), use Auto memory policy.
BufferPool::BufferPool() : BufferPool(false) {}
// Delegates to the two-argument constructor with the Auto memory policy.
BufferPool::BufferPool(bool alwaysAllocNewBuffer)
: BufferPool(alwaysAllocNewBuffer, BufferPoolMemPolicy::Auto)
{}
// Full constructor: zero-initializes all bookkeeping; alignment defaults to 1
// until initialize()/reset() supplies the real value.
BufferPool::BufferPool(bool alwaysAllocNewBuffer, BufferPoolMemPolicy policy)
: mInitialSize(0),
mBuffer(nullptr),
mNextAllocationOffset(0),
mLastFlushOffset(0),
mSize(0),
mAlignment(1),
mBuffersAllocated(0),
mMaxBuffers(0),
mMemPolicy(policy),
mAlwaysAllocateNewBuffer(alwaysAllocNewBuffer)
{}
void BufferPool::initialize(ContextMtl *contextMtl,
// Re-initializes the pool for a new initialSize/alignment/maxBuffers
// configuration. Unlike initialize(), this may be called on an already
// initialized pool: pending and in-flight buffers are retired first, and the
// existing free list is kept and recycled when the old buffers are still big
// enough for the new size, avoiding re-allocations.
angle::Result BufferPool::reset(ContextMtl *contextMtl,
size_t initialSize,
size_t alignment,
size_t maxBuffers)
{
// Retire the currently mapped buffer and reclaim buffers the GPU is done with.
ANGLE_TRY(finalizePendingBuffer(contextMtl));
releaseInFlightBuffers(contextMtl);
mSize = 0;
// NOTE(review): this compares the *old* mInitialSize against the free-list
// buffer size; mInitialSize is only updated below — confirm intended.
if (mBufferFreeList.size() && mInitialSize <= mBufferFreeList.front()->size())
{
// Instead of deleting old buffers, we should reset them to avoid excessive
// memory re-allocations
if (maxBuffers && mBufferFreeList.size() > maxBuffers)
{
// Shrink the free list to the new cap.
mBufferFreeList.resize(maxBuffers);
mBuffersAllocated = maxBuffers;
}
mSize = mBufferFreeList.front()->size();
for (size_t i = 0; i < mBufferFreeList.size(); ++i)
{
BufferRef &buffer = mBufferFreeList[i];
if (!buffer->isBeingUsedByGPU(contextMtl))
{
// If buffer is not used by GPU, re-use it immediately.
continue;
}
// Buffer is still referenced by in-flight GPU work: replace its
// underlying MTLBuffer so we don't stall or stomp on GPU reads.
bool useSharedMem = shouldAllocateInSharedMem(contextMtl);
if (IsError(buffer->resetWithSharedMemOpt(contextMtl, useSharedMem, mSize, nullptr)))
{
// On allocation failure, drop the whole free list and start fresh.
mBufferFreeList.clear();
mBuffersAllocated = 0;
mSize = 0;
break;
}
}
}
else
{
// Old buffers are too small for the requested size; discard them.
mBufferFreeList.clear();
mBuffersAllocated = 0;
}
mInitialSize = initialSize;
mMaxBuffers = maxBuffers;
updateAlignment(contextMtl, alignment);
return angle::Result::Continue;
}
void BufferPool::initialize(Context *context,
size_t initialSize,
size_t alignment,
size_t maxBuffers)
{
destroy(contextMtl);
if (mBuffersAllocated)
{
// Invalid call, must call destroy() first.
UNREACHABLE();
}
mInitialSize = initialSize;
mSize = 0;
mMaxBuffers = maxBuffers;
updateAlignment(contextMtl, alignment);
updateAlignment(context, alignment);
}
BufferPool::~BufferPool() {}
// Decides whether the next buffer allocation should live in shared
// (CPU-visible) memory, based on the pool's memory policy. Under the Auto
// policy, small buffers (<= kSharedMemBufferMaxBufSizeHint) go to shared
// memory; larger ones use GPU-dedicated memory.
bool BufferPool::shouldAllocateInSharedMem(ContextMtl *contextMtl) const
{
    if (mMemPolicy == BufferPoolMemPolicy::AlwaysSharedMem)
    {
        return true;
    }
    if (mMemPolicy == BufferPoolMemPolicy::AlwaysGPUMem)
    {
        return false;
    }
    // Auto policy: choose by current pool buffer size.
    return mSize <= kSharedMemBufferMaxBufSizeHint;
}
angle::Result BufferPool::allocateNewBuffer(ContextMtl *contextMtl)
{
if (mMaxBuffers > 0 && mBuffersAllocated >= mMaxBuffers)
......@@ -77,7 +153,9 @@ angle::Result BufferPool::allocateNewBuffer(ContextMtl *contextMtl)
return angle::Result::Continue;
}
ANGLE_TRY(Buffer::MakeBuffer(contextMtl, mSize, nullptr, &mBuffer));
bool useSharedMem = shouldAllocateInSharedMem(contextMtl);
ANGLE_TRY(
Buffer::MakeBufferWithSharedMemOpt(contextMtl, useSharedMem, mSize, nullptr, &mBuffer));
ASSERT(mBuffer);
......@@ -99,11 +177,13 @@ angle::Result BufferPool::allocate(ContextMtl *contextMtl,
checkedNextWriteOffset += sizeToAllocate;
if (!mBuffer || !checkedNextWriteOffset.IsValid() ||
checkedNextWriteOffset.ValueOrDie() >= mSize || mAlwaysAllocateNewBuffer)
checkedNextWriteOffset.ValueOrDie() >= mSize ||
// If the current buffer has been modified by GPU, do not reuse it:
mBuffer->isCPUReadMemNeedSync() || mAlwaysAllocateNewBuffer)
{
if (mBuffer)
{
ANGLE_TRY(commit(contextMtl));
ANGLE_TRY(finalizePendingBuffer(contextMtl));
}
if (sizeToAllocate > mSize)
......@@ -129,6 +209,7 @@ angle::Result BufferPool::allocate(ContextMtl *contextMtl,
ASSERT(mBuffer->size() == mSize);
mNextAllocationOffset = 0;
mLastFlushOffset = 0;
if (newBufferAllocatedOut != nullptr)
{
......@@ -150,7 +231,10 @@ angle::Result BufferPool::allocate(ContextMtl *contextMtl,
// Optionally map() the buffer if possible
if (ptrOut)
{
*ptrOut = mBuffer->map(contextMtl) + mNextAllocationOffset;
// We don't need to synchronize with GPU access, since allocation should return a
// non-overlapped region each time.
*ptrOut = mBuffer->mapWithOpt(contextMtl, /** readOnly */ false, /** noSync */ true) +
mNextAllocationOffset;
}
if (offsetOut)
......@@ -163,15 +247,28 @@ angle::Result BufferPool::allocate(ContextMtl *contextMtl,
// Flushes the CPU-written range [mLastFlushOffset, mNextAllocationOffset) of
// the current buffer so the data is visible to the GPU. No-op when nothing
// new has been written since the last flush.
angle::Result BufferPool::commit(ContextMtl *contextMtl)
{
if (mBuffer && mNextAllocationOffset > mLastFlushOffset)
{
mBuffer->flush(contextMtl, mLastFlushOffset, mNextAllocationOffset - mLastFlushOffset);
mLastFlushOffset = mNextAllocationOffset;
}
return angle::Result::Continue;
}
angle::Result BufferPool::finalizePendingBuffer(ContextMtl *contextMtl)
{
if (mBuffer)
{
mBuffer->unmap(contextMtl);
ANGLE_TRY(commit(contextMtl));
// commit() already flushes so no need to flush here.
mBuffer->unmapNoFlush(contextMtl);
mInFlightBuffers.push_back(mBuffer);
mBuffer = nullptr;
}
mNextAllocationOffset = 0;
mLastFlushOffset = 0;
return angle::Result::Continue;
}
......@@ -181,7 +278,12 @@ void BufferPool::releaseInFlightBuffers(ContextMtl *contextMtl)
for (auto &toRelease : mInFlightBuffers)
{
// If the dynamic buffer was resized we cannot reuse the retained buffer.
if (toRelease->size() < mSize)
if (toRelease->size() < mSize
#if TARGET_OS_OSX || TARGET_OS_MACCATALYST
// Also release buffer if it was allocated in different policy
|| toRelease->useSharedMem() != shouldAllocateInSharedMem(contextMtl)
#endif
)
{
toRelease = nullptr;
mBuffersAllocated--;
......@@ -195,7 +297,7 @@ void BufferPool::releaseInFlightBuffers(ContextMtl *contextMtl)
mInFlightBuffers.clear();
}
void BufferPool::destroyBufferList(ContextMtl *contextMtl, std::vector<BufferRef> *buffers)
void BufferPool::destroyBufferList(ContextMtl *contextMtl, std::deque<BufferRef> *buffers)
{
ASSERT(mBuffersAllocated >= buffers->size());
mBuffersAllocated -= buffers->size();
......@@ -217,19 +319,25 @@ void BufferPool::destroy(ContextMtl *contextMtl)
}
}
void BufferPool::updateAlignment(ContextMtl *contextMtl, size_t alignment)
void BufferPool::updateAlignment(Context *context, size_t alignment)
{
ASSERT(alignment > 0);
// NOTE(hqle): May check additional platform limits.
mAlignment = alignment;
// If alignment has changed, make sure the next allocation is done at an aligned offset.
if (alignment != mAlignment)
{
mNextAllocationOffset = roundUp(mNextAllocationOffset, static_cast<uint32_t>(alignment));
mAlignment = alignment;
}
}
void BufferPool::reset()
{
mSize = 0;
mNextAllocationOffset = 0;
mLastFlushOffset = 0;
mMaxBuffers = 0;
mAlwaysAllocateNewBuffer = false;
mBuffersAllocated = 0;
......
......@@ -99,6 +99,11 @@ constexpr uint32_t kMaxVertexAttribs = gl::MAX_VERTEX_ATTRIBS;
// NOTE(hqle): support variable max number of render targets
constexpr uint32_t kMaxRenderTargets = 4;
// The max size of a buffer that will be allocated in shared memory.
// NOTE(hqle): This is just a hint. There is no official document on what is the max allowed size
// for shared memory.
constexpr size_t kSharedMemBufferMaxBufSizeHint = 128 * 1024;
constexpr size_t kDefaultAttributeSize = 4 * sizeof(float);
// Metal limits
......
......@@ -257,6 +257,12 @@ class Buffer final : public Resource, public WrappedObject<id<MTLBuffer>>
const uint8_t *data,
BufferRef *bufferOut);
static angle::Result MakeBufferWithSharedMemOpt(ContextMtl *context,
bool forceUseSharedMem,
size_t size,
const uint8_t *data,
BufferRef *bufferOut);
static angle::Result MakeBufferWithResOpt(ContextMtl *context,
MTLResourceOptions resourceOptions,
size_t size,
......@@ -264,23 +270,33 @@ class Buffer final : public Resource, public WrappedObject<id<MTLBuffer>>
BufferRef *bufferOut);
angle::Result reset(ContextMtl *context, size_t size, const uint8_t *data);
angle::Result resetWithSharedMemOpt(ContextMtl *context,
bool forceUseSharedMem,
size_t size,
const uint8_t *data);
angle::Result resetWithResOpt(ContextMtl *context,
MTLResourceOptions resourceOptions,
size_t size,
const uint8_t *data);
const uint8_t *mapReadOnly(ContextMtl *context);
uint8_t *mapWithOpt(ContextMtl *context, bool readonly);
uint8_t *map(ContextMtl *context);
uint8_t *mapWithOpt(ContextMtl *context, bool readonly, bool noSync);
void unmap(ContextMtl *context);
// Same as unmap but do not do implicit flush()
void unmapNoFlush(ContextMtl *context);
void unmapAndFlushSubset(ContextMtl *context, size_t offsetWritten, size_t sizeWritten);
void flush(ContextMtl *context, size_t offsetWritten, size_t sizeWritten);
size_t size() const;
bool useSharedMem() const;
// Explicitly sync content between CPU and GPU
void syncContent(ContextMtl *context, mtl::BlitCommandEncoder *encoder);
private:
Buffer(ContextMtl *context, size_t size, const uint8_t *data);
Buffer(ContextMtl *context, bool forceUseSharedMem, size_t size, const uint8_t *data);
Buffer(ContextMtl *context,
MTLResourceOptions resourceOptions,
size_t size,
......
......@@ -654,7 +654,17 @@ angle::Result Buffer::MakeBuffer(ContextMtl *context,
const uint8_t *data,
BufferRef *bufferOut)
{
bufferOut->reset(new Buffer(context, size, data));
return MakeBufferWithSharedMemOpt(context, false, size, data, bufferOut);
}
angle::Result Buffer::MakeBufferWithSharedMemOpt(ContextMtl *context,
bool forceUseSharedMem,
size_t size,
const uint8_t *data,
BufferRef *bufferOut)
{
bufferOut->reset(new Buffer(context, forceUseSharedMem, size, data));
if (!bufferOut || !bufferOut->get())
{
......@@ -680,9 +690,9 @@ angle::Result Buffer::MakeBufferWithResOpt(ContextMtl *context,
return angle::Result::Continue;
}
Buffer::Buffer(ContextMtl *context, size_t size, const uint8_t *data)
Buffer::Buffer(ContextMtl *context, bool forceUseSharedMem, size_t size, const uint8_t *data)
{
(void)reset(context, size, data);
(void)resetWithSharedMemOpt(context, forceUseSharedMem, size, data);
}
Buffer::Buffer(ContextMtl *context, MTLResourceOptions options, size_t size, const uint8_t *data)
......@@ -692,12 +702,27 @@ Buffer::Buffer(ContextMtl *context, MTLResourceOptions options, size_t size, con
// Recreates the underlying MTLBuffer with the default (non-forced-shared)
// storage mode; thin wrapper around resetWithSharedMemOpt().
angle::Result Buffer::reset(ContextMtl *context, size_t size, const uint8_t *data)
{
return resetWithSharedMemOpt(context, false, size, data);
}
angle::Result Buffer::resetWithSharedMemOpt(ContextMtl *context,
bool forceUseSharedMem,
size_t size,
const uint8_t *data)
{
MTLResourceOptions options;
options = 0;
#if TARGET_OS_OSX || TARGET_OS_MACCATALYST
options |= MTLResourceStorageModeManaged;
if (!forceUseSharedMem)
{
options |= MTLResourceStorageModeManaged;
}
else
#endif
{
options |= MTLResourceStorageModeShared;
}
return resetWithResOpt(context, options, size, data);
}
......@@ -737,49 +762,77 @@ void Buffer::syncContent(ContextMtl *context, mtl::BlitCommandEncoder *blitEncod
const uint8_t *Buffer::mapReadOnly(ContextMtl *context)
{
return mapWithOpt(context, true);
return mapWithOpt(context, true, false);
}
uint8_t *Buffer::map(ContextMtl *context)
{
return mapWithOpt(context, false);
return mapWithOpt(context, false, false);
}
uint8_t *Buffer::mapWithOpt(ContextMtl *context, bool readonly)
uint8_t *Buffer::mapWithOpt(ContextMtl *context, bool readonly, bool noSync)
{
mMapReadOnly = readonly;
CommandQueue &cmdQueue = context->cmdQueue();
if (!noSync)
{
CommandQueue &cmdQueue = context->cmdQueue();
EnsureCPUMemWillBeSynced(context, this);
EnsureCPUMemWillBeSynced(context, this);
if (this->isBeingUsedByGPU(context))
{
context->flushCommandBufer();
}
if (this->isBeingUsedByGPU(context))
{
context->flushCommandBufer();
}
cmdQueue.ensureResourceReadyForCPU(this);
cmdQueue.ensureResourceReadyForCPU(this);
}
return reinterpret_cast<uint8_t *>([get() contents]);
}
// Unmaps the buffer, flushing the entire range to the GPU (flush() is a no-op
// for read-only maps and for non-managed storage modes).
void Buffer::unmap(ContextMtl *context)
{
flush(context, 0, size());
// Reset read only flag
mMapReadOnly = true;
}
// Same as unmap() but without the implicit flush; used when the caller knows
// no CPU writes need to be made visible (e.g. read-only maps or explicit
// per-range flushes).
void Buffer::unmapNoFlush(ContextMtl *context)
{
mMapReadOnly = true;
}
// Unmaps the buffer, flushing only the written subrange. The flush is only
// needed on macOS/Catalyst where managed storage requires didModifyRange;
// shared-memory buffers (and iOS) need no CPU->GPU synchronization.
void Buffer::unmapAndFlushSubset(ContextMtl *context, size_t offsetWritten, size_t sizeWritten)
{
#if TARGET_OS_OSX || TARGET_OS_MACCATALYST
flush(context, offsetWritten, sizeWritten);
#endif
mMapReadOnly = true;
}
void Buffer::flush(ContextMtl *context, size_t offsetWritten, size_t sizeWritten)
{
#if TARGET_OS_OSX || TARGET_OS_MACCATALYST
if (!mMapReadOnly)
{
if (get().storageMode == MTLStorageModeManaged)
{
[get() didModifyRange:NSMakeRange(0, size())];
[get() didModifyRange:NSMakeRange(offsetWritten, sizeWritten)];
}
}
#endif
mMapReadOnly = true;
}
// Returns the length in bytes of the wrapped MTLBuffer.
size_t Buffer::size() const
{
return get().length;
}
// True when the wrapped MTLBuffer was allocated with shared storage mode
// (CPU and GPU see the same memory; no explicit sync needed).
bool Buffer::useSharedMem() const
{
return get().storageMode == MTLStorageModeShared;
}
}
}
......@@ -681,6 +681,87 @@ TEST_P(BufferDataTest, MapBufferOES)
EXPECT_EQ(data, actualData);
}
// Test to verify mapping a dynamic buffer with GL_MAP_UNSYNCHRONIZED_BIT to modify a portion
// won't affect draw calls using other portions.
TEST_P(BufferDataTest, MapDynamicBufferUnsynchronizedEXTTest)
{
ANGLE_SKIP_TEST_IF(!IsGLExtensionEnabled("GL_EXT_map_buffer_range"));
const char simpleVertex[] = R"(attribute vec2 position;
attribute vec4 color;
varying vec4 vColor;
void main()
{
gl_Position = vec4(position, 0, 1);
vColor = color;
}
)";
const char simpleFragment[] = R"(precision mediump float;
varying vec4 vColor;
void main()
{
gl_FragColor = vColor;
}
)";
// The buffer holds two sets of kNumVertices RGBA colors (8 bytes/vertex
// total): the first half is initialized to opaque red, the second half to
// zeros and later overwritten through the unsynchronized map.
constexpr int kNumVertices = 6;
std::vector<GLubyte> color(8 * kNumVertices);
for (int i = 0; i < kNumVertices; ++i)
{
color[4 * i] = 255;
color[4 * i + 3] = 255;
}
GLBuffer buffer;
glBindBuffer(GL_ARRAY_BUFFER, buffer.get());
glBufferData(GL_ARRAY_BUFFER, color.size(), color.data(), GL_DYNAMIC_DRAW);
ANGLE_GL_PROGRAM(program, simpleVertex, simpleFragment);
glUseProgram(program);
GLint colorLoc = glGetAttribLocation(program, "color");
ASSERT_NE(-1, colorLoc);
glVertexAttribPointer(colorLoc, 4, GL_UNSIGNED_BYTE, GL_TRUE, 0, nullptr);
glEnableVertexAttribArray(colorLoc);
// First draw reads the red colors at offset 0 while the buffer may still be
// in use by the GPU.
glViewport(0, 0, 2, 2);
drawQuad(program, "position", 0.5f, 1.0f, true);
ASSERT_GL_NO_ERROR();
// Map with GL_MAP_UNSYNCHRONIZED_BIT and overwrite buffers data at offset 24
uint8_t *data = reinterpret_cast<uint8_t *>(
glMapBufferRangeEXT(GL_ARRAY_BUFFER, 4 * kNumVertices, 4 * kNumVertices,
GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT));
EXPECT_GL_NO_ERROR();
// Write opaque green into the mapped second half.
for (int i = 0; i < kNumVertices; ++i)
{
data[4 * i] = 0;
data[4 * i + 1] = 255;
data[4 * i + 2] = 0;
data[4 * i + 3] = 255;
}
glUnmapBufferOES(GL_ARRAY_BUFFER);
EXPECT_GL_NO_ERROR();
// Re-draw using offset = 0 but to different viewport
glViewport(0, 2, 2, 2);
drawQuad(program, "position", 0.5f, 1.0f, true);
ASSERT_GL_NO_ERROR();
// Change vertex attribute to use buffer starting from offset 24
glVertexAttribPointer(colorLoc, 4, GL_UNSIGNED_BYTE, GL_TRUE, 0,
reinterpret_cast<void *>(4 * kNumVertices));
glViewport(2, 2, 2, 2);
drawQuad(program, "position", 0.5f, 1.0f, true);
ASSERT_GL_NO_ERROR();
// The unsynchronized write must not have corrupted the untouched first half
// (red draws), while the modified half must show the new green data.
EXPECT_PIXEL_COLOR_EQ(1, 1, GLColor::red);
EXPECT_PIXEL_COLOR_EQ(1, 3, GLColor::red);
EXPECT_PIXEL_COLOR_EQ(3, 3, GLColor::green);
}
// Tests a bug where copying buffer data immediately after creation hit a nullptr in D3D11.
TEST_P(BufferDataTestES3, NoBufferInitDataCopyBug)
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment