Commit d8f3faad by jbauman@chromium.org

Avoid resending lots of D3D state

This change uses trivial caching to determine whether to reset shaders, the viewport, and the currently set vertex declaration. It also caches the render target desc to avoid re-reading it. Serial numbers are added to vertex and index buffers, so resending those can be avoided. These changes can give a big speedup (30% has been measured) on simple content, particularly when used directly or through pepper/native client. BUG= TEST=bunch of pages using webgl Review URL: http://codereview.appspot.com/4964057 git-svn-id: https://angleproject.googlecode.com/svn/trunk@743 736b8ea6-26fd-11df-bfd4-992fa37f6226
parent 43cbe74e
#define MAJOR_VERSION 0
#define MINOR_VERSION 0
#define BUILD_VERSION 0
#define BUILD_REVISION 742
#define BUILD_REVISION 743
#define STRINGIFY(x) #x
#define MACRO_STRINGIFY(x) STRINGIFY(x)
......
......@@ -363,7 +363,12 @@ void Context::markAllStateDirty()
mAppliedRenderTargetSerial = 0;
mAppliedDepthbufferSerial = 0;
mAppliedStencilbufferSerial = 0;
mAppliedIBSerial = 0;
mDepthStencilInitialized = false;
mViewportInitialized = false;
mRenderTargetDescInitialized = false;
mVertexDeclarationCache.markStateDirty();
mClearStateDirty = true;
mCullStateDirty = true;
......@@ -1618,12 +1623,14 @@ bool Context::applyRenderTarget(bool ignoreViewport)
IDirect3DSurface9 *depthStencil = NULL;
bool renderTargetChanged = false;
unsigned int renderTargetSerial = framebufferObject->getRenderTargetSerial();
if (renderTargetSerial != mAppliedRenderTargetSerial)
{
device->SetRenderTarget(0, renderTarget);
mAppliedRenderTargetSerial = renderTargetSerial;
mScissorStateDirty = true; // Scissor area must be clamped to render target's size-- this is different for different render targets.
renderTargetChanged = true;
}
unsigned int depthbufferSerial = 0;
......@@ -1661,9 +1668,13 @@ bool Context::applyRenderTarget(bool ignoreViewport)
mDepthStencilInitialized = true;
}
if (!mRenderTargetDescInitialized || renderTargetChanged)
{
renderTarget->GetDesc(&mRenderTargetDesc);
mRenderTargetDescInitialized = true;
}
D3DVIEWPORT9 viewport;
D3DSURFACE_DESC desc;
renderTarget->GetDesc(&desc);
float zNear = clamp01(mState.zNear);
float zFar = clamp01(mState.zFar);
......@@ -1672,18 +1683,18 @@ bool Context::applyRenderTarget(bool ignoreViewport)
{
viewport.X = 0;
viewport.Y = 0;
viewport.Width = desc.Width;
viewport.Height = desc.Height;
viewport.Width = mRenderTargetDesc.Width;
viewport.Height = mRenderTargetDesc.Height;
viewport.MinZ = 0.0f;
viewport.MaxZ = 1.0f;
}
else
{
RECT rect = transformPixelRect(mState.viewportX, mState.viewportY, mState.viewportWidth, mState.viewportHeight, desc.Height);
viewport.X = clamp(rect.left, 0L, static_cast<LONG>(desc.Width));
viewport.Y = clamp(rect.top, 0L, static_cast<LONG>(desc.Height));
viewport.Width = clamp(rect.right - rect.left, 0L, static_cast<LONG>(desc.Width) - static_cast<LONG>(viewport.X));
viewport.Height = clamp(rect.bottom - rect.top, 0L, static_cast<LONG>(desc.Height) - static_cast<LONG>(viewport.Y));
RECT rect = transformPixelRect(mState.viewportX, mState.viewportY, mState.viewportWidth, mState.viewportHeight, mRenderTargetDesc.Height);
viewport.X = clamp(rect.left, 0L, static_cast<LONG>(mRenderTargetDesc.Width));
viewport.Y = clamp(rect.top, 0L, static_cast<LONG>(mRenderTargetDesc.Height));
viewport.Width = clamp(rect.right - rect.left, 0L, static_cast<LONG>(mRenderTargetDesc.Width) - static_cast<LONG>(viewport.X));
viewport.Height = clamp(rect.bottom - rect.top, 0L, static_cast<LONG>(mRenderTargetDesc.Height) - static_cast<LONG>(viewport.Y));
viewport.MinZ = zNear;
viewport.MaxZ = zFar;
}
......@@ -1693,17 +1704,22 @@ bool Context::applyRenderTarget(bool ignoreViewport)
return false; // Nothing to render
}
device->SetViewport(&viewport);
if (!mViewportInitialized || memcmp(&viewport, &mSetViewport, sizeof mSetViewport) != 0)
{
device->SetViewport(&viewport);
mSetViewport = viewport;
mViewportInitialized = true;
}
if (mScissorStateDirty)
{
if (mState.scissorTest)
{
RECT rect = transformPixelRect(mState.scissorX, mState.scissorY, mState.scissorWidth, mState.scissorHeight, desc.Height);
rect.left = clamp(rect.left, 0L, static_cast<LONG>(desc.Width));
rect.top = clamp(rect.top, 0L, static_cast<LONG>(desc.Height));
rect.right = clamp(rect.right, 0L, static_cast<LONG>(desc.Width));
rect.bottom = clamp(rect.bottom, 0L, static_cast<LONG>(desc.Height));
RECT rect = transformPixelRect(mState.scissorX, mState.scissorY, mState.scissorWidth, mState.scissorHeight, mRenderTargetDesc.Height);
rect.left = clamp(rect.left, 0L, static_cast<LONG>(mRenderTargetDesc.Width));
rect.top = clamp(rect.top, 0L, static_cast<LONG>(mRenderTargetDesc.Height));
rect.right = clamp(rect.right, 0L, static_cast<LONG>(mRenderTargetDesc.Width));
rect.bottom = clamp(rect.bottom, 0L, static_cast<LONG>(mRenderTargetDesc.Height));
device->SetScissorRect(&rect);
device->SetRenderState(D3DRS_SCISSORTESTENABLE, TRUE);
}
......@@ -2024,7 +2040,11 @@ GLenum Context::applyIndexBuffer(const void *indices, GLsizei count, GLenum mode
if (err == GL_NO_ERROR)
{
device->SetIndices(indexInfo->indexBuffer);
if (indexInfo->serial != mAppliedIBSerial)
{
device->SetIndices(indexInfo->indexBuffer);
mAppliedIBSerial = indexInfo->serial;
}
}
return err;
......@@ -2035,14 +2055,13 @@ void Context::applyShaders()
{
IDirect3DDevice9 *device = getDevice();
Program *programObject = getCurrentProgram();
IDirect3DVertexShader9 *vertexShader = programObject->getVertexShader();
IDirect3DPixelShader9 *pixelShader = programObject->getPixelShader();
device->SetVertexShader(vertexShader);
device->SetPixelShader(pixelShader);
if (programObject->getSerial() != mAppliedProgramSerial)
{
IDirect3DVertexShader9 *vertexShader = programObject->getVertexShader();
IDirect3DPixelShader9 *pixelShader = programObject->getPixelShader();
device->SetPixelShader(pixelShader);
device->SetVertexShader(vertexShader);
programObject->dirtyAllUniforms();
mAppliedProgramSerial = programObject->getSerial();
}
......@@ -2907,6 +2926,7 @@ void Context::drawClosingLine(unsigned int first, unsigned int last)
if (succeeded)
{
device->SetIndices(mClosingIB->getBuffer());
mAppliedIBSerial = mClosingIB->getSerial();
device->DrawIndexedPrimitive(D3DPT_LINELIST, 0, 0, last, offset, 1);
}
......@@ -3744,7 +3764,15 @@ GLenum VertexDeclarationCache::applyDeclaration(TranslatedAttribute attributes[]
{
if (attributes[i].active)
{
device->SetStreamSource(i, attributes[i].vertexBuffer, attributes[i].offset, attributes[i].stride);
if (mAppliedVBs[i].serial != attributes[i].serial ||
mAppliedVBs[i].stride != attributes[i].stride ||
mAppliedVBs[i].offset != attributes[i].offset)
{
device->SetStreamSource(i, attributes[i].vertexBuffer, attributes[i].offset, attributes[i].stride);
mAppliedVBs[i].serial = attributes[i].serial;
mAppliedVBs[i].stride = attributes[i].stride;
mAppliedVBs[i].offset = attributes[i].offset;
}
element->Stream = i;
element->Offset = 0;
......@@ -3765,8 +3793,12 @@ GLenum VertexDeclarationCache::applyDeclaration(TranslatedAttribute attributes[]
if (memcmp(entry->cachedElements, elements, (element - elements) * sizeof(D3DVERTEXELEMENT9)) == 0 && entry->vertexDeclaration)
{
entry->lruCount = ++mMaxLru;
device->SetVertexDeclaration(entry->vertexDeclaration);
if(entry->vertexDeclaration != mLastSetVDecl)
{
device->SetVertexDeclaration(entry->vertexDeclaration);
mLastSetVDecl = entry->vertexDeclaration;
}
return GL_NO_ERROR;
}
}
......@@ -3785,16 +3817,29 @@ GLenum VertexDeclarationCache::applyDeclaration(TranslatedAttribute attributes[]
{
lastCache->vertexDeclaration->Release();
lastCache->vertexDeclaration = NULL;
// mLastSetVDecl is set to the replacement, so we don't have to worry
// about it.
}
memcpy(lastCache->cachedElements, elements, (element - elements) * sizeof(D3DVERTEXELEMENT9));
device->CreateVertexDeclaration(elements, &lastCache->vertexDeclaration);
device->SetVertexDeclaration(lastCache->vertexDeclaration);
mLastSetVDecl = lastCache->vertexDeclaration;
lastCache->lruCount = ++mMaxLru;
return GL_NO_ERROR;
}
void VertexDeclarationCache::markStateDirty()
{
for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++)
{
mAppliedVBs[i].serial = 0;
}
mLastSetVDecl = NULL;
}
}
extern "C"
......
......@@ -230,11 +230,23 @@ class VertexDeclarationCache
GLenum applyDeclaration(TranslatedAttribute attributes[], Program *program);
void markStateDirty();
private:
UINT mMaxLru;
enum { NUM_VERTEX_DECL_CACHE_ENTRIES = 16 };
// Record of the vertex-buffer binding remembered for one attribute slot.
// applyDeclaration() compares these fields against the incoming attribute
// and only calls SetStreamSource when at least one of them differs.
struct VBData
{
// Serial of the vertex buffer last recorded for this slot; markStateDirty()
// resets it to 0.
unsigned int serial;
// Stride value last recorded for this slot.
unsigned int stride;
// Offset value last recorded for this slot.
unsigned int offset;
};
VBData mAppliedVBs[MAX_VERTEX_ATTRIBS];
IDirect3DVertexDeclaration9 *mLastSetVDecl;
struct VertexDeclCacheEntry
{
D3DVERTEXELEMENT9 cachedElements[MAX_VERTEX_ATTRIBS + 1];
......@@ -526,7 +538,12 @@ class Context
unsigned int mAppliedRenderTargetSerial;
unsigned int mAppliedDepthbufferSerial;
unsigned int mAppliedStencilbufferSerial;
unsigned int mAppliedIBSerial;
bool mDepthStencilInitialized;
bool mViewportInitialized;
D3DVIEWPORT9 mSetViewport;
bool mRenderTargetDescInitialized;
D3DSURFACE_DESC mRenderTargetDesc;
bool mSupportsShaderModel3;
bool mSupportsVertexTexture;
......
......@@ -22,6 +22,7 @@ namespace
namespace gl
{
unsigned int IndexBuffer::mCurrentSerial = 1;
IndexDataManager::IndexDataManager(Context *context, IDirect3DDevice9 *device) : mDevice(device)
{
......@@ -200,6 +201,7 @@ GLenum IndexDataManager::prepareIndexData(GLenum type, GLsizei count, Buffer *bu
}
translated->indexBuffer = indexBuffer->getBuffer();
translated->serial = indexBuffer->getSerial();
translated->startIndex = streamOffset / indexSize(format);
if (buffer)
......@@ -232,6 +234,7 @@ IndexBuffer::IndexBuffer(IDirect3DDevice9 *device, UINT size, D3DFORMAT format)
{
D3DPOOL pool = getDisplay()->getBufferPool(D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY);
HRESULT result = device->CreateIndexBuffer(size, D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, format, pool, &mIndexBuffer, NULL);
mSerial = issueSerial();
if (FAILED(result))
{
......@@ -253,6 +256,16 @@ IDirect3DIndexBuffer9 *IndexBuffer::getBuffer() const
return mIndexBuffer;
}
// Returns the serial number currently stored in mSerial for this index buffer.
unsigned int IndexBuffer::getSerial() const
{
return mSerial;
}
// Hands out the next index-buffer serial number and advances the shared
// counter.
//
// The value 0 is never issued: the context resets its applied index-buffer
// serial to 0 to mean "nothing applied", so a real buffer carrying serial 0
// would compare equal to that state and its SetIndices call would be skipped.
// When the unsigned counter wraps around, it is therefore restarted at 1.
unsigned int IndexBuffer::issueSerial()
{
    unsigned int serial = mCurrentSerial++;

    if (mCurrentSerial == 0)   // Wrapped around; skip the reserved value.
    {
        mCurrentSerial = 1;
    }

    return serial;
}
void IndexBuffer::unmap()
{
if (mIndexBuffer)
......@@ -305,6 +318,7 @@ void StreamingIndexBuffer::reserveSpace(UINT requiredSpace, GLenum type)
D3DPOOL pool = getDisplay()->getBufferPool(D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY);
HRESULT result = mDevice->CreateIndexBuffer(mBufferSize, D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, type == GL_UNSIGNED_INT ? D3DFMT_INDEX32 : D3DFMT_INDEX16, pool, &mIndexBuffer, NULL);
mSerial = issueSerial();
if (FAILED(result))
{
......@@ -358,6 +372,7 @@ void StaticIndexBuffer::reserveSpace(UINT requiredSpace, GLenum type)
{
D3DPOOL pool = getDisplay()->getBufferPool(D3DUSAGE_WRITEONLY);
HRESULT result = mDevice->CreateIndexBuffer(requiredSpace, D3DUSAGE_WRITEONLY, type == GL_UNSIGNED_INT ? D3DFMT_INDEX32 : D3DFMT_INDEX16, pool, &mIndexBuffer, NULL);
mSerial = issueSerial();
if (FAILED(result))
{
......
......@@ -28,6 +28,7 @@ struct TranslatedIndexData
UINT startIndex;
IDirect3DIndexBuffer9 *indexBuffer;
unsigned int serial;
};
class IndexBuffer
......@@ -42,6 +43,7 @@ class IndexBuffer
virtual void reserveSpace(UINT requiredSpace, GLenum type) = 0;
IDirect3DIndexBuffer9 *getBuffer() const;
unsigned int getSerial() const;
protected:
IDirect3DDevice9 *const mDevice;
......@@ -49,6 +51,10 @@ class IndexBuffer
IDirect3DIndexBuffer9 *mIndexBuffer;
UINT mBufferSize;
unsigned int mSerial;
static unsigned int issueSerial();
static unsigned int mCurrentSerial;
private:
DISALLOW_COPY_AND_ASSIGN(IndexBuffer);
};
......
......@@ -25,6 +25,7 @@ namespace
namespace gl
{
unsigned int VertexBuffer::mCurrentSerial = 1;
VertexDataManager::VertexDataManager(Context *context, IDirect3DDevice9 *device) : mContext(context), mDevice(device)
{
......@@ -234,6 +235,7 @@ GLenum VertexDataManager::prepareVertexData(GLint start, GLsizei count, Translat
}
translated[i].vertexBuffer = vertexBuffer->getBuffer();
translated[i].serial = vertexBuffer->getSerial();
translated[i].type = converter.d3dDeclType;
translated[i].stride = converter.outputElementSize;
translated[i].offset = streamOffset;
......@@ -248,6 +250,7 @@ GLenum VertexDataManager::prepareVertexData(GLint start, GLsizei count, Translat
}
translated[i].vertexBuffer = mCurrentValueBuffer[i]->getBuffer();
translated[i].serial = mCurrentValueBuffer[i]->getSerial();
translated[i].type = D3DDECLTYPE_FLOAT4;
translated[i].stride = 0;
......@@ -521,6 +524,7 @@ VertexBuffer::VertexBuffer(IDirect3DDevice9 *device, std::size_t size, DWORD usa
{
D3DPOOL pool = getDisplay()->getBufferPool(usageFlags);
HRESULT result = device->CreateVertexBuffer(size, usageFlags, 0, pool, &mVertexBuffer, NULL);
mSerial = issueSerial();
if (FAILED(result))
{
......@@ -550,6 +554,16 @@ IDirect3DVertexBuffer9 *VertexBuffer::getBuffer() const
return mVertexBuffer;
}
// Returns the serial number currently stored in mSerial for this vertex buffer.
unsigned int VertexBuffer::getSerial() const
{
return mSerial;
}
// Hands out the next vertex-buffer serial number and advances the shared
// counter.
//
// The value 0 is never issued: the vertex declaration cache resets its
// remembered per-slot serials to 0 to mean "nothing applied", so a real
// buffer carrying serial 0 could compare equal to that state and its
// SetStreamSource call could be skipped. When the unsigned counter wraps
// around, it is therefore restarted at 1.
unsigned int VertexBuffer::issueSerial()
{
    unsigned int serial = mCurrentSerial++;

    if (mCurrentSerial == 0)   // Wrapped around; skip the reserved value.
    {
        mCurrentSerial = 1;
    }

    return serial;
}
ConstantVertexBuffer::ConstantVertexBuffer(IDirect3DDevice9 *device, float x, float y, float z, float w) : VertexBuffer(device, 4 * sizeof(float), D3DUSAGE_WRITEONLY)
{
void *buffer = NULL;
......@@ -640,6 +654,7 @@ void StreamingVertexBuffer::reserveRequiredSpace()
D3DPOOL pool = getDisplay()->getBufferPool(D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY);
HRESULT result = mDevice->CreateVertexBuffer(mBufferSize, D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, 0, pool, &mVertexBuffer, NULL);
mSerial = issueSerial();
if (FAILED(result))
{
......@@ -702,7 +717,8 @@ void StaticVertexBuffer::reserveRequiredSpace()
{
D3DPOOL pool = getDisplay()->getBufferPool(D3DUSAGE_WRITEONLY);
HRESULT result = mDevice->CreateVertexBuffer(mRequiredSpace, D3DUSAGE_WRITEONLY, 0, pool, &mVertexBuffer, NULL);
mSerial = issueSerial();
if (FAILED(result))
{
ERR("Out of memory allocating a vertex buffer of size %lu.", mRequiredSpace);
......
......@@ -30,6 +30,7 @@ struct TranslatedAttribute
UINT stride; // 0 means not to advance the read pointer at all
IDirect3DVertexBuffer9 *vertexBuffer;
unsigned int serial;
};
class VertexBuffer
......@@ -41,11 +42,16 @@ class VertexBuffer
void unmap();
IDirect3DVertexBuffer9 *getBuffer() const;
unsigned int getSerial() const;
protected:
IDirect3DDevice9 *const mDevice;
IDirect3DVertexBuffer9 *mVertexBuffer;
unsigned int mSerial;
static unsigned int issueSerial();
static unsigned int mCurrentSerial;
private:
DISALLOW_COPY_AND_ASSIGN(VertexBuffer);
};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment