Use instancing for non-array vertex attributes.

TRAC #12303 Signed-off-by: Nicolas Capens Signed-off-by: Daniel Koch git-svn-id: https://angleproject.googlecode.com/svn/trunk@314 736b8ea6-26fd-11df-bfd4-992fa37f6226
parent 8c9ff193
......@@ -143,6 +143,8 @@ void Surface::writeRecordableFlipState(IDirect3DDevice9 *device, IDirect3DTextur
device->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT);
device->SetSamplerState(0, D3DSAMP_SRGBTEXTURE, FALSE);
device->SetFVF(D3DFVF_XYZRHW | D3DFVF_TEX1);
device->SetStreamSourceFreq(0, 1); // DrawPrimitiveUP only cares about stream 0, not the rest.
}
void Surface::applyFlipState(IDirect3DDevice9 *device, IDirect3DTexture9 *source)
......
......@@ -464,6 +464,11 @@ void Blit::setCommonBlitState()
device->SetSamplerState(0, D3DSAMP_SRGBTEXTURE, FALSE);
device->SetSamplerState(0, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
device->SetSamplerState(0, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);
for (int i = 0; i < MAX_VERTEX_ATTRIBS+1; i++)
{
device->SetStreamSourceFreq(i, 1);
}
}
void Blit::render()
......
......@@ -1943,20 +1943,39 @@ void Context::lookupAttributeMapping(TranslatedAttribute *attributes)
}
}
GLenum Context::applyVertexBuffer(GLint first, GLsizei count)
GLenum Context::applyVertexBuffer(GLenum mode, GLint first, GLsizei count, bool *useIndexing, TranslatedIndexData *indexInfo)
{
TranslatedAttribute translated[MAX_VERTEX_ATTRIBS];
GLenum err = mVertexDataManager->preRenderValidate(first, count, translated);
if (err != GL_NO_ERROR)
{
return err;
}
if (err == GL_NO_ERROR)
lookupAttributeMapping(translated);
mBufferBackEnd->setupAttributesPreDraw(translated);
for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++)
{
lookupAttributeMapping(translated);
if (translated[i].enabled && translated[i].nonArray)
{
err = mIndexDataManager->preRenderValidateUnindexed(mode, count, indexInfo);
if (err != GL_NO_ERROR)
{
return err;
}
mBufferBackEnd->setupAttributesPreDraw(translated);
mBufferBackEnd->setupIndicesPreDraw(*indexInfo);
*useIndexing = true;
return GL_NO_ERROR;
}
}
return err;
*useIndexing = false;
return GL_NO_ERROR;
}
GLenum Context::applyVertexBuffer(const TranslatedIndexData &indexInfo)
......@@ -1978,14 +1997,14 @@ GLenum Context::applyVertexBuffer(const TranslatedIndexData &indexInfo)
// Applies the indices and element array bindings to the Direct3D 9 device
GLenum Context::applyIndexBuffer(const void *indices, GLsizei count, GLenum mode, GLenum type, TranslatedIndexData *indexInfo)
{
GLenum error = mIndexDataManager->preRenderValidate(mode, type, count, getBuffer(mState.elementArrayBuffer), indices, indexInfo);
GLenum err = mIndexDataManager->preRenderValidate(mode, type, count, getBuffer(mState.elementArrayBuffer), indices, indexInfo);
if (error == GL_NO_ERROR)
if (err == GL_NO_ERROR)
{
mBufferBackEnd->setupIndicesPreDraw(*indexInfo);
}
return error;
return err;
}
// Applies the shaders and shader constants to the Direct3D 9 device
......@@ -2359,6 +2378,7 @@ void Context::clear(GLbitfield mask)
device->SetPixelShader(NULL);
device->SetVertexShader(NULL);
device->SetFVF(D3DFVF_XYZRHW | D3DFVF_DIFFUSE);
device->SetStreamSourceFreq(0, 1);
struct Vertex
{
......@@ -2434,7 +2454,9 @@ void Context::drawArrays(GLenum mode, GLint first, GLsizei count)
applyState(mode);
GLenum err = applyVertexBuffer(first, count);
TranslatedIndexData indexInfo;
bool useIndexing;
GLenum err = applyVertexBuffer(mode, first, count, &useIndexing, &indexInfo);
if (err != GL_NO_ERROR)
{
return error(err);
......@@ -2451,7 +2473,14 @@ void Context::drawArrays(GLenum mode, GLint first, GLsizei count)
if (!cullSkipsDraw(mode))
{
display->startScene();
device->DrawPrimitive(primitiveType, 0, primitiveCount);
if (useIndexing)
{
device->DrawIndexedPrimitive(primitiveType, -(INT)indexInfo.minIndex, indexInfo.minIndex, indexInfo.maxIndex-indexInfo.minIndex+1, indexInfo.offset/indexInfo.indexSize, primitiveCount);
}
else
{
device->DrawPrimitive(primitiveType, 0, primitiveCount);
}
}
}
......@@ -2538,6 +2567,7 @@ void Context::finish()
occlusionQuery->Issue(D3DISSUE_BEGIN);
// Render something outside the render target
device->SetStreamSourceFreq(0, 1);
device->SetPixelShader(NULL);
device->SetVertexShader(NULL);
device->SetFVF(D3DFVF_XYZRHW);
......
......@@ -351,9 +351,10 @@ class Context
bool applyRenderTarget(bool ignoreViewport);
void applyState(GLenum drawMode);
GLenum applyVertexBuffer(GLint first, GLsizei count);
GLenum applyVertexBuffer(GLenum mode, GLint first, GLsizei count, bool *useIndexing, TranslatedIndexData *indexInfo);
GLenum applyVertexBuffer(const TranslatedIndexData &indexInfo);
GLenum applyIndexBuffer(const void *indices, GLsizei count, GLenum mode, GLenum type, TranslatedIndexData *indexInfo);
GLenum applyCountingIndexBuffer(GLenum mode, GLenum count, TranslatedIndexData *indexInfo);
void applyShaders();
void applyTextures();
......
......@@ -12,6 +12,7 @@
#include "common/debug.h"
#include "libGLESv2/Buffer.h"
#include "libGLESv2/mathutil.h"
#include "libGLESv2/geometry/backend.h"
namespace
......@@ -25,6 +26,11 @@ namespace gl
IndexDataManager::IndexDataManager(Context *context, BufferBackEnd *backend)
: mContext(context), mBackend(backend), mIntIndicesSupported(backend->supportIntIndices())
{
mCountingBuffer = NULL;
mCountingBufferSize = 0;
mLineLoopBuffer = NULL;
mStreamBufferShort = mBackend->createIndexBuffer(INITIAL_INDEX_BUFFER_SIZE, GL_UNSIGNED_SHORT);
if (mIntIndicesSupported)
......@@ -41,6 +47,8 @@ IndexDataManager::~IndexDataManager()
{
delete mStreamBufferShort;
delete mStreamBufferInt;
delete mCountingBuffer;
delete mLineLoopBuffer;
}
namespace
......@@ -100,8 +108,6 @@ GLenum IndexDataManager::preRenderValidate(GLenum mode, GLenum type, GLsizei cou
translated->offset = offset;
translated->indexSize = indexSize(type);
translated->indices = output;
if (type == GL_UNSIGNED_BYTE)
{
const GLubyte *in = static_cast<const GLubyte*>(indices);
......@@ -193,4 +199,63 @@ TranslatedIndexBuffer *IndexDataManager::prepareIndexBuffer(GLenum type, std::si
return streamIb;
}
// Prepares a sequential ("counting") 16-bit index buffer so that a non-indexed draw of
// `count` vertices can be issued as an indexed draw.
//
//   mode      - GL primitive mode; GL_LINE_LOOP gets a dedicated buffer that repeats
//               index 0 at the end to close the loop.
//   count     - number of vertices in the draw call.
//   indexInfo - receives the buffer to bind plus count / index range / size / offset.
//
// Returns GL_NO_ERROR on success, or GL_OUT_OF_MEMORY when the draw is too large to be
// expressed through this 16-bit index path.
GLenum IndexDataManager::preRenderValidateUnindexed(GLenum mode, GLsizei count, TranslatedIndexData *indexInfo)
{
    if (count >= 65535) return GL_OUT_OF_MEMORY;

    if (mode == GL_LINE_LOOP)
    {
        // For line loops, create a single-use buffer that runs 0 - count-1, 0.
        delete mLineLoopBuffer;
        mLineLoopBuffer = mBackend->createIndexBuffer((count+1) * sizeof(unsigned short), GL_UNSIGNED_SHORT);

        unsigned short *indices = static_cast<unsigned short *>(mLineLoopBuffer->map());

        for (int i = 0; i < count; i++)
        {
            indices[i] = static_cast<unsigned short>(i);
        }

        indices[count] = 0;

        mLineLoopBuffer->unmap();

        indexInfo->buffer = mLineLoopBuffer;
        indexInfo->count = count + 1;
    }
    else
    {
        if (mCountingBufferSize < count)
        {
            // Grow geometrically. The buffer must be allocated AND filled for the full
            // recorded capacity: later draws with any count <= mCountingBufferSize reuse
            // it without refilling, so every one of those indices has to be present.
            const GLsizei newSize = std::max(static_cast<GLsizei>(ceilPow2(count)), mCountingBufferSize*2);

            delete mCountingBuffer;
            mCountingBuffer = mBackend->createIndexBuffer(newSize * sizeof(unsigned short), GL_UNSIGNED_SHORT);

            unsigned short *indices = static_cast<unsigned short *>(mCountingBuffer->map());

            // newSize is at most 65536 here (count < 65535), so i always fits in 16 bits.
            for (int i = 0; i < newSize; i++)
            {
                indices[i] = static_cast<unsigned short>(i);
            }

            mCountingBuffer->unmap();

            mCountingBufferSize = newSize;
        }

        indexInfo->buffer = mCountingBuffer;
        indexInfo->count = count;
    }

    // Both paths reference vertices 0 .. count-1, starting at the beginning of the buffer.
    indexInfo->maxIndex = count - 1;
    indexInfo->indexSize = sizeof(unsigned short);
    indexInfo->minIndex = 0;
    indexInfo->offset = 0;

    return GL_NO_ERROR;
}
}
......@@ -33,8 +33,6 @@ struct TranslatedIndexData
GLuint count;
GLuint indexSize;
const void *indices;
TranslatedIndexBuffer *buffer;
GLsizei offset;
};
......@@ -46,6 +44,7 @@ class IndexDataManager
~IndexDataManager();
GLenum preRenderValidate(GLenum mode, GLenum type, GLsizei count, Buffer *arrayElementBuffer, const void *indices, TranslatedIndexData *translated);
GLenum preRenderValidateUnindexed(GLenum mode, GLsizei count, TranslatedIndexData *indexInfo);
private:
std::size_t IndexDataManager::typeSize(GLenum type) const;
......@@ -60,6 +59,11 @@ class IndexDataManager
TranslatedIndexBuffer *mStreamBufferShort;
TranslatedIndexBuffer *mStreamBufferInt;
TranslatedIndexBuffer *mCountingBuffer;
GLsizei mCountingBufferSize;
TranslatedIndexBuffer *mLineLoopBuffer;
};
}
......
......@@ -22,8 +22,6 @@
namespace
{
enum { INITIAL_STREAM_BUFFER_SIZE = 1024*1024 };
enum { MAX_CURRENT_VALUE_EXPANSION = 16 };
enum { CURRENT_VALUES_REQUIRED_SPACE = 4 * sizeof(float) * gl::MAX_VERTEX_ATTRIBS * MAX_CURRENT_VALUE_EXPANSION };
}
namespace gl
......@@ -105,11 +103,6 @@ GLenum VertexDataManager::preRenderValidate(GLint start, GLsizei count,
}
}
if (usesCurrentValues)
{
requiredSpace += CURRENT_VALUES_REQUIRED_SPACE;
}
if (requiredSpace > mStreamBuffer->size())
{
std::size_t newSize = std::max(requiredSpace, 3 * mStreamBuffer->size() / 2); // 1.5 x mStreamBuffer->size() is arbitrary and should be checked to see we don't have too many reallocations.
......@@ -128,6 +121,7 @@ GLenum VertexDataManager::preRenderValidate(GLint start, GLsizei count,
{
FormatConverter formatConverter = mBackend->getFormatConverter(attribs[i].mType, attribs[i].mSize, attribs[i].mNormalized);
translated[i].nonArray = false;
translated[i].type = attribs[i].mType;
translated[i].size = attribs[i].mSize;
translated[i].normalized = attribs[i].mNormalized;
......@@ -237,65 +231,39 @@ std::size_t VertexDataManager::spaceRequired(const AttributeState &attrib, std::
void VertexDataManager::processNonArrayAttributes(const AttributeState *attribs, const std::bitset<MAX_VERTEX_ATTRIBS> &activeAttribs, TranslatedAttribute *translated, std::size_t count)
{
if (count <= MAX_CURRENT_VALUE_EXPANSION)
if (mDirtyCurrentValues)
{
if (mDirtyCurrentValues || mCurrentValueLoadBuffer != mStreamBuffer)
{
float *p = static_cast<float*>(mStreamBuffer->map(CURRENT_VALUES_REQUIRED_SPACE, &mCurrentValueOffset));
std::size_t totalSize = 4 * sizeof(float) * MAX_VERTEX_ATTRIBS;
for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++)
{
float *out = p + MAX_CURRENT_VALUE_EXPANSION * 4 * i;
for (unsigned int j = 0; j < MAX_CURRENT_VALUE_EXPANSION; j++)
{
*out++ = attribs[i].mCurrentValue[0];
*out++ = attribs[i].mCurrentValue[1];
*out++ = attribs[i].mCurrentValue[2];
*out++ = attribs[i].mCurrentValue[3];
}
}
mCurrentValueBuffer->reserveSpace(totalSize);
mStreamBuffer->unmap();
float* currentValues = static_cast<float*>(mCurrentValueBuffer->map(totalSize, &mCurrentValueOffset));
mCurrentValueLoadBuffer = mStreamBuffer;
mCurrentValueSize = MAX_CURRENT_VALUE_EXPANSION;
mCurrentValueStride = 4 * sizeof(float);
}
}
else
{
if (mDirtyCurrentValues || mCurrentValueLoadBuffer != mCurrentValueBuffer)
for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++)
{
std::size_t totalSize = 4 * sizeof(float) * MAX_VERTEX_ATTRIBS;
mCurrentValueBuffer->reserveSpace(totalSize);
float* p = static_cast<float*>(mCurrentValueBuffer->map(totalSize, &mCurrentValueOffset));
for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++)
{
memcpy(&p[i*4], attribs[i].mCurrentValue, sizeof(attribs[i].mCurrentValue)); // FIXME: this should be doing a translation. This assumes that GL_FLOATx4 is supported.
}
mCurrentValueBuffer->unmap();
mCurrentValueLoadBuffer = mCurrentValueBuffer;
mCurrentValueSize = 1;
mCurrentValueStride = 0;
// This assumes that the GL_FLOATx4 is supported by the back-end. (For D3D9, it is a mandatory format.)
currentValues[i*4+0] = attribs[i].mCurrentValue[0];
currentValues[i*4+1] = attribs[i].mCurrentValue[1];
currentValues[i*4+2] = attribs[i].mCurrentValue[2];
currentValues[i*4+3] = attribs[i].mCurrentValue[3];
}
mCurrentValueBuffer->unmap();
}
for (std::size_t i = 0; i < MAX_VERTEX_ATTRIBS; i++)
{
if (activeAttribs[i] && !attribs[i].mEnabled)
{
translated[i].buffer = mCurrentValueLoadBuffer;
translated[i].nonArray = true;
translated[i].buffer = mCurrentValueBuffer;
translated[i].type = GL_FLOAT;
translated[i].size = 4;
translated[i].normalized = false;
translated[i].stride = mCurrentValueStride;
translated[i].offset = mCurrentValueOffset + 4 * sizeof(float) * i * mCurrentValueSize;
translated[i].stride = 0;
translated[i].offset = mCurrentValueOffset + 4 * sizeof(float) * i;
}
}
}
......
......@@ -59,9 +59,6 @@ class VertexDataManager
bool mDirtyCurrentValues;
std::size_t mCurrentValueOffset; // Offset within mCurrentValueBuffer that the current attribute values were last loaded at.
TranslatedVertexBuffer *mCurrentValueBuffer;
TranslatedVertexBuffer *mCurrentValueLoadBuffer;
std::size_t mCurrentValueStride;
std::size_t mCurrentValueSize;
};
}
......
......@@ -32,6 +32,7 @@ struct FormatConverter
struct TranslatedAttribute
{
bool enabled;
bool nonArray;
// These are the original untranslated values. (Or just have some sort of BufferBackEnd::TranslatedTypeKey.)
GLenum type;
......
......@@ -62,7 +62,14 @@ Dx9BackEnd::Dx9BackEnd(IDirect3DDevice9 *d3ddevice)
for (int i = 0; i < MAX_VERTEX_ATTRIBS; ++i)
{
mAppliedAttribEnabled[i] = true;
mStreamFrequency[i] = STREAM_FREQUENCY_UNINSTANCED;
}
mStreamFrequency[MAX_VERTEX_ATTRIBS] = STREAM_FREQUENCY_UNINSTANCED;
D3DCAPS9 caps;
mDevice->GetDeviceCaps(&caps);
mUseInstancingForStrideZero = (caps.VertexShaderVersion >= D3DVS_VERSION(3, 0));
}
Dx9BackEnd::~Dx9BackEnd()
......@@ -85,7 +92,14 @@ TranslatedVertexBuffer *Dx9BackEnd::createVertexBuffer(std::size_t size)
TranslatedVertexBuffer *Dx9BackEnd::createVertexBufferForStrideZero(std::size_t size)
{
return new Dx9VertexBufferZeroStrideWorkaround(mDevice, size);
if (mUseInstancingForStrideZero)
{
return new Dx9VertexBuffer(mDevice, size);
}
else
{
return new Dx9VertexBufferZeroStrideWorkaround(mDevice, size);
}
}
TranslatedIndexBuffer *Dx9BackEnd::createIndexBuffer(std::size_t size, GLenum type)
......@@ -220,7 +234,7 @@ GLenum Dx9BackEnd::setupAttributesPreDraw(const TranslatedAttribute *attributes)
{
if (attributes[i].enabled)
{
nextElement->Stream = i;
nextElement->Stream = i + 1; // Stream 0 is skipped because D3D does not permit it to be an instanced stream.
nextElement->Offset = 0;
nextElement->Type = static_cast<BYTE>(mapAttributeType(attributes[i].type, attributes[i].size, attributes[i].normalized));
nextElement->Method = D3DDECLMETHOD_DEFAULT;
......@@ -238,11 +252,17 @@ GLenum Dx9BackEnd::setupAttributesPreDraw(const TranslatedAttribute *attributes)
mDevice->SetVertexDeclaration(vertexDeclaration);
vertexDeclaration->Release();
mDevice->SetStreamSource(0, NULL, 0, 0);
bool nonArrayAttributes = false;
for (size_t i = 0; i < MAX_VERTEX_ATTRIBS; i++)
{
if (attributes[i].enabled)
{
mDevice->SetStreamSource(i, getDxBuffer(attributes[i].buffer), attributes[i].offset, attributes[i].stride);
if (attributes[i].nonArray) nonArrayAttributes = true;
mDevice->SetStreamSource(i + 1, getDxBuffer(attributes[i].buffer), attributes[i].offset, attributes[i].stride);
if (!mAppliedAttribEnabled[i])
{
mAppliedAttribEnabled[i] = true;
......@@ -252,12 +272,60 @@ GLenum Dx9BackEnd::setupAttributesPreDraw(const TranslatedAttribute *attributes)
{
if (mAppliedAttribEnabled[i])
{
mDevice->SetStreamSource(i, 0, 0, 0);
mDevice->SetStreamSource(i + 1, 0, 0, 0);
mAppliedAttribEnabled[i] = false;
}
}
}
if (mUseInstancingForStrideZero)
{
// When there are no stride zero attributes, we disable instancing so that DrawPrimitive can be used.
if (nonArrayAttributes)
{
if (mStreamFrequency[0] != STREAM_FREQUENCY_INDEXED)
{
mStreamFrequency[0] = STREAM_FREQUENCY_INDEXED;
mDevice->SetStreamSourceFreq(0, D3DSTREAMSOURCE_INDEXEDDATA | 1);
}
for (size_t i = 0; i < MAX_VERTEX_ATTRIBS; i++)
{
if (attributes[i].enabled)
{
if (attributes[i].nonArray)
{
if (mStreamFrequency[i+1] != STREAM_FREQUENCY_INSTANCED)
{
mStreamFrequency[i+1] = STREAM_FREQUENCY_INSTANCED;
mDevice->SetStreamSourceFreq(i + 1, D3DSTREAMSOURCE_INSTANCEDATA | 1);
}
}
else
{
if (mStreamFrequency[i+1] != STREAM_FREQUENCY_INDEXED)
{
mStreamFrequency[i+1] = STREAM_FREQUENCY_INDEXED;
mDevice->SetStreamSourceFreq(i + 1, D3DSTREAMSOURCE_INDEXEDDATA | 1);
}
}
}
}
}
else
{
for (size_t i = 0; i < MAX_VERTEX_ATTRIBS + 1; i++)
{
if (mStreamFrequency[i] != STREAM_FREQUENCY_UNINSTANCED)
{
mStreamFrequency[i] = STREAM_FREQUENCY_UNINSTANCED;
mDevice->SetStreamSourceFreq(i, 1);
}
}
}
}
return GL_NO_ERROR;
}
......
......@@ -38,8 +38,19 @@ class Dx9BackEnd : public BufferBackEnd
private:
IDirect3DDevice9 *mDevice;
bool mUseInstancingForStrideZero;
bool mAppliedAttribEnabled[MAX_VERTEX_ATTRIBS];
// Last stream-source frequency mode applied to each D3D stream, cached so that
// setupAttributesPreDraw only calls SetStreamSourceFreq when the mode actually changes.
enum StreamFrequency
{
// Plain frequency of 1 (SetStreamSourceFreq(stream, 1)); the state every stream starts in.
STREAM_FREQUENCY_UNINSTANCED = 0,
// D3DSTREAMSOURCE_INDEXEDDATA | 1: applied to stream 0 and to array attribute streams
// whenever the draw also has non-array attributes.
STREAM_FREQUENCY_INDEXED,
// D3DSTREAMSOURCE_INSTANCEDATA | 1: applied to streams that carry a non-array attribute.
STREAM_FREQUENCY_INSTANCED
};
StreamFrequency mStreamFrequency[MAX_VERTEX_ATTRIBS+1];
class Dx9VertexBuffer : public TranslatedVertexBuffer
{
public:
......
......@@ -25,6 +25,19 @@ inline int log2(int x)
return r;
}
// Rounds x up to the nearest power of two; a value that already is a power of two is
// returned unchanged. An input of 0 yields 1. With a 32-bit unsigned int, inputs above
// 2^31 wrap around to 0.
inline unsigned int ceilPow2(unsigned int x)
{
    // Step down by one first so exact powers of two map onto themselves.
    unsigned int bits = (x == 0) ? 0 : x - 1;

    // Smear the highest set bit into every lower position (shifts of 1, 2, 4, 8, 16).
    for (unsigned int shift = 1; shift <= 16; shift *= 2)
    {
        bits |= bits >> shift;
    }

    // All bits below the top one are now set; adding one carries to the next power of two.
    return bits + 1;
}
inline float clamp01(float x)
{
return x < 0 ? 0 : (x > 1 ? 1 : x);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment