Commit aa9e9971 by Jamie Madill

D3D11: Reduce overhead of clearing unused textures.

*re-land with compile fix* We would spend a fair bit of time iterating over the unused textures, setting them to null one-by-one, and updating our cache. We can reduce this time by smarter caching, and skipping unmodified ranges. BUG=angleproject:959 Change-Id: I4de20bc131c4a568108ad670a2ef491cfd4c50ed Reviewed-on: https://chromium-review.googlesource.com/280916 Tested-by: Jamie Madill <jmadill@chromium.org> Reviewed-by: Geoff Lang <geofflang@chromium.org>
parent d4898490
...@@ -524,6 +524,17 @@ struct Range ...@@ -524,6 +524,17 @@ struct Range
return start < other.end; return start < other.end;
} }
} }
// Widens the range so that |value| lies within its bounds: |start| is lowered to
// |value| if |value| is below it, and |end| is raised to |value| if |value| is
// above it. A bound that already covers |value| is left untouched.
void extend(T value)
{
    // The range can only grow: take the smaller start and the larger end.
    start = value < start ? value : start;
    end   = value > end ? value : end;
}
// Reports whether the range covers nothing, i.e. |end| does not lie past |start|.
bool empty() const
{
    const bool coversNothing = (end <= start);
    return coversNothing;
}
}; };
typedef Range<int> RangeI; typedef Range<int> RangeI;
......
...@@ -444,14 +444,7 @@ gl::Error RendererD3D::applyTextures(const gl::Data &data, gl::SamplerType shade ...@@ -444,14 +444,7 @@ gl::Error RendererD3D::applyTextures(const gl::Data &data, gl::SamplerType shade
// Set all the remaining textures to NULL // Set all the remaining textures to NULL
size_t samplerCount = (shaderType == gl::SAMPLER_PIXEL) ? data.caps->maxTextureImageUnits size_t samplerCount = (shaderType == gl::SAMPLER_PIXEL) ? data.caps->maxTextureImageUnits
: data.caps->maxVertexTextureImageUnits; : data.caps->maxVertexTextureImageUnits;
for (size_t samplerIndex = samplerRange; samplerIndex < samplerCount; samplerIndex++) clearTextures(shaderType, samplerRange, samplerCount);
{
gl::Error error = setTexture(shaderType, samplerIndex, NULL);
if (error.isError())
{
return error;
}
}
return gl::Error(GL_NO_ERROR); return gl::Error(GL_NO_ERROR);
} }
......
...@@ -190,6 +190,9 @@ class RendererD3D : public Renderer, public BufferFactoryD3D ...@@ -190,6 +190,9 @@ class RendererD3D : public Renderer, public BufferFactoryD3D
void pushGroupMarker(GLsizei length, const char *marker) override; void pushGroupMarker(GLsizei length, const char *marker) override;
void popGroupMarker() override; void popGroupMarker() override;
// Sets every sampler slot of |samplerType| in [rangeStart, rangeEnd) to a NULL texture.
// This is a separate virtual so that a backend can clear the whole range in one go:
// in D3D11, that is faster than calling setTexture a jillion times.
virtual gl::Error clearTextures(gl::SamplerType samplerType, size_t rangeStart, size_t rangeEnd) = 0;
protected: protected:
virtual gl::Error drawArrays(const gl::Data &data, GLenum mode, GLsizei count, GLsizei instances, bool usesPointSize) = 0; virtual gl::Error drawArrays(const gl::Data &data, GLenum mode, GLsizei count, GLsizei instances, bool usesPointSize) = 0;
virtual gl::Error drawElements(GLenum mode, GLsizei count, GLenum type, const GLvoid *indices, virtual gl::Error drawElements(GLenum mode, GLsizei count, GLenum type, const GLvoid *indices,
......
...@@ -188,6 +188,45 @@ ANGLEFeatureLevel GetANGLEFeatureLevel(D3D_FEATURE_LEVEL d3dFeatureLevel) ...@@ -188,6 +188,45 @@ ANGLEFeatureLevel GetANGLEFeatureLevel(D3D_FEATURE_LEVEL d3dFeatureLevel)
} }
// Records |srv| as the view cached for slot |resourceIndex| and keeps
// mHighestUsedSRV up to date.
//
// mHighestUsedSRV is a one-past-the-end count: after a non-NULL update at index i
// it is at least i + 1, and 0 means no slot holds a view.
//
// |resourceIndex| must be smaller than the cache size (checked with ASSERT).
// |srv| may be NULL, which marks the slot as unused.
void Renderer11::SRVCache::update(size_t resourceIndex, ID3D11ShaderResourceView *srv)
{
    ASSERT(resourceIndex < mCurrentSRVs.size());
    SRVRecord *record = &mCurrentSRVs[resourceIndex];

    record->srv = reinterpret_cast<uintptr_t>(srv);
    if (srv)
    {
        record->resource = reinterpret_cast<uintptr_t>(GetViewResource(srv));
        srv->GetDesc(&record->desc);
        mHighestUsedSRV = std::max(resourceIndex + 1, mHighestUsedSRV);
    }
    else
    {
        record->resource = 0;

        // Only clearing the topmost used slot can lower the high-water mark.
        if (resourceIndex + 1 == mHighestUsedSRV)
        {
            // Because mHighestUsedSRV is one past the highest used index, the slot
            // to inspect is mHighestUsedSRV - 1. Stop at the first slot that still
            // holds a view so that it stays inside [0, mHighestUsedSRV).
            while (mHighestUsedSRV > 0 && mCurrentSRVs[mHighestUsedSRV - 1].srv == 0)
            {
                --mHighestUsedSRV;
            }
        }
    }
}
// Zeroes every cached record and resets the highest-used marker to 0.
// Does nothing when the cache holds no records.
void Renderer11::SRVCache::clear()
{
    if (!mCurrentSRVs.empty())
    {
        const size_t totalBytes = mCurrentSRVs.size() * sizeof(SRVRecord);
        memset(&mCurrentSRVs[0], 0, totalBytes);
        mHighestUsedSRV = 0;
    }
}
Renderer11::Renderer11(egl::Display *display) Renderer11::Renderer11(egl::Display *display)
: RendererD3D(display), : RendererD3D(display),
mStateCache(this), mStateCache(this),
...@@ -586,8 +625,8 @@ void Renderer11::initializeDevice() ...@@ -586,8 +625,8 @@ void Renderer11::initializeDevice()
mForceSetPixelSamplerStates.resize(rendererCaps.maxTextureImageUnits); mForceSetPixelSamplerStates.resize(rendererCaps.maxTextureImageUnits);
mCurPixelSamplerStates.resize(rendererCaps.maxTextureImageUnits); mCurPixelSamplerStates.resize(rendererCaps.maxTextureImageUnits);
mCurVertexSRVs.resize(rendererCaps.maxVertexTextureImageUnits); mCurVertexSRVs.initialize(rendererCaps.maxVertexTextureImageUnits);
mCurPixelSRVs.resize(rendererCaps.maxTextureImageUnits); mCurPixelSRVs.initialize(rendererCaps.maxTextureImageUnits);
markAllStateDirty(); markAllStateDirty();
...@@ -605,6 +644,9 @@ void Renderer11::initializeDevice() ...@@ -605,6 +644,9 @@ void Renderer11::initializeDevice()
angleFeatureLevel = ANGLE_FEATURE_LEVEL_11_1; angleFeatureLevel = ANGLE_FEATURE_LEVEL_11_1;
} }
// Initialize cached NULL SRV block
mNullSRVs.resize(getRendererCaps().maxTextureImageUnits, nullptr);
ANGLE_HISTOGRAM_ENUMERATION("GPU.ANGLE.D3D11FeatureLevel", ANGLE_HISTOGRAM_ENUMERATION("GPU.ANGLE.D3D11FeatureLevel",
angleFeatureLevel, angleFeatureLevel,
NUM_ANGLE_FEATURE_LEVELS); NUM_ANGLE_FEATURE_LEVELS);
...@@ -2184,8 +2226,8 @@ void Renderer11::markAllStateDirty() ...@@ -2184,8 +2226,8 @@ void Renderer11::markAllStateDirty()
// We reset the current SRV data because it might not be in sync with D3D's state // We reset the current SRV data because it might not be in sync with D3D's state
// anymore. For example when a currently used SRV is used as an RTV, D3D silently // anymore. For example when a currently used SRV is used as an RTV, D3D silently
// remove it from its state. // remove it from its state.
memset(mCurVertexSRVs.data(), 0, sizeof(SRVRecord) * mCurVertexSRVs.size()); mCurVertexSRVs.clear();
memset(mCurPixelSRVs.data(), 0, sizeof(SRVRecord) * mCurPixelSRVs.size()); mCurPixelSRVs.clear();
ASSERT(mForceSetVertexSamplerStates.size() == mCurVertexSRVs.size()); ASSERT(mForceSetVertexSamplerStates.size() == mCurVertexSRVs.size());
for (size_t vsamplerId = 0; vsamplerId < mForceSetVertexSamplerStates.size(); ++vsamplerId) for (size_t vsamplerId = 0; vsamplerId < mForceSetVertexSamplerStates.size(); ++vsamplerId)
...@@ -3675,7 +3717,7 @@ void Renderer11::setShaderResource(gl::SamplerType shaderType, UINT resourceSlot ...@@ -3675,7 +3717,7 @@ void Renderer11::setShaderResource(gl::SamplerType shaderType, UINT resourceSlot
auto &currentSRVs = (shaderType == gl::SAMPLER_VERTEX ? mCurVertexSRVs : mCurPixelSRVs); auto &currentSRVs = (shaderType == gl::SAMPLER_VERTEX ? mCurVertexSRVs : mCurPixelSRVs);
ASSERT(static_cast<size_t>(resourceSlot) < currentSRVs.size()); ASSERT(static_cast<size_t>(resourceSlot) < currentSRVs.size());
auto &record = currentSRVs[resourceSlot]; const SRVRecord &record = currentSRVs[resourceSlot];
if (record.srv != reinterpret_cast<uintptr_t>(srv)) if (record.srv != reinterpret_cast<uintptr_t>(srv))
{ {
...@@ -3688,16 +3730,7 @@ void Renderer11::setShaderResource(gl::SamplerType shaderType, UINT resourceSlot ...@@ -3688,16 +3730,7 @@ void Renderer11::setShaderResource(gl::SamplerType shaderType, UINT resourceSlot
mDeviceContext->PSSetShaderResources(resourceSlot, 1, &srv); mDeviceContext->PSSetShaderResources(resourceSlot, 1, &srv);
} }
record.srv = reinterpret_cast<uintptr_t>(srv); currentSRVs.update(resourceSlot, srv);
if (srv)
{
record.resource = reinterpret_cast<uintptr_t>(GetViewResource(srv));
srv->GetDesc(&record.desc);
}
else
{
record.resource = 0;
}
} }
} }
...@@ -3706,4 +3739,38 @@ void Renderer11::createAnnotator() ...@@ -3706,4 +3739,38 @@ void Renderer11::createAnnotator()
mAnnotator = new DebugAnnotator11(); mAnnotator = new DebugAnnotator11();
} }
// Binds NULL shader resource views to slots [rangeStart, rangeEnd) of the vertex
// or pixel stage (chosen by |samplerType|) with a single D3D11 call, then records
// the slots as NULL in the matching SRV cache.
//
// The call is skipped when the range is empty or when the cache reports that no
// slot at or above |rangeStart| currently holds a view.
//
// Always returns GL_NO_ERROR.
gl::Error Renderer11::clearTextures(gl::SamplerType samplerType, size_t rangeStart, size_t rangeEnd)
{
    // Also rejects an inverted range, which would otherwise underflow the unsigned
    // view count computed below.
    if (rangeStart >= rangeEnd)
    {
        return gl::Error(GL_NO_ERROR);
    }

    auto &currentSRVs = (samplerType == gl::SAMPLER_VERTEX ? mCurVertexSRVs : mCurPixelSRVs);

    // highestUsed() is one past the highest slot the cache knows to be non-NULL, so
    // when it does not reach past rangeStart there is nothing in the range to clear.
    // This is computed directly instead of through a gl::Range so the result does not
    // depend on how Range::extend treats a degenerate (start == end) range.
    if (currentSRVs.highestUsed() <= rangeStart)
    {
        return gl::Error(GL_NO_ERROR);
    }

    const size_t slotCount = rangeEnd - rangeStart;

    // The NULL block is read for slotCount entries; it must be at least that long.
    ASSERT(slotCount <= mNullSRVs.size());

    const UINT startSlot = static_cast<UINT>(rangeStart);
    const UINT numViews  = static_cast<UINT>(slotCount);

    if (samplerType == gl::SAMPLER_VERTEX)
    {
        mDeviceContext->VSSetShaderResources(startSlot, numViews, &mNullSRVs[0]);
    }
    else
    {
        mDeviceContext->PSSetShaderResources(startSlot, numViews, &mNullSRVs[0]);
    }

    // Mirror the device state in the cache.
    for (size_t samplerIndex = rangeStart; samplerIndex < rangeEnd; ++samplerIndex)
    {
        currentSRVs.update(samplerIndex, nullptr);
    }

    return gl::Error(GL_NO_ERROR);
}
} }
...@@ -266,6 +266,7 @@ class Renderer11 : public RendererD3D ...@@ -266,6 +266,7 @@ class Renderer11 : public RendererD3D
protected: protected:
void createAnnotator() override; void createAnnotator() override;
// Binds NULL shader resource views over sampler slots [rangeStart, rangeEnd) of the given sampler type.
gl::Error clearTextures(gl::SamplerType samplerType, size_t rangeStart, size_t rangeEnd) override;
private: private:
void generateCaps(gl::Caps *outCaps, gl::TextureCapsMap *outTextureCaps, gl::Extensions *outExtensions) const override; void generateCaps(gl::Caps *outCaps, gl::TextureCapsMap *outTextureCaps, gl::Extensions *outExtensions) const override;
...@@ -320,8 +321,41 @@ class Renderer11 : public RendererD3D ...@@ -320,8 +321,41 @@ class Renderer11 : public RendererD3D
uintptr_t resource; uintptr_t resource;
D3D11_SHADER_RESOURCE_VIEW_DESC desc; D3D11_SHADER_RESOURCE_VIEW_DESC desc;
}; };
std::vector<SRVRecord> mCurVertexSRVs;
std::vector<SRVRecord> mCurPixelSRVs; // A cache of current SRVs that also tracks the highest 'used' (non-NULL) SRV
// We might want to investigate a more robust approach that is also fast when there's
// a large gap between used SRVs (e.g. if SRV 0 and 7 are non-NULL, this approach will
// waste time on SRVs 1-6.)
// Holds one SRVRecord per shader resource slot together with mHighestUsedSRV, a
// marker that update() maintains and clear() resets to 0.
class SRVCache : angle::NonCopyable
{
  public:
    SRVCache() : mHighestUsedSRV(0) {}

    // Resizes the record storage to |size| slots.
    void initialize(size_t size) { mCurrentSRVs.resize(size); }

    // Stores |srv| (which may be NULL) as the view cached for |resourceIndex|.
    void update(size_t resourceIndex, ID3D11ShaderResourceView *srv);

    // Zeroes all records and resets the highest-used marker.
    void clear();

    // Read-only access to the record of a single slot.
    const SRVRecord &operator[](size_t index) const
    {
        return mCurrentSRVs[index];
    }

    // Current value of the highest-used marker.
    size_t highestUsed() const
    {
        return mHighestUsedSRV;
    }

    // Number of slots the cache holds.
    size_t size() const
    {
        return mCurrentSRVs.size();
    }

  private:
    std::vector<SRVRecord> mCurrentSRVs;
    size_t mHighestUsedSRV;
};
SRVCache mCurVertexSRVs;
SRVCache mCurPixelSRVs;
// A block of NULL pointers, cached so we don't re-allocate every draw call
std::vector<ID3D11ShaderResourceView*> mNullSRVs;
// Currently applied blend state // Currently applied blend state
bool mForceSetBlendState; bool mForceSetBlendState;
......
...@@ -2942,4 +2942,19 @@ void Renderer9::createAnnotator() ...@@ -2942,4 +2942,19 @@ void Renderer9::createAnnotator()
mAnnotator = new DebugAnnotator9(); mAnnotator = new DebugAnnotator9();
} }
// Sets each sampler slot in [rangeStart, rangeEnd) to a null texture, one
// setTexture call per slot. Stops at, and returns, the first error reported by
// setTexture; returns GL_NO_ERROR otherwise.
gl::Error Renderer9::clearTextures(gl::SamplerType samplerType, size_t rangeStart, size_t rangeEnd)
{
    // TODO(jmadill): faster way?
    size_t slot = rangeStart;
    while (slot < rangeEnd)
    {
        gl::Error result = setTexture(samplerType, slot, nullptr);
        if (result.isError())
        {
            return result;
        }
        ++slot;
    }

    return gl::Error(GL_NO_ERROR);
}
} }
...@@ -235,6 +235,7 @@ class Renderer9 : public RendererD3D ...@@ -235,6 +235,7 @@ class Renderer9 : public RendererD3D
protected: protected:
void createAnnotator() override; void createAnnotator() override;
// Sets each sampler slot in [rangeStart, rangeEnd) of the given sampler type to a null texture.
gl::Error clearTextures(gl::SamplerType samplerType, size_t rangeStart, size_t rangeEnd) override;
private: private:
void generateCaps(gl::Caps *outCaps, gl::TextureCapsMap *outTextureCaps, gl::Extensions *outExtensions) const override; void generateCaps(gl::Caps *outCaps, gl::TextureCapsMap *outTextureCaps, gl::Extensions *outExtensions) const override;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment