Commit e2a8a69a by Shahbaz Youssefi Committed by Commit Bot

Vulkan: Move xfb emulation offset calculation to translator

A new function is added by the translator to calculate the transform feedback offsets in the emulation path. This function makes the generated code for transform feedback smaller. Bug: angleproject:3606 Change-Id: I01460f907e20e2887cb720bddad96697fdcb0cf3 Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2607492 Commit-Queue: Shahbaz Youssefi <syoussefi@chromium.org> Reviewed-by: Tim Van Patten <timvp@google.com> Reviewed-by: Jamie Madill <jmadill@chromium.org>
parent 07d619cb
...@@ -26,7 +26,7 @@ ...@@ -26,7 +26,7 @@
// Version number for shader translation API. // Version number for shader translation API.
// It is incremented every time the API changes. // It is incremented every time the API changes.
#define ANGLE_SH_VERSION 247 #define ANGLE_SH_VERSION 248
enum ShShaderSpec enum ShShaderSpec
{ {
...@@ -348,6 +348,9 @@ const ShCompileOptions SH_FORCE_SHADER_PRECISION_HIGHP_TO_MEDIUMP = UINT64_C(1) ...@@ -348,6 +348,9 @@ const ShCompileOptions SH_FORCE_SHADER_PRECISION_HIGHP_TO_MEDIUMP = UINT64_C(1)
// Allow compiler to use specialization constant to do pre-rotation and y flip. // Allow compiler to use specialization constant to do pre-rotation and y flip.
const ShCompileOptions SH_USE_SPECIALIZATION_CONSTANT = UINT64_C(1) << 58; const ShCompileOptions SH_USE_SPECIALIZATION_CONSTANT = UINT64_C(1) << 58;
// Ask compiler to generate transform feedback emulation support code.
const ShCompileOptions SH_ADD_VULKAN_XFB_EMULATION_SUPPORT_CODE = UINT64_C(1) << 59;
// Defines alternate strategies for implementing array index clamping. // Defines alternate strategies for implementing array index clamping.
enum ShArrayIndexClampingStrategy enum ShArrayIndexClampingStrategy
{ {
...@@ -870,6 +873,9 @@ extern const char kAtomicCountersBlockName[]; ...@@ -870,6 +873,9 @@ extern const char kAtomicCountersBlockName[];
// Line raster emulation varying // Line raster emulation varying
extern const char kLineRasterEmulationPosition[]; extern const char kLineRasterEmulationPosition[];
// Transform feedback emulation helper function
extern const char kXfbEmulationGetOffsetsFunctionName[];
} // namespace vk } // namespace vk
namespace mtl namespace mtl
......
...@@ -792,6 +792,8 @@ const char kAtomicCountersBlockName[] = "ANGLEAtomicCounters"; ...@@ -792,6 +792,8 @@ const char kAtomicCountersBlockName[] = "ANGLEAtomicCounters";
const char kLineRasterEmulationPosition[] = "ANGLEPosition"; const char kLineRasterEmulationPosition[] = "ANGLEPosition";
const char kXfbEmulationGetOffsetsFunctionName[] = "ANGLEGetXfbOffsets";
} // namespace vk } // namespace vk
} // namespace sh } // namespace sh
...@@ -446,6 +446,72 @@ ANGLE_NO_DISCARD bool AddBresenhamEmulationVS(TCompiler *compiler, ...@@ -446,6 +446,72 @@ ANGLE_NO_DISCARD bool AddBresenhamEmulationVS(TCompiler *compiler,
return compiler->validateAST(root); return compiler->validateAST(root);
} }
ANGLE_NO_DISCARD bool AddXfbEmulationSupport(TCompiler *compiler,
                                             TIntermBlock *root,
                                             TSymbolTable *symbolTable,
                                             const DriverUniform *driverUniforms)
{
    // Emits a helper function ahead of main() for the transform feedback emulation path.  Given
    // the per-buffer strides (known only at link time), the helper yields, for each of the up to
    // four transform feedback buffers, the index in the output buffer at which this vertex starts
    // writing.  In GLSL terms:
    //
    //     ivec4 ANGLEGetXfbOffsets(ivec4 strides)
    //     {
    //         int xfbIndex = gl_VertexIndex
    //                      + gl_InstanceIndex * ANGLEUniforms.xfbVerticesPerInstance;
    //         return ANGLEUniforms.xfbBufferOffsets + xfbIndex * strides;
    //     }
    //
    // The return value is the result of re-validating the AST after the insertion.

    // Both the parameter and the return value are ivec4.
    const TType *offsetsType = StaticType::GetBasic<EbtInt, 4>();

    // The "strides" parameter.
    TVariable *stridesParam = new TVariable(symbolTable, ImmutableString("strides"), offsetsType,
                                            SymbolType::AngleInternal);

    // Leaf operands: the two built-ins and the two driver uniform fields.
    TIntermSymbol *glVertexIndex       = new TIntermSymbol(BuiltInVariable::gl_VertexIndex());
    TIntermSymbol *glInstanceIndex     = new TIntermSymbol(BuiltInVariable::gl_InstanceIndex());
    TIntermBinary *verticesPerInstance = driverUniforms->getXfbVerticesPerInstance();
    TIntermBinary *bufferOffsets       = driverUniforms->getXfbBufferOffsets();

    // Build the returned expression bottom-up.  The `xfbIndex` local shown in the GLSL sketch
    // above is not materialized; its value is folded straight into the return expression.
    //
    //   instanceBase  = gl_InstanceIndex * ANGLEUniforms.xfbVerticesPerInstance
    //   vertexOrdinal = gl_VertexIndex + instanceBase
    //   strideOffsets = vertexOrdinal * strides
    //   result        = ANGLEUniforms.xfbBufferOffsets + strideOffsets
    TIntermBinary *instanceBase =
        new TIntermBinary(EOpMul, glInstanceIndex, verticesPerInstance);
    TIntermBinary *vertexOrdinal = new TIntermBinary(EOpAdd, glVertexIndex, instanceBase);
    TIntermBinary *strideOffsets = new TIntermBinary(EOpVectorTimesScalar, vertexOrdinal,
                                                     new TIntermSymbol(stridesParam));
    TIntermBinary *result = new TIntermBinary(EOpAdd, bufferOffsets, strideOffsets);

    // The body consists of nothing but `return result;`.
    TIntermBlock *functionBody = new TIntermBlock;
    functionBody->appendStatement(new TIntermBranch(EOpReturn, result));

    // Describe the function itself and attach its parameter.
    // NOTE(review): the trailing `true` is presumably the "known to have no side effects" flag --
    // confirm against the TFunction constructor.
    TFunction *xfbOffsetsFunction =
        new TFunction(symbolTable, ImmutableString(vk::kXfbEmulationGetOffsetsFunctionName),
                      SymbolType::AngleInternal, offsetsType, true);
    xfbOffsetsFunction->addParameter(stridesParam);

    TIntermFunctionDefinition *definition =
        CreateInternalFunctionDefinitionNode(*xfbOffsetsFunction, functionBody);

    // Place the definition immediately before main().
    root->insertChildNodes(FindMainIndex(root), {definition});

    return compiler->validateAST(root);
}
ANGLE_NO_DISCARD bool InsertFragCoordCorrection(TCompiler *compiler, ANGLE_NO_DISCARD bool InsertFragCoordCorrection(TCompiler *compiler,
ShCompileOptions compileOptions, ShCompileOptions compileOptions,
TIntermBlock *root, TIntermBlock *root,
...@@ -974,6 +1040,17 @@ bool TranslatorVulkan::translateImpl(TIntermBlock *root, ...@@ -974,6 +1040,17 @@ bool TranslatorVulkan::translateImpl(TIntermBlock *root,
} }
} }
if (compileOptions & SH_ADD_VULKAN_XFB_EMULATION_SUPPORT_CODE)
{
// Add support code for transform feedback emulation. Only applies to vertex shader
// as tessellation and geometry shader transform feedback capture require
// VK_EXT_transform_feedback.
if (!AddXfbEmulationSupport(this, root, &getSymbolTable(), driverUniforms))
{
return false;
}
}
// Search for the gl_ClipDistance usage, if its used, we need to do some replacements. // Search for the gl_ClipDistance usage, if its used, we need to do some replacements.
bool useClipDistance = false; bool useClipDistance = false;
for (const ShaderVariable &outputVarying : mOutputVaryings) for (const ShaderVariable &outputVarying : mOutputVaryings)
......
...@@ -24,14 +24,14 @@ namespace ...@@ -24,14 +24,14 @@ namespace
{ {
constexpr ImmutableString kEmulatedDepthRangeParams = ImmutableString("ANGLEDepthRangeParams"); constexpr ImmutableString kEmulatedDepthRangeParams = ImmutableString("ANGLEDepthRangeParams");
constexpr const char kViewport[] = "viewport"; constexpr const char kViewport[] = "viewport";
constexpr const char kClipDistancesEnabled[] = "clipDistancesEnabled"; constexpr const char kClipDistancesEnabled[] = "clipDistancesEnabled";
constexpr const char kXfbActiveUnpaused[] = "xfbActiveUnpaused"; constexpr const char kXfbActiveUnpaused[] = "xfbActiveUnpaused";
constexpr const char kXfbVerticesPerDraw[] = "xfbVerticesPerDraw"; constexpr const char kXfbVerticesPerInstance[] = "xfbVerticesPerInstance";
constexpr const char kXfbBufferOffsets[] = "xfbBufferOffsets"; constexpr const char kXfbBufferOffsets[] = "xfbBufferOffsets";
constexpr const char kAcbBufferOffsets[] = "acbBufferOffsets"; constexpr const char kAcbBufferOffsets[] = "acbBufferOffsets";
constexpr const char kDepthRange[] = "depthRange"; constexpr const char kDepthRange[] = "depthRange";
constexpr const char kNumSamples[] = "numSamples"; constexpr const char kNumSamples[] = "numSamples";
constexpr const char kHalfRenderArea[] = "halfRenderArea"; constexpr const char kHalfRenderArea[] = "halfRenderArea";
constexpr const char kFlipXY[] = "flipXY"; constexpr const char kFlipXY[] = "flipXY";
...@@ -75,7 +75,7 @@ TFieldList *DriverUniform::createUniformFields(TSymbolTable *symbolTable) const ...@@ -75,7 +75,7 @@ TFieldList *DriverUniform::createUniformFields(TSymbolTable *symbolTable) const
{ {
constexpr size_t kNumGraphicsDriverUniforms = 8; constexpr size_t kNumGraphicsDriverUniforms = 8;
constexpr std::array<const char *, kNumGraphicsDriverUniforms> kGraphicsDriverUniformNames = { constexpr std::array<const char *, kNumGraphicsDriverUniforms> kGraphicsDriverUniformNames = {
{kViewport, kClipDistancesEnabled, kXfbActiveUnpaused, kXfbVerticesPerDraw, kNumSamples, {kViewport, kClipDistancesEnabled, kXfbActiveUnpaused, kXfbVerticesPerInstance, kNumSamples,
kXfbBufferOffsets, kAcbBufferOffsets, kDepthRange}}; kXfbBufferOffsets, kAcbBufferOffsets, kDepthRange}};
// This field list mirrors the structure of GraphicsDriverUniforms in ContextVk.cpp. // This field list mirrors the structure of GraphicsDriverUniforms in ContextVk.cpp.
...@@ -85,7 +85,7 @@ TFieldList *DriverUniform::createUniformFields(TSymbolTable *symbolTable) const ...@@ -85,7 +85,7 @@ TFieldList *DriverUniform::createUniformFields(TSymbolTable *symbolTable) const
new TType(EbtFloat, 4), new TType(EbtFloat, 4),
new TType(EbtUInt), // uint clipDistancesEnabled; // 32 bits for 32 clip distances max new TType(EbtUInt), // uint clipDistancesEnabled; // 32 bits for 32 clip distances max
new TType(EbtUInt), new TType(EbtUInt),
new TType(EbtUInt), new TType(EbtInt),
new TType(EbtInt), new TType(EbtInt),
new TType(EbtInt, 4), new TType(EbtInt, 4),
new TType(EbtUInt, 4), new TType(EbtUInt, 4),
...@@ -179,6 +179,16 @@ TIntermBinary *DriverUniform::getAbcBufferOffsets() const ...@@ -179,6 +179,16 @@ TIntermBinary *DriverUniform::getAbcBufferOffsets() const
return createDriverUniformRef(kAcbBufferOffsets); return createDriverUniformRef(kAcbBufferOffsets);
} }
// Returns the node produced by createDriverUniformRef() for the "xfbVerticesPerInstance" driver
// uniform field name.
TIntermBinary *DriverUniform::getXfbVerticesPerInstance() const
{
    TIntermBinary *verticesPerInstanceRef = createDriverUniformRef(kXfbVerticesPerInstance);
    return verticesPerInstanceRef;
}
// Returns the node produced by createDriverUniformRef() for the "xfbBufferOffsets" driver
// uniform field name.
TIntermBinary *DriverUniform::getXfbBufferOffsets() const
{
    TIntermBinary *bufferOffsetsRef = createDriverUniformRef(kXfbBufferOffsets);
    return bufferOffsetsRef;
}
TIntermBinary *DriverUniform::getClipDistancesEnabled() const TIntermBinary *DriverUniform::getClipDistancesEnabled() const
{ {
return createDriverUniformRef(kClipDistancesEnabled); return createDriverUniformRef(kClipDistancesEnabled);
......
...@@ -34,6 +34,8 @@ class DriverUniform ...@@ -34,6 +34,8 @@ class DriverUniform
TIntermBinary *getViewportRef() const; TIntermBinary *getViewportRef() const;
TIntermBinary *getAbcBufferOffsets() const; TIntermBinary *getAbcBufferOffsets() const;
TIntermBinary *getXfbVerticesPerInstance() const;
TIntermBinary *getXfbBufferOffsets() const;
TIntermBinary *getClipDistancesEnabled() const; TIntermBinary *getClipDistancesEnabled() const;
TIntermBinary *getDepthRangeRef() const; TIntermBinary *getDepthRangeRef() const;
TIntermBinary *getDepthRangeReservedFieldRef() const; TIntermBinary *getDepthRangeReservedFieldRef() const;
......
...@@ -404,11 +404,8 @@ void GenerateTransformFeedbackEmulationOutputs(const GlslangSourceOptions &optio ...@@ -404,11 +404,8 @@ void GenerateTransformFeedbackEmulationOutputs(const GlslangSourceOptions &optio
const std::string driverUniforms = std::string(sh::vk::kDriverUniformsVarName); const std::string driverUniforms = std::string(sh::vk::kDriverUniformsVarName);
std::ostringstream xfbOut; std::ostringstream xfbOut;
xfbOut << "if (" << driverUniforms xfbOut << "if (" << driverUniforms << ".xfbActiveUnpaused != 0)\n{\nivec4 xfbOffsets = "
<< ".xfbActiveUnpaused != 0)\n{\n" << sh::vk::kXfbEmulationGetOffsetsFunctionName << "(ivec4(";
"int xfbIndex = gl_VertexIndex + gl_InstanceIndex * int("
<< driverUniforms << ".xfbVerticesPerDraw);\nivec4 xfbOffsets = " << driverUniforms
<< ".xfbBufferOffsets + xfbIndex * ivec4(";
for (size_t bufferIndex = 0; bufferIndex < bufferCount; ++bufferIndex) for (size_t bufferIndex = 0; bufferIndex < bufferCount; ++bufferIndex)
{ {
if (bufferIndex > 0) if (bufferIndex > 0)
...@@ -423,7 +420,7 @@ void GenerateTransformFeedbackEmulationOutputs(const GlslangSourceOptions &optio ...@@ -423,7 +420,7 @@ void GenerateTransformFeedbackEmulationOutputs(const GlslangSourceOptions &optio
{ {
xfbOut << ", 0"; xfbOut << ", 0";
} }
xfbOut << ");\n"; xfbOut << "));\n";
size_t outputOffset = 0; size_t outputOffset = 0;
for (size_t varyingIndex = 0; varyingIndex < varyings.size(); ++varyingIndex) for (size_t varyingIndex = 0; varyingIndex < varyings.size(); ++varyingIndex)
{ {
......
...@@ -482,7 +482,7 @@ class ContextMtl : public ContextImpl, public mtl::Context ...@@ -482,7 +482,7 @@ class ContextMtl : public ContextImpl, public mtl::Context
uint32_t enabledClipDistances; uint32_t enabledClipDistances;
uint32_t xfbActiveUnpaused; uint32_t xfbActiveUnpaused;
uint32_t xfbVerticesPerDraw; int32_t xfbVerticesPerInstance;
int32_t numSamples; int32_t numSamples;
......
...@@ -2142,7 +2142,7 @@ angle::Result ContextMtl::fillDriverXFBUniforms(GLint drawCallFirstVertex, ...@@ -2142,7 +2142,7 @@ angle::Result ContextMtl::fillDriverXFBUniforms(GLint drawCallFirstVertex,
return angle::Result::Continue; return angle::Result::Continue;
} }
mDriverUniforms.xfbVerticesPerDraw = verticesPerInstance; mDriverUniforms.xfbVerticesPerInstance = verticesPerInstance;
TransformFeedbackMtl *transformFeedbackMtl = mtl::GetImpl(transformFeedback); TransformFeedbackMtl *transformFeedbackMtl = mtl::GetImpl(transformFeedback);
......
...@@ -34,6 +34,9 @@ std::shared_ptr<WaitableCompileEvent> ShaderMtl::compile(const gl::Context *cont ...@@ -34,6 +34,9 @@ std::shared_ptr<WaitableCompileEvent> ShaderMtl::compile(const gl::Context *cont
compileOptions |= SH_CLAMP_POINT_SIZE; compileOptions |= SH_CLAMP_POINT_SIZE;
// Transform feedback is always emulated on Metal.
compileOptions |= SH_ADD_VULKAN_XFB_EMULATION_SUPPORT_CODE;
return compileImpl(context, compilerInstance, mState.getSource(), compileOptions | options); return compileImpl(context, compilerInstance, mState.getSource(), compileOptions | options);
} }
......
...@@ -64,7 +64,7 @@ struct GraphicsDriverUniforms ...@@ -64,7 +64,7 @@ struct GraphicsDriverUniforms
uint32_t enabledClipPlanes; uint32_t enabledClipPlanes;
uint32_t xfbActiveUnpaused; uint32_t xfbActiveUnpaused;
uint32_t xfbVerticesPerDraw; int32_t xfbVerticesPerInstance;
// Used to replace gl_NumSamples. Because gl_NumSamples cannot be recognized in SPIR-V. // Used to replace gl_NumSamples. Because gl_NumSamples cannot be recognized in SPIR-V.
int32_t numSamples; int32_t numSamples;
...@@ -3869,7 +3869,7 @@ angle::Result ContextVk::handleDirtyGraphicsDriverUniforms(const gl::Context *co ...@@ -3869,7 +3869,7 @@ angle::Result ContextVk::handleDirtyGraphicsDriverUniforms(const gl::Context *co
static_cast<float>(glViewport.width), static_cast<float>(glViewport.height)}, static_cast<float>(glViewport.width), static_cast<float>(glViewport.height)},
mState.getEnabledClipDistances().bits(), mState.getEnabledClipDistances().bits(),
xfbActiveUnpaused, xfbActiveUnpaused,
mXfbVertexCountPerInstance, static_cast<int32_t>(mXfbVertexCountPerInstance),
numSamples, numSamples,
{}, {},
{}, {},
......
...@@ -87,6 +87,11 @@ std::shared_ptr<WaitableCompileEvent> ShaderVk::compile(const gl::Context *conte ...@@ -87,6 +87,11 @@ std::shared_ptr<WaitableCompileEvent> ShaderVk::compile(const gl::Context *conte
compileOptions |= SH_ADD_PRE_ROTATION; compileOptions |= SH_ADD_PRE_ROTATION;
} }
if (contextVk->getFeatures().emulateTransformFeedback.enabled)
{
compileOptions |= SH_ADD_VULKAN_XFB_EMULATION_SUPPORT_CODE;
}
return compileImpl(context, compilerInstance, mState.getSource(), compileOptions | options); return compileImpl(context, compilerInstance, mState.getSource(), compileOptions | options);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment