Robust buffer access for vertex inputs

Robust buffer access for vertex inputs/attributes was not implemented. It's fixed in this CL by adding robustBufferAccess to the Context structure, adding a check in Renderer::advanceInstanceAttributes() to prevent advancing a buffer past its end, and adding out-of-bounds checks in VertexRoutine::readStream(). This affects the dEQP-VK.robustness.vertex_access.* tests, which will be enabled when the vertexPipelineStoresAndAtomics feature is turned on. For now, this change should be a no-op in test results. Bug b/140294254 b/131224163 Change-Id: Ib4d4cdb73f48495a556b6bd05b1d0e3ce800ad15 Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/36268 Tested-by: Alexis Hétu <sugoi@google.com> Reviewed-by: Nicolas Capens <nicolascapens@google.com>

Robust buffer access for vertex inputs
f9c8d5fd · Alexis Hetu · Alexis Hétu · 1ea7af54 · f9c8d5fd · f9c8d5fd
Commit f9c8d5fd authored Sep 12, 2019 by Alexis Hetu Committed by Alexis Hétu Sep 16, 2019
11 changed files
--- a/src/Device/Context.cpp
+++ b/src/Device/Context.cpp
@@ -99,6 +99,8 @@ namespace sw
 		frontStencil = {};
 		backStencil = {};

+		robustBufferAccess = false;
+
 		rasterizerDiscard = false;

 		depthCompareMode = VK_COMPARE_OP_LESS;

--- a/src/Device/Context.hpp
+++ b/src/Device/Context.hpp
@@ -107,6 +107,7 @@ namespace sw
 		vk::DescriptorSet::Bindings descriptorSets = {};
 		vk::DescriptorSet::DynamicOffsets descriptorDynamicOffsets = {};
 		Stream input[MAX_INTERFACE_COMPONENTS / 4];
+		bool robustBufferAccess;

 		vk::ImageView *renderTarget[RENDERTARGETS];
 		vk::ImageView *depthBuffer;

--- a/src/Device/Renderer.cpp
+++ b/src/Device/Renderer.cpp
@@ -277,6 +277,7 @@ namespace sw
 		for(int i = 0; i < MAX_INTERFACE_COMPONENTS/4; i++)
 		{
 			data->input[i] = context->input[i].buffer;
+			data->robustnessSize[i] = context->input[i].robustnessSize;
 			data->stride[i] = context->input[i].vertexStride;
 		}

@@ -1057,10 +1058,11 @@ namespace sw
 		for(uint32_t i = 0; i < vk::MAX_VERTEX_INPUT_BINDINGS; i++)
 		{
 			auto &attrib = inputs[i];
-			if (attrib.count && attrib.instanceStride)
+			if (attrib.count && attrib.instanceStride && (attrib.instanceStride < attrib.robustnessSize))
 			{
 				// Under the casts: attrib.buffer += attrib.instanceStride
 				attrib.buffer = (void const *)((uintptr_t)attrib.buffer + attrib.instanceStride);
+				attrib.robustnessSize -= attrib.instanceStride;
 			}
 		}
 	}

--- a/src/Device/Renderer.hpp
+++ b/src/Device/Renderer.hpp
@@ -66,6 +66,7 @@ namespace sw
 		vk::DescriptorSet::DynamicOffsets descriptorDynamicOffsets = {};

 		const void *input[MAX_INTERFACE_COMPONENTS / 4];
+		unsigned int robustnessSize[MAX_INTERFACE_COMPONENTS / 4];
 		unsigned int stride[MAX_INTERFACE_COMPONENTS / 4];
 		const void *indices;


--- a/src/Device/Stream.hpp
+++ b/src/Device/Stream.hpp
@@ -39,6 +39,7 @@ namespace sw
 	struct Stream
 	{
 		const void *buffer = nullptr;
+		unsigned int robustnessSize = 0;
 		unsigned int vertexStride = 0;
 		unsigned int instanceStride = 0;
 		StreamType type = STREAMTYPE_FLOAT;

--- a/src/Device/VertexProcessor.cpp
+++ b/src/Device/VertexProcessor.cpp
@@ -44,6 +44,32 @@ namespace sw
 		return hash;
 	}

+	unsigned int VertexProcessor::States::Input::bytesPerAttrib() const  // Byte size of one attribute of this input, derived from `type` and `count`.
+	{
+		switch(type)
+		{
+		case STREAMTYPE_FLOAT:
+		case STREAMTYPE_INT:
+		case STREAMTYPE_UINT:
+			return count * sizeof(uint32_t);  // `count` components of sizeof(uint32_t) bytes each.
+		case STREAMTYPE_HALF:
+		case STREAMTYPE_SHORT:
+		case STREAMTYPE_USHORT:
+			return count * sizeof(uint16_t);  // `count` components of sizeof(uint16_t) bytes each.
+		case STREAMTYPE_BYTE:
+		case STREAMTYPE_SBYTE:
+			return count * sizeof(uint8_t);  // `count` components of one byte each.
+		case STREAMTYPE_COLOR:
+		case STREAMTYPE_2_10_10_10_INT:
+		case STREAMTYPE_2_10_10_10_UINT:
+			return sizeof(int);  // Size is independent of `count` (presumably all components are packed into one int - confirm).
+		default:
+			UNSUPPORTED("stream.type %d", int(type));  // Unhandled stream type is reported here.
+		}

+		return 0;  // Only reached from the default case, if UNSUPPORTED returns.
+	}
+
 	bool VertexProcessor::State::operator==(const State &state) const
 	{
 		if(hash != state.hash)
@@ -78,6 +104,7 @@ namespace sw
 		State state;

 		state.shaderID = context->vertexShader->getSerialID();
+		state.robustBufferAccess = context->robustBufferAccess;
 		state.isPoint = context->topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST;

 		for(int i = 0; i < MAX_INTERFACE_COMPONENTS / 4; i++)

--- a/src/Device/VertexProcessor.hpp
+++ b/src/Device/VertexProcessor.hpp
@@ -68,6 +68,8 @@ namespace sw
 					return count != 0;
 				}

+				unsigned int bytesPerAttrib() const;
+
 				StreamType type    : BITS(STREAMTYPE_LAST);
 				unsigned int count : 3;
 				bool normalized    : 1;
@@ -75,6 +77,7 @@ namespace sw
 			};

 			Input input[MAX_INTERFACE_COMPONENTS / 4];
+			bool robustBufferAccess : 1;
 			bool isPoint : 1;
 		};


--- a/src/Pipeline/VertexRoutine.cpp
+++ b/src/Pipeline/VertexRoutine.cpp
@@ -94,8 +94,13 @@ namespace sw
 			{
 				Pointer<Byte> input = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, input) + sizeof(void*) * (i / 4));
 				UInt stride = *Pointer<UInt>(data + OFFSET(DrawData, stride) + sizeof(uint32_t) * (i / 4));
+				UInt robustnessSize(0);
+				if(state.robustBufferAccess)
+				{
+					robustnessSize = *Pointer<UInt>(data + OFFSET(DrawData, robustnessSize) + sizeof(uint32_t) * (i / 4));
+				}

-				auto value = readStream(input, stride, state.input[i / 4], batch);
+				auto value = readStream(input, stride, state.input[i / 4], batch, state.robustBufferAccess, robustnessSize);
 				routine.inputs[i + 0] = value.x;
 				routine.inputs[i + 1] = value.y;
 				routine.inputs[i + 2] = value.z;
@@ -137,14 +142,28 @@ namespace sw
 		clipFlags |= Pointer<Int>(constants + OFFSET(Constants,fini))[SignMask(finiteXYZ)];
 	}

-	Vector4f VertexRoutine::readStream(Pointer<Byte> &buffer, UInt &stride, const Stream &stream, Pointer<UInt> &batch)
+	Vector4f VertexRoutine::readStream(Pointer<Byte> &buffer, UInt &stride, const Stream &stream, Pointer<UInt> &batch,
+	                                   bool robustBufferAccess, UInt & robustnessSize)
 	{
 		Vector4f v;
+		UInt4 offsets = *Pointer<UInt4>(As<Pointer<UInt4>>(batch)) * UInt4(stride);
+
+		Pointer<Byte> source0 = buffer + offsets.x;
+		Pointer<Byte> source1 = buffer + offsets.y;
+		Pointer<Byte> source2 = buffer + offsets.z;
+		Pointer<Byte> source3 = buffer + offsets.w;

-		Pointer<Byte> source0 = buffer + batch[0] * stride;
-		Pointer<Byte> source1 = buffer + batch[1] * stride;
-		Pointer<Byte> source2 = buffer + batch[2] * stride;
-		Pointer<Byte> source3 = buffer + batch[3] * stride;
+		UInt4 zero(0);
+		if (robustBufferAccess)
+		{
+			// TODO(b/141124876): Optimize for wide-vector gather operations.
+			UInt4 limits = offsets + UInt4(stream.bytesPerAttrib());
+			Pointer<Byte> zeroSource = As<Pointer<Byte>>(&zero);
+			source0 = IfThenElse(limits.x <= robustnessSize, source0, zeroSource);
+			source1 = IfThenElse(limits.y <= robustnessSize, source1, zeroSource);
+			source2 = IfThenElse(limits.z <= robustnessSize, source2, zeroSource);
+			source3 = IfThenElse(limits.w <= robustnessSize, source3, zeroSource);
+		}

 		bool isNativeFloatAttrib = (stream.attribType == SpirvShader::ATTRIBTYPE_FLOAT) || stream.normalized;


--- a/src/Pipeline/VertexRoutine.hpp
+++ b/src/Pipeline/VertexRoutine.hpp
@@ -66,7 +66,8 @@ namespace sw

 		typedef VertexProcessor::State::Input Stream;

-		Vector4f readStream(Pointer<Byte> &buffer, UInt &stride, const Stream &stream, Pointer<UInt> &batch);
+		Vector4f readStream(Pointer<Byte> &buffer, UInt &stride, const Stream &stream, Pointer<UInt> &batch,
+		                    bool robustBufferAccess, UInt& robustnessSize);
 		void readInput(Pointer<UInt> &batch);
 		void computeClipFlags();
 		void writeCache(Pointer<Byte> &vertexCache, Pointer<UInt> &tagCache, Pointer<UInt> &batch);

--- a/src/Vulkan/VkCommandBuffer.cpp
+++ b/src/Vulkan/VkCommandBuffer.cpp
@@ -419,8 +419,13 @@ void CommandBuffer::ExecutionState::bindVertexInputs(sw::Context& context, int f
 		if (attrib.count)
 		{
 			const auto &vertexInput = vertexInputBindings[attrib.binding];
-			attrib.buffer = vertexInput.buffer ? vertexInput.buffer->getOffsetPointer(
-					attrib.offset + vertexInput.offset + attrib.vertexStride * firstVertex + attrib.instanceStride * firstInstance) : nullptr;
+			VkDeviceSize offset = attrib.offset + vertexInput.offset +
+			                      attrib.vertexStride * firstVertex +
+			                      attrib.instanceStride * firstInstance;
+			attrib.buffer = vertexInput.buffer ? vertexInput.buffer->getOffsetPointer(offset) : nullptr;
+
+			VkDeviceSize size = vertexInput.buffer ? vertexInput.buffer->getSize() : 0;
+			attrib.robustnessSize = (size > offset) ? size - offset : 0;
 		}
 	}
 }

--- a/src/Vulkan/VkPipeline.cpp
+++ b/src/Vulkan/VkPipeline.cpp
@@ -283,6 +283,8 @@ Pipeline::Pipeline(PipelineLayout const *layout, const Device *device)
 GraphicsPipeline::GraphicsPipeline(const VkGraphicsPipelineCreateInfo* pCreateInfo, void* mem, const Device *device)
 	: Pipeline(vk::Cast(pCreateInfo->layout), device)
 {
+	context.robustBufferAccess = robustBufferAccess;
+
 	if(((pCreateInfo->flags &
 		~(VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT |
 	      VK_PIPELINE_CREATE_DERIVATIVE_BIT |