Commit a29aa771 authored by Nicolas Capens, committed by Nicolas Capens

Don't emit bounds checks when robustness is disabled

The Vulkan device's robustBufferAccess feature determines whether accesses to buffers must be bounds-checked against the range of the buffer descriptor. The spec states that "Some features, such as robustBufferAccess, may incur a run-time performance cost. Application writers should carefully consider the implications of enabling all supported features." Hence in many cases applications will leave the feature disabled to ensure maximum performance.

Bug: b/131224163
Change-Id: I69e150d5043f0118db8a4b2751b24b5d87bccd75
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/33389
Presubmit-Ready: Nicolas Capens <nicolascapens@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Chris Forbes <chrisforbes@google.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
parent 8f5bdcf0
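
For context, an application opts out of robustness simply by leaving the feature disabled at device creation. A minimal sketch (the queue setup is assumed to exist elsewhere):

    // Zero-initialization sets every member, including robustBufferAccess, to VK_FALSE.
    VkPhysicalDeviceFeatures enabledFeatures = {};

    VkDeviceCreateInfo createInfo = {};
    createInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
    createInfo.queueCreateInfoCount = 1;
    createInfo.pQueueCreateInfos = &queueCreateInfo;  // assumed: a populated VkDeviceQueueCreateInfo
    createInfo.pEnabledFeatures = &enabledFeatures;   // passing nullptr is equivalent

    VkDevice device = VK_NULL_HANDLE;
    VkResult result = vkCreateDevice(physicalDevice, &createInfo, nullptr, &device);
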
@@ -287,7 +287,7 @@ namespace sw
 {
 	template<typename T>
-	T Load(Pointer ptr, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */, int alignment /* = sizeof(float) */)
+	T Load(Pointer ptr, bool robust, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */, int alignment /* = sizeof(float) */)
 	{
 		using EL = typename Element<T>::type;
@@ -301,7 +301,11 @@ namespace sw
 		}
 		auto offsets = ptr.offsets();
-		mask &= ptr.isInBounds(sizeof(float)); // Disable OOB reads.
+		if(robust) // Disable OOB reads.
+		{
+			mask &= ptr.isInBounds(sizeof(float));
+		}
 		if (!atomic && order == std::memory_order_relaxed)
 		{
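
The masked bounds check works per SIMD lane: isInBounds() yields an all-ones value for lanes whose access fits within the buffer and zero otherwise, so ANDing it into the mask disables out-of-bounds lanes. A hedged scalar model of the vectorized test (names are illustrative, not SwiftShader's actual implementation):

    // Scalar model: lane i may access bytes [offsets[i], offsets[i] + accessSize).
    for(int lane = 0; lane < SIMD::Width; lane++)
    {
        bool inBounds = offsets[lane] + accessSize <= bufferLimit;
        mask[lane] &= inBounds ? ~0 : 0;  // a zeroed lane neither loads nor stores
    }
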
@@ -359,12 +363,17 @@ namespace sw
 	}
 	template<typename T>
-	void Store(Pointer ptr, T val, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */)
+	void Store(Pointer ptr, T val, bool robust, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */)
 	{
 		using EL = typename Element<T>::type;
 		constexpr size_t alignment = sizeof(float);
 		auto offsets = ptr.offsets();
-		mask &= ptr.isInBounds(sizeof(float)); // Disable OOB writes.
+		if(robust) // Disable OOB writes.
+		{
+			mask &= ptr.isInBounds(sizeof(float));
+		}
 		if (!atomic && order == std::memory_order_relaxed)
 		{
 			if (ptr.hasStaticEqualOffsets())
@@ -434,10 +443,12 @@ namespace sw
 			const char *entryPointName,
 			InsnStore const &insns,
 			const vk::RenderPass *renderPass,
-			uint32_t subpassIndex)
+			uint32_t subpassIndex,
+			bool robustBufferAccess)
 		: insns{insns}, inputs{MAX_INTERFACE_COMPONENTS},
-		  outputs{MAX_INTERFACE_COMPONENTS},
-		  codeSerialID(codeSerialID), modes{}
+		  outputs{MAX_INTERFACE_COMPONENTS},
+		  codeSerialID(codeSerialID), modes{},
+		  robustBufferAccess(robustBufferAccess)
 	{
 		ASSERT(insns.size() > 0);
@@ -2024,7 +2035,7 @@ namespace sw
 	void SpirvShader::emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, const vk::DescriptorSet::Bindings &descriptorSets) const
 	{
-		EmitState state(routine, activeLaneMask, descriptorSets);
+		EmitState state(routine, activeLaneMask, descriptorSets, robustBufferAccess);
 		// Emit everything up to the first label
 		// TODO: Separate out dispatch of block from non-block instructions?
@@ -2773,7 +2784,7 @@ namespace sw
 			{
 				auto p = ptr + offset;
 				if (interleavedByLane) { p = interleaveByLane(p); }
-				SIMD::Store(p, initialValue.Float(i), state->activeLaneMask());
+				SIMD::Store(p, initialValue.Float(i), state->robust, state->activeLaneMask());
 			});
 			break;
 		}
@@ -2826,7 +2837,7 @@ namespace sw
 		{
 			auto p = ptr + offset;
 			if (interleavedByLane) { p = interleaveByLane(p); }
-			dst.move(i, SIMD::Load<SIMD::Float>(p, state->activeLaneMask(), atomic, memoryOrder));
+			dst.move(i, SIMD::Load<SIMD::Float>(p, state->robust, state->activeLaneMask(), atomic, memoryOrder));
 		});
 		return EmitResult::Continue;
@@ -2864,7 +2875,7 @@ namespace sw
 			{
 				auto p = ptr + offset;
 				if (interleavedByLane) { p = interleaveByLane(p); }
-				SIMD::Store(p, SIMD::Float(src[i]), state->activeLaneMask(), atomic, memoryOrder);
+				SIMD::Store(p, SIMD::Float(src[i]), state->robust, state->activeLaneMask(), atomic, memoryOrder);
 			});
 		}
 		else
@@ -2875,7 +2886,7 @@ namespace sw
 			{
 				auto p = ptr + offset;
 				if (interleavedByLane) { p = interleaveByLane(p); }
-				SIMD::Store(p, src.Float(i), state->activeLaneMask(), atomic, memoryOrder);
+				SIMD::Store(p, src.Float(i), state->robust, state->activeLaneMask(), atomic, memoryOrder);
 			});
 		}
@@ -3949,7 +3960,7 @@ namespace sw
 			dst.move(i, frac);
 			auto p = ptr + (i * sizeof(float));
 			if (interleavedByLane) { p = interleaveByLane(p); }
-			SIMD::Store(p, whole, state->activeLaneMask());
+			SIMD::Store(p, whole, state->robust, state->activeLaneMask());
 		}
 		break;
 	}
@@ -4085,7 +4096,7 @@ namespace sw
 			auto p = ptr + (i * sizeof(float));
 			if (interleavedByLane) { p = interleaveByLane(p); }
-			SIMD::Store(p, exponent, state->activeLaneMask());
+			SIMD::Store(p, exponent, state->robust, state->activeLaneMask());
 		}
 		break;
 	}
@@ -5300,7 +5311,7 @@ namespace sw
 		// TODO: specialize for small formats?
 		for (auto i = 0; i < (texelSize + 3)/4; i++)
 		{
-			packed[i] = SIMD::Load<SIMD::Int>(texelPtr, state->activeLaneMask(), false, std::memory_order_relaxed, std::min(texelSize, 4));
+			packed[i] = SIMD::Load<SIMD::Int>(texelPtr, state->robust, state->activeLaneMask(), false, std::memory_order_relaxed, std::min(texelSize, 4));
 			texelPtr += sizeof(float);
 		}
@@ -5638,7 +5649,7 @@ namespace sw
 		for (auto i = 0u; i < numPackedElements; i++)
 		{
-			SIMD::Store(texelPtr, packed[i], state->activeLaneMask());
+			SIMD::Store(texelPtr, packed[i], state->robust, state->activeLaneMask());
 			texelPtr += sizeof(float);
 		}
@@ -5826,7 +5837,9 @@ namespace sw
 			auto src = srcPtr + srcOffset;
 			if (dstInterleavedByLane) { dst = interleaveByLane(dst); }
 			if (srcInterleavedByLane) { src = interleaveByLane(src); }
-			SIMD::Store(dst, SIMD::Load<SIMD::Float>(src, state->activeLaneMask()), state->activeLaneMask());
+			auto value = SIMD::Load<SIMD::Float>(src, state->robust, state->activeLaneMask());
+			SIMD::Store(dst, value, state->robust, state->activeLaneMask());
 		});
 		return EmitResult::Continue;
 	}
@@ -258,16 +258,16 @@ namespace sw
 		template <> struct Element<UInt> { using type = rr::UInt; };
 		template<typename T>
-		void Store(Pointer ptr, T val, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed);
+		void Store(Pointer ptr, T val, bool robust, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed);
 		template<typename T>
-		void Store(Pointer ptr, RValue<T> val, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed)
+		void Store(Pointer ptr, RValue<T> val, bool robust, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed)
 		{
-			Store(ptr, T(val), mask, atomic, order);
+			Store(ptr, T(val), robust, mask, atomic, order);
 		}
 		template<typename T>
-		T Load(Pointer ptr, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed, int alignment = sizeof(float));
+		T Load(Pointer ptr, bool robust, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed, int alignment = sizeof(float));
 	}
 	// Incrementally constructed complex bundle of rvalues
@@ -632,7 +632,8 @@ namespace sw
 			const char *entryPointName,
 			InsnStore const &insns,
 			const vk::RenderPass *renderPass,
-			uint32_t subpassIndex);
+			uint32_t subpassIndex,
+			bool robustBufferAccess);
 		struct Modes
 		{
@@ -831,6 +832,8 @@ namespace sw
 		HandleMap<Block> blocks;
 		Block::ID entryPointBlockId; // Block of the entry point function.
+		const bool robustBufferAccess = true;
 		// Walks all reachable the blocks starting from id adding them to
 		// reachable.
 		void TraverseReachableBlocks(Block::ID id, Block::Set& reachable);
@@ -937,10 +940,11 @@ namespace sw
 		class EmitState
 		{
 		public:
-			EmitState(SpirvRoutine *routine, RValue<SIMD::Int> activeLaneMask, const vk::DescriptorSet::Bindings &descriptorSets)
+			EmitState(SpirvRoutine *routine, RValue<SIMD::Int> activeLaneMask, const vk::DescriptorSet::Bindings &descriptorSets, bool robustBufferAccess)
 				: routine(routine),
 				  activeLaneMaskValue(activeLaneMask.value),
-				  descriptorSets(descriptorSets)
+				  descriptorSets(descriptorSets),
+				  robust(robustBufferAccess)
 			{
 			}
@@ -974,6 +978,8 @@ namespace sw
 			std::deque<Block::ID> *pending;
 			const vk::DescriptorSet::Bindings &descriptorSets;
+			const bool robust = true; // Emit robustBufferAccess safe code.
 		};
 		// EmitResult is an enumerator of result values from the Emit functions.
@@ -36,10 +36,11 @@ namespace
 namespace vk
 {
-Device::Device(const VkDeviceCreateInfo* pCreateInfo, void* mem, PhysicalDevice *physicalDevice)
+Device::Device(const VkDeviceCreateInfo* pCreateInfo, void* mem, PhysicalDevice *physicalDevice, const VkPhysicalDeviceFeatures *enabledFeatures)
 	: physicalDevice(physicalDevice),
 	  queues(reinterpret_cast<Queue*>(mem)),
-	  enabledExtensionCount(pCreateInfo->enabledExtensionCount)
+	  enabledExtensionCount(pCreateInfo->enabledExtensionCount),
+	  enabledFeatures(enabledFeatures ? *enabledFeatures : VkPhysicalDeviceFeatures{}) // "Setting pEnabledFeatures to NULL and not including a VkPhysicalDeviceFeatures2 in the pNext member of VkDeviceCreateInfo is equivalent to setting all members of the structure to VK_FALSE."
 {
 	for(uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++)
 	{
@@ -33,7 +33,7 @@ class Device
 public:
 	static constexpr VkSystemAllocationScope GetAllocationScope() { return VK_SYSTEM_ALLOCATION_SCOPE_DEVICE; }
-	Device(const VkDeviceCreateInfo* pCreateInfo, void* mem, PhysicalDevice *physicalDevice);
+	Device(const VkDeviceCreateInfo* pCreateInfo, void* mem, PhysicalDevice *physicalDevice, const VkPhysicalDeviceFeatures *enabledFeatures);
 	void destroy(const VkAllocationCallbacks* pAllocator);
 	static size_t ComputeRequiredAllocationSize(const VkDeviceCreateInfo* pCreateInfo);
@@ -47,16 +47,20 @@ public:
 	PhysicalDevice *getPhysicalDevice() const { return physicalDevice; }
 	void updateDescriptorSets(uint32_t descriptorWriteCount, const VkWriteDescriptorSet* pDescriptorWrites,
 	                          uint32_t descriptorCopyCount, const VkCopyDescriptorSet* pDescriptorCopies);
+	const VkPhysicalDeviceFeatures &getEnabledFeatures() const { return enabledFeatures; }
 	sw::Blitter* getBlitter() const { return blitter; }
 private:
-	PhysicalDevice *physicalDevice = nullptr;
-	Queue* queues = nullptr;
+	PhysicalDevice *const physicalDevice = nullptr;
+	Queue *const queues = nullptr;
 	uint32_t queueCount = 0;
-	sw::Blitter* blitter = nullptr;
-	uint32_t enabledExtensionCount = 0;
+	const uint32_t enabledExtensionCount = 0;
 	typedef char ExtensionName[VK_MAX_EXTENSION_NAME_SIZE];
 	ExtensionName* extensions = nullptr;
+	const VkPhysicalDeviceFeatures enabledFeatures = {};
+	sw::Blitter* blitter = nullptr;
 };
 using DispatchableDevice = DispatchableObject<Device, VkDevice>;
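
With the enabled features cached on the device, any caller holding a vk::Device pointer can query the snapshot taken at device creation; a brief usage sketch:

    // Hedged usage sketch of the new getter declared above.
    bool robust = (device->getEnabledFeatures().robustBufferAccess != VK_FALSE);
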
@@ -13,6 +13,8 @@
 // limitations under the License.
 #include "VkPipeline.hpp"
+#include "VkDevice.hpp"
 #include "VkPipelineCache.hpp"
 #include "VkPipelineLayout.hpp"
 #include "VkShaderModule.hpp"
@@ -220,7 +222,7 @@ std::vector<uint32_t> preprocessSpirv(
 	return optimized;
 }
-std::shared_ptr<sw::SpirvShader> createShader(const vk::PipelineCache::SpirvShaderKey& key, const vk::ShaderModule *module)
+std::shared_ptr<sw::SpirvShader> createShader(const vk::PipelineCache::SpirvShaderKey& key, const vk::ShaderModule *module, bool robustBufferAccess)
 {
 	auto code = preprocessSpirv(key.getInsns(), key.getSpecializationInfo());
 	ASSERT(code.size() > 0);
@@ -231,7 +233,7 @@ std::shared_ptr<sw::SpirvShader> createShader(const vk::PipelineCache::SpirvShad
 	// TODO(b/119409619): use allocator.
 	return std::make_shared<sw::SpirvShader>(codeSerialID, key.getPipelineStage(), key.getEntryPointName().c_str(),
-		code, key.getRenderPass(), key.getSubpassIndex());
+		code, key.getRenderPass(), key.getSubpassIndex(), robustBufferAccess);
 }
 std::shared_ptr<sw::ComputeProgram> createProgram(const vk::PipelineCache::ComputeProgramKey& key)
@@ -249,10 +251,14 @@ std::shared_ptr<sw::ComputeProgram> createProgram(const vk::PipelineCache::Compu
 namespace vk
 {
-Pipeline::Pipeline(PipelineLayout const *layout) : layout(layout) {}
+Pipeline::Pipeline(PipelineLayout const *layout, const Device *device)
+	: layout(layout),
+	  robustBufferAccess(device->getEnabledFeatures().robustBufferAccess)
+{
+}
-GraphicsPipeline::GraphicsPipeline(const VkGraphicsPipelineCreateInfo* pCreateInfo, void* mem)
-	: Pipeline(vk::Cast(pCreateInfo->layout))
+GraphicsPipeline::GraphicsPipeline(const VkGraphicsPipelineCreateInfo* pCreateInfo, void* mem, const Device *device)
+	: Pipeline(vk::Cast(pCreateInfo->layout), device)
 {
 	if(((pCreateInfo->flags &
 		~(VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT |
@@ -515,6 +521,8 @@ void GraphicsPipeline::compileShaders(const VkAllocationCallbacks* pAllocator, c
 		const PipelineCache::SpirvShaderKey key(pStage->stage, pStage->pName, module->getCode(),
 		                                        vk::Cast(pCreateInfo->renderPass), pCreateInfo->subpass,
 		                                        pStage->pSpecializationInfo);
+		auto pipelineStage = key.getPipelineStage();
 		if(pPipelineCache)
 		{
 			PipelineCache& pipelineCache = *pPipelineCache;
@@ -523,18 +531,20 @@ void GraphicsPipeline::compileShaders(const VkAllocationCallbacks* pAllocator, c
 			const std::shared_ptr<sw::SpirvShader>* spirvShader = pipelineCache[key];
 			if(!spirvShader)
 			{
-				setShader(key.getPipelineStage(), createShader(key, module));
-				pipelineCache.insert(key, getShader(key.getPipelineStage()));
+				auto shader = createShader(key, module, robustBufferAccess);
+				setShader(pipelineStage, shader);
+				pipelineCache.insert(key, getShader(pipelineStage));
 			}
 			else
 			{
-				setShader(key.getPipelineStage(), *spirvShader);
+				setShader(pipelineStage, *spirvShader);
 			}
 		}
 	}
 	else
 	{
-		setShader(key.getPipelineStage(), createShader(key, module));
+		auto shader = createShader(key, module, robustBufferAccess);
+		setShader(pipelineStage, shader);
 	}
 }
@@ -587,8 +597,8 @@ bool GraphicsPipeline::hasDynamicState(VkDynamicState dynamicState) const
 	return (dynamicStateFlags & (1 << dynamicState)) != 0;
 }
-ComputePipeline::ComputePipeline(const VkComputePipelineCreateInfo* pCreateInfo, void* mem)
-	: Pipeline(vk::Cast(pCreateInfo->layout))
+ComputePipeline::ComputePipeline(const VkComputePipelineCreateInfo* pCreateInfo, void* mem, const Device *device)
+	: Pipeline(vk::Cast(pCreateInfo->layout), device)
 {
 }
@@ -621,7 +631,7 @@ void ComputePipeline::compileShaders(const VkAllocationCallbacks* pAllocator, co
 		const std::shared_ptr<sw::SpirvShader>* spirvShader = pipelineCache[shaderKey];
 		if(!spirvShader)
 		{
-			shader = createShader(shaderKey, module);
+			shader = createShader(shaderKey, module, robustBufferAccess);
 			pipelineCache.insert(shaderKey, shader);
 		}
 		else
@@ -647,7 +657,7 @@ void ComputePipeline::compileShaders(const VkAllocationCallbacks* pAllocator, co
 	}
 	else
 	{
-		shader = createShader(shaderKey, module);
+		shader = createShader(shaderKey, module, robustBufferAccess);
 		const PipelineCache::ComputeProgramKey programKey(shader.get(), layout);
 		program = createProgram(programKey);
 	}
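
Taken together, the pipeline changes complete the plumbing from device creation down to code generation; a simplified summary of the new data flow (not literal code):

    // vkCreateDevice(pEnabledFeatures or VkPhysicalDeviceFeatures2)
    //   -> vk::Device::enabledFeatures                 (cached copy of the feature struct)
    //   -> vk::Pipeline::robustBufferAccess            (snapshot taken at pipeline creation)
    //   -> sw::SpirvShader / EmitState::robust         (per-shader emission flag)
    //   -> SIMD::Load / SIMD::Store 'robust' parameter (bounds check emitted or skipped)
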
@@ -33,11 +33,12 @@ namespace vk
 class PipelineCache;
 class PipelineLayout;
 class ShaderModule;
+class Device;
 class Pipeline
 {
 public:
-	Pipeline(PipelineLayout const *layout);
+	Pipeline(PipelineLayout const *layout, const Device *device);
 	virtual ~Pipeline() = default;
 	operator VkPipeline()
@@ -64,12 +65,14 @@ public:
 protected:
 	PipelineLayout const *layout = nullptr;
+	const bool robustBufferAccess = true;
 };
 class GraphicsPipeline : public Pipeline, public ObjectBase<GraphicsPipeline, VkPipeline>
 {
 public:
-	GraphicsPipeline(const VkGraphicsPipelineCreateInfo* pCreateInfo, void* mem);
+	GraphicsPipeline(const VkGraphicsPipelineCreateInfo* pCreateInfo, void* mem, const Device *device);
 	virtual ~GraphicsPipeline() = default;
 	void destroyPipeline(const VkAllocationCallbacks* pAllocator) override;
@@ -110,7 +113,7 @@ private:
 class ComputePipeline : public Pipeline, public ObjectBase<ComputePipeline, VkPipeline>
 {
 public:
-	ComputePipeline(const VkComputePipelineCreateInfo* pCreateInfo, void* mem);
+	ComputePipeline(const VkComputePipelineCreateInfo* pCreateInfo, void* mem, const Device *device);
 	virtual ~ComputePipeline() = default;
 	void destroyPipeline(const VkAllocationCallbacks* pAllocator) override;
@@ -393,6 +393,8 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateDevice(VkPhysicalDevice physicalDevice, c
 	const VkBaseInStructure* extensionCreateInfo = reinterpret_cast<const VkBaseInStructure*>(pCreateInfo->pNext);
+	const VkPhysicalDeviceFeatures *enabledFeatures = pCreateInfo->pEnabledFeatures;
 	while(extensionCreateInfo)
 	{
 		switch(extensionCreateInfo->sType)
@@ -410,10 +412,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateDevice(VkPhysicalDevice physicalDevice, c
 				const VkPhysicalDeviceFeatures2* physicalDeviceFeatures2 = reinterpret_cast<const VkPhysicalDeviceFeatures2*>(extensionCreateInfo);
-				if(!vk::Cast(physicalDevice)->hasFeatures(physicalDeviceFeatures2->features))
-				{
-					return VK_ERROR_FEATURE_NOT_PRESENT;
-				}
+				enabledFeatures = &physicalDeviceFeatures2->features;
 			}
 			break;
 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES:
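
This hunk makes vkCreateDevice honor features passed through VkPhysicalDeviceFeatures2 in the pNext chain as well as through pEnabledFeatures, deferring validation to the common hasFeatures() check below. For reference, enabling a feature via the pNext path looks like this (a sketch; pEnabledFeatures must then be NULL):

    VkPhysicalDeviceFeatures2 features2 = {};
    features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
    features2.features.robustBufferAccess = VK_TRUE;  // opt back in to bounds checking

    VkDeviceCreateInfo createInfo = {};
    createInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
    createInfo.pNext = &features2;
    createInfo.pEnabledFeatures = nullptr;  // required when VkPhysicalDeviceFeatures2 is chained
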
@@ -473,9 +472,9 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateDevice(VkPhysicalDevice physicalDevice, c
 	ASSERT(pCreateInfo->queueCreateInfoCount > 0);
-	if(pCreateInfo->pEnabledFeatures)
+	if(enabledFeatures)
 	{
-		if(!vk::Cast(physicalDevice)->hasFeatures(*(pCreateInfo->pEnabledFeatures)))
+		if(!vk::Cast(physicalDevice)->hasFeatures(*enabledFeatures))
 		{
 			return VK_ERROR_FEATURE_NOT_PRESENT;
 		}
@@ -495,7 +494,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateDevice(VkPhysicalDevice physicalDevice, c
 		(void)queueFamilyPropertyCount; // Silence unused variable warning
 	}
-	return vk::DispatchableDevice::Create(pAllocator, pCreateInfo, pDevice, vk::Cast(physicalDevice));
+	return vk::DispatchableDevice::Create(pAllocator, pCreateInfo, pDevice, vk::Cast(physicalDevice), enabledFeatures);
 }
 VKAPI_ATTR void VKAPI_CALL vkDestroyDevice(VkDevice device, const VkAllocationCallbacks* pAllocator)
@@ -1193,7 +1192,8 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateGraphicsPipelines(VkDevice device, VkPipe
 	VkResult errorResult = VK_SUCCESS;
 	for(uint32_t i = 0; i < createInfoCount; i++)
 	{
-		VkResult result = vk::GraphicsPipeline::Create(pAllocator, &pCreateInfos[i], &pPipelines[i]);
+		VkResult result = vk::GraphicsPipeline::Create(pAllocator, &pCreateInfos[i], &pPipelines[i], vk::Cast(device));
 		if(result == VK_SUCCESS)
 		{
 			static_cast<vk::GraphicsPipeline*>(vk::Cast(pPipelines[i]))->compileShaders(pAllocator, &pCreateInfos[i], vk::Cast(pipelineCache));
@@ -1224,7 +1224,8 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateComputePipelines(VkDevice device, VkPipel
 	VkResult errorResult = VK_SUCCESS;
 	for(uint32_t i = 0; i < createInfoCount; i++)
 	{
-		VkResult result = vk::ComputePipeline::Create(pAllocator, &pCreateInfos[i], &pPipelines[i]);
+		VkResult result = vk::ComputePipeline::Create(pAllocator, &pCreateInfos[i], &pPipelines[i], vk::Cast(device));
 		if(result == VK_SUCCESS)
 		{
 			static_cast<vk::ComputePipeline*>(vk::Cast(pPipelines[i]))->compileShaders(pAllocator, &pCreateInfos[i], vk::Cast(pipelineCache));