Commit b3240d0c by Nicolas Capens Committed by Nicolas Capens

Optimize clearing of depth and stencil images

Extend Blitter::fastClear(), which is based on memset() instead of using Reactor routines, to also handle the D32_SFLOAT and S8_UINT formats.

Benchmark results:

Run on (48 X 2594 MHz CPU s)
CPU Caches:
  L1 Data 32 KiB (x24)
  L1 Instruction 32 KiB (x24)
  L2 Unified 256 KiB (x24)
  L3 Unified 30720 KiB (x2)
--------------------------------------------------------------------------
Benchmark                                          Time        CPU   Iterations
--------------------------------------------------------------------------
(LLVM, before)    ClearImage/VK_FORMAT_D32_SFLOAT  3.74 ms     0.016 ms   1000
(LLVM, after)     ClearImage/VK_FORMAT_D32_SFLOAT  1.08 ms     0.044 ms   10000
(Subzero, before) ClearImage/VK_FORMAT_D32_SFLOAT  4.51 ms     0.063 ms   1000
(Subzero, after)  ClearImage/VK_FORMAT_D32_SFLOAT  0.963 ms    0.040 ms   7467

This change re-implements https://swiftshader-review.googlesource.com/c/SwiftShader/+/45888, which read 'clearValue' out of bounds: it accessed color[1] even when the clear value was only a single depth or stencil value.

Bug: b/159455503
Bug: chromium:1097740
Change-Id: Id3e74b4fa28ee0422540a8480814f8c9988f402a
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/45949
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Antonio Maiorano <amaiorano@google.com>
parent 5c7b568c
...@@ -154,53 +154,77 @@ void Blitter::clear(void *pixel, vk::Format format, vk::Image *dest, const vk::F ...@@ -154,53 +154,77 @@ void Blitter::clear(void *pixel, vk::Format format, vk::Image *dest, const vk::F
} }
} }
bool Blitter::fastClear(void *pixel, vk::Format format, vk::Image *dest, const vk::Format &viewFormat, const VkImageSubresourceRange &subresourceRange, const VkRect2D *renderArea) bool Blitter::fastClear(void *clearValue, vk::Format clearFormat, vk::Image *dest, const vk::Format &viewFormat, const VkImageSubresourceRange &subresourceRange, const VkRect2D *renderArea)
{ {
if(format != VK_FORMAT_R32G32B32A32_SFLOAT) if(clearFormat != VK_FORMAT_R32G32B32A32_SFLOAT &&
clearFormat != VK_FORMAT_D32_SFLOAT &&
clearFormat != VK_FORMAT_S8_UINT)
{ {
return false; return false;
} }
float *color = (float *)pixel; union ClearValue
float r = color[0]; {
float g = color[1]; struct
float b = color[2]; {
float a = color[3]; float r;
float g;
float b;
float a;
};
uint32_t packed; float rgb[3];
float d;
uint32_t d_as_u32;
uint32_t s;
};
ClearValue &c = *reinterpret_cast<ClearValue *>(clearValue);
uint32_t packed = 0;
VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(subresourceRange.aspectMask); VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(subresourceRange.aspectMask);
switch(viewFormat) switch(viewFormat)
{ {
case VK_FORMAT_R5G6B5_UNORM_PACK16: case VK_FORMAT_R5G6B5_UNORM_PACK16:
packed = ((uint16_t)(31 * b + 0.5f) << 0) | packed = ((uint16_t)(31 * c.b + 0.5f) << 0) |
((uint16_t)(63 * g + 0.5f) << 5) | ((uint16_t)(63 * c.g + 0.5f) << 5) |
((uint16_t)(31 * r + 0.5f) << 11); ((uint16_t)(31 * c.r + 0.5f) << 11);
break; break;
case VK_FORMAT_B5G6R5_UNORM_PACK16: case VK_FORMAT_B5G6R5_UNORM_PACK16:
packed = ((uint16_t)(31 * r + 0.5f) << 0) | packed = ((uint16_t)(31 * c.r + 0.5f) << 0) |
((uint16_t)(63 * g + 0.5f) << 5) | ((uint16_t)(63 * c.g + 0.5f) << 5) |
((uint16_t)(31 * b + 0.5f) << 11); ((uint16_t)(31 * c.b + 0.5f) << 11);
break; break;
case VK_FORMAT_A8B8G8R8_UINT_PACK32: case VK_FORMAT_A8B8G8R8_UINT_PACK32:
case VK_FORMAT_A8B8G8R8_UNORM_PACK32: case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
case VK_FORMAT_R8G8B8A8_UNORM: case VK_FORMAT_R8G8B8A8_UNORM:
packed = ((uint32_t)(255 * a + 0.5f) << 24) | packed = ((uint32_t)(255 * c.a + 0.5f) << 24) |
((uint32_t)(255 * b + 0.5f) << 16) | ((uint32_t)(255 * c.b + 0.5f) << 16) |
((uint32_t)(255 * g + 0.5f) << 8) | ((uint32_t)(255 * c.g + 0.5f) << 8) |
((uint32_t)(255 * r + 0.5f) << 0); ((uint32_t)(255 * c.r + 0.5f) << 0);
break; break;
case VK_FORMAT_B8G8R8A8_UNORM: case VK_FORMAT_B8G8R8A8_UNORM:
packed = ((uint32_t)(255 * a + 0.5f) << 24) | packed = ((uint32_t)(255 * c.a + 0.5f) << 24) |
((uint32_t)(255 * r + 0.5f) << 16) | ((uint32_t)(255 * c.r + 0.5f) << 16) |
((uint32_t)(255 * g + 0.5f) << 8) | ((uint32_t)(255 * c.g + 0.5f) << 8) |
((uint32_t)(255 * b + 0.5f) << 0); ((uint32_t)(255 * c.b + 0.5f) << 0);
break; break;
case VK_FORMAT_B10G11R11_UFLOAT_PACK32: case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
packed = R11G11B10F(color); packed = R11G11B10F(c.rgb);
break; break;
case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32: case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
packed = RGB9E5(color); packed = RGB9E5(c.rgb);
break;
case VK_FORMAT_D32_SFLOAT:
ASSERT(clearFormat == VK_FORMAT_D32_SFLOAT);
packed = c.d_as_u32; // float reinterpreted as uint32
break;
case VK_FORMAT_S8_UINT:
ASSERT(clearFormat == VK_FORMAT_S8_UINT);
packed = static_cast<uint8_t>(c.s);
break; break;
default: default:
return false; return false;
...@@ -249,6 +273,14 @@ bool Blitter::fastClear(void *pixel, vk::Format format, vk::Image *dest, const v ...@@ -249,6 +273,14 @@ bool Blitter::fastClear(void *pixel, vk::Format format, vk::Image *dest, const v
switch(viewFormat.bytes()) switch(viewFormat.bytes())
{ {
case 4:
for(uint32_t i = 0; i < area.extent.height; i++)
{
ASSERT(d < dest->end());
sw::clear((uint32_t *)d, packed, area.extent.width);
d += rowPitchBytes;
}
break;
case 2: case 2:
for(uint32_t i = 0; i < area.extent.height; i++) for(uint32_t i = 0; i < area.extent.height; i++)
{ {
...@@ -257,11 +289,11 @@ bool Blitter::fastClear(void *pixel, vk::Format format, vk::Image *dest, const v ...@@ -257,11 +289,11 @@ bool Blitter::fastClear(void *pixel, vk::Format format, vk::Image *dest, const v
d += rowPitchBytes; d += rowPitchBytes;
} }
break; break;
case 4: case 1:
for(uint32_t i = 0; i < area.extent.height; i++) for(uint32_t i = 0; i < area.extent.height; i++)
{ {
ASSERT(d < dest->end()); ASSERT(d < dest->end());
sw::clear((uint32_t *)d, packed, area.extent.width); memset(d, packed, area.extent.width);
d += rowPitchBytes; d += rowPitchBytes;
} }
break; break;
......
...@@ -141,7 +141,7 @@ public: ...@@ -141,7 +141,7 @@ public:
Blitter(); Blitter();
virtual ~Blitter(); virtual ~Blitter();
void clear(void *pixel, vk::Format format, vk::Image *dest, const vk::Format &viewFormat, const VkImageSubresourceRange &subresourceRange, const VkRect2D *renderArea = nullptr); void clear(void *clearValue, vk::Format clearFormat, vk::Image *dest, const vk::Format &viewFormat, const VkImageSubresourceRange &subresourceRange, const VkRect2D *renderArea = nullptr);
void blit(const vk::Image *src, vk::Image *dst, VkImageBlit region, VkFilter filter); void blit(const vk::Image *src, vk::Image *dst, VkImageBlit region, VkFilter filter);
void copy(const vk::Image *src, uint8_t *dst, unsigned int dstPitch); void copy(const vk::Image *src, uint8_t *dst, unsigned int dstPitch);
...@@ -157,7 +157,7 @@ private: ...@@ -157,7 +157,7 @@ private:
LEFT LEFT
}; };
bool fastClear(void *pixel, vk::Format format, vk::Image *dest, const vk::Format &viewFormat, const VkImageSubresourceRange &subresourceRange, const VkRect2D *renderArea); bool fastClear(void *clearValue, vk::Format clearFormat, vk::Image *dest, const vk::Format &viewFormat, const VkImageSubresourceRange &subresourceRange, const VkRect2D *renderArea);
Float4 readFloat4(Pointer<Byte> element, const State &state); Float4 readFloat4(Pointer<Byte> element, const State &state);
void write(Float4 &color, Pointer<Byte> element, const State &state); void write(Float4 &color, Pointer<Byte> element, const State &state);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment