Commit 802d142a by Nicolas Capens

Use rep stos x86 assembly for fast clears.

Change-Id: I20b385c316f24b703da1d3071e393b4cde450173
Reviewed-on: https://swiftshader-review.googlesource.com/8811
Tested-by: Nicolas Capens <capn@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
parent 91da6b00
......@@ -22,6 +22,7 @@
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#include <intrin.h>
#else
#include <sys/mman.h>
#include <unistd.h>
......@@ -34,6 +35,10 @@
#undef allocateZero
#undef deallocateZero
// Define one common __x86__ macro whenever any of the compiler-provided
// 32-bit or 64-bit x86 target macros is set, unless it is already defined.
#if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined (_M_X64)) && !defined(__x86__)
#define __x86__
#endif
namespace sw
{
size_t memoryPageSize()
......@@ -128,4 +133,32 @@ void deallocateExecutable(void *memory, size_t bytes)
deallocate(memory);
}
// Fills `count` consecutive 16-bit elements starting at `memory` with `element`.
// On x86 builds this uses the `rep stosw` string-store instruction (through the
// MSVC intrinsic or GCC-style inline assembly); all other builds use a plain loop.
// A `count` of zero writes nothing.
void clear(uint16_t *memory, uint16_t element, size_t count)
{
#if defined(_MSC_VER) && defined(__x86__)
	__stosw(memory, element, count);
#elif defined(__GNUC__) && defined(__x86__)
	// `rep stos` advances the destination register and counts the count
	// register down to zero, so both are declared as read-write operands ("+").
	// Listing them as inputs *and* clobbers is not allowed: clobbers may not
	// overlap operands. The "memory" clobber tells the compiler that the
	// buffer is written, so it does not cache or reorder accesses across the
	// statement.
	__asm__ __volatile__("rep stosw" : "+D"(memory), "+c"(count) : "a"(element) : "memory");
#else
	for(size_t i = 0; i < count; i++)
	{
		memory[i] = element;
	}
#endif
}
// Fills `count` consecutive 32-bit elements starting at `memory` with `element`.
// On x86 builds this uses the `rep stosl` string-store instruction (through the
// MSVC intrinsic or GCC-style inline assembly); all other builds use a plain loop.
// A `count` of zero writes nothing.
void clear(uint32_t *memory, uint32_t element, size_t count)
{
#if defined(_MSC_VER) && defined(__x86__)
	// The intrinsic is declared in terms of unsigned long, hence the pointer cast.
	__stosd((unsigned long*)memory, element, count);
#elif defined(__GNUC__) && defined(__x86__)
	// `rep stos` advances the destination register and counts the count
	// register down to zero, so both are declared as read-write operands ("+").
	// Listing them as inputs *and* clobbers is not allowed: clobbers may not
	// overlap operands. The "memory" clobber tells the compiler that the
	// buffer is written, so it does not cache or reorder accesses across the
	// statement.
	__asm__ __volatile__("rep stosl" : "+D"(memory), "+c"(count) : "a"(element) : "memory");
#else
	for(size_t i = 0; i < count; i++)
	{
		memory[i] = element;
	}
#endif
}
}
......@@ -16,6 +16,7 @@
#define Memory_hpp
#include <stddef.h>
#include <stdint.h>
namespace sw
{
......@@ -28,6 +29,9 @@ void deallocate(void *memory);
void *allocateExecutable(size_t bytes); // Allocates memory that can be made executable using markExecutable()
void markExecutable(void *memory, size_t bytes);
void deallocateExecutable(void *memory, size_t bytes);
// Fill count consecutive elements, starting at memory, with the value element.
// Overloads are provided for 16-bit and 32-bit elements.
void clear(uint16_t *memory, uint16_t element, size_t count);
void clear(uint32_t *memory, uint32_t element, size_t count);
}
#endif // Memory_hpp
......@@ -14,8 +14,9 @@
#include "Blitter.hpp"
#include "Common/Debug.hpp"
#include "Reactor/Reactor.hpp"
#include "Common/Memory.hpp"
#include "Common/Debug.hpp"
namespace sw
{
......@@ -33,6 +34,11 @@ namespace sw
// Clears the dRect region of dest using the single pixel value stored at
// pixel, whose layout is described by format.
// NOTE(review): this listing is a diff excerpt; lines between the two hunks
// of this function are not shown here.
void Blitter::clear(void* pixel, sw::Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)
{
// Try the direct memory-fill path first; it returns false when it does not
// handle this request, in which case the general path below is used.
if(fastClear(pixel, format, dest, dRect, rgbaMask))
{
return;
}
// General path: wrap the source pixel in a 1x1x1 surface and blit it onto the
// destination rectangle.
sw::Surface color(1, 1, 1, format, pixel, sw::Surface::bytes(format), sw::Surface::bytes(format));
// The low four bits of rgbaMask are forwarded as blit options together with
// CLEAR_OPERATION (presumably the per-channel write mask - confirm against
// the Blitter::Options definition).
Blitter::Options clearOptions = static_cast<sw::Blitter::Options>((rgbaMask & 0xF) | CLEAR_OPERATION);
SliceRect sRect(dRect);
......@@ -40,6 +46,88 @@ namespace sw
blit(&color, sRect, dest, dRect, clearOptions);
}
// Attempts to clear the dRect region of dest by packing the clear color once
// and writing it straight into the destination's internal buffer, one row at
// a time, with sw::clear().
//
// pixel is read as four floats in r, g, b, a order, and only when format is
// FORMAT_A32B32G32R32F. rgbaMask must select all color channels (and alpha
// too, for destination formats that store alpha).
//
// Returns true when the clear was performed here. Returns false when this
// path does not apply: unsupported source or destination format, a partial
// channel mask, or a color component outside [0, 1]. In every false case the
// destination has not been locked or written, so the caller can fall back to
// its general path.
bool Blitter::fastClear(void* pixel, sw::Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)
{
	if(format != FORMAT_A32B32G32R32F)
	{
		return false;
	}

	const float *color = (const float*)pixel;
	const float r = color[0];
	const float g = color[1];
	const float b = color[2];
	const float a = color[3];

	// The packing below converts each component with a plain float-to-integer
	// cast and no clamping. A component outside [0, 1] would overflow its bit
	// field and spill into neighbouring channels, so such colors are not
	// handled here. The comparisons are written so that NaN is rejected too.
	if(!(r >= 0.0f && r <= 1.0f) ||
	   !(g >= 0.0f && g <= 1.0f) ||
	   !(b >= 0.0f && b <= 1.0f) ||
	   !(a >= 0.0f && a <= 1.0f))
	{
		return false;
	}

	uint32_t packed;

	switch(dest->getFormat())
	{
	case FORMAT_R5G6B5:
		if((rgbaMask & 0x7) != 0x7) return false;
		packed = ((uint16_t)(31 * b + 0.5f) << 0) |
		         ((uint16_t)(63 * g + 0.5f) << 5) |
		         ((uint16_t)(31 * r + 0.5f) << 11);
		break;
	case FORMAT_X8B8G8R8:
		if((rgbaMask & 0x7) != 0x7) return false;
		packed = ((uint32_t)(255) << 24) |
		         ((uint32_t)(255 * b + 0.5f) << 16) |
		         ((uint32_t)(255 * g + 0.5f) << 8) |
		         ((uint32_t)(255 * r + 0.5f) << 0);
		break;
	case FORMAT_A8B8G8R8:
		if((rgbaMask & 0xF) != 0xF) return false;
		packed = ((uint32_t)(255 * a + 0.5f) << 24) |
		         ((uint32_t)(255 * b + 0.5f) << 16) |
		         ((uint32_t)(255 * g + 0.5f) << 8) |
		         ((uint32_t)(255 * r + 0.5f) << 0);
		break;
	case FORMAT_X8R8G8B8:
		if((rgbaMask & 0x7) != 0x7) return false;
		packed = ((uint32_t)(255) << 24) |
		         ((uint32_t)(255 * r + 0.5f) << 16) |
		         ((uint32_t)(255 * g + 0.5f) << 8) |
		         ((uint32_t)(255 * b + 0.5f) << 0);
		break;
	case FORMAT_A8R8G8B8:
		if((rgbaMask & 0xF) != 0xF) return false;
		packed = ((uint32_t)(255 * a + 0.5f) << 24) |
		         ((uint32_t)(255 * r + 0.5f) << 16) |
		         ((uint32_t)(255 * g + 0.5f) << 8) |
		         ((uint32_t)(255 * b + 0.5f) << 0);
		break;
	default:
		return false;
	}

	// d starts at the locked address for (x0, y0) of the requested slice and
	// is advanced by the internal pitch after each row.
	uint8_t *d = (uint8_t*)dest->lockInternal(dRect.x0, dRect.y0, dRect.slice, sw::LOCK_WRITEONLY, sw::PUBLIC);

	switch(Surface::bytes(dest->getFormat()))
	{
	case 2:
		for(int i = dRect.y0; i < dRect.y1; i++)
		{
			sw::clear((uint16_t*)d, (uint16_t)packed, dRect.x1 - dRect.x0);
			d += dest->getInternalPitchB();
		}
		break;
	case 4:
		for(int i = dRect.y0; i < dRect.y1; i++)
		{
			sw::clear((uint32_t*)d, packed, dRect.x1 - dRect.x0);
			d += dest->getInternalPitchB();
		}
		break;
	default:
		// Every destination format accepted above is 2 or 4 bytes per pixel.
		assert(false);
	}

	dest->unlockInternal();

	return true;
}
void Blitter::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter, bool isStencil)
{
Blitter::Options options = WRITE_RGBA;
......
......@@ -81,6 +81,8 @@ namespace sw
void blit3D(Surface *source, Surface *dest);
private:
// Tries to perform the clear by filling destination memory directly.
// Returns true if the clear was done, false if the request is not handled.
bool fastClear(void* pixel, sw::Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask);
bool read(Float4 &color, Pointer<Byte> element, Format format);
bool write(Float4 &color, Pointer<Byte> element, Format format, const Blitter::Options& options);
bool read(Int4 &color, Pointer<Byte> element, Format format);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment