Commit 802d142a by Nicolas Capens

Use rep stos x86 assembly for fast clears.

Change-Id: I20b385c316f24b703da1d3071e393b4cde450173
Reviewed-on: https://swiftshader-review.googlesource.com/8811
Tested-by: Nicolas Capens <capn@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
parent 91da6b00
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#define WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN
#endif #endif
#include <windows.h> #include <windows.h>
#include <intrin.h>
#else #else
#include <sys/mman.h> #include <sys/mman.h>
#include <unistd.h> #include <unistd.h>
...@@ -34,6 +35,10 @@ ...@@ -34,6 +35,10 @@
#undef allocateZero #undef allocateZero
#undef deallocateZero #undef deallocateZero
// Define a single __x86__ macro when any of the compiler-specific 32-bit or
// 64-bit x86 target macros is set, unless __x86__ is already defined.
#if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined (_M_X64)) && !defined(__x86__)
#define __x86__
#endif
namespace sw namespace sw
{ {
size_t memoryPageSize() size_t memoryPageSize()
...@@ -128,4 +133,32 @@ void deallocateExecutable(void *memory, size_t bytes) ...@@ -128,4 +133,32 @@ void deallocateExecutable(void *memory, size_t bytes)
deallocate(memory); deallocate(memory);
} }
// Fills 'count' consecutive 16-bit elements starting at 'memory' with 'element'.
// On x86 this uses the 'rep stosw' string-store instruction (through the MSVC
// intrinsic or GCC-style inline assembly); other targets use a plain loop.
void clear(uint16_t *memory, uint16_t element, size_t count)
{
	#if defined(_MSC_VER) && defined(__x86__)
		__stosw(memory, element, count);
	#elif defined(__GNUC__) && defined(__x86__)
		// 'rep stosw' advances the destination register and counts the count
		// register down, so both must be read-write ("+") operands rather than
		// inputs that are also listed as clobbers (which the compiler rejects).
		// The "memory" clobber tells the compiler that the buffer was written.
		__asm__ __volatile__("rep stosw" : "+D"(memory), "+c"(count) : "a"(element) : "memory");
	#else
		for(size_t i = 0; i < count; i++)
		{
			memory[i] = element;
		}
	#endif
}
// Fills 'count' consecutive 32-bit elements starting at 'memory' with 'element'.
// On x86 this uses the 'rep stosl'/'rep stosd' string-store instruction (through
// the MSVC intrinsic or GCC-style inline assembly); other targets use a plain loop.
void clear(uint32_t *memory, uint32_t element, size_t count)
{
	#if defined(_MSC_VER) && defined(__x86__)
		__stosd((unsigned long*)memory, element, count);
	#elif defined(__GNUC__) && defined(__x86__)
		// 'rep stosl' advances the destination register and counts the count
		// register down, so both must be read-write ("+") operands rather than
		// inputs that are also listed as clobbers (which the compiler rejects).
		// The "memory" clobber tells the compiler that the buffer was written.
		__asm__ __volatile__("rep stosl" : "+D"(memory), "+c"(count) : "a"(element) : "memory");
	#else
		for(size_t i = 0; i < count; i++)
		{
			memory[i] = element;
		}
	#endif
}
} }
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#define Memory_hpp #define Memory_hpp
#include <stddef.h> #include <stddef.h>
#include <stdint.h>
namespace sw namespace sw
{ {
...@@ -28,6 +29,9 @@ void deallocate(void *memory); ...@@ -28,6 +29,9 @@ void deallocate(void *memory);
void *allocateExecutable(size_t bytes); // Allocates memory that can be made executable using markExecutable() void *allocateExecutable(size_t bytes); // Allocates memory that can be made executable using markExecutable()
void markExecutable(void *memory, size_t bytes); void markExecutable(void *memory, size_t bytes);
void deallocateExecutable(void *memory, size_t bytes); void deallocateExecutable(void *memory, size_t bytes);
// Fill 'count' consecutive elements starting at 'memory' with the value 'element'.
void clear(uint16_t *memory, uint16_t element, size_t count);
void clear(uint32_t *memory, uint32_t element, size_t count);
} }
#endif // Memory_hpp #endif // Memory_hpp
...@@ -14,8 +14,9 @@ ...@@ -14,8 +14,9 @@
#include "Blitter.hpp" #include "Blitter.hpp"
#include "Common/Debug.hpp"
#include "Reactor/Reactor.hpp" #include "Reactor/Reactor.hpp"
#include "Common/Memory.hpp"
#include "Common/Debug.hpp"
namespace sw namespace sw
{ {
...@@ -33,6 +34,11 @@ namespace sw ...@@ -33,6 +34,11 @@ namespace sw
void Blitter::clear(void* pixel, sw::Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask) void Blitter::clear(void* pixel, sw::Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)
{ {
if(fastClear(pixel, format, dest, dRect, rgbaMask))
{
return;
}
sw::Surface color(1, 1, 1, format, pixel, sw::Surface::bytes(format), sw::Surface::bytes(format)); sw::Surface color(1, 1, 1, format, pixel, sw::Surface::bytes(format), sw::Surface::bytes(format));
Blitter::Options clearOptions = static_cast<sw::Blitter::Options>((rgbaMask & 0xF) | CLEAR_OPERATION); Blitter::Options clearOptions = static_cast<sw::Blitter::Options>((rgbaMask & 0xF) | CLEAR_OPERATION);
SliceRect sRect(dRect); SliceRect sRect(dRect);
...@@ -40,6 +46,88 @@ namespace sw ...@@ -40,6 +46,88 @@ namespace sw
blit(&color, sRect, dest, dRect, clearOptions); blit(&color, sRect, dest, dRect, clearOptions);
} }
// Attempts to clear the rectangle 'dRect' of 'dest' by packing the clear color
// once on the CPU and filling each row with sw::clear().
//
// pixel    - clear color; only FORMAT_A32B32G32R32F is handled, read as four
//            floats in r, g, b, a order.
// format   - format of 'pixel'.
// dest     - surface to clear; only a handful of 16-bit and 32-bit color
//            formats are handled (see the switch below).
// dRect    - destination rectangle and slice.
// rgbaMask - channel write mask; every channel that the destination format
//            stores must be enabled.
//
// Returns true when the clear was performed. Returns false, without touching
// 'dest', when the fast path does not apply and the caller must fall back to
// the general clear path.
bool Blitter::fastClear(void* pixel, sw::Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)
{
	if(format != FORMAT_A32B32G32R32F)
	{
		return false;
	}

	float *color = (float*)pixel;
	float r = color[0];
	float g = color[1];
	float b = color[2];
	float a = color[3];

	// The packing below converts scaled floats straight to unsigned integers.
	// A negative or NaN channel makes that conversion undefined, and a value
	// above 1 would spill into the neighbouring channel's bits, so leave such
	// colors to the general path. The comparisons are written so NaN fails them.
	const bool rgbInRange = (r >= 0.0f && r <= 1.0f) &&
	                        (g >= 0.0f && g <= 1.0f) &&
	                        (b >= 0.0f && b <= 1.0f);
	const bool alphaInRange = (a >= 0.0f && a <= 1.0f);

	if(!rgbInRange)
	{
		return false;
	}

	uint32_t packed;

	switch(dest->getFormat())
	{
	case FORMAT_R5G6B5:
		if((rgbaMask & 0x7) != 0x7) return false;
		packed = ((uint16_t)(31 * b + 0.5f) << 0) |
		         ((uint16_t)(63 * g + 0.5f) << 5) |
		         ((uint16_t)(31 * r + 0.5f) << 11);
		break;
	case FORMAT_X8B8G8R8:
		if((rgbaMask & 0x7) != 0x7) return false;
		packed = ((uint32_t)(255) << 24) |
		         ((uint32_t)(255 * b + 0.5f) << 16) |
		         ((uint32_t)(255 * g + 0.5f) << 8) |
		         ((uint32_t)(255 * r + 0.5f) << 0);
		break;
	case FORMAT_A8B8G8R8:
		if((rgbaMask & 0xF) != 0xF) return false;
		if(!alphaInRange) return false;
		packed = ((uint32_t)(255 * a + 0.5f) << 24) |
		         ((uint32_t)(255 * b + 0.5f) << 16) |
		         ((uint32_t)(255 * g + 0.5f) << 8) |
		         ((uint32_t)(255 * r + 0.5f) << 0);
		break;
	case FORMAT_X8R8G8B8:
		if((rgbaMask & 0x7) != 0x7) return false;
		packed = ((uint32_t)(255) << 24) |
		         ((uint32_t)(255 * r + 0.5f) << 16) |
		         ((uint32_t)(255 * g + 0.5f) << 8) |
		         ((uint32_t)(255 * b + 0.5f) << 0);
		break;
	case FORMAT_A8R8G8B8:
		if((rgbaMask & 0xF) != 0xF) return false;
		if(!alphaInRange) return false;
		packed = ((uint32_t)(255 * a + 0.5f) << 24) |
		         ((uint32_t)(255 * r + 0.5f) << 16) |
		         ((uint32_t)(255 * g + 0.5f) << 8) |
		         ((uint32_t)(255 * b + 0.5f) << 0);
		break;
	default:
		return false;
	}

	uint8_t *d = (uint8_t*)dest->lockInternal(dRect.x0, dRect.y0, dRect.slice, sw::LOCK_WRITEONLY, sw::PUBLIC);

	// Number of pixels per row; invariant across rows.
	const size_t width = dRect.x1 - dRect.x0;

	switch(Surface::bytes(dest->getFormat()))
	{
	case 2:
		for(int i = dRect.y0; i < dRect.y1; i++)
		{
			sw::clear((uint16_t*)d, packed, width);
			d += dest->getInternalPitchB();
		}
		break;
	case 4:
		for(int i = dRect.y0; i < dRect.y1; i++)
		{
			sw::clear((uint32_t*)d, packed, width);
			d += dest->getInternalPitchB();
		}
		break;
	default:
		assert(false);
	}

	dest->unlockInternal();

	return true;
}
void Blitter::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter, bool isStencil) void Blitter::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter, bool isStencil)
{ {
Blitter::Options options = WRITE_RGBA; Blitter::Options options = WRITE_RGBA;
......
...@@ -81,6 +81,8 @@ namespace sw ...@@ -81,6 +81,8 @@ namespace sw
void blit3D(Surface *source, Surface *dest); void blit3D(Surface *source, Surface *dest);
private: private:
// Tries to clear 'dRect' of 'dest' directly with a packed color value.
// Returns true if the clear was done, false if the caller must use the general path.
bool fastClear(void* pixel, sw::Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask);
bool read(Float4 &color, Pointer<Byte> element, Format format); bool read(Float4 &color, Pointer<Byte> element, Format format);
bool write(Float4 &color, Pointer<Byte> element, Format format, const Blitter::Options& options); bool write(Float4 &color, Pointer<Byte> element, Format format, const Blitter::Options& options);
bool read(Int4 &color, Pointer<Byte> element, Format format); bool read(Int4 &color, Pointer<Byte> element, Format format);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment