Commit a6bc61d6 by Nicolas Capens Committed by Nicolas Capens

Use half-precision for special internal formats.

This change uses half-precision floating-point implementation formats for all OpenGL half-precision floating-point formats, as well as the R11F_G11F_B10F and RGB9_E5 special internal formats. sw::FORMAT_X16B16G16R16F was implemented for the formats without alpha. RGB9E5 conversion was optimized to not require powf(2.0, x), and 11- and 10-bit floating-point formats were optimized to map directly to 16-bit half-precision floating-point.

Change-Id: Ic33f903d01f37394244aec9f53b0e67d1c978764
Reviewed-on: https://swiftshader-review.googlesource.com/15410
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
parent 18abaf5a
...@@ -20,6 +20,7 @@ namespace sw ...@@ -20,6 +20,7 @@ namespace sw
class half class half
{ {
public: public:
half() = default;
explicit half(float f); explicit half(float f);
operator float() const; operator float() const;
...@@ -30,6 +31,63 @@ namespace sw ...@@ -30,6 +31,63 @@ namespace sw
private: private:
unsigned short fp16i; unsigned short fp16i;
}; };
// Reinterprets the 16 bits of 's' as a half-precision value.
// No numeric conversion takes place: the bit pattern is carried over unchanged.
inline half shortAsHalf(short s)
{
	// The pun below only makes sense when both union members cover exactly the
	// same bytes, so enforce that at compile time instead of assuming it.
	static_assert(sizeof(half) == sizeof(short), "half must be exactly as wide as short");

	union
	{
		half h;
		short s;
	} hs;

	hs.s = s;

	return hs.h;
}
// One packed texel with three 9-bit mantissas (R, G, B) and a 5-bit exponent (E)
// that is shared by all three components.
class RGB9E5
{
	unsigned int R : 9;
	unsigned int G : 9;
	unsigned int B : 9;
	unsigned int E : 5;

public:
	// Decodes the texel into three half-precision values written to rgb[0..2].
	void toRGB16F(half rgb[3]) const
	{
		// Exponent bias (15) + number of mantissa bits per component (9) = 24
		constexpr int exponentBias = 15;
		constexpr int mantissaBits = 9;
		constexpr float unit = 1.0f / (1 << (exponentBias + mantissaBits));

		// 2^E * 2^-24: both terms are exact powers of two, so no rounding occurs here.
		const float scale = (1u << E) * unit;

		const unsigned int mantissas[3] = { R, G, B };

		for(int i = 0; i < 3; i++)
		{
			rgb[i] = half(mantissas[i] * scale);
		}
	}
};
class R11G11B10F
{
unsigned int R : 11;
unsigned int G : 11;
unsigned int B : 10;
static inline half float11ToFloat16(unsigned short fp11)
{
return shortAsHalf(fp11 << 4); // Sign bit 0
}
static inline half float10ToFloat16(unsigned short fp10)
{
return shortAsHalf(fp10 << 5); // Sign bit 0
}
public:
void toRGB16F(half rgb[3]) const
{
rgb[0] = float11ToFloat16(R);
rgb[1] = float11ToFloat16(G);
rgb[2] = float10ToFloat16(B);
}
};
} }
#endif // sw_Half_hpp #endif // sw_Half_hpp
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#define sw_Math_hpp #define sw_Math_hpp
#include "Types.hpp" #include "Types.hpp"
#include "Half.hpp"
#include <cmath> #include <cmath>
#if defined(_MSC_VER) #if defined(_MSC_VER)
...@@ -360,122 +361,6 @@ namespace sw ...@@ -360,122 +361,6 @@ namespace sw
{ {
return static_cast<int>(min(x, 0x7FFFFFFFu)); return static_cast<int>(min(x, 0x7FFFFFFFu));
} }
// One packed texel with three 9-bit mantissas (R, G, B) and a 5-bit exponent (E)
// that is shared by all three components.
class RGB9E5Data
{
	unsigned int R : 9;
	unsigned int G : 9;
	unsigned int B : 9;
	unsigned int E : 5;

public:
	// Decodes the texel into three 32-bit floats written to rgb[0..2].
	// Each component equals mantissa * 2^(E - 24).
	void toRGBFloats(float* rgb) const
	{
		// Exponent Bias (15) + Number of mantissa bits per component (9) = 24
		constexpr int offset = 24;

		// 2^E * 2^-24, built from two exact powers of two. This avoids calling
		// powf() for a value that can be formed with a shift, and E (5 bits,
		// at most 31) always fits the unsigned shift.
		const float factor = (1u << E) * (1.0f / (1 << offset));

		rgb[0] = static_cast<float>(R) * factor;
		rgb[1] = static_cast<float>(G) * factor;
		rgb[2] = static_cast<float>(B) * factor;
	}
};
// One packed texel holding two unsigned 11-bit floats (R, G: 5 exponent bits,
// 6 mantissa bits) and one unsigned 10-bit float (B: 5 exponent bits,
// 5 mantissa bits).
class R11G11B10FData
{
	unsigned int R : 11;
	unsigned int G : 11;
	unsigned int B : 10;

	// Expands an unsigned small float with a 5-bit exponent and 'mantissaBits'
	// mantissa bits into a 32-bit float. Handles zero, denormals, normalized
	// values, infinity and NaN.
	static inline float smallFloatToFloat32(unsigned int bits, int mantissaBits)
	{
		static_assert(sizeof(float) == sizeof(unsigned int), "float must be 32 bits wide");

		const unsigned int hiddenBit = 1u << mantissaBits;   // Position of the implicit leading one
		const unsigned int mantissaMask = hiddenBit - 1;
		const int mantissaShift = 23 - mantissaBits;          // Aligns the mantissa with float's 23-bit field

		// Signed on purpose: normalizing a denormal drives the exponent below
		// zero, and an unsigned type would wrap and overflow the shift below.
		int exponent = static_cast<int>((bits >> mantissaBits) & 0x1F);
		unsigned int mantissa = bits & mantissaMask;

		unsigned int output = 0;   // The value is zero unless one of the cases below applies

		if(exponent == 0x1F)
		{
			// INF or NAN
			output = 0x7F800000u | (mantissa << mantissaShift);
		}
		else if(exponent != 0 || mantissa != 0)
		{
			if(exponent == 0)
			{
				// The value is denormalized: shift the mantissa up until its
				// leading one reaches the hidden-bit position, adjusting the
				// exponent for every step.
				exponent = 1;

				do
				{
					exponent--;
					mantissa <<= 1;
				} while((mantissa & hiddenBit) == 0);

				mantissa &= mantissaMask;
			}

			// Rebias from 15 to 127 (difference of 112). The smallest exponent
			// reachable here is 1 - 6 = -5, so the sum is always positive.
			output = (static_cast<unsigned int>(exponent + 112) << 23) | (mantissa << mantissaShift);
		}

		// Reinterpret the assembled bit pattern as a float through a union
		// rather than by dereferencing a casted pointer.
		union
		{
			unsigned int i;
			float f;
		} pattern;

		pattern.i = output;

		return pattern.f;
	}

	static inline float float11ToFloat32(unsigned short fp11)
	{
		return smallFloatToFloat32(fp11, 6);
	}

	static inline float float10ToFloat32(unsigned short fp10)
	{
		return smallFloatToFloat32(fp10, 5);
	}

public:
	// Decodes the texel into three 32-bit floats written to rgb[0..2].
	void toRGBFloats(float* rgb) const
	{
		rgb[0] = float11ToFloat32(R);
		rgb[1] = float11ToFloat32(G);
		rgb[2] = float10ToFloat32(B);
	}
};
} }
#endif // sw_Math_hpp #endif // sw_Math_hpp
...@@ -272,12 +272,12 @@ namespace ...@@ -272,12 +272,12 @@ namespace
template<> template<>
void LoadImageRow<R11G11B10F>(const unsigned char *source, unsigned char *dest, GLint xoffset, GLsizei width) void LoadImageRow<R11G11B10F>(const unsigned char *source, unsigned char *dest, GLint xoffset, GLsizei width)
{ {
const sw::R11G11B10FData *sourceRGB = reinterpret_cast<const sw::R11G11B10FData*>(source); const sw::R11G11B10F *sourceRGB = reinterpret_cast<const sw::R11G11B10F*>(source);
float *destF = reinterpret_cast<float*>(dest + xoffset * 16); sw::half *destF = reinterpret_cast<sw::half*>(dest + xoffset * 8);
for(int x = 0; x < width; x++, sourceRGB++, destF+=4) for(int x = 0; x < width; x++, sourceRGB++, destF+=4)
{ {
sourceRGB->toRGBFloats(destF); sourceRGB->toRGB16F(destF);
destF[3] = 1.0f; destF[3] = 1.0f;
} }
} }
...@@ -285,12 +285,12 @@ namespace ...@@ -285,12 +285,12 @@ namespace
template<> template<>
void LoadImageRow<RGB9E5>(const unsigned char *source, unsigned char *dest, GLint xoffset, GLsizei width) void LoadImageRow<RGB9E5>(const unsigned char *source, unsigned char *dest, GLint xoffset, GLsizei width)
{ {
const sw::RGB9E5Data *sourceRGB = reinterpret_cast<const sw::RGB9E5Data*>(source); const sw::RGB9E5 *sourceRGB = reinterpret_cast<const sw::RGB9E5*>(source);
float *destF = reinterpret_cast<float*>(dest + xoffset * 16); sw::half *destF = reinterpret_cast<sw::half*>(dest + xoffset * 8);
for(int x = 0; x < width; x++, sourceRGB++, destF += 4) for(int x = 0; x < width; x++, sourceRGB++, destF += 4)
{ {
sourceRGB->toRGBFloats(destF); sourceRGB->toRGB16F(destF);
destF[3] = 1.0f; destF[3] = 1.0f;
} }
} }
...@@ -665,9 +665,10 @@ namespace egl ...@@ -665,9 +665,10 @@ namespace egl
return sw::FORMAT_G32R32F; return sw::FORMAT_G32R32F;
case GL_RGB: case GL_RGB:
case GL_RGB32F: case GL_RGB32F:
return sw::FORMAT_X32B32G32R32F;
case GL_R11F_G11F_B10F: case GL_R11F_G11F_B10F:
case GL_RGB9_E5: case GL_RGB9_E5:
return sw::FORMAT_X32B32G32R32F; return sw::FORMAT_X16B16G16R16F;
case GL_RGBA: case GL_RGBA:
case GL_RGBA32F: case GL_RGBA32F:
return sw::FORMAT_A32B32G32R32F; return sw::FORMAT_A32B32G32R32F;
...@@ -676,6 +677,7 @@ namespace egl ...@@ -676,6 +677,7 @@ namespace egl
case GL_RG16F: case GL_RG16F:
return sw::FORMAT_G16R16F; return sw::FORMAT_G16R16F;
case GL_RGB16F: case GL_RGB16F:
return sw::FORMAT_X16B16G16R16F;
case GL_RGBA16F: case GL_RGBA16F:
return sw::FORMAT_A16B16G16R16F; return sw::FORMAT_A16B16G16R16F;
case GL_DEPTH_COMPONENT: case GL_DEPTH_COMPONENT:
...@@ -703,14 +705,14 @@ namespace egl ...@@ -703,14 +705,14 @@ namespace egl
case GL_RG: case GL_RG:
case GL_RG16F: case GL_RG16F:
return sw::FORMAT_G16R16F; return sw::FORMAT_G16R16F;
case GL_RGB:
case GL_RGB16F:
case GL_RGBA: case GL_RGBA:
case GL_RGBA16F: case GL_RGBA16F:
return sw::FORMAT_A16B16G16R16F; return sw::FORMAT_A16B16G16R16F;
case GL_RGB:
case GL_RGB16F:
case GL_R11F_G11F_B10F: case GL_R11F_G11F_B10F:
case GL_RGB9_E5: case GL_RGB9_E5:
return sw::FORMAT_X32B32G32R32F; return sw::FORMAT_X16B16G16R16F;
default: default:
UNREACHABLE(format); UNREACHABLE(format);
} }
...@@ -914,7 +916,7 @@ namespace egl ...@@ -914,7 +916,7 @@ namespace egl
} }
case GL_UNSIGNED_INT_10F_11F_11F_REV: case GL_UNSIGNED_INT_10F_11F_11F_REV:
case GL_UNSIGNED_INT_5_9_9_9_REV: // 5 is the exponent field, not alpha. case GL_UNSIGNED_INT_5_9_9_9_REV: // 5 is the exponent field, not alpha.
return sw::FORMAT_X32B32G32R32F; return sw::FORMAT_X16B16G16R16F;
default: default:
UNREACHABLE(type); UNREACHABLE(type);
} }
......
...@@ -3314,7 +3314,7 @@ void Context::readPixels(GLint x, GLint y, GLsizei width, GLsizei height, GLenum ...@@ -3314,7 +3314,7 @@ void Context::readPixels(GLint x, GLint y, GLsizei width, GLsizei height, GLenum
egl::Image *renderTarget = nullptr; egl::Image *renderTarget = nullptr;
switch(format) switch(format)
{ {
case GL_DEPTH_COMPONENT: case GL_DEPTH_COMPONENT: // GL_NV_read_depth
renderTarget = framebuffer->getDepthBuffer(); renderTarget = framebuffer->getDepthBuffer();
break; break;
default: default:
......
...@@ -27,7 +27,7 @@ namespace sw ...@@ -27,7 +27,7 @@ namespace sw
{ {
struct Options struct Options
{ {
Options() {} Options() = default;
Options(bool filter, bool useStencil, bool convertSRGB) Options(bool filter, bool useStencil, bool convertSRGB)
: writeMask(0xF), clearOperation(false), filter(filter), useStencil(useStencil), convertSRGB(convertSRGB) {} : writeMask(0xF), clearOperation(false), filter(filter), useStencil(useStencil), convertSRGB(convertSRGB) {}
Options(unsigned int writeMask) Options(unsigned int writeMask)
...@@ -54,7 +54,7 @@ namespace sw ...@@ -54,7 +54,7 @@ namespace sw
struct State : Options struct State : Options
{ {
State() {} State() = default;
State(const Options &options) : Options(options) {} State(const Options &options) : Options(options) {}
bool operator==(const State &state) const bool operator==(const State &state) const
......
...@@ -281,7 +281,7 @@ namespace sw ...@@ -281,7 +281,7 @@ namespace sw
setupPrimitives = &Renderer::setupPoints; setupPrimitives = &Renderer::setupPoints;
} }
DrawCall *draw = 0; DrawCall *draw = nullptr;
do do
{ {
......
...@@ -304,6 +304,8 @@ namespace sw ...@@ -304,6 +304,8 @@ namespace sw
((half*)element)[0] = (half)r; ((half*)element)[0] = (half)r;
((half*)element)[1] = (half)g; ((half*)element)[1] = (half)g;
break; break;
case FORMAT_X16B16G16R16F:
((half*)element)[3] = 1.0f;
case FORMAT_B16G16R16F: case FORMAT_B16G16R16F:
((half*)element)[0] = (half)r; ((half*)element)[0] = (half)r;
((half*)element)[1] = (half)g; ((half*)element)[1] = (half)g;
...@@ -962,6 +964,7 @@ namespace sw ...@@ -962,6 +964,7 @@ namespace sw
r = ((half*)element)[0]; r = ((half*)element)[0];
g = ((half*)element)[1]; g = ((half*)element)[1];
break; break;
case FORMAT_X16B16G16R16F:
case FORMAT_B16G16R16F: case FORMAT_B16G16R16F:
r = ((half*)element)[0]; r = ((half*)element)[0];
g = ((half*)element)[1]; g = ((half*)element)[1];
...@@ -1654,6 +1657,7 @@ namespace sw ...@@ -1654,6 +1657,7 @@ namespace sw
case FORMAT_R16F: return 2; case FORMAT_R16F: return 2;
case FORMAT_G16R16F: return 4; case FORMAT_G16R16F: return 4;
case FORMAT_B16G16R16F: return 6; case FORMAT_B16G16R16F: return 6;
case FORMAT_X16B16G16R16F: return 8;
case FORMAT_A16B16G16R16F: return 8; case FORMAT_A16B16G16R16F: return 8;
case FORMAT_A32F: return 4; case FORMAT_A32F: return 4;
case FORMAT_R32F: return 4; case FORMAT_R32F: return 4;
...@@ -2891,6 +2895,7 @@ namespace sw ...@@ -2891,6 +2895,7 @@ namespace sw
case FORMAT_R16F: case FORMAT_R16F:
case FORMAT_G16R16F: case FORMAT_G16R16F:
case FORMAT_B16G16R16F: case FORMAT_B16G16R16F:
case FORMAT_X16B16G16R16F:
case FORMAT_A16B16G16R16F: case FORMAT_A16B16G16R16F:
case FORMAT_R32F: case FORMAT_R32F:
case FORMAT_G32R32F: case FORMAT_G32R32F:
...@@ -3947,6 +3952,7 @@ namespace sw ...@@ -3947,6 +3952,7 @@ namespace sw
case FORMAT_R16F: return FORMAT_R32F; case FORMAT_R16F: return FORMAT_R32F;
case FORMAT_G16R16F: return FORMAT_G32R32F; case FORMAT_G16R16F: return FORMAT_G32R32F;
case FORMAT_B16G16R16F: return FORMAT_X32B32G32R32F; case FORMAT_B16G16R16F: return FORMAT_X32B32G32R32F;
case FORMAT_X16B16G16R16F: return FORMAT_X32B32G32R32F;
case FORMAT_A16B16G16R16F: return FORMAT_A32B32G32R32F; case FORMAT_A16B16G16R16F: return FORMAT_A32B32G32R32F;
case FORMAT_A32F: return FORMAT_A32B32G32R32F; case FORMAT_A32F: return FORMAT_A32B32G32R32F;
case FORMAT_R32F: return FORMAT_R32F; case FORMAT_R32F: return FORMAT_R32F;
......
...@@ -170,6 +170,7 @@ namespace sw ...@@ -170,6 +170,7 @@ namespace sw
FORMAT_R16F, FORMAT_R16F,
FORMAT_G16R16F, FORMAT_G16R16F,
FORMAT_B16G16R16F, FORMAT_B16G16R16F,
FORMAT_X16B16G16R16F,
FORMAT_A16B16G16R16F, FORMAT_A16B16G16R16F,
FORMAT_A32F, FORMAT_A32F,
FORMAT_R32F, FORMAT_R32F,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment