Use half-precision for special internal formats.

This change uses half-precision floating-point implementation formats for all OpenGL half-precision floating-point formats, as well as the R11F_G11F_B10F and RGB9_E5 special internal formats. sw::FORMAT_X16B16G16R16F was implemented for the formats without alpha. RGB9E5 conversion was optimized to not require powf(2.0, x), and 11- and 10-bit floating-point formats were optimized to map directly to 16-bit half-precision floating-point.

Change-Id: Ic33f903d01f37394244aec9f53b0e67d1c978764
Reviewed-on: https://swiftshader-review.googlesource.com/15410
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>

Use half-precision for special internal formats.
a6bc61d6 · Nicolas Capens · Nicolas Capens · 18abaf5a · a6bc61d6 · a6bc61d6
Commit a6bc61d6 authored Dec 20, 2017 by Nicolas Capens Committed by Nicolas Capens Dec 20, 2017
8 changed files
--- a/src/Common/Half.hpp
+++ b/src/Common/Half.hpp
@@ -20,6 +20,7 @@ namespace sw
 	class half
 	{
 	public:
+		half() = default;
 		explicit half(float f);
 		operator float() const;
@@ -30,6 +31,63 @@ namespace sw
 	private:
 		unsigned short fp16i;
 	};
+	inline half shortAsHalf(short s)   // Returns a half holding the same bits as 's'; no numeric conversion is performed
+	{
+		union   // 'h' and 's' share storage, so writing one and reading the other reinterprets the bits
+		{
+			half h;
+			short s;
+		} hs;
+		hs.s = s;   // Store the raw bit pattern
+		return hs.h;   // Read the same storage back as a half
+	}
+	class RGB9E5   // Packed texel: three 9-bit color fields and one 5-bit exponent field
+	{
+		unsigned int R : 9;   // Red mantissa
+		unsigned int G : 9;   // Green mantissa
+		unsigned int B : 9;   // Blue mantissa
+		unsigned int E : 5;   // Exponent applied to all three mantissas
+	public:
+		void toRGB16F(half rgb[3]) const   // Decodes this texel and writes red, green, blue to rgb[0], rgb[1], rgb[2]
+		{
+			constexpr int offset = 24;   // Exponent bias (15) + number of mantissa bits per component (9) = 24
+			const float factor = (1u << E) * (1.0f / (1 << offset));   // Equals 2^(E - offset), computed with shifts rather than powf()
+			rgb[0] = half(R * factor);   // Each component is mantissa * 2^(E - offset), converted to half
+			rgb[1] = half(G * factor);
+			rgb[2] = half(B * factor);
+		}
+	};
+	class R11G11B10F   // Packed texel: 11-bit R, 11-bit G and 10-bit B floating-point fields
+	{
+		unsigned int R : 11;   // Red, 11-bit float
+		unsigned int G : 11;   // Green, 11-bit float
+		unsigned int B : 10;   // Blue, 10-bit float
+		static inline half float11ToFloat16(unsigned short fp11)   // Converts an 11-bit float to half purely by repositioning its bits
+		{
+			return shortAsHalf(fp11 << 4);   // The 11 bits move to bits 4..14; bit 15 (sign) and the low 4 bits are 0
+		}
+		static inline half float10ToFloat16(unsigned short fp10)   // Converts a 10-bit float to half purely by repositioning its bits
+		{
+			return shortAsHalf(fp10 << 5);   // The 10 bits move to bits 5..14; bit 15 (sign) and the low 5 bits are 0
+		}
+	public:
+		void toRGB16F(half rgb[3]) const   // Decodes this texel and writes red, green, blue to rgb[0], rgb[1], rgb[2]
+		{
+			rgb[0] = float11ToFloat16(R);
+			rgb[1] = float11ToFloat16(G);
+			rgb[2] = float10ToFloat16(B);   // Blue has one bit fewer than red and green
+		}
+	};
 }
 #endif   // sw_Half_hpp
--- a/src/Common/Math.hpp
+++ b/src/Common/Math.hpp
@@ -16,6 +16,7 @@
 #define sw_Math_hpp
 #include "Types.hpp"
+#include "Half.hpp"
 #include <cmath>
 #if defined(_MSC_VER)
@@ -360,122 +361,6 @@ namespace sw
 	{
 		return static_cast<int>(min(x, 0x7FFFFFFFu));
 	}
-	class RGB9E5Data
-	{
-		unsigned int R : 9;
-		unsigned int G : 9;
-		unsigned int B : 9;
-		unsigned int E : 5;
-	public:
-		void toRGBFloats(float* rgb) const
-		{
-			static const float Offset = -24.0f; // Exponent Bias (15) + Number of mantissa bits per component (9) = 24
-			const float factor = powf(2.0f, static_cast<float>(E) + Offset);
-			rgb[0] = static_cast<float>(R) * factor;
-			rgb[1] = static_cast<float>(G) * factor;
-			rgb[2] = static_cast<float>(B) * factor;
-		}
-	};
-	class R11G11B10FData
-	{
-		unsigned int R : 11;
-		unsigned int G : 11;
-		unsigned int B : 10;
-		static inline float float11ToFloat32(unsigned short fp11)
-		{
-			unsigned short exponent = (fp11 >> 6) & 0x1F;
-			unsigned short mantissa = fp11 & 0x3F;
-			unsigned int output;
-			if(exponent == 0x1F)
-			{
-				// INF or NAN
-				output = 0x7f800000 | (mantissa << 17);
-			}
-			else
-			{
-				if(exponent != 0)
-				{
-					// normalized
-				}
-				else if(mantissa != 0)
-				{
-					// The value is denormalized
-					exponent = 1;
-					do
-					{
-						exponent--;
-						mantissa <<= 1;
-					} while((mantissa & 0x40) == 0);
-					mantissa = mantissa & 0x3F;
-				}
-				else // The value is zero
-				{
-					exponent = static_cast<unsigned short>(-112);
-				}
-				output = ((exponent + 112) << 23) | (mantissa << 17);
-			}
-			return *(float*)(&output);
-		}
-		static inline float float10ToFloat32(unsigned short fp10)
-		{
-			unsigned short exponent = (fp10 >> 5) & 0x1F;
-			unsigned short mantissa = fp10 & 0x1F;
-			unsigned int output;
-			if(exponent == 0x1F)
-			{
-				// INF or NAN
-				output = 0x7f800000 | (mantissa << 17);
-			}
-			else
-			{
-				if(exponent != 0)
-				{
-					// normalized
-				}
-				else if(mantissa != 0)
-				{
-					// The value is denormalized
-					exponent = 1;
-					do
-					{
-						exponent--;
-						mantissa <<= 1;
-					} while((mantissa & 0x20) == 0);
-					mantissa = mantissa & 0x1F;
-				}
-				else // The value is zero
-				{
-					exponent = static_cast<unsigned short>(-112);
-				}
-				output = ((exponent + 112) << 23) | (mantissa << 18);
-			}
-			return *(float*)(&output);
-		}
-	public:
-		void toRGBFloats(float* rgb) const
-		{
-			rgb[0] = float11ToFloat32(R);
-			rgb[1] = float11ToFloat32(G);
-			rgb[2] = float10ToFloat32(B);
-		}
-	};
 }
 #endif   // sw_Math_hpp
--- a/src/OpenGL/common/Image.cpp
+++ b/src/OpenGL/common/Image.cpp
@@ -272,12 +272,12 @@ namespace
 	template<>
 	void LoadImageRow<R11G11B10F>(const unsigned char *source, unsigned char *dest, GLint xoffset, GLsizei width)   // Expands 'width' packed R11G11B10F texels from 'source' into four components each in 'dest'
 	{
-		const sw::R11G11B10FData *sourceRGB = reinterpret_cast<const sw::R11G11B10FData*>(source);
+		const sw::R11G11B10F *sourceRGB = reinterpret_cast<const sw::R11G11B10F*>(source);   // View the source row as packed texels
-		float *destF = reinterpret_cast<float*>(dest + xoffset * 16);
+		sw::half *destF = reinterpret_cast<sw::half*>(dest + xoffset * 8);   // Byte offset per texel drops from 16 to 8; presumably 4 components * sizeof(sw::half) == 2 - confirm
 		for(int x = 0; x < width; x++, sourceRGB++, destF+=4)
 		{
-			sourceRGB->toRGBFloats(destF);
+			sourceRGB->toRGB16F(destF);   // Writes destF[0..2]
 			destF[3] = 1.0f;   // Fourth component is always written as one
 		}
 	}
@@ -285,12 +285,12 @@ namespace
 	template<>
 	void LoadImageRow<RGB9E5>(const unsigned char *source, unsigned char *dest, GLint xoffset, GLsizei width)   // Expands 'width' packed RGB9E5 texels from 'source' into four components each in 'dest'
 	{
-		const sw::RGB9E5Data *sourceRGB = reinterpret_cast<const sw::RGB9E5Data*>(source);
+		const sw::RGB9E5 *sourceRGB = reinterpret_cast<const sw::RGB9E5*>(source);   // View the source row as packed texels
-		float *destF = reinterpret_cast<float*>(dest + xoffset * 16);
+		sw::half *destF = reinterpret_cast<sw::half*>(dest + xoffset * 8);   // Byte offset per texel drops from 16 to 8; presumably 4 components * sizeof(sw::half) == 2 - confirm
 		for(int x = 0; x < width; x++, sourceRGB++, destF += 4)
 		{
-			sourceRGB->toRGBFloats(destF);
+			sourceRGB->toRGB16F(destF);   // Writes destF[0..2]
 			destF[3] = 1.0f;   // Fourth component is always written as one
 		}
 	}
@@ -665,9 +665,10 @@ namespace egl
 				return sw::FORMAT_G32R32F;
 			case GL_RGB:
 			case GL_RGB32F:
+				return sw::FORMAT_X32B32G32R32F;
 			case GL_R11F_G11F_B10F:
 			case GL_RGB9_E5:
-				return sw::FORMAT_X32B32G32R32F;
+				return sw::FORMAT_X16B16G16R16F;
 			case GL_RGBA:
 			case GL_RGBA32F:
 				return sw::FORMAT_A32B32G32R32F;
@@ -676,6 +677,7 @@ namespace egl
 			case GL_RG16F:
 				return sw::FORMAT_G16R16F;
 			case GL_RGB16F:
+				return sw::FORMAT_X16B16G16R16F;
 			case GL_RGBA16F:
 				return sw::FORMAT_A16B16G16R16F;
 			case GL_DEPTH_COMPONENT:
@@ -703,14 +705,14 @@ namespace egl
 			case GL_RG:
 			case GL_RG16F:
 				return sw::FORMAT_G16R16F;
-			case GL_RGB:
-			case GL_RGB16F:
 			case GL_RGBA:
 			case GL_RGBA16F:
 				return sw::FORMAT_A16B16G16R16F;
+			case GL_RGB:
+			case GL_RGB16F:
 			case GL_R11F_G11F_B10F:
 			case GL_RGB9_E5:
-				return sw::FORMAT_X32B32G32R32F;
+				return sw::FORMAT_X16B16G16R16F;
 			default:
 				UNREACHABLE(format);
 			}
@@ -914,7 +916,7 @@ namespace egl
 			}
 		case GL_UNSIGNED_INT_10F_11F_11F_REV:
 		case GL_UNSIGNED_INT_5_9_9_9_REV:   // 5 is the exponent field, not alpha.
-			return sw::FORMAT_X32B32G32R32F;
+			return sw::FORMAT_X16B16G16R16F;
 		default:
 			UNREACHABLE(type);
 		}

--- a/src/OpenGL/libGLESv2/Context.cpp
+++ b/src/OpenGL/libGLESv2/Context.cpp
@@ -3314,7 +3314,7 @@ void Context::readPixels(GLint x, GLint y, GLsizei width, GLsizei height, GLenum
 	egl::Image *renderTarget = nullptr;
 	switch(format)
 	{
-	case GL_DEPTH_COMPONENT:
+	case GL_DEPTH_COMPONENT:   // GL_NV_read_depth
 		renderTarget = framebuffer->getDepthBuffer();
 		break;
 	default:

--- a/src/Renderer/Blitter.hpp
+++ b/src/Renderer/Blitter.hpp
@@ -27,7 +27,7 @@ namespace sw
 	{
 		struct Options
 		{
-			Options() {}
+			Options() = default;
 			Options(bool filter, bool useStencil, bool convertSRGB)
 				: writeMask(0xF), clearOperation(false), filter(filter), useStencil(useStencil), convertSRGB(convertSRGB) {}
 			Options(unsigned int writeMask)
@@ -54,7 +54,7 @@ namespace sw
 		struct State : Options
 		{
-			State() {}
+			State() = default;
 			State(const Options &options) : Options(options) {}
 			bool operator==(const State &state) const

--- a/src/Renderer/Renderer.cpp
+++ b/src/Renderer/Renderer.cpp
@@ -281,7 +281,7 @@ namespace sw
 				setupPrimitives = &Renderer::setupPoints;
 			}
-			DrawCall *draw = 0;
+			DrawCall *draw = nullptr;
 			do
 			{

--- a/src/Renderer/Surface.cpp
+++ b/src/Renderer/Surface.cpp
@@ -304,6 +304,8 @@ namespace sw
 			((half*)element)[0] = (half)r;
 			((half*)element)[1] = (half)g;
 			break;
+		case FORMAT_X16B16G16R16F:
+			((half*)element)[3] = 1.0f;
 		case FORMAT_B16G16R16F:
 			((half*)element)[0] = (half)r;
 			((half*)element)[1] = (half)g;
@@ -962,6 +964,7 @@ namespace sw
 			r = ((half*)element)[0];
 			g = ((half*)element)[1];
 			break;
+		case FORMAT_X16B16G16R16F:
 		case FORMAT_B16G16R16F:
 			r = ((half*)element)[0];
 			g = ((half*)element)[1];
@@ -1654,6 +1657,7 @@ namespace sw
 		case FORMAT_R16F:				return 2;
 		case FORMAT_G16R16F:			return 4;
 		case FORMAT_B16G16R16F:			return 6;
+		case FORMAT_X16B16G16R16F:		return 8;
 		case FORMAT_A16B16G16R16F:		return 8;
 		case FORMAT_A32F:				return 4;
 		case FORMAT_R32F:				return 4;
@@ -2891,6 +2895,7 @@ namespace sw
 		case FORMAT_R16F:
 		case FORMAT_G16R16F:
 		case FORMAT_B16G16R16F:
+		case FORMAT_X16B16G16R16F:
 		case FORMAT_A16B16G16R16F:
 		case FORMAT_R32F:
 		case FORMAT_G32R32F:
@@ -3947,6 +3952,7 @@ namespace sw
 		case FORMAT_R16F:			return FORMAT_R32F;
 		case FORMAT_G16R16F:		return FORMAT_G32R32F;
 		case FORMAT_B16G16R16F:     return FORMAT_X32B32G32R32F;
+		case FORMAT_X16B16G16R16F:	return FORMAT_X32B32G32R32F;
 		case FORMAT_A16B16G16R16F:	return FORMAT_A32B32G32R32F;
 		case FORMAT_A32F:			return FORMAT_A32B32G32R32F;
 		case FORMAT_R32F:			return FORMAT_R32F;

--- a/src/Renderer/Surface.hpp
+++ b/src/Renderer/Surface.hpp
@@ -170,6 +170,7 @@ namespace sw
 		FORMAT_R16F,
 		FORMAT_G16R16F,
 		FORMAT_B16G16R16F,
+		FORMAT_X16B16G16R16F,
 		FORMAT_A16B16G16R16F,
 		FORMAT_A32F,
 		FORMAT_R32F,