Commit ae022faf by Antonio Maiorano

Add support for configurable subpixel precision

Bug: b/141676114 Change-Id: I47e7d90e14b44533e64d352ecc6440495c0b7d3f Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/36597 Presubmit-Ready: Antonio Maiorano <amaiorano@google.com> Kokoro-Presubmit: kokoro <noreply+kokoro@google.com> Reviewed-by: Alexis Hétu <sugoi@google.com> Tested-by: Antonio Maiorano <amaiorano@google.com>
parent 53096e48
...@@ -330,16 +330,17 @@ namespace sw ...@@ -330,16 +330,17 @@ namespace sw
float N = viewport.minDepth; float N = viewport.minDepth;
float F = viewport.maxDepth; float F = viewport.maxDepth;
float Z = F - N; float Z = F - N;
constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR;
if(context->isDrawTriangle(false)) if(context->isDrawTriangle(false))
{ {
N += context->depthBias; N += context->depthBias;
} }
data->Wx16 = replicate(W * 16); data->WxF = replicate(W * subPixF);
data->Hx16 = replicate(H * 16); data->HxF = replicate(H * subPixF);
data->X0x16 = replicate(X0 * 16 - 8); data->X0xF = replicate(X0 * subPixF - subPixF / 2);
data->Y0x16 = replicate(Y0 * 16 - 8); data->Y0xF = replicate(Y0 * subPixF - subPixF / 2);
data->halfPixelX = replicate(0.5f / W); data->halfPixelX = replicate(0.5f / W);
data->halfPixelY = replicate(0.5f / H); data->halfPixelY = replicate(0.5f / H);
data->viewportHeight = abs(viewport.height); data->viewportHeight = abs(viewport.height);
...@@ -799,8 +800,10 @@ namespace sw ...@@ -799,8 +800,10 @@ namespace sw
return false; return false;
} }
const float W = data.Wx16[0] * (1.0f / 16.0f); constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR;
const float H = data.Hx16[0] * (1.0f / 16.0f);
const float W = data.WxF[0] * (1.0f / subPixF);
const float H = data.HxF[0] * (1.0f / subPixF);
float dx = W * (P1.x / P1.w - P0.x / P0.w); float dx = W * (P1.x / P1.w - P0.x / P0.w);
float dy = H * (P1.y / P1.w - P0.y / P0.w); float dy = H * (P1.y / P1.w - P0.y / P0.w);
...@@ -1029,8 +1032,10 @@ namespace sw ...@@ -1029,8 +1032,10 @@ namespace sw
triangle.v1 = triangle.v0; triangle.v1 = triangle.v0;
triangle.v2 = triangle.v0; triangle.v2 = triangle.v0;
triangle.v1.projected.x += iround(16 * 0.5f * pSize); constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR;
triangle.v2.projected.y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1); // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
triangle.v1.projected.x += iround(subPixF * 0.5f * pSize);
triangle.v2.projected.y -= iround(subPixF * 0.5f * pSize) * (data.HxF[0] > 0.0f ? 1 : -1); // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
return setupRoutine(&primitive, &triangle, &polygon, &data); return setupRoutine(&primitive, &triangle, &polygon, &data);
} }
......
...@@ -79,10 +79,10 @@ namespace sw ...@@ -79,10 +79,10 @@ namespace sw
PixelProcessor::Factor factor; PixelProcessor::Factor factor;
unsigned int occlusion[MaxClusterCount]; // Number of pixels passing depth test unsigned int occlusion[MaxClusterCount]; // Number of pixels passing depth test
float4 Wx16; float4 WxF;
float4 Hx16; float4 HxF;
float4 X0x16; float4 X0xF;
float4 Y0x16; float4 Y0xF;
float4 halfPixelX; float4 halfPixelX;
float4 halfPixelY; float4 halfPixelY;
float viewportHeight; float viewportHeight;
......
...@@ -290,29 +290,18 @@ namespace sw ...@@ -290,29 +290,18 @@ namespace sw
sRGBtoLinear12_16[i] = (unsigned short)(clamp(sw::sRGBtoLinear((float)i / 0x0FFF) * 0xFFFF + 0.5f, 0.0f, (float)0xFFFF)); sRGBtoLinear12_16[i] = (unsigned short)(clamp(sw::sRGBtoLinear((float)i / 0x0FFF) * 0xFFFF + 0.5f, 0.0f, (float)0xFFFF));
} }
// VK_SAMPLE_COUNT_4_BIT
// https://www.khronos.org/registry/vulkan/specs/1.1/html/vkspec.html#primsrast-multisampling
constexpr float sampleLocations4[][2] = {
{0.375, 0.125},
{0.875, 0.375},
{0.125, 0.625},
{0.625, 0.875},
};
// Vulkan spec sample positions are relative to 0,0 in top left corner, with Y+ going down.
// Convert to our space, with 0,0 in center, and Y+ going up.
constexpr float4 X[4] = { constexpr float4 X[4] = {
sw::replicate(sampleLocations4[0][0] - 0.5f), // -0.125 sw::replicate(SampleLocationsX[0]),
sw::replicate(sampleLocations4[1][0] - 0.5f), // +0.375 sw::replicate(SampleLocationsX[1]),
sw::replicate(sampleLocations4[2][0] - 0.5f), // -0.375 sw::replicate(SampleLocationsX[2]),
sw::replicate(sampleLocations4[3][0] - 0.5f), // +0.125 sw::replicate(SampleLocationsX[3]),
}; };
constexpr float4 Y[4] = { constexpr float4 Y[4] = {
sw::replicate(-(sampleLocations4[0][1] - 0.5f)), // +0.375 sw::replicate(SampleLocationsY[0]),
sw::replicate(-(sampleLocations4[1][1] - 0.5f)), // +0.125 sw::replicate(SampleLocationsY[1]),
sw::replicate(-(sampleLocations4[2][1] - 0.5f)), // -0.125 sw::replicate(SampleLocationsY[2]),
sw::replicate(-(sampleLocations4[3][1] - 0.5f)), // -0.375 sw::replicate(SampleLocationsY[3]),
}; };
for(int q = 0; q < 4; q++) for(int q = 0; q < 4; q++)
...@@ -332,8 +321,11 @@ namespace sw ...@@ -332,8 +321,11 @@ namespace sw
} }
} }
const int Xf[4] = {-5, +5, +2, -2}; // Fragment offsets constexpr auto subPixB = vk::SUBPIXEL_PRECISION_BITS;
const int Yf[4] = {-2, +2, -5, +5}; // Fragment offsets
// Reorder sample points for fragment offset computation
const int Xf[4] = { toFixedPoint(X[2][0], subPixB), toFixedPoint(X[1][0], subPixB), toFixedPoint(X[3][0], subPixB), toFixedPoint(X[0][0], subPixB) };
const int Yf[4] = { toFixedPoint(Y[2][0], subPixB), toFixedPoint(Y[1][0], subPixB), toFixedPoint(Y[3][0], subPixB), toFixedPoint(Y[0][0], subPixB) };
memcpy(&this->Xf, &Xf, sizeof(Xf)); memcpy(&this->Xf, &Xf, sizeof(Xf));
memcpy(&this->Yf, &Yf, sizeof(Yf)); memcpy(&this->Yf, &Yf, sizeof(Yf));
......
...@@ -16,6 +16,8 @@ ...@@ -16,6 +16,8 @@
#define sw_Constants_hpp #define sw_Constants_hpp
#include "System/Types.hpp" #include "System/Types.hpp"
#include "System/Math.hpp"
#include "Vulkan/VkConfig.h"
namespace sw namespace sw
{ {
...@@ -86,6 +88,36 @@ namespace sw ...@@ -86,6 +88,36 @@ namespace sw
float4 X[4]; float4 X[4];
float4 Y[4]; float4 Y[4];
// VK_SAMPLE_COUNT_4_BIT
// https://www.khronos.org/registry/vulkan/specs/1.1/html/vkspec.html#primsrast-multisampling
// Each row is one sample's {x, y} position inside the pixel.
static constexpr float VkSampleLocations4[][2] = {
{0.375, 0.125},
{0.875, 0.375},
{0.125, 0.625},
{0.625, 0.875},
};
// Vulkan spec sample positions are relative to 0,0 in top left corner, with Y+ going down.
// Convert to our space, with 0,0 in center, and Y+ going up.
// X only needs the origin shift (subtract 0.5).
static constexpr float SampleLocationsX[4] = {
VkSampleLocations4[0][0] - 0.5f,  // -0.125
VkSampleLocations4[1][0] - 0.5f,  // +0.375
VkSampleLocations4[2][0] - 0.5f,  // -0.375
VkSampleLocations4[3][0] - 0.5f,  // +0.125
};
// Y needs the origin shift and a sign flip, because the axis direction is reversed.
static constexpr float SampleLocationsY[4] = {
-(VkSampleLocations4[0][1] - 0.5f),  // +0.375
-(VkSampleLocations4[1][1] - 0.5f),  // +0.125
-(VkSampleLocations4[2][1] - 0.5f),  // -0.125
-(VkSampleLocations4[3][1] - 0.5f),  // -0.375
};
// Compute the yMin and yMax multisample offsets so that they are just
// large enough (+/- max range - epsilon) to include sample points
// Both offsets are built from the largest Y sample offset converted to fixed point:
// yMin uses (one pixel - max - 1) and yMax uses (one pixel + max - 1), in subpixel units.
// This relies on the Y sample offsets being symmetric around the pixel center.
// NOTE(review): with 4 subpixel bits these evaluate to 9 and 21, whereas the literals
// previously hard-coded in the setup routine were 0x0A and 0x14 - confirm that the
// one-subpixel difference is intended.
static constexpr int yMinMultiSampleOffset = sw::toFixedPoint(1, vk::SUBPIXEL_PRECISION_BITS) - sw::toFixedPoint(sw::max(SampleLocationsY[0], SampleLocationsY[1], SampleLocationsY[2], SampleLocationsY[3]), vk::SUBPIXEL_PRECISION_BITS) - 1;
static constexpr int yMaxMultiSampleOffset = sw::toFixedPoint(1, vk::SUBPIXEL_PRECISION_BITS) + sw::toFixedPoint(sw::max(SampleLocationsY[0], SampleLocationsY[1], SampleLocationsY[2], SampleLocationsY[3]), vk::SUBPIXEL_PRECISION_BITS) - 1;
dword maxX[16]; dword maxX[16];
dword maxY[16]; dword maxY[16];
dword maxZ[16]; dword maxZ[16];
......
...@@ -139,8 +139,8 @@ namespace sw ...@@ -139,8 +139,8 @@ namespace sw
Float w = v.w; Float w = v.w;
Float rhw = IfThenElse(w != 0.0f, 1.0f / w, Float(1.0f)); Float rhw = IfThenElse(w != 0.0f, 1.0f / w, Float(1.0f));
X[i] = RoundInt(*Pointer<Float>(data + OFFSET(DrawData,X0x16)) + v.x * rhw * *Pointer<Float>(data + OFFSET(DrawData,Wx16))); X[i] = RoundInt(*Pointer<Float>(data + OFFSET(DrawData,X0xF)) + v.x * rhw * *Pointer<Float>(data + OFFSET(DrawData,WxF)));
Y[i] = RoundInt(*Pointer<Float>(data + OFFSET(DrawData,Y0x16)) + v.y * rhw * *Pointer<Float>(data + OFFSET(DrawData,Hx16))); Y[i] = RoundInt(*Pointer<Float>(data + OFFSET(DrawData,Y0xF)) + v.y * rhw * *Pointer<Float>(data + OFFSET(DrawData,HxF)));
i++; i++;
} }
...@@ -162,15 +162,19 @@ namespace sw ...@@ -162,15 +162,19 @@ namespace sw
} }
Until(i >= n) Until(i >= n)
constexpr int subPixB = vk::SUBPIXEL_PRECISION_BITS;
constexpr int subPixM = vk::SUBPIXEL_PRECISION_MASK;
constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR;
if(state.multiSample > 1) if(state.multiSample > 1)
{ {
yMin = (yMin + 0x0A) >> 4; yMin = (yMin + Constants::yMinMultiSampleOffset) >> subPixB;
yMax = (yMax + 0x14) >> 4; yMax = (yMax + Constants::yMaxMultiSampleOffset) >> subPixB;
} }
else else
{ {
yMin = (yMin + 0x0F) >> 4; yMin = (yMin + subPixM) >> subPixB;
yMax = (yMax + 0x0F) >> 4; yMax = (yMax + subPixM) >> subPixB;
} }
yMin = Max(yMin, *Pointer<Int>(data + OFFSET(DrawData,scissorY0))); yMin = Max(yMin, *Pointer<Int>(data + OFFSET(DrawData,scissorY0)));
...@@ -213,7 +217,7 @@ namespace sw ...@@ -213,7 +217,7 @@ namespace sw
{ {
Int xMin = *Pointer<Int>(data + OFFSET(DrawData, scissorX0)); Int xMin = *Pointer<Int>(data + OFFSET(DrawData, scissorX0));
Int xMax = *Pointer<Int>(data + OFFSET(DrawData, scissorX1)); Int xMax = *Pointer<Int>(data + OFFSET(DrawData, scissorX1));
Short x = Short(Clamp((X[0] + 0xF) >> 4, xMin, xMax)); Short x = Short(Clamp((X[0] + subPixM) >> subPixB, xMin, xMax));
For(Int y = yMin - 1, y < yMax + 1, y++) For(Int y = yMin - 1, y < yMax + 1, y++)
{ {
...@@ -323,8 +327,8 @@ namespace sw ...@@ -323,8 +327,8 @@ namespace sw
Y2 = Y1 + X0 - X1; Y2 = Y1 + X0 - X1;
} }
Float dx = Float(X0) * (1.0f / 16.0f); Float dx = Float(X0) * (1.0f / subPixF);
Float dy = Float(Y0) * (1.0f / 16.0f); Float dy = Float(Y0) * (1.0f / subPixF);
X1 -= X0; X1 -= X0;
Y1 -= Y0; Y1 -= Y0;
...@@ -332,11 +336,11 @@ namespace sw ...@@ -332,11 +336,11 @@ namespace sw
X2 -= X0; X2 -= X0;
Y2 -= Y0; Y2 -= Y0;
Float x1 = w1 * (1.0f / 16.0f) * Float(X1); Float x1 = w1 * (1.0f / subPixF) * Float(X1);
Float y1 = w1 * (1.0f / 16.0f) * Float(Y1); Float y1 = w1 * (1.0f / subPixF) * Float(Y1);
Float x2 = w2 * (1.0f / 16.0f) * Float(X2); Float x2 = w2 * (1.0f / subPixF) * Float(X2);
Float y2 = w2 * (1.0f / 16.0f) * Float(Y2); Float y2 = w2 * (1.0f / subPixF) * Float(Y2);
Float a = x1 * y2 - x2 * y1; Float a = x1 * y2 - x2 * y1;
...@@ -403,10 +407,10 @@ namespace sw ...@@ -403,10 +407,10 @@ namespace sw
if(!point) if(!point)
{ {
Float x1 = Float(X1) * (1.0f / 16.0f); Float x1 = Float(X1) * (1.0f / subPixF);
Float y1 = Float(Y1) * (1.0f / 16.0f); Float y1 = Float(Y1) * (1.0f / subPixF);
Float x2 = Float(X2) * (1.0f / 16.0f); Float x2 = Float(X2) * (1.0f / subPixF);
Float y2 = Float(Y2) * (1.0f / 16.0f); Float y2 = Float(Y2) * (1.0f / subPixF);
Float D = *Pointer<Float>(data + OFFSET(DrawData,depthRange)) / (x1 * y2 - x2 * y1); Float D = *Pointer<Float>(data + OFFSET(DrawData,depthRange)) / (x1 * y2 - x2 * y1);
...@@ -509,8 +513,11 @@ namespace sw ...@@ -509,8 +513,11 @@ namespace sw
Int Y1 = IfThenElse(swap, Yb, Ya); Int Y1 = IfThenElse(swap, Yb, Ya);
Int Y2 = IfThenElse(swap, Ya, Yb); Int Y2 = IfThenElse(swap, Ya, Yb);
Int y1 = Max((Y1 + 0x0000000F) >> 4, *Pointer<Int>(data + OFFSET(DrawData,scissorY0))); constexpr int subPixB = vk::SUBPIXEL_PRECISION_BITS;
Int y2 = Min((Y2 + 0x0000000F) >> 4, *Pointer<Int>(data + OFFSET(DrawData,scissorY1))); constexpr int subPixM = vk::SUBPIXEL_PRECISION_MASK;
Int y1 = Max((Y1 + subPixM) >> subPixB, *Pointer<Int>(data + OFFSET(DrawData,scissorY0)));
Int y2 = Min((Y2 + subPixM) >> subPixB, *Pointer<Int>(data + OFFSET(DrawData,scissorY1)));
If(y1 < y2) If(y1 < y2)
{ {
...@@ -525,11 +532,11 @@ namespace sw ...@@ -525,11 +532,11 @@ namespace sw
Int DX12 = X2 - X1; Int DX12 = X2 - X1;
Int DY12 = Y2 - Y1; Int DY12 = Y2 - Y1;
Int FDX12 = DX12 << 4; Int FDX12 = DX12 << subPixB;
Int FDY12 = DY12 << 4; Int FDY12 = DY12 << subPixB;
Int X = DX12 * ((y1 << 4) - Y1) + (X1 & 0x0000000F) * DY12; Int X = DX12 * ((y1 << subPixB) - Y1) + (X1 & subPixM) * DY12;
Int x = (X1 >> 4) + X / FDY12; // Edge Int x = (X1 >> subPixB) + X / FDY12; // Edge
Int d = X % FDY12; // Error-term Int d = X % FDY12; // Error-term
Int ceil = -d >> 31; // Ceiling division: remainder <= 0 Int ceil = -d >> 31; // Ceiling division: remainder <= 0
x -= ceil; x -= ceil;
......
...@@ -543,8 +543,8 @@ namespace sw ...@@ -543,8 +543,8 @@ namespace sw
Float4 rhw = Float4(1.0f) / w; Float4 rhw = Float4(1.0f) / w;
Vector4f proj; Vector4f proj;
proj.x = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,X0x16)) + pos.x * rhw * *Pointer<Float4>(data + OFFSET(DrawData,Wx16)))); proj.x = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,X0xF)) + pos.x * rhw * *Pointer<Float4>(data + OFFSET(DrawData,WxF))));
proj.y = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,Y0x16)) + pos.y * rhw * *Pointer<Float4>(data + OFFSET(DrawData,Hx16)))); proj.y = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,Y0xF)) + pos.y * rhw * *Pointer<Float4>(data + OFFSET(DrawData,HxF))));
proj.z = pos.z * rhw; proj.z = pos.z * rhw;
proj.w = rhw; proj.w = rhw;
......
...@@ -32,37 +32,37 @@ namespace sw ...@@ -32,37 +32,37 @@ namespace sw
#undef max #undef max
template<class T> template<class T>
inline T max(T a, T b) inline T constexpr max(T a, T b)
{ {
return a > b ? a : b; return a > b ? a : b;
} }
template<class T> template<class T>
inline T min(T a, T b) inline constexpr T min(T a, T b)
{ {
return a < b ? a : b; return a < b ? a : b;
} }
template<class T> template<class T>
inline T max(T a, T b, T c) inline constexpr T max(T a, T b, T c)
{ {
return max(max(a, b), c); return max(max(a, b), c);
} }
template<class T> template<class T>
inline T min(T a, T b, T c) inline constexpr T min(T a, T b, T c)
{ {
return min(min(a, b), c); return min(min(a, b), c);
} }
template<class T> template<class T>
inline T max(T a, T b, T c, T d) inline constexpr T max(T a, T b, T c, T d)
{ {
return max(max(a, b), max(c, d)); return max(max(a, b), max(c, d));
} }
template<class T> template<class T>
inline T min(T a, T b, T c, T d) inline constexpr T min(T a, T b, T c, T d)
{ {
return min(min(a, b), min(c, d)); return min(min(a, b), min(c, d));
} }
...@@ -372,6 +372,11 @@ namespace sw ...@@ -372,6 +372,11 @@ namespace sw
{ {
return static_cast<int>(min(x, 0x7FFFFFFFu)); return static_cast<int>(min(x, 0x7FFFFFFFu));
} }
// Converts the floating-point value v to a fixed-point integer with p
// fractional bits. The value is scaled by 2^p and the integer conversion then
// truncates toward zero; no rounding is applied.
constexpr int toFixedPoint(float v, int p)
{
	return static_cast<int>(v * static_cast<float>(1 << p));
}
} }
#endif // sw_Math_hpp #endif // sw_Math_hpp
...@@ -78,6 +78,10 @@ enum ...@@ -78,6 +78,10 @@ enum
MAX_POINT_SIZE = 1, // Large points are not supported. If/when we turn this on, must be >= 64. MAX_POINT_SIZE = 1, // Large points are not supported. If/when we turn this on, must be >= 64.
}; };
// Number of fractional (subpixel) bits used for fixed-point vertex positions.
constexpr int SUBPIXEL_PRECISION_BITS = 4;
// Keep `1 << SUBPIXEL_PRECISION_BITS` a well-defined, positive int.
static_assert(SUBPIXEL_PRECISION_BITS >= 0 && SUBPIXEL_PRECISION_BITS < 31, "SUBPIXEL_PRECISION_BITS must be in the range [0, 30]");
// Scale factor between pixel units and fixed-point subpixel units (2^bits).
constexpr float SUBPIXEL_PRECISION_FACTOR = static_cast<float>(1 << SUBPIXEL_PRECISION_BITS);
// Mask selecting the fractional bits of a fixed-point coordinate (2^bits - 1).
// Derived from the bit count directly, so it does not assume a 32-bit literal
// width, needs no unsigned-to-int conversion, and is also valid for 0 bits.
constexpr int SUBPIXEL_PRECISION_MASK = (1 << SUBPIXEL_PRECISION_BITS) - 1;
} }
#endif // VK_CONFIG_HPP_ #endif // VK_CONFIG_HPP_
...@@ -200,7 +200,7 @@ const VkPhysicalDeviceLimits& PhysicalDevice::getLimits() const ...@@ -200,7 +200,7 @@ const VkPhysicalDeviceLimits& PhysicalDevice::getLimits() const
{ 65535, 65535, 65535 }, // maxComputeWorkGroupCount[3] { 65535, 65535, 65535 }, // maxComputeWorkGroupCount[3]
128, // maxComputeWorkGroupInvocations 128, // maxComputeWorkGroupInvocations
{ 128, 128, 64, }, // maxComputeWorkGroupSize[3] { 128, 128, 64, }, // maxComputeWorkGroupSize[3]
4, // subPixelPrecisionBits vk::SUBPIXEL_PRECISION_BITS, // subPixelPrecisionBits
4, // subTexelPrecisionBits 4, // subTexelPrecisionBits
4, // mipmapPrecisionBits 4, // mipmapPrecisionBits
UINT32_MAX, // maxDrawIndexedIndexValue UINT32_MAX, // maxDrawIndexedIndexValue
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment