Commit 8bcd1744 by Nicolas Capens Committed by Nicolas Capens

Fix alignment of projected vertex coordinates

Also reorder fields to reduce space wasted on alignment padding, and shuffle writing them to the vertex cache in the same order for consistency.

Bug: b/27351835
Change-Id: I06ca0c836aabd9d095893762d973c098f694ee30
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/32788
Presubmit-Ready: Nicolas Capens <nicolascapens@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
parent a4308130
...@@ -66,7 +66,7 @@ namespace sw ...@@ -66,7 +66,7 @@ namespace sw
MAX_TEXTURE_LOD = MIPMAP_LEVELS - 2, // Trilinear accesses lod+1 MAX_TEXTURE_LOD = MIPMAP_LEVELS - 2, // Trilinear accesses lod+1
RENDERTARGETS = 8, RENDERTARGETS = 8,
NUM_TEMPORARY_REGISTERS = 4096, NUM_TEMPORARY_REGISTERS = 4096,
MAX_INTERFACE_COMPONENTS = 32 * 4, MAX_INTERFACE_COMPONENTS = 32 * 4, // Must be multiple of 4 for 16-byte alignment.
}; };
} }
......
...@@ -888,7 +888,7 @@ namespace sw ...@@ -888,7 +888,7 @@ namespace sw
if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE) if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
{ {
Polygon polygon(&v0.builtins.position, &v1.builtins.position, &v2.builtins.position); Polygon polygon(&v0.position, &v1.position, &v2.position);
int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags; int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags;
...@@ -972,8 +972,8 @@ namespace sw ...@@ -972,8 +972,8 @@ namespace sw
Vertex &v0 = triangle.v0; Vertex &v0 = triangle.v0;
Vertex &v1 = triangle.v1; Vertex &v1 = triangle.v1;
const float4 &P0 = v0.builtins.position; const float4 &P0 = v0.position;
const float4 &P1 = v1.builtins.position; const float4 &P1 = v1.position;
if(P0.w <= 0 && P1.w <= 0) if(P0.w <= 0 && P1.w <= 0)
{ {
...@@ -1162,17 +1162,17 @@ namespace sw ...@@ -1162,17 +1162,17 @@ namespace sw
Vertex &v = triangle.v0; Vertex &v = triangle.v0;
float pSize = v.builtins.pointSize; float pSize = v.pointSize;
pSize = clamp(pSize, 1.0f, static_cast<float>(vk::MAX_POINT_SIZE)); pSize = clamp(pSize, 1.0f, static_cast<float>(vk::MAX_POINT_SIZE));
float4 P[4]; float4 P[4];
int C[4]; int C[4];
P[0] = v.builtins.position; P[0] = v.position;
P[1] = v.builtins.position; P[1] = v.position;
P[2] = v.builtins.position; P[2] = v.position;
P[3] = v.builtins.position; P[3] = v.position;
const float X = pSize * P[0].w * data.halfPixelX[0]; const float X = pSize * P[0].w * data.halfPixelX[0];
const float Y = pSize * P[0].w * data.halfPixelY[0]; const float Y = pSize * P[0].w * data.halfPixelY[0];
......
...@@ -23,14 +23,12 @@ namespace sw ...@@ -23,14 +23,12 @@ namespace sw
{ {
ALIGN(16, struct Vertex ALIGN(16, struct Vertex
{ {
float v[MAX_INTERFACE_COMPONENTS]; float4 position;
float pointSize;
struct int clipFlags;
{
float4 position; alignas(16) struct
float pointSize;
} builtins;
struct
{ {
int x; int x;
int y; int y;
...@@ -38,8 +36,7 @@ namespace sw ...@@ -38,8 +36,7 @@ namespace sw
float w; float w;
} projected; } projected;
int clipFlags; alignas(16) float v[MAX_INTERFACE_COMPONENTS];
int padding[2];
}); });
static_assert((sizeof(Vertex) & 0x0000000F) == 0, "Vertex size not a multiple of 16 bytes (alignment requirement)"); static_assert((sizeof(Vertex) & 0x0000000F) == 0, "Vertex size not a multiple of 16 bytes (alignment requirement)");
......
...@@ -86,9 +86,9 @@ namespace sw ...@@ -86,9 +86,9 @@ namespace sw
Return(0); Return(0);
} }
Int w0w1w2 = *Pointer<Int>(v0 + OFFSET(Vertex, builtins.position.w)) ^ Int w0w1w2 = *Pointer<Int>(v0 + OFFSET(Vertex, position.w)) ^
*Pointer<Int>(v1 + OFFSET(Vertex, builtins.position.w)) ^ *Pointer<Int>(v1 + OFFSET(Vertex, position.w)) ^
*Pointer<Int>(v2 + OFFSET(Vertex, builtins.position.w)); *Pointer<Int>(v2 + OFFSET(Vertex, position.w));
A = IfThenElse(w0w1w2 < 0, -A, A); A = IfThenElse(w0w1w2 < 0, -A, A);
...@@ -268,9 +268,9 @@ namespace sw ...@@ -268,9 +268,9 @@ namespace sw
// Sort by minimum y // Sort by minimum y
if(triangle) if(triangle)
{ {
Float y0 = *Pointer<Float>(v0 + OFFSET(Vertex, builtins.position.y)); Float y0 = *Pointer<Float>(v0 + OFFSET(Vertex, position.y));
Float y1 = *Pointer<Float>(v1 + OFFSET(Vertex, builtins.position.y)); Float y1 = *Pointer<Float>(v1 + OFFSET(Vertex, position.y));
Float y2 = *Pointer<Float>(v2 + OFFSET(Vertex, builtins.position.y)); Float y2 = *Pointer<Float>(v2 + OFFSET(Vertex, position.y));
Float yMin = Min(Min(y0, y1), y2); Float yMin = Min(Min(y0, y1), y2);
...@@ -281,9 +281,9 @@ namespace sw ...@@ -281,9 +281,9 @@ namespace sw
// Sort by maximum w // Sort by maximum w
if(triangle) if(triangle)
{ {
Float w0 = *Pointer<Float>(v0 + OFFSET(Vertex, builtins.position.w)); Float w0 = *Pointer<Float>(v0 + OFFSET(Vertex, position.w));
Float w1 = *Pointer<Float>(v1 + OFFSET(Vertex, builtins.position.w)); Float w1 = *Pointer<Float>(v1 + OFFSET(Vertex, position.w));
Float w2 = *Pointer<Float>(v2 + OFFSET(Vertex, builtins.position.w)); Float w2 = *Pointer<Float>(v2 + OFFSET(Vertex, position.w));
Float wMax = Max(Max(w0, w1), w2); Float wMax = Max(Max(w0, w1), w2);
...@@ -292,13 +292,13 @@ namespace sw ...@@ -292,13 +292,13 @@ namespace sw
} }
*Pointer<Float>(primitive + OFFSET(Primitive, pointCoordX)) = *Pointer<Float>(primitive + OFFSET(Primitive, pointCoordX)) =
*Pointer<Float>(v0 + OFFSET(Vertex, builtins.position.x)); *Pointer<Float>(v0 + OFFSET(Vertex, position.x));
*Pointer<Float>(primitive + OFFSET(Primitive, pointCoordY)) = *Pointer<Float>(primitive + OFFSET(Primitive, pointCoordY)) =
*Pointer<Float>(v0 + OFFSET(Vertex, builtins.position.y)); *Pointer<Float>(v0 + OFFSET(Vertex, position.y));
Float w0 = *Pointer<Float>(v0 + OFFSET(Vertex, builtins.position.w)); Float w0 = *Pointer<Float>(v0 + OFFSET(Vertex, position.w));
Float w1 = *Pointer<Float>(v1 + OFFSET(Vertex, builtins.position.w)); Float w1 = *Pointer<Float>(v1 + OFFSET(Vertex, position.w));
Float w2 = *Pointer<Float>(v2 + OFFSET(Vertex, builtins.position.w)); Float w2 = *Pointer<Float>(v2 + OFFSET(Vertex, position.w));
Float4 w012; Float4 w012;
......
...@@ -12,15 +12,14 @@ ...@@ -12,15 +12,14 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include <Device/Vertex.hpp>
#include "VertexRoutine.hpp" #include "VertexRoutine.hpp"
#include "Constants.hpp" #include "Constants.hpp"
#include "SpirvShader.hpp"
#include "Device/Vertex.hpp" #include "Device/Vertex.hpp"
#include "Device/Renderer.hpp" #include "Device/Renderer.hpp"
#include "System/Half.hpp"
#include "Vulkan/VkDebug.hpp" #include "Vulkan/VkDebug.hpp"
#include "SpirvShader.hpp" #include "System/Half.hpp"
namespace sw namespace sw
{ {
...@@ -51,7 +50,7 @@ namespace sw ...@@ -51,7 +50,7 @@ namespace sw
Do Do
{ {
UInt index = *Pointer<UInt>(batch); UInt index = *batch;
UInt tagIndex = index & 0x0000003C; UInt tagIndex = index & 0x0000003C;
UInt indexQ = index & 0xFFFFFFFC; UInt indexQ = index & 0xFFFFFFFC;
...@@ -72,7 +71,7 @@ namespace sw ...@@ -72,7 +71,7 @@ namespace sw
writeVertex(vertex, cacheLine); writeVertex(vertex, cacheLine);
vertex += sizeof(Vertex); vertex += sizeof(Vertex);
batch += sizeof(unsigned int); batch = Pointer<UInt>(Pointer<Byte>(batch) + sizeof(uint32_t));
vertexCount--; vertexCount--;
} }
Until(vertexCount == 0) Until(vertexCount == 0)
...@@ -84,20 +83,20 @@ namespace sw ...@@ -84,20 +83,20 @@ namespace sw
{ {
for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i += 4) for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i += 4)
{ {
if (spirvShader->inputs[i].Type != SpirvShader::ATTRIBTYPE_UNUSED || if(spirvShader->inputs[i + 0].Type != SpirvShader::ATTRIBTYPE_UNUSED ||
spirvShader->inputs[i + 1].Type != SpirvShader::ATTRIBTYPE_UNUSED || spirvShader->inputs[i + 1].Type != SpirvShader::ATTRIBTYPE_UNUSED ||
spirvShader->inputs[i + 2].Type != SpirvShader::ATTRIBTYPE_UNUSED || spirvShader->inputs[i + 2].Type != SpirvShader::ATTRIBTYPE_UNUSED ||
spirvShader->inputs[i + 3].Type != SpirvShader::ATTRIBTYPE_UNUSED) spirvShader->inputs[i + 3].Type != SpirvShader::ATTRIBTYPE_UNUSED)
{ {
Pointer<Byte> input = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, input) + sizeof(void *) * (i/4)); Pointer<Byte> input = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, input) + sizeof(void*) * (i / 4));
UInt stride = *Pointer<UInt>(data + OFFSET(DrawData, stride) + sizeof(unsigned int) * (i/4)); UInt stride = *Pointer<UInt>(data + OFFSET(DrawData, stride) + sizeof(uint32_t) * (i / 4));
auto value = readStream(input, stride, state.input[i/4], index); auto value = readStream(input, stride, state.input[i / 4], index);
routine.inputs[i] = value.x; routine.inputs[i + 0] = value.x;
routine.inputs[i+1] = value.y; routine.inputs[i + 1] = value.y;
routine.inputs[i+2] = value.z; routine.inputs[i + 2] = value.z;
routine.inputs[i+3] = value.w; routine.inputs[i + 3] = value.w;
} }
} }
} }
...@@ -108,7 +107,7 @@ namespace sw ...@@ -108,7 +107,7 @@ namespace sw
assert(it != spirvShader->outputBuiltins.end()); assert(it != spirvShader->outputBuiltins.end());
assert(it->second.SizeInComponents == 4); assert(it->second.SizeInComponents == 4);
auto &pos = routine.getVariable(it->second.Id); auto &pos = routine.getVariable(it->second.Id);
auto posX = pos[it->second.FirstComponent]; auto posX = pos[it->second.FirstComponent + 0];
auto posY = pos[it->second.FirstComponent + 1]; auto posY = pos[it->second.FirstComponent + 1];
auto posZ = pos[it->second.FirstComponent + 2]; auto posZ = pos[it->second.FirstComponent + 2];
auto posW = pos[it->second.FirstComponent + 3]; auto posW = pos[it->second.FirstComponent + 3];
...@@ -120,19 +119,19 @@ namespace sw ...@@ -120,19 +119,19 @@ namespace sw
Int4 minY = CmpNLE(-posW, posY); Int4 minY = CmpNLE(-posW, posY);
Int4 minZ = CmpNLE(Float4(0.0f), posZ); Int4 minZ = CmpNLE(Float4(0.0f), posZ);
clipFlags = *Pointer<Int>(constants + OFFSET(Constants,maxX) + SignMask(maxX) * 4); // FIXME: Array indexing clipFlags = Pointer<Int>(constants + OFFSET(Constants,maxX))[SignMask(maxX)];
clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,maxY) + SignMask(maxY) * 4); clipFlags |= Pointer<Int>(constants + OFFSET(Constants,maxY))[SignMask(maxY)];
clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,maxZ) + SignMask(maxZ) * 4); clipFlags |= Pointer<Int>(constants + OFFSET(Constants,maxZ))[SignMask(maxZ)];
clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minX) + SignMask(minX) * 4); clipFlags |= Pointer<Int>(constants + OFFSET(Constants,minX))[SignMask(minX)];
clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minY) + SignMask(minY) * 4); clipFlags |= Pointer<Int>(constants + OFFSET(Constants,minY))[SignMask(minY)];
clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minZ) + SignMask(minZ) * 4); clipFlags |= Pointer<Int>(constants + OFFSET(Constants,minZ))[SignMask(minZ)];
Int4 finiteX = CmpLE(Abs(posX), *Pointer<Float4>(constants + OFFSET(Constants,maxPos))); Int4 finiteX = CmpLE(Abs(posX), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
Int4 finiteY = CmpLE(Abs(posY), *Pointer<Float4>(constants + OFFSET(Constants,maxPos))); Int4 finiteY = CmpLE(Abs(posY), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
Int4 finiteZ = CmpLE(Abs(posZ), *Pointer<Float4>(constants + OFFSET(Constants,maxPos))); Int4 finiteZ = CmpLE(Abs(posZ), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
Int4 finiteXYZ = finiteX & finiteY & finiteZ; Int4 finiteXYZ = finiteX & finiteY & finiteZ;
clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,fini) + SignMask(finiteXYZ) * 4); clipFlags |= Pointer<Int>(constants + OFFSET(Constants,fini))[SignMask(finiteXYZ)];
} }
Vector4f VertexRoutine::readStream(Pointer<Byte> &buffer, UInt &stride, const Stream &stream, const UInt &index) Vector4f VertexRoutine::readStream(Pointer<Byte> &buffer, UInt &stride, const Stream &stream, const UInt &index)
...@@ -476,7 +475,7 @@ namespace sw ...@@ -476,7 +475,7 @@ namespace sw
} }
break; break;
default: default:
ASSERT(false); UNSUPPORTED("stream.type %d", int(stream.type));
} }
if(stream.count < 1) v.x = Float4(0.0f); if(stream.count < 1) v.x = Float4(0.0f);
...@@ -489,99 +488,95 @@ namespace sw ...@@ -489,99 +488,95 @@ namespace sw
void VertexRoutine::writeCache(Pointer<Byte> &cacheLine) void VertexRoutine::writeCache(Pointer<Byte> &cacheLine)
{ {
Vector4f v;
for (int i = 0; i < MAX_INTERFACE_COMPONENTS; i += 4)
{
if (spirvShader->outputs[i].Type != SpirvShader::ATTRIBTYPE_UNUSED ||
spirvShader->outputs[i+1].Type != SpirvShader::ATTRIBTYPE_UNUSED ||
spirvShader->outputs[i+2].Type != SpirvShader::ATTRIBTYPE_UNUSED ||
spirvShader->outputs[i+3].Type != SpirvShader::ATTRIBTYPE_UNUSED)
{
v.x = routine.outputs[i];
v.y = routine.outputs[i+1];
v.z = routine.outputs[i+2];
v.w = routine.outputs[i+3];
transpose4x4(v.x, v.y, v.z, v.w);
*Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 0, 16) = v.x;
*Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 1, 16) = v.y;
*Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 2, 16) = v.z;
*Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 3, 16) = v.w;
}
}
*Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 0) = (clipFlags >> 0) & 0x0000000FF;
*Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 1) = (clipFlags >> 8) & 0x0000000FF;
*Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 2) = (clipFlags >> 16) & 0x0000000FF;
*Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 3) = (clipFlags >> 24) & 0x0000000FF;
// Viewport transform
auto it = spirvShader->outputBuiltins.find(spv::BuiltInPosition); auto it = spirvShader->outputBuiltins.find(spv::BuiltInPosition);
assert(it != spirvShader->outputBuiltins.end()); assert(it != spirvShader->outputBuiltins.end());
assert(it->second.SizeInComponents == 4); assert(it->second.SizeInComponents == 4);
auto &pos = routine.getVariable(it->second.Id); auto &position = routine.getVariable(it->second.Id);
auto posX = pos[it->second.FirstComponent];
auto posY = pos[it->second.FirstComponent + 1];
auto posZ = pos[it->second.FirstComponent + 2];
auto posW = pos[it->second.FirstComponent + 3];
v.x = posX; Vector4f pos;
v.y = posY; pos.x = position[it->second.FirstComponent + 0];
v.z = posZ; pos.y = position[it->second.FirstComponent + 1];
v.w = posW; pos.z = position[it->second.FirstComponent + 2];
pos.w = position[it->second.FirstComponent + 3];
// Write the builtin pos into the vertex; it's not going to be consumed by the FS, but may need to reproject if we have to clip. // Projection and viewport transform.
Vector4f v2 = v; Float4 w = As<Float4>(As<Int4>(pos.w) | (As<Int4>(CmpEQ(pos.w, Float4(0.0f))) & As<Int4>(Float4(1.0f))));
transpose4x4(v2.x, v2.y, v2.z, v2.w);
*Pointer<Float4>(cacheLine + OFFSET(Vertex,builtins.position) + sizeof(Vertex) * 0, 16) = v2.x;
*Pointer<Float4>(cacheLine + OFFSET(Vertex,builtins.position) + sizeof(Vertex) * 1, 16) = v2.y;
*Pointer<Float4>(cacheLine + OFFSET(Vertex,builtins.position) + sizeof(Vertex) * 2, 16) = v2.z;
*Pointer<Float4>(cacheLine + OFFSET(Vertex,builtins.position) + sizeof(Vertex) * 3, 16) = v2.w;
Float4 w = As<Float4>(As<Int4>(v.w) | (As<Int4>(CmpEQ(v.w, Float4(0.0f))) & As<Int4>(Float4(1.0f))));
Float4 rhw = Float4(1.0f) / w; Float4 rhw = Float4(1.0f) / w;
v.x = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,X0x16)) + v.x * rhw * *Pointer<Float4>(data + OFFSET(DrawData,Wx16)))); Vector4f proj;
v.y = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,Y0x16)) + v.y * rhw * *Pointer<Float4>(data + OFFSET(DrawData,Hx16)))); proj.x = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,X0x16)) + pos.x * rhw * *Pointer<Float4>(data + OFFSET(DrawData,Wx16))));
v.z = v.z * rhw; proj.y = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,Y0x16)) + pos.y * rhw * *Pointer<Float4>(data + OFFSET(DrawData,Hx16))));
v.w = rhw; proj.z = pos.z * rhw;
proj.w = rhw;
transpose4x4(v.x, v.y, v.z, v.w); transpose4x4(pos.x, pos.y, pos.z, pos.w);
*Pointer<Float4>(cacheLine + OFFSET(Vertex,projected) + sizeof(Vertex) * 0, 16) = v.x; *Pointer<Float4>(cacheLine + OFFSET(Vertex,position) + sizeof(Vertex) * 0, 16) = pos.x;
*Pointer<Float4>(cacheLine + OFFSET(Vertex,projected) + sizeof(Vertex) * 1, 16) = v.y; *Pointer<Float4>(cacheLine + OFFSET(Vertex,position) + sizeof(Vertex) * 1, 16) = pos.y;
*Pointer<Float4>(cacheLine + OFFSET(Vertex,projected) + sizeof(Vertex) * 2, 16) = v.z; *Pointer<Float4>(cacheLine + OFFSET(Vertex,position) + sizeof(Vertex) * 2, 16) = pos.z;
*Pointer<Float4>(cacheLine + OFFSET(Vertex,projected) + sizeof(Vertex) * 3, 16) = v.w; *Pointer<Float4>(cacheLine + OFFSET(Vertex,position) + sizeof(Vertex) * 3, 16) = pos.w;
it = spirvShader->outputBuiltins.find(spv::BuiltInPointSize); it = spirvShader->outputBuiltins.find(spv::BuiltInPointSize);
if (it != spirvShader->outputBuiltins.end()) if(it != spirvShader->outputBuiltins.end())
{ {
assert(it->second.SizeInComponents == 1); assert(it->second.SizeInComponents == 1);
auto psize = routine.getVariable(it->second.Id)[it->second.FirstComponent]; auto psize = routine.getVariable(it->second.Id)[it->second.FirstComponent];
*Pointer<Float>(cacheLine + OFFSET(Vertex,builtins.pointSize) + sizeof(Vertex) * 0) = Extract(psize, 0);
*Pointer<Float>(cacheLine + OFFSET(Vertex,builtins.pointSize) + sizeof(Vertex) * 1) = Extract(psize, 1); *Pointer<Float>(cacheLine + OFFSET(Vertex,pointSize) + sizeof(Vertex) * 0) = Extract(psize, 0);
*Pointer<Float>(cacheLine + OFFSET(Vertex,builtins.pointSize) + sizeof(Vertex) * 2) = Extract(psize, 2); *Pointer<Float>(cacheLine + OFFSET(Vertex,pointSize) + sizeof(Vertex) * 1) = Extract(psize, 1);
*Pointer<Float>(cacheLine + OFFSET(Vertex,builtins.pointSize) + sizeof(Vertex) * 3) = Extract(psize, 3); *Pointer<Float>(cacheLine + OFFSET(Vertex,pointSize) + sizeof(Vertex) * 2) = Extract(psize, 2);
*Pointer<Float>(cacheLine + OFFSET(Vertex,pointSize) + sizeof(Vertex) * 3) = Extract(psize, 3);
}
*Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 0) = (clipFlags >> 0) & 0x0000000FF;
*Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 1) = (clipFlags >> 8) & 0x0000000FF;
*Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 2) = (clipFlags >> 16) & 0x0000000FF;
*Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 3) = (clipFlags >> 24) & 0x0000000FF;
transpose4x4(proj.x, proj.y, proj.z, proj.w);
*Pointer<Float4>(cacheLine + OFFSET(Vertex,projected) + sizeof(Vertex) * 0, 16) = proj.x;
*Pointer<Float4>(cacheLine + OFFSET(Vertex,projected) + sizeof(Vertex) * 1, 16) = proj.y;
*Pointer<Float4>(cacheLine + OFFSET(Vertex,projected) + sizeof(Vertex) * 2, 16) = proj.z;
*Pointer<Float4>(cacheLine + OFFSET(Vertex,projected) + sizeof(Vertex) * 3, 16) = proj.w;
for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i += 4)
{
if(spirvShader->outputs[i + 0].Type != SpirvShader::ATTRIBTYPE_UNUSED ||
spirvShader->outputs[i + 1].Type != SpirvShader::ATTRIBTYPE_UNUSED ||
spirvShader->outputs[i + 2].Type != SpirvShader::ATTRIBTYPE_UNUSED ||
spirvShader->outputs[i + 3].Type != SpirvShader::ATTRIBTYPE_UNUSED)
{
Vector4f v;
v.x = routine.outputs[i + 0];
v.y = routine.outputs[i + 1];
v.z = routine.outputs[i + 2];
v.w = routine.outputs[i + 3];
transpose4x4(v.x, v.y, v.z, v.w);
*Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 0, 16) = v.x;
*Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 1, 16) = v.y;
*Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 2, 16) = v.z;
*Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 3, 16) = v.w;
}
} }
} }
void VertexRoutine::writeVertex(const Pointer<Byte> &vertex, Pointer<Byte> &cache) void VertexRoutine::writeVertex(const Pointer<Byte> &vertex, Pointer<Byte> &cacheEntry)
{ {
*Pointer<Int4>(vertex + OFFSET(Vertex,position)) = *Pointer<Int4>(cacheEntry + OFFSET(Vertex,position));
*Pointer<Int>(vertex + OFFSET(Vertex,pointSize)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex,pointSize));
*Pointer<Int>(vertex + OFFSET(Vertex,clipFlags)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex,clipFlags));
*Pointer<Int4>(vertex + OFFSET(Vertex,projected)) = *Pointer<Int4>(cacheEntry + OFFSET(Vertex,projected));
for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i++) for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i++)
{ {
if(spirvShader->outputs[i].Type != SpirvShader::ATTRIBTYPE_UNUSED) if(spirvShader->outputs[i].Type != SpirvShader::ATTRIBTYPE_UNUSED)
{ {
*Pointer<Int>(vertex + OFFSET(Vertex, v[i]), 4) = *Pointer<Int>(cache + OFFSET(Vertex, v[i]), 4); *Pointer<Int>(vertex + OFFSET(Vertex, v[i]), 4) = *Pointer<Int>(cacheEntry + OFFSET(Vertex, v[i]), 4);
} }
} }
*Pointer<Int4>(vertex + OFFSET(Vertex,projected)) = *Pointer<Int4>(cache + OFFSET(Vertex,projected));
*Pointer<Int>(vertex + OFFSET(Vertex,clipFlags)) = *Pointer<Int>(cache + OFFSET(Vertex,clipFlags));
*Pointer<Int4>(vertex + OFFSET(Vertex,builtins.position)) = *Pointer<Int4>(cache + OFFSET(Vertex,builtins.position));
*Pointer<Int>(vertex + OFFSET(Vertex,builtins.pointSize)) = *Pointer<Int>(cache + OFFSET(Vertex,builtins.pointSize));
} }
} }
...@@ -15,10 +15,10 @@ ...@@ -15,10 +15,10 @@
#ifndef sw_VertexRoutine_hpp #ifndef sw_VertexRoutine_hpp
#define sw_VertexRoutine_hpp #define sw_VertexRoutine_hpp
#include "Device/Color.hpp"
#include "Device/VertexProcessor.hpp"
#include "ShaderCore.hpp" #include "ShaderCore.hpp"
#include "SpirvShader.hpp" #include "SpirvShader.hpp"
#include "Device/Color.hpp"
#include "Device/VertexProcessor.hpp"
namespace vk namespace vk
{ {
...@@ -27,7 +27,7 @@ namespace vk ...@@ -27,7 +27,7 @@ namespace vk
namespace sw namespace sw
{ {
class VertexRoutinePrototype : public Function<Void(Pointer<Byte>, Pointer<Byte>, Pointer<Byte>, Pointer<Byte>)> class VertexRoutinePrototype : public Function<Void(Pointer<Byte>, Pointer<UInt>, Pointer<Byte>, Pointer<Byte>)>
{ {
public: public:
VertexRoutinePrototype() : vertex(Arg<0>()), batch(Arg<1>()), task(Arg<2>()), data(Arg<3>()) {} VertexRoutinePrototype() : vertex(Arg<0>()), batch(Arg<1>()), task(Arg<2>()), data(Arg<3>()) {}
...@@ -35,7 +35,7 @@ namespace sw ...@@ -35,7 +35,7 @@ namespace sw
protected: protected:
Pointer<Byte> vertex; Pointer<Byte> vertex;
Pointer<Byte> batch; Pointer<UInt> batch;
Pointer<Byte> task; Pointer<Byte> task;
Pointer<Byte> data; Pointer<Byte> data;
}; };
...@@ -70,7 +70,7 @@ namespace sw ...@@ -70,7 +70,7 @@ namespace sw
void readInput(UInt &index); void readInput(UInt &index);
void computeClipFlags(); void computeClipFlags();
void writeCache(Pointer<Byte> &cacheLine); void writeCache(Pointer<Byte> &cacheLine);
void writeVertex(const Pointer<Byte> &vertex, Pointer<Byte> &cacheLine); void writeVertex(const Pointer<Byte> &vertex, Pointer<Byte> &cacheEntry);
}; };
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment