Commit 7551ac68 by Nicolas Capens

Make vertex registers members of routine classes.

Bug 22652760
Change-Id: I698ce910ee4302178d7235fa316aaa2b268e71a8
Reviewed-on: https://swiftshader-review.googlesource.com/4560
Tested-by: Nicolas Capens <capn@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
parent 907700d1
......@@ -53,7 +53,7 @@ namespace sw
{
for(int i = 0; i < 4; i++)
{
Float4 B = r.v[BlendIndices].x;
Float4 B = v[BlendIndices].x;
UInt indices;
switch(i)
......@@ -88,9 +88,9 @@ namespace sw
switch(state.vertexBlendMatrixCount)
{
case 4: weight2 = r.v[BlendWeight].z;
case 3: weight1 = r.v[BlendWeight].y;
case 2: weight0 = r.v[BlendWeight].x;
case 4: weight2 = v[BlendWeight].z;
case 3: weight1 = v[BlendWeight].y;
case 2: weight0 = v[BlendWeight].x;
case 1:
break;
}
......@@ -162,23 +162,23 @@ namespace sw
if(!state.preTransformed)
{
position = transformBlend(r.v[Position], Pointer<Byte>(r.data + OFFSET(DrawData,ff.transformT)), true);
position = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.transformT)), true);
}
else
{
position = r.v[PositionT];
position = v[PositionT];
}
r.o[Pos].x = position.x;
r.o[Pos].y = position.y;
r.o[Pos].z = position.z;
r.o[Pos].w = position.w;
o[Pos].x = position.x;
o[Pos].y = position.y;
o[Pos].z = position.z;
o[Pos].w = position.w;
Vector4f vertexPosition = transformBlend(r.v[Position], Pointer<Byte>(r.data + OFFSET(DrawData,ff.cameraTransformT)), true);
Vector4f vertexPosition = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true);
if(state.vertexNormalActive)
{
normal = transformBlend(r.v[Normal], Pointer<Byte>(r.data + OFFSET(DrawData,ff.normalTransformT)), false);
normal = transformBlend(v[Normal], Pointer<Byte>(data + OFFSET(DrawData,ff.normalTransformT)), false);
if(state.normalizeNormals)
{
......@@ -191,53 +191,53 @@ namespace sw
// FIXME: Don't process if not used at all
if(state.diffuseActive && state.input[Color0])
{
Vector4f diffuse = r.v[Color0];
Vector4f diffuse = v[Color0];
r.o[D0].x = diffuse.x;
r.o[D0].y = diffuse.y;
r.o[D0].z = diffuse.z;
r.o[D0].w = diffuse.w;
o[D0].x = diffuse.x;
o[D0].y = diffuse.y;
o[D0].z = diffuse.z;
o[D0].w = diffuse.w;
}
else
{
r.o[D0].x = Float4(1.0f);
r.o[D0].y = Float4(1.0f);
r.o[D0].z = Float4(1.0f);
r.o[D0].w = Float4(1.0f);
o[D0].x = Float4(1.0f);
o[D0].y = Float4(1.0f);
o[D0].z = Float4(1.0f);
o[D0].w = Float4(1.0f);
}
// FIXME: Don't process if not used at all
if(state.specularActive && state.input[Color1])
{
Vector4f specular = r.v[Color1];
Vector4f specular = v[Color1];
r.o[D1].x = specular.x;
r.o[D1].y = specular.y;
r.o[D1].z = specular.z;
r.o[D1].w = specular.w;
o[D1].x = specular.x;
o[D1].y = specular.y;
o[D1].z = specular.z;
o[D1].w = specular.w;
}
else
{
r.o[D1].x = Float4(0.0f);
r.o[D1].y = Float4(0.0f);
r.o[D1].z = Float4(0.0f);
r.o[D1].w = Float4(1.0f);
o[D1].x = Float4(0.0f);
o[D1].y = Float4(0.0f);
o[D1].z = Float4(0.0f);
o[D1].w = Float4(1.0f);
}
}
else
{
r.o[D0].x = Float4(0.0f);
r.o[D0].y = Float4(0.0f);
r.o[D0].z = Float4(0.0f);
r.o[D0].w = Float4(0.0f);
o[D0].x = Float4(0.0f);
o[D0].y = Float4(0.0f);
o[D0].z = Float4(0.0f);
o[D0].w = Float4(0.0f);
r.o[D1].x = Float4(0.0f);
r.o[D1].y = Float4(0.0f);
r.o[D1].z = Float4(0.0f);
r.o[D1].w = Float4(0.0f);
o[D1].x = Float4(0.0f);
o[D1].y = Float4(0.0f);
o[D1].z = Float4(0.0f);
o[D1].w = Float4(0.0f);
Vector4f ambient;
Float4 globalAmbient = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.globalAmbient)); // FIXME: Unpack
Float4 globalAmbient = *Pointer<Float4>(data + OFFSET(DrawData,ff.globalAmbient)); // FIXME: Unpack
ambient.x = globalAmbient.x;
ambient.y = globalAmbient.y;
......@@ -257,7 +257,7 @@ namespace sw
{
Float4 d; // Distance
L.x = L.y = L.z = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.lightPosition[i])); // FIXME: Unpack
L.x = L.y = L.z = *Pointer<Float4>(data + OFFSET(DrawData,ff.lightPosition[i])); // FIXME: Unpack
L.x = L.x.xxxx;
L.y = L.y.yyyy;
L.z = L.z.zzzz;
......@@ -272,16 +272,16 @@ namespace sw
L.z *= d;
d = Rcp_pp(d); // FIXME: Sufficient precision?
Float4 q = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.attenuationQuadratic[i]));
Float4 l = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.attenuationLinear[i]));
Float4 c = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.attenuationConstant[i]));
Float4 q = *Pointer<Float4>(data + OFFSET(DrawData,ff.attenuationQuadratic[i]));
Float4 l = *Pointer<Float4>(data + OFFSET(DrawData,ff.attenuationLinear[i]));
Float4 c = *Pointer<Float4>(data + OFFSET(DrawData,ff.attenuationConstant[i]));
att = Rcp_pp((q * d + l) * d + c);
}
// Ambient per light
{
Float4 lightAmbient = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.lightAmbient[i])); // FIXME: Unpack
Float4 lightAmbient = *Pointer<Float4>(data + OFFSET(DrawData,ff.lightAmbient[i])); // FIXME: Unpack
ambient.x = ambient.x + lightAmbient.x * att;
ambient.y = ambient.y + lightAmbient.y * att;
......@@ -301,26 +301,26 @@ namespace sw
if(state.vertexDiffuseMaterialSourceActive == MATERIAL_MATERIAL)
{
diff.x = diff.y = diff.z = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.materialDiffuse)); // FIXME: Unpack
diff.x = diff.y = diff.z = *Pointer<Float4>(data + OFFSET(DrawData,ff.materialDiffuse)); // FIXME: Unpack
diff.x = diff.x.xxxx;
diff.y = diff.y.yyyy;
diff.z = diff.z.zzzz;
}
else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR1)
{
diff = r.v[Color0];
diff = v[Color0];
}
else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR2)
{
diff = r.v[Color1];
diff = v[Color1];
}
else ASSERT(false);
Float4 lightDiffuse = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.lightDiffuse[i]));
Float4 lightDiffuse = *Pointer<Float4>(data + OFFSET(DrawData,ff.lightDiffuse[i]));
r.o[D0].x = r.o[D0].x + diff.x * dot * lightDiffuse.x; // FIXME: Clamp first?
r.o[D0].y = r.o[D0].y + diff.y * dot * lightDiffuse.y; // FIXME: Clamp first?
r.o[D0].z = r.o[D0].z + diff.z * dot * lightDiffuse.z; // FIXME: Clamp first?
o[D0].x = o[D0].x + diff.x * dot * lightDiffuse.x; // FIXME: Clamp first?
o[D0].y = o[D0].y + diff.y * dot * lightDiffuse.y; // FIXME: Clamp first?
o[D0].z = o[D0].z + diff.z * dot * lightDiffuse.z; // FIXME: Clamp first?
}
// Specular
......@@ -330,7 +330,7 @@ namespace sw
Vector4f C; // Camera vector
Float4 pow;
pow = *Pointer<Float>(r.data + OFFSET(DrawData,ff.materialShininess));
pow = *Pointer<Float>(data + OFFSET(DrawData,ff.materialShininess));
S.x = Float4(0.0f) - vertexPosition.x;
S.y = Float4(0.0f) - vertexPosition.y;
......@@ -351,7 +351,7 @@ namespace sw
if(state.vertexSpecularMaterialSourceActive == MATERIAL_MATERIAL)
{
Float4 materialSpecular = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.materialSpecular)); // FIXME: Unpack
Float4 materialSpecular = *Pointer<Float4>(data + OFFSET(DrawData,ff.materialSpecular)); // FIXME: Unpack
spec.x = materialSpecular.x;
spec.y = materialSpecular.y;
......@@ -359,15 +359,15 @@ namespace sw
}
else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR1)
{
spec = r.v[Color0];
spec = v[Color0];
}
else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR2)
{
spec = r.v[Color1];
spec = v[Color1];
}
else ASSERT(false);
Float4 lightSpecular = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.lightSpecular[i]));
Float4 lightSpecular = *Pointer<Float4>(data + OFFSET(DrawData,ff.lightSpecular[i]));
spec.x *= lightSpecular.x;
spec.y *= lightSpecular.y;
......@@ -383,22 +383,22 @@ namespace sw
if(secondaryColor)
{
r.o[D1].x = r.o[D1].x + spec.x;
r.o[D1].y = r.o[D1].y + spec.y;
r.o[D1].z = r.o[D1].z + spec.z;
o[D1].x = o[D1].x + spec.x;
o[D1].y = o[D1].y + spec.y;
o[D1].z = o[D1].z + spec.z;
}
else
{
r.o[D0].x = r.o[D0].x + spec.x;
r.o[D0].y = r.o[D0].y + spec.y;
r.o[D0].z = r.o[D0].z + spec.z;
o[D0].x = o[D0].x + spec.x;
o[D0].y = o[D0].y + spec.y;
o[D0].z = o[D0].z + spec.z;
}
}
}
if(state.vertexAmbientMaterialSourceActive == MATERIAL_MATERIAL)
{
Float4 materialAmbient = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.materialAmbient)); // FIXME: Unpack
Float4 materialAmbient = *Pointer<Float4>(data + OFFSET(DrawData,ff.materialAmbient)); // FIXME: Unpack
ambient.x = ambient.x * materialAmbient.x;
ambient.y = ambient.y * materialAmbient.y;
......@@ -406,7 +406,7 @@ namespace sw
}
else if(state.vertexAmbientMaterialSourceActive == MATERIAL_COLOR1)
{
Vector4f materialDiffuse = r.v[Color0];
Vector4f materialDiffuse = v[Color0];
ambient.x = ambient.x * materialDiffuse.x;
ambient.y = ambient.y * materialDiffuse.y;
......@@ -414,7 +414,7 @@ namespace sw
}
else if(state.vertexAmbientMaterialSourceActive == MATERIAL_COLOR2)
{
Vector4f materialSpecular = r.v[Color1];
Vector4f materialSpecular = v[Color1];
ambient.x = ambient.x * materialSpecular.x;
ambient.y = ambient.y * materialSpecular.y;
......@@ -422,51 +422,51 @@ namespace sw
}
else ASSERT(false);
r.o[D0].x = r.o[D0].x + ambient.x;
r.o[D0].y = r.o[D0].y + ambient.y;
r.o[D0].z = r.o[D0].z + ambient.z;
o[D0].x = o[D0].x + ambient.x;
o[D0].y = o[D0].y + ambient.y;
o[D0].z = o[D0].z + ambient.z;
// Emissive
if(state.vertexEmissiveMaterialSourceActive == MATERIAL_MATERIAL)
{
Float4 materialEmission = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.materialEmission)); // FIXME: Unpack
Float4 materialEmission = *Pointer<Float4>(data + OFFSET(DrawData,ff.materialEmission)); // FIXME: Unpack
r.o[D0].x = r.o[D0].x + materialEmission.x;
r.o[D0].y = r.o[D0].y + materialEmission.y;
r.o[D0].z = r.o[D0].z + materialEmission.z;
o[D0].x = o[D0].x + materialEmission.x;
o[D0].y = o[D0].y + materialEmission.y;
o[D0].z = o[D0].z + materialEmission.z;
}
else if(state.vertexEmissiveMaterialSourceActive == MATERIAL_COLOR1)
{
Vector4f materialSpecular = r.v[Color0];
Vector4f materialSpecular = v[Color0];
r.o[D0].x = r.o[D0].x + materialSpecular.x;
r.o[D0].y = r.o[D0].y + materialSpecular.y;
r.o[D0].z = r.o[D0].z + materialSpecular.z;
o[D0].x = o[D0].x + materialSpecular.x;
o[D0].y = o[D0].y + materialSpecular.y;
o[D0].z = o[D0].z + materialSpecular.z;
}
else if(state.vertexEmissiveMaterialSourceActive == MATERIAL_COLOR2)
{
Vector4f materialSpecular = r.v[Color1];
Vector4f materialSpecular = v[Color1];
r.o[D0].x = r.o[D0].x + materialSpecular.x;
r.o[D0].y = r.o[D0].y + materialSpecular.y;
r.o[D0].z = r.o[D0].z + materialSpecular.z;
o[D0].x = o[D0].x + materialSpecular.x;
o[D0].y = o[D0].y + materialSpecular.y;
o[D0].z = o[D0].z + materialSpecular.z;
}
else ASSERT(false);
// Diffuse alpha component
if(state.vertexDiffuseMaterialSourceActive == MATERIAL_MATERIAL)
{
r.o[D0].w = Float4(*Pointer<Float4>(r.data + OFFSET(DrawData,ff.materialDiffuse[0]))).wwww; // FIXME: Unpack
o[D0].w = Float4(*Pointer<Float4>(data + OFFSET(DrawData,ff.materialDiffuse[0]))).wwww; // FIXME: Unpack
}
else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR1)
{
Vector4f alpha = r.v[Color0];
r.o[D0].w = alpha.w;
Vector4f alpha = v[Color0];
o[D0].w = alpha.w;
}
else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR2)
{
Vector4f alpha = r.v[Color1];
r.o[D0].w = alpha.w;
Vector4f alpha = v[Color1];
o[D0].w = alpha.w;
}
else ASSERT(false);
......@@ -475,17 +475,17 @@ namespace sw
// Specular alpha component
if(state.vertexSpecularMaterialSourceActive == MATERIAL_MATERIAL)
{
r.o[D1].w = Float4(*Pointer<Float4>(r.data + OFFSET(DrawData,ff.materialSpecular[3]))).wwww; // FIXME: Unpack
o[D1].w = Float4(*Pointer<Float4>(data + OFFSET(DrawData,ff.materialSpecular[3]))).wwww; // FIXME: Unpack
}
else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR1)
{
Vector4f alpha = r.v[Color0];
r.o[D1].w = alpha.w;
Vector4f alpha = v[Color0];
o[D1].w = alpha.w;
}
else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR2)
{
Vector4f alpha = r.v[Color1];
r.o[D1].w = alpha.w;
Vector4f alpha = v[Color1];
o[D1].w = alpha.w;
}
else ASSERT(false);
}
......@@ -509,21 +509,21 @@ namespace sw
case FOG_NONE:
if(state.specularActive)
{
r.o[Fog].x = r.o[D1].w;
o[Fog].x = o[D1].w;
}
else
{
r.o[Fog].x = Float4(0.0f);
o[Fog].x = Float4(0.0f);
}
break;
case FOG_LINEAR:
r.o[Fog].x = f * *Pointer<Float4>(r.data + OFFSET(DrawData,fog.scale)) + *Pointer<Float4>(r.data + OFFSET(DrawData,fog.offset));
o[Fog].x = f * *Pointer<Float4>(data + OFFSET(DrawData,fog.scale)) + *Pointer<Float4>(data + OFFSET(DrawData,fog.offset));
break;
case FOG_EXP:
r.o[Fog].x = exponential2(f * *Pointer<Float4>(r.data + OFFSET(DrawData,fog.densityE)), true);
o[Fog].x = exponential2(f * *Pointer<Float4>(data + OFFSET(DrawData,fog.densityE)), true);
break;
case FOG_EXP2:
r.o[Fog].x = exponential2((f * f) * *Pointer<Float4>(r.data + OFFSET(DrawData,fog.density2E)), true);
o[Fog].x = exponential2((f * f) * *Pointer<Float4>(data + OFFSET(DrawData,fog.density2E)), true);
break;
default:
ASSERT(false);
......@@ -548,38 +548,38 @@ namespace sw
{
case TEXGEN_NONE:
{
Vector4f v = r.v[TexCoord0 + i];
Vector4f &&varying = v[TexCoord0 + i];
r.o[T0 + stage].x = v.x;
r.o[T0 + stage].y = v.y;
r.o[T0 + stage].z = v.z;
r.o[T0 + stage].w = v.w;
o[T0 + stage].x = varying.x;
o[T0 + stage].y = varying.y;
o[T0 + stage].z = varying.z;
o[T0 + stage].w = varying.w;
}
break;
case TEXGEN_PASSTHRU:
{
Vector4f v = r.v[TexCoord0 + i];
Vector4f &&varying = v[TexCoord0 + i];
r.o[T0 + stage].x = v.x;
r.o[T0 + stage].y = v.y;
r.o[T0 + stage].z = v.z;
r.o[T0 + stage].w = v.w;
o[T0 + stage].x = varying.x;
o[T0 + stage].y = varying.y;
o[T0 + stage].z = varying.z;
o[T0 + stage].w = varying.w;
if(state.input[TexCoord0 + i])
{
switch(state.input[TexCoord0 + i].count)
{
case 1:
r.o[T0 + stage].y = Float4(1.0f);
r.o[T0 + stage].z = Float4(0.0f);
r.o[T0 + stage].w = Float4(0.0f);
o[T0 + stage].y = Float4(1.0f);
o[T0 + stage].z = Float4(0.0f);
o[T0 + stage].w = Float4(0.0f);
break;
case 2:
r.o[T0 + stage].z = Float4(1.0f);
r.o[T0 + stage].w = Float4(0.0f);
o[T0 + stage].z = Float4(1.0f);
o[T0 + stage].w = Float4(0.0f);
break;
case 3:
r.o[T0 + stage].w = Float4(1.0f);
o[T0 + stage].w = Float4(1.0f);
break;
case 4:
break;
......@@ -606,22 +606,22 @@ namespace sw
Nc.w = Float4(1.0f);
r.o[T0 + stage].x = Nc.x;
r.o[T0 + stage].y = Nc.y;
r.o[T0 + stage].z = Nc.z;
r.o[T0 + stage].w = Nc.w;
o[T0 + stage].x = Nc.x;
o[T0 + stage].y = Nc.y;
o[T0 + stage].z = Nc.z;
o[T0 + stage].w = Nc.w;
}
break;
case TEXGEN_POSITION:
{
Vector4f Pn = transformBlend(r.v[Position], Pointer<Byte>(r.data + OFFSET(DrawData,ff.cameraTransformT)), true); // Position in camera space
Vector4f Pn = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true); // Position in camera space
Pn.w = Float4(1.0f);
r.o[T0 + stage].x = Pn.x;
r.o[T0 + stage].y = Pn.y;
r.o[T0 + stage].z = Pn.z;
r.o[T0 + stage].w = Pn.w;
o[T0 + stage].x = Pn.x;
o[T0 + stage].y = Pn.y;
o[T0 + stage].z = Pn.z;
o[T0 + stage].w = Pn.w;
}
break;
case TEXGEN_REFLECTION:
......@@ -639,7 +639,7 @@ namespace sw
Vector4f Ec; // Eye vector in camera space
Vector4f N2;
Ec = transformBlend(r.v[Position], Pointer<Byte>(r.data + OFFSET(DrawData,ff.cameraTransformT)), true);
Ec = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true);
Ec = normalize(Ec);
// R = E - 2 * N * (E . N)
......@@ -669,10 +669,10 @@ namespace sw
R.w = Float4(1.0f);
r.o[T0 + stage].x = R.x;
r.o[T0 + stage].y = R.y;
r.o[T0 + stage].z = R.z;
r.o[T0 + stage].w = R.w;
o[T0 + stage].x = R.x;
o[T0 + stage].y = R.y;
o[T0 + stage].z = R.z;
o[T0 + stage].w = R.w;
}
break;
case TEXGEN_SPHEREMAP:
......@@ -690,7 +690,7 @@ namespace sw
Vector4f Ec; // Eye vector in camera space
Vector4f N2;
Ec = transformBlend(r.v[Position], Pointer<Byte>(r.data + OFFSET(DrawData,ff.cameraTransformT)), true);
Ec = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true);
Ec = normalize(Ec);
// R = E - 2 * N * (E . N)
......@@ -726,10 +726,10 @@ namespace sw
R.z = Float4(1.0f);
R.w = Float4(0.0f);
r.o[T0 + stage].x = R.x;
r.o[T0 + stage].y = R.y;
r.o[T0 + stage].z = R.z;
r.o[T0 + stage].w = R.w;
o[T0 + stage].x = R.x;
o[T0 + stage].y = R.y;
o[T0 + stage].z = R.z;
o[T0 + stage].w = R.w;
}
break;
default:
......@@ -744,46 +744,46 @@ namespace sw
Vector4f T;
Vector4f t;
T.x = r.o[T0 + stage].x;
T.y = r.o[T0 + stage].y;
T.z = r.o[T0 + stage].z;
T.w = r.o[T0 + stage].w;
T.x = o[T0 + stage].x;
T.y = o[T0 + stage].y;
T.z = o[T0 + stage].z;
T.w = o[T0 + stage].w;
switch(state.textureState[stage].textureTransformCountActive)
{
case 4:
texTrans3.x = texTrans3.y = texTrans3.z = texTrans3.w = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.textureTransform[stage][3])); // FIXME: Unpack
texTrans3.x = texTrans3.y = texTrans3.z = texTrans3.w = *Pointer<Float4>(data + OFFSET(DrawData,ff.textureTransform[stage][3])); // FIXME: Unpack
texTrans3.x = texTrans3.x.xxxx;
texTrans3.y = texTrans3.y.yyyy;
texTrans3.z = texTrans3.z.zzzz;
texTrans3.w = texTrans3.w.wwww;
t.w = dot4(T, texTrans3);
case 3:
texTrans2.x = texTrans2.y = texTrans2.z = texTrans2.w = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.textureTransform[stage][2])); // FIXME: Unpack
texTrans2.x = texTrans2.y = texTrans2.z = texTrans2.w = *Pointer<Float4>(data + OFFSET(DrawData,ff.textureTransform[stage][2])); // FIXME: Unpack
texTrans2.x = texTrans2.x.xxxx;
texTrans2.y = texTrans2.y.yyyy;
texTrans2.z = texTrans2.z.zzzz;
texTrans2.w = texTrans2.w.wwww;
t.z = dot4(T, texTrans2);
case 2:
texTrans1.x = texTrans1.y = texTrans1.z = texTrans1.w = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.textureTransform[stage][1])); // FIXME: Unpack
texTrans1.x = texTrans1.y = texTrans1.z = texTrans1.w = *Pointer<Float4>(data + OFFSET(DrawData,ff.textureTransform[stage][1])); // FIXME: Unpack
texTrans1.x = texTrans1.x.xxxx;
texTrans1.y = texTrans1.y.yyyy;
texTrans1.z = texTrans1.z.zzzz;
texTrans1.w = texTrans1.w.wwww;
t.y = dot4(T, texTrans1);
case 1:
texTrans0.x = texTrans0.y = texTrans0.z = texTrans0.w = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.textureTransform[stage][0])); // FIXME: Unpack
texTrans0.x = texTrans0.y = texTrans0.z = texTrans0.w = *Pointer<Float4>(data + OFFSET(DrawData,ff.textureTransform[stage][0])); // FIXME: Unpack
texTrans0.x = texTrans0.x.xxxx;
texTrans0.y = texTrans0.y.yyyy;
texTrans0.z = texTrans0.z.zzzz;
texTrans0.w = texTrans0.w.wwww;
t.x = dot4(T, texTrans0);
r.o[T0 + stage].x = t.x;
r.o[T0 + stage].y = t.y;
r.o[T0 + stage].z = t.z;
r.o[T0 + stage].w = t.w;
o[T0 + stage].x = t.x;
o[T0 + stage].y = t.y;
o[T0 + stage].z = t.z;
o[T0 + stage].w = t.w;
case 0:
break;
default:
......@@ -801,26 +801,26 @@ namespace sw
if(state.input[PointSize])
{
r.o[Pts].y = r.v[PointSize].x;
o[Pts].y = v[PointSize].x;
}
else
{
r.o[Pts].y = *Pointer<Float4>(r.data + OFFSET(DrawData,point.pointSize));
o[Pts].y = *Pointer<Float4>(data + OFFSET(DrawData,point.pointSize));
}
if(state.pointScaleActive && !state.preTransformed)
{
Vector4f p = transformBlend(r.v[Position], Pointer<Byte>(r.data + OFFSET(DrawData,ff.cameraTransformT)), true);
Vector4f p = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true);
Float4 d = Sqrt(dot3(p, p)); // FIXME: length(p);
Float4 A = *Pointer<Float>(r.data + OFFSET(DrawData,point.pointScaleA)); // FIXME: Unpack
Float4 B = *Pointer<Float>(r.data + OFFSET(DrawData,point.pointScaleB)); // FIXME: Unpack
Float4 C = *Pointer<Float>(r.data + OFFSET(DrawData,point.pointScaleC)); // FIXME: Unpack
Float4 A = *Pointer<Float>(data + OFFSET(DrawData,point.pointScaleA)); // FIXME: Unpack
Float4 B = *Pointer<Float>(data + OFFSET(DrawData,point.pointScaleB)); // FIXME: Unpack
Float4 C = *Pointer<Float>(data + OFFSET(DrawData,point.pointScaleC)); // FIXME: Unpack
A = RcpSqrt_pp(A + d * (B + d * C));
r.o[Pts].y = r.o[Pts].y * Float4(*Pointer<Float>(r.data + OFFSET(DrawData,viewportHeight))) * A; // FIXME: Unpack
o[Pts].y = o[Pts].y * Float4(*Pointer<Float>(data + OFFSET(DrawData,viewportHeight))) * A; // FIXME: Unpack
}
}
......
......@@ -20,7 +20,8 @@
namespace sw
{
VertexProgram::VertexProgram(const VertexProcessor::State &state, const VertexShader *shader) : VertexRoutine(state, shader)
VertexProgram::VertexProgram(const VertexProcessor::State &state, const VertexShader *shader)
: VertexRoutine(state, shader), shader(shader), r(shader->dynamicallyIndexedTemporaries)
{
ifDepth = 0;
loopRepDepth = 0;
......@@ -32,6 +33,24 @@ namespace sw
{
labelBlock[i] = 0;
}
loopDepth = -1;
enableStack[0] = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
if(shader && shader->containsBreakInstruction())
{
enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
}
if(shader && shader->containsContinueInstruction())
{
enableContinue = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
}
if(shader->instanceIdDeclared)
{
instanceID = *Pointer<Int>(data + OFFSET(DrawData,instanceID));
}
}
VertexProgram::~VertexProgram()
......@@ -46,7 +65,7 @@ namespace sw
{
for(int i = 0; i < VERTEX_TEXTURE_IMAGE_UNITS; i++)
{
sampler[i] = new SamplerCore(r.constants, state.samplerState[i]);
sampler[i] = new SamplerCore(constants, state.samplerState[i]);
}
if(!state.preTransformed)
......@@ -65,12 +84,12 @@ namespace sw
unsigned short version = shader->getVersion();
r.enableIndex = 0;
r.stackIndex = 0;
enableIndex = 0;
stackIndex = 0;
if(shader->containsLeaveInstruction())
{
r.enableLeave = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
enableLeave = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
}
// Create all call site return blocks up front
......@@ -342,79 +361,79 @@ namespace sw
case Shader::PARAMETER_TEMP:
if(dst.rel.type == Shader::PARAMETER_VOID)
{
if(dst.x) pDst.x = r.r[dst.index].x;
if(dst.y) pDst.y = r.r[dst.index].y;
if(dst.z) pDst.z = r.r[dst.index].z;
if(dst.w) pDst.w = r.r[dst.index].w;
if(dst.x) pDst.x = r[dst.index].x;
if(dst.y) pDst.y = r[dst.index].y;
if(dst.z) pDst.z = r[dst.index].z;
if(dst.w) pDst.w = r[dst.index].w;
}
else
{
Int a = relativeAddress(dst);
if(dst.x) pDst.x = r.r[dst.index + a].x;
if(dst.y) pDst.y = r.r[dst.index + a].y;
if(dst.z) pDst.z = r.r[dst.index + a].z;
if(dst.w) pDst.w = r.r[dst.index + a].w;
if(dst.x) pDst.x = r[dst.index + a].x;
if(dst.y) pDst.y = r[dst.index + a].y;
if(dst.z) pDst.z = r[dst.index + a].z;
if(dst.w) pDst.w = r[dst.index + a].w;
}
break;
case Shader::PARAMETER_ADDR: pDst = r.a0; break;
case Shader::PARAMETER_ADDR: pDst = a0; break;
case Shader::PARAMETER_RASTOUT:
switch(dst.index)
{
case 0:
if(dst.x) pDst.x = r.o[Pos].x;
if(dst.y) pDst.y = r.o[Pos].y;
if(dst.z) pDst.z = r.o[Pos].z;
if(dst.w) pDst.w = r.o[Pos].w;
if(dst.x) pDst.x = o[Pos].x;
if(dst.y) pDst.y = o[Pos].y;
if(dst.z) pDst.z = o[Pos].z;
if(dst.w) pDst.w = o[Pos].w;
break;
case 1:
pDst.x = r.o[Fog].x;
pDst.x = o[Fog].x;
break;
case 2:
pDst.x = r.o[Pts].y;
pDst.x = o[Pts].y;
break;
default:
ASSERT(false);
}
break;
case Shader::PARAMETER_ATTROUT:
if(dst.x) pDst.x = r.o[D0 + dst.index].x;
if(dst.y) pDst.y = r.o[D0 + dst.index].y;
if(dst.z) pDst.z = r.o[D0 + dst.index].z;
if(dst.w) pDst.w = r.o[D0 + dst.index].w;
if(dst.x) pDst.x = o[D0 + dst.index].x;
if(dst.y) pDst.y = o[D0 + dst.index].y;
if(dst.z) pDst.z = o[D0 + dst.index].z;
if(dst.w) pDst.w = o[D0 + dst.index].w;
break;
case Shader::PARAMETER_TEXCRDOUT:
// case Shader::PARAMETER_OUTPUT:
if(version < 0x0300)
{
if(dst.x) pDst.x = r.o[T0 + dst.index].x;
if(dst.y) pDst.y = r.o[T0 + dst.index].y;
if(dst.z) pDst.z = r.o[T0 + dst.index].z;
if(dst.w) pDst.w = r.o[T0 + dst.index].w;
if(dst.x) pDst.x = o[T0 + dst.index].x;
if(dst.y) pDst.y = o[T0 + dst.index].y;
if(dst.z) pDst.z = o[T0 + dst.index].z;
if(dst.w) pDst.w = o[T0 + dst.index].w;
}
else
{
if(dst.rel.type == Shader::PARAMETER_VOID) // Not relative
{
if(dst.x) pDst.x = r.o[dst.index].x;
if(dst.y) pDst.y = r.o[dst.index].y;
if(dst.z) pDst.z = r.o[dst.index].z;
if(dst.w) pDst.w = r.o[dst.index].w;
if(dst.x) pDst.x = o[dst.index].x;
if(dst.y) pDst.y = o[dst.index].y;
if(dst.z) pDst.z = o[dst.index].z;
if(dst.w) pDst.w = o[dst.index].w;
}
else
{
Int a = relativeAddress(dst);
if(dst.x) pDst.x = r.o[dst.index + a].x;
if(dst.y) pDst.y = r.o[dst.index + a].y;
if(dst.z) pDst.z = r.o[dst.index + a].z;
if(dst.w) pDst.w = r.o[dst.index + a].w;
if(dst.x) pDst.x = o[dst.index + a].x;
if(dst.y) pDst.y = o[dst.index + a].y;
if(dst.z) pDst.z = o[dst.index + a].z;
if(dst.w) pDst.w = o[dst.index + a].w;
}
}
break;
case Shader::PARAMETER_LABEL: break;
case Shader::PARAMETER_PREDICATE: pDst = r.p0; break;
case Shader::PARAMETER_INPUT: break;
case Shader::PARAMETER_LABEL: break;
case Shader::PARAMETER_PREDICATE: pDst = p0; break;
case Shader::PARAMETER_INPUT: break;
default:
ASSERT(false);
}
......@@ -430,10 +449,10 @@ namespace sw
{
unsigned char pSwizzle = instruction->predicateSwizzle;
Float4 xPredicate = r.p0[(pSwizzle >> 0) & 0x03];
Float4 yPredicate = r.p0[(pSwizzle >> 2) & 0x03];
Float4 zPredicate = r.p0[(pSwizzle >> 4) & 0x03];
Float4 wPredicate = r.p0[(pSwizzle >> 6) & 0x03];
Float4 xPredicate = p0[(pSwizzle >> 0) & 0x03];
Float4 yPredicate = p0[(pSwizzle >> 2) & 0x03];
Float4 zPredicate = p0[(pSwizzle >> 4) & 0x03];
Float4 wPredicate = p0[(pSwizzle >> 6) & 0x03];
if(!instruction->predicateNot)
{
......@@ -469,83 +488,83 @@ namespace sw
case Shader::PARAMETER_TEMP:
if(dst.rel.type == Shader::PARAMETER_VOID)
{
if(dst.x) r.r[dst.index].x = d.x;
if(dst.y) r.r[dst.index].y = d.y;
if(dst.z) r.r[dst.index].z = d.z;
if(dst.w) r.r[dst.index].w = d.w;
if(dst.x) r[dst.index].x = d.x;
if(dst.y) r[dst.index].y = d.y;
if(dst.z) r[dst.index].z = d.z;
if(dst.w) r[dst.index].w = d.w;
}
else
{
Int a = relativeAddress(dst);
if(dst.x) r.r[dst.index + a].x = d.x;
if(dst.y) r.r[dst.index + a].y = d.y;
if(dst.z) r.r[dst.index + a].z = d.z;
if(dst.w) r.r[dst.index + a].w = d.w;
if(dst.x) r[dst.index + a].x = d.x;
if(dst.y) r[dst.index + a].y = d.y;
if(dst.z) r[dst.index + a].z = d.z;
if(dst.w) r[dst.index + a].w = d.w;
}
break;
case Shader::PARAMETER_ADDR:
if(dst.x) r.a0.x = d.x;
if(dst.y) r.a0.y = d.y;
if(dst.z) r.a0.z = d.z;
if(dst.w) r.a0.w = d.w;
if(dst.x) a0.x = d.x;
if(dst.y) a0.y = d.y;
if(dst.z) a0.z = d.z;
if(dst.w) a0.w = d.w;
break;
case Shader::PARAMETER_RASTOUT:
switch(dst.index)
{
case 0:
if(dst.x) r.o[Pos].x = d.x;
if(dst.y) r.o[Pos].y = d.y;
if(dst.z) r.o[Pos].z = d.z;
if(dst.w) r.o[Pos].w = d.w;
if(dst.x) o[Pos].x = d.x;
if(dst.y) o[Pos].y = d.y;
if(dst.z) o[Pos].z = d.z;
if(dst.w) o[Pos].w = d.w;
break;
case 1:
r.o[Fog].x = d.x;
o[Fog].x = d.x;
break;
case 2:
r.o[Pts].y = d.x;
o[Pts].y = d.x;
break;
default: ASSERT(false);
}
break;
case Shader::PARAMETER_ATTROUT:
if(dst.x) r.o[D0 + dst.index].x = d.x;
if(dst.y) r.o[D0 + dst.index].y = d.y;
if(dst.z) r.o[D0 + dst.index].z = d.z;
if(dst.w) r.o[D0 + dst.index].w = d.w;
if(dst.x) o[D0 + dst.index].x = d.x;
if(dst.y) o[D0 + dst.index].y = d.y;
if(dst.z) o[D0 + dst.index].z = d.z;
if(dst.w) o[D0 + dst.index].w = d.w;
break;
case Shader::PARAMETER_TEXCRDOUT:
// case Shader::PARAMETER_OUTPUT:
if(version < 0x0300)
{
if(dst.x) r.o[T0 + dst.index].x = d.x;
if(dst.y) r.o[T0 + dst.index].y = d.y;
if(dst.z) r.o[T0 + dst.index].z = d.z;
if(dst.w) r.o[T0 + dst.index].w = d.w;
if(dst.x) o[T0 + dst.index].x = d.x;
if(dst.y) o[T0 + dst.index].y = d.y;
if(dst.z) o[T0 + dst.index].z = d.z;
if(dst.w) o[T0 + dst.index].w = d.w;
}
else
{
if(dst.rel.type == Shader::PARAMETER_VOID) // Not relative
{
if(dst.x) r.o[dst.index].x = d.x;
if(dst.y) r.o[dst.index].y = d.y;
if(dst.z) r.o[dst.index].z = d.z;
if(dst.w) r.o[dst.index].w = d.w;
if(dst.x) o[dst.index].x = d.x;
if(dst.y) o[dst.index].y = d.y;
if(dst.z) o[dst.index].z = d.z;
if(dst.w) o[dst.index].w = d.w;
}
else
{
Int a = relativeAddress(dst);
if(dst.x) r.o[dst.index + a].x = d.x;
if(dst.y) r.o[dst.index + a].y = d.y;
if(dst.z) r.o[dst.index + a].z = d.z;
if(dst.w) r.o[dst.index + a].w = d.w;
if(dst.x) o[dst.index + a].x = d.x;
if(dst.y) o[dst.index + a].y = d.y;
if(dst.z) o[dst.index + a].z = d.z;
if(dst.w) o[dst.index + a].w = d.w;
}
}
break;
case Shader::PARAMETER_LABEL: break;
case Shader::PARAMETER_PREDICATE: r.p0 = d; break;
case Shader::PARAMETER_INPUT: break;
case Shader::PARAMETER_LABEL: break;
case Shader::PARAMETER_PREDICATE: p0 = d; break;
case Shader::PARAMETER_INPUT: break;
default:
ASSERT(false);
}
......@@ -571,28 +590,28 @@ namespace sw
case 0xFF:
continue;
case Shader::USAGE_PSIZE:
r.o[i].y = r.v[i].x;
o[i].y = v[i].x;
break;
case Shader::USAGE_TEXCOORD:
r.o[i].x = r.v[i].x;
r.o[i].y = r.v[i].y;
r.o[i].z = r.v[i].z;
r.o[i].w = r.v[i].w;
o[i].x = v[i].x;
o[i].y = v[i].y;
o[i].z = v[i].z;
o[i].w = v[i].w;
break;
case Shader::USAGE_POSITION:
r.o[i].x = r.v[i].x;
r.o[i].y = r.v[i].y;
r.o[i].z = r.v[i].z;
r.o[i].w = r.v[i].w;
o[i].x = v[i].x;
o[i].y = v[i].y;
o[i].z = v[i].z;
o[i].w = v[i].w;
break;
case Shader::USAGE_COLOR:
r.o[i].x = r.v[i].x;
r.o[i].y = r.v[i].y;
r.o[i].z = r.v[i].z;
r.o[i].w = r.v[i].w;
o[i].x = v[i].x;
o[i].y = v[i].y;
o[i].z = v[i].z;
o[i].w = v[i].w;
break;
case Shader::USAGE_FOG:
r.o[i].x = r.v[i].x;
o[i].x = v[i].x;
break;
default:
ASSERT(false);
......@@ -601,28 +620,28 @@ namespace sw
}
else
{
r.o[Pos].x = r.v[PositionT].x;
r.o[Pos].y = r.v[PositionT].y;
r.o[Pos].z = r.v[PositionT].z;
r.o[Pos].w = r.v[PositionT].w;
o[Pos].x = v[PositionT].x;
o[Pos].y = v[PositionT].y;
o[Pos].z = v[PositionT].z;
o[Pos].w = v[PositionT].w;
for(int i = 0; i < 2; i++)
{
r.o[D0 + i].x = r.v[Color0 + i].x;
r.o[D0 + i].y = r.v[Color0 + i].y;
r.o[D0 + i].z = r.v[Color0 + i].z;
r.o[D0 + i].w = r.v[Color0 + i].w;
o[D0 + i].x = v[Color0 + i].x;
o[D0 + i].y = v[Color0 + i].y;
o[D0 + i].z = v[Color0 + i].z;
o[D0 + i].w = v[Color0 + i].w;
}
for(int i = 0; i < 8; i++)
{
r.o[T0 + i].x = r.v[TexCoord0 + i].x;
r.o[T0 + i].y = r.v[TexCoord0 + i].y;
r.o[T0 + i].z = r.v[TexCoord0 + i].z;
r.o[T0 + i].w = r.v[TexCoord0 + i].w;
o[T0 + i].x = v[TexCoord0 + i].x;
o[T0 + i].y = v[TexCoord0 + i].y;
o[T0 + i].z = v[TexCoord0 + i].z;
o[T0 + i].w = v[TexCoord0 + i].w;
}
r.o[Pts].y = r.v[PointSize].x;
o[Pts].y = v[PointSize].x;
}
}
......@@ -636,11 +655,11 @@ namespace sw
case Shader::PARAMETER_TEMP:
if(src.rel.type == Shader::PARAMETER_VOID)
{
reg = r.r[i];
reg = r[i];
}
else
{
reg = r.r[i + relativeAddress(src)];
reg = r[i + relativeAddress(src)];
}
break;
case Shader::PARAMETER_CONST:
......@@ -649,25 +668,25 @@ namespace sw
case Shader::PARAMETER_INPUT:
if(src.rel.type == Shader::PARAMETER_VOID)
{
reg = r.v[i];
reg = v[i];
}
else
{
reg = r.v[i + relativeAddress(src)];
reg = v[i + relativeAddress(src)];
}
break;
case Shader::PARAMETER_VOID: return r.r[0]; // Dummy
case Shader::PARAMETER_VOID: return r[0]; // Dummy
case Shader::PARAMETER_FLOAT4LITERAL:
reg.x = Float4(src.value[0]);
reg.y = Float4(src.value[1]);
reg.z = Float4(src.value[2]);
reg.w = Float4(src.value[3]);
break;
case Shader::PARAMETER_ADDR: reg = r.a0; break;
case Shader::PARAMETER_CONSTBOOL: return r.r[0]; // Dummy
case Shader::PARAMETER_CONSTINT: return r.r[0]; // Dummy
case Shader::PARAMETER_LOOP: return r.r[0]; // Dummy
case Shader::PARAMETER_PREDICATE: return r.r[0]; // Dummy
case Shader::PARAMETER_ADDR: reg = a0; break;
case Shader::PARAMETER_CONSTBOOL: return r[0]; // Dummy
case Shader::PARAMETER_CONSTINT: return r[0]; // Dummy
case Shader::PARAMETER_LOOP: return r[0]; // Dummy
case Shader::PARAMETER_PREDICATE: return r[0]; // Dummy
case Shader::PARAMETER_SAMPLER:
if(src.rel.type == Shader::PARAMETER_VOID)
{
......@@ -675,21 +694,21 @@ namespace sw
}
else if(src.rel.type == Shader::PARAMETER_TEMP)
{
reg.x = As<Float4>(Int4(i) + As<Int4>(r.r[src.rel.index].x));
reg.x = As<Float4>(Int4(i) + As<Int4>(r[src.rel.index].x));
}
return reg;
case Shader::PARAMETER_OUTPUT:
if(src.rel.type == Shader::PARAMETER_VOID)
{
reg = r.o[i];
reg = o[i];
}
else
{
reg = r.o[i + relativeAddress(src)];
reg = o[i + relativeAddress(src)];
}
break;
case Shader::PARAMETER_MISCTYPE:
reg.x = As<Float>(Int(r.instanceID));
reg.x = As<Float>(Int(instanceID));
return reg;
default:
ASSERT(false);
......@@ -748,7 +767,7 @@ namespace sw
if(src.rel.type == Shader::PARAMETER_VOID) // Not relative
{
c.x = c.y = c.z = c.w = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c[i]));
c.x = c.y = c.z = c.w = *Pointer<Float4>(data + OFFSET(DrawData,vs.c[i]));
c.x = c.x.xxxx;
c.y = c.y.yyyy;
......@@ -778,9 +797,9 @@ namespace sw
}
else if(src.rel.type == Shader::PARAMETER_LOOP)
{
Int loopCounter = r.aL[r.loopDepth];
Int loopCounter = aL[loopDepth];
c.x = c.y = c.z = c.w = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c[i]) + loopCounter * 16);
c.x = c.y = c.z = c.w = *Pointer<Float4>(data + OFFSET(DrawData,vs.c[i]) + loopCounter * 16);
c.x = c.x.xxxx;
c.y = c.y.yyyy;
......@@ -793,7 +812,7 @@ namespace sw
{
Int a = relativeAddress(src);
c.x = c.y = c.z = c.w = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c[i]) + a * 16);
c.x = c.y = c.z = c.w = *Pointer<Float4>(data + OFFSET(DrawData,vs.c[i]) + a * 16);
c.x = c.x.xxxx;
c.y = c.y.yyyy;
......@@ -807,11 +826,11 @@ namespace sw
switch(src.rel.type)
{
case Shader::PARAMETER_ADDR: a = r.a0[component]; break;
case Shader::PARAMETER_TEMP: a = r.r[src.rel.index][component]; break;
case Shader::PARAMETER_INPUT: a = r.v[src.rel.index][component]; break;
case Shader::PARAMETER_OUTPUT: a = r.o[src.rel.index][component]; break;
case Shader::PARAMETER_CONST: a = *Pointer<Float>(r.data + OFFSET(DrawData,vs.c[src.rel.index][component])); break;
case Shader::PARAMETER_ADDR: a = a0[component]; break;
case Shader::PARAMETER_TEMP: a = r[src.rel.index][component]; break;
case Shader::PARAMETER_INPUT: a = v[src.rel.index][component]; break;
case Shader::PARAMETER_OUTPUT: a = o[src.rel.index][component]; break;
case Shader::PARAMETER_CONST: a = *Pointer<Float>(data + OFFSET(DrawData,vs.c[src.rel.index][component])); break;
default: ASSERT(false);
}
......@@ -824,10 +843,10 @@ namespace sw
Int index2 = Extract(index, 2);
Int index3 = Extract(index, 3);
c.x = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index0 * 16, 16);
c.y = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index1 * 16, 16);
c.z = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index2 * 16, 16);
c.w = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index3 * 16, 16);
c.x = *Pointer<Float4>(data + OFFSET(DrawData,vs.c) + index0 * 16, 16);
c.y = *Pointer<Float4>(data + OFFSET(DrawData,vs.c) + index1 * 16, 16);
c.z = *Pointer<Float4>(data + OFFSET(DrawData,vs.c) + index2 * 16, 16);
c.w = *Pointer<Float4>(data + OFFSET(DrawData,vs.c) + index3 * 16, 16);
transpose4x4(c.x, c.y, c.z, c.w);
}
......@@ -842,25 +861,25 @@ namespace sw
if(var.rel.type == Shader::PARAMETER_TEMP)
{
return As<Int>(Extract(r.r[var.rel.index].x, 0)) * var.rel.scale;
return As<Int>(Extract(r[var.rel.index].x, 0)) * var.rel.scale;
}
else if(var.rel.type == Shader::PARAMETER_INPUT)
{
return As<Int>(Extract(r.v[var.rel.index].x, 0)) * var.rel.scale;
return As<Int>(Extract(v[var.rel.index].x, 0)) * var.rel.scale;
}
else if(var.rel.type == Shader::PARAMETER_OUTPUT)
{
return As<Int>(Extract(r.o[var.rel.index].x, 0)) * var.rel.scale;
return As<Int>(Extract(o[var.rel.index].x, 0)) * var.rel.scale;
}
else if(var.rel.type == Shader::PARAMETER_CONST)
{
RValue<Int4> c = *Pointer<Int4>(r.data + OFFSET(DrawData, vs.c[var.rel.index]));
RValue<Int4> c = *Pointer<Int4>(data + OFFSET(DrawData, vs.c[var.rel.index]));
return Extract(c, 0) * var.rel.scale;
}
else if(var.rel.type == Shader::PARAMETER_LOOP)
{
return r.aL[r.loopDepth];
return aL[loopDepth];
}
else ASSERT(false);
......@@ -869,23 +888,23 @@ namespace sw
Int4 VertexProgram::enableMask(const Shader::Instruction *instruction)
{
Int4 enable = instruction->analysisBranch ? Int4(r.enableStack[r.enableIndex]) : Int4(0xFFFFFFFF);
Int4 enable = instruction->analysisBranch ? Int4(enableStack[enableIndex]) : Int4(0xFFFFFFFF);
if(!whileTest)
{
if(shader->containsBreakInstruction() && instruction->analysisBreak)
{
enable &= r.enableBreak;
enable &= enableBreak;
}
if(shader->containsContinueInstruction() && instruction->analysisContinue)
{
enable &= r.enableContinue;
enable &= enableContinue;
}
if(shader->containsLeaveInstruction() && instruction->analysisLeave)
{
enable &= r.enableLeave;
enable &= enableLeave;
}
}
......@@ -956,20 +975,20 @@ namespace sw
if(breakDepth == 0)
{
r.enableIndex = r.enableIndex - breakDepth;
enableIndex = enableIndex - breakDepth;
Nucleus::createBr(endBlock);
}
else
{
r.enableBreak = r.enableBreak & ~r.enableStack[r.enableIndex];
Bool allBreak = SignMask(r.enableBreak) == 0x0;
enableBreak = enableBreak & ~enableStack[enableIndex];
Bool allBreak = SignMask(enableBreak) == 0x0;
r.enableIndex = r.enableIndex - breakDepth;
enableIndex = enableIndex - breakDepth;
branch(allBreak, endBlock, deadBlock);
}
Nucleus::setInsertBlock(deadBlock);
r.enableIndex = r.enableIndex + breakDepth;
enableIndex = enableIndex + breakDepth;
}
void VertexProgram::BREAKC(Vector4f &src0, Vector4f &src1, Control control)
......@@ -993,7 +1012,7 @@ namespace sw
void VertexProgram::BREAKP(const Src &predicateRegister) // FIXME: Factor out parts common with BREAKC
{
Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]);
Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);
if(predicateRegister.modifier == Shader::MODIFIER_NOT)
{
......@@ -1005,24 +1024,24 @@ namespace sw
// Emits a conditional break.
//
// `condition` is first ANDed with the mask on top of the enable stack, and the
// surviving components are then cleared from enableBreak. When SignMask of the
// updated enableBreak is zero, the generated code branches to the end block
// recorded at loopRepEndBlock[loopRepDepth - 1]; otherwise it falls through to
// a newly created continuation block, which becomes the insert block.
//
// enableIndex is lowered by breakDepth just before the branch and raised by the
// same amount once the continuation block is selected.
//
// NOTE(review): this text is a diff rendering, so every register access shows
// up twice -- the older `r.`-qualified form followed by the direct-member form.
void VertexProgram::BREAK(Int4 &condition)
{
// Restrict the break condition to the mask at the top of the enable stack.
condition &= r.enableStack[r.enableIndex];
condition &= enableStack[enableIndex];
llvm::BasicBlock *continueBlock = Nucleus::createBasicBlock();
llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
// Remove the breaking components from enableBreak; allBreak is set when
// SignMask reports nothing left.
r.enableBreak = r.enableBreak & ~condition;
Bool allBreak = SignMask(r.enableBreak) == 0x0;
enableBreak = enableBreak & ~condition;
Bool allBreak = SignMask(enableBreak) == 0x0;
// Adjust enableIndex by breakDepth for the path that leaves through endBlock...
r.enableIndex = r.enableIndex - breakDepth;
enableIndex = enableIndex - breakDepth;
branch(allBreak, endBlock, continueBlock);
Nucleus::setInsertBlock(continueBlock);
// ...and undo the adjustment for code emitted into the continuation block.
r.enableIndex = r.enableIndex + breakDepth;
enableIndex = enableIndex + breakDepth;
}
// Emits a continue: clears from enableContinue every component that is set in
// the mask at the top of the enable stack (enableStack[enableIndex]).
// NOTE(review): the statement appears twice because this text is a diff
// rendering (old `r.`-qualified form, then the direct-member form).
void VertexProgram::CONTINUE()
{
r.enableContinue = r.enableContinue & ~r.enableStack[r.enableIndex];
enableContinue = enableContinue & ~enableStack[enableIndex];
}
void VertexProgram::TEST()
......@@ -1039,15 +1058,15 @@ namespace sw
if(callRetBlock[labelIndex].size() > 1)
{
r.callStack[r.stackIndex++] = UInt(callSiteIndex);
callStack[stackIndex++] = UInt(callSiteIndex);
}
Int4 restoreLeave = r.enableLeave;
Int4 restoreLeave = enableLeave;
Nucleus::createBr(labelBlock[labelIndex]);
Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
r.enableLeave = restoreLeave;
enableLeave = restoreLeave;
}
void VertexProgram::CALLNZ(int labelIndex, int callSiteIndex, const Src &src)
......@@ -1065,7 +1084,7 @@ namespace sw
void VertexProgram::CALLNZb(int labelIndex, int callSiteIndex, const Src &boolRegister)
{
Bool condition = (*Pointer<Byte>(r.data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0)); // FIXME
Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0)); // FIXME
if(boolRegister.modifier == Shader::MODIFIER_NOT)
{
......@@ -1079,27 +1098,27 @@ namespace sw
if(callRetBlock[labelIndex].size() > 1)
{
r.callStack[r.stackIndex++] = UInt(callSiteIndex);
callStack[stackIndex++] = UInt(callSiteIndex);
}
Int4 restoreLeave = r.enableLeave;
Int4 restoreLeave = enableLeave;
branch(condition, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
r.enableLeave = restoreLeave;
enableLeave = restoreLeave;
}
void VertexProgram::CALLNZp(int labelIndex, int callSiteIndex, const Src &predicateRegister)
{
Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]);
Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);
if(predicateRegister.modifier == Shader::MODIFIER_NOT)
{
condition = ~condition;
}
condition &= r.enableStack[r.enableIndex];
condition &= enableStack[enableIndex];
if(!labelBlock[labelIndex])
{
......@@ -1108,19 +1127,19 @@ namespace sw
if(callRetBlock[labelIndex].size() > 1)
{
r.callStack[r.stackIndex++] = UInt(callSiteIndex);
callStack[stackIndex++] = UInt(callSiteIndex);
}
r.enableIndex++;
r.enableStack[r.enableIndex] = condition;
Int4 restoreLeave = r.enableLeave;
enableIndex++;
enableStack[enableIndex] = condition;
Int4 restoreLeave = enableLeave;
Bool notAllFalse = SignMask(condition) != 0;
branch(notAllFalse, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
r.enableIndex--;
r.enableLeave = restoreLeave;
enableIndex--;
enableLeave = restoreLeave;
}
void VertexProgram::ELSE()
......@@ -1132,12 +1151,12 @@ namespace sw
if(isConditionalIf[ifDepth])
{
Int4 condition = ~r.enableStack[r.enableIndex] & r.enableStack[r.enableIndex - 1];
Int4 condition = ~enableStack[enableIndex] & enableStack[enableIndex - 1];
Bool notAllFalse = SignMask(condition) != 0;
branch(notAllFalse, falseBlock, endBlock);
r.enableStack[r.enableIndex] = ~r.enableStack[r.enableIndex] & r.enableStack[r.enableIndex - 1];
enableStack[enableIndex] = ~enableStack[enableIndex] & enableStack[enableIndex - 1];
}
else
{
......@@ -1162,7 +1181,7 @@ namespace sw
if(isConditionalIf[ifDepth])
{
breakDepth--;
r.enableIndex--;
enableIndex--;
}
}
......@@ -1170,7 +1189,7 @@ namespace sw
{
loopRepDepth--;
r.aL[r.loopDepth] = r.aL[r.loopDepth] + r.increment[r.loopDepth]; // FIXME: +=
aL[loopDepth] = aL[loopDepth] + increment[loopDepth]; // FIXME: +=
llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
......@@ -1178,8 +1197,8 @@ namespace sw
Nucleus::createBr(testBlock);
Nucleus::setInsertBlock(endBlock);
r.loopDepth--;
r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
loopDepth--;
enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
}
void VertexProgram::ENDREP()
......@@ -1192,8 +1211,8 @@ namespace sw
Nucleus::createBr(testBlock);
Nucleus::setInsertBlock(endBlock);
r.loopDepth--;
r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
loopDepth--;
enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
}
void VertexProgram::ENDWHILE()
......@@ -1206,8 +1225,8 @@ namespace sw
Nucleus::createBr(testBlock);
Nucleus::setInsertBlock(endBlock);
r.enableIndex--;
r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
enableIndex--;
enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
whileTest = false;
}
......@@ -1232,7 +1251,7 @@ namespace sw
{
ASSERT(ifDepth < 24 + 4);
Bool condition = (*Pointer<Byte>(r.data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0)); // FIXME
Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0)); // FIXME
if(boolRegister.modifier == Shader::MODIFIER_NOT)
{
......@@ -1252,7 +1271,7 @@ namespace sw
void VertexProgram::IFp(const Src &predicateRegister)
{
Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]);
Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);
if(predicateRegister.modifier == Shader::MODIFIER_NOT)
{
......@@ -1283,10 +1302,10 @@ namespace sw
void VertexProgram::IF(Int4 &condition)
{
condition &= r.enableStack[r.enableIndex];
condition &= enableStack[enableIndex];
r.enableIndex++;
r.enableStack[r.enableIndex] = condition;
enableIndex++;
enableStack[enableIndex] = condition;
llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock();
llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock();
......@@ -1315,16 +1334,16 @@ namespace sw
void VertexProgram::LOOP(const Src &integerRegister)
{
r.loopDepth++;
loopDepth++;
r.iteration[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,vs.i[integerRegister.index][0]));
r.aL[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,vs.i[integerRegister.index][1]));
r.increment[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,vs.i[integerRegister.index][2]));
iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][0]));
aL[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][1]));
increment[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][2]));
// FIXME: Compiles to two instructions?
If(r.increment[r.loopDepth] == 0)
If(increment[loopDepth] == 0)
{
r.increment[r.loopDepth] = 1;
increment[loopDepth] = 1;
}
llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
......@@ -1338,10 +1357,10 @@ namespace sw
Nucleus::createBr(testBlock);
Nucleus::setInsertBlock(testBlock);
branch(r.iteration[r.loopDepth] > 0, loopBlock, endBlock);
branch(iteration[loopDepth] > 0, loopBlock, endBlock);
Nucleus::setInsertBlock(loopBlock);
r.iteration[r.loopDepth] = r.iteration[r.loopDepth] - 1; // FIXME: --
iteration[loopDepth] = iteration[loopDepth] - 1; // FIXME: --
loopRepDepth++;
breakDepth = 0;
......@@ -1349,10 +1368,10 @@ namespace sw
void VertexProgram::REP(const Src &integerRegister)
{
r.loopDepth++;
loopDepth++;
r.iteration[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,vs.i[integerRegister.index][0]));
r.aL[r.loopDepth] = r.aL[r.loopDepth - 1];
iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][0]));
aL[loopDepth] = aL[loopDepth - 1];
llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
......@@ -1365,10 +1384,10 @@ namespace sw
Nucleus::createBr(testBlock);
Nucleus::setInsertBlock(testBlock);
branch(r.iteration[r.loopDepth] > 0, loopBlock, endBlock);
branch(iteration[loopDepth] > 0, loopBlock, endBlock);
Nucleus::setInsertBlock(loopBlock);
r.iteration[r.loopDepth] = r.iteration[r.loopDepth] - 1; // FIXME: --
iteration[loopDepth] = iteration[loopDepth] - 1; // FIXME: --
loopRepDepth++;
breakDepth = 0;
......@@ -1376,7 +1395,7 @@ namespace sw
void VertexProgram::WHILE(const Src &temporaryRegister)
{
r.enableIndex++;
enableIndex++;
llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
......@@ -1385,24 +1404,24 @@ namespace sw
loopRepTestBlock[loopRepDepth] = testBlock;
loopRepEndBlock[loopRepDepth] = endBlock;
Int4 restoreBreak = r.enableBreak;
Int4 restoreContinue = r.enableContinue;
Int4 restoreBreak = enableBreak;
Int4 restoreContinue = enableContinue;
// FIXME: jump(testBlock)
Nucleus::createBr(testBlock);
Nucleus::setInsertBlock(testBlock);
r.enableContinue = restoreContinue;
enableContinue = restoreContinue;
const Vector4f &src = fetchRegisterF(temporaryRegister);
Int4 condition = As<Int4>(src.x);
condition &= r.enableStack[r.enableIndex - 1];
r.enableStack[r.enableIndex] = condition;
condition &= enableStack[enableIndex - 1];
enableStack[enableIndex] = condition;
Bool notAllFalse = SignMask(condition) != 0;
branch(notAllFalse, loopBlock, endBlock);
Nucleus::setInsertBlock(endBlock);
r.enableBreak = restoreBreak;
enableBreak = restoreBreak;
Nucleus::setInsertBlock(loopBlock);
......@@ -1424,7 +1443,7 @@ namespace sw
if(callRetBlock[currentLabel].size() > 1) // Pop the return destination from the call stack
{
// FIXME: Encapsulate
UInt index = r.callStack[--r.stackIndex];
UInt index = callStack[--stackIndex];
llvm::Value *value = index.loadValue();
llvm::Value *switchInst = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock[currentLabel].size());
......@@ -1450,7 +1469,7 @@ namespace sw
void VertexProgram::LEAVE()
{
r.enableLeave = r.enableLeave & ~r.enableStack[r.enableIndex];
enableLeave = enableLeave & ~enableStack[enableIndex];
// FIXME: Return from function if all instances left
// FIXME: Use enableLeave in other control-flow constructs
......@@ -1511,7 +1530,7 @@ namespace sw
void VertexProgram::TEXSIZE(Vector4f &dst, Float4 &lod, const Src &src1)
{
Pointer<Byte> textureMipmap = r.data + OFFSET(DrawData, mipmap[16]) + src1.index * sizeof(Texture) + OFFSET(Texture, mipmap);
Pointer<Byte> textureMipmap = data + OFFSET(DrawData, mipmap[16]) + src1.index * sizeof(Texture) + OFFSET(Texture, mipmap);
for(int i = 0; i < 4; ++i)
{
Pointer<Byte> mipmap = textureMipmap + (As<Int>(Extract(lod, i)) + Int(1)) * sizeof(Mipmap);
......@@ -1525,8 +1544,8 @@ namespace sw
{
if(s.type == Shader::PARAMETER_SAMPLER && s.rel.type == Shader::PARAMETER_VOID)
{
Pointer<Byte> texture = r.data + OFFSET(DrawData,mipmap[16]) + s.index * sizeof(Texture);
sampler[s.index]->sampleTexture(texture, c, u, v, w, q, r.a0, r.a0, false, false, true);
Pointer<Byte> texture = data + OFFSET(DrawData,mipmap[16]) + s.index * sizeof(Texture);
sampler[s.index]->sampleTexture(texture, c, u, v, w, q, a0, a0, false, false, true);
}
else
{
......@@ -1538,8 +1557,8 @@ namespace sw
{
If(index == i)
{
Pointer<Byte> texture = r.data + OFFSET(DrawData,mipmap[16]) + i * sizeof(Texture);
sampler[i]->sampleTexture(texture, c, u, v, w, q, r.a0, r.a0, false, false, true);
Pointer<Byte> texture = data + OFFSET(DrawData,mipmap[16]) + i * sizeof(Texture);
sampler[i]->sampleTexture(texture, c, u, v, w, q, a0, a0, false, false, true);
// FIXME: When the sampler states are the same, we could use one sampler and just index the texture
}
}
......
......@@ -32,6 +32,28 @@ namespace sw
virtual ~VertexProgram();
private:
// Shader this program is generated from; queried for break/continue/leave
// instruction presence when building the enable mask.
const VertexShader *const shader;
RegisterArray<4096> r;   // Temporary registers
// Register returned for PARAMETER_ADDR sources and read for ADDR-relative addressing.
Vector4f a0;
// Loop counter per loop depth; initialised by LOOP, copied from the enclosing depth by REP.
Array<Int, 4> aL;
// Predicate register; a component is selected with (swizzle & 0x3).
Vector4f p0;
// Step added to aL[loopDepth] at the end of each loop pass; LOOP replaces a zero step with 1.
Array<Int, 4> increment;
// Remaining pass count per loop depth; the loop body runs while it is > 0 and it is decremented on entry.
Array<Int, 4> iteration;
// Index into aL / increment / iteration; incremented by LOOP and REP, decremented when they end.
Int loopDepth;
Int stackIndex;   // FIXME: Inc/decrement callStack
// Call-site indices, pushed on a call when the label has more than one return block and popped on return.
Array<UInt, 16> callStack;
// Index of the current top entry of enableStack.
Int enableIndex;
// Stack of Int4 enable masks; IF and WHILE push a condition ANDed with the entry below it.
Array<Int4, 1 + 24> enableStack;
// Masks ANDed into the per-instruction enable mask when the shader contains the
// corresponding instruction and the instruction is flagged by the analysis.
Int4 enableBreak;
Int4 enableContinue;
Int4 enableLeave;
// Value placed in reg.x for PARAMETER_MISCTYPE sources.
Int instanceID;
typedef Shader::DestinationParameter Dst;
typedef Shader::SourceParameter Src;
typedef Shader::Control Control;
......
......@@ -23,7 +23,10 @@ namespace sw
extern bool halfIntegerCoordinates; // Pixel centers are not at integer coordinates
extern bool symmetricNormalizedDepth; // [-1, 1] instead of [0, 1]
VertexRoutine::VertexRoutine(const VertexProcessor::State &state, const VertexShader *shader) : r(shader), state(state), shader(shader)
// Constructs the routine for the given processor state.
//
// The v and o register arrays are each constructed with a boolean that is true
// only when a shader is supplied and it reports dynamicallyIndexedInput /
// dynamicallyIndexedOutput respectively; with a null shader both are false.
// NOTE(review): presumably the flag selects a dynamically indexable register
// layout -- confirm against RegisterArray's constructor.
VertexRoutine::VertexRoutine(const VertexProcessor::State &state, const VertexShader *shader)
: v(shader && shader->dynamicallyIndexedInput),
o(shader && shader->dynamicallyIndexedOutput),
state(state)
{
}
......@@ -41,12 +44,7 @@ namespace sw
UInt vertexCount = *Pointer<UInt>(task + OFFSET(VertexTask,vertexCount));
r.data = data;
r.constants = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,constants));
if(shader && shader->instanceIdDeclared)
{
r.instanceID = *Pointer<Int>(data + OFFSET(DrawData, instanceID));
}
constants = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,constants));
Do
{
......@@ -84,10 +82,10 @@ namespace sw
{
for(int i = 0; i < VERTEX_ATTRIBUTES; i++)
{
Pointer<Byte> input = *Pointer<Pointer<Byte>>(r.data + OFFSET(DrawData,input) + sizeof(void*) * i);
UInt stride = *Pointer<UInt>(r.data + OFFSET(DrawData,stride) + sizeof(unsigned int) * i);
Pointer<Byte> input = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,input) + sizeof(void*) * i);
UInt stride = *Pointer<UInt>(data + OFFSET(DrawData,stride) + sizeof(unsigned int) * i);
r.v[i] = readStream(input, stride, state.input[i], index);
v[i] = readStream(input, stride, state.input[i], index);
}
}
......@@ -95,39 +93,39 @@ namespace sw
{
int pos = state.positionRegister;
Int4 maxX = CmpLT(r.o[pos].w, r.o[pos].x);
Int4 maxY = CmpLT(r.o[pos].w, r.o[pos].y);
Int4 maxZ = CmpLT(r.o[pos].w, r.o[pos].z);
Int4 maxX = CmpLT(o[pos].w, o[pos].x);
Int4 maxY = CmpLT(o[pos].w, o[pos].y);
Int4 maxZ = CmpLT(o[pos].w, o[pos].z);
Int4 minX = CmpNLE(-r.o[pos].w, r.o[pos].x);
Int4 minY = CmpNLE(-r.o[pos].w, r.o[pos].y);
Int4 minZ = CmpNLE(Float4(0.0f), r.o[pos].z);
Int4 minX = CmpNLE(-o[pos].w, o[pos].x);
Int4 minY = CmpNLE(-o[pos].w, o[pos].y);
Int4 minZ = CmpNLE(Float4(0.0f), o[pos].z);
Int flags;
flags = SignMask(maxX);
r.clipFlags = *Pointer<Int>(r.constants + OFFSET(Constants,maxX) + flags * 4); // FIXME: Array indexing
clipFlags = *Pointer<Int>(constants + OFFSET(Constants,maxX) + flags * 4); // FIXME: Array indexing
flags = SignMask(maxY);
r.clipFlags |= *Pointer<Int>(r.constants + OFFSET(Constants,maxY) + flags * 4);
clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,maxY) + flags * 4);
flags = SignMask(maxZ);
r.clipFlags |= *Pointer<Int>(r.constants + OFFSET(Constants,maxZ) + flags * 4);
clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,maxZ) + flags * 4);
flags = SignMask(minX);
r.clipFlags |= *Pointer<Int>(r.constants + OFFSET(Constants,minX) + flags * 4);
clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minX) + flags * 4);
flags = SignMask(minY);
r.clipFlags |= *Pointer<Int>(r.constants + OFFSET(Constants,minY) + flags * 4);
clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minY) + flags * 4);
flags = SignMask(minZ);
r.clipFlags |= *Pointer<Int>(r.constants + OFFSET(Constants,minZ) + flags * 4);
clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minZ) + flags * 4);
Int4 finiteX = CmpLE(Abs(r.o[pos].x), *Pointer<Float4>(r.constants + OFFSET(Constants,maxPos)));
Int4 finiteY = CmpLE(Abs(r.o[pos].y), *Pointer<Float4>(r.constants + OFFSET(Constants,maxPos)));
Int4 finiteZ = CmpLE(Abs(r.o[pos].z), *Pointer<Float4>(r.constants + OFFSET(Constants,maxPos)));
Int4 finiteX = CmpLE(Abs(o[pos].x), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
Int4 finiteY = CmpLE(Abs(o[pos].y), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
Int4 finiteZ = CmpLE(Abs(o[pos].z), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
flags = SignMask(finiteX & finiteY & finiteZ);
r.clipFlags |= *Pointer<Int>(r.constants + OFFSET(Constants,fini) + flags * 4);
clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,fini) + flags * 4);
if(state.preTransformed)
{
r.clipFlags &= 0xFBFBFBFB; // Don't clip against far clip plane
clipFlags &= 0xFBFBFBFB; // Don't clip against far clip plane
}
}
......@@ -179,10 +177,10 @@ namespace sw
if(stream.normalized)
{
if(stream.count >= 1) v.x *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleByte));
if(stream.count >= 2) v.y *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleByte));
if(stream.count >= 3) v.z *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleByte));
if(stream.count >= 4) v.w *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleByte));
if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
}
}
break;
......@@ -197,19 +195,19 @@ namespace sw
if(stream.normalized)
{
if(stream.count >= 1) v.x *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleSByte));
if(stream.count >= 2) v.y *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleSByte));
if(stream.count >= 3) v.z *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleSByte));
if(stream.count >= 4) v.w *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleSByte));
if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte));
if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte));
if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte));
if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte));
}
}
break;
case STREAMTYPE_COLOR:
{
v.x = Float4(*Pointer<Byte4>(source0)) * *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleByte));
v.y = Float4(*Pointer<Byte4>(source1)) * *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleByte));
v.z = Float4(*Pointer<Byte4>(source2)) * *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleByte));
v.w = Float4(*Pointer<Byte4>(source3)) * *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleByte));
v.x = Float4(*Pointer<Byte4>(source0)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
v.y = Float4(*Pointer<Byte4>(source1)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
v.z = Float4(*Pointer<Byte4>(source2)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
v.w = Float4(*Pointer<Byte4>(source3)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
transpose4x4(v.x, v.y, v.z, v.w);
......@@ -230,10 +228,10 @@ namespace sw
if(stream.normalized)
{
if(stream.count >= 1) v.x *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleShort));
if(stream.count >= 2) v.y *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleShort));
if(stream.count >= 3) v.z *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleShort));
if(stream.count >= 4) v.w *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleShort));
if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort));
if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort));
if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort));
if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort));
}
}
break;
......@@ -248,10 +246,10 @@ namespace sw
if(stream.normalized)
{
if(stream.count >= 1) v.x *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleUShort));
if(stream.count >= 2) v.y *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleUShort));
if(stream.count >= 3) v.z *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleUShort));
if(stream.count >= 4) v.w *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleUShort));
if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort));
if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort));
if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort));
if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort));
}
}
break;
......@@ -356,10 +354,10 @@ namespace sw
break;
case STREAMTYPE_FIXED:
{
v.x = Float4(*Pointer<Int4>(source0)) * *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleFixed));
v.y = Float4(*Pointer<Int4>(source1)) * *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleFixed));
v.z = Float4(*Pointer<Int4>(source2)) * *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleFixed));
v.w = Float4(*Pointer<Int4>(source3)) * *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleFixed));
v.x = Float4(*Pointer<Int4>(source0)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleFixed));
v.y = Float4(*Pointer<Int4>(source1)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleFixed));
v.z = Float4(*Pointer<Int4>(source2)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleFixed));
v.w = Float4(*Pointer<Int4>(source3)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleFixed));
transpose4xN(v.x, v.y, v.z, v.w, stream.count);
}
......@@ -373,10 +371,10 @@ namespace sw
UShort x2 = *Pointer<UShort>(source2 + 0);
UShort x3 = *Pointer<UShort>(source3 + 0);
v.x.x = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(x0) * 4);
v.x.y = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(x1) * 4);
v.x.z = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(x2) * 4);
v.x.w = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(x3) * 4);
v.x.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x0) * 4);
v.x.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x1) * 4);
v.x.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x2) * 4);
v.x.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x3) * 4);
}
if(stream.count >= 2)
......@@ -386,10 +384,10 @@ namespace sw
UShort y2 = *Pointer<UShort>(source2 + 2);
UShort y3 = *Pointer<UShort>(source3 + 2);
v.y.x = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(y0) * 4);
v.y.y = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(y1) * 4);
v.y.z = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(y2) * 4);
v.y.w = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(y3) * 4);
v.y.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y0) * 4);
v.y.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y1) * 4);
v.y.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y2) * 4);
v.y.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y3) * 4);
}
if(stream.count >= 3)
......@@ -399,10 +397,10 @@ namespace sw
UShort z2 = *Pointer<UShort>(source2 + 4);
UShort z3 = *Pointer<UShort>(source3 + 4);
v.z.x = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(z0) * 4);
v.z.y = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(z1) * 4);
v.z.z = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(z2) * 4);
v.z.w = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(z3) * 4);
v.z.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z0) * 4);
v.z.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z1) * 4);
v.z.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z2) * 4);
v.z.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z3) * 4);
}
if(stream.count >= 4)
......@@ -412,10 +410,10 @@ namespace sw
UShort w2 = *Pointer<UShort>(source2 + 6);
UShort w3 = *Pointer<UShort>(source3 + 6);
v.w.x = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(w0) * 4);
v.w.y = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(w1) * 4);
v.w.z = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(w2) * 4);
v.w.w = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(w3) * 4);
v.w.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w0) * 4);
v.w.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w1) * 4);
v.w.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w2) * 4);
v.w.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w3) * 4);
}
}
break;
......@@ -446,34 +444,34 @@ namespace sw
// Backtransform
if(state.preTransformed)
{
Float4 rhw = Float4(1.0f) / r.o[pos].w;
Float4 rhw = Float4(1.0f) / o[pos].w;
Float4 W = *Pointer<Float4>(r.data + OFFSET(DrawData,Wx16)) * Float4(1.0f / 16.0f);
Float4 H = *Pointer<Float4>(r.data + OFFSET(DrawData,Hx16)) * Float4(1.0f / 16.0f);
Float4 L = *Pointer<Float4>(r.data + OFFSET(DrawData,X0x16)) * Float4(1.0f / 16.0f);
Float4 T = *Pointer<Float4>(r.data + OFFSET(DrawData,Y0x16)) * Float4(1.0f / 16.0f);
Float4 W = *Pointer<Float4>(data + OFFSET(DrawData,Wx16)) * Float4(1.0f / 16.0f);
Float4 H = *Pointer<Float4>(data + OFFSET(DrawData,Hx16)) * Float4(1.0f / 16.0f);
Float4 L = *Pointer<Float4>(data + OFFSET(DrawData,X0x16)) * Float4(1.0f / 16.0f);
Float4 T = *Pointer<Float4>(data + OFFSET(DrawData,Y0x16)) * Float4(1.0f / 16.0f);
r.o[pos].x = (r.o[pos].x - L) / W * rhw;
r.o[pos].y = (r.o[pos].y - T) / H * rhw;
r.o[pos].z = r.o[pos].z * rhw;
r.o[pos].w = rhw;
o[pos].x = (o[pos].x - L) / W * rhw;
o[pos].y = (o[pos].y - T) / H * rhw;
o[pos].z = o[pos].z * rhw;
o[pos].w = rhw;
}
if(!halfIntegerCoordinates && !state.preTransformed)
{
r.o[pos].x = r.o[pos].x + *Pointer<Float4>(r.data + OFFSET(DrawData,halfPixelX)) * r.o[pos].w;
r.o[pos].y = r.o[pos].y + *Pointer<Float4>(r.data + OFFSET(DrawData,halfPixelY)) * r.o[pos].w;
o[pos].x = o[pos].x + *Pointer<Float4>(data + OFFSET(DrawData,halfPixelX)) * o[pos].w;
o[pos].y = o[pos].y + *Pointer<Float4>(data + OFFSET(DrawData,halfPixelY)) * o[pos].w;
}
if(state.superSampling)
{
r.o[pos].x = r.o[pos].x + *Pointer<Float4>(r.data + OFFSET(DrawData,XXXX)) * r.o[pos].w;
r.o[pos].y = r.o[pos].y + *Pointer<Float4>(r.data + OFFSET(DrawData,YYYY)) * r.o[pos].w;
o[pos].x = o[pos].x + *Pointer<Float4>(data + OFFSET(DrawData,XXXX)) * o[pos].w;
o[pos].y = o[pos].y + *Pointer<Float4>(data + OFFSET(DrawData,YYYY)) * o[pos].w;
}
if(symmetricNormalizedDepth && !state.fixedFunction)
{
r.o[pos].z = (r.o[pos].z + r.o[pos].w) * Float4(0.5f);
o[pos].z = (o[pos].z + o[pos].w) * Float4(0.5f);
}
}
......@@ -485,10 +483,10 @@ namespace sw
{
if(state.output[i].write)
{
v.x = r.o[i].x;
v.y = r.o[i].y;
v.z = r.o[i].z;
v.w = r.o[i].w;
v.x = o[i].x;
v.y = o[i].y;
v.z = o[i].z;
v.w = o[i].w;
if(state.output[i].xClamp)
{
......@@ -540,23 +538,23 @@ namespace sw
}
}
*Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 0) = (r.clipFlags >> 0) & 0x0000000FF; // FIXME: unsigned char Vertex::clipFlags
*Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 1) = (r.clipFlags >> 8) & 0x0000000FF;
*Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 2) = (r.clipFlags >> 16) & 0x0000000FF;
*Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 3) = (r.clipFlags >> 24) & 0x0000000FF;
*Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 0) = (clipFlags >> 0) & 0x0000000FF; // FIXME: unsigned char Vertex::clipFlags
*Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 1) = (clipFlags >> 8) & 0x0000000FF;
*Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 2) = (clipFlags >> 16) & 0x0000000FF;
*Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 3) = (clipFlags >> 24) & 0x0000000FF;
int pos = state.positionRegister;
v.x = r.o[pos].x;
v.y = r.o[pos].y;
v.z = r.o[pos].z;
v.w = r.o[pos].w;
v.x = o[pos].x;
v.y = o[pos].y;
v.z = o[pos].z;
v.w = o[pos].w;
Float4 w = As<Float4>(As<Int4>(v.w) | (As<Int4>(CmpEQ(v.w, Float4(0.0f))) & As<Int4>(Float4(1.0f))));
Float4 rhw = Float4(1.0f) / w;
v.x = As<Float4>(RoundInt(*Pointer<Float4>(r.data + OFFSET(DrawData,X0x16)) + v.x * rhw * *Pointer<Float4>(r.data + OFFSET(DrawData,Wx16))));
v.y = As<Float4>(RoundInt(*Pointer<Float4>(r.data + OFFSET(DrawData,Y0x16)) + v.y * rhw * *Pointer<Float4>(r.data + OFFSET(DrawData,Hx16))));
v.x = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,X0x16)) + v.x * rhw * *Pointer<Float4>(data + OFFSET(DrawData,Wx16))));
v.y = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,Y0x16)) + v.y * rhw * *Pointer<Float4>(data + OFFSET(DrawData,Hx16))));
v.z = v.z * rhw;
v.w = rhw;
......
......@@ -41,59 +41,14 @@ namespace sw
void generate();
protected:
struct Registers
{
Registers(const VertexShader *shader) :
v(shader && shader->dynamicallyIndexedInput),
r(shader && shader->dynamicallyIndexedTemporaries),
o(shader && shader->dynamicallyIndexedOutput)
{
loopDepth = -1;
enableStack[0] = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
Pointer<Byte> constants;
if(shader && shader->containsBreakInstruction())
{
enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
}
Int clipFlags;
if(shader && shader->containsContinueInstruction())
{
enableContinue = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
}
}
Pointer<Byte> data;
Pointer<Byte> constants;
Int clipFlags;
RegisterArray<16> v;
RegisterArray<4096> r;
RegisterArray<12> o;
Vector4f a0;
Array<Int, 4> aL;
Vector4f p0;
Array<Int, 4> increment;
Array<Int, 4> iteration;
Int loopDepth;
Int stackIndex; // FIXME: Inc/decrement callStack
Array<UInt, 16> callStack;
Int enableIndex;
Array<Int4, 1 + 24> enableStack;
Int4 enableBreak;
Int4 enableContinue;
Int4 enableLeave;
Int instanceID;
};
Registers r;
RegisterArray<16> v; // Varying registers
RegisterArray<12> o; // Output registers
const VertexProcessor::State &state;
const VertexShader *const shader;
private:
virtual void pipeline() = 0;
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment