Commit 17078c70 by Stephen White

Fix push & pop of XMM registers.

Microsoft's x86-64 calling convention ABI requires registers XMM6-15 to be preserved by the callee: https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention?view=vs-2017#calling-convention-defaults Implement a _pop_reg() analog to _push_reg(), so we can specialize the XMM treatment. Pass the RegNum all the way down to the specialized class, because we don't know which registers should be 128-bit at the call site. Bug chromium:931926 Bug swiftshader:22 Change-Id: I57637c852f0f3bb9a374d61a16a7aaa434ac908d Reviewed-on: https://swiftshader-review.googlesource.com/c/25468Reviewed-by: 's avatarNicolas Capens <nicolascapens@google.com> Kokoro-Presubmit: kokoro <noreply+kokoro@google.com> Tested-by: 's avatarStephen White <senorblanco@chromium.org>
parent 105fd0f6
......@@ -15,7 +15,7 @@
#define MAJOR_VERSION 4
#define MINOR_VERSION 1
#define BUILD_VERSION 0
#define BUILD_REVISION 6
#define BUILD_REVISION 7
#define STRINGIFY(x) #x
#define MACRO_STRINGIFY(x) STRINGIFY(x)
......
......@@ -992,6 +992,94 @@ TEST(ReactorUnitTests, MulAdd)
delete routine;
}
// Check that a complex generated function which utilizes all 8 or 16 XMM
// registers computes the correct result.
// (Note that due to MSC's lack of support for inline assembly in x64,
// this test does not actually check that the register contents are
// preserved, just that the generated function computes the correct value.
// It's necessary to inspect the registers in a debugger to actually verify.)
TEST(ReactorUnitTests, PreserveXMMRegisters)
{
Routine *routine = nullptr;
{
Function<Void(Pointer<Byte>, Pointer<Byte>)> function;
{
Pointer<Byte> in = function.Arg<0>();
Pointer<Byte> out = function.Arg<1>();
Float4 a = *Pointer<Float4>(in + 16 * 0);
Float4 b = *Pointer<Float4>(in + 16 * 1);
Float4 c = *Pointer<Float4>(in + 16 * 2);
Float4 d = *Pointer<Float4>(in + 16 * 3);
Float4 e = *Pointer<Float4>(in + 16 * 4);
Float4 f = *Pointer<Float4>(in + 16 * 5);
Float4 g = *Pointer<Float4>(in + 16 * 6);
Float4 h = *Pointer<Float4>(in + 16 * 7);
Float4 i = *Pointer<Float4>(in + 16 * 8);
Float4 j = *Pointer<Float4>(in + 16 * 9);
Float4 k = *Pointer<Float4>(in + 16 * 10);
Float4 l = *Pointer<Float4>(in + 16 * 11);
Float4 m = *Pointer<Float4>(in + 16 * 12);
Float4 n = *Pointer<Float4>(in + 16 * 13);
Float4 o = *Pointer<Float4>(in + 16 * 14);
Float4 p = *Pointer<Float4>(in + 16 * 15);
Float4 ab = a + b;
Float4 cd = c + d;
Float4 ef = e + f;
Float4 gh = g + h;
Float4 ij = i + j;
Float4 kl = k + l;
Float4 mn = m + n;
Float4 op = o + p;
Float4 abcd = ab + cd;
Float4 efgh = ef + gh;
Float4 ijkl = ij + kl;
Float4 mnop = mn + op;
Float4 abcdefgh = abcd + efgh;
Float4 ijklmnop = ijkl + mnop;
Float4 sum = abcdefgh + ijklmnop;
*Pointer<Float4>(out) = sum;
Return();
}
routine = function("one");
assert(routine);
float input[64] = { 1.0f, 0.0f, 0.0f, 0.0f,
-1.0f, 1.0f, -1.0f, 0.0f,
1.0f, 2.0f, -2.0f, 0.0f,
-1.0f, 3.0f, -3.0f, 0.0f,
1.0f, 4.0f, -4.0f, 0.0f,
-1.0f, 5.0f, -5.0f, 0.0f,
1.0f, 6.0f, -6.0f, 0.0f,
-1.0f, 7.0f, -7.0f, 0.0f,
1.0f, 8.0f, -8.0f, 0.0f,
-1.0f, 9.0f, -9.0f, 0.0f,
1.0f, 10.0f, -10.0f, 0.0f,
-1.0f, 11.0f, -11.0f, 0.0f,
1.0f, 12.0f, -12.0f, 0.0f,
-1.0f, 13.0f, -13.0f, 0.0f,
1.0f, 14.0f, -14.0f, 0.0f,
-1.0f, 15.0f, -15.0f, 0.0f };
float result[4];
void (*callable)(float*, float*) = (void(*)(float*,float*))routine->getEntry();
callable(input, result);
EXPECT_EQ(result[0], 0.0f);
EXPECT_EQ(result[1], 120.0f);
EXPECT_EQ(result[2], -120.0f);
EXPECT_EQ(result[3], 0.0f);
}
delete routine;
}
int main(int argc, char **argv)
{
::testing::InitGoogleTest(&argc, argv);
......
......@@ -222,7 +222,7 @@
// Note: It would be more appropriate to list the xmm register aliases as
// REGLIST0(), but the corresponding empty initializer gives a syntax error, so
// we use REGLIST1() to redundantly assign the register itself as an alias.
#define REGX8664_XMM_TABLE \
#define REGX8664_XMM_TABLE2(U, W) \
/* val, encode, name, base, scratch,preserved,stackptr,frameptr,sboxres, \
isGPR,is64,is32,is16,is8, isXmm, \
is64To8,is32To8,is16To8,isTrunc8Rcvr,isAhRcvr, aliases */ \
......@@ -239,31 +239,37 @@
NO_ALIASES()) \
X(Reg_xmm5, 5, "xmm5", Reg_xmm5, 1,0,0,0,0, 0,0,0,0,0, 1, 0,0,0,0,0, \
NO_ALIASES()) \
X(Reg_xmm6, 6, "xmm6", Reg_xmm6, 0,0,0,0,0, 0,0,0,0,0, 1, 0,0,0,0,0, \
X(Reg_xmm6, 6, "xmm6", Reg_xmm6, U,W,0,0,0, 0,0,0,0,0, 1, 0,0,0,0,0, \
NO_ALIASES()) \
X(Reg_xmm7, 7, "xmm7", Reg_xmm7, 0,0,0,0,0, 0,0,0,0,0, 1, 0,0,0,0,0, \
X(Reg_xmm7, 7, "xmm7", Reg_xmm7, U,W,0,0,0, 0,0,0,0,0, 1, 0,0,0,0,0, \
NO_ALIASES()) \
X(Reg_xmm8, 8, "xmm8", Reg_xmm8, 0,0,0,0,0, 0,0,0,0,0, 1, 0,0,0,0,0, \
X(Reg_xmm8, 8, "xmm8", Reg_xmm8, U,W,0,0,0, 0,0,0,0,0, 1, 0,0,0,0,0, \
NO_ALIASES()) \
X(Reg_xmm9, 9, "xmm9", Reg_xmm9, 0,0,0,0,0, 0,0,0,0,0, 1, 0,0,0,0,0, \
X(Reg_xmm9, 9, "xmm9", Reg_xmm9, U,W,0,0,0, 0,0,0,0,0, 1, 0,0,0,0,0, \
NO_ALIASES()) \
X(Reg_xmm10, 10, "xmm10", Reg_xmm10, 0,0,0,0,0, 0,0,0,0,0, 1, 0,0,0,0,0, \
X(Reg_xmm10, 10, "xmm10", Reg_xmm10, U,W,0,0,0, 0,0,0,0,0, 1, 0,0,0,0,0, \
NO_ALIASES()) \
X(Reg_xmm11, 11, "xmm11", Reg_xmm11, 0,0,0,0,0, 0,0,0,0,0, 1, 0,0,0,0,0, \
X(Reg_xmm11, 11, "xmm11", Reg_xmm11, U,W,0,0,0, 0,0,0,0,0, 1, 0,0,0,0,0, \
NO_ALIASES()) \
X(Reg_xmm12, 12, "xmm12", Reg_xmm12, 0,0,0,0,0, 0,0,0,0,0, 1, 0,0,0,0,0, \
X(Reg_xmm12, 12, "xmm12", Reg_xmm12, U,W,0,0,0, 0,0,0,0,0, 1, 0,0,0,0,0, \
NO_ALIASES()) \
X(Reg_xmm13, 13, "xmm13", Reg_xmm13, 0,0,0,0,0, 0,0,0,0,0, 1, 0,0,0,0,0, \
X(Reg_xmm13, 13, "xmm13", Reg_xmm13, U,W,0,0,0, 0,0,0,0,0, 1, 0,0,0,0,0, \
NO_ALIASES()) \
X(Reg_xmm14, 14, "xmm14", Reg_xmm14, 0,0,0,0,0, 0,0,0,0,0, 1, 0,0,0,0,0, \
X(Reg_xmm14, 14, "xmm14", Reg_xmm14, U,W,0,0,0, 0,0,0,0,0, 1, 0,0,0,0,0, \
NO_ALIASES()) \
X(Reg_xmm15, 15, "xmm15", Reg_xmm15, 0,0,0,0,0, 0,0,0,0,0, 1, 0,0,0,0,0, \
X(Reg_xmm15, 15, "xmm15", Reg_xmm15, U,W,0,0,0, 0,0,0,0,0, 1, 0,0,0,0,0, \
NO_ALIASES()) \
/* End of xmm register set */
//#define X(val, encode, name, base, scratch, preserved, stackptr, frameptr,
// sboxres, isGPR, is64, is32, is16, is8, isXmm, is64To8, is32To8,
// is16To8, isTrunc8Rcvr, isAhRcvr, aliases)
#if defined(SUBZERO_USE_MICROSOFT_ABI) // Microsoft x86-64 ABI
#define REGX8664_XMM_TABLE REGX8664_XMM_TABLE2(0, 1)
#else // System V AMD64 ABI
#define REGX8664_XMM_TABLE REGX8664_XMM_TABLE2(1, 0)
#endif
// We also provide a combined table, so that there is a namespace where
// all of the registers are considered and have distinct numberings.
// This is in contrast to the above, where the "encode" is based on how
......
......@@ -230,7 +230,13 @@ void TargetX8632::_unlink_bp() {
_pop(ebp);
}
void TargetX8632::_push_reg(Variable *Reg) { _push(Reg); }
void TargetX8632::_push_reg(RegNumT RegNum) {
_push(getPhysicalRegister(RegNum, Traits::WordType));
}
void TargetX8632::_pop_reg(RegNumT RegNum) {
_pop(getPhysicalRegister(RegNum, Traits::WordType));
}
void TargetX8632::emitGetIP(CfgNode *Node) {
// If there is a non-deleted InstX86GetIP instruction, we need to move it to
......
......@@ -52,7 +52,8 @@ protected:
void _sub_sp(Operand *Adjustment);
void _link_bp();
void _unlink_bp();
void _push_reg(Variable *Reg);
void _push_reg(RegNumT RegNum);
void _pop_reg(RegNumT RegNum);
void initRebasePtr();
void initSandbox();
......
......@@ -292,16 +292,36 @@ void TargetX8664::_unlink_bp() {
}
}
void TargetX8664::_push_reg(Variable *Reg) {
Variable *rbp =
getPhysicalRegister(Traits::RegisterSet::Reg_rbp, Traits::WordType);
if (Reg != rbp || !NeedSandboxing) {
_push(Reg);
void TargetX8664::_push_reg(RegNumT RegNum) {
if (Traits::isXmm(RegNum)) {
Variable *reg =
getPhysicalRegister(RegNum, IceType_v4f32);
Variable *rsp =
getPhysicalRegister(Traits::RegisterSet::Reg_rsp, Traits::WordType);
auto* address = Traits::X86OperandMem::create(Func, reg->getType(), rsp, nullptr);
_sub_sp(Ctx->getConstantInt32(16)); // TODO(capn): accumulate all the offsets and adjust the stack pointer once.
_storep(reg, address);
} else if (RegNum != Traits::RegisterSet::Reg_rbp || !NeedSandboxing) {
_push(getPhysicalRegister(RegNum, Traits::WordType));
} else {
_push_rbp();
}
}
void TargetX8664::_pop_reg(RegNumT RegNum) {
if (Traits::isXmm(RegNum)) {
Variable *reg =
getPhysicalRegister(RegNum, IceType_v4f32);
Variable *rsp =
getPhysicalRegister(Traits::RegisterSet::Reg_rsp, Traits::WordType);
auto* address = Traits::X86OperandMem::create(Func, reg->getType(), rsp, nullptr);
_movp(reg, address);
_add_sp(Ctx->getConstantInt32(16)); // TODO(capn): accumulate all the offsets and adjust the stack pointer once.
} else {
_pop(getPhysicalRegister(RegNum, Traits::WordType));
}
}
void TargetX8664::emitGetIP(CfgNode *Node) {
// No IP base register is needed on X86-64.
(void)Node;
......
......@@ -55,7 +55,8 @@ protected:
void _sub_sp(Operand *Adjustment);
void _link_bp();
void _unlink_bp();
void _push_reg(Variable *Reg);
void _push_reg(RegNumT RegNum);
void _pop_reg(RegNumT RegNum);
void initRebasePtr();
void initSandbox();
......
......@@ -342,6 +342,18 @@ struct TargetX8664Traits {
return ByteRegs[RegNum];
}
static bool isXmm(RegNumT RegNum) {
static const bool IsXmm [RegisterSet::Reg_NUM] = {
#define X(val, encode, name, base, scratch, preserved, stackptr, frameptr, \
sboxres, isGPR, is64, is32, is16, is8, isXmm, is64To8, is32To8, \
is16To8, isTrunc8Rcvr, isAhRcvr, aliases) \
isXmm,
REGX8664_TABLE
#undef X
};
return IsXmm[RegNum];
}
static XmmRegister getEncodedXmm(RegNumT RegNum) {
static const XmmRegister XmmRegs[RegisterSet::Reg_NUM] = {
#define X(val, encode, name, base, scratch, preserved, stackptr, frameptr, \
......
......@@ -693,8 +693,11 @@ protected:
Context.insert<typename Traits::Insts::Lea>(Dest, Src0);
}
void _link_bp() { dispatchToConcrete(&Traits::ConcreteTarget::_link_bp); }
void _push_reg(Variable *Reg) {
dispatchToConcrete(&Traits::ConcreteTarget::_push_reg, std::move(Reg));
void _push_reg(RegNumT RegNum) {
dispatchToConcrete(&Traits::ConcreteTarget::_push_reg, std::move(RegNum));
}
void _pop_reg(RegNumT RegNum) {
dispatchToConcrete(&Traits::ConcreteTarget::_pop_reg, std::move(RegNum));
}
void _mfence() { Context.insert<typename Traits::Insts::Mfence>(); }
/// Moves can be used to redefine registers, creating "partial kills" for
......
......@@ -1093,7 +1093,7 @@ void TargetX86Base<TraitsType>::addProlog(CfgNode *Node) {
assert(RegNum == Traits::getBaseReg(RegNum));
++NumCallee;
PreservedRegsSizeBytes += typeWidthInBytes(Traits::WordType);
_push_reg(getPhysicalRegister(RegNum, Traits::WordType));
_push_reg(RegNum);
}
Ctx->statsUpdateRegistersSaved(NumCallee);
......@@ -1359,7 +1359,7 @@ void TargetX86Base<TraitsType>::addEpilog(CfgNode *Node) {
continue;
const auto RegNum = RegNumT::fromInt(i);
assert(RegNum == Traits::getBaseReg(RegNum));
_pop(getPhysicalRegister(RegNum, Traits::WordType));
_pop_reg(RegNum);
}
if (!NeedSandboxing) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment