Commit 7e1e4857 by Jan Voung

First pass at emitIAS for branches and binding labels

Currently not testing fixups of forward branches and instead streaming a ".byte (foo - (. + 1))" or ".long (foo - (. + 4))". Forward-branch fixups should be supported once emitIAS() delays writing things out until after the function is fully emitted (and therefore forward labels have all been bound).

BUG=none
R=stichnot@chromium.org
Review URL: https://codereview.chromium.org/673543002
parent bd4ea5b4
......@@ -94,7 +94,7 @@ public:
// Returns the per-function assembler, downcast to the concrete
// assembler type T requested by the caller. No runtime type check is
// performed; the caller must know the actual target assembler type.
template <typename T> T *getAssembler() const {
return static_cast<T *>(TargetAssembler.get());
}
bool UseIntegratedAssembler() const {
// True when instructions should be emitted through the integrated
// (binary) assembler instead of as text; driven by a command-line flag.
bool useIntegratedAssembler() const {
return getContext()->getFlags().UseIntegratedAssembler;
}
bool hasComputedFrame() const;
......
......@@ -12,6 +12,7 @@
//
//===----------------------------------------------------------------------===//
#include "assembler.h"
#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceInst.h"
......@@ -491,6 +492,10 @@ void CfgNode::emit(Cfg *Func) const {
Str << Func->getContext()->mangleName(Func->getFunctionName()) << ":\n";
}
Str << getAsmName() << ":\n";
if (Func->useIntegratedAssembler()) {
Assembler *Asm = Func->getAssembler<Assembler>();
Asm->BindCfgNodeLabel(getIndex());
}
for (InstPhi *Phi : Phis) {
if (Phi->isDeleted())
continue;
......@@ -505,7 +510,7 @@ void CfgNode::emit(Cfg *Func) const {
// suppress them.
if (I->isRedundantAssign())
continue;
if (Func->UseIntegratedAssembler()) {
if (Func->useIntegratedAssembler()) {
I->emitIAS(Func);
} else {
I->emit(Func);
......
......@@ -373,6 +373,33 @@ void emitIASBytes(const Cfg *Func, const x86::AssemblerX86 *Asm,
}
}
// Emits the machine-code bytes of a just-assembled branch instruction as
// textual ".byte" directives. A bound label means a backward branch whose
// displacement is already known, so the raw bytes can be streamed out
// as-is. An unbound label is a forward branch whose fixup has not
// happened yet; in that case the opcode bytes are streamed followed by a
// ".byte LabelName - (. + 1)" (near) or ".long LabelName - (. + 4)" (far)
// directive, and the external assembler resolves the displacement.
void emitIASBytesBranch(const Cfg *Func, const x86::AssemblerX86 *Asm,
intptr_t StartPosition, const x86::Label *Label,
const IceString &LabelName, bool Near) {
if (Label->IsBound()) {
emitIASBytes(Func, Asm, StartPosition);
return;
}
const intptr_t DispSize = Near ? 1 : 4;
const IceString Directive = Near ? ".byte" : ".long";
Ostream &Out = Func->getContext()->getStrEmit();
const intptr_t EndPosition = Asm->GetPosition();
// The branch encoding is at least one opcode byte plus the displacement.
assert(EndPosition - StartPosition > DispSize);
// Stream out everything except the trailing displacement bytes.
for (intptr_t Pos = StartPosition; Pos < EndPosition - DispSize; ++Pos) {
Out << "\t.byte 0x";
Out.write_hex(Asm->LoadBuffer<uint8_t>(Pos));
Out << "\n";
}
Out << "\t" << Directive << " " << LabelName << " - (. + " << DispSize
<< ")\n";
}
} // end of anonymous namespace
void InstX8632::dump(const Cfg *Func) const {
......@@ -386,6 +413,15 @@ void InstX8632Label::emit(const Cfg *Func) const {
Str << getName(Func) << ":\n";
}
// Binds this local (intra-block) label at the assembler's current buffer
// position, so branches to it can be resolved, and also prints the
// textual label for the streamed assembly output.
void InstX8632Label::emitIAS(const Cfg *Func) const {
x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
Asm->BindLocalLabel(Number);
// TODO(jvoung): remove the textual label once forward branch
// fixups are used (and text assembler is not used).
Ostream &Str = Func->getContext()->getStrEmit();
Str << getName(Func) << ":\n";
}
void InstX8632Label::dump(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrDump();
Str << getName(Func) << ":";
......@@ -415,6 +451,47 @@ void InstX8632Br::emit(const Cfg *Func) const {
}
}
// Assembles this branch through the integrated assembler and streams the
// resulting bytes as text via emitIASBytesBranch(). Local labels
// (Label != nullptr) always use near (1-byte) displacements; CFG-node
// targets pessimistically use far (4-byte) displacements because an
// unbound forward label's final distance is not known yet.
void InstX8632Br::emitIAS(const Cfg *Func) const {
x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
intptr_t StartPosition = Asm->GetPosition();
if (Label) {
x86::Label *L = Asm->GetOrCreateLocalLabel(Label->getNumber());
// In all these cases, local Labels should only be used for Near.
const bool Near = true;
if (Condition == CondX86::Br_None) {
Asm->jmp(L, Near);
} else {
Asm->j(Condition, L, Near);
}
emitIASBytesBranch(Func, Asm, StartPosition, L, Label->getName(Func), Near);
} else {
// Pessimistically assume it's far. This only affects Labels that
// are not Bound.
const bool Near = false;
if (Condition == CondX86::Br_None) {
// Unconditional branch: a single jmp to the (false) successor.
x86::Label *L =
Asm->GetOrCreateCfgNodeLabel(getTargetFalse()->getIndex());
assert(!getTargetTrue());
Asm->jmp(L, Near);
emitIASBytesBranch(Func, Asm, StartPosition, L,
getTargetFalse()->getAsmName(), Near);
} else {
// Conditional branch: jcc to the true target, then (when present)
// an unconditional jmp to the false target.
x86::Label *L = Asm->GetOrCreateCfgNodeLabel(getTargetTrue()->getIndex());
Asm->j(Condition, L, Near);
emitIASBytesBranch(Func, Asm, StartPosition, L,
getTargetTrue()->getAsmName(), Near);
// Restart byte accounting for the second instruction.
StartPosition = Asm->GetPosition();
if (getTargetFalse()) {
x86::Label *L2 =
Asm->GetOrCreateCfgNodeLabel(getTargetFalse()->getIndex());
Asm->jmp(L2, Near);
emitIASBytesBranch(Func, Asm, StartPosition, L2,
getTargetFalse()->getAsmName(), Near);
}
}
}
}
void InstX8632Br::dump(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrDump();
Str << "br ";
......
......@@ -274,13 +274,14 @@ protected:
}
};
// InstX8632Label represents an intra-block label that is the
// target of an intra-block branch. These are used for lowering i1
// calculations, Select instructions, and 64-bit compares on a 32-bit
// architecture, without basic block splitting. Basic block splitting
// is not so desirable for several reasons, one of which is the impact
// on decisions based on whether a variable's live range spans
// multiple basic blocks.
// InstX8632Label represents an intra-block label that is the target
// of an intra-block branch. The offset between the label and the
// branch must be fit into one byte (considered "near"). These are
// used for lowering i1 calculations, Select instructions, and 64-bit
// compares on a 32-bit architecture, without basic block splitting.
// Basic block splitting is not so desirable for several reasons, one
// of which is the impact on decisions based on whether a variable's
// live range spans multiple basic blocks.
//
// Intra-block control flow must be used with caution. Consider the
// sequence for "c = (a >= b ? x : y)".
......@@ -321,15 +322,15 @@ public:
}
uint32_t getEmitInstCount() const override { return 0; }
IceString getName(const Cfg *Func) const;
SizeT getNumber() const { return Number; }
void emit(const Cfg *Func) const override;
// TODO(jvoung): Filler in.
void emitIAS(const Cfg *Func) const override { emit(Func); }
void emitIAS(const Cfg *Func) const override;
void dump(const Cfg *Func) const override;
private:
InstX8632Label(Cfg *Func, TargetX8632 *Target);
~InstX8632Label() override {}
SizeT Number; // used only for unique label string generation
SizeT Number; // used for unique label generation.
};
// Conditional and unconditional branch instruction.
......@@ -385,8 +386,7 @@ public:
return Sum;
}
void emit(const Cfg *Func) const override;
// TODO(jvoung): Filler in.
void emitIAS(const Cfg *Func) const override { emit(Func); }
void emitIAS(const Cfg *Func) const override;
void dump(const Cfg *Func) const override;
static bool classof(const Inst *Inst) { return isClassof(Inst, Br); }
......
......@@ -207,7 +207,7 @@ class Assembler {
public:
Assembler() {}
~Assembler() {}
virtual ~Assembler() {}
// Allocate a chunk of bytes using the per-Assembler allocator.
uintptr_t AllocateBytes(size_t bytes) {
......@@ -224,6 +224,8 @@ public:
// Allocate data of type T using the per-Assembler allocator.
template <typename T> T *Allocate() { return Allocator.Allocate<T>(); }
virtual void BindCfgNodeLabel(SizeT NodeNumber) = 0;
private:
llvm::BumpPtrAllocator Allocator;
};
......
......@@ -67,6 +67,53 @@ Address Address::ofConstPool(GlobalContext *Ctx, Assembler *Asm,
return x86::Address::Absolute(Fixup);
}
// Destructor. In debug builds, verify that no label is being destroyed
// while it still has unresolved (linked/near) branches pending.
AssemblerX86::~AssemblerX86() {
#ifndef NDEBUG
// Note: the iteration variable is named "L" rather than "Label" to
// avoid shadowing the Label type.
for (const Label *L : CfgNodeLabels)
L->FinalCheck();
for (const Label *L : LocalLabels)
L->FinalCheck();
#endif
}
// Returns the label stored at index Number in Labels, lazily growing the
// vector and pool-allocating the label on first request. Intermediate
// slots created by growth stay null until they are requested themselves.
Label *AssemblerX86::GetOrCreateLabel(SizeT Number, LabelVector &Labels) {
// Grow the vector (null-filling any gap) so Number is a valid index.
if (Number >= Labels.size())
Labels.resize(Number + 1);
if (Labels[Number] == nullptr)
Labels[Number] = new (this->Allocate<Label>()) Label();
return Labels[Number];
}
// Returns (creating on first use) the label for the CFG node with the
// given node number.
Label *AssemblerX86::GetOrCreateCfgNodeLabel(SizeT NodeNumber) {
return GetOrCreateLabel(NodeNumber, CfgNodeLabels);
}
// Returns (creating on first use) the intra-block "local" label with the
// given number.
Label *AssemblerX86::GetOrCreateLocalLabel(SizeT Number) {
return GetOrCreateLabel(Number, LocalLabels);
}
// Binds the given CFG node's label at the current buffer position,
// resolving any branches already linked to it.
void AssemblerX86::BindCfgNodeLabel(SizeT NodeNumber) {
this->Bind(GetOrCreateCfgNodeLabel(NodeNumber));
}
// Binds the given local label at the current buffer position, resolving
// any branches already linked to it.
void AssemblerX86::BindLocalLabel(SizeT Number) {
this->Bind(GetOrCreateLocalLabel(Number));
}
void AssemblerX86::call(GPRRegister reg) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xFF);
......
......@@ -284,7 +284,9 @@ public:
#endif // !NDEBUG
}
~Label() {
~Label() {}
void FinalCheck() const {
// Assert if label is being destroyed with unresolved branches pending.
assert(!IsLinked());
assert(!HasNear());
......@@ -363,11 +365,16 @@ public:
assert(!use_far_branches);
(void)use_far_branches;
}
~AssemblerX86() {}
~AssemblerX86() override;
static const bool kNearJump = true;
static const bool kFarJump = false;
Label *GetOrCreateCfgNodeLabel(SizeT NodeNumber);
void BindCfgNodeLabel(SizeT NodeNumber) override;
Label *GetOrCreateLocalLabel(SizeT Number);
void BindLocalLabel(SizeT Number);
// Operations to emit GPR instructions (and dispatch on operand type).
typedef void (AssemblerX86::*TypedEmitGPR)(Type, GPRRegister);
typedef void (AssemblerX86::*TypedEmitAddr)(Type, const Address &);
......@@ -848,6 +855,14 @@ private:
void EmitGenericShift(int rm, Type Ty, const Operand &operand,
GPRRegister shifter);
typedef std::vector<Label *> LabelVector;
// A vector of pool-allocated x86 labels for CFG nodes.
LabelVector CfgNodeLabels;
// A vector of pool-allocated x86 labels for Local labels.
LabelVector LocalLabels;
Label *GetOrCreateLabel(SizeT Number, LabelVector &Labels);
AssemblerBuffer buffer_;
};
......
; Tests various aspects of x86 branch encodings (near vs far,
; forward vs backward, using CFG labels, or local labels).
; Use -ffunction-sections so that the offsets reset for each function.
; RUN: %p2i -i %s --args -O2 --verbose none -ffunction-sections \
; RUN: | llvm-mc -triple=i686-none-nacl -x86-asm-syntax=intel -filetype=obj \
; RUN: | llvm-objdump -d --symbolize -x86-asm-syntax=intel - | FileCheck %s
; RUN: %p2i -i %s --args --verbose none | FileCheck --check-prefix=ERRORS %s
; Use atomic ops as filler, which shouldn't get optimized out.
declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32)
declare i32 @llvm.nacl.atomic.load.i32(i32*, i32)
declare i32 @llvm.nacl.atomic.rmw.i32(i32, i32*, i32, i32)
; Exercises backward branches short enough for 1-byte displacements: a
; conditional branch back to %next2 and an unconditional branch back to
; %next (offsets are pinned by the CHECK lines below).
define void @test_near_backward(i32 %iptr, i32 %val) {
entry:
br label %next
next:
%ptr = inttoptr i32 %iptr to i32*
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
br label %next2
next2:
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
%cmp = icmp ult i32 %val, 0
br i1 %cmp, label %next2, label %next
}
; CHECK-LABEL: test_near_backward
; CHECK: 8: {{.*}} mov dword ptr
; CHECK-NEXT: a: {{.*}} mfence
; CHECK-NEXT: d: {{.*}} mov dword ptr
; CHECK-NEXT: f: {{.*}} mfence
; CHECK-NEXT: 12: {{.*}} cmp
; (0x15 + 2) - 10 == 0xd
; CHECK-NEXT: 15: 72 f6 jb -10
; (0x17 + 2) - 17 == 0x8
; CHECK-NEXT: 17: eb ef jmp -17
; Test one of the backward branches being too large for 8 bits
; and one being just okay.
; The repeated atomic stores pad %next2 so that the unconditional
; backward branch to %next is too far for an 8-bit displacement (4-byte
; form), while the conditional branch to %next2 still fits in one byte.
define void @test_far_backward1(i32 %iptr, i32 %val) {
entry:
br label %next
next:
%ptr = inttoptr i32 %iptr to i32*
%tmp = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr, i32 6)
br label %next2
next2:
call void @llvm.nacl.atomic.store.i32(i32 %tmp, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
%cmp = icmp ugt i32 %val, 0
br i1 %cmp, label %next2, label %next
}
; CHECK-LABEL: test_far_backward1
; CHECK: 8: {{.*}} mov {{.*}}, dword ptr [e{{[^s]}}
; CHECK-NEXT: a: {{.*}} mov dword ptr
; CHECK-NEXT: c: {{.*}} mfence
; (0x85 + 2) - 125 == 0xa
; CHECK: 85: 77 83 ja -125
; (0x87 + 5) - 132 == 0x8
; CHECK-NEXT: 87: e9 7c ff ff ff jmp -132
; Same as test_far_backward1, but with the conditional branch being
; the one that is too far.
; Mirror of test_far_backward1, but arranged so the *conditional* branch
; (to %next) is the one needing a 4-byte displacement, while the
; unconditional jmp back to %next2 fits in one byte.
define void @test_far_backward2(i32 %iptr, i32 %val) {
entry:
br label %next
next:
%ptr = inttoptr i32 %iptr to i32*
%tmp = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr, i32 6)
%tmp2 = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr, i32 6)
%tmp3 = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr, i32 6)
%tmp4 = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr, i32 6)
%tmp5 = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr, i32 6)
br label %next2
next2:
call void @llvm.nacl.atomic.store.i32(i32 %tmp, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %tmp2, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %tmp3, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %tmp4, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %tmp5, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
%cmp = icmp sle i32 %val, 0
br i1 %cmp, label %next, label %next2
}
; CHECK-LABEL: test_far_backward2
; CHECK: c: {{.*}} mov {{.*}}, dword ptr [e{{[^s]}}
; CHECK: 14: {{.*}} mov {{.*}}, dword ptr
; CHECK-NEXT: 16: {{.*}} mov dword ptr
; CHECK-NEXT: 18: {{.*}} mfence
; (0x8c + 6) - 134 == 0xc
; CHECK: 8c: 0f 8e 7a ff ff ff jle -134
; (0x92 + 2) - 126 == 0x16
; CHECK-NEXT: 92: eb 82 jmp -126
; Forward branch to a CFG node (%next3): per the note below, these use
; the fully relaxed (4-byte) form even when the distance is small.
define void @test_near_forward(i32 %iptr, i32 %val) {
entry:
br label %next1
next1:
%ptr = inttoptr i32 %iptr to i32*
%cmp = icmp ult i32 %val, 0
br i1 %cmp, label %next3, label %next2
next2:
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
br label %next3
next3:
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
br label %next1
}
; Forward branches for non-local labels currently use the fully relaxed
; form to avoid needing a relaxation pass.
; CHECK-LABEL: test_near_forward
; CHECK: 8: {{.*}} cmp
; CHECK-NEXT: b: 0f 82 05 00 00 00 jb 5
; CHECK-NEXT: 11: {{.*}} mov dword ptr
; CHECK-NEXT: 13: {{.*}} mfence
; Forward branch is 5 bytes ahead to here.
; CHECK-NEXT: 16: {{.*}} mov dword ptr
; Jumps back to (0x1b + 2) - 21 == 0x8 (to before the forward branch,
; therefore knowing that the forward branch was indeed 6 bytes).
; CHECK: 1b: eb eb jmp -21
; Unlike forward branches to cfg nodes, "local" forward branches
; always use a 1 byte displacement.
; Check local forward branches, followed by a near backward branch
; to make sure that the instruction size accounting for the forward
; branches are correct, by the time the backward branch is hit.
; A 64-bit compare happens to use local forward branches.
; The 64-bit icmp lowers with local forward branches (always 1-byte);
; the backward branch afterwards checks that size accounting for those
; forward branches was correct.
define void @test_local_forward_then_back(i64 %val64, i32 %iptr, i32 %val) {
entry:
br label %next
next:
%ptr = inttoptr i32 %iptr to i32*
call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
br label %next2
next2:
%cmp = icmp ult i64 %val64, 0
br i1 %cmp, label %next, label %next2
}
; CHECK-LABEL: test_local_forward_then_back
; CHECK: 14: {{.*}} mov dword ptr
; CHECK-NEXT: 16: {{.*}} mfence
; CHECK-NEXT: 19: {{.*}} mov dword ptr {{.*}}, 1
; CHECK-NEXT: 20: {{.*}} cmp
; CHECK-NEXT: 23: {{.*}} jb 14
; (0x37 + 2) - 37 == 0x14
; CHECK: 37: {{.*}} jne -37
; (0x39 + 2) - 34 == 0x19
; CHECK: 39: {{.*}} jmp -34
; Test that backward local branches also work and are small.
; Some of the atomic instructions use a cmpxchg loop.
; Backward local branches: the atomic rmw lowers to a cmpxchg loop whose
; backward branch should use the small (1-byte) encoding.
define void @test_local_backward(i64 %val64, i32 %iptr, i32 %val) {
entry:
br label %next
next:
%ptr = inttoptr i32 %iptr to i32*
%a = call i32 @llvm.nacl.atomic.rmw.i32(i32 5, i32* %ptr, i32 %val, i32 6)
br label %next2
next2:
%success = icmp eq i32 1, %a
br i1 %success, label %next, label %next2
}
; CHECK-LABEL: test_local_backward
; CHECK: 9: {{.*}} mov {{.*}}, dword
; CHECK: b: {{.*}} mov
; CHECK-NEXT: d: {{.*}} xor
; CHECK-NEXT: f: {{.*}} lock
; CHECK-NEXT: 10: {{.*}} cmpxchg
; (0x13 + 2) - 10 == 0xb
; CHECK-NEXT: 13: 75 f6 jne -10
; (0x1c + 2) - 21 == 0x9
; CHECK: 1c: 74 eb je -21
; ERRORS-NOT: ICE translation error
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment