Commit 2c862522 by Andrew Scull

Introduce the ability to insert IACA (Intel Architecture Code Analyzer) marks.

The IACA marks identify the code which should be analyzed with the IACA. The generated binaries are not executable due to the marks. This feature should only be used during development when analyzing generated code, so it is protected behind the --allow-iaca-marks flag. ScopedIacaMark is a helper class which opens a mark and closes it at the end of the scope. This is useful when there are many returns as you don't have to write `_iaca_end()` before them all. BUG= R=stichnot@chromium.org Review URL: https://codereview.chromium.org/1260093003.
parent 729b5f6c
...@@ -833,6 +833,12 @@ public: ...@@ -833,6 +833,12 @@ public:
void xchg(Type Ty, const typename Traits::Address &address, void xchg(Type Ty, const typename Traits::Address &address,
typename Traits::GPRRegister reg); typename Traits::GPRRegister reg);
/// \name Intel Architecture Code Analyzer markers.
/// These emit raw marker byte sequences delimiting the region to be analyzed;
/// code containing them is meant for analysis only, not for execution.
/// @{
/// Emits the start marker (bytes 0x0F 0x0B, a move of 111 into ebx, then
/// bytes 0x64 0x67 0x90).
void iaca_start();
/// Emits the end marker (a move of 222 into ebx, bytes 0x64 0x67 0x90, then
/// bytes 0x0F 0x0B).
void iaca_end();
/// @}
void emitSegmentOverride(uint8_t prefix); void emitSegmentOverride(uint8_t prefix);
intptr_t preferredLoopAlignment() { return 16; } intptr_t preferredLoopAlignment() { return 16; }
......
...@@ -3112,6 +3112,43 @@ void AssemblerX86Base<Machine>::xchg(Type Ty, ...@@ -3112,6 +3112,43 @@ void AssemblerX86Base<Machine>::xchg(Type Ty,
emitOperand(gprEncoding(reg), addr); emitOperand(gprEncoding(reg), addr);
} }
/// Emits the IACA start marker into the assembler buffer. The byte stream is,
/// in order: 0x0F 0x0B, a 32-bit "mov $111, ebx", and 0x64 0x67 0x90.
template <class Machine> void AssemblerX86Base<Machine>::iaca_start() {
  AssemblerBuffer::EnsureCapacity ensured(&Buffer);

  // Leading 0x0F 0x0B pair (the x86 UD2 opcode).
  constexpr uint8_t LeadingBytes[] = {0x0F, 0x0B};
  for (const uint8_t Byte : LeadingBytes)
    emitUint8(Byte);

  // mov $111, ebx
  constexpr Type MovTy = IceType_i32;
  constexpr typename Traits::GPRRegister MarkReg =
      Traits::GPRRegister::Encoded_Reg_ebx;
  emitRexB(MovTy, MarkReg);
  emitUint8(0xB8 + gprEncoding(MarkReg));
  emitImmediate(MovTy, Immediate(111));

  // Trailing 0x64 0x67 0x90 (FS segment prefix, address-size prefix, NOP).
  constexpr uint8_t TrailingBytes[] = {0x64, 0x67, 0x90};
  for (const uint8_t Byte : TrailingBytes)
    emitUint8(Byte);
}
/// Emits the IACA end marker into the assembler buffer. The byte stream is,
/// in order: a 32-bit "mov $222, ebx", 0x64 0x67 0x90, and 0x0F 0x0B.
template <class Machine> void AssemblerX86Base<Machine>::iaca_end() {
  AssemblerBuffer::EnsureCapacity ensured(&Buffer);

  // mov $222, ebx
  constexpr Type MovTy = IceType_i32;
  constexpr typename Traits::GPRRegister MarkReg =
      Traits::GPRRegister::Encoded_Reg_ebx;
  emitRexB(MovTy, MarkReg);
  emitUint8(0xB8 + gprEncoding(MarkReg));
  emitImmediate(MovTy, Immediate(222));

  // 0x64 0x67 0x90 (FS segment prefix, address-size prefix, NOP) followed by
  // the closing 0x0F 0x0B pair (the x86 UD2 opcode).
  constexpr uint8_t TrailingBytes[] = {0x64, 0x67, 0x90, 0x0F, 0x0B};
  for (const uint8_t Byte : TrailingBytes)
    emitUint8(Byte);
}
template <class Machine> template <class Machine>
void AssemblerX86Base<Machine>::emitSegmentOverride(uint8_t prefix) { void AssemblerX86Base<Machine>::emitSegmentOverride(uint8_t prefix) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer); AssemblerBuffer::EnsureCapacity ensured(&Buffer);
......
...@@ -34,6 +34,12 @@ cl::opt<bool> AllowErrorRecovery( ...@@ -34,6 +34,12 @@ cl::opt<bool> AllowErrorRecovery(
cl::desc("Allow error recovery when reading PNaCl bitcode."), cl::desc("Allow error recovery when reading PNaCl bitcode."),
cl::init(false)); cl::init(false));
// Command-line switch "allow-iaca-marks"; off by default. Its value is copied
// into ClFlags by getParsedClFlags() via setAllowIacaMarks().
cl::opt<bool> AllowIacaMarks(
"allow-iaca-marks",
cl::desc("Allow IACA (Intel Architecture Code Analyzer) marks to be "
"inserted. These binaries are not executable."),
cl::init(false));
// This is currently needed by crosstest.py. // This is currently needed by crosstest.py.
cl::opt<bool> AllowUninitializedGlobals( cl::opt<bool> AllowUninitializedGlobals(
"allow-uninitialized-globals", "allow-uninitialized-globals",
...@@ -341,6 +347,7 @@ void ClFlags::parseFlags(int argc, char **argv) { ...@@ -341,6 +347,7 @@ void ClFlags::parseFlags(int argc, char **argv) {
void ClFlags::resetClFlags(ClFlags &OutFlags) { void ClFlags::resetClFlags(ClFlags &OutFlags) {
// bool fields // bool fields
OutFlags.AllowErrorRecovery = false; OutFlags.AllowErrorRecovery = false;
OutFlags.AllowIacaMarks = false;
OutFlags.AllowUninitializedGlobals = false; OutFlags.AllowUninitializedGlobals = false;
OutFlags.DataSections = false; OutFlags.DataSections = false;
OutFlags.DecorateAsm = false; OutFlags.DecorateAsm = false;
...@@ -398,6 +405,7 @@ void ClFlags::getParsedClFlags(ClFlags &OutFlags) { ...@@ -398,6 +405,7 @@ void ClFlags::getParsedClFlags(ClFlags &OutFlags) {
} }
OutFlags.setAllowErrorRecovery(::AllowErrorRecovery); OutFlags.setAllowErrorRecovery(::AllowErrorRecovery);
OutFlags.setAllowIacaMarks(::AllowIacaMarks);
OutFlags.setAllowUninitializedGlobals(::AllowUninitializedGlobals); OutFlags.setAllowUninitializedGlobals(::AllowUninitializedGlobals);
OutFlags.setDataSections(::DataSections); OutFlags.setDataSections(::DataSections);
OutFlags.setDecorateAsm(::DecorateAsm); OutFlags.setDecorateAsm(::DecorateAsm);
......
...@@ -39,6 +39,9 @@ public: ...@@ -39,6 +39,9 @@ public:
bool getAllowErrorRecovery() const { return AllowErrorRecovery; } bool getAllowErrorRecovery() const { return AllowErrorRecovery; }
void setAllowErrorRecovery(bool NewValue) { AllowErrorRecovery = NewValue; } void setAllowErrorRecovery(bool NewValue) { AllowErrorRecovery = NewValue; }
/// Returns whether inserting IACA (Intel Architecture Code Analyzer) marks is
/// allowed.
bool getAllowIacaMarks() const { return AllowIacaMarks; }
/// Sets whether inserting IACA marks is allowed.
void setAllowIacaMarks(bool NewValue) { AllowIacaMarks = NewValue; }
bool getAllowUninitializedGlobals() const { bool getAllowUninitializedGlobals() const {
return AllowUninitializedGlobals; return AllowUninitializedGlobals;
} }
...@@ -229,6 +232,7 @@ public: ...@@ -229,6 +232,7 @@ public:
private: private:
bool AllowErrorRecovery; bool AllowErrorRecovery;
bool AllowIacaMarks;
bool AllowUninitializedGlobals; bool AllowUninitializedGlobals;
bool DataSections; bool DataSections;
bool DecorateAsm; bool DecorateAsm;
......
...@@ -132,7 +132,12 @@ public: ...@@ -132,7 +132,12 @@ public:
Xadd, Xadd,
Xchg, Xchg,
Xor, Xor,
XorRMW XorRMW,
/// Intel Architecture Code Analyzer markers. These are not executable so
/// must only be used for analysis.
IacaStart,
IacaEnd
}; };
static const char *getWidthString(Type Ty); static const char *getWidthString(Type Ty);
...@@ -2694,6 +2699,53 @@ private: ...@@ -2694,6 +2699,53 @@ private:
InstX86Xchg(Cfg *Func, Operand *Dest, Variable *Source); InstX86Xchg(Cfg *Func, Operand *Dest, Variable *Source);
}; };
/// Marks the beginning of a region to be examined with the Intel Architecture
/// Code Analyzer. It is a marker rather than an executable instruction, so it
/// must only be used when generating code for analysis.
template <class Machine>
class InstX86IacaStart final : public InstX86Base<Machine> {
  InstX86IacaStart() = delete;
  InstX86IacaStart(const InstX86IacaStart &) = delete;
  InstX86IacaStart &operator=(const InstX86IacaStart &) = delete;

public:
  /// Constructs a start marker in storage obtained from \p Func.
  static InstX86IacaStart *create(Cfg *Func) {
    auto *Storage = Func->allocate<InstX86IacaStart>();
    return new (Storage) InstX86IacaStart(Func);
  }
  void emit(const Cfg *Func) const override;
  void emitIAS(const Cfg *Func) const override;
  void dump(const Cfg *Func) const override;
  /// Reports whether \p Instr is an IacaStart-kind instruction.
  static bool classof(const Inst *Instr) {
    const auto Kind = InstX86Base<Machine>::IacaStart;
    return InstX86Base<Machine>::isClassof(Instr, Kind);
  }

private:
  InstX86IacaStart(Cfg *Func);
};
/// Marks the end of a region to be examined with the Intel Architecture Code
/// Analyzer. It is a marker rather than an executable instruction, so it must
/// only be used when generating code for analysis.
template <class Machine>
class InstX86IacaEnd final : public InstX86Base<Machine> {
  InstX86IacaEnd() = delete;
  InstX86IacaEnd(const InstX86IacaEnd &) = delete;
  InstX86IacaEnd &operator=(const InstX86IacaEnd &) = delete;

public:
  /// Constructs an end marker in storage obtained from \p Func.
  static InstX86IacaEnd *create(Cfg *Func) {
    auto *Storage = Func->allocate<InstX86IacaEnd>();
    return new (Storage) InstX86IacaEnd(Func);
  }
  void emit(const Cfg *Func) const override;
  void emitIAS(const Cfg *Func) const override;
  void dump(const Cfg *Func) const override;
  /// Reports whether \p Instr is an IacaEnd-kind instruction.
  static bool classof(const Inst *Instr) {
    const auto Kind = InstX86Base<Machine>::IacaEnd;
    return InstX86Base<Machine>::isClassof(Instr, Kind);
  }

private:
  InstX86IacaEnd(Cfg *Func);
};
/// struct Insts is a template that can be used to instantiate all the X86 /// struct Insts is a template that can be used to instantiate all the X86
/// instructions for a target with a simple /// instructions for a target with a simple
/// ///
...@@ -2798,6 +2850,9 @@ template <class Machine> struct Insts { ...@@ -2798,6 +2850,9 @@ template <class Machine> struct Insts {
using Setcc = InstX86Setcc<Machine>; using Setcc = InstX86Setcc<Machine>;
using Xadd = InstX86Xadd<Machine>; using Xadd = InstX86Xadd<Machine>;
using Xchg = InstX86Xchg<Machine>; using Xchg = InstX86Xchg<Machine>;
using IacaStart = InstX86IacaStart<Machine>;
using IacaEnd = InstX86IacaEnd<Machine>;
}; };
/// X86 Instructions have static data (particularly, opcodes and instruction /// X86 Instructions have static data (particularly, opcodes and instruction
......
...@@ -367,6 +367,18 @@ InstX86Xchg<Machine>::InstX86Xchg(Cfg *Func, Operand *Dest, Variable *Source) ...@@ -367,6 +367,18 @@ InstX86Xchg<Machine>::InstX86Xchg(Cfg *Func, Operand *Dest, Variable *Source)
this->addSource(Source); this->addSource(Source);
} }
/// Builds an IacaStart-kind instruction. The trailing 0 and nullptr base
/// arguments are presumably the source count and destination -- TODO confirm
/// against the InstX86Base constructor.
template <class Machine>
InstX86IacaStart<Machine>::InstX86IacaStart(Cfg *Func)
: InstX86Base<Machine>(Func, InstX86Base<Machine>::IacaStart, 0, nullptr) {
// Markers must only be created when the allow-iaca-marks flag is set. This is
// enforced by assert only, so the check is absent when asserts are disabled.
assert(Func->getContext()->getFlags().getAllowIacaMarks());
}
/// Builds an IacaEnd-kind instruction. The trailing 0 and nullptr base
/// arguments are presumably the source count and destination -- TODO confirm
/// against the InstX86Base constructor.
template <class Machine>
InstX86IacaEnd<Machine>::InstX86IacaEnd(Cfg *Func)
: InstX86Base<Machine>(Func, InstX86Base<Machine>::IacaEnd, 0, nullptr) {
// Markers must only be created when the allow-iaca-marks flag is set. This is
// enforced by assert only, so the check is absent when asserts are disabled.
assert(Func->getContext()->getFlags().getAllowIacaMarks());
}
// ======================== Dump routines ======================== // // ======================== Dump routines ======================== //
template <class Machine> template <class Machine>
...@@ -3156,6 +3168,58 @@ void InstX86Xchg<Machine>::dump(const Cfg *Func) const { ...@@ -3156,6 +3168,58 @@ void InstX86Xchg<Machine>::dump(const Cfg *Func) const {
this->dumpSources(Func); this->dumpSources(Func);
} }
/// Writes the textual assembly for the IACA start marker to the emit stream:
/// a comment line, the 0x0F 0x0B bytes, the move of 111 into %ebx, and the
/// 0x64 0x67 0x90 bytes. Does nothing when BuildDefs::dump() is false.
template <class Machine>
void InstX86IacaStart<Machine>::emit(const Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Out = Func->getContext()->getStrEmit();
  Out << "\t# IACA_START\n";
  Out << "\t.byte 0x0F, 0x0B\n";
  Out << "\tmovl\t$111, %ebx\n";
  // No trailing newline after the final directive.
  Out << "\t.byte 0x64, 0x67, 0x90";
}
/// Emits the IACA start marker through the function's assembler by calling
/// its iaca_start().
template <class Machine>
void InstX86IacaStart<Machine>::emitIAS(const Cfg *Func) const {
  using AssemblerTy = typename InstX86Base<Machine>::Traits::Assembler;
  AssemblerTy *Emitter = Func->getAssembler<AssemblerTy>();
  Emitter->iaca_start();
}
/// Writes "IACA_START" to the dump stream. Does nothing when
/// BuildDefs::dump() is false.
template <class Machine>
void InstX86IacaStart<Machine>::dump(const Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  Func->getContext()->getStrDump() << "IACA_START";
}
/// Writes the textual assembly for the IACA end marker to the emit stream: a
/// comment line, the move of 222 into %ebx, the 0x64 0x67 0x90 bytes, and the
/// 0x0F 0x0B bytes. Does nothing when BuildDefs::dump() is false.
template <class Machine>
void InstX86IacaEnd<Machine>::emit(const Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Out = Func->getContext()->getStrEmit();
  Out << "\t# IACA_END\n";
  Out << "\tmovl\t$222, %ebx\n";
  Out << "\t.byte 0x64, 0x67, 0x90\n";
  // No trailing newline after the final directive.
  Out << "\t.byte 0x0F, 0x0B";
}
/// Emits the IACA end marker through the function's assembler by calling its
/// iaca_end().
template <class Machine>
void InstX86IacaEnd<Machine>::emitIAS(const Cfg *Func) const {
  using AssemblerTy = typename InstX86Base<Machine>::Traits::Assembler;
  AssemblerTy *Emitter = Func->getAssembler<AssemblerTy>();
  Emitter->iaca_end();
}
/// Writes "IACA_END" to the dump stream. Does nothing when BuildDefs::dump()
/// is false.
template <class Machine>
void InstX86IacaEnd<Machine>::dump(const Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  Func->getContext()->getStrDump() << "IACA_END";
}
} // end of namespace X86Internal } // end of namespace X86Internal
} // end of namespace Ice } // end of namespace Ice
......
...@@ -578,6 +578,37 @@ protected: ...@@ -578,6 +578,37 @@ protected:
Context.insert(Traits::Insts::XorRMW::create(Func, DestSrc0, Src1)); Context.insert(Traits::Insts::XorRMW::create(Func, DestSrc0, Src1));
} }
/// Inserts an IACA start marker at the current lowering position. This is a
/// no-op in minimal builds.
void _iaca_start() {
  if (BuildDefs::minimal())
    return;
  auto *StartMark = Traits::Insts::IacaStart::create(Func);
  Context.insert(StartMark);
}
/// Inserts an IACA end marker at the current lowering position. This is a
/// no-op in minimal builds.
void _iaca_end() {
  if (BuildDefs::minimal())
    return;
  auto *EndMark = Traits::Insts::IacaEnd::create(Func);
  Context.insert(EndMark);
}
/// This class helps wrap IACA markers around the code generated by the
/// current scope. It means you don't need to put an end before each return.
///
/// Constructing the object inserts the start marker; the end marker is
/// inserted either by an explicit call to end() or by the destructor,
/// whichever happens first, and never more than once.
class ScopedIacaMark {
  ScopedIacaMark(const ScopedIacaMark &) = delete;
  ScopedIacaMark &operator=(const ScopedIacaMark &) = delete;

public:
  /// Inserts the start marker through \p Lowering, which must be non-null
  /// because it is dereferenced immediately. The constructor is explicit so
  /// that a lowering pointer is never silently converted into a marker guard.
  explicit ScopedIacaMark(TargetX86Base *Lowering) : Lowering(Lowering) {
    Lowering->_iaca_start();
  }
  /// Inserts the end marker unless end() has already done so.
  ~ScopedIacaMark() { end(); }
  /// Inserts the end marker now. Later calls, including the one made by the
  /// destructor, do nothing.
  void end() {
    if (!Lowering)
      return;
    Lowering->_iaca_end();
    // A null pointer records that the mark has already been closed.
    Lowering = nullptr;
  }

private:
  /// Target lowering used to insert the markers; null once the mark is closed.
  TargetX86Base *Lowering;
};
bool optimizeScalarMul(Variable *Dest, Operand *Src0, int32_t Src1); bool optimizeScalarMul(Variable *Dest, Operand *Src0, int32_t Src1);
void findRMW(); void findRMW();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment