Commit 179a55d7 by Eric Holk

Subzero, WASM: stop writing uninitialized data to .o file. Add timers.

Previously we were writing large numbers of zeros to the output file. This change only writes out the initialized portion and allocates the full address space at runtime. This reduces compile time by around 50%. This change also adds a couple of WASM-specific timers. BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4369 R=jpp@chromium.org, stichnot@chromium.org Review URL: https://codereview.chromium.org/1938643002 .
parent 87def2c8
...@@ -12,11 +12,14 @@ ...@@ -12,11 +12,14 @@
// //
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include <algorithm>
#include <cassert> #include <cassert>
#include <cmath> #include <cmath>
#include <iostream>
#include <vector>
#include <errno.h> #include <errno.h>
#include <fcntl.h> #include <fcntl.h>
#include <iostream>
#include <math.h> #include <math.h>
#include <stdint.h> #include <stdint.h>
#include <stdio.h> #include <stdio.h>
...@@ -44,7 +47,7 @@ void trace() {} ...@@ -44,7 +47,7 @@ void trace() {}
#endif // WASM_TRACE_RUNTIME #endif // WASM_TRACE_RUNTIME
extern "C" { extern "C" {
extern char WASM_MEMORY[]; char *WASM_MEMORY;
extern uint32_t WASM_DATA_SIZE; extern uint32_t WASM_DATA_SIZE;
extern uint32_t WASM_NUM_PAGES; extern uint32_t WASM_NUM_PAGES;
} // end of extern "C" } // end of extern "C"
...@@ -137,6 +140,8 @@ void __Sz_indirect_fail() { ...@@ -137,6 +140,8 @@ void __Sz_indirect_fail() {
abort(); abort();
} }
extern char WASM_DATA_INIT[];
void env$$abort() { void env$$abort() {
fprintf(stderr, "Aborting...\n"); fprintf(stderr, "Aborting...\n");
abort(); abort();
...@@ -217,6 +222,11 @@ extern int __szwasm_main(int, WasmPtr<WasmCharPtr>); ...@@ -217,6 +222,11 @@ extern int __szwasm_main(int, WasmPtr<WasmCharPtr>);
#define WASM_DEREF(Type, Index) (*WASM_REF(Type, Index)) #define WASM_DEREF(Type, Index) (*WASM_REF(Type, Index))
int main(int argc, const char **argv) { int main(int argc, const char **argv) {
// Create the heap.
std::vector<char> WasmHeap(WASM_NUM_PAGES << PageSizeLog2);
WASM_MEMORY = WasmHeap.data();
std::copy(WASM_DATA_INIT, WASM_DATA_INIT + WASM_DATA_SIZE, WasmHeap.begin());
// TODO (eholk): align these allocations correctly. // TODO (eholk): align these allocations correctly.
// Allocate space for the global data. // Allocate space for the global data.
......
...@@ -65,6 +65,8 @@ ...@@ -65,6 +65,8 @@
X(translateFunctions) \ X(translateFunctions) \
X(validateLiveness) \ X(validateLiveness) \
X(vmetadata) \ X(vmetadata) \
X(wasm) \
X(wasmGenIce) \
X(writeELF) X(writeELF)
//#define X(tag) //#define X(tag)
......
...@@ -276,6 +276,16 @@ public: ...@@ -276,6 +276,16 @@ public:
auto *Entry = Func->getEntryNode(); auto *Entry = Func->getEntryNode();
assert(Entry); assert(Entry);
LOG(out << Node(Entry) << "\n"); LOG(out << Node(Entry) << "\n");
// Load the WasmMemory address to make it available everywhere else in the
// function.
auto *WasmMemoryPtr =
Ctx->getConstantExternSym(Ctx->getGlobalString("WASM_MEMORY"));
assert(WasmMemory == nullptr);
auto *WasmMemoryV = makeVariable(getPointerType());
Entry->appendInst(InstLoad::create(Func, WasmMemoryV, WasmMemoryPtr));
WasmMemory = WasmMemoryV;
return OperandNode(Entry); return OperandNode(Entry);
} }
Node Param(uint32_t Index, wasm::LocalType Type) { Node Param(uint32_t Index, wasm::LocalType Type) {
...@@ -1162,71 +1172,6 @@ public: ...@@ -1162,71 +1172,6 @@ public:
llvm::report_fatal_error("StoreGlobal"); llvm::report_fatal_error("StoreGlobal");
} }
// Emits the instructions that turn a WASM memory index (Base) plus a static
// Offset into an address relative to the WASM_MEMORY symbol, and returns the
// operand holding that address.
//
// A constant Base is folded with Offset at translation time; any other Base
// gets an emitted add (when Offset is non-zero) followed by an emitted
// unsigned compare against MemSize that branches to the bounds-fail target.
//
// NOTE(review): both the folded sum below and the emitted Add appear to be
// 32-bit operations, so Base + Offset may wrap around before it is compared
// against MemSize -- confirm whether a wrapped sum can slip past the check.
Operand *sanitizeAddress(Operand *Base, uint32_t Offset) {
// Size limit used by every bounds check in this function.
SizeT MemSize = Module->module->min_mem_pages * WASM_PAGE_SIZE;
// Set when the folded constant address is 0, so the final add can be skipped.
bool ConstZeroBase = false;
// first, add the index and the offset together.
if (auto *ConstBase = llvm::dyn_cast<ConstantInteger32>(Base)) {
uint32_t RealOffset = Offset + ConstBase->getValue();
if (RealOffset >= MemSize) {
// We've proven this will always be an out of bounds access, so insert
// an unconditional trap.
Control()->appendInst(InstUnreachable::create(Func));
// It doesn't matter what we return here, so return something that will
// allow the rest of code generation to happen.
//
// We might be tempted to just abort translation here, but out of bounds
// memory access is a runtime trap, not a compile error.
return Ctx->getConstantZero(getPointerType());
}
// Base stays a constant, which is what makes the check below get skipped.
Base = Ctx->getConstantInt32(RealOffset);
ConstZeroBase = (0 == RealOffset);
} else if (0 != Offset) {
// Non-constant index: emit Addr = Base + Offset and use Addr from here on.
auto *Addr = makeVariable(Ice::getPointerType());
auto *OffsetConstant = Ctx->getConstantInt32(Offset);
Control()->appendInst(InstArithmetic::create(Func, InstArithmetic::Add,
Addr, Base, OffsetConstant));
Base = Addr;
}
// Do the bounds check.
//
// TODO (eholk): Add a command line argument to control whether bounds
// checks are inserted, and maybe add a way to duplicate bounds checks to
// get a better sense of the overhead.
if (!llvm::dyn_cast<ConstantInteger32>(Base)) {
// TODO (eholk): creating a new basic block on every memory access is
// terrible (see https://goo.gl/Zj7DTr). Try adding a new instruction that
// encapsulates this "abort if false" pattern.
auto *CheckPassed = Func->makeNode();
auto *CheckFailed = getBoundsFailTarget();
auto *Check = makeVariable(IceType_i1);
Control()->appendInst(InstIcmp::create(Func, InstIcmp::Ult, Check, Base,
Ctx->getConstantInt32(MemSize)));
Control()->appendInst(
InstBr::create(Func, Check, CheckPassed, CheckFailed));
// Instructions appended after this point go into the CheckPassed node.
*ControlPtr = OperandNode(CheckPassed);
}
Ice::Operand *RealAddr = nullptr;
// Constant referring to the WASM_MEMORY symbol (the 0 is presumably the
// offset from that symbol -- TODO confirm).
auto MemBase = Ctx->getConstantSym(0, Ctx->getGlobalString("WASM_MEMORY"));
if (!ConstZeroBase) {
// Emit RealAddrV = Base + MemBase.
auto RealAddrV = Func->makeVariable(Ice::getPointerType());
Control()->appendInst(InstArithmetic::create(Func, InstArithmetic::Add,
RealAddrV, Base, MemBase));
RealAddr = RealAddrV;
} else {
// Address 0 maps straight onto the memory base; no add is needed.
RealAddr = MemBase;
}
return RealAddr;
}
Node LoadMem(wasm::LocalType Type, MachineType MemType, Node Index, Node LoadMem(wasm::LocalType Type, MachineType MemType, Node Index,
uint32_t Offset) { uint32_t Offset) {
LOG(out << "LoadMem." << toIceType(MemType) << "(" << Index << "[" << Offset LOG(out << "LoadMem." << toIceType(MemType) << "(" << Index << "[" << Offset
...@@ -1318,6 +1263,8 @@ private: ...@@ -1318,6 +1263,8 @@ private:
CfgUnorderedMap<Operand *, InstPhi *> PhiMap; CfgUnorderedMap<Operand *, InstPhi *> PhiMap;
CfgUnorderedMap<Operand *, CfgNode *> DefNodeMap; CfgUnorderedMap<Operand *, CfgNode *> DefNodeMap;
Operand *WasmMemory = nullptr;
InstPhi *getDefiningInst(Operand *Op) const { InstPhi *getDefiningInst(Operand *Op) const {
const auto &Iter = PhiMap.find(Op); const auto &Iter = PhiMap.find(Op);
if (Iter == PhiMap.end()) { if (Iter == PhiMap.end()) {
...@@ -1378,6 +1325,76 @@ private: ...@@ -1378,6 +1325,76 @@ private:
return IndirectFailTarget; return IndirectFailTarget;
} }
/// Returns the operand stored in the WasmMemory member. Asserts that the
/// member has been set, i.e. that it is no longer nullptr.
Operand *getWasmMemory() {
  Operand *const HeapBase = WasmMemory;
  assert(HeapBase != nullptr);
  return HeapBase;
}
/// Emits the instructions that turn a WASM memory index plus a static offset
/// into an address based on the operand returned by getWasmMemory().
///
/// \param Base   the WASM index operand; may be a ConstantInteger32 or any
///               other operand.
/// \param Offset the static offset attached to the memory access.
/// \return the operand holding the final address. When the access is proven
///         to be out of bounds at translation time, an InstUnreachable is
///         emitted and a zero pointer constant is returned so that code
///         generation can continue.
///
/// The effective address Base + Offset must be compared against the memory
/// size without wrapping at 32 bits; otherwise a large Offset (or Base) can
/// wrap to a small value and slip past the check. The constant case therefore
/// sums in 64 bits, and the dynamic case checks Base < MemSize - Offset
/// *before* adding Offset.
Operand *sanitizeAddress(Operand *Base, uint32_t Offset) {
  const SizeT MemSize = Module->module->min_mem_pages * WASM_PAGE_SIZE;
  bool ConstZeroBase = false;
  // Offset that still has to be added to a non-constant Base once the bounds
  // check has passed.
  uint32_t PendingOffset = 0;

  if (auto *ConstBase = llvm::dyn_cast<ConstantInteger32>(Base)) {
    // Fold index and offset at translation time. Both are treated as
    // unsigned 32-bit values and summed in 64 bits so the sum cannot wrap.
    const uint64_t RealOffset =
        static_cast<uint64_t>(Offset) +
        static_cast<uint32_t>(ConstBase->getValue());
    if (RealOffset >= MemSize) {
      // We've proven this will always be an out of bounds access, so insert
      // an unconditional trap.
      Control()->appendInst(InstUnreachable::create(Func));
      // It doesn't matter what we return here, so return something that will
      // allow the rest of code generation to happen.
      //
      // We might be tempted to just abort translation here, but out of bounds
      // memory access is a runtime trap, not a compile error.
      return Ctx->getConstantZero(getPointerType());
    }
    // RealOffset < MemSize here, so it fits in 32 bits.
    Base = Ctx->getConstantInt32(static_cast<int32_t>(RealOffset));
    ConstZeroBase = (0 == RealOffset);
  } else if (0 != Offset) {
    if (Offset >= MemSize) {
      // Whatever Base turns out to be, Base + Offset >= MemSize, so this is
      // always out of bounds. Handle it like the constant case above.
      Control()->appendInst(InstUnreachable::create(Func));
      return Ctx->getConstantZero(getPointerType());
    }
    PendingOffset = Offset;
  }

  // Do the bounds check.
  //
  // TODO (eholk): Add a command line argument to control whether bounds
  // checks are inserted, and maybe add a way to duplicate bounds checks to
  // get a better sense of the overhead.
  if (!llvm::dyn_cast<ConstantInteger32>(Base)) {
    // TODO (eholk): creating a new basic block on every memory access is
    // terrible (see https://goo.gl/Zj7DTr). Try adding a new instruction that
    // encapsulates this "abort if false" pattern.
    auto *CheckPassed = Func->makeNode();
    auto *CheckFailed = getBoundsFailTarget();
    auto *Check = makeVariable(IceType_i1);
    // Base < MemSize - PendingOffset is equivalent to
    // Base + PendingOffset < MemSize, but cannot overflow because
    // PendingOffset < MemSize was established above.
    Control()->appendInst(
        InstIcmp::create(Func, InstIcmp::Ult, Check, Base,
                         Ctx->getConstantInt32(MemSize - PendingOffset)));
    Control()->appendInst(
        InstBr::create(Func, Check, CheckPassed, CheckFailed));
    // Instructions appended from here on go into the CheckPassed node.
    *ControlPtr = OperandNode(CheckPassed);

    if (0 != PendingOffset) {
      // Now that the index is known to be in range, add the static offset.
      auto *Addr = makeVariable(Ice::getPointerType());
      auto *OffsetConstant = Ctx->getConstantInt32(PendingOffset);
      Control()->appendInst(InstArithmetic::create(
          Func, InstArithmetic::Add, Addr, Base, OffsetConstant));
      Base = Addr;
    }
  }

  Ice::Operand *RealAddr = nullptr;
  auto MemBase = getWasmMemory();
  if (!ConstZeroBase) {
    // Emit RealAddrV = Base + MemBase.
    auto RealAddrV = Func->makeVariable(Ice::getPointerType());
    Control()->appendInst(InstArithmetic::create(Func, InstArithmetic::Add,
                                                 RealAddrV, Base, MemBase));
    RealAddr = RealAddrV;
  } else {
    // Address 0 maps straight onto the memory base; no add is needed.
    RealAddr = MemBase;
  }
  return RealAddr;
}
template <typename F = std::function<void(Ostream &)>> void log(F Fn) const { template <typename F = std::function<void(Ostream &)>> void log(F Fn) const {
if (BuildDefs::dump() && (getFlags().getVerbose() & IceV_Wasm)) { if (BuildDefs::dump() && (getFlags().getVerbose() & IceV_Wasm)) {
Fn(Ctx->getStrDump()); Fn(Ctx->getStrDump());
...@@ -1390,6 +1407,7 @@ std::unique_ptr<Cfg> WasmTranslator::translateFunction(Zone *Zone, ...@@ -1390,6 +1407,7 @@ std::unique_ptr<Cfg> WasmTranslator::translateFunction(Zone *Zone,
FunctionBody &Body) { FunctionBody &Body) {
OstreamLocker L1(Ctx); OstreamLocker L1(Ctx);
auto Func = Cfg::create(Ctx, getNextSequenceNumber()); auto Func = Cfg::create(Ctx, getNextSequenceNumber());
TimerMarker T(TimerStack::TT_wasmGenIce, Func.get());
Ice::CfgLocalAllocatorScope L2(Func.get()); Ice::CfgLocalAllocatorScope L2(Func.get());
// TODO(eholk): parse the function signature... // TODO(eholk): parse the function signature...
...@@ -1419,6 +1437,8 @@ WasmTranslator::WasmTranslator(GlobalContext *Ctx) ...@@ -1419,6 +1437,8 @@ WasmTranslator::WasmTranslator(GlobalContext *Ctx)
void WasmTranslator::translate( void WasmTranslator::translate(
const std::string &IRFilename, const std::string &IRFilename,
std::unique_ptr<llvm::DataStreamer> InputStream) { std::unique_ptr<llvm::DataStreamer> InputStream) {
TimerMarker T(TimerStack::TT_wasm, Ctx);
LOG(out << "Initializing v8/wasm stuff..." LOG(out << "Initializing v8/wasm stuff..."
<< "\n"); << "\n");
Zone Zone; Zone Zone;
...@@ -1533,7 +1553,7 @@ void WasmTranslator::translate( ...@@ -1533,7 +1553,7 @@ void WasmTranslator::translate(
// Global variables, etc go here. // Global variables, etc go here.
auto *WasmMemory = VariableDeclaration::createExternal(Globals.get()); auto *WasmMemory = VariableDeclaration::createExternal(Globals.get());
WasmMemory->setName(Ctx->getGlobalString("WASM_MEMORY")); WasmMemory->setName(Ctx->getGlobalString("WASM_DATA_INIT"));
// Fill in the segments // Fill in the segments
SizeT WritePtr = 0; SizeT WritePtr = 0;
...@@ -1562,13 +1582,6 @@ void WasmTranslator::translate( ...@@ -1562,13 +1582,6 @@ void WasmTranslator::translate(
Globals.get(), reinterpret_cast<const char *>(&WritePtr), Globals.get(), reinterpret_cast<const char *>(&WritePtr),
sizeof(WritePtr))); sizeof(WritePtr)));
// Pad the rest with zeros
SizeT DataSize = Module->min_mem_pages * WASM_PAGE_SIZE;
if (WritePtr < DataSize) {
WasmMemory->addInitializer(VariableDeclaration::ZeroInitializer::create(
Globals.get(), DataSize - WritePtr));
}
// Save the number of pages for the runtime // Save the number of pages for the runtime
auto *GlobalNumPages = VariableDeclaration::createExternal(Globals.get()); auto *GlobalNumPages = VariableDeclaration::createExternal(Globals.get());
GlobalNumPages->setName(Ctx->getGlobalString("WASM_NUM_PAGES")); GlobalNumPages->setName(Ctx->getGlobalString("WASM_NUM_PAGES"));
...@@ -1594,9 +1607,12 @@ void WasmTranslator::translate( ...@@ -1594,9 +1607,12 @@ void WasmTranslator::translate(
Body.start = Buffer.data() + Fn.code_start_offset; Body.start = Buffer.data() + Fn.code_start_offset;
Body.end = Buffer.data() + Fn.code_end_offset; Body.end = Buffer.data() + Fn.code_end_offset;
auto Func = translateFunction(&Zone, Body); std::unique_ptr<Cfg> Func = nullptr;
Func->setFunctionName(Ctx->getGlobalString(FnName)); {
TimerMarker T_func(getContext(), FnName);
Func = translateFunction(&Zone, Body);
Func->setFunctionName(Ctx->getGlobalString(FnName));
}
Ctx->optQueueBlockingPush(makeUnique<CfgOptWorkItem>(std::move(Func))); Ctx->optQueueBlockingPush(makeUnique<CfgOptWorkItem>(std::move(Func)));
LOG(out << "done.\n"); LOG(out << "done.\n");
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment