Commit 5ba2a5b9 by Antonio Maiorano

Subzero: implement coroutines for Win32

Coroutines are emulated by using fibers.

Bug: b/145754674
Change-Id: I3f4bf29d26a75a2386ed812dd821d8a7a8276305
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/40548
Tested-by: Antonio Maiorano <amaiorano@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
parent 7a53cb62
...@@ -136,6 +136,11 @@ public: ...@@ -136,6 +136,11 @@ public:
// called without building a new rr::Function or rr::Coroutine. // called without building a new rr::Function or rr::Coroutine.
// While automatically called by operator(), finalize() should be called // While automatically called by operator(), finalize() should be called
// as early as possible to release the global Reactor mutex lock. // as early as possible to release the global Reactor mutex lock.
// It must also be called explicitly on the same thread that instantiates
// the Coroutine instance if operator() is invoked on separate threads.
// This is because presently, Reactor backends use a global mutex scoped
// to the generation of routines, and these must be locked/unlocked on the
// same thread.
inline void finalize(const Config::Edit &cfg = Config::Edit::None); inline void finalize(const Config::Edit &cfg = Config::Edit::None);
// Starts execution of the coroutine and returns a unique_ptr to a // Starts execution of the coroutine and returns a unique_ptr to a
...@@ -182,9 +187,16 @@ Coroutine<Return(Arguments...)>::operator()(Arguments... args) ...@@ -182,9 +187,16 @@ Coroutine<Return(Arguments...)>::operator()(Arguments... args)
{ {
finalize(); finalize();
using Sig = Nucleus::CoroutineBegin<Arguments...>; // TODO(b/148400732): Go back to just calling the CoroutineEntryBegin function directly.
auto pfn = (Sig *)routine->getEntry(Nucleus::CoroutineEntryBegin); std::function<Nucleus::CoroutineHandle()> coroutineBegin = [=] {
auto handle = pfn(args...); using Sig = Nucleus::CoroutineBegin<Arguments...>;
auto pfn = (Sig *)routine->getEntry(Nucleus::CoroutineEntryBegin);
auto handle = pfn(args...);
return handle;
};
auto handle = Nucleus::invokeCoroutineBegin(*routine, coroutineBegin);
return std::make_unique<Stream<Return>>(routine, handle); return std::make_unique<Stream<Return>>(routine, handle);
} }
......
...@@ -1302,9 +1302,9 @@ Type *T(InternalType t) ...@@ -1302,9 +1302,9 @@ Type *T(InternalType t)
return reinterpret_cast<Type *>(t); return reinterpret_cast<Type *>(t);
} }
inline std::vector<llvm::Type *> &T(std::vector<Type *> &t) inline const std::vector<llvm::Type *> &T(const std::vector<Type *> &t)
{ {
return reinterpret_cast<std::vector<llvm::Type *> &>(t); return reinterpret_cast<const std::vector<llvm::Type *> &>(t);
} }
inline llvm::BasicBlock *B(BasicBlock *t) inline llvm::BasicBlock *B(BasicBlock *t)
...@@ -1506,7 +1506,7 @@ void Nucleus::setInsertBlock(BasicBlock *basicBlock) ...@@ -1506,7 +1506,7 @@ void Nucleus::setInsertBlock(BasicBlock *basicBlock)
jit->builder->SetInsertPoint(B(basicBlock)); jit->builder->SetInsertPoint(B(basicBlock));
} }
void Nucleus::createFunction(Type *ReturnType, std::vector<Type *> &Params) void Nucleus::createFunction(Type *ReturnType, const std::vector<Type *> &Params)
{ {
jit->function = rr::createFunction("", T(ReturnType), T(Params)); jit->function = rr::createFunction("", T(ReturnType), T(Params));
...@@ -4964,7 +4964,7 @@ void promoteFunctionToCoroutine() ...@@ -4964,7 +4964,7 @@ void promoteFunctionToCoroutine()
namespace rr { namespace rr {
void Nucleus::createCoroutine(Type *YieldType, std::vector<Type *> &Params) void Nucleus::createCoroutine(Type *YieldType, const std::vector<Type *> &Params)
{ {
// Coroutines are initially created as a regular function. // Coroutines are initially created as a regular function.
// Upon the first call to Yield(), the function is promoted to a true // Upon the first call to Yield(), the function is promoted to a true
...@@ -5123,4 +5123,9 @@ std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char *name, const Confi ...@@ -5123,4 +5123,9 @@ std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char *name, const Confi
return routine; return routine;
} }
// Runs the wrapped CoroutineEntryBegin call. In this implementation func is
// invoked directly on the calling thread and its handle is returned as-is;
// the routine parameter is not used here.
Nucleus::CoroutineHandle Nucleus::invokeCoroutineBegin(Routine &routine, std::function<Nucleus::CoroutineHandle()> func)
{
	auto coroutineHandle = func();
	return coroutineHandle;
}
} // namespace rr } // namespace rr
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <cassert> #include <cassert>
#include <cstdarg> #include <cstdarg>
#include <cstdint> #include <cstdint>
#include <functional>
#include <memory> #include <memory>
#include <vector> #include <vector>
...@@ -171,7 +172,7 @@ public: ...@@ -171,7 +172,7 @@ public:
static BasicBlock *getInsertBlock(); static BasicBlock *getInsertBlock();
static void setInsertBlock(BasicBlock *basicBlock); static void setInsertBlock(BasicBlock *basicBlock);
static void createFunction(Type *ReturnType, std::vector<Type *> &Params); static void createFunction(Type *returnType, const std::vector<Type *> &paramTypes);
static Value *getArgument(unsigned int index); static Value *getArgument(unsigned int index);
// Coroutines // Coroutines
...@@ -190,9 +191,21 @@ public: ...@@ -190,9 +191,21 @@ public:
CoroutineEntryCount CoroutineEntryCount
}; };
static void createCoroutine(Type *ReturnType, std::vector<Type *> &Params); // Begins the generation of the three coroutine functions: CoroutineBegin, CoroutineAwait, and CoroutineDestroy,
// which will be returned by Routine::getEntry() with arg CoroutineEntryBegin, CoroutineEntryAwait, and CoroutineEntryDestroy
// respectively. Called by Coroutine constructor.
// Params are used to generate the params to CoroutineBegin, while ReturnType is used as the YieldType for the coroutine,
// returned via CoroutineAwait.
static void createCoroutine(Type *returnType, const std::vector<Type *> &params);
// Generates code to store the passed in value, and to suspend execution of the coroutine, such that the next call to
// CoroutineAwait can set the output yieldValue and resume execution of the coroutine.
static void yield(Value *val);
// Called to finalize coroutine creation. After this call, Routine::getEntry can be called to retrieve the entry point to any
// of the three coroutine functions. Called by Coroutine::finalize.
std::shared_ptr<Routine> acquireCoroutine(const char *name, const Config::Edit &cfg = Config::Edit::None); std::shared_ptr<Routine> acquireCoroutine(const char *name, const Config::Edit &cfg = Config::Edit::None);
static void yield(Value *); // Called by Coroutine::operator() to execute CoroutineEntryBegin wrapped up in func. This is needed in case
// the call must be run on a separate thread of execution (e.g. on a fiber).
static CoroutineHandle invokeCoroutineBegin(Routine &routine, std::function<CoroutineHandle()> func);
// Terminators // Terminators
static void createRetVoid(); static void createRetVoid();
......
...@@ -88,7 +88,7 @@ private: ...@@ -88,7 +88,7 @@ private:
void setLoadStoreInsts(Ice::CfgNode *, std::vector<LoadStoreInst> *); void setLoadStoreInsts(Ice::CfgNode *, std::vector<LoadStoreInst> *);
bool hasLoadStoreInsts(Ice::CfgNode *node) const; bool hasLoadStoreInsts(Ice::CfgNode *node) const;
std::vector<Optimizer::Uses *> allocatedUses; std::vector<Ice::Operand *> operandsWithUses;
}; };
void Optimizer::run(Ice::Cfg *function) void Optimizer::run(Ice::Cfg *function)
...@@ -104,11 +104,13 @@ void Optimizer::run(Ice::Cfg *function) ...@@ -104,11 +104,13 @@ void Optimizer::run(Ice::Cfg *function)
optimizeStoresInSingleBasicBlock(); optimizeStoresInSingleBasicBlock();
eliminateDeadCode(); eliminateDeadCode();
for(auto uses : allocatedUses) for(auto operand : operandsWithUses)
{ {
auto uses = reinterpret_cast<Uses *>(operand->getExternalData());
delete uses; delete uses;
operand->setExternalData(nullptr);
} }
allocatedUses.clear(); operandsWithUses.clear();
} }
void Optimizer::eliminateDeadCode() void Optimizer::eliminateDeadCode()
...@@ -713,7 +715,7 @@ Optimizer::Uses *Optimizer::getUses(Ice::Operand *operand) ...@@ -713,7 +715,7 @@ Optimizer::Uses *Optimizer::getUses(Ice::Operand *operand)
{ {
uses = new Optimizer::Uses; uses = new Optimizer::Uses;
setUses(operand, uses); setUses(operand, uses);
allocatedUses.push_back(uses); operandsWithUses.push_back(operand);
} }
return uses; return uses;
} }
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include <cmath> #include <cmath>
#include <thread>
#include <tuple> #include <tuple>
using namespace rr; using namespace rr;
...@@ -1896,6 +1897,150 @@ TEST(ReactorUnitTests, Coroutines_Parameters) ...@@ -1896,6 +1897,150 @@ TEST(ReactorUnitTests, Coroutines_Parameters)
EXPECT_EQ(out, 99); EXPECT_EQ(out, 99);
} }
// This test was written because Subzero's handling of vector types
// failed when more than one function is generated, as is the case
// with coroutines.
TEST(ReactorUnitTests, Coroutines_Vectors)
{
// Nothing to verify when the backend reports no coroutine support.
if(!rr::Caps.CoroutinesSupported)
{
SUCCEED() << "Coroutines not supported";
return;
}
// Coroutine body: build three Int4 vectors and yield one extracted lane from each.
Coroutine<int()> function;
{
Int4 a{ 1, 2, 3, 4 };
Yield(rr::Extract(a, 2));
Int4 b{ 5, 6, 7, 8 };
Yield(rr::Extract(b, 1));
Int4 c{ 9, 10, 11, 12 };
Yield(rr::Extract(c, 1));
}
// Start the coroutine and check that each await() delivers the next yielded lane, in order.
auto coroutine = function();
int out;
coroutine->await(out);
EXPECT_EQ(out, 3);
coroutine->await(out);
EXPECT_EQ(out, 6);
coroutine->await(out);
EXPECT_EQ(out, 10);
}
// This test was written to make sure a coroutine without a Yield()
// works correctly, by executing like a regular function with no
// return (the return type is ignored).
// We also run it twice to ensure per instance and/or global state
// is properly cleaned up in between.
TEST(ReactorUnitTests, Coroutines_NoYield)
{
// Nothing to verify when the backend reports no coroutine support.
if(!rr::Caps.CoroutinesSupported)
{
SUCCEED() << "Coroutines not supported";
return;
}
// Build and run the same Yield-less coroutine twice, back to back.
for(int i = 0; i < 2; ++i)
{
Coroutine<int()> function;
{
Int a;
a = 4;
}
auto coroutine = function();
int out;
// With no Yield() in the body, await() is expected to report false.
EXPECT_EQ(coroutine->await(out), false);
}
}
// Test generating one coroutine, and executing it on multiple threads. This makes
// sure the implementation manages per-call instance data correctly.
TEST(ReactorUnitTests, Coroutines_Parallel)
{
// Nothing to verify when the backend reports no coroutine support.
if(!rr::Caps.CoroutinesSupported)
{
SUCCEED() << "Coroutines not supported";
return;
}
// Coroutine body: yields 0 and 1, then loops forever yielding `next` while
// advancing the (current, next) pair, producing the sequence listed in `expected`.
Coroutine<int()> function;
{
Yield(Int(0));
Yield(Int(1));
Int current = 1;
Int next = 1;
While(true)
{
Yield(next);
auto tmp = current + next;
current = next;
next = tmp;
}
}
// Must call on same thread that creates the coroutine
function.finalize();
// Values every coroutine instance is expected to yield, in order.
constexpr int32_t expected[] = {
0,
1,
1,
2,
3,
5,
8,
13,
21,
34,
55,
89,
144,
233,
377,
610,
987,
1597,
2584,
4181,
6765,
10946,
17711,
28657,
46368,
75025,
121393,
196418,
317811,
};
constexpr auto count = sizeof(expected) / sizeof(expected[0]);
// Each thread starts its own coroutine instance from the shared `function`
// and checks the full expected sequence against it.
std::vector<std::thread> threads;
const size_t numThreads = 100;
for(size_t t = 0; t < numThreads; ++t)
{
threads.emplace_back([&] {
auto coroutine = function();
for(size_t i = 0; i < count; i++)
{
int out = 0;
EXPECT_EQ(coroutine->await(out), true);
EXPECT_EQ(out, expected[i]);
}
});
}
// Wait for every worker before the captured locals go out of scope.
for(auto &t : threads)
{
t.join();
}
}
template<typename TestFuncType, typename RefFuncType, typename TestValueType> template<typename TestFuncType, typename RefFuncType, typename TestValueType>
struct IntrinsicTestParams struct IntrinsicTestParams
{ {
......
...@@ -51,8 +51,85 @@ ...@@ -51,8 +51,85 @@
// Subzero utility functions // Subzero utility functions
// These functions only accept and return Subzero (Ice) types, and do not access any globals. // These functions only accept and return Subzero (Ice) types, and do not access any globals.
namespace {
namespace sz { namespace sz {
static Ice::Constant *getConstantPointer(Ice::GlobalContext *context, void const *ptr) void replaceEntryNode(Ice::Cfg *function, Ice::CfgNode *newEntryNode)
{
ASSERT_MSG(function->getEntryNode() != nullptr, "Function should have an entry node");
if(function->getEntryNode() == newEntryNode)
{
return;
}
// Make this the new entry node
function->setEntryNode(newEntryNode);
// Reorder nodes so that new entry block comes first. This is required
// by Cfg::renumberInstructions, which expects the first node in the list
// to be the entry node.
{
auto nodes = function->getNodes();
// TODO(amaiorano): Fast path if newEntryNode is last? Can avoid linear search.
auto iter = std::find(nodes.begin(), nodes.end(), newEntryNode);
ASSERT_MSG(iter != nodes.end(), "New node should be in the function's node list");
nodes.erase(iter);
nodes.insert(nodes.begin(), newEntryNode);
// swapNodes replaces its nodes with the input one, and renumbers them,
// so our new entry node will be 0, and the previous will be 1.
function->swapNodes(nodes);
}
}
// Creates a new Ice::Cfg in `context`, adds one argument variable per entry of
// paramTypes (in order), and installs a freshly made node as its entry node.
// The Cfg is release()d from the handle returned by Ice::Cfg::create, so the
// caller becomes responsible for it. returnType is currently not used.
Ice::Cfg *createFunction(Ice::GlobalContext *context, Ice::Type returnType, const std::vector<Ice::Type> &paramTypes)
{
	uint32_t sequenceNumber = 0;
	auto *cfg = Ice::Cfg::create(context, sequenceNumber).release();

	// Keep the function's allocator scope alive while variables and nodes are made.
	Ice::CfgLocalAllocatorScope allocatorScope{ cfg };

	for(size_t i = 0; i < paramTypes.size(); ++i)
	{
		cfg->addArg(cfg->makeVariable(paramTypes[i]));
	}

	Ice::CfgNode *entryNode = cfg->makeNode();
	cfg->setEntryNode(entryNode);

	return cfg;
}
// Returns the integer Ice type used to represent a pointer on the host:
// i64 when pointers are 8 bytes wide, i32 otherwise. elementType does not
// influence the result.
Ice::Type getPointerType(Ice::Type elementType)
{
	return (sizeof(void *) == 8) ? Ice::IceType_i64 : Ice::IceType_i32;
}
// Inserts an alloca at the front of the function's entry node and returns the
// variable that receives its address. The allocation covers `arraySize`
// elements of `type` (an arraySize of 0 is treated as a single element) and
// uses the element size as its alignment.
Ice::Variable *allocateStackVariable(Ice::Cfg *function, Ice::Type type, int arraySize = 0)
{
	const int elementSize = Ice::typeWidthInBytes(type);
	const int elementCount = (arraySize != 0) ? arraySize : 1;
	const int totalSize = elementSize * elementCount;

	auto byteCount = Ice::ConstantInteger32::create(function->getContext(), Ice::IceType_i32, totalSize);
	auto result = function->makeVariable(getPointerType(type));

	// push_front so the alloca precedes every instruction already in the entry node.
	auto allocaInst = Ice::InstAlloca::create(function, result, byteCount, elementSize);
	function->getEntryNode()->getInsts().push_front(allocaInst);

	return result;
}
Ice::Constant *getConstantPointer(Ice::GlobalContext *context, void const *ptr)
{ {
if(sizeof(void *) == 8) if(sizeof(void *) == 8)
{ {
...@@ -64,8 +141,38 @@ static Ice::Constant *getConstantPointer(Ice::GlobalContext *context, void const ...@@ -64,8 +141,38 @@ static Ice::Constant *getConstantPointer(Ice::GlobalContext *context, void const
} }
} }
// Wrapper for calls on C functions with Ice types
// Appends to basicBlock a call to the C function `fptr`, passing `args`
// (Ice variables) as the call arguments in order. Returns the variable that
// receives the call's result, or nullptr when the function returns void.
template<typename Return, typename... CArgs, typename... RArgs>
Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Return(fptr)(CArgs...), RArgs &&... args)
{
	// Subzero doesn't support boolean return values. Replace with an i32.
	const Ice::Type declaredTy = T(rr::CToReactorT<Return>::getType());
	const Ice::Type retTy = (declaredTy == Ice::IceType_i1) ? Ice::IceType_i32 : declaredTy;

	// Only non-void calls get a result variable.
	Ice::Variable *result = (retTy != Ice::IceType_void) ? function->makeVariable(retTy) : nullptr;

	std::initializer_list<Ice::Variable *> callArgs = { std::forward<RArgs>(args)... };

	// The callee is referenced by its absolute address, emitted as a constant.
	auto callee = getConstantPointer(function->getContext(), reinterpret_cast<void const *>(fptr));
	auto callInst = Ice::InstCall::create(function, callArgs.size(), result, callee, false);
	for(Ice::Variable *callArg : callArgs)
	{
		callInst->addArg(callArg);
	}

	basicBlock->appendInst(callInst);
	return result;
}
// Returns a non-const variable copy of const v // Returns a non-const variable copy of const v
static Ice::Variable *createUnconstCast(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Constant *v) Ice::Variable *createUnconstCast(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Constant *v)
{ {
Ice::Variable *result = function->makeVariable(v->getType()); Ice::Variable *result = function->makeVariable(v->getType());
Ice::InstCast *cast = Ice::InstCast::create(function, Ice::InstCast::Bitcast, result, v); Ice::InstCast *cast = Ice::InstCast::create(function, Ice::InstCast::Bitcast, result, v);
...@@ -73,7 +180,7 @@ static Ice::Variable *createUnconstCast(Ice::Cfg *function, Ice::CfgNode *basicB ...@@ -73,7 +180,7 @@ static Ice::Variable *createUnconstCast(Ice::Cfg *function, Ice::CfgNode *basicB
return result; return result;
} }
static Ice::Variable *createLoad(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *ptr, Ice::Type type, unsigned int align) Ice::Variable *createLoad(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *ptr, Ice::Type type, unsigned int align)
{ {
// TODO(b/148272103): InstLoad assumes that a constant ptr is an offset, rather than an // TODO(b/148272103): InstLoad assumes that a constant ptr is an offset, rather than an
// absolute address. We circumvent this by casting to a non-const variable, and loading // absolute address. We circumvent this by casting to a non-const variable, and loading
...@@ -91,9 +198,12 @@ static Ice::Variable *createLoad(Ice::Cfg *function, Ice::CfgNode *basicBlock, I ...@@ -91,9 +198,12 @@ static Ice::Variable *createLoad(Ice::Cfg *function, Ice::CfgNode *basicBlock, I
} }
} // namespace sz } // namespace sz
} // namespace
namespace rr { namespace rr {
class ELFMemoryStreamer; class ELFMemoryStreamer;
} class CoroutineGenerator;
} // namespace rr
namespace { namespace {
...@@ -119,6 +229,10 @@ std::mutex codegenMutex; ...@@ -119,6 +229,10 @@ std::mutex codegenMutex;
Ice::ELFFileStreamer *elfFile = nullptr; Ice::ELFFileStreamer *elfFile = nullptr;
Ice::Fdstream *out = nullptr; Ice::Fdstream *out = nullptr;
// Coroutine globals
rr::Type *coroYieldType = nullptr;
std::shared_ptr<rr::CoroutineGenerator> coroGen;
} // Anonymous namespace } // Anonymous namespace
namespace { namespace {
...@@ -232,7 +346,11 @@ std::string BackendName() ...@@ -232,7 +346,11 @@ std::string BackendName()
} }
const Capabilities Caps = { const Capabilities Caps = {
#if defined(_WIN32)
true, // CoroutinesSupported
#else
false, // CoroutinesSupported false, // CoroutinesSupported
#endif
}; };
enum EmulatedType enum EmulatedType
...@@ -274,11 +392,27 @@ Type *T(EmulatedType t) ...@@ -274,11 +392,27 @@ Type *T(EmulatedType t)
return reinterpret_cast<Type *>(t); return reinterpret_cast<Type *>(t);
} }
// Converts a list of Reactor types into the corresponding list of Ice types,
// preserving order, by applying the single-type T() overload to each element.
std::vector<Ice::Type> T(const std::vector<Type *> &types)
{
	std::vector<Ice::Type> iceTypes;
	iceTypes.reserve(types.size());

	for(size_t i = 0; i < types.size(); ++i)
	{
		iceTypes.push_back(T(types[i]));
	}

	return iceTypes;
}
Value *V(Ice::Operand *v) Value *V(Ice::Operand *v)
{ {
return reinterpret_cast<Value *>(v); return reinterpret_cast<Value *>(v);
} }
// Reinterprets a Reactor Value handle as the Ice operand it stands for.
// This is the inverse of V(Ice::Operand *). The cast goes straight to
// Ice::Operand * (the declared return type) rather than through
// Ice::Variable *, because a Value is produced from any operand and is not
// necessarily a variable.
Ice::Operand *V(Value *v)
{
	return reinterpret_cast<Ice::Operand *>(v);
}
BasicBlock *B(Ice::CfgNode *b) BasicBlock *B(Ice::CfgNode *b)
{ {
return reinterpret_cast<BasicBlock *>(b); return reinterpret_cast<BasicBlock *>(b);
...@@ -303,6 +437,14 @@ static size_t typeSize(Type *type) ...@@ -303,6 +437,14 @@ static size_t typeSize(Type *type)
return Ice::typeWidthInBytes(T(type)); return Ice::typeWidthInBytes(T(type));
} }
// Appends a void return via Nucleus::createRetVoid() unless the current basic
// block already ends with a Ret instruction. An empty block also gets one.
static void createRetVoidIfNoRet()
{
	auto &insts = ::basicBlock->getInsts();
	const bool endsWithRet = !insts.empty() && insts.back().getKind() == Ice::Inst::Ret;

	if(!endsWithRet)
	{
		Nucleus::createRetVoid();
	}
}
using ElfHeader = std::conditional<sizeof(void *) == 8, Elf64_Ehdr, Elf32_Ehdr>::type; using ElfHeader = std::conditional<sizeof(void *) == 8, Elf64_Ehdr, Elf32_Ehdr>::type;
using SectionHeader = std::conditional<sizeof(void *) == 8, Elf64_Shdr, Elf32_Shdr>::type; using SectionHeader = std::conditional<sizeof(void *) == 8, Elf64_Shdr, Elf32_Shdr>::type;
...@@ -462,7 +604,7 @@ static void *relocateSymbol(const ElfHeader *elfHeader, const Elf64_Rela &reloca ...@@ -462,7 +604,7 @@ static void *relocateSymbol(const ElfHeader *elfHeader, const Elf64_Rela &reloca
return symbolValue; return symbolValue;
} }
void *loadImage(uint8_t *const elfImage, size_t &codeSize) void *loadImage(uint8_t *const elfImage, size_t &codeSize, const char *functionName = nullptr)
{ {
ElfHeader *elfHeader = (ElfHeader *)elfImage; ElfHeader *elfHeader = (ElfHeader *)elfImage;
...@@ -496,6 +638,15 @@ void *loadImage(uint8_t *const elfImage, size_t &codeSize) ...@@ -496,6 +638,15 @@ void *loadImage(uint8_t *const elfImage, size_t &codeSize)
{ {
if(sectionHeader[i].sh_flags & SHF_EXECINSTR) if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
{ {
auto getCurrSectionName = [&]() {
auto sectionNameOffset = sectionHeader[elfHeader->e_shstrndx].sh_offset + sectionHeader[i].sh_name;
return reinterpret_cast<const char *>(elfImage + sectionNameOffset);
};
if(functionName && strstr(getCurrSectionName(), functionName) == nullptr)
{
continue;
}
entry = elfImage + sectionHeader[i].sh_offset; entry = elfImage + sectionHeader[i].sh_offset;
codeSize = sectionHeader[i].sh_size; codeSize = sectionHeader[i].sh_size;
} }
...@@ -593,22 +744,27 @@ public: ...@@ -593,22 +744,27 @@ public:
void seek(uint64_t Off) override { position = Off; } void seek(uint64_t Off) override { position = Off; }
const void *finalizeEntryBegin() const void *getEntryByName(const char *name)
{ {
position = std::numeric_limits<std::size_t>::max(); // Can't stream more data after this
size_t codeSize = 0; size_t codeSize = 0;
const void *entry = loadImage(&buffer[0], codeSize); const void *entry = loadImage(&buffer[0], codeSize, name);
protectMemoryPages(&buffer[0], buffer.size(), PERMISSION_READ | PERMISSION_EXECUTE);
#if defined(_WIN32) #if defined(_WIN32)
FlushInstructionCache(GetCurrentProcess(), NULL, 0); FlushInstructionCache(GetCurrentProcess(), NULL, 0);
#else #else
__builtin___clear_cache((char *)entry, (char *)entry + codeSize); __builtin___clear_cache((char *)entry, (char *)entry + codeSize);
#endif #endif
return entry; return entry;
} }
// Seals the streamed image: moves the write position to the maximum offset
// and switches the buffer's pages to read + execute permissions.
void finalize()
{
	// Can't stream more data after this
	constexpr std::size_t endOfStream = std::numeric_limits<std::size_t>::max();
	position = endOfStream;

	const auto executablePermissions = PERMISSION_READ | PERMISSION_EXECUTE;
	protectMemoryPages(&buffer[0], buffer.size(), executablePermissions);
}
void setEntry(int index, const void *func) void setEntry(int index, const void *func)
{ {
ASSERT(func); ASSERT(func);
...@@ -664,6 +820,9 @@ Nucleus::Nucleus() ...@@ -664,6 +820,9 @@ Nucleus::Nucleus()
Flags.setVerbose(subzeroDumpEnabled ? Ice::IceV_Most : Ice::IceV_None); Flags.setVerbose(subzeroDumpEnabled ? Ice::IceV_Most : Ice::IceV_None);
Flags.setDisableHybridAssembly(true); Flags.setDisableHybridAssembly(true);
// Emit functions into separate sections in the ELF so we can find them by name
Flags.setFunctionSections(true);
static llvm::raw_os_ostream cout(std::cout); static llvm::raw_os_ostream cout(std::cout);
static llvm::raw_os_ostream cerr(std::cerr); static llvm::raw_os_ostream cerr(std::cerr);
...@@ -691,13 +850,24 @@ Nucleus::Nucleus() ...@@ -691,13 +850,24 @@ Nucleus::Nucleus()
Nucleus::~Nucleus() Nucleus::~Nucleus()
{ {
delete ::routine; delete ::routine;
::routine = nullptr;
delete ::allocator; delete ::allocator;
::allocator = nullptr;
delete ::function; delete ::function;
::function = nullptr;
delete ::context; delete ::context;
::context = nullptr;
delete ::elfFile; delete ::elfFile;
::elfFile = nullptr;
delete ::out; delete ::out;
::out = nullptr;
::basicBlock = nullptr;
::codegenMutex.unlock(); ::codegenMutex.unlock();
} }
...@@ -721,56 +891,89 @@ Config Nucleus::getDefaultConfig() ...@@ -721,56 +891,89 @@ Config Nucleus::getDefaultConfig()
return ::defaultConfig(); return ::defaultConfig();
} }
std::shared_ptr<Routine> Nucleus::acquireRoutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */) // This function lowers and produces executable binary code in memory for the input functions,
// and returns a Routine with the entry points to these functions.
template<size_t Count>
static std::shared_ptr<Routine> acquireRoutine(Ice::Cfg *const (&functions)[Count], const char *const (&names)[Count], const Config::Edit &cfgEdit)
{ {
// This logic is modeled after the IceCompiler, as well as GlobalContext::translateFunctions
// and GlobalContext::emitItems.
if(subzeroDumpEnabled) if(subzeroDumpEnabled)
{ {
// Output dump strings immediately, rather than once buffer is full. Useful for debugging. // Output dump strings immediately, rather than once buffer is full. Useful for debugging.
context->getStrDump().SetUnbuffered(); ::context->getStrDump().SetUnbuffered();
} }
if(basicBlock->getInsts().empty() || basicBlock->getInsts().back().getKind() != Ice::Inst::Ret) ::context->emitFileHeader();
// Translate
for(size_t i = 0; i < Count; ++i)
{ {
createRetVoid(); Ice::Cfg *currFunc = functions[i];
}
::function->setFunctionName(Ice::GlobalString::createWithString(::context, name)); // Install function allocator in TLS for Cfg-specific container allocators
Ice::CfgLocalAllocatorScope allocScope(currFunc);
rr::optimize(::function); currFunc->setFunctionName(Ice::GlobalString::createWithString(::context, names[i]));
::function->computeInOutEdges(); rr::optimize(currFunc);
ASSERT(!::function->hasError());
::function->translate(); currFunc->computeInOutEdges();
ASSERT(!::function->hasError()); ASSERT(!currFunc->hasError());
auto globals = ::function->getGlobalInits(); currFunc->translate();
ASSERT(!currFunc->hasError());
if(globals && !globals->empty()) currFunc->getAssembler<>()->setInternal(currFunc->getInternal());
{
::context->getGlobals()->merge(globals.get()); if(subzeroEmitTextAsm)
{
currFunc->emit();
}
currFunc->emitIAS();
} }
::context->emitFileHeader(); // Emit items
if(subzeroEmitTextAsm) ::context->lowerGlobals("");
auto objectWriter = ::context->getObjectWriter();
for(size_t i = 0; i < Count; ++i)
{ {
::function->emit(); Ice::Cfg *currFunc = functions[i];
// Accumulate globals from functions to emit into the "last" section at the end
auto globals = currFunc->getGlobalInits();
if(globals && !globals->empty())
{
::context->getGlobals()->merge(globals.get());
}
auto assembler = currFunc->releaseAssembler();
assembler->alignFunction();
objectWriter->writeFunctionCode(currFunc->getFunctionName(), currFunc->getInternal(), assembler.get());
} }
::function->emitIAS();
auto assembler = ::function->releaseAssembler();
auto objectWriter = ::context->getObjectWriter();
assembler->alignFunction();
objectWriter->writeFunctionCode(::function->getFunctionName(), false, assembler.get());
::context->lowerGlobals("last"); ::context->lowerGlobals("last");
::context->lowerConstants(); ::context->lowerConstants();
::context->lowerJumpTables(); ::context->lowerJumpTables();
objectWriter->setUndefinedSyms(::context->getConstantExternSyms()); objectWriter->setUndefinedSyms(::context->getConstantExternSyms());
::context->emitTargetRODataSections();
objectWriter->writeNonUserSections(); objectWriter->writeNonUserSections();
const void *entryBegin = ::routine->finalizeEntryBegin(); // Done compiling functions, get entry pointers to each of them
::routine->setEntry(Nucleus::CoroutineEntryBegin, entryBegin); for(size_t i = 0; i < Count; ++i)
{
const void *entry = ::routine->getEntryByName(names[i]);
::routine->setEntry(i, entry);
}
::routine->finalize();
Routine *handoffRoutine = ::routine; Routine *handoffRoutine = ::routine;
::routine = nullptr; ::routine = nullptr;
...@@ -778,6 +981,12 @@ std::shared_ptr<Routine> Nucleus::acquireRoutine(const char *name, const Config: ...@@ -778,6 +981,12 @@ std::shared_ptr<Routine> Nucleus::acquireRoutine(const char *name, const Config:
return std::shared_ptr<Routine>(handoffRoutine); return std::shared_ptr<Routine>(handoffRoutine);
} }
// Finishes the function currently being built (adding a void return if it
// does not already end with one) and compiles it, on its own, into a Routine
// via the multi-function rr::acquireRoutine helper.
std::shared_ptr<Routine> Nucleus::acquireRoutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
{
	createRetVoidIfNoRet();

	// Single-element arrays: one function, one name.
	Ice::Cfg *const functions[] = { ::function };
	const char *const names[] = { name };

	return rr::acquireRoutine(functions, names, cfgEdit);
}
Value *Nucleus::allocateStackVariable(Type *t, int arraySize) Value *Nucleus::allocateStackVariable(Type *t, int arraySize)
{ {
Ice::Type type = T(t); Ice::Type type = T(t);
...@@ -811,21 +1020,21 @@ void Nucleus::setInsertBlock(BasicBlock *basicBlock) ...@@ -811,21 +1020,21 @@ void Nucleus::setInsertBlock(BasicBlock *basicBlock)
::basicBlock = basicBlock; ::basicBlock = basicBlock;
} }
void Nucleus::createFunction(Type *ReturnType, std::vector<Type *> &Params) void Nucleus::createFunction(Type *returnType, const std::vector<Type *> &paramTypes)
{ {
uint32_t sequenceNumber = 0; ASSERT(::function == nullptr);
::function = Ice::Cfg::create(::context, sequenceNumber).release(); ASSERT(::allocator == nullptr);
::allocator = new Ice::CfgLocalAllocatorScope(::function); ASSERT(::basicBlock == nullptr);
for(Type *type : Params) ::function = sz::createFunction(::context, T(returnType), T(paramTypes));
{
Ice::Variable *arg = ::function->makeVariable(T(type)); // NOTE: The scoped allocator sets the TLS allocator to the one in the function. This global one
::function->addArg(arg); // becomes invalid if another one is created; for example, when creating await and destroy functions
} // for coroutines, in which case, we must make sure to create a new scoped allocator for ::function again.
// TODO: Get rid of this as a global, and create scoped allocs in every Nucleus function instead.
::allocator = new Ice::CfgLocalAllocatorScope(::function);
Ice::CfgNode *node = ::function->makeNode(); ::basicBlock = ::function->getEntryNode();
::function->setEntryNode(node);
::basicBlock = node;
} }
Value *Nucleus::getArgument(unsigned int index) Value *Nucleus::getArgument(unsigned int index)
...@@ -1152,7 +1361,7 @@ Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatil ...@@ -1152,7 +1361,7 @@ Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatil
{ {
ASSERT(value->getType() == T(type)); ASSERT(value->getType() == T(type));
auto store = Ice::InstStore::create(::function, value, ptr, align); auto store = Ice::InstStore::create(::function, V(value), V(ptr), align);
::basicBlock->appendInst(store); ::basicBlock->appendInst(store);
} }
...@@ -1556,14 +1765,7 @@ void Nucleus::createUnreachable() ...@@ -1556,14 +1765,7 @@ void Nucleus::createUnreachable()
Type *Nucleus::getPointerType(Type *ElementType) Type *Nucleus::getPointerType(Type *ElementType)
{ {
if(sizeof(void *) == 8) return T(sz::getPointerType(T(ElementType)));
{
return T(Ice::IceType_i64);
}
else
{
return T(Ice::IceType_i32);
}
} }
Value *Nucleus::createNullValue(Type *Ty) Value *Nucleus::createNullValue(Type *Ty)
...@@ -2899,11 +3101,11 @@ Int4::Int4(RValue<Byte4> cast) ...@@ -2899,11 +3101,11 @@ Int4::Int4(RValue<Byte4> cast)
Value *e; Value *e;
int swizzle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }; int swizzle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 };
Value *b = Nucleus::createBitCast(a, Byte16::getType()); Value *b = Nucleus::createBitCast(a, Byte16::getType());
Value *c = Nucleus::createShuffleVector(b, V(Nucleus::createNullValue(Byte16::getType())), swizzle); Value *c = Nucleus::createShuffleVector(b, Nucleus::createNullValue(Byte16::getType()), swizzle);
int swizzle2[8] = { 0, 8, 1, 9, 2, 10, 3, 11 }; int swizzle2[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Value *d = Nucleus::createBitCast(c, Short8::getType()); Value *d = Nucleus::createBitCast(c, Short8::getType());
e = Nucleus::createShuffleVector(d, V(Nucleus::createNullValue(Short8::getType())), swizzle2); e = Nucleus::createShuffleVector(d, Nucleus::createNullValue(Short8::getType()), swizzle2);
Value *f = Nucleus::createBitCast(e, Int4::getType()); Value *f = Nucleus::createBitCast(e, Int4::getType());
storeValue(f); storeValue(f);
...@@ -3879,34 +4081,507 @@ void EmitDebugLocation() {} ...@@ -3879,34 +4081,507 @@ void EmitDebugLocation() {}
void EmitDebugVariable(Value *value) {} void EmitDebugVariable(Value *value) {}
void FlushDebug() {} void FlushDebug() {}
void Nucleus::createCoroutine(Type *YieldType, std::vector<Type *> &Params) namespace {
namespace coro {
using FiberHandle = void *;
// Instance data per generated coroutine
// This is the "handle" type used for Coroutine functions
// Lifetime: from yield to when CoroutineEntryDestroy generated function is called.
struct CoroutineData
{
	// Fiber of the caller, set by convertThreadToMainFiber().
	FiberHandle mainFiber = nullptr;
	// Fiber that runs the routine, set by createRoutineFiber().
	FiberHandle routineFiber = nullptr;
	// True only when convertThreadToMainFiber() itself turned the thread into a
	// fiber, in which case convertMainFiberToThread() converts it back.
	bool convertedFiber{ false };

	// Variables used by coroutines
	bool done{ false };
	void *promisePtr{ nullptr };
};
// Heap-allocates a CoroutineData with all members at their defaults.
// The result is meant to be released with destroyCoroutineData().
CoroutineData *createCoroutineData()
{
	auto *coroData = new CoroutineData{};
	return coroData;
}
// Frees a CoroutineData previously obtained from createCoroutineData().
void destroyCoroutineData(CoroutineData *coroData)
{
delete coroData;
}
// Makes sure the calling thread is a fiber and records that fiber in the
// coroutine's data as its "main" fiber.
//
// handle must point to a CoroutineData. If the thread is already a fiber, the
// current fiber is reused and convertedFiber stays false, so that
// convertMainFiberToThread() will not convert it back. Otherwise the thread is
// converted, and convertedFiber records whether that conversion succeeded.
void convertThreadToMainFiber(Nucleus::CoroutineHandle handle)
{
#if defined(_WIN32)
	auto *coroData = reinterpret_cast<CoroutineData *>(handle);

	// Ask explicitly whether we are on a fiber instead of inferring it from a
	// ConvertThreadToFiber failure: on a genuine failure GetCurrentFiber()
	// would not return a usable fiber, and the ASSERT below could not catch it.
	if(::IsThreadAFiber())
	{
		// Already on a fiber, so just grab it and remember that we didn't
		// convert it, so not to convert back to thread.
		coroData->mainFiber = GetCurrentFiber();
		coroData->convertedFiber = false;
	}
	else
	{
		coroData->mainFiber = ::ConvertThreadToFiber(nullptr);
		coroData->convertedFiber = (coroData->mainFiber != nullptr);
	}

	ASSERT(coroData->mainFiber);
#else
	UNIMPLEMENTED("convertThreadToMainFiber not implemented for current platform");
#endif
}
// Undoes convertThreadToMainFiber(): converts the current fiber back into a
// thread, but only when this coroutine did the conversion (convertedFiber).
// handle: the CoroutineData pointer of the coroutine.
// Win32 only; other platforms hit UNIMPLEMENTED.
void convertMainFiberToThread(Nucleus::CoroutineHandle handle)
{
#if defined(_WIN32)
	auto *coroData = reinterpret_cast<CoroutineData *>(handle);
	ASSERT(coroData->mainFiber);

	if(!coroData->convertedFiber)
	{
		// Another party owns the fiber conversion; leave it alone.
		return;
	}

	::ConvertFiberToThread();
	coroData->mainFiber = nullptr;
#else
	UNIMPLEMENTED("convertMainFiberToThread not implemented for current platform");
#endif
}
using FiberFunc = std::function<void()>;
// Creates the fiber on which the coroutine body runs and stores it in the
// coroutine's routineFiber. The fiber does not start until something switches
// to it (see switchToRoutineFiber()).
// handle:    the CoroutineData pointer of the coroutine.
// fiberFunc: callable run as the fiber's entry point. Only the pointer is
//            handed to the fiber, no copy is made, so the pointed-to object
//            must stay alive for as long as the fiber may execute it.
// Win32 only; other platforms hit UNIMPLEMENTED.
void createRoutineFiber(Nucleus::CoroutineHandle handle, FiberFunc *fiberFunc)
{
#if defined(_WIN32)
	// Stateless adapter: gives the FiberFunc the signature ::CreateFiber expects.
	struct Invoker
	{
		static VOID __stdcall fiberEntry(LPVOID lpParameter)
		{
			auto *func = reinterpret_cast<FiberFunc *>(lpParameter);
			(*func)();
		}
	};

	auto *coroData = reinterpret_cast<CoroutineData *>(handle);
	constexpr SIZE_T StackSize = 2 * 1024 * 1024;  // 2 MiB
	coroData->routineFiber = ::CreateFiber(StackSize, &Invoker::fiberEntry, fiberFunc);
	ASSERT(coroData->routineFiber);
#else
	UNIMPLEMENTED("createRoutineFiber not implemented for current platform");
#endif
}
// Deletes the coroutine's routine fiber and clears the stored pointer.
// handle: the CoroutineData pointer of the coroutine; its routineFiber must be set.
// Win32 only; other platforms hit UNIMPLEMENTED.
void deleteRoutineFiber(Nucleus::CoroutineHandle handle)
{
#if defined(_WIN32)
	auto *coroData = reinterpret_cast<CoroutineData *>(handle);
	ASSERT(coroData->routineFiber);

	FiberHandle &fiber = coroData->routineFiber;
	::DeleteFiber(fiber);
	fiber = nullptr;
#else
	UNIMPLEMENTED("deleteRoutineFiber not implemented for current platform");
#endif
}
// Transfers execution to the coroutine's main fiber. The generated coroutine
// code calls this when it yields and when it finishes.
// handle: the CoroutineData pointer of the coroutine; its mainFiber must be set.
// Win32 only; other platforms hit UNIMPLEMENTED.
void switchToMainFiber(Nucleus::CoroutineHandle handle)
{
#if defined(_WIN32)
	auto *coroData = reinterpret_cast<CoroutineData *>(handle);
	ASSERT(coroData->mainFiber);

	FiberHandle const target = coroData->mainFiber;
	::SwitchToFiber(target);
#else
	UNIMPLEMENTED("switchToMainFiber not implemented for current platform");
#endif
}
// Transfers execution to the coroutine's routine fiber, starting it on the
// first switch and resuming it on later ones.
// handle: the CoroutineData pointer of the coroutine; its routineFiber must be set.
// Win32 only; other platforms hit UNIMPLEMENTED.
void switchToRoutineFiber(Nucleus::CoroutineHandle handle)
{
#if defined(_WIN32)
	auto *coroData = reinterpret_cast<CoroutineData *>(handle);
	ASSERT(coroData->routineFiber);

	FiberHandle const target = coroData->routineFiber;
	::SwitchToFiber(target);
#else
	UNIMPLEMENTED("switchToRoutineFiber not implemented for current platform");
#endif
}
namespace detail {
// Per-thread slot used to hand a coroutine handle from setHandleParam() to
// getHandleParam(). Empty (value-initialized) whenever no handle is pending.
thread_local rr::Nucleus::CoroutineHandle coroHandle{};
}  // namespace detail
// Stores handle in the thread-local slot so that the coroutine's begin
// function can pick it up with getHandleParam().
// The slot must be empty: each stored handle has to be consumed by
// getHandleParam() before the next one is set on the same thread.
void setHandleParam(Nucleus::CoroutineHandle handle)
{
	ASSERT(!detail::coroHandle);
	detail::coroHandle = handle;
}
// Returns the handle previously stored with setHandleParam() and empties the
// thread-local slot, so each stored handle can be fetched exactly once.
Nucleus::CoroutineHandle getHandleParam()
{
	ASSERT(detail::coroHandle);

	const auto pending = detail::coroHandle;
	detail::coroHandle = {};
	return pending;
}
// Marks the coroutine identified by handle as finished.
// The generated coroutine code calls this once, at the end of the routine.
void setDone(Nucleus::CoroutineHandle handle)
{
	auto *coroData = reinterpret_cast<CoroutineData *>(handle);
	ASSERT(!coroData->done);  // Should be called once
	coroData->done = true;
}
// Returns whether setDone() has been called for the coroutine identified by handle.
bool isDone(Nucleus::CoroutineHandle handle)
{
	return reinterpret_cast<CoroutineData *>(handle)->done;
}
// Records the address of the coroutine's yield value ("promise") in its
// CoroutineData so that the await function can read it later.
void setPromisePtr(Nucleus::CoroutineHandle handle, void *promisePtr)
{
	reinterpret_cast<CoroutineData *>(handle)->promisePtr = promisePtr;
}
// Returns the address recorded by setPromisePtr() for this coroutine
// (nullptr when none has been recorded yet).
void *getPromisePtr(Nucleus::CoroutineHandle handle)
{
	return reinterpret_cast<CoroutineData *>(handle)->promisePtr;
}
} // namespace coro
} // namespace
// Used to generate coroutines.
// Emits the Subzero instructions that turn the function currently being built
// (::function) into a fiber-backed coroutine, and builds the companion await
// and destroy functions.
// Lifetime: from yield to acquireCoroutine
class CoroutineGenerator
{
public:
	CoroutineGenerator() = default;

	// Inserts instructions at the top of the current function to make it a coroutine.
	void generateCoroutineBegin()
	{
		// Begin building the main coroutine_begin() function.
		// We insert these instructions at the top of the entry node,
		// before existing reactor-generated instructions.

		//    CoroutineHandle coroutine_begin(<Arguments>)
		//    {
		//        this->handle = coro::getHandleParam();
		//
		//        YieldType promise;
		//        coro::setPromisePtr(handle, &promise); // For await
		//
		//        ... <REACTOR CODE> ...
		//

		// Save original entry block and current block, and create a new entry block and make it current.
		// This new block will be used to inject code above the begin routine's existing code. We make
		// this block branch to the original entry block as the last instruction.
		auto origEntryBB = ::function->getEntryNode();
		auto origCurrBB = ::basicBlock;
		auto newBB = ::function->makeNode();
		sz::replaceEntryNode(::function, newBB);
		::basicBlock = newBB;

		//        this->handle = coro::getHandleParam();
		this->handle = sz::Call(::function, ::basicBlock, coro::getHandleParam);

		//        YieldType promise;
		//        coro::setPromisePtr(handle, &promise); // For await
		this->promise = sz::allocateStackVariable(::function, T(::coroYieldType));
		sz::Call(::function, ::basicBlock, coro::setPromisePtr, this->handle, this->promise);

		// Branch to original entry block
		auto br = Ice::InstBr::create(::function, origEntryBB);
		::basicBlock->appendInst(br);

		// Restore current block for future instructions
		::basicBlock = origCurrBB;
	}

	// Adds instructions for Yield() calls at the current location of the main coroutine function.
	// val: the value being yielded; it is stored into the promise variable.
	void generateYield(Value *val)
	{
		//        ... <REACTOR CODE> ...
		//
		//        promise = val;
		//        coro::switchToMainFiber(handle);
		//
		//        ... <REACTOR CODE> ...

		Nucleus::createStore(val, V(this->promise), ::coroYieldType);
		sz::Call(::function, ::basicBlock, coro::switchToMainFiber, this->handle);
	}

	// Adds instructions at the end of the current main coroutine function to end the coroutine.
	void generateCoroutineEnd()
	{
		//        ... <REACTOR CODE> ...
		//
		//        coro::setDone(handle);
		//        coro::switchToMainFiber(handle);
		//        // Unreachable
		//    }
		//

		sz::Call(::function, ::basicBlock, coro::setDone, this->handle);

		// A Win32 Fiber function must not end, otherwise it tears down the thread it's running on.
		// So we add code to switch back to the main fiber.
		sz::Call(::function, ::basicBlock, coro::switchToMainFiber, this->handle);
	}

	using FunctionUniquePtr = std::unique_ptr<Ice::Cfg>;

	// Generates the await function for the current coroutine.
	// Cannot use Nucleus functions that modify ::function and ::basicBlock.
	// Returns the newly created function; the caller owns it.
	static FunctionUniquePtr generateAwaitFunction()
	{
		//    bool coroutine_await(CoroutineHandle handle, YieldType* out)
		//    {
		//        if (coro::isDone(handle))
		//        {
		//            return false;
		//        }
		//        else // resume
		//        {
		//            YieldType* promise = coro::getPromisePtr(handle);
		//            *out = *promise;
		//            coro::switchToRoutineFiber(handle);
		//            return true;
		//        }
		//    }

		// Subzero doesn't support bool types (IceType_i1) as return type
		const Ice::Type ReturnType = Ice::IceType_i32;
		const Ice::Type YieldPtrType = sz::getPointerType(T(::coroYieldType));
		const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);

		Ice::Cfg *awaitFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType, YieldPtrType });
		Ice::CfgLocalAllocatorScope scopedAlloc{ awaitFunc };

		Ice::Variable *handle = awaitFunc->getArgs()[0];
		Ice::Variable *outPtr = awaitFunc->getArgs()[1];

		auto doneBlock = awaitFunc->makeNode();
		{
			//         return false;
			Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(0));
			doneBlock->appendInst(ret);
		}

		auto resumeBlock = awaitFunc->makeNode();
		{
			//         YieldType* promise = coro::getPromisePtr(handle);
			Ice::Variable *promise = sz::Call(awaitFunc, resumeBlock, coro::getPromisePtr, handle);

			//         *out = *promise;
			// Load promise value
			Ice::Variable *promiseVal = awaitFunc->makeVariable(T(::coroYieldType));
			auto load = Ice::InstLoad::create(awaitFunc, promiseVal, promise);
			resumeBlock->appendInst(load);
			// Then store it in output param
			auto store = Ice::InstStore::create(awaitFunc, promiseVal, outPtr);
			resumeBlock->appendInst(store);

			//         coro::switchToRoutineFiber(handle);
			sz::Call(awaitFunc, resumeBlock, coro::switchToRoutineFiber, handle);

			//         return true;
			Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(1));
			resumeBlock->appendInst(ret);
		}

		//        if (coro::isDone(handle))
		//        {
		//            <doneBlock>
		//        }
		//        else // resume
		//        {
		//            <resumeBlock>
		//        }
		Ice::CfgNode *bb = awaitFunc->getEntryNode();
		// coro::isDone() reads the done flag through its handle parameter, so
		// the handle has to be supplied as the operand of the generated call.
		Ice::Variable *done = sz::Call(awaitFunc, bb, coro::isDone, handle);
		auto br = Ice::InstBr::create(awaitFunc, done, doneBlock, resumeBlock);
		bb->appendInst(br);

		return FunctionUniquePtr{ awaitFunc };
	}

	// Generates the destroy function for the current coroutine.
	// Cannot use Nucleus functions that modify ::function and ::basicBlock.
	// Returns the newly created function; the caller owns it.
	static FunctionUniquePtr generateDestroyFunction()
	{
		//    void coroutine_destroy(Nucleus::CoroutineHandle handle)
		//    {
		//        coro::convertMainFiberToThread(handle);
		//        coro::deleteRoutineFiber(handle);
		//        coro::destroyCoroutineData(handle);
		//        return;
		//    }

		const Ice::Type ReturnType = Ice::IceType_void;
		const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);

		Ice::Cfg *destroyFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType });
		Ice::CfgLocalAllocatorScope scopedAlloc{ destroyFunc };

		Ice::Variable *handle = destroyFunc->getArgs()[0];

		auto *bb = destroyFunc->getEntryNode();

		//        coro::convertMainFiberToThread(handle);
		sz::Call(destroyFunc, bb, coro::convertMainFiberToThread, handle);

		//        coro::deleteRoutineFiber(handle);
		sz::Call(destroyFunc, bb, coro::deleteRoutineFiber, handle);

		//        coro::destroyCoroutineData(handle);
		sz::Call(destroyFunc, bb, coro::destroyCoroutineData, handle);

		//        return;
		Ice::InstRet *ret = Ice::InstRet::create(destroyFunc);
		bb->appendInst(ret);

		return FunctionUniquePtr{ destroyFunc };
	}

private:
	// Variable holding the coroutine handle inside the main coroutine function.
	Ice::Variable *handle{};
	// Stack variable of the main coroutine function that receives yielded values.
	Ice::Variable *promise{};
};
// Starts a coroutine: allocates its CoroutineData, makes the calling context
// the main fiber, creates the routine fiber and switches to it so that
// beginFunc runs there. Returns the CoroutineData pointer, which serves as the
// coroutine handle, once the routine fiber switches back to the main fiber.
// beginFunc: invokes the coroutine's generated begin function; its return
//            value is not used here.
// NOTE(review): fiberFunc captures coroData and beginFunc by reference and is
// passed to the fiber by address, but all three are locals of this function.
// The routine fiber can be resumed after this function has returned, so the
// statements following beginFunc() must never execute at that point - confirm
// that the generated coroutine end always switches away before returning.
static Nucleus::CoroutineHandle invokeCoroutineBegin(std::function<Nucleus::CoroutineHandle()> beginFunc)
{ {
// Subzero currently only supports coroutines as functions (i.e. that do not yield) // This doubles up as our coroutine handle
createFunction(YieldType, Params); auto coroData = coro::createCoroutineData();
// Convert current thread to a fiber so we can create new fibers and switch to them
coro::convertThreadToMainFiber(coroData);
coro::FiberFunc fiberFunc = [&]() {
// Store handle in TLS so that the coroutine can grab it right away, before
// any fiber switch occurs.
coro::setHandleParam(coroData);
// Invoke the begin function in the context of the routine fiber
beginFunc();
// Either it yielded, or finished. In either case, we switch back to the main fiber.
// We don't ever return from this function, or the current thread will be destroyed.
coro::switchToMainFiber(coroData);
};
coro::createRoutineFiber(coroData, &fiberFunc);
// Fiber will now start running, executing the saved beginFunc
coro::switchToRoutineFiber(coroData);
return coroData;
}
// Begins building a coroutine.
// yieldType: type of the values the coroutine yields; it is also passed to
//            createFunction() in the return-type position.
// params:    parameter types of the coroutine.
// The yield type is remembered in ::coroYieldType until acquireCoroutine()
// clears it, which is why only one coroutine can be under construction at a time.
void Nucleus::createCoroutine(Type *yieldType, const std::vector<Type *> &params)
{
	// Start by creating a regular function
	createFunction(yieldType, params);

	// Save in case yield() is called
	ASSERT(::coroYieldType == nullptr);  // Only one coroutine can be generated at once
	::coroYieldType = yieldType;
}
// Emits a yield of val at the current position of the coroutine being built.
// The first call creates the CoroutineGenerator (::coroGen) and has it insert
// the coroutine prologue at the top of the function; every call then emits
// the store of val and the switch back to the main fiber.
void Nucleus::yield(Value *val)
{
// Materialize all pending variables before emitting the yield instructions.
Variable::materializeAll();
// On first yield, we start generating coroutine functions
if(!::coroGen)
{
::coroGen = std::make_shared<CoroutineGenerator>();
::coroGen->generateCoroutineBegin();
}
ASSERT(::coroGen);
::coroGen->generateYield(val);
} }
// Await entry used for routines that contain no yield: it always returns
// false and never writes to yieldValue. Its address is also compared against
// a routine's await entry to tell such routines apart from real coroutines.
static bool coroutineEntryAwaitStub(Nucleus::CoroutineHandle, void *yieldValue) static bool coroutineEntryAwaitStub(Nucleus::CoroutineHandle, void *yieldValue)
{ {
return false; return false;
} }
// Destroy entry used for routines that contain no yield: the body is empty,
// so there is nothing to release for such a routine.
static void coroutineEntryDestroyStub(Nucleus::CoroutineHandle) {}
static void coroutineEntryDestroyStub(Nucleus::CoroutineHandle handle)
{
}
// Finishes the coroutine under construction and returns it as a Routine.
// name:    name given to the main (begin) function.
// cfgEdit: configuration edits forwarded to rr::acquireRoutine().
// When yield() was used (::coroGen is set), the routine is built from the main
// function plus the generated "await" and "destroy" functions. Otherwise only
// the main function is built and the await/destroy entries are pointed at
// coroutineEntryAwaitStub / coroutineEntryDestroyStub.
// In both cases ::coroYieldType is reset so that another coroutine can be created.
std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */) std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
{ {
// acquireRoutine sets the CoroutineEntryBegin entry if(::coroGen)
auto coroutineEntry = acquireRoutine(name, cfgEdit); {
// Finish generating coroutine functions
{
Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
::coroGen->generateCoroutineEnd();
createRetVoidIfNoRet();
}
// The await and destroy functions are separate functions built next to ::function.
auto awaitFunc = ::coroGen->generateAwaitFunction();
auto destroyFunc = ::coroGen->generateDestroyFunction();
// At this point, we no longer need the CoroutineGenerator.
::coroGen.reset();
::coroYieldType = nullptr;
// The order of the functions matches the order of the names passed alongside.
auto routine = rr::acquireRoutine({ ::function, awaitFunc.get(), destroyFunc.get() },
{ name, "await", "destroy" },
cfgEdit);
return routine;
}
else
{
{
Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
createRetVoidIfNoRet();
}
// For now, set the await and destroy entries to stubs, until we add proper coroutine support to the Subzero backend ::coroYieldType = nullptr;
auto routine = std::static_pointer_cast<ELFMemoryStreamer>(coroutineEntry);
routine->setEntry(Nucleus::CoroutineEntryAwait, reinterpret_cast<const void *>(&coroutineEntryAwaitStub));
routine->setEntry(Nucleus::CoroutineEntryDestroy, reinterpret_cast<const void *>(&coroutineEntryDestroyStub));
return coroutineEntry; // Not an actual coroutine (no yields), so return stubs for await and destroy
auto routine = rr::acquireRoutine({ ::function }, { name }, cfgEdit);
// setEntry is called on the ELFMemoryStreamer, so cast the routine to it first.
auto routineImpl = std::static_pointer_cast<ELFMemoryStreamer>(routine);
routineImpl->setEntry(Nucleus::CoroutineEntryAwait, reinterpret_cast<const void *>(&coroutineEntryAwaitStub));
routineImpl->setEntry(Nucleus::CoroutineEntryDestroy, reinterpret_cast<const void *>(&coroutineEntryDestroyStub));
return routine;
}
} }
// Runs the begin function of a routine obtained from acquireCoroutine().
// routine: the routine being started; only its await entry is inspected.
// func:    invokes the routine's begin entry and returns its result.
// A routine whose await entry is not coroutineEntryAwaitStub is a real
// coroutine, and func is run on a new fiber through rr::invokeCoroutineBegin().
// Otherwise func is simply called on the current thread and its result returned.
void Nucleus::yield(Value *val) Nucleus::CoroutineHandle Nucleus::invokeCoroutineBegin(Routine &routine, std::function<Nucleus::CoroutineHandle()> func)
{ {
UNIMPLEMENTED("Yield"); const bool isCoroutine = routine.getEntry(Nucleus::CoroutineEntryAwait) != reinterpret_cast<const void *>(&coroutineEntryAwaitStub);
if(isCoroutine)
{
return rr::invokeCoroutineBegin(func);
}
else
{
// For regular routines, just invoke the begin func directly
return func();
}
} }
} // namespace rr } // namespace rr
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment