Commit 5ba2a5b9 by Antonio Maiorano

Subzero: implement coroutines for Win32

Coroutines are emulated by using fibers. Bug: b/145754674 Change-Id: I3f4bf29d26a75a2386ed812dd821d8a7a8276305 Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/40548 Tested-by: Antonio Maiorano <amaiorano@google.com> Reviewed-by: Nicolas Capens <nicolascapens@google.com>
parent 7a53cb62
...@@ -136,6 +136,11 @@ public: ...@@ -136,6 +136,11 @@ public:
// called without building a new rr::Function or rr::Coroutine. // called without building a new rr::Function or rr::Coroutine.
// While automatically called by operator(), finalize() should be called // While automatically called by operator(), finalize() should be called
// as early as possible to release the global Reactor mutex lock. // as early as possible to release the global Reactor mutex lock.
// It must also be called explicitly on the same thread that instantiates
// the Coroutine instance if operator() is invoked on separate threads.
// This is because presently, Reactor backends use a global mutex scoped
// to the generation of routines, and these must be locked/unlocked on the
// same thread.
inline void finalize(const Config::Edit &cfg = Config::Edit::None); inline void finalize(const Config::Edit &cfg = Config::Edit::None);
// Starts execution of the coroutine and returns a unique_ptr to a // Starts execution of the coroutine and returns a unique_ptr to a
...@@ -182,9 +187,16 @@ Coroutine<Return(Arguments...)>::operator()(Arguments... args) ...@@ -182,9 +187,16 @@ Coroutine<Return(Arguments...)>::operator()(Arguments... args)
{ {
finalize(); finalize();
using Sig = Nucleus::CoroutineBegin<Arguments...>; // TODO(b/148400732): Go back to just calling the CoroutineEntryBegin function directly.
auto pfn = (Sig *)routine->getEntry(Nucleus::CoroutineEntryBegin); std::function<Nucleus::CoroutineHandle()> coroutineBegin = [=] {
auto handle = pfn(args...); using Sig = Nucleus::CoroutineBegin<Arguments...>;
auto pfn = (Sig *)routine->getEntry(Nucleus::CoroutineEntryBegin);
auto handle = pfn(args...);
return handle;
};
auto handle = Nucleus::invokeCoroutineBegin(*routine, coroutineBegin);
return std::make_unique<Stream<Return>>(routine, handle); return std::make_unique<Stream<Return>>(routine, handle);
} }
......
...@@ -1302,9 +1302,9 @@ Type *T(InternalType t) ...@@ -1302,9 +1302,9 @@ Type *T(InternalType t)
return reinterpret_cast<Type *>(t); return reinterpret_cast<Type *>(t);
} }
inline std::vector<llvm::Type *> &T(std::vector<Type *> &t) inline const std::vector<llvm::Type *> &T(const std::vector<Type *> &t)
{ {
return reinterpret_cast<std::vector<llvm::Type *> &>(t); return reinterpret_cast<const std::vector<llvm::Type *> &>(t);
} }
inline llvm::BasicBlock *B(BasicBlock *t) inline llvm::BasicBlock *B(BasicBlock *t)
...@@ -1506,7 +1506,7 @@ void Nucleus::setInsertBlock(BasicBlock *basicBlock) ...@@ -1506,7 +1506,7 @@ void Nucleus::setInsertBlock(BasicBlock *basicBlock)
jit->builder->SetInsertPoint(B(basicBlock)); jit->builder->SetInsertPoint(B(basicBlock));
} }
void Nucleus::createFunction(Type *ReturnType, std::vector<Type *> &Params) void Nucleus::createFunction(Type *ReturnType, const std::vector<Type *> &Params)
{ {
jit->function = rr::createFunction("", T(ReturnType), T(Params)); jit->function = rr::createFunction("", T(ReturnType), T(Params));
...@@ -4964,7 +4964,7 @@ void promoteFunctionToCoroutine() ...@@ -4964,7 +4964,7 @@ void promoteFunctionToCoroutine()
namespace rr { namespace rr {
void Nucleus::createCoroutine(Type *YieldType, std::vector<Type *> &Params) void Nucleus::createCoroutine(Type *YieldType, const std::vector<Type *> &Params)
{ {
// Coroutines are initially created as a regular function. // Coroutines are initially created as a regular function.
// Upon the first call to Yield(), the function is promoted to a true // Upon the first call to Yield(), the function is promoted to a true
...@@ -5123,4 +5123,9 @@ std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char *name, const Confi ...@@ -5123,4 +5123,9 @@ std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char *name, const Confi
return routine; return routine;
} }
// Runs the coroutine's begin entry point, which the caller has wrapped up in
// `func`, and hands back the resulting coroutine handle.
// This implementation does not need `routine`: it simply invokes `func`
// in place, on the calling thread.
Nucleus::CoroutineHandle Nucleus::invokeCoroutineBegin(Routine &routine, std::function<Nucleus::CoroutineHandle()> func)
{
	Nucleus::CoroutineHandle handle = func();
	return handle;
}
} // namespace rr } // namespace rr
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <cassert> #include <cassert>
#include <cstdarg> #include <cstdarg>
#include <cstdint> #include <cstdint>
#include <functional>
#include <memory> #include <memory>
#include <vector> #include <vector>
...@@ -171,7 +172,7 @@ public: ...@@ -171,7 +172,7 @@ public:
static BasicBlock *getInsertBlock(); static BasicBlock *getInsertBlock();
static void setInsertBlock(BasicBlock *basicBlock); static void setInsertBlock(BasicBlock *basicBlock);
static void createFunction(Type *ReturnType, std::vector<Type *> &Params); static void createFunction(Type *returnType, const std::vector<Type *> &paramTypes);
static Value *getArgument(unsigned int index); static Value *getArgument(unsigned int index);
// Coroutines // Coroutines
...@@ -190,9 +191,21 @@ public: ...@@ -190,9 +191,21 @@ public:
CoroutineEntryCount CoroutineEntryCount
}; };
static void createCoroutine(Type *ReturnType, std::vector<Type *> &Params); // Begins the generation of the three coroutine functions: CoroutineBegin, CoroutineAwait, and CoroutineDestroy,
// which will be returned by Routine::getEntry() with arg CoroutineEntryBegin, CoroutineEntryAwait, and CoroutineEntryDestroy
// respectively. Called by Coroutine constructor.
// Params are used to generate the params to CoroutineBegin, while ReturnType is used as the YieldType for the coroutine,
// returned via CoroutineAwait.
static void createCoroutine(Type *returnType, const std::vector<Type *> &params);
// Generates code to store the passed in value, and to suspend execution of the coroutine, such that the next call to
// CoroutineAwait can set the output yieldValue and resume execution of the coroutine.
static void yield(Value *val);
// Called to finalize coroutine creation. After this call, Routine::getEntry can be called to retrieve the entry point to any
// of the three coroutine functions. Called by Coroutine::finalize.
std::shared_ptr<Routine> acquireCoroutine(const char *name, const Config::Edit &cfg = Config::Edit::None); std::shared_ptr<Routine> acquireCoroutine(const char *name, const Config::Edit &cfg = Config::Edit::None);
static void yield(Value *); // Called by Coroutine::operator() to execute CoroutineEntryBegin wrapped up in func. This is needed in case
// the call must be run on a separate thread of execution (e.g. on a fiber).
static CoroutineHandle invokeCoroutineBegin(Routine &routine, std::function<CoroutineHandle()> func);
// Terminators // Terminators
static void createRetVoid(); static void createRetVoid();
......
...@@ -88,7 +88,7 @@ private: ...@@ -88,7 +88,7 @@ private:
void setLoadStoreInsts(Ice::CfgNode *, std::vector<LoadStoreInst> *); void setLoadStoreInsts(Ice::CfgNode *, std::vector<LoadStoreInst> *);
bool hasLoadStoreInsts(Ice::CfgNode *node) const; bool hasLoadStoreInsts(Ice::CfgNode *node) const;
std::vector<Optimizer::Uses *> allocatedUses; std::vector<Ice::Operand *> operandsWithUses;
}; };
void Optimizer::run(Ice::Cfg *function) void Optimizer::run(Ice::Cfg *function)
...@@ -104,11 +104,13 @@ void Optimizer::run(Ice::Cfg *function) ...@@ -104,11 +104,13 @@ void Optimizer::run(Ice::Cfg *function)
optimizeStoresInSingleBasicBlock(); optimizeStoresInSingleBasicBlock();
eliminateDeadCode(); eliminateDeadCode();
for(auto uses : allocatedUses) for(auto operand : operandsWithUses)
{ {
auto uses = reinterpret_cast<Uses *>(operand->getExternalData());
delete uses; delete uses;
operand->setExternalData(nullptr);
} }
allocatedUses.clear(); operandsWithUses.clear();
} }
void Optimizer::eliminateDeadCode() void Optimizer::eliminateDeadCode()
...@@ -713,7 +715,7 @@ Optimizer::Uses *Optimizer::getUses(Ice::Operand *operand) ...@@ -713,7 +715,7 @@ Optimizer::Uses *Optimizer::getUses(Ice::Operand *operand)
{ {
uses = new Optimizer::Uses; uses = new Optimizer::Uses;
setUses(operand, uses); setUses(operand, uses);
allocatedUses.push_back(uses); operandsWithUses.push_back(operand);
} }
return uses; return uses;
} }
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include <cmath> #include <cmath>
#include <thread>
#include <tuple> #include <tuple>
using namespace rr; using namespace rr;
...@@ -1896,6 +1897,150 @@ TEST(ReactorUnitTests, Coroutines_Parameters) ...@@ -1896,6 +1897,150 @@ TEST(ReactorUnitTests, Coroutines_Parameters)
EXPECT_EQ(out, 99); EXPECT_EQ(out, 99);
} }
// Regression test: Subzero's handling of vector types failed when more than
// one function is generated, as is the case with coroutines.
// The coroutine builds three Int4 vectors and yields one extracted lane from
// each; the host side checks that every await() succeeds and produces the
// expected lane value.
TEST(ReactorUnitTests, Coroutines_Vectors)
{
	if(!rr::Caps.CoroutinesSupported)
	{
		SUCCEED() << "Coroutines not supported";
		return;
	}

	Coroutine<int()> function;
	{
		Int4 a{ 1, 2, 3, 4 };
		Yield(rr::Extract(a, 2));

		Int4 b{ 5, 6, 7, 8 };
		Yield(rr::Extract(b, 1));

		Int4 c{ 9, 10, 11, 12 };
		Yield(rr::Extract(c, 1));
	}

	auto coroutine = function();

	// Initialized so that a failed await() can never lead to comparing an
	// indeterminate value below.
	int out = 0;

	// Each await() result is checked as well: a coroutine that stops yielding
	// early must be reported as such, not merely as a value mismatch.
	EXPECT_EQ(coroutine->await(out), true);
	EXPECT_EQ(out, 3);

	EXPECT_EQ(coroutine->await(out), true);
	EXPECT_EQ(out, 6);

	EXPECT_EQ(coroutine->await(out), true);
	EXPECT_EQ(out, 10);
}
// Checks that a coroutine whose body never calls Yield() still works: it
// executes like a regular function with no return (the declared return type
// is ignored), and await() is expected to return false.
// The scenario is run twice to make sure per-instance and/or global state is
// properly cleaned up in between.
TEST(ReactorUnitTests, Coroutines_NoYield)
{
	if(!rr::Caps.CoroutinesSupported)
	{
		SUCCEED() << "Coroutines not supported";
		return;
	}

	constexpr int kRuns = 2;
	for(int run = 0; run != kRuns; ++run)
	{
		// Build a coroutine body that contains no Yield() at all.
		Coroutine<int()> function;
		{
			Int a;
			a = 4;
		}

		auto coroutine = function();

		// Never written by the coroutine; only needed as the await() target.
		int out;
		EXPECT_EQ(coroutine->await(out), false);
	}
}
// Generates one coroutine and executes it on multiple threads at once. This
// makes sure the implementation manages per-call instance data correctly.
//
// The coroutine yields 0, 1 and then an endless Fibonacci-style sequence.
// Worker threads only record what they receive; every gtest assertion runs on
// the main thread after the workers have been joined, because GoogleTest only
// documents concurrent assertions as safe where pthreads is available.
TEST(ReactorUnitTests, Coroutines_Parallel)
{
	if(!rr::Caps.CoroutinesSupported)
	{
		SUCCEED() << "Coroutines not supported";
		return;
	}

	Coroutine<int()> function;
	{
		Yield(Int(0));
		Yield(Int(1));
		Int current = 1;
		Int next = 1;
		While(true)
		{
			Yield(next);
			auto tmp = current + next;
			current = next;
			next = tmp;
		}
	}

	// Must call on same thread that creates the coroutine
	function.finalize();

	constexpr int32_t expected[] = {
		0, 1, 1, 2, 3, 5, 8, 13, 21, 34,
		55, 89, 144, 233, 377, 610, 987, 1597, 2584, 4181,
		6765, 10946, 17711, 28657, 46368, 75025, 121393, 196418, 317811,
	};

	constexpr auto count = sizeof(expected) / sizeof(expected[0]);

	const size_t numThreads = 100;

	// One slot per worker. The outer vector is never resized while the
	// workers run and each worker touches only its own slot, so no locking
	// is required.
	std::vector<std::vector<int>> yielded(numThreads);

	std::vector<std::thread> threads;
	threads.reserve(numThreads);

	for(size_t t = 0; t < numThreads; ++t)
	{
		// `t` is captured by value: it changes on the spawning thread while
		// the worker is running.
		threads.emplace_back([&, t] {
			auto coroutine = function();
			auto &values = yielded[t];
			values.reserve(count);

			for(size_t i = 0; i < count; i++)
			{
				int out = 0;
				if(!coroutine->await(out))
				{
					// Stopped yielding early; the short result is
					// reported by the size check on the main thread.
					break;
				}
				values.push_back(out);
			}
		});
	}

	for(auto &t : threads)
	{
		t.join();
	}

	// Verify on the main thread only.
	for(size_t t = 0; t < numThreads; ++t)
	{
		const auto &values = yielded[t];
		EXPECT_EQ(values.size(), count) << "thread " << t << " stopped yielding early";

		for(size_t i = 0; i < values.size(); i++)
		{
			EXPECT_EQ(values[i], expected[i]) << "thread " << t << ", yield " << i;
		}
	}
}
template<typename TestFuncType, typename RefFuncType, typename TestValueType> template<typename TestFuncType, typename RefFuncType, typename TestValueType>
struct IntrinsicTestParams struct IntrinsicTestParams
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment