Commit 6b164c33 by Alexis Hétu, committed by Alexis Hétu

Use atomic operations to specify shared memory access order

TSAN detected many data race errors in the SwiftShader Renderer class. x86 has a strong memory ordering model which guarantees that changes are observed in the same order by other threads; however, C++ does not provide such guarantees unless they are specified using atomic operations. In order to fix these, a new AtomicInt class was added, which is basically a wrapper class for std::atomic<int> and which only exposes the portion of the API required by SwiftShader. Since std::atomic isn't available on older versions of Android, a fallback class was implemented without using std::atomic, which is closer to the previous implementation. Both classes appear to work properly after performing a few dEQP tests, and both also perform similarly. A few minor changes were made in order to reduce the use of atomic integer operations where possible.

Change-Id: Ife6d3a2b6113346f8f8163b692e79c2a0e03b22f
Reviewed-on: https://swiftshader-review.googlesource.com/12308
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
parent 0c5035be
......@@ -30,6 +30,17 @@
#include <stdlib.h>
#if defined(__clang__)
#define USE_STD_ATOMIC __has_include(<atomic>) // clang has an explicit check for the availability of atomic
// atomic is available in C++11 or newer, and in Visual Studio 2012 or newer
#elif (defined(_MSC_VER) && (_MSC_VER >= 1700)) || (__cplusplus >= 201103L)
#define USE_STD_ATOMIC 1
#endif
#if USE_STD_ATOMIC
#include <atomic>
#endif
namespace sw
{
class Event;
......@@ -265,7 +276,7 @@ namespace sw
inline int atomicAdd(volatile int* target, int value)
{
#if defined(_MSC_VER)
#if defined(_WIN32)
return InterlockedExchangeAdd((volatile long*)target, value) + value;
#else
return __sync_add_and_fetch(target, value);
......@@ -280,6 +291,46 @@ namespace sw
__asm__ __volatile__ ("nop");
#endif
}
#if USE_STD_ATOMIC
// Wrapper around std::atomic<int> exposing only the operations SwiftShader
// needs. Loads use acquire ordering and stores use release ordering, so a
// value published by one thread (and everything written before it) is
// visible to a thread that subsequently reads it.
//
// NOTE: unlike a built-in int, the postfix ++/-- operators return the NEW
// (post-operation) value. This matches the return convention of
// atomicIncrement()/atomicDecrement() and of the non-std::atomic fallback
// implementation; callers compare the returned value against 0 for
// reference counting, so this behavior must be preserved.
class AtomicInt
{
public:
	AtomicInt() : ai(0) {}       // explicitly zero-initialized
	AtomicInt(int i) : ai(i) {}

	// Acquire load: observes everything published before the matching
	// release store.
	inline operator int() const { return ai.load(std::memory_order_acquire); }

	// Assignment returns the stored value (like std::atomic::operator=),
	// allowing chained assignment. Callers ignoring the result, as with the
	// previous void-returning form, are unaffected.
	inline int operator=(const AtomicInt& i)
	{
		int value = i.ai.load(std::memory_order_acquire);
		ai.store(value, std::memory_order_release);
		return value;
	}
	inline int operator=(int i) { ai.store(i, std::memory_order_release); return i; }

	inline void operator--() { ai.fetch_sub(1, std::memory_order_acq_rel); }
	inline void operator++() { ai.fetch_add(1, std::memory_order_acq_rel); }

	// Postfix forms intentionally return the new value (see class note).
	inline int operator--(int) { return ai.fetch_sub(1, std::memory_order_acq_rel) - 1; }
	inline int operator++(int) { return ai.fetch_add(1, std::memory_order_acq_rel) + 1; }

	inline void operator-=(int i) { ai.fetch_sub(i, std::memory_order_acq_rel); }
	inline void operator+=(int i) { ai.fetch_add(i, std::memory_order_acq_rel); }

private:
	std::atomic<int> ai;
};
#else
// Fallback used when <atomic> is unavailable (see USE_STD_ATOMIC above):
// a volatile int mutated through the sw::atomic* helper functions. Ordering
// guarantees are weaker than the std::atomic variant and effectively rely
// on the platform's memory model.
class AtomicInt
{
public:
	// Zero-initialize so both AtomicInt implementations start from the same
	// state (the std::atomic variant value-initializes its member to 0);
	// previously vi was left uninitialized here.
	AtomicInt() : vi(0) {}
	AtomicInt(int i) : vi(i) {}

	inline operator int() const { return vi; } // Note: this isn't a guaranteed atomic operation

	inline void operator=(const AtomicInt& i) { sw::atomicExchange(&vi, i.vi); }
	inline void operator=(int i) { sw::atomicExchange(&vi, i); }

	inline void operator--() { sw::atomicDecrement(&vi); }
	inline void operator++() { sw::atomicIncrement(&vi); }

	// Postfix forms return the value produced by atomicDecrement()/
	// atomicIncrement() — presumably the new (post-operation) value, the
	// same non-standard convention as the std::atomic variant. TODO confirm
	// against the helper implementations.
	inline int operator--(int) { return sw::atomicDecrement(&vi); }
	inline int operator++(int) { return sw::atomicIncrement(&vi); }

	inline void operator-=(int i) { sw::atomicAdd(&vi, -i); }
	inline void operator+=(int i) { sw::atomicAdd(&vi, i); }

private:
	volatile int vi;
};
#endif
}
#endif // sw_Thread_hpp
......@@ -60,20 +60,21 @@ namespace sw
if(clipFlagsOr & CLIP_USER)
{
int clipFlags = draw.clipFlags;
DrawData &data = *draw.data;
if(polygon.n >= 3) {
if(draw.clipFlags & CLIP_PLANE0) clipPlane(polygon, data.clipPlane[0]);
if(clipFlags & CLIP_PLANE0) clipPlane(polygon, data.clipPlane[0]);
if(polygon.n >= 3) {
if(draw.clipFlags & CLIP_PLANE1) clipPlane(polygon, data.clipPlane[1]);
if(clipFlags & CLIP_PLANE1) clipPlane(polygon, data.clipPlane[1]);
if(polygon.n >= 3) {
if(draw.clipFlags & CLIP_PLANE2) clipPlane(polygon, data.clipPlane[2]);
if(clipFlags & CLIP_PLANE2) clipPlane(polygon, data.clipPlane[2]);
if(polygon.n >= 3) {
if(draw.clipFlags & CLIP_PLANE3) clipPlane(polygon, data.clipPlane[3]);
if(clipFlags & CLIP_PLANE3) clipPlane(polygon, data.clipPlane[3]);
if(polygon.n >= 3) {
if(draw.clipFlags & CLIP_PLANE4) clipPlane(polygon, data.clipPlane[4]);
if(clipFlags & CLIP_PLANE4) clipPlane(polygon, data.clipPlane[4]);
if(polygon.n >= 3) {
if(draw.clipFlags & CLIP_PLANE5) clipPlane(polygon, data.clipPlane[5]);
if(clipFlags & CLIP_PLANE5) clipPlane(polygon, data.clipPlane[5]);
}}}}}}
}
......
......@@ -314,7 +314,7 @@ namespace sw
Query* q = *query;
if(includePrimitivesWrittenQueries || (q->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN))
{
atomicIncrement(&(q->reference));
++q->reference; // Atomic
draw->queries->push_back(q);
}
}
......@@ -645,7 +645,7 @@ namespace sw
draw->references = (count + batch - 1) / batch;
schedulerMutex.lock();
nextDraw++;
++nextDraw; // Atomic
schedulerMutex.unlock();
#ifndef NDEBUG
......@@ -771,9 +771,12 @@ namespace sw
{
DrawCall *draw = drawList[currentDraw % DRAW_COUNT];
if(draw->primitive >= draw->count)
int primitive = draw->primitive;
int count = draw->count;
if(primitive >= count)
{
currentDraw++;
++currentDraw; // Atomic
if(currentDraw == nextDraw)
{
......@@ -785,8 +788,8 @@ namespace sw
if(!primitiveProgress[unit].references) // Task not already being executed and not still in use by a pixel unit
{
int primitive = draw->primitive;
int count = draw->count;
primitive = draw->primitive;
count = draw->count;
int batch = draw->batchSize;
primitiveProgress[unit].drawCall = currentDraw;
......@@ -812,7 +815,9 @@ namespace sw
{
schedulerMutex.lock();
if((int)qSize < threadCount - threadsAwake + 1)
int curThreadsAwake = threadsAwake;
if((int)qSize < threadCount - curThreadsAwake + 1)
{
findAvailableTasks();
}
......@@ -822,9 +827,9 @@ namespace sw
task[threadIndex] = taskQueue[(qHead - qSize) % 32];
qSize--;
if(threadsAwake != threadCount)
if(curThreadsAwake != threadCount)
{
int wakeup = qSize - threadsAwake + 1;
int wakeup = qSize - curThreadsAwake + 1;
for(int i = 0; i < threadCount && wakeup > 0; i++)
{
......@@ -834,7 +839,7 @@ namespace sw
task[i].type = Task::RESUME;
resume[i]->signal();
threadsAwake++;
++threadsAwake; // Atomic
wakeup--;
}
}
......@@ -844,7 +849,7 @@ namespace sw
{
task[threadIndex].type = Task::SUSPEND;
threadsAwake--;
--threadsAwake; // Atomic
}
schedulerMutex.unlock();
......@@ -943,15 +948,15 @@ namespace sw
if(pixelProgress[cluster].processedPrimitives >= draw.count)
{
pixelProgress[cluster].drawCall++;
++pixelProgress[cluster].drawCall; // Atomic
pixelProgress[cluster].processedPrimitives = 0;
}
int ref = atomicDecrement(&primitiveProgress[unit].references);
int ref = primitiveProgress[unit].references--; // Atomic
if(ref == 0)
{
ref = atomicDecrement(&draw.references);
ref = draw.references--; // Atomic
if(ref == 0)
{
......@@ -976,17 +981,17 @@ namespace sw
case Query::FRAGMENTS_PASSED:
for(int cluster = 0; cluster < clusterCount; cluster++)
{
atomicAdd((volatile int*)&query->data, data.occlusion[cluster]);
query->data += data.occlusion[cluster];
}
break;
case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
atomicAdd((volatile int*)&query->data, processedPrimitives);
query->data += processedPrimitives;
break;
default:
break;
}
atomicDecrement(&query->reference);
--query->reference; // Atomic
}
delete draw.queries;
......@@ -1069,17 +1074,18 @@ namespace sw
void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread)
{
Triangle *triangle = triangleBatch[unit];
DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
int primitiveDrawCall = primitiveProgress[unit].drawCall;
DrawCall *draw = drawList[primitiveDrawCall % DRAW_COUNT];
DrawData *data = draw->data;
VertexTask *task = vertexTask[thread];
const void *indices = data->indices;
VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
if(task->vertexCache.drawCall != primitiveProgress[unit].drawCall)
if(task->vertexCache.drawCall != primitiveDrawCall)
{
task->vertexCache.clear();
task->vertexCache.drawCall = primitiveProgress[unit].drawCall;
task->vertexCache.drawCall = primitiveDrawCall;
}
unsigned int batch[128][3]; // FIXME: Adjust to dynamic batch size
......
......@@ -110,8 +110,8 @@ namespace sw
}
bool building;
volatile int reference;
volatile unsigned int data;
AtomicInt reference;
AtomicInt data;
const Type type;
};
......@@ -213,8 +213,8 @@ namespace sw
~DrawCall();
DrawType drawType;
int batchSize;
AtomicInt drawType;
AtomicInt batchSize;
Routine *vertexRoutine;
Routine *setupRoutine;
......@@ -247,11 +247,11 @@ namespace sw
std::list<Query*> *queries;
int clipFlags;
AtomicInt clipFlags;
volatile int primitive; // Current primitive to enter pipeline
volatile int count; // Number of primitives to render
volatile int references; // Remaining references to this draw call, 0 when done drawing, -1 when resources unlocked and slot is free
AtomicInt primitive; // Current primitive to enter pipeline
AtomicInt count; // Number of primitives to render
AtomicInt references; // Remaining references to this draw call, 0 when done drawing, -1 when resources unlocked and slot is free
DrawData *data;
};
......@@ -279,9 +279,9 @@ namespace sw
SUSPEND
};
volatile Type type;
volatile int primitiveUnit;
volatile int pixelCluster;
AtomicInt type;
AtomicInt primitiveUnit;
AtomicInt pixelCluster;
};
struct PrimitiveProgress
......@@ -295,11 +295,11 @@ namespace sw
references = 0;
}
volatile int drawCall;
volatile int firstPrimitive;
volatile int primitiveCount;
volatile int visible;
volatile int references;
AtomicInt drawCall;
AtomicInt firstPrimitive;
AtomicInt primitiveCount;
AtomicInt visible;
AtomicInt references;
};
struct PixelProgress
......@@ -311,9 +311,9 @@ namespace sw
executing = false;
}
volatile int drawCall;
volatile int processedPrimitives;
volatile bool executing;
AtomicInt drawCall;
AtomicInt processedPrimitives;
AtomicInt executing;
};
public:
......@@ -449,8 +449,8 @@ namespace sw
Plane clipPlane[MAX_CLIP_PLANES]; // Tranformed to clip space
bool updateClipPlanes;
volatile bool exitThreads;
volatile int threadsAwake;
AtomicInt exitThreads;
AtomicInt threadsAwake;
Thread *worker[16];
Event *resume[16]; // Events for resuming threads
Event *suspend[16]; // Events for suspending threads
......@@ -464,8 +464,8 @@ namespace sw
DrawCall *drawCall[DRAW_COUNT];
DrawCall *drawList[DRAW_COUNT];
volatile int currentDraw;
volatile int nextDraw;
AtomicInt currentDraw;
AtomicInt nextDraw;
Task taskQueue[32];
unsigned int qHead;
......
......@@ -245,7 +245,7 @@ namespace sw
int sliceB;
int sliceP;
Format format;
Lock lock;
AtomicInt lock;
bool dirty;
};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment