Commit 4e87e965 by Maxime Grégoire

Replacement of the pthread call by SwiftShader Thread and Resource objects

Change-Id: I7f2568196bdfeaab7902c38db6b2fbda6d9a2501
Reviewed-on: https://swiftshader-review.googlesource.com/3707
Reviewed-by: Maxime Grégoire <mgregoire@google.com>
Tested-by: Maxime Grégoire <mgregoire@google.com>
parent 73f2bd88
...@@ -95,6 +95,7 @@ ...@@ -95,6 +95,7 @@
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary> <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
<PrecompiledHeaderFile /> <PrecompiledHeaderFile />
<BrowseInformation>true</BrowseInformation> <BrowseInformation>true</BrowseInformation>
<AdditionalIncludeDirectories>../../Common</AdditionalIncludeDirectories>
</ClCompile> </ClCompile>
<Link> <Link>
<SubSystem>Windows</SubSystem> <SubSystem>Windows</SubSystem>
...@@ -118,7 +119,7 @@ ...@@ -118,7 +119,7 @@
<PrecompiledHeaderFile> <PrecompiledHeaderFile>
</PrecompiledHeaderFile> </PrecompiledHeaderFile>
<BrowseInformation>true</BrowseInformation> <BrowseInformation>true</BrowseInformation>
<AdditionalIncludeDirectories>C:\Users\mgregoire\Documents\Visual Studio 2013\Projects\SwiftCL\pthread\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> <AdditionalIncludeDirectories>../../Common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile> </ClCompile>
<Link> <Link>
<SubSystem>Windows</SubSystem> <SubSystem>Windows</SubSystem>
...@@ -137,6 +138,7 @@ ...@@ -137,6 +138,7 @@
<IntrinsicFunctions>true</IntrinsicFunctions> <IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;OPENCL1_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;OPENCL1_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck> <SDLCheck>true</SDLCheck>
<AdditionalIncludeDirectories>../../Common</AdditionalIncludeDirectories>
</ClCompile> </ClCompile>
<Link> <Link>
<SubSystem>Windows</SubSystem> <SubSystem>Windows</SubSystem>
...@@ -161,7 +163,7 @@ ...@@ -161,7 +163,7 @@
<FavorSizeOrSpeed>Size</FavorSizeOrSpeed> <FavorSizeOrSpeed>Size</FavorSizeOrSpeed>
<OmitFramePointers>true</OmitFramePointers> <OmitFramePointers>true</OmitFramePointers>
<EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations> <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
<AdditionalIncludeDirectories>C:\Users\mgregoire\Documents\Visual Studio 2013\Projects\SwiftCL\pthread\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> <AdditionalIncludeDirectories>../../Common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile> </ClCompile>
<Link> <Link>
<SubSystem>Windows</SubSystem> <SubSystem>Windows</SubSystem>
...@@ -175,6 +177,10 @@ ...@@ -175,6 +177,10 @@
</Link> </Link>
</ItemDefinitionGroup> </ItemDefinitionGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="..\..\Common\CPUID.hpp" />
<ClInclude Include="..\..\Common\Memory.hpp" />
<ClInclude Include="..\..\Common\Resource.hpp" />
<ClInclude Include="..\..\Common\Thread.hpp" />
<ClInclude Include="buffer.h" /> <ClInclude Include="buffer.h" />
<ClInclude Include="builtins.h" /> <ClInclude Include="builtins.h" />
<ClInclude Include="cl.h" /> <ClInclude Include="cl.h" />
...@@ -199,6 +205,10 @@ ...@@ -199,6 +205,10 @@
<ClInclude Include="worker.h" /> <ClInclude Include="worker.h" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClCompile Include="..\..\Common\CPUID.cpp" />
<ClCompile Include="..\..\Common\Memory.cpp" />
<ClCompile Include="..\..\Common\Resource.cpp" />
<ClCompile Include="..\..\Common\Thread.cpp" />
<ClCompile Include="buffer.cpp" /> <ClCompile Include="buffer.cpp" />
<ClCompile Include="builtins.cpp" /> <ClCompile Include="builtins.cpp" />
<ClCompile Include="commandqueue.cpp" /> <ClCompile Include="commandqueue.cpp" />
......
...@@ -25,8 +25,8 @@ CommandQueue::CommandQueue(Context *ctx, ...@@ -25,8 +25,8 @@ CommandQueue::CommandQueue(Context *ctx,
p_properties(properties), p_flushed(true) p_properties(properties), p_flushed(true)
{ {
// Initialize the locking machinery // Initialize the locking machinery
pthread_mutex_init(&p_event_list_mutex, 0); p_event_list_mutex = new sw::Resource(0);
pthread_cond_init(&p_event_list_cond, 0); p_event_list_cond = new sw::Event();
// Check that the device belongs to the context // Check that the device belongs to the context
if(!ctx->hasDevice(device)) if(!ctx->hasDevice(device))
...@@ -41,8 +41,12 @@ CommandQueue::CommandQueue(Context *ctx, ...@@ -41,8 +41,12 @@ CommandQueue::CommandQueue(Context *ctx,
CommandQueue::~CommandQueue() CommandQueue::~CommandQueue()
{ {
// Free the mutex // Free the mutex
pthread_mutex_destroy(&p_event_list_mutex); p_event_list_mutex->lock(sw::DESTRUCT);
pthread_cond_destroy(&p_event_list_cond); p_event_list_mutex->unlock();
p_event_list_mutex->destruct();
p_event_list_cond->signal();
delete p_event_list_cond;
} }
cl_int CommandQueue::info(cl_command_queue_info param_name, cl_int CommandQueue::info(cl_command_queue_info param_name,
...@@ -139,12 +143,12 @@ cl_int CommandQueue::checkProperties() const ...@@ -139,12 +143,12 @@ cl_int CommandQueue::checkProperties() const
void CommandQueue::flush() void CommandQueue::flush()
{ {
// Wait for the command queue to be in state "flushed". // Wait for the command queue to be in state "flushed".
pthread_mutex_lock(&p_event_list_mutex); p_event_list_mutex->lock(sw::PRIVATE);
while(!p_flushed) while(!p_flushed)
pthread_cond_wait(&p_event_list_cond, &p_event_list_mutex); p_event_list_cond->wait();
pthread_mutex_unlock(&p_event_list_mutex); p_event_list_mutex->unlock();
} }
void CommandQueue::finish() void CommandQueue::finish()
...@@ -155,12 +159,12 @@ void CommandQueue::finish() ...@@ -155,12 +159,12 @@ void CommandQueue::finish()
// All the queued events must have completed. When they are, they get // All the queued events must have completed. When they are, they get
// deleted from the command queue, so simply wait for it to become empty. // deleted from the command queue, so simply wait for it to become empty.
pthread_mutex_lock(&p_event_list_mutex); p_event_list_mutex->lock(sw::PRIVATE);
while(p_events.size() != 0) while(p_events.size() != 0)
pthread_cond_wait(&p_event_list_cond, &p_event_list_mutex); p_event_list_cond->wait();
pthread_mutex_unlock(&p_event_list_mutex); p_event_list_mutex->unlock();
} }
cl_int CommandQueue::queueEvent(Event *event) cl_int CommandQueue::queueEvent(Event *event)
...@@ -173,12 +177,12 @@ cl_int CommandQueue::queueEvent(Event *event) ...@@ -173,12 +177,12 @@ cl_int CommandQueue::queueEvent(Event *event)
return rs; return rs;
// Append the event at the end of the list // Append the event at the end of the list
pthread_mutex_lock(&p_event_list_mutex); p_event_list_mutex->lock(sw::PRIVATE);
p_events.push_back(event); p_events.push_back(event);
p_flushed = false; p_flushed = false;
pthread_mutex_unlock(&p_event_list_mutex); p_event_list_mutex->unlock();
// Timing info if needed // Timing info if needed
if(p_properties & CL_QUEUE_PROFILING_ENABLE) if(p_properties & CL_QUEUE_PROFILING_ENABLE)
...@@ -192,7 +196,7 @@ cl_int CommandQueue::queueEvent(Event *event) ...@@ -192,7 +196,7 @@ cl_int CommandQueue::queueEvent(Event *event)
void CommandQueue::cleanEvents() void CommandQueue::cleanEvents()
{ {
pthread_mutex_lock(&p_event_list_mutex); p_event_list_mutex->lock(sw::PRIVATE);
std::list<Event *>::iterator it = p_events.begin(), oldit; std::list<Event *>::iterator it = p_events.begin(), oldit;
...@@ -218,9 +222,9 @@ void CommandQueue::cleanEvents() ...@@ -218,9 +222,9 @@ void CommandQueue::cleanEvents()
// We have cleared the list, so wake up the sleeping threads // We have cleared the list, so wake up the sleeping threads
if(p_events.size() == 0) if(p_events.size() == 0)
pthread_cond_broadcast(&p_event_list_cond); p_event_list_cond->signal();
pthread_mutex_unlock(&p_event_list_mutex); p_event_list_mutex->unlock();
// Check now if we have to be deleted // Check now if we have to be deleted
if(references() == 0) if(references() == 0)
...@@ -229,7 +233,7 @@ void CommandQueue::cleanEvents() ...@@ -229,7 +233,7 @@ void CommandQueue::cleanEvents()
void CommandQueue::pushEventsOnDevice() void CommandQueue::pushEventsOnDevice()
{ {
pthread_mutex_lock(&p_event_list_mutex); p_event_list_mutex->lock(sw::PRIVATE);
// Explore the events in p_events and push on the device all of them that // Explore the events in p_events and push on the device all of them that
// are : // are :
// //
...@@ -328,23 +332,23 @@ void CommandQueue::pushEventsOnDevice() ...@@ -328,23 +332,23 @@ void CommandQueue::pushEventsOnDevice()
// Set the event as completed. This will call pushEventsOnDevice, // Set the event as completed. This will call pushEventsOnDevice,
// again, so release the lock to avoid a deadlock. We also return // again, so release the lock to avoid a deadlock. We also return
// because the recursive call will continue our work. // because the recursive call will continue our work.
pthread_mutex_unlock(&p_event_list_mutex); p_event_list_mutex->unlock();
event->setStatus(Event::Complete); event->setStatus(Event::Complete);
return; return;
} }
} }
if(p_flushed) if(p_flushed)
pthread_cond_broadcast(&p_event_list_cond); p_event_list_cond->signal();
pthread_mutex_unlock(&p_event_list_mutex); p_event_list_mutex->unlock();
} }
Event **CommandQueue::events(unsigned int &count) Event **CommandQueue::events(unsigned int &count)
{ {
Event **result; Event **result;
pthread_mutex_lock(&p_event_list_mutex); p_event_list_mutex->lock(sw::PRIVATE);
count = p_events.size(); count = p_events.size();
result = (Event **)std::malloc(count * sizeof(Event *)); result = (Event **)std::malloc(count * sizeof(Event *));
...@@ -365,7 +369,7 @@ Event **CommandQueue::events(unsigned int &count) ...@@ -365,7 +369,7 @@ Event **CommandQueue::events(unsigned int &count)
// Now result contains an immutable list of events. Even if the events // Now result contains an immutable list of events. Even if the events
// become completed in another thread while result is used, the events // become completed in another thread while result is used, the events
// are retained and so guaranteed to remain valid. // are retained and so guaranteed to remain valid.
pthread_mutex_unlock(&p_event_list_mutex); p_event_list_mutex->unlock();
return result; return result;
} }
...@@ -384,8 +388,8 @@ Event::Event(CommandQueue *parent, ...@@ -384,8 +388,8 @@ Event::Event(CommandQueue *parent,
p_status(status), p_device_data(0) p_status(status), p_device_data(0)
{ {
// Initialize the locking machinery // Initialize the locking machinery
pthread_cond_init(&p_state_change_cond, 0); p_state_change_cond = new sw::Event();
pthread_mutex_init(&p_state_mutex, 0); p_state_mutex = new sw::Resource(0);
std::memset(&p_timing, 0, sizeof(p_timing)); std::memset(&p_timing, 0, sizeof(p_timing));
...@@ -461,8 +465,13 @@ Event::~Event() ...@@ -461,8 +465,13 @@ Event::~Event()
if(p_event_wait_list) if(p_event_wait_list)
std::free((void *)p_event_wait_list); std::free((void *)p_event_wait_list);
pthread_mutex_destroy(&p_state_mutex); // Free the mutex
pthread_cond_destroy(&p_state_change_cond); p_state_mutex->lock(sw::DESTRUCT);
p_state_mutex->unlock();
p_state_mutex->destruct();
p_state_change_cond->signal();
delete p_state_change_cond;
} }
bool Event::isDummy() const bool Event::isDummy() const
...@@ -486,10 +495,10 @@ bool Event::isDummy() const ...@@ -486,10 +495,10 @@ bool Event::isDummy() const
void Event::setStatus(Status status) void Event::setStatus(Status status)
{ {
// TODO: If status < 0, terminate all the events depending on us. // TODO: If status < 0, terminate all the events depending on us.
pthread_mutex_lock(&p_state_mutex); p_state_mutex->lock(sw::PRIVATE);
p_status = status; p_status = status;
pthread_cond_broadcast(&p_state_change_cond); p_state_change_cond->signal();
// Call the callbacks // Call the callbacks
std::multimap<Status, CallbackData>::const_iterator it; std::multimap<Status, CallbackData>::const_iterator it;
...@@ -504,7 +513,7 @@ void Event::setStatus(Status status) ...@@ -504,7 +513,7 @@ void Event::setStatus(Status status)
data.callback((cl_event)this, p_status, data.user_data); data.callback((cl_event)this, p_status, data.user_data);
} }
pthread_mutex_unlock(&p_state_mutex); p_state_mutex->unlock();
// If the event is completed, inform our parent so it can push other events // If the event is completed, inform our parent so it can push other events
// to the device. // to the device.
...@@ -586,12 +595,12 @@ void Event::updateTiming(Timing timing) ...@@ -586,12 +595,12 @@ void Event::updateTiming(Timing timing)
if(timing >= Max) if(timing >= Max)
return; return;
pthread_mutex_lock(&p_state_mutex); p_state_mutex->lock(sw::PRIVATE);
// Don't update more than one time (NDRangeKernel for example) // Don't update more than one time (NDRangeKernel for example)
if(p_timing[timing]) if(p_timing[timing])
{ {
pthread_mutex_unlock(&p_state_mutex); p_state_mutex->unlock();
return; return;
} }
...@@ -610,7 +619,7 @@ void Event::updateTiming(Timing timing) ...@@ -610,7 +619,7 @@ void Event::updateTiming(Timing timing)
p_timing[timing] = rs; p_timing[timing] = rs;
pthread_mutex_unlock(&p_state_mutex); p_state_mutex->unlock();
} }
Event::Status Event::status() const Event::Status Event::status() const
...@@ -618,25 +627,25 @@ Event::Status Event::status() const ...@@ -618,25 +627,25 @@ Event::Status Event::status() const
// HACK : We need const qualifier but we also need to lock a mutex // HACK : We need const qualifier but we also need to lock a mutex
Event *me = (Event *)(void *)this; Event *me = (Event *)(void *)this;
pthread_mutex_lock(&me->p_state_mutex); me->p_state_mutex->lock(sw::PRIVATE);
Status ret = p_status; Status ret = p_status;
pthread_mutex_unlock(&me->p_state_mutex); me->p_state_mutex->unlock();
return ret; return ret;
} }
void Event::waitForStatus(Status status) void Event::waitForStatus(Status status)
{ {
pthread_mutex_lock(&p_state_mutex); p_state_mutex->lock(sw::PRIVATE);
while(p_status != status && p_status > 0) while(p_status != status && p_status > 0)
{ {
pthread_cond_wait(&p_state_change_cond, &p_state_mutex); p_state_change_cond->signal();
} }
pthread_mutex_unlock(&p_state_mutex); p_state_mutex->unlock();
} }
void *Event::deviceData() void *Event::deviceData()
...@@ -659,13 +668,13 @@ void Event::setCallback(cl_int command_exec_callback_type, ...@@ -659,13 +668,13 @@ void Event::setCallback(cl_int command_exec_callback_type,
data.callback = callback; data.callback = callback;
data.user_data = user_data; data.user_data = user_data;
pthread_mutex_lock(&p_state_mutex); p_state_mutex->lock(sw::PRIVATE);
p_callbacks.insert(std::pair<Status, CallbackData>( p_callbacks.insert(std::pair<Status, CallbackData>(
(Status)command_exec_callback_type, (Status)command_exec_callback_type,
data)); data));
pthread_mutex_unlock(&p_state_mutex); p_state_mutex->unlock();
} }
cl_int Event::info(cl_event_info param_name, cl_int Event::info(cl_event_info param_name,
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
#include "object.h" #include "object.h"
#include "opencl.h" #include "opencl.h"
#include "pthread.h" #include "Resource.hpp"
#include <map> #include <map>
#include <list> #include <list>
...@@ -148,8 +148,8 @@ namespace Devices ...@@ -148,8 +148,8 @@ namespace Devices
cl_command_queue_properties p_properties; cl_command_queue_properties p_properties;
std::list<Event *> p_events; std::list<Event *> p_events;
pthread_mutex_t p_event_list_mutex; sw::Resource *p_event_list_mutex;
pthread_cond_t p_event_list_cond; sw::Event *p_event_list_cond;
bool p_flushed; bool p_flushed;
}; };
...@@ -359,8 +359,8 @@ namespace Devices ...@@ -359,8 +359,8 @@ namespace Devices
cl_uint p_num_events_in_wait_list; cl_uint p_num_events_in_wait_list;
const Event **p_event_wait_list; const Event **p_event_wait_list;
pthread_cond_t p_state_change_cond; sw::Event *p_state_change_cond;
pthread_mutex_t p_state_mutex; sw::Resource *p_state_mutex;
Status p_status; Status p_status;
void *p_device_data; void *p_device_data;
......
...@@ -6,13 +6,13 @@ ...@@ -6,13 +6,13 @@
#include "program.h" #include "program.h"
#include "worker.h" #include "worker.h"
#include "builtins.h" #include "builtins.h"
#include "propertylist.h" #include "propertylist.h"
#include "commandqueue.h" #include "commandqueue.h"
#include "events.h" #include "events.h"
#include "memobject.h" #include "memobject.h"
#include "kernel.h"
#include "program.h" #include "Thread.hpp"
#include "CPUID.hpp"
#include <cstring> #include <cstring>
#include <cstdlib> #include <cstdlib>
...@@ -24,84 +24,48 @@ ...@@ -24,84 +24,48 @@
using namespace Devices; using namespace Devices;
CPUDevice::CPUDevice() CPUDevice::CPUDevice()
: DeviceInterface(), p_cores(0), p_num_events(0), p_workers(0), p_stop(false), : DeviceInterface(), p_num_events(0), p_stop(false)
p_initialized(false)
{ {
} }
void CPUDevice::init() void CPUDevice::init()
{ {
if(p_initialized)
return;
// Initialize the locking machinery // Initialize the locking machinery
pthread_cond_init(&p_events_cond, 0); p_events_cond = new sw::Event();
pthread_mutex_init(&p_events_mutex, 0); eventListResource = new sw::Resource(0);
//TODO
// Get info about the system
/*p_cores = sysconf(_SC_NPROCESSORS_ONLN);
p_cpu_mhz = 0.0f;
std::filebuf fb;
fb.open("/proc/cpuinfo", std::ios::in);
std::istream is(&fb);
while(!is.eof())
{
std::string key, value;
std::getline(is, key, ':');
is.ignore(1);
std::getline(is, value);
if(key.compare(0, 7, "cpu MHz") == 0)
{
std::istringstream ss(value);
ss >> p_cpu_mhz;
break;
}
}*/
//TODO CHANGE pcore value to real //TODO CHANGE pcore value to real
p_cores = 1;
p_cpu_mhz = 3200; p_cpu_mhz = 3200;
// Create worker threads
p_workers = (pthread_t *)std::malloc(numCPUs() * sizeof(pthread_t));
for(unsigned int i = 0; i<numCPUs(); ++i) // Create worker threads
for(unsigned int i = 0; i < sw::CPUID::coreCount(); ++i)
{ {
pthread_create(&p_workers[i], 0, &worker, this); p_workers[i] = new sw::Thread(worker, this);
} }
p_initialized = true;
} }
CPUDevice::~CPUDevice() CPUDevice::~CPUDevice()
{ {
if(!p_initialized)
return;
// Terminate the workers and wait for them
pthread_mutex_lock(&p_events_mutex);
p_stop = true; p_stop = true;
pthread_cond_broadcast(&p_events_cond); for(int thread = 0; thread < sw::CPUID::coreCount(); thread++)
pthread_mutex_unlock(&p_events_mutex);
//TODO
for(unsigned int i = 0; i<numCPUs(); ++i)
{ {
pthread_join(p_workers[i], 0); if(p_workers[thread])
{
p_workers[thread]->join();
delete p_workers[thread];
p_workers[thread] = 0;
}
} }
// Free allocated memory p_events_cond->signal();
std::free((void *)p_workers); delete p_events_cond;
pthread_mutex_destroy(&p_events_mutex);
p_events_cond = NULL; eventListResource->lock(sw::DESTRUCT);
//pthread_cond_destroy(&p_events_cond); eventListResource->unlock();
eventListResource->destruct();
} }
DeviceBuffer *CPUDevice::createDeviceBuffer(MemObject *buffer, cl_int *rs) DeviceBuffer *CPUDevice::createDeviceBuffer(MemObject *buffer, cl_int *rs)
...@@ -203,28 +167,27 @@ void CPUDevice::freeEventDeviceData(Event *event) ...@@ -203,28 +167,27 @@ void CPUDevice::freeEventDeviceData(Event *event)
void CPUDevice::pushEvent(Event *event) void CPUDevice::pushEvent(Event *event)
{ {
// Add an event in the list eventListResource->lock(sw::PRIVATE);
pthread_mutex_lock(&p_events_mutex);
p_events.push_back(event); p_events.push_back(event);
p_num_events++; // Way faster than STL list::size() ! p_num_events++;
p_events_cond->signal();
pthread_cond_broadcast(&p_events_cond); eventListResource->unlock();
pthread_mutex_unlock(&p_events_mutex);
} }
Event *CPUDevice::getEvent(bool &stop) Event *CPUDevice::getEvent(bool &stop)
{ {
eventListResource->lock(sw::PRIVATE);
// Return the first event in the list, if any. Remove it if it is a // Return the first event in the list, if any. Remove it if it is a
// single-shot event. // single-shot event.
pthread_mutex_lock(&p_events_mutex);
while(p_num_events == 0 && !p_stop) while(p_num_events == 0 && !p_stop)
pthread_cond_wait(&p_events_cond, &p_events_mutex); p_events_cond->wait();
if(p_stop) if(p_stop)
{ {
pthread_mutex_unlock(&p_events_mutex); eventListResource->unlock();
stop = true; stop = true;
return 0; return 0;
} }
...@@ -247,16 +210,11 @@ Event *CPUDevice::getEvent(bool &stop) ...@@ -247,16 +210,11 @@ Event *CPUDevice::getEvent(bool &stop)
p_events.pop_front(); p_events.pop_front();
} }
pthread_mutex_unlock(&p_events_mutex); eventListResource->unlock();
return event; return event;
} }
unsigned int CPUDevice::numCPUs() const
{
return p_cores;
}
float CPUDevice::cpuMhz() const float CPUDevice::cpuMhz() const
{ {
return p_cpu_mhz; return p_cpu_mhz;
...@@ -309,7 +267,7 @@ cl_int CPUDevice::info(cl_device_info param_name, ...@@ -309,7 +267,7 @@ cl_int CPUDevice::info(cl_device_info param_name,
break; break;
case CL_DEVICE_MAX_COMPUTE_UNITS: case CL_DEVICE_MAX_COMPUTE_UNITS:
SIMPLE_ASSIGN(cl_uint, numCPUs()); SIMPLE_ASSIGN(cl_uint, sw::CPUID::coreCount());
break; break;
case CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS: case CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:
......
...@@ -4,11 +4,13 @@ ...@@ -4,11 +4,13 @@
#ifndef __CPU_DEVICE_H__ #ifndef __CPU_DEVICE_H__
#define __CPU_DEVICE_H__ #define __CPU_DEVICE_H__
#define MAX_THREAD_AMOUNT 16
#include <list> #include <list>
#include "opencl.h" #include "opencl.h"
#include "device_interface.h" #include "device_interface.h"
#include "pthread.h" #include "Resource.hpp"
namespace Devices namespace Devices
{ {
...@@ -43,18 +45,16 @@ public: ...@@ -43,18 +45,16 @@ public:
void pushEvent(Event *event); void pushEvent(Event *event);
Event *getEvent(bool &stop); Event *getEvent(bool &stop);
unsigned int numCPUs() const; /*!< \brief Number of logical CPU cores on the system */
float cpuMhz() const; /*!< \brief Speed of the CPU in Mhz */ float cpuMhz() const; /*!< \brief Speed of the CPU in Mhz */
private: private:
unsigned int p_cores, p_num_events; unsigned int p_num_events;
float p_cpu_mhz; float p_cpu_mhz;
pthread_t *p_workers; sw::Thread *p_workers[MAX_THREAD_AMOUNT];
sw::Resource *eventListResource;
sw::Event *p_events_cond;
std::list<Event *> p_events; std::list<Event *> p_events;
pthread_cond_t p_events_cond; bool p_stop;
pthread_mutex_t p_events_mutex;
bool p_stop, p_initialized;
}; };
//class GPUDevice : public DeviceInterface //class GPUDevice : public DeviceInterface
......
// dllmain.cpp : Defines the entry point for the DLL application. // dllmain.cpp : Defines the entry point for the DLL application.
#include "dllmain.h"
#include "windows.h" #include "windows.h"
#include "opencl.h" #include "opencl.h"
#include "Thread.hpp"
#include "debug.h" #include "debug.h"
#include <windows.h> #include <windows.h>
#include <intrin.h> #include <intrin.h>
#include <WinUser.h> #include <WinUser.h>
#include "dllmain.h"
#if defined(_WIN32)
typedef DWORD LocalStorageKey;
#else
typedef pthread_key_t LocalStorageKey;
#endif
static LocalStorageKey currentTLS = TLS_OUT_OF_INDEXES; static sw::Thread::LocalStorageKey currentTLS = TLS_OUT_OF_INDEXES;
#if defined(_WIN32) #if defined(_WIN32)
#define IDD_DIALOG1 101 #define IDD_DIALOG1 101
......
...@@ -28,7 +28,7 @@ CPUKernel::CPUKernel(CPUDevice *device, Kernel *kernel, llvm::Function *function ...@@ -28,7 +28,7 @@ CPUKernel::CPUKernel(CPUDevice *device, Kernel *kernel, llvm::Function *function
: DeviceKernel(), p_device(device), p_kernel(kernel), p_function(function), : DeviceKernel(), p_device(device), p_kernel(kernel), p_function(function),
p_call_function(0) p_call_function(0)
{ {
pthread_mutex_init(&p_call_function_mutex, 0); p_call_function_mutex = new sw::Resource(0);
} }
CPUKernel::~CPUKernel() CPUKernel::~CPUKernel()
...@@ -37,7 +37,9 @@ CPUKernel::~CPUKernel() ...@@ -37,7 +37,9 @@ CPUKernel::~CPUKernel()
//if(p_call_function) //if(p_call_function)
//p_call_function->eraseFromParent(); //p_call_function->eraseFromParent();
pthread_mutex_destroy(&p_call_function_mutex); p_call_function_mutex->lock(sw::DESTRUCT);
p_call_function_mutex->unlock();
p_call_function_mutex->destruct();
} }
size_t CPUKernel::workGroupSize() const size_t CPUKernel::workGroupSize() const
...@@ -76,7 +78,7 @@ T k_exp(T base, unsigned int e) ...@@ -76,7 +78,7 @@ T k_exp(T base, unsigned int e)
size_t CPUKernel::guessWorkGroupSize(cl_uint num_dims, cl_uint dim, size_t CPUKernel::guessWorkGroupSize(cl_uint num_dims, cl_uint dim,
size_t global_work_size) const size_t global_work_size) const
{ {
unsigned int cpus = p_device->numCPUs(); unsigned int cpus = sw::CPUID::coreCount();
// Don't break in too small parts // Don't break in too small parts
if(k_exp(global_work_size, num_dims) > 64) if(k_exp(global_work_size, num_dims) > 64)
...@@ -148,13 +150,13 @@ size_t CPUKernel::typeOffset(size_t &offset, size_t type_len) ...@@ -148,13 +150,13 @@ size_t CPUKernel::typeOffset(size_t &offset, size_t type_len)
llvm::Function *CPUKernel::callFunction() llvm::Function *CPUKernel::callFunction()
{ {
pthread_mutex_lock(&p_call_function_mutex); p_call_function_mutex->lock(sw::PRIVATE);
// If we can reuse the same function between work groups, do it // If we can reuse the same function between work groups, do it
if(p_call_function) if(p_call_function)
{ {
llvm::Function *rs = p_call_function; llvm::Function *rs = p_call_function;
pthread_mutex_unlock(&p_call_function_mutex); p_call_function_mutex->unlock();
return rs; return rs;
} }
...@@ -257,11 +259,11 @@ llvm::Function *CPUKernel::callFunction() ...@@ -257,11 +259,11 @@ llvm::Function *CPUKernel::callFunction()
//// Retain the function if it can be reused //// Retain the function if it can be reused
//p_call_function = stub_function; //p_call_function = stub_function;
pthread_mutex_unlock(&p_call_function_mutex); p_call_function_mutex->unlock();
llvm::Function *rs = p_call_function; llvm::Function *rs = p_call_function;
pthread_mutex_unlock(&p_call_function_mutex); p_call_function_mutex->unlock();
return rs; return rs;
//return stub_function; //return stub_function;
...@@ -275,7 +277,7 @@ CPUKernelEvent::CPUKernelEvent(CPUDevice *device, KernelEvent *event) ...@@ -275,7 +277,7 @@ CPUKernelEvent::CPUKernelEvent(CPUDevice *device, KernelEvent *event)
p_kernel_args(0) p_kernel_args(0)
{ {
// Mutex // Mutex
pthread_mutex_init(&p_mutex, 0); p_mutex = new sw::Resource(0);
// Set current work group to (0, 0, ..., 0) // Set current work group to (0, 0, ..., 0)
std::memset(p_current_work_group, 0, event->work_dim() * sizeof(size_t)); std::memset(p_current_work_group, 0, event->work_dim() * sizeof(size_t));
...@@ -294,7 +296,9 @@ CPUKernelEvent::CPUKernelEvent(CPUDevice *device, KernelEvent *event) ...@@ -294,7 +296,9 @@ CPUKernelEvent::CPUKernelEvent(CPUDevice *device, KernelEvent *event)
CPUKernelEvent::~CPUKernelEvent() CPUKernelEvent::~CPUKernelEvent()
{ {
pthread_mutex_destroy(&p_mutex); p_mutex->lock(sw::DESTRUCT);
p_mutex->unlock();
p_mutex->destruct();
if(p_kernel_args) if(p_kernel_args)
std::free(p_kernel_args); std::free(p_kernel_args);
...@@ -303,7 +307,7 @@ CPUKernelEvent::~CPUKernelEvent() ...@@ -303,7 +307,7 @@ CPUKernelEvent::~CPUKernelEvent()
bool CPUKernelEvent::reserve() bool CPUKernelEvent::reserve()
{ {
// Lock, this will be unlocked in takeInstance() // Lock, this will be unlocked in takeInstance()
pthread_mutex_lock(&p_mutex); p_mutex->lock(sw::PRIVATE);
// Last work group if current == max - 1 // Last work group if current == max - 1
return (p_current_wg == p_num_wg - 1); return (p_current_wg == p_num_wg - 1);
...@@ -312,23 +316,23 @@ bool CPUKernelEvent::reserve() ...@@ -312,23 +316,23 @@ bool CPUKernelEvent::reserve()
bool CPUKernelEvent::finished() bool CPUKernelEvent::finished()
{ {
bool rs; bool rs;
pthread_mutex_lock(&p_mutex); p_mutex->lock(sw::PRIVATE);
rs = (p_finished_wg == p_num_wg); rs = (p_finished_wg == p_num_wg);
pthread_mutex_unlock(&p_mutex); p_mutex->unlock();
return rs; return rs;
} }
void CPUKernelEvent::workGroupFinished() void CPUKernelEvent::workGroupFinished()
{ {
pthread_mutex_lock(&p_mutex); p_mutex->lock(sw::PRIVATE);
p_finished_wg++; p_finished_wg++;
pthread_mutex_unlock(&p_mutex); p_mutex->unlock();
} }
CPUKernelWorkGroup *CPUKernelEvent::takeInstance() CPUKernelWorkGroup *CPUKernelEvent::takeInstance()
...@@ -343,7 +347,7 @@ CPUKernelWorkGroup *CPUKernelEvent::takeInstance() ...@@ -343,7 +347,7 @@ CPUKernelWorkGroup *CPUKernelEvent::takeInstance()
p_current_wg += 1; p_current_wg += 1;
// Release event // Release event
pthread_mutex_unlock(&p_mutex); p_mutex->unlock();
return wg; return wg;
} }
......
...@@ -2,11 +2,12 @@ ...@@ -2,11 +2,12 @@
#define __CPU_KERNEL_H__ #define __CPU_KERNEL_H__
#include "device_interface.h" #include "device_interface.h"
#include "CPUID.hpp"
#include "Resource.hpp"
//#include <llvm/ExecutionEngine/GenericValue.h> //#include <llvm/ExecutionEngine/GenericValue.h>
#include <vector> #include <vector>
#include <string> #include <string>
#include "pthread.h"
#include <stdint.h> #include <stdint.h>
...@@ -103,7 +104,7 @@ namespace Devices ...@@ -103,7 +104,7 @@ namespace Devices
CPUDevice *p_device; CPUDevice *p_device;
Kernel *p_kernel; Kernel *p_kernel;
llvm::Function *p_function, *p_call_function; llvm::Function *p_function, *p_call_function;
pthread_mutex_t p_call_function_mutex; sw::Resource *p_call_function_mutex;
}; };
class CPUKernelEvent; class CPUKernelEvent;
...@@ -286,7 +287,7 @@ namespace Devices ...@@ -286,7 +287,7 @@ namespace Devices
size_t p_current_work_group[MAX_WORK_DIMS], size_t p_current_work_group[MAX_WORK_DIMS],
p_max_work_groups[MAX_WORK_DIMS]; p_max_work_groups[MAX_WORK_DIMS];
size_t p_current_wg, p_finished_wg, p_num_wg; size_t p_current_wg, p_finished_wg, p_num_wg;
pthread_mutex_t p_mutex; sw::Resource *p_mutex;
void *p_kernel_args; void *p_kernel_args;
}; };
......
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
using namespace Devices; using namespace Devices;
void *worker(void *data) void worker(void *data)
{ {
CPUDevice *device = (CPUDevice *)data; CPUDevice *device = (CPUDevice *)data;
bool stop = false; bool stop = false;
...@@ -242,6 +242,4 @@ void *worker(void *data) ...@@ -242,6 +242,4 @@ void *worker(void *data)
/*if(mapped_data) /*if(mapped_data)
munmap(mapped_data, mapped_size);*/ munmap(mapped_data, mapped_size);*/
return 0;
} }
...@@ -10,6 +10,6 @@ ...@@ -10,6 +10,6 @@
* system. As explained by \ref events , this function waits until there * system. As explained by \ref events , this function waits until there
* are \c Coal::Event objects to process and handle them. * are \c Coal::Event objects to process and handle them.
*/ */
void *worker(void *data); void worker(void *data);
#endif #endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment