![]() |
Taskflow
3.2.0-Master-Branch
|
class for building a SYCL task dependency graph More...
#include <syclflow.hpp>
Public Member Functions | |
syclFlow (sycl::queue &queue) | |
constructs a standalone syclFlow from the given queue | |
~syclFlow ()=default | |
destroys the syclFlow | |
bool | empty () const |
queries the emptiness of the graph | |
size_t | num_tasks () const |
queries the number of tasks | |
void | dump (std::ostream &os) const |
dumps the syclFlow graph into a DOT format through an output stream | |
void | clear () |
clears the associated graph | |
template<typename F , std::enable_if_t< std::is_invocable_r_v< void, F, sycl::handler & >, void > * = nullptr> | |
syclTask | on (F &&func) |
creates a task that launches the given command group function object | |
template<typename F , std::enable_if_t< std::is_invocable_r_v< void, F, sycl::handler & >, void > * = nullptr> | |
void | on (syclTask task, F &&func) |
updates the task to the given command group function object | |
syclTask | memcpy (void *tgt, const void *src, size_t bytes) |
creates a memcpy task that copies untyped data in bytes | |
syclTask | memset (void *ptr, int value, size_t bytes) |
creates a memset task that fills untyped data with a byte value | |
template<typename T > | |
syclTask | fill (void *ptr, const T &pattern, size_t count) |
creates a fill task that fills typed data with the given value | |
template<typename T , std::enable_if_t<!std::is_same_v< T, void >, void > * = nullptr> | |
syclTask | copy (T *target, const T *source, size_t count) |
creates a copy task that copies typed data from a source to a target memory block | |
template<typename... ArgsT> | |
syclTask | parallel_for (ArgsT &&... args) |
creates a kernel task | |
template<typename F > | |
syclTask | single_task (F &&func) |
invokes a SYCL kernel function using only one thread | |
template<typename I , typename C > | |
syclTask | for_each (I first, I last, C &&callable) |
applies a callable to each dereferenced element of the data array | |
template<typename I , typename C > | |
syclTask | for_each_index (I first, I last, I step, C &&callable) |
applies a callable to each index in the range with the step size | |
template<typename I , typename C , typename... S> | |
syclTask | transform (I first, I last, C &&callable, S... srcs) |
applies a callable to a source range and stores the result in a target range | |
template<typename I , typename T , typename C > | |
syclTask | reduce (I first, I last, T *result, C &&op) |
performs parallel reduction over a range of items | |
template<typename I , typename T , typename C > | |
syclTask | uninitialized_reduce (I first, I last, T *result, C &&op) |
similar to tf::syclFlow::reduce but does not assume any initial value to reduce | |
template<typename P > | |
void | offload_until (P &&predicate) |
offloads the syclFlow onto a GPU and repeatedly runs it until the predicate becomes true | |
void | offload_n (size_t N) |
offloads the syclFlow and executes it the given number of times | |
void | offload () |
offloads the syclFlow and executes it once | |
void | memcpy (syclTask task, void *tgt, const void *src, size_t bytes) |
rebinds the task to a memcpy task | |
void | memset (syclTask task, void *ptr, int value, size_t bytes) |
rebinds the task to a memset task | |
template<typename T > | |
void | fill (syclTask task, void *ptr, const T &pattern, size_t count) |
rebinds the task to a fill task | |
template<typename T , std::enable_if_t<!std::is_same_v< T, void >, void > * = nullptr> | |
void | copy (syclTask task, T *target, const T *source, size_t count) |
rebinds the task to a copy task | |
template<typename... ArgsT> | |
void | parallel_for (syclTask task, ArgsT &&... args) |
rebinds the task to a parallel-for kernel task | |
template<typename F > | |
void | single_task (syclTask task, F &&func) |
rebinds the task to a single-threaded kernel task | |
Friends | |
class | Executor |
class for building a SYCL task dependency graph
tf::syclFlow::syclFlow | ( | sycl::queue & | queue | ) | inline |
constructs a standalone syclFlow from the given queue
A standalone syclFlow does not go through any taskflow and can be run by the caller thread using explicit offload methods (e.g., tf::syclFlow::offload).
void tf::syclFlow::copy | ( | syclTask | task, |
T * | target, | ||
const T * | source, | ||
size_t | count | ||
) |
rebinds the task to a copy task
Similar to tf::syclFlow::copy but operates on an existing task.
syclTask tf::syclFlow::copy | ( | T * | target, |
const T * | source, | ||
size_t | count | ||
) |
creates a copy task that copies typed data from a source to a target memory block
T | trivially copyable value type |
target | pointer to the target memory block |
source | pointer to the source memory block |
count | number of items to copy |
Creates a task that copies count items of type T from a source memory location to a target memory location.
void tf::syclFlow::fill | ( | syclTask | task, |
void * | ptr, | ||
const T & | pattern, | ||
size_t | count | ||
) |
rebinds the task to a fill task
Similar to tf::syclFlow::fill but operates on an existing task.
syclTask tf::syclFlow::fill | ( | void * | ptr, |
const T & | pattern, | ||
size_t | count | ||
) |
creates a fill task that fills typed data with the given value
T | trivially copyable value type |
ptr | pointer to the memory to fill |
pattern | pattern value to fill into the memory |
count | number of items to fill the value |
Creates a task that fills the specified memory with the specified value.
syclTask tf::syclFlow::for_each | ( | I | first, |
I | last, | ||
C && | callable | ||
) |
applies a callable to each dereferenced element of the data array
I | iterator type |
C | callable type |
first | iterator to the beginning (inclusive) |
last | iterator to the end (exclusive) |
callable | a callable object to apply to the dereferenced iterator |
This method is equivalent to the parallel execution of the following loop on a GPU:
for(auto itr = first; itr != last; itr++) {
  callable(*itr);
}
syclTask tf::syclFlow::for_each_index | ( | I | first, |
I | last, | ||
I | step, | ||
C && | callable | ||
) |
applies a callable to each index in the range with the step size
I | index type |
C | callable type |
first | beginning index |
last | last index |
step | step size |
callable | the callable to apply to each index in the range |
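This method is equivalent to the parallel execution of the following loop on a GPU:
// step is positive: [first, last)
for(auto i = first; i < last; i += step) {
  callable(i);
}
// step is negative: [first, last)
for(auto i = first; i > last; i += step) {
  callable(i);
}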
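void tf::syclFlow::memcpy | ( | syclTask | task, |
void * | tgt, | ||
const void * | src, | ||
size_t | bytes | ||
) | inline |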
rebinds the task to a memcpy task
Similar to tf::syclFlow::memcpy but operates on an existing task.
syclTask tf::syclFlow::memcpy | ( | void * | tgt, |
const void * | src, | ||
size_t | bytes | ||
) | inline |
creates a memcpy task that copies untyped data in bytes
tgt | pointer to the target memory block |
src | pointer to the source memory block |
bytes | bytes to copy |
A memcpy task transfers bytes of data from a source location src to a target location tgt. Both src and tgt may be either host or USM pointers.
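void tf::syclFlow::memset | ( | syclTask | task, |
void * | ptr, | ||
int | value, | ||
size_t | bytes | ||
) | inline |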
rebinds the task to a memset task
Similar to tf::syclFlow::memset but operates on an existing task.
syclTask tf::syclFlow::memset | ( | void * | ptr, |
int | value, | ||
size_t | bytes | ||
) | inline |
creates a memset task that fills untyped data with a byte value
ptr | pointer to the destination device memory area |
value | value to set for each byte of specified memory |
bytes | number of bytes to set |
Fills bytes of memory beginning at address ptr with value. ptr must be a USM allocation. value is interpreted as an unsigned char.
void tf::syclFlow::offload_n | ( | size_t | N | ) | inline |
offloads the syclFlow and executes it the given number of times
N | number of executions |
void tf::syclFlow::offload_until | ( | P && | predicate | ) |
offloads the syclFlow onto a GPU and repeatedly runs it until the predicate becomes true
P | predicate type (a callable that returns a boolean value) |
predicate | a predicate (returns true for stop) |
Repeatedly executes the present syclFlow through the given queue object until the predicate returns true.
By default, if users do not offload the syclFlow, the executor will offload it once.
syclTask tf::syclFlow::on | ( | F && | func | ) |
creates a task that launches the given command group function object
F | type of command group function object |
func | command group function object invocable with a sycl::handler& (a std::function<void(sycl::handler&)> must be constructible from it) |
Creates a task that is associated with the given command group. In SYCL, each command group function object is given a unique command group handler object to perform all the necessary work required to correctly process data on a device using a kernel.
void tf::syclFlow::on | ( | syclTask | task, |
F && | func | ||
) |
updates the task to the given command group function object
Similar to tf::syclFlow::on but operates on an existing task.
syclTask tf::syclFlow::parallel_for | ( | ArgsT &&... | args | ) |
creates a kernel task
ArgsT | arguments types |
args | arguments to forward to the parallel_for methods defined in the handler object |
Creates a kernel task from a parallel_for method through the handler object associated with a command group.
void tf::syclFlow::parallel_for | ( | syclTask | task, |
ArgsT &&... | args | ||
) |
rebinds the task to a parallel-for kernel task
Similar to tf::syclFlow::parallel_for but operates on an existing task.
syclTask tf::syclFlow::reduce | ( | I | first, |
I | last, | ||
T * | result, | ||
C && | op | ||
) |
performs parallel reduction over a range of items
I | input iterator type |
T | value type |
C | callable type |
first | iterator to the beginning (inclusive) |
last | iterator to the end (exclusive) |
result | pointer to the result with an initialized value |
op | binary reduction operator |
This method is equivalent to the parallel execution of the following loop on a SYCL device:
while (first != last) {
  *result = op(*result, *first++);
}
syclTask tf::syclFlow::single_task | ( | F && | func | ) |
invokes a SYCL kernel function using only one thread
F | kernel function type |
func | kernel function |
Creates a task that launches the given function object using only one kernel thread.
void tf::syclFlow::single_task | ( | syclTask | task, |
F && | func | ||
) |
rebinds the task to a single-threaded kernel task
Similar to tf::syclFlow::single_task but operates on an existing task.
syclTask tf::syclFlow::transform | ( | I | first, |
I | last, | ||
C && | callable, | ||
S... | srcs | ||
) |
applies a callable to a source range and stores the result in a target range
I | iterator type |
C | callable type |
S | source types |
first | iterator to the beginning (inclusive) |
last | iterator to the end (exclusive) |
callable | the callable to apply to each element in the range |
srcs | iterators to the source ranges |
This method is equivalent to the parallel execution of the following loop on a SYCL device:
while (first != last) {
  *first++ = callable(*src1++, *src2++, *src3++, ...);
}
syclTask tf::syclFlow::uninitialized_reduce | ( | I | first, |
I | last, | ||
T * | result, | ||
C && | op | ||
) |
similar to tf::syclFlow::reduce but does not assume any initial value to reduce
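This method is equivalent to the parallel execution of the following loop on a SYCL device:
*result = *first++;  // no initial value is assumed in result
while (first != last) {
  *result = op(*result, *first++);
}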