![]() |
Taskflow
3.2.0-Master-Branch
|
class for building a SYCL task dependency graph More...
#include <syclflow.hpp>
Public Member Functions | |
| syclFlow (sycl::queue &queue) | |
| constructs a standalone syclFlow from the given queue | |
| ~syclFlow ()=default | |
| destroys the syclFlow | |
| bool | empty () const |
| queries the emptiness of the graph | |
| size_t | num_tasks () const |
| queries the number of tasks | |
| void | dump (std::ostream &os) const |
| dumps the syclFlow graph into a DOT format through an output stream | |
| void | clear () |
| clears the associated graph | |
| template<typename F , std::enable_if_t< std::is_invocable_r_v< void, F, sycl::handler & >, void > * = nullptr> | |
| syclTask | on (F &&func) |
| creates a task that launches the given command group function object | |
| template<typename F , std::enable_if_t< std::is_invocable_r_v< void, F, sycl::handler & >, void > * = nullptr> | |
| void | on (syclTask task, F &&func) |
| updates the task to the given command group function object | |
| syclTask | memcpy (void *tgt, const void *src, size_t bytes) |
| creates a memcpy task that copies untyped data in bytes | |
| syclTask | memset (void *ptr, int value, size_t bytes) |
| creates a memset task that fills untyped data with a byte value | |
| template<typename T > | |
| syclTask | fill (void *ptr, const T &pattern, size_t count) |
| creates a fill task that fills typed data with the given value | |
| template<typename T , std::enable_if_t<!std::is_same_v< T, void >, void > * = nullptr> | |
| syclTask | copy (T *target, const T *source, size_t count) |
| creates a copy task that copies typed data from a source to a target memory block | |
| template<typename... ArgsT> | |
| syclTask | parallel_for (ArgsT &&... args) |
| creates a kernel task | |
| template<typename F > | |
| syclTask | single_task (F &&func) |
| invokes a SYCL kernel function using only one thread | |
| template<typename I , typename C > | |
| syclTask | for_each (I first, I last, C &&callable) |
| applies a callable to each dereferenced element of the data array | |
| template<typename I , typename C > | |
| syclTask | for_each_index (I first, I last, I step, C &&callable) |
| applies a callable to each index in the range with the step size | |
| template<typename I , typename C , typename... S> | |
| syclTask | transform (I first, I last, C &&callable, S... srcs) |
| applies a callable to a source range and stores the result in a target range | |
| template<typename I , typename T , typename C > | |
| syclTask | reduce (I first, I last, T *result, C &&op) |
| performs parallel reduction over a range of items | |
| template<typename I , typename T , typename C > | |
| syclTask | uninitialized_reduce (I first, I last, T *result, C &&op) |
| similar to tf::syclFlow::reduce but does not assume any initial value to reduce | |
| template<typename P > | |
| void | offload_until (P &&predicate) |
| offloads the syclFlow onto a GPU and repeatedly runs it until the predicate becomes true | |
| void | offload_n (size_t N) |
| offloads the syclFlow and executes it the given number of times | |
| void | offload () |
| offloads the syclFlow and executes it once | |
| void | memcpy (syclTask task, void *tgt, const void *src, size_t bytes) |
| rebinds the task to a memcpy task | |
| void | memset (syclTask task, void *ptr, int value, size_t bytes) |
| rebinds the task to a memset task | |
| template<typename T > | |
| void | fill (syclTask task, void *ptr, const T &pattern, size_t count) |
| rebinds the task to a fill task | |
| template<typename T , std::enable_if_t<!std::is_same_v< T, void >, void > * = nullptr> | |
| void | copy (syclTask task, T *target, const T *source, size_t count) |
| rebinds the task to a copy task | |
| template<typename... ArgsT> | |
| void | parallel_for (syclTask task, ArgsT &&... args) |
| rebinds the task to a parallel-for kernel task | |
| template<typename F > | |
| void | single_task (syclTask task, F &&func) |
| rebinds the task to a single-threaded kernel task | |
Friends | |
| class | Executor |
class for building a SYCL task dependency graph
|
inline |
constructs a standalone syclFlow from the given queue
A standalone syclFlow does not go through any taskflow and can be run by the caller thread using explicit offload methods (e.g., tf::syclFlow::offload).
| void tf::syclFlow::copy | ( | syclTask | task, |
| T * | target, | ||
| const T * | source, | ||
| size_t | count | ||
| ) |
rebinds the task to a copy task
Similar to tf::syclFlow::copy but operates on an existing task.
| syclTask tf::syclFlow::copy | ( | T * | target, |
| const T * | source, | ||
| size_t | count | ||
| ) |
creates a copy task that copies typed data from a source to a target memory block
| T | trivially copyable value type |
| target | pointer to the target memory block to copy into |
| source | pointer to the source memory block to copy from |
| count | number of items of type T to copy |
Creates a task that copies count items of type T from a source memory location to a target memory location.
| void tf::syclFlow::fill | ( | syclTask | task, |
| void * | ptr, | ||
| const T & | pattern, | ||
| size_t | count | ||
| ) |
rebinds the task to a fill task
Similar to tf::syclFlow::fill but operates on an existing task.
| syclTask tf::syclFlow::fill | ( | void * | ptr, |
| const T & | pattern, | ||
| size_t | count | ||
| ) |
creates a fill task that fills typed data with the given value
| T | trivially copyable value type |
| ptr | pointer to the memory to fill |
| pattern | pattern value to fill into the memory |
| count | number of items to fill with the pattern value |
Creates a task that fills the specified memory with the specified value.
| syclTask tf::syclFlow::for_each | ( | I | first, |
| I | last, | ||
| C && | callable | ||
| ) |
applies a callable to each dereferenced element of the data array
| I | iterator type |
| C | callable type |
| first | iterator to the beginning (inclusive) |
| last | iterator to the end (exclusive) |
| callable | a callable object to apply to the dereferenced iterator |
This method is equivalent to the parallel execution of the following loop on a GPU:
    for(auto itr = first; itr != last; itr++) {
      callable(*itr);
    }
| syclTask tf::syclFlow::for_each_index | ( | I | first, |
| I | last, | ||
| I | step, | ||
| C && | callable | ||
| ) |
applies a callable to each index in the range with the step size
| I | index type |
| C | callable type |
| first | beginning index |
| last | last index |
| step | step size |
| callable | the callable to apply to each index in the range |
This method is equivalent to the parallel execution of the following loop on a GPU:
|
inline |
rebinds the task to a memcpy task
Similar to tf::syclFlow::memcpy but operates on an existing task.
|
inline |
creates a memcpy task that copies untyped data in bytes
| tgt | pointer to the target memory block |
| src | pointer to the source memory block |
| bytes | bytes to copy |
A memcpy task transfers bytes of data from a source location src to a target location tgt. Both src and tgt may be either host or USM pointers.
|
inline |
rebinds the task to a memset task
Similar to tf::syclFlow::memset but operates on an existing task.
|
inline |
creates a memset task that fills untyped data with a byte value
| ptr | pointer to the destination device memory area |
| value | value to set for each byte of specified memory |
| bytes | number of bytes to set |
Fills bytes of memory beginning at address ptr with value. ptr must be a USM allocation. value is interpreted as an unsigned char.
|
inline |
offloads the syclFlow and executes it the given number of times
| N | number of executions |
| void tf::syclFlow::offload_until | ( | P && | predicate | ) |
offloads the syclFlow onto a GPU and repeatedly runs it until the predicate becomes true
| P | predicate type (a callable that returns a boolean value) |
| predicate | a boolean-valued predicate (returns true for stop) |
Repetitively executes the present syclFlow through the given queue object until the predicate returns true.
By default, if users do not offload the syclFlow, the executor will offload it once.
| syclTask tf::syclFlow::on | ( | F && | func | ) |
creates a task that launches the given command group function object
| F | type of command group function object |
| func | function object that is constructible from std::function<void(sycl::handler&)> |
Creates a task that is associated with the given command group. In SYCL, each command group function object is given a unique command group handler object to perform all the necessary work required to correctly process data on a device using a kernel.
| void tf::syclFlow::on | ( | syclTask | task, |
| F && | func | ||
| ) |
updates the task to the given command group function object
Similar to tf::syclFlow::on but operates on an existing task.
| syclTask tf::syclFlow::parallel_for | ( | ArgsT &&... | args | ) |
creates a kernel task
| ArgsT | arguments types |
| args | arguments to forward to the parallel_for methods defined in the handler object |
Creates a kernel task from a parallel_for method through the handler object associated with a command group.
| void tf::syclFlow::parallel_for | ( | syclTask | task, |
| ArgsT &&... | args | ||
| ) |
rebinds the task to a parallel-for kernel task
Similar to tf::syclFlow::parallel_for but operates on an existing task.
| syclTask tf::syclFlow::reduce | ( | I | first, |
| I | last, | ||
| T * | result, | ||
| C && | op | ||
| ) |
performs parallel reduction over a range of items
| I | input iterator type |
| T | value type |
| C | callable type |
| first | iterator to the beginning (inclusive) |
| last | iterator to the end (exclusive) |
| result | pointer to the result with an initialized value |
| op | binary reduction operator |
This method is equivalent to the parallel execution of the following loop on a SYCL device:
    while (first != last) {
      *result = op(*result, *first++);
    }
| syclTask tf::syclFlow::single_task | ( | F && | func | ) |
invokes a SYCL kernel function using only one thread
| F | kernel function type |
| func | kernel function |
Creates a task that launches the given function object using only one kernel thread.
| void tf::syclFlow::single_task | ( | syclTask | task, |
| F && | func | ||
| ) |
rebinds the task to a single-threaded kernel task
Similar to tf::syclFlow::single_task but operates on an existing task.
| syclTask tf::syclFlow::transform | ( | I | first, |
| I | last, | ||
| C && | callable, | ||
| S... | srcs | ||
| ) |
applies a callable to a source range and stores the result in a target range
| I | iterator type |
| C | callable type |
| S | source types |
| first | iterator to the beginning (inclusive) |
| last | iterator to the end (exclusive) |
| callable | the callable to apply to each element in the range |
| srcs | iterators to the source ranges |
This method is equivalent to the parallel execution of the following loop on a SYCL device:
    while (first != last) {
      *first++ = callable(*src1++, *src2++, *src3++, ...);
    }
| syclTask tf::syclFlow::uninitialized_reduce | ( | I | first, |
| I | last, | ||
| T * | result, | ||
| C && | op | ||
| ) |
similar to tf::syclFlow::reduce but does not assume any initial value to reduce
This method is equivalent to the parallel execution of the following loop on a SYCL device: