3#include "../taskflow.hpp"
33 Internal(
Executor& e) : executor {e} {}
148 template <
typename T>
149 syclTask fill(
void* ptr,
const T& pattern,
size_t count);
164 template <
typename T,
167 syclTask copy(T* target,
const T* source,
size_t count);
180 template <
typename...ArgsT>
196 template <
typename F>
219 template <
typename I,
typename C>
249 template <
typename I,
typename C>
275 template <
typename I,
typename C,
typename... S>
301 template <
typename I,
typename T,
typename C>
318 template <
typename I,
typename T,
typename C>
339 template <
typename P>
364 void memcpy(
syclTask task,
void* tgt,
const void* src,
size_t bytes);
378 template <
typename T>
379 void fill(
syclTask task,
void* ptr,
const T& pattern,
size_t count);
386 template <
typename T,
389 void copy(
syclTask task, T* target,
const T* source,
size_t count);
396 template <
typename...ArgsT>
404 template <
typename F>
424 _handle {
std::in_place_type_t<External>{}},
429inline syclFlow::syclFlow(Executor& e, syclGraph& g, sycl::queue& queue) :
431 _handle {
std::in_place_type_t<Internal>{}, e},
437 return _graph._nodes.empty();
442 return _graph._nodes.size();
447 _graph.dump(os,
nullptr,
"");
457 return on([=](sycl::handler& h){ h.memcpy(tgt, src, bytes); });
462 return on([=](sycl::handler& h){ h.memset(ptr, value, bytes); });
468 return on([=](sycl::handler& h){ h.fill(ptr, pattern, count); });
476 return on([=](sycl::handler& h){ h.memcpy(target, source, count*
sizeof(T)); });
484 auto node = _graph.emplace_back(_graph,
499template <
typename...ArgsT>
501 return on([args...] (sycl::handler& h) { h.parallel_for(args...); });
508 if(!(_graph._state & syclGraph::TOPOLOGY_CHANGED)) {
516 for(
auto& u : _graph._nodes) {
517 u->_level = u->_dependents.size();
523 while(!_bfs.empty()) {
524 auto u = _bfs.front();
527 for(
auto v : u->_successors) {
528 if(--(v->_level) == 0) {
529 v->_level = u->_level + 1;
538 bool in_order = _queue.is_in_order();
540 while(!predicate()) {
545 switch(u->_handle.index()) {
547 case syclNode::COMMAND_GROUP_HANDLER:
548 u->_event = _queue.submit([u, in_order](sycl::handler& h){
551 for(
auto p : u->_dependents) {
552 h.depends_on(p->_event);
565 _graph._state = syclGraph::OFFLOADED;
570 offload_until([repeat=n] ()
mutable {
return repeat-- == 0; });
575 offload_until([repeat=1] ()
mutable {
return repeat-- == 0; });
589 syclTask task,
void* tgt,
const void* src,
size_t bytes
591 on(task, [=](sycl::handler& h){ h.memcpy(tgt, src, bytes); });
596 syclTask task,
void* ptr,
int value,
size_t bytes
598 on(task, [=](sycl::handler& h){ h.memset(ptr, value, bytes); });
604 syclTask task,
void* ptr,
const T& pattern,
size_t count
606 on(task, [=](sycl::handler& h){ h.fill(ptr, pattern, count); });
614 syclTask task, T* target,
const T* source,
size_t count
616 on(task, [=](sycl::handler& h){
617 h.memcpy(target, source, count*
sizeof(T));}
622template <
typename...ArgsT>
624 on(task, [args...] (sycl::handler& h) { h.parallel_for(args...); });
638template <
typename C,
typename Q, std::enable_if_t<is_syclflow_task_v<C>,
void>*>
640 auto n =
_graph._emplace_back(
644 e._invoke_syclflow_task_entry(p, c, queue);
652template <
typename C, std::enable_if_t<is_syclflow_task_v<C>,
void>*>
662template <
typename C,
typename Q,
665void Executor::_invoke_syclflow_task_entry(Node* node, C&& c, Q& queue) {
669 syclGraph* g =
dynamic_cast<syclGraph*
>(h->graph.get());
673 syclFlow sf(*
this, *g, queue);
677 if(!(g->_state & syclGraph::OFFLOADED)) {
class to create an executor for running a taskflow graph
Definition executor.hpp:50
Task emplace(C &&callable)
creates a static task
Definition flow_builder.hpp:742
Graph & _graph
associated graph object
Definition flow_builder.hpp:727
Task emplace_on(C &&callable, D &&device)
creates a cudaFlow task on the given device
Definition cudaflow.hpp:1666
class to create a task handle over a node in a taskflow graph
Definition task.hpp:187
class for building a SYCL task dependency graph
Definition syclflow.hpp:23
syclTask single_task(F &&func)
invokes a SYCL kernel function using only one thread
Definition syclflow.hpp:492
bool empty() const
queries the emptiness of the graph
Definition syclflow.hpp:436
void offload_until(P &&predicate)
offloads the syclFlow onto a GPU and repeatedly runs it until the predicate becomes true
Definition syclflow.hpp:506
syclTask on(F &&func)
creates a task that launches the given command group function object
Definition syclflow.hpp:483
void offload_n(size_t N)
offloads the syclFlow and executes it the given number of times
Definition syclflow.hpp:569
syclTask for_each_index(I first, I last, I step, C &&callable)
applies a callable to each index in the range with the step size
void offload()
offloads the syclFlow and executes it once
Definition syclflow.hpp:574
~syclFlow()=default
destroys the syclFlow
syclTask for_each(I first, I last, C &&callable)
applies a callable to each dereferenced element of the data array
void clear()
clears the associated graph
Definition syclflow.hpp:451
syclTask memset(void *ptr, int value, size_t bytes)
creates a memset task that fills untyped data with a byte value
Definition syclflow.hpp:461
void dump(std::ostream &os) const
dumps the syclFlow graph into a DOT format through an output stream
Definition syclflow.hpp:446
syclTask fill(void *ptr, const T &pattern, size_t count)
creates a fill task that fills typed data with the given value
Definition syclflow.hpp:467
syclTask uninitialized_reduce(I first, I last, T *result, C &&op)
similar to tf::syclFlow::reduce but does not assume any initial value to reduce
syclTask memcpy(void *tgt, const void *src, size_t bytes)
creates a memcpy task that copies untyped data in bytes
Definition syclflow.hpp:456
syclTask copy(T *target, const T *source, size_t count)
creates a copy task that copies typed data from a source to a target memory block
Definition syclflow.hpp:475
syclFlow(sycl::queue &queue)
constructs a standalone syclFlow from the given queue
Definition syclflow.hpp:422
syclTask reduce(I first, I last, T *result, C &&op)
performs parallel reduction over a range of items
syclTask transform(I first, I last, C &&callable, S... srcs)
applies a callable to a source range and stores the result in a target range
syclTask parallel_for(ArgsT &&... args)
creates a kernel task
Definition syclflow.hpp:500
size_t num_tasks() const
queries the number of tasks
Definition syclflow.hpp:441
handle to a node of the internal SYCL graph
Definition sycl_task.hpp:21
taskflow namespace
Definition small_vector.hpp:27