Taskflow  3.2.0-Master-Branch
Loading...
Searching...
No Matches
syclflow.hpp
Go to the documentation of this file.
1#pragma once
2
3#include "../taskflow.hpp"
4#include "sycl_task.hpp"
5
11namespace tf {
12
13// ----------------------------------------------------------------------------
14// class definition: syclFlow
15// ----------------------------------------------------------------------------
16
// class for building a SYCL task dependency graph
//
// NOTE(review): this listing was extracted from rendered documentation. Every
// code line still carries its original line number as a prefix, and the gaps
// in that numbering mark lines that are absent here (doc comments, the
// conditions of the std::enable_if_t parameters, the defaulted second template
// parameter of copy, and original lines 417-418). Restore them from the
// upstream header before compiling.
23class syclFlow {
24
25 friend class Executor;
26
 // Handle alternative selected by the public constructor: the graph is
 // stored inside the syclFlow itself.
27 struct External {
28 syclGraph graph;
29 };
30
 // Handle alternative selected by the private constructor: keeps a reference
 // to the executor; the graph is supplied by the caller instead.
31 struct Internal {
32 Executor& executor;
33 Internal(Executor& e) : executor {e} {}
34 };
35
36 using handle_t = std::variant<External, Internal>;
37
38 public:
39
 // constructs a standalone syclFlow from the given queue
47 syclFlow(sycl::queue& queue);
48
 // destroys the syclFlow
52 ~syclFlow() = default;
53
 // queries the emptiness of the graph
57 bool empty() const;
58
 // queries the number of tasks
62 size_t num_tasks() const;
63
 // dumps the syclFlow graph into a DOT format through an output stream
68 void dump(std::ostream& os) const;
69
 // clear the associated graph
73 void clear();
74
75 // ------------------------------------------------------------------------
76 // Generic device operations
77 // ------------------------------------------------------------------------
78
 // creates a task that launches the given command group function object
 // NOTE(review): the enable_if_t condition (original line 92) is missing.
91 template <typename F, std::enable_if_t<
93 >
94 syclTask on(F&& func);
95
 // replaces the command group function object of an existing task
 // NOTE(review): the enable_if_t condition (original line 102) is missing.
101 template <typename F, std::enable_if_t<
103 >
104 void on(syclTask task, F&& func);
105
 // creates a memcpy task that copies untyped data in bytes
119 syclTask memcpy(void* tgt, const void* src, size_t bytes);
120
 // creates a memset task that fills untyped data with a byte value
134 syclTask memset(void* ptr, int value, size_t bytes);
135
 // creates a fill task that fills typed data with the given value
148 template <typename T>
149 syclTask fill(void* ptr, const T& pattern, size_t count);
150
 // creates a copy task that copies typed data from a source to a target
 // memory block
 // NOTE(review): the second template parameter (original line 165) is missing.
164 template <typename T,
166 >
167 syclTask copy(T* target, const T* source, size_t count);
168
 // creates a kernel task; the arguments are handed to handler::parallel_for
180 template <typename...ArgsT>
181 syclTask parallel_for(ArgsT&&... args);
182
183 // ------------------------------------------------------------------------
184 // algorithms
185 // ------------------------------------------------------------------------
186
 // invokes a SYCL kernel function using only one thread
196 template <typename F>
197 syclTask single_task(F&& func);
198
 // applies a callable to each dereferenced element of the data array
219 template <typename I, typename C>
220 syclTask for_each(I first, I last, C&& callable);
221
 // applies a callable to each index in the range with the step size
249 template <typename I, typename C>
250 syclTask for_each_index(I first, I last, I step, C&& callable);
251
 // applies a callable to a source range and stores the result in a target
 // range
275 template <typename I, typename C, typename... S>
276 syclTask transform(I first, I last, C&& callable, S... srcs);
277
 // performs parallel reduction over a range of items
301 template <typename I, typename T, typename C>
302 syclTask reduce(I first, I last, T* result, C&& op);
303
 // similar to reduce but does not assume any initial value to reduce
318 template <typename I, typename T, typename C>
319 syclTask uninitialized_reduce(I first, I last, T* result, C&& op);
320
321 // ------------------------------------------------------------------------
322 // offload methods
323 // ------------------------------------------------------------------------
324
 // offloads the syclFlow onto a GPU and repeatedly runs it until the
 // predicate becomes true
339 template <typename P>
340 void offload_until(P&& predicate);
341
 // offloads the syclFlow and executes it by the given times
347 void offload_n(size_t N);
348
 // offloads the syclFlow and executes it once
352 void offload();
353
354 // ------------------------------------------------------------------------
355 // update methods
356 // ------------------------------------------------------------------------
357
358
 // Each overload below takes an existing task and replaces its work with the
 // corresponding operation (they forward to on(task, ...)).
364 void memcpy(syclTask task, void* tgt, const void* src, size_t bytes);
365
371 void memset(syclTask task, void* ptr, int value, size_t bytes);
372
378 template <typename T>
379 void fill(syclTask task, void* ptr, const T& pattern, size_t count);
380
 // NOTE(review): the second template parameter (original line 387) is missing.
386 template <typename T,
388 >
389 void copy(syclTask task, T* target, const T* source, size_t count);
390
396 template <typename...ArgsT>
397 void parallel_for(syclTask task, ArgsT&&... args);
398
404 template <typename F>
405 void single_task(syclTask task, F&& func);
406
407 private:
408
 // Constructs a syclFlow over a caller-provided graph; used by Executor when
 // it runs a syclFlow task.
409 syclFlow(Executor&, syclGraph&, sycl::queue&);
410
411 sycl::queue& _queue;
412
 // Must stay declared before _graph: the public constructor binds _graph to
 // the graph stored inside _handle, and members initialize in this order.
413 handle_t _handle;
414
415 syclGraph& _graph;
416
 // NOTE(review): offload_until uses members _tpg and _bfs that are not
 // declared in this listing — presumably they sat on the missing original
 // lines 417-418; confirm against the upstream header.
419};
420
421// constructor
422inline syclFlow::syclFlow(sycl::queue& queue) :
423 _queue {queue},
424 _handle {std::in_place_type_t<External>{}},
425 _graph {std::get_if<External>(&_handle)->graph} {
426}
427
428// Construct the syclFlow from executor (internal graph)
429inline syclFlow::syclFlow(Executor& e, syclGraph& g, sycl::queue& queue) :
430 _queue {queue},
431 _handle {std::in_place_type_t<Internal>{}, e},
432 _graph {g} {
433}
434
435// Function: empty
436inline bool syclFlow::empty() const {
437 return _graph._nodes.empty();
438}
439
440// Function: num_tasks
441inline size_t syclFlow::num_tasks() const {
442 return _graph._nodes.size();
443}
444
445// Procedure: dump
// Dumps the syclFlow graph into a DOT format through the output stream `os`.
// Forwards to the graph's own dump with a null second argument and an empty
// string as the third.
// NOTE(review): the meaning of those two arguments is defined by
// syclGraph::dump, which is not visible here — confirm before changing them.
446inline void syclFlow::dump(std::ostream& os) const {
447 _graph.dump(os, nullptr, "");
448}
449
450// Procedure: clear
// Clears the associated graph by delegating to the graph's clear().
// NOTE(review): this does not touch _tpg; presumably syclGraph::clear marks
// the topology as changed so the next offload rebuilds it — confirm.
451inline void syclFlow::clear() {
452 _graph.clear();
453}
454
455// Function: memcpy
456inline syclTask syclFlow::memcpy(void* tgt, const void* src, size_t bytes) {
457 return on([=](sycl::handler& h){ h.memcpy(tgt, src, bytes); });
458}
459
460// Function: memset
461inline syclTask syclFlow::memset(void* ptr, int value, size_t bytes) {
462 return on([=](sycl::handler& h){ h.memset(ptr, value, bytes); });
463}
464
465// Function: fill
466template <typename T>
467syclTask syclFlow::fill(void* ptr, const T& pattern, size_t count) {
468 return on([=](sycl::handler& h){ h.fill(ptr, pattern, count); });
469}
470
471// Function: copy
// Creates a copy task that copies typed data from a source to a target memory
// block. It is implemented as a handler memcpy of count*sizeof(T) bytes, with
// the pointers and count captured by value.
// NOTE(review): the second template parameter (original line 473) is missing
// from this listing — restore it from the upstream header before compiling.
472template <typename T,
474>
475syclTask syclFlow::copy(T* target, const T* source, size_t count) {
476 return on([=](sycl::handler& h){ h.memcpy(target, source, count*sizeof(T)); });
477}
478
479// Function: on
// Creates a task that launches the given command group function object: a new
// node is emplaced at the back of the graph and wrapped in a syclTask handle.
// NOTE(review): the enable_if_t condition (original line 481), the function
// signature (line 483) and the remaining emplace_back arguments (line 485) are
// missing from this listing — restore them from the upstream header.
480template <typename F, std::enable_if_t<
482>
484 auto node = _graph.emplace_back(_graph,
486 );
487 return syclTask(node);
488}
489
490// Function: single_task
// Invokes a SYCL kernel function using only one thread. The callable is
// forwarded into the closure as `f` and handed to handler::single_task when
// the command group runs.
// NOTE(review): the function signature (original line 492) is missing from
// this listing — restore it from the upstream header.
491template <typename F>
493 return on([f=std::forward<F>(func)] (sycl::handler& h) {
494 h.single_task(f);
495 });
496}
497
498// Function: parallel_for
// Creates a kernel task. The arguments are copied into the closure (captured
// by value, not forwarded) and passed to handler::parallel_for when the
// command group runs.
// NOTE(review): the function signature (original line 500) is missing from
// this listing — restore it from the upstream header.
499template <typename...ArgsT>
501 return on([args...] (sycl::handler& h) { h.parallel_for(args...); });
502}
503
504// Procedure: offload_until
505template <typename P>
506void syclFlow::offload_until(P&& predicate) {
507
508 if(!(_graph._state & syclGraph::TOPOLOGY_CHANGED)) {
509 goto offload;
510 }
511
512 // levelize the graph
513 _tpg.clear();
514
515 // insert the first level of nodes into the queue
516 for(auto& u : _graph._nodes) {
517 u->_level = u->_dependents.size();
518 if(u->_level == 0) {
519 _bfs.push(u.get());
520 }
521 }
522
523 while(!_bfs.empty()) {
524 auto u = _bfs.front();
525 _bfs.pop();
526 _tpg.push_back(u);
527 for(auto v : u->_successors) {
528 if(--(v->_level) == 0) {
529 v->_level = u->_level + 1;
530 _bfs.push(v);
531 }
532 }
533 }
534
535 offload:
536
537 // offload the syclFlow graph
538 bool in_order = _queue.is_in_order();
539
540 while(!predicate()) {
541
542 // traverse node in a topological order
543 for(auto u : _tpg) {
544
545 switch(u->_handle.index()) {
546 // task type 1: command group handler
547 case syclNode::COMMAND_GROUP_HANDLER:
548 u->_event = _queue.submit([u, in_order](sycl::handler& h){
549 // wait on all predecessors
550 if(!in_order) {
551 for(auto p : u->_dependents) {
552 h.depends_on(p->_event);
553 }
554 }
555 std::get_if<syclNode::CGH>(&u->_handle)->work(h);
556 });
557 break;
558 }
559 }
560
561 // synchronize the execution
562 _queue.wait();
563 }
564
565 _graph._state = syclGraph::OFFLOADED;
566}
567
568// Procedure: offload_n
569inline void syclFlow::offload_n(size_t n) {
570 offload_until([repeat=n] () mutable { return repeat-- == 0; });
571}
572
573// Procedure: offload
574inline void syclFlow::offload() {
575 offload_until([repeat=1] () mutable { return repeat-- == 0; });
576}
577
578// Procedure: on (update overload)
// Replaces the work stored in the command-group-handler alternative of the
// node behind `task`.
// NOTE(review): the enable_if_t condition (original line 580) and the
// right-hand side of the assignment (line 584) are missing from this listing —
// restore them from the upstream header.
// NOTE(review): the get_if result is dereferenced without a null check;
// presumably every syclTask node holds the CGH alternative — confirm.
579template <typename F, std::enable_if_t<
581>
582void syclFlow::on(syclTask task, F&& f) {
583 std::get_if<syclNode::CGH>(&task._node->_handle)->work =
585}
586
587// Procedure: memcpy (update overload)
// Rebinds an existing task to a memcpy of `bytes` bytes from `src` to `tgt`,
// with the arguments captured by value.
// NOTE(review): the first line of the signature (original line 588) is
// missing from this listing — restore it from the upstream header.
589 syclTask task, void* tgt, const void* src, size_t bytes
590) {
591 on(task, [=](sycl::handler& h){ h.memcpy(tgt, src, bytes); });
592}
593
594// Procedure: memset (update overload)
// Rebinds an existing task to a memset of `bytes` bytes at `ptr` with the byte
// value `value`, with the arguments captured by value.
// NOTE(review): the first line of the signature (original line 595) is
// missing from this listing — restore it from the upstream header.
596 syclTask task, void* ptr, int value, size_t bytes
597) {
598 on(task, [=](sycl::handler& h){ h.memset(ptr, value, bytes); });
599}
600
601// Procedure: fill (update overload)
// Rebinds an existing task to a typed fill of `count` copies of `pattern` at
// `ptr`. The pattern is copied into the closure.
// NOTE(review): the first line of the signature (original line 603) is
// missing from this listing — restore it from the upstream header.
602template <typename T>
604 syclTask task, void* ptr, const T& pattern, size_t count
605) {
606 on(task, [=](sycl::handler& h){ h.fill(ptr, pattern, count); });
607}
608
609// Procedure: copy (update overload)
// Rebinds an existing task to a typed copy of `count` elements from `source`
// to `target`, implemented as a handler memcpy of count*sizeof(T) bytes.
// NOTE(review): the second template parameter (original line 611) and the
// first line of the signature (line 613) are missing from this listing —
// restore them from the upstream header.
610template <typename T,
612>
614 syclTask task, T* target, const T* source, size_t count
615) {
616 on(task, [=](sycl::handler& h){
617 h.memcpy(target, source, count*sizeof(T));}
618 );
619}
620
621// Function: parallel_for
622template <typename...ArgsT>
623void syclFlow::parallel_for(syclTask task, ArgsT&&... args) {
624 on(task, [args...] (sycl::handler& h) { h.parallel_for(args...); });
625}
626
627// Function: single_task
628template <typename F>
629void syclFlow::single_task(syclTask task, F&& func) {
630 on(task, [f=std::forward<F>(func)] (sycl::handler& h) { h.single_task(f); });
631}
632
633// ############################################################################
634// Member definitions: FlowBuilder (declared in flow_builder.hpp)
635// ############################################################################
636
637// FlowBuilder::emplace_on
// Adds a syclFlow task to the builder's graph. The callable and the queue are
// forwarded into a closure; when that closure is called with an executor and a
// node, it hands both captures to Executor::_invoke_syclflow_task_entry.
// The closure is `mutable`, so the captured queue is passed on as a non-const
// lvalue.
// NOTE(review): the first and last arguments of _emplace_back (original lines
// 641 and 646) are missing from this listing — restore them from the upstream
// header.
638template <typename C, typename Q, std::enable_if_t<is_syclflow_task_v<C>, void>*>
639Task FlowBuilder::emplace_on(C&& callable, Q&& q) {
640 auto n = _graph._emplace_back(
642 [c=std::forward<C>(callable), queue=std::forward<Q>(q)]
643 (Executor& e, Node* p) mutable {
644 e._invoke_syclflow_task_entry(p, c, queue);
645 },
647 );
648 return Task(n);
649}
650
651// FlowBuilder::emplace
652template <typename C, std::enable_if_t<is_syclflow_task_v<C>, void>*>
653Task FlowBuilder::emplace(C&& callable) {
654 return emplace_on(std::forward<C>(callable), sycl::queue{});
655}
656
657// ############################################################################
658// Member definitions: Executor (declared in executor.hpp)
659// ############################################################################
660
661// Procedure: _invoke_syclflow_task_entry (syclFlow)
// Runs one syclFlow task: takes the syclGraph held by the node, clears it,
// lets the user callable populate it through a syclFlow bound to this executor
// and the given queue, and finally offloads it once if the callable did not
// already offload it.
// NOTE(review): the last template parameter (original line 663) is missing
// from this listing — restore it from the upstream header.
662template <typename C, typename Q,
664>
665void Executor::_invoke_syclflow_task_entry(Node* node, C&& c, Q& queue) {
666
 // NOTE(review): neither this get_if result nor the dynamic_cast below is
 // null-checked; presumably the node always holds a syclFlow handle whose
 // graph is a syclGraph — confirm against the caller.
667 auto h = std::get_if<Node::syclFlow>(&node->_handle);
668
669 syclGraph* g = dynamic_cast<syclGraph*>(h->graph.get());
670
 // start from an empty graph on every invocation of the task
671 g->clear();
672
 // private constructor: the syclFlow builds into g instead of owning a graph
673 syclFlow sf(*this, *g, queue);
674
675 c(sf);
676
 // skip the implicit offload when the graph state already has OFFLOADED set
677 if(!(g->_state & syclGraph::OFFLOADED)) {
678 sf.offload();
679 }
680}
681
682} // end of namespace tf -----------------------------------------------------
683
684
class to create an executor for running a taskflow graph
Definition executor.hpp:50
Task emplace(C &&callable)
creates a static task
Definition flow_builder.hpp:742
Graph & _graph
associated graph object
Definition flow_builder.hpp:727
Task emplace_on(C &&callable, D &&device)
creates a cudaFlow task on the given device
Definition cudaflow.hpp:1666
class to create a task handle over a node in a taskflow graph
Definition task.hpp:187
class for building a SYCL task dependency graph
Definition syclflow.hpp:23
syclTask single_task(F &&func)
invokes a SYCL kernel function using only one thread
Definition syclflow.hpp:492
bool empty() const
queries the emptiness of the graph
Definition syclflow.hpp:436
void offload_until(P &&predicate)
offloads the syclFlow onto a GPU and repeatedly runs it until the predicate becomes true
Definition syclflow.hpp:506
syclTask on(F &&func)
creates a task that launches the given command group function object
Definition syclflow.hpp:483
void offload_n(size_t N)
offloads the syclFlow and executes it by the given times
Definition syclflow.hpp:569
syclTask for_each_index(I first, I last, I step, C &&callable)
applies a callable to each index in the range with the step size
void offload()
offloads the syclFlow and executes it once
Definition syclflow.hpp:574
~syclFlow()=default
destroys the syclFlow
syclTask for_each(I first, I last, C &&callable)
applies a callable to each dereferenced element of the data array
void clear()
clear the associated graph
Definition syclflow.hpp:451
syclTask memset(void *ptr, int value, size_t bytes)
creates a memset task that fills untyped data with a byte value
Definition syclflow.hpp:461
void dump(std::ostream &os) const
dumps the syclFlow graph into a DOT format through an output stream
Definition syclflow.hpp:446
syclTask fill(void *ptr, const T &pattern, size_t count)
creates a fill task that fills typed data with the given value
Definition syclflow.hpp:467
syclTask uninitialized_reduce(I first, I last, T *result, C &&op)
similar to tf::syclFlow::reduce but does not assume any initial value to reduce
syclTask memcpy(void *tgt, const void *src, size_t bytes)
creates a memcpy task that copies untyped data in bytes
Definition syclflow.hpp:456
syclTask copy(T *target, const T *source, size_t count)
creates a copy task that copies typed data from a source to a target memory block
Definition syclflow.hpp:475
syclFlow(sycl::queue &queue)
constructs a standalone syclFlow from the given queue
Definition syclflow.hpp:422
syclTask reduce(I first, I last, T *result, C &&op)
performs parallel reduction over a range of items
syclTask transform(I first, I last, C &&callable, S... srcs)
applies a callable to a source range and stores the result in a target range
syclTask parallel_for(ArgsT &&... args)
creates a kernel task
Definition syclflow.hpp:500
size_t num_tasks() const
queries the number of tasks
Definition syclflow.hpp:441
handle to a node of the internal SYCL graph
Definition sycl_task.hpp:21
T forward(T... args)
taskflow namespace
Definition small_vector.hpp:27
syclTask include file