3 #include "cuda_error.hpp"
28 template<
unsigned NT,
unsigned VT>
31 static_assert(is_pow2(NT),
"max # threads per block must be a power of two");
36 const static unsigned nt = NT;
39 const static unsigned vt = VT;
42 const static unsigned nv = NT*VT;
// Getter: returns the cudaStream_t currently stored in the _stream member.
// Declared noexcept; it only reads the member and does not modify it.
57 cudaStream_t
stream() noexcept {
return _stream; };
66 cudaStream_t _stream {0};
class to define execution policy for CUDA standard algorithms
Definition cuda_execution_policy.hpp:29
cudaStream_t stream() noexcept
queries the associated stream
Definition cuda_execution_policy.hpp:57
void stream(cudaStream_t stream) noexcept
assigns a stream
Definition cuda_execution_policy.hpp:62
static const unsigned nv
static constant for getting the number of elements to process per block
Definition cuda_execution_policy.hpp:42
static const unsigned vt
static constant for getting the number of work units per thread
Definition cuda_execution_policy.hpp:39
static const unsigned nt
static constant for getting the number of threads per block
Definition cuda_execution_policy.hpp:36
cudaExecutionPolicy(cudaStream_t s)
constructs an execution policy object with the given stream
Definition cuda_execution_policy.hpp:52
cudaExecutionPolicy()=default
constructs an execution policy object with default stream
taskflow namespace
Definition small_vector.hpp:27