3#include "cuda_error.hpp"
17 TF_CHECK_CUDA(cudaGetDeviceCount(&N),
"failed to get device count");
18 return static_cast<size_t>(N);
26 TF_CHECK_CUDA(cudaGetDevice(&
id),
"failed to get current device id");
34 TF_CHECK_CUDA(cudaSetDevice(
id),
"failed to switch to device ",
id);
42 cudaGetDeviceProperties(&p, i),
"failed to get property of device ", i
52 cudaGetDeviceProperties(&p, i),
"failed to get property of device ", i
62 os <<
"Major revision number: " << p.major <<
'\n'
63 <<
"Minor revision number: " << p.minor <<
'\n'
64 <<
"Name: " << p.name <<
'\n'
65 <<
"Total global memory: " << p.totalGlobalMem <<
'\n'
66 <<
"Total shared memory per block: " << p.sharedMemPerBlock <<
'\n'
67 <<
"Total registers per block: " << p.regsPerBlock <<
'\n'
68 <<
"Warp size: " << p.warpSize <<
'\n'
69 <<
"Maximum memory pitch: " << p.memPitch <<
'\n'
70 <<
"Maximum threads per block: " << p.maxThreadsPerBlock <<
'\n';
72 os <<
"Maximum dimension of block: ";
73 for (
int i = 0; i < 3; ++i) {
75 os << p.maxThreadsDim[i];
79 os <<
"Maximum dimenstion of grid: ";
80 for (
int i = 0; i < 3; ++i) {
82 os << p.maxGridSize[i];;
86 os <<
"Clock rate: " << p.clockRate <<
'\n'
87 <<
"Total constant memory: " << p.totalConstMem <<
'\n'
88 <<
"Texture alignment: " << p.textureAlignment <<
'\n'
89 <<
"Concurrent copy and execution: " << p.deviceOverlap <<
'\n'
90 <<
"Number of multiprocessors: " << p.multiProcessorCount <<
'\n'
91 <<
"Kernel execution timeout: " << p.kernelExecTimeoutEnabled <<
'\n'
92 <<
"GPU sharing Host Memory: " << p.integrated <<
'\n'
93 <<
"Host page-locked mem mapping: " << p.canMapHostMemory <<
'\n'
94 <<
"Alignment for Surfaces: " << p.surfaceAlignment <<
'\n'
95 <<
"Device has ECC support: " << p.ECCEnabled <<
'\n'
96 <<
"Unified Addressing (UVA): " << p.unifiedAddressing <<
'\n';
105 cudaDeviceGetAttribute(&threads, cudaDevAttrMaxThreadsPerBlock, d),
106 "failed to query the maximum threads per block on device ", d
117 cudaDeviceGetAttribute(&dim, cudaDevAttrMaxBlockDimX, d),
118 "failed to query the maximum x-dimension per block on device ", d
129 cudaDeviceGetAttribute(&dim, cudaDevAttrMaxBlockDimY, d),
130 "failed to query the maximum y-dimension per block on device ", d
141 cudaDeviceGetAttribute(&dim, cudaDevAttrMaxBlockDimZ, d),
142 "failed to query the maximum z-dimension per block on device ", d
153 cudaDeviceGetAttribute(&dim, cudaDevAttrMaxGridDimX, d),
154 "failed to query the maximum x-dimension per grid on device ", d
165 cudaDeviceGetAttribute(&dim, cudaDevAttrMaxGridDimY, d),
166 "failed to query the maximum y-dimension per grid on device ", d
177 cudaDeviceGetAttribute(&dim, cudaDevAttrMaxGridDimZ, d),
178 "failed to query the maximum z-dimension per grid on device ", d
189 cudaDeviceGetAttribute(&num, cudaDevAttrMaxSharedMemoryPerBlock, d),
190 "failed to query the maximum shared memory per block on device ", d
201 cudaDeviceGetAttribute(&num, cudaDevAttrWarpSize, d),
202 "failed to query the warp size per block on device ", d
213 cudaDeviceGetAttribute(&num, cudaDevAttrComputeCapabilityMajor, d),
214 "failed to query the major number of compute capability of device ", d
225 cudaDeviceGetAttribute(&num, cudaDevAttrComputeCapabilityMinor, d),
226 "failed to query the minor number of compute capability of device ", d
237 cudaDeviceGetAttribute(&num, cudaDevAttrUnifiedAddressing, d),
238 "failed to query unified addressing status on device ", d
253 cudaDriverGetVersion(&num),
254 "failed to query the latest cuda version supported by the driver"
265 cudaRuntimeGetVersion(&num),
"failed to query cuda runtime version"
319inline cudaScopedDevice::cudaScopedDevice(
int dev) {
320 TF_CHECK_CUDA(cudaGetDevice(&_p),
"failed to get current device scope");
325 TF_CHECK_CUDA(cudaSetDevice(dev),
"failed to scope on device ", dev);
class that creates an RAII-style device context switch
Definition cuda_device.hpp:293
~cudaScopedDevice()
destructs the guard and switches back to the previous device context
Definition cuda_device.hpp:330
taskflow namespace
Definition small_vector.hpp:27
size_t cuda_get_device_max_z_dim_per_grid(int d)
queries the maximum z-dimension per grid on a device
Definition cuda_device.hpp:174
int cuda_get_device_compute_capability_major(int d)
queries the major number of compute capability of a device
Definition cuda_device.hpp:210
int cuda_get_device()
gets the current device associated with the calling thread
Definition cuda_device.hpp:24
int cuda_get_runtime_version()
queries the CUDA Runtime version (1000 * major + 10 * minor)
Definition cuda_device.hpp:262
void cuda_get_device_property(int i, cudaDeviceProp &p)
obtains the properties of the device with the given index
Definition cuda_device.hpp:40
int cuda_get_driver_version()
queries the latest CUDA version (1000 * major + 10 * minor) supported by the driver
Definition cuda_device.hpp:250
size_t cuda_get_device_max_z_dim_per_block(int d)
queries the maximum z-dimension per block on a device
Definition cuda_device.hpp:138
size_t cuda_get_device_max_x_dim_per_grid(int d)
queries the maximum x-dimension per grid on a device
Definition cuda_device.hpp:150
int cuda_get_device_compute_capability_minor(int d)
queries the minor number of compute capability of a device
Definition cuda_device.hpp:222
size_t cuda_get_device_max_y_dim_per_grid(int d)
queries the maximum y-dimension per grid on a device
Definition cuda_device.hpp:162
size_t cuda_get_device_max_y_dim_per_block(int d)
queries the maximum y-dimension per block on a device
Definition cuda_device.hpp:126
size_t cuda_get_device_max_threads_per_block(int d)
queries the maximum threads per block on a device
Definition cuda_device.hpp:102
size_t cuda_get_num_devices()
queries the number of available devices
Definition cuda_device.hpp:15
bool cuda_get_device_unified_addressing(int d)
queries if the device supports unified addressing
Definition cuda_device.hpp:234
void cuda_set_device(int id)
switches to a given device context
Definition cuda_device.hpp:33
size_t cuda_get_device_warp_size(int d)
queries the warp size on a device
Definition cuda_device.hpp:198
size_t cuda_get_device_max_shm_per_block(int d)
queries the maximum shared memory size in bytes per block on a device
Definition cuda_device.hpp:186
size_t cuda_get_device_max_x_dim_per_block(int d)
queries the maximum x-dimension per block on a device
Definition cuda_device.hpp:114
void cuda_dump_device_property(std::ostream &os, const cudaDeviceProp &p)
dumps the device properties to the given output stream
Definition cuda_device.hpp:60