* Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This software is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PSL v2 for more details.
*/
use xgpu_macros::api_hook;
#[api_hook(RuntimeApi, backend = crate::hook_impl::ipc::runtime::RuntimeApiImpl)]
mod api {
use std::ffi::{c_char, c_int, c_uint, c_ulonglong, c_void};
use cudax::runtime::*;
unsafe extern "C" {
// Device-wide control and query entry points (`cudaDevice*`).
// Every declaration in this group returns `cudaError_t`.
// NOTE(review): the `*mut` arguments look like out-parameters written by the
// callee; the names match the CUDA Runtime API, whose reference defines the
// actual semantics — confirm there.
pub fn cudaDeviceReset() -> cudaError_t;
pub fn cudaDeviceSynchronize() -> cudaError_t;
pub fn cudaDeviceSetLimit(limit: cudaLimit, value: usize) -> cudaError_t;
pub fn cudaDeviceGetLimit(p_value: *mut usize, limit: cudaLimit) -> cudaError_t;
pub fn cudaDeviceGetTexture1DLinearMaxWidth(
max_width_in_elements: *mut usize,
fmt_desc: *const cudaChannelFormatDesc,
device: c_int,
) -> cudaError_t;
pub fn cudaDeviceGetCacheConfig(p_cache_config: *mut cudaFuncCache) -> cudaError_t;
pub fn cudaDeviceGetStreamPriorityRange(
least_priority: *mut c_int,
greatest_priority: *mut c_int,
) -> cudaError_t;
pub fn cudaDeviceSetCacheConfig(cache_config: cudaFuncCache) -> cudaError_t;
// PCI bus id conversions: the id travels as a C string pointer.
// NOTE(review): `len` is presumably the capacity of the `pci_bus_id` buffer —
// confirm.
pub fn cudaDeviceGetByPCIBusId(
device: *mut c_int,
pci_bus_id: *const c_char,
) -> cudaError_t;
pub fn cudaDeviceGetPCIBusId(
pci_bus_id: *mut c_char,
len: c_int,
device: c_int,
) -> cudaError_t;
// Inter-process handle entry points (`cudaIpc*`) for events and memory.
// The `Get*` declarations take a `*mut` handle, while the `Open*`
// declarations take the handle type by value
// (`cudaIpcEventHandle_t` / `cudaIpcMemHandle_t`).
pub fn cudaIpcGetEventHandle(
handle: *mut cudaIpcEventHandle_t,
event: cudaEvent_t,
) -> cudaError_t;
pub fn cudaIpcOpenEventHandle(
event: *mut cudaEvent_t,
handle: cudaIpcEventHandle_t,
) -> cudaError_t;
pub fn cudaIpcGetMemHandle(
handle: *mut cudaIpcMemHandle_t,
dev_ptr: *mut c_void,
) -> cudaError_t;
pub fn cudaIpcOpenMemHandle(
dev_ptr: *mut *mut c_void,
handle: cudaIpcMemHandle_t,
flags: c_uint,
) -> cudaError_t;
pub fn cudaIpcCloseMemHandle(dev_ptr: *mut c_void) -> cudaError_t;
// Further `cudaDevice*` entry points: GPUDirect RDMA write flushing,
// async-notification registration, and shared-memory configuration.
pub fn cudaDeviceFlushGPUDirectRDMAWrites(
target: cudaFlushGPUDirectRDMAWritesTarget,
scope: cudaFlushGPUDirectRDMAWritesScope,
) -> cudaError_t;
// Registration takes a `*mut cudaAsyncCallbackHandle_t`; unregistration takes
// that handle type by value.
pub fn cudaDeviceRegisterAsyncNotification(
device: c_int,
callback_func: cudaAsyncCallback,
user_data: *mut c_void,
callback: *mut cudaAsyncCallbackHandle_t,
) -> cudaError_t;
pub fn cudaDeviceUnregisterAsyncNotification(
device: c_int,
callback: cudaAsyncCallbackHandle_t,
) -> cudaError_t;
pub fn cudaDeviceGetSharedMemConfig(p_config: *mut cudaSharedMemConfig) -> cudaError_t;
pub fn cudaDeviceSetSharedMemConfig(config: cudaSharedMemConfig) -> cudaError_t;
// `cudaThread*` entry points. Each takes the same parameter types as the
// `cudaDevice*` declaration with the matching suffix (Synchronize, SetLimit,
// GetLimit, GetCacheConfig, SetCacheConfig) in this extern block.
// NOTE(review): presumably legacy aliases of the `cudaDevice*` calls — confirm
// against the CUDA Runtime API reference.
pub fn cudaThreadExit() -> cudaError_t;
pub fn cudaThreadSynchronize() -> cudaError_t;
pub fn cudaThreadSetLimit(limit: cudaLimit, value: usize) -> cudaError_t;
pub fn cudaThreadGetLimit(p_value: *mut usize, limit: cudaLimit) -> cudaError_t;
pub fn cudaThreadGetCacheConfig(p_cache_config: *mut cudaFuncCache) -> cudaError_t;
pub fn cudaThreadSetCacheConfig(cache_config: cudaFuncCache) -> cudaError_t;
// Error queries. The first two return a `cudaError_t` and take no arguments;
// the last two map a `cudaError_t` to a `*const c_char`.
// NOTE(review): the returned pointers are presumably NUL-terminated strings
// owned by the library — confirm before converting with `CStr`.
pub fn cudaGetLastError() -> cudaError_t;
pub fn cudaPeekAtLastError() -> cudaError_t;
pub fn cudaGetErrorName(error: cudaError_t) -> *const c_char;
pub fn cudaGetErrorString(error: cudaError_t) -> *const c_char;
// Device enumeration, selection, and attribute/property queries.
// Device identifiers are `c_int`, passed by value or behind `*mut c_int`.
pub fn cudaGetDeviceCount(count: *mut c_int) -> cudaError_t;
// Declared under its versioned `_v2` symbol name.
pub fn cudaGetDeviceProperties_v2(prop: *mut cudaDeviceProp, device: c_int) -> cudaError_t;
pub fn cudaDeviceGetAttribute(
value: *mut c_int,
attr: cudaDeviceAttr,
device: c_int,
) -> cudaError_t;
pub fn cudaDeviceGetDefaultMemPool(
mem_pool: *mut cudaMemPool_t,
device: c_int,
) -> cudaError_t;
pub fn cudaDeviceSetMemPool(device: c_int, mem_pool: cudaMemPool_t) -> cudaError_t;
pub fn cudaDeviceGetMemPool(mem_pool: *mut cudaMemPool_t, device: c_int) -> cudaError_t;
// The attribute list is passed as an untyped `*mut c_void`.
pub fn cudaDeviceGetNvSciSyncAttributes(
nv_sci_sync_attr_list: *mut c_void,
device: c_int,
flags: c_int,
) -> cudaError_t;
pub fn cudaDeviceGetP2PAttribute(
value: *mut c_int,
attr: cudaDeviceP2PAttr,
src_device: c_int,
dst_device: c_int,
) -> cudaError_t;
pub fn cudaChooseDevice(device: *mut c_int, prop: *const cudaDeviceProp) -> cudaError_t;
pub fn cudaInitDevice(device: c_int, device_flags: c_uint, flags: c_uint) -> cudaError_t;
pub fn cudaSetDevice(device: c_int) -> cudaError_t;
pub fn cudaGetDevice(device: *mut c_int) -> cudaError_t;
// NOTE(review): `len` is presumably the element count of `device_arr` —
// confirm.
pub fn cudaSetValidDevices(device_arr: *mut c_int, len: c_int) -> cudaError_t;
pub fn cudaSetDeviceFlags(flags: c_uint) -> cudaError_t;
pub fn cudaGetDeviceFlags(flags: *mut c_uint) -> cudaError_t;
// Stream creation, attribute access, synchronization, and teardown.
// Creation functions take `p_stream: *mut cudaStream_t`; the remaining
// stream functions take a `cudaStream_t` by value.
pub fn cudaStreamCreate(p_stream: *mut cudaStream_t) -> cudaError_t;
pub fn cudaStreamCreateWithFlags(p_stream: *mut cudaStream_t, flags: c_uint)
-> cudaError_t;
pub fn cudaStreamCreateWithPriority(
p_stream: *mut cudaStream_t,
flags: c_uint,
priority: c_int,
) -> cudaError_t;
pub fn cudaStreamGetPriority(h_stream: cudaStream_t, priority: *mut c_int) -> cudaError_t;
pub fn cudaStreamGetFlags(h_stream: cudaStream_t, flags: *mut c_uint) -> cudaError_t;
pub fn cudaStreamGetId(h_stream: cudaStream_t, stream_id: *mut c_ulonglong) -> cudaError_t;
// Takes no stream argument, unlike its neighbours in this run.
pub fn cudaCtxResetPersistingL2Cache() -> cudaError_t;
pub fn cudaStreamCopyAttributes(dst: cudaStream_t, src: cudaStream_t) -> cudaError_t;
// Attribute get/set share the `cudaLaunchAttributeID` /
// `cudaLaunchAttributeValue` types; the getter takes `*mut`, the setter
// `*const`.
pub fn cudaStreamGetAttribute(
h_stream: cudaStream_t,
attr: cudaLaunchAttributeID,
value_out: *mut cudaLaunchAttributeValue,
) -> cudaError_t;
pub fn cudaStreamSetAttribute(
h_stream: cudaStream_t,
attr: cudaLaunchAttributeID,
value: *const cudaLaunchAttributeValue,
) -> cudaError_t;
pub fn cudaStreamDestroy(stream: cudaStream_t) -> cudaError_t;
pub fn cudaStreamWaitEvent(
stream: cudaStream_t,
event: cudaEvent_t,
flags: c_uint,
) -> cudaError_t;
// `user_data` is an opaque `*mut c_void` passed alongside the callback.
pub fn cudaStreamAddCallback(
stream: cudaStream_t,
callback: cudaStreamCallback_t,
user_data: *mut c_void,
flags: c_uint,
) -> cudaError_t;
pub fn cudaStreamSynchronize(stream: cudaStream_t) -> cudaError_t;
pub fn cudaStreamQuery(stream: cudaStream_t) -> cudaError_t;
pub fn cudaStreamAttachMemAsync(
stream: cudaStream_t,
dev_ptr: *mut c_void,
length: usize,
flags: c_uint,
) -> cudaError_t;
// Stream-capture entry points. Versioned variants (`_v2`, `_v3`) are declared
// as separate symbols; in each pair the later variant adds a
// `cudaGraphEdgeData` pointer parameter to the earlier signature.
pub fn cudaStreamBeginCapture(
stream: cudaStream_t,
mode: cudaStreamCaptureMode,
) -> cudaError_t;
// NOTE(review): `num_dependencies` is presumably the length of both
// `dependencies` and `dependency_data` — confirm.
pub fn cudaStreamBeginCaptureToGraph(
stream: cudaStream_t,
graph: cudaGraph_t,
dependencies: *const cudaGraphNode_t,
dependency_data: *const cudaGraphEdgeData,
num_dependencies: usize,
mode: cudaStreamCaptureMode,
) -> cudaError_t;
// `mode` is a `*mut` pointer here, not a by-value enum.
pub fn cudaThreadExchangeStreamCaptureMode(mode: *mut cudaStreamCaptureMode)
-> cudaError_t;
pub fn cudaStreamEndCapture(stream: cudaStream_t, p_graph: *mut cudaGraph_t)
-> cudaError_t;
pub fn cudaStreamIsCapturing(
stream: cudaStream_t,
p_capture_status: *mut cudaStreamCaptureStatus,
) -> cudaError_t;
pub fn cudaStreamGetCaptureInfo_v2(
stream: cudaStream_t,
capture_status_out: *mut cudaStreamCaptureStatus,
id_out: *mut c_ulonglong,
graph_out: *mut cudaGraph_t,
dependencies_out: *mut *const cudaGraphNode_t,
num_dependencies_out: *mut usize,
) -> cudaError_t;
// Same parameters as `_v2` plus `edge_data_out`.
pub fn cudaStreamGetCaptureInfo_v3(
stream: cudaStream_t,
capture_status_out: *mut cudaStreamCaptureStatus,
id_out: *mut c_ulonglong,
graph_out: *mut cudaGraph_t,
dependencies_out: *mut *const cudaGraphNode_t,
edge_data_out: *mut *const cudaGraphEdgeData,
num_dependencies_out: *mut usize,
) -> cudaError_t;
pub fn cudaStreamUpdateCaptureDependencies(
stream: cudaStream_t,
dependencies: *mut cudaGraphNode_t,
num_dependencies: usize,
flags: c_uint,
) -> cudaError_t;
// Same parameters as the unversioned form plus `dependency_data`.
pub fn cudaStreamUpdateCaptureDependencies_v2(
stream: cudaStream_t,
dependencies: *mut cudaGraphNode_t,
dependency_data: *const cudaGraphEdgeData,
num_dependencies: usize,
flags: c_uint,
) -> cudaError_t;
// Event lifecycle, recording, and timing entry points.
// Creation functions take `*mut cudaEvent_t`; all others take `cudaEvent_t`
// by value.
pub fn cudaEventCreate(event: *mut cudaEvent_t) -> cudaError_t;
pub fn cudaEventCreateWithFlags(event: *mut cudaEvent_t, flags: c_uint) -> cudaError_t;
pub fn cudaEventRecord(event: cudaEvent_t, stream: cudaStream_t) -> cudaError_t;
pub fn cudaEventRecordWithFlags(
event: cudaEvent_t,
stream: cudaStream_t,
flags: c_uint,
) -> cudaError_t;
pub fn cudaEventQuery(event: cudaEvent_t) -> cudaError_t;
pub fn cudaEventSynchronize(event: cudaEvent_t) -> cudaError_t;
pub fn cudaEventDestroy(event: cudaEvent_t) -> cudaError_t;
// The result goes through `ms: *mut f32`.
// NOTE(review): unit is presumably milliseconds, going by the parameter
// name — confirm.
pub fn cudaEventElapsedTime(
ms: *mut f32,
start: cudaEvent_t,
end: cudaEvent_t,
) -> cudaError_t;
// External memory and external semaphore interop entry points.
// Import functions take a `*mut` handle plus a `*const` descriptor; destroy
// functions take the handle by value.
pub fn cudaImportExternalMemory(
ext_mem_out: *mut cudaExternalMemory_t,
mem_handle_desc: *const cudaExternalMemoryHandleDesc,
) -> cudaError_t;
pub fn cudaExternalMemoryGetMappedBuffer(
dev_ptr: *mut *mut c_void,
ext_mem: cudaExternalMemory_t,
buffer_desc: *const cudaExternalMemoryBufferDesc,
) -> cudaError_t;
pub fn cudaExternalMemoryGetMappedMipmappedArray(
mipmap: *mut cudaMipmappedArray_t,
ext_mem: cudaExternalMemory_t,
mipmap_desc: *const cudaExternalMemoryMipmappedArrayDesc,
) -> cudaError_t;
pub fn cudaDestroyExternalMemory(ext_mem: cudaExternalMemory_t) -> cudaError_t;
pub fn cudaImportExternalSemaphore(
ext_sem_out: *mut cudaExternalSemaphore_t,
sem_handle_desc: *const cudaExternalSemaphoreHandleDesc,
) -> cudaError_t;
// Signal/wait are declared under their `_v2` symbol names.
// NOTE(review): `num_ext_sems` is presumably the length of both
// `ext_sem_array` and `params_array` — confirm.
pub fn cudaSignalExternalSemaphoresAsync_v2(
ext_sem_array: *const cudaExternalSemaphore_t,
params_array: *const cudaExternalSemaphoreSignalParams,
num_ext_sems: c_uint,
stream: cudaStream_t,
) -> cudaError_t;
pub fn cudaWaitExternalSemaphoresAsync_v2(
ext_sem_array: *const cudaExternalSemaphore_t,
params_array: *const cudaExternalSemaphoreWaitParams,
num_ext_sems: c_uint,
stream: cudaStream_t,
) -> cudaError_t;
pub fn cudaDestroyExternalSemaphore(ext_sem: cudaExternalSemaphore_t) -> cudaError_t;
// Kernel launch and per-function configuration entry points.
// Device functions are identified by an untyped `func: *const c_void`.
// `cudaLaunchKernel` and `cudaLaunchCooperativeKernel` are declared with
// identical parameter lists.
// NOTE(review): `args` is presumably an array of pointers to the individual
// kernel arguments — confirm against the CUDA Runtime API reference.
pub fn cudaLaunchKernel(
func: *const c_void,
grid_dim: dim3,
block_dim: dim3,
args: *mut *mut c_void,
shared_mem: usize,
stream: cudaStream_t,
) -> cudaError_t;
// Launch geometry and stream come from `config` instead of separate
// parameters.
pub fn cudaLaunchKernelExC(
config: *const cudaLaunchConfig_t,
func: *const c_void,
args: *mut *mut c_void,
) -> cudaError_t;
pub fn cudaLaunchCooperativeKernel(
func: *const c_void,
grid_dim: dim3,
block_dim: dim3,
args: *mut *mut c_void,
shared_mem: usize,
stream: cudaStream_t,
) -> cudaError_t;
pub fn cudaLaunchCooperativeKernelMultiDevice(
launch_params_list: *mut cudaLaunchParams,
num_devices: c_uint,
flags: c_uint,
) -> cudaError_t;
pub fn cudaFuncSetCacheConfig(
func: *const c_void,
cache_config: cudaFuncCache,
) -> cudaError_t;
pub fn cudaFuncGetAttributes(
attr: *mut cudaFuncAttributes,
func: *const c_void,
) -> cudaError_t;
pub fn cudaFuncSetAttribute(
func: *const c_void,
attr: cudaFuncAttribute,
value: c_int,
) -> cudaError_t;
pub fn cudaFuncGetParamInfo(
func: *const c_void,
param_index: usize,
param_offset: *mut usize,
param_size: *mut usize,
) -> cudaError_t;
// Both take a `*mut f64`.
pub fn cudaSetDoubleForDevice(d: *mut f64) -> cudaError_t;
pub fn cudaSetDoubleForHost(d: *mut f64) -> cudaError_t;
// The parameter is named `fn_` because `fn` is a Rust keyword.
pub fn cudaLaunchHostFunc(
stream: cudaStream_t,
fn_: cudaHostFn_t,
user_data: *mut c_void,
) -> cudaError_t;
pub fn cudaFuncSetSharedMemConfig(
func: *const c_void,
config: cudaSharedMemConfig,
) -> cudaError_t;
// Occupancy query entry points (`cudaOccupancy*`).
// Each takes a `*mut` first parameter followed by `func: *const c_void`.
// Sizes are `usize`; block and cluster counts are `c_int`.
pub fn cudaOccupancyMaxActiveBlocksPerMultiprocessor(
num_blocks: *mut c_int,
func: *const c_void,
block_size: c_int,
dynamic_smem_size: usize,
) -> cudaError_t;
pub fn cudaOccupancyAvailableDynamicSMemPerBlock(
dynamic_smem_size: *mut usize,
func: *const c_void,
num_blocks: c_int,
block_size: c_int,
) -> cudaError_t;
// Same parameters as the unflagged form plus `flags`.
pub fn cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
num_blocks: *mut c_int,
func: *const c_void,
block_size: c_int,
dynamic_smem_size: usize,
flags: c_uint,
) -> cudaError_t;
// The two cluster queries take a `*const cudaLaunchConfig_t` instead of
// explicit block sizes.
pub fn cudaOccupancyMaxPotentialClusterSize(
cluster_size: *mut c_int,
func: *const c_void,
launch_config: *const cudaLaunchConfig_t,
) -> cudaError_t;
pub fn cudaOccupancyMaxActiveClusters(
num_clusters: *mut c_int,
func: *const c_void,
launch_config: *const cudaLaunchConfig_t,
) -> cudaError_t;
// Memory allocation, release, and host-memory registration entry points.
// Allocators take a `*mut *mut c_void` (or a `*mut` array/pitched-pointer
// handle); the matching free functions take the pointer or handle by value.
// NOTE(review): sizes are presumably in bytes — confirm against the CUDA
// Runtime API reference.
pub fn cudaMallocManaged(
dev_ptr: *mut *mut c_void,
size: usize,
flags: c_uint,
) -> cudaError_t;
pub fn cudaMalloc(dev_ptr: *mut *mut c_void, size: usize) -> cudaError_t;
pub fn cudaMallocHost(ptr: *mut *mut c_void, size: usize) -> cudaError_t;
// Takes a second `*mut` parameter, `pitch`, in addition to `dev_ptr`.
pub fn cudaMallocPitch(
dev_ptr: *mut *mut c_void,
pitch: *mut usize,
width: usize,
height: usize,
) -> cudaError_t;
pub fn cudaMallocArray(
array: *mut cudaArray_t,
desc: *const cudaChannelFormatDesc,
width: usize,
height: usize,
flags: c_uint,
) -> cudaError_t;
pub fn cudaFree(dev_ptr: *mut c_void) -> cudaError_t;
pub fn cudaFreeHost(ptr: *mut c_void) -> cudaError_t;
pub fn cudaFreeArray(array: cudaArray_t) -> cudaError_t;
pub fn cudaFreeMipmappedArray(mipmapped_array: cudaMipmappedArray_t) -> cudaError_t;
pub fn cudaHostAlloc(p_host: *mut *mut c_void, size: usize, flags: c_uint) -> cudaError_t;
pub fn cudaHostRegister(ptr: *mut c_void, size: usize, flags: c_uint) -> cudaError_t;
pub fn cudaHostUnregister(ptr: *mut c_void) -> cudaError_t;
pub fn cudaHostGetDevicePointer(
p_device: *mut *mut c_void,
p_host: *mut c_void,
flags: c_uint,
) -> cudaError_t;
pub fn cudaHostGetFlags(p_flags: *mut c_uint, p_host: *mut c_void) -> cudaError_t;
// The 3D variants take a by-value `cudaExtent` rather than separate
// width/height arguments.
pub fn cudaMalloc3D(pitcheddev_ptr: *mut cudaPitchedPtr, extent: cudaExtent)
-> cudaError_t;
pub fn cudaMalloc3DArray(
array: *mut cudaArray_t,
desc: *const cudaChannelFormatDesc,
extent: cudaExtent,
flags: c_uint,
) -> cudaError_t;
pub fn cudaMallocMipmappedArray(
mipmapped_array: *mut cudaMipmappedArray_t,
desc: *const cudaChannelFormatDesc,
extent: cudaExtent,
num_levels: c_uint,
flags: c_uint,
) -> cudaError_t;
// The source handle uses the `cudaMipmappedArray_const_t` type.
pub fn cudaGetMipmappedArrayLevel(
level_array: *mut cudaArray_t,
mipmapped_array: cudaMipmappedArray_const_t,
level: c_uint,
) -> cudaError_t;
// 3D copy entry points. All arguments are packed into a parameter struct
// passed by `*const` pointer (`cudaMemcpy3DParms` or `cudaMemcpy3DPeerParms`);
// the `Async` variants add a `stream`.
pub fn cudaMemcpy3D(p: *const cudaMemcpy3DParms) -> cudaError_t;
pub fn cudaMemcpy3DPeer(p: *const cudaMemcpy3DPeerParms) -> cudaError_t;
pub fn cudaMemcpy3DAsync(p: *const cudaMemcpy3DParms, stream: cudaStream_t) -> cudaError_t;
pub fn cudaMemcpy3DPeerAsync(
p: *const cudaMemcpy3DPeerParms,
stream: cudaStream_t,
) -> cudaError_t;
// Memory and array introspection entry points.
// Each takes one or more leading `*mut` parameters followed by the handle
// being queried (`cudaMemGetInfo` takes no handle).
pub fn cudaMemGetInfo(free: *mut usize, total: *mut usize) -> cudaError_t;
pub fn cudaArrayGetInfo(
desc: *mut cudaChannelFormatDesc,
extent: *mut cudaExtent,
flags: *mut c_uint,
array: cudaArray_t,
) -> cudaError_t;
pub fn cudaArrayGetPlane(
p_plane_array: *mut cudaArray_t,
h_array: cudaArray_t,
plane_idx: c_uint,
) -> cudaError_t;
// Memory-requirement and sparse-property queries are declared in pairs: one
// for `cudaArray_t`, one for `cudaMipmappedArray_t`.
pub fn cudaArrayGetMemoryRequirements(
memory_requirements: *mut cudaArrayMemoryRequirements,
array: cudaArray_t,
device: c_int,
) -> cudaError_t;
pub fn cudaMipmappedArrayGetMemoryRequirements(
memory_requirements: *mut cudaArrayMemoryRequirements,
mipmap: cudaMipmappedArray_t,
device: c_int,
) -> cudaError_t;
pub fn cudaArrayGetSparseProperties(
sparse_properties: *mut cudaArraySparseProperties,
array: cudaArray_t,
) -> cudaError_t;
pub fn cudaMipmappedArrayGetSparseProperties(
sparse_properties: *mut cudaArraySparseProperties,
mipmap: cudaMipmappedArray_t,
) -> cudaError_t;
// Copy entry points declared without a stream parameter.
// All except `cudaMemcpyPeer` take a trailing `kind: cudaMemcpyKind`;
// `cudaMemcpyPeer` instead names explicit source and destination devices.
// NOTE(review): `count`, pitches, and offsets are presumably in bytes —
// confirm against the CUDA Runtime API reference.
pub fn cudaMemcpy(
dst: *mut c_void,
src: *const c_void,
count: usize,
kind: cudaMemcpyKind,
) -> cudaError_t;
pub fn cudaMemcpyPeer(
dst: *mut c_void,
dst_device: c_int,
src: *const c_void,
src_device: c_int,
count: usize,
) -> cudaError_t;
// 2D variants: linear memory carries a pitch (`dpitch` / `spitch`); array
// operands carry `w_offset` / `h_offset` instead.
pub fn cudaMemcpy2D(
dst: *mut c_void,
dpitch: usize,
src: *const c_void,
spitch: usize,
width: usize,
height: usize,
kind: cudaMemcpyKind,
) -> cudaError_t;
pub fn cudaMemcpy2DToArray(
dst: cudaArray_t,
w_offset: usize,
h_offset: usize,
src: *const c_void,
spitch: usize,
width: usize,
height: usize,
kind: cudaMemcpyKind,
) -> cudaError_t;
pub fn cudaMemcpy2DFromArray(
dst: *mut c_void,
dpitch: usize,
src: cudaArray_const_t,
w_offset: usize,
h_offset: usize,
width: usize,
height: usize,
kind: cudaMemcpyKind,
) -> cudaError_t;
pub fn cudaMemcpy2DArrayToArray(
dst: cudaArray_t,
w_offset_dst: usize,
h_offset_dst: usize,
src: cudaArray_const_t,
w_offset_src: usize,
h_offset_src: usize,
width: usize,
height: usize,
kind: cudaMemcpyKind,
) -> cudaError_t;
// Symbol variants identify the device symbol by `*const c_void` and take an
// additional `offset`.
pub fn cudaMemcpyToSymbol(
symbol: *const c_void,
src: *const c_void,
count: usize,
offset: usize,
kind: cudaMemcpyKind,
) -> cudaError_t;
pub fn cudaMemcpyFromSymbol(
dst: *mut c_void,
symbol: *const c_void,
count: usize,
offset: usize,
kind: cudaMemcpyKind,
) -> cudaError_t;
// Copy entry points that take a stream. Each declaration has the same
// parameters as the non-`Async` declaration of the same base name in this
// extern block, followed by a trailing `stream: cudaStream_t`.
pub fn cudaMemcpyAsync(
dst: *mut c_void,
src: *const c_void,
count: usize,
kind: cudaMemcpyKind,
stream: cudaStream_t,
) -> cudaError_t;
pub fn cudaMemcpyPeerAsync(
dst: *mut c_void,
dst_device: c_int,
src: *const c_void,
src_device: c_int,
count: usize,
stream: cudaStream_t,
) -> cudaError_t;
pub fn cudaMemcpy2DAsync(
dst: *mut c_void,
dpitch: usize,
src: *const c_void,
spitch: usize,
width: usize,
height: usize,
kind: cudaMemcpyKind,
stream: cudaStream_t,
) -> cudaError_t;
pub fn cudaMemcpy2DToArrayAsync(
dst: cudaArray_t,
w_offset: usize,
h_offset: usize,
src: *const c_void,
spitch: usize,
width: usize,
height: usize,
kind: cudaMemcpyKind,
stream: cudaStream_t,
) -> cudaError_t;
pub fn cudaMemcpy2DFromArrayAsync(
dst: *mut c_void,
dpitch: usize,
src: cudaArray_const_t,
w_offset: usize,
h_offset: usize,
width: usize,
height: usize,
kind: cudaMemcpyKind,
stream: cudaStream_t,
) -> cudaError_t;
pub fn cudaMemcpyToSymbolAsync(
symbol: *const c_void,
src: *const c_void,
count: usize,
offset: usize,
kind: cudaMemcpyKind,
stream: cudaStream_t,
) -> cudaError_t;
pub fn cudaMemcpyFromSymbolAsync(
dst: *mut c_void,
symbol: *const c_void,
count: usize,
offset: usize,
kind: cudaMemcpyKind,
stream: cudaStream_t,
) -> cudaError_t;
// Memset entry points in 1D, 2D, and 3D forms, each with an `Async` variant
// that adds a trailing `stream`. The fill `value` is a `c_int` throughout.
// NOTE(review): presumably only the low byte of `value` is used, as with C
// `memset` — confirm against the CUDA Runtime API reference.
pub fn cudaMemset(dev_ptr: *mut c_void, value: c_int, count: usize) -> cudaError_t;
pub fn cudaMemset2D(
dev_ptr: *mut c_void,
pitch: usize,
value: c_int,
width: usize,
height: usize,
) -> cudaError_t;
// The 3D forms take `cudaPitchedPtr` and `cudaExtent` by value.
pub fn cudaMemset3D(
pitcheddev_ptr: cudaPitchedPtr,
value: c_int,
extent: cudaExtent,
) -> cudaError_t;
pub fn cudaMemsetAsync(
dev_ptr: *mut c_void,
value: c_int,
count: usize,
stream: cudaStream_t,
) -> cudaError_t;
pub fn cudaMemset2DAsync(
dev_ptr: *mut c_void,
pitch: usize,
value: c_int,
width: usize,
height: usize,
stream: cudaStream_t,
) -> cudaError_t;
pub fn cudaMemset3DAsync(
pitcheddev_ptr: cudaPitchedPtr,
value: c_int,
extent: cudaExtent,
stream: cudaStream_t,
) -> cudaError_t;
// Symbol lookup plus memory prefetch / advise / range-attribute entry points.
pub fn cudaGetSymbolAddress(
dev_ptr: *mut *mut c_void,
symbol: *const c_void,
) -> cudaError_t;
pub fn cudaGetSymbolSize(size: *mut usize, symbol: *const c_void) -> cudaError_t;
// Prefetch and advise each exist in two forms: the unversioned one takes a
// `c_int` device, the `_v2` one takes a by-value `cudaMemLocation`
// (`cudaMemPrefetchAsync_v2` also adds `flags`).
pub fn cudaMemPrefetchAsync(
dev_ptr: *const c_void,
count: usize,
dst_device: c_int,
stream: cudaStream_t,
) -> cudaError_t;
pub fn cudaMemPrefetchAsync_v2(
dev_ptr: *const c_void,
count: usize,
location: cudaMemLocation,
flags: c_uint,
stream: cudaStream_t,
) -> cudaError_t;
pub fn cudaMemAdvise(
dev_ptr: *const c_void,
count: usize,
advice: cudaMemoryAdvise,
device: c_int,
) -> cudaError_t;
pub fn cudaMemAdvise_v2(
dev_ptr: *const c_void,
count: usize,
advice: cudaMemoryAdvise,
location: cudaMemLocation,
) -> cudaError_t;
// The single-attribute form takes an untyped `data` buffer with its size.
pub fn cudaMemRangeGetAttribute(
data: *mut c_void,
data_size: usize,
attribute: cudaMemRangeAttribute,
dev_ptr: *const c_void,
count: usize,
) -> cudaError_t;
// NOTE(review): `data`, `data_sizes`, and `attributes` are presumably
// parallel arrays of `num_attributes` entries — confirm.
pub fn cudaMemRangeGetAttributes(
data: *mut *mut c_void,
data_sizes: *mut usize,
attributes: *mut cudaMemRangeAttribute,
num_attributes: usize,
dev_ptr: *const c_void,
count: usize,
) -> cudaError_t;
// Array copy entry points that take a single `count` instead of the
// `width` / `height` pair used by the `cudaMemcpy2D*Array*` declarations.
// The `Async` variants add a trailing `stream`.
// NOTE(review): these look like the older array-copy interface; check their
// deprecation status in the CUDA Runtime API reference before adding callers.
pub fn cudaMemcpyToArray(
dst: cudaArray_t,
w_offset: usize,
h_offset: usize,
src: *const c_void,
count: usize,
kind: cudaMemcpyKind,
) -> cudaError_t;
pub fn cudaMemcpyFromArray(
dst: *mut c_void,
src: cudaArray_const_t,
w_offset: usize,
h_offset: usize,
count: usize,
kind: cudaMemcpyKind,
) -> cudaError_t;
pub fn cudaMemcpyArrayToArray(
dst: cudaArray_t,
w_offset_dst: usize,
h_offset_dst: usize,
src: cudaArray_const_t,
w_offset_src: usize,
h_offset_src: usize,
count: usize,
kind: cudaMemcpyKind,
) -> cudaError_t;
pub fn cudaMemcpyToArrayAsync(
dst: cudaArray_t,
w_offset: usize,
h_offset: usize,
src: *const c_void,
count: usize,
kind: cudaMemcpyKind,
stream: cudaStream_t,
) -> cudaError_t;
pub fn cudaMemcpyFromArrayAsync(
dst: *mut c_void,
src: cudaArray_const_t,
w_offset: usize,
h_offset: usize,
count: usize,
kind: cudaMemcpyKind,
stream: cudaStream_t,
) -> cudaError_t;
// Stream-ordered allocation and memory-pool (`cudaMemPool*`) entry points.
pub fn cudaMallocAsync(
dev_ptr: *mut *mut c_void,
size: usize,
h_stream: cudaStream_t,
) -> cudaError_t;
pub fn cudaFreeAsync(dev_ptr: *mut c_void, h_stream: cudaStream_t) -> cudaError_t;
pub fn cudaMemPoolTrimTo(mem_pool: cudaMemPool_t, min_bytes_to_keep: usize) -> cudaError_t;
// Attribute get/set both pass the value as an untyped `*mut c_void`.
// NOTE(review): the pointee type presumably depends on `attr` — confirm
// per attribute.
pub fn cudaMemPoolSetAttribute(
mem_pool: cudaMemPool_t,
attr: cudaMemPoolAttr,
value: *mut c_void,
) -> cudaError_t;
pub fn cudaMemPoolGetAttribute(
mem_pool: cudaMemPool_t,
attr: cudaMemPoolAttr,
value: *mut c_void,
) -> cudaError_t;
pub fn cudaMemPoolSetAccess(
mem_pool: cudaMemPool_t,
desc_list: *const cudaMemAccessDesc,
count: usize,
) -> cudaError_t;
// `location` is declared `*mut` here even though `flags` is the parameter
// named like a result.
pub fn cudaMemPoolGetAccess(
flags: *mut cudaMemAccessFlags,
mem_pool: cudaMemPool_t,
location: *mut cudaMemLocation,
) -> cudaError_t;
pub fn cudaMemPoolCreate(
mem_pool: *mut cudaMemPool_t,
pool_props: *const cudaMemPoolProps,
) -> cudaError_t;
pub fn cudaMemPoolDestroy(mem_pool: cudaMemPool_t) -> cudaError_t;
pub fn cudaMallocFromPoolAsync(
ptr: *mut *mut c_void,
size: usize,
mem_pool: cudaMemPool_t,
stream: cudaStream_t,
) -> cudaError_t;
// Shareable handles are untyped `*mut c_void`, tagged by `handle_type`.
pub fn cudaMemPoolExportToShareableHandle(
shareable_handle: *mut c_void,
mem_pool: cudaMemPool_t,
handle_type: cudaMemAllocationHandleType,
flags: c_uint,
) -> cudaError_t;
pub fn cudaMemPoolImportFromShareableHandle(
mem_pool: *mut cudaMemPool_t,
shareable_handle: *mut c_void,
handle_type: cudaMemAllocationHandleType,
flags: c_uint,
) -> cudaError_t;
pub fn cudaMemPoolExportPointer(
export_data: *mut cudaMemPoolPtrExportData,
ptr: *mut c_void,
) -> cudaError_t;
pub fn cudaMemPoolImportPointer(
ptr: *mut *mut c_void,
mem_pool: cudaMemPool_t,
export_data: *mut cudaMemPoolPtrExportData,
) -> cudaError_t;
// Pointer attribute query and peer-access control entry points.
// Peer devices are identified by `c_int`.
pub fn cudaPointerGetAttributes(
attributes: *mut cudaPointerAttributes,
ptr: *const c_void,
) -> cudaError_t;
pub fn cudaDeviceCanAccessPeer(
can_access_peer: *mut c_int,
device: c_int,
peer_device: c_int,
) -> cudaError_t;
pub fn cudaDeviceEnablePeerAccess(peer_device: c_int, flags: c_uint) -> cudaError_t;
pub fn cudaDeviceDisablePeerAccess(peer_device: c_int) -> cudaError_t;
// Graphics-interop resource entry points (`cudaGraphics*`).
// No registration function is declared in this run; only unregister, map,
// unmap, and mapped-resource accessors.
pub fn cudaGraphicsUnregisterResource(resource: cudaGraphicsResource_t) -> cudaError_t;
pub fn cudaGraphicsResourceSetMapFlags(
resource: cudaGraphicsResource_t,
flags: c_uint,
) -> cudaError_t;
// Map/unmap share one parameter list.
// NOTE(review): `count` is presumably the length of `resources` — confirm.
pub fn cudaGraphicsMapResources(
count: c_int,
resources: *mut cudaGraphicsResource_t,
stream: cudaStream_t,
) -> cudaError_t;
pub fn cudaGraphicsUnmapResources(
count: c_int,
resources: *mut cudaGraphicsResource_t,
stream: cudaStream_t,
) -> cudaError_t;
pub fn cudaGraphicsResourceGetMappedPointer(
dev_ptr: *mut *mut c_void,
size: *mut usize,
resource: cudaGraphicsResource_t,
) -> cudaError_t;
pub fn cudaGraphicsSubResourceGetMappedArray(
array: *mut cudaArray_t,
resource: cudaGraphicsResource_t,
array_index: c_uint,
mip_level: c_uint,
) -> cudaError_t;
pub fn cudaGraphicsResourceGetMappedMipmappedArray(
mipmapped_array: *mut cudaMipmappedArray_t,
resource: cudaGraphicsResource_t,
) -> cudaError_t;
// Channel-descriptor, texture-object, and surface-object entry points.
pub fn cudaGetChannelDesc(
desc: *mut cudaChannelFormatDesc,
array: cudaArray_const_t,
) -> cudaError_t;
// Unlike its neighbours, this returns a `cudaChannelFormatDesc` by value
// rather than a `cudaError_t`.
pub fn cudaCreateChannelDesc(
x: c_int,
y: c_int,
z: c_int,
w: c_int,
f: cudaChannelFormatKind,
) -> cudaChannelFormatDesc;
// Texture creation takes three `*const` descriptor pointers.
// NOTE(review): `p_res_view_desc` is presumably optional (nullable) —
// confirm against the CUDA Runtime API reference.
pub fn cudaCreateTextureObject(
p_tex_object: *mut cudaTextureObject_t,
p_res_desc: *const cudaResourceDesc,
p_tex_desc: *const cudaTextureDesc,
p_res_view_desc: *const cudaResourceViewDesc,
) -> cudaError_t;
pub fn cudaDestroyTextureObject(tex_object: cudaTextureObject_t) -> cudaError_t;
pub fn cudaGetTextureObjectResourceDesc(
p_res_desc: *mut cudaResourceDesc,
tex_object: cudaTextureObject_t,
) -> cudaError_t;
pub fn cudaGetTextureObjectTextureDesc(
p_tex_desc: *mut cudaTextureDesc,
tex_object: cudaTextureObject_t,
) -> cudaError_t;
pub fn cudaGetTextureObjectResourceViewDesc(
p_res_view_desc: *mut cudaResourceViewDesc,
tex_object: cudaTextureObject_t,
) -> cudaError_t;
// Surface objects take only a resource descriptor.
pub fn cudaCreateSurfaceObject(
p_surf_object: *mut cudaSurfaceObject_t,
p_res_desc: *const cudaResourceDesc,
) -> cudaError_t;
pub fn cudaDestroySurfaceObject(surf_object: cudaSurfaceObject_t) -> cudaError_t;
pub fn cudaGetSurfaceObjectResourceDesc(
p_res_desc: *mut cudaResourceDesc,
surf_object: cudaSurfaceObject_t,
) -> cudaError_t;
// Version queries; each takes a single `*mut c_int`.
// NOTE(review): the integer encoding of the version is defined by the CUDA
// Runtime API, not visible here — confirm before decoding.
pub fn cudaDriverGetVersion(driver_version: *mut c_int) -> cudaError_t;
pub fn cudaRuntimeGetVersion(runtime_version: *mut c_int) -> cudaError_t;
// Graph creation and kernel-node entry points.
pub fn cudaGraphCreate(p_graph: *mut cudaGraph_t, flags: c_uint) -> cudaError_t;
// The add function starts with `p_graph_node`, `graph`, `p_dependencies`,
// `num_dependencies`, followed by the node-specific payload.
// NOTE(review): `num_dependencies` is presumably the length of
// `p_dependencies` — confirm.
pub fn cudaGraphAddKernelNode(
p_graph_node: *mut cudaGraphNode_t,
graph: cudaGraph_t,
p_dependencies: *const cudaGraphNode_t,
num_dependencies: usize,
p_node_params: *const cudaKernelNodeParams,
) -> cudaError_t;
pub fn cudaGraphKernelNodeGetParams(
node: cudaGraphNode_t,
p_node_params: *mut cudaKernelNodeParams,
) -> cudaError_t;
pub fn cudaGraphKernelNodeSetParams(
node: cudaGraphNode_t,
p_node_params: *const cudaKernelNodeParams,
) -> cudaError_t;
// Parameter order is source first, then destination.
pub fn cudaGraphKernelNodeCopyAttributes(
h_src: cudaGraphNode_t,
h_dst: cudaGraphNode_t,
) -> cudaError_t;
pub fn cudaGraphKernelNodeGetAttribute(
h_node: cudaGraphNode_t,
attr: cudaLaunchAttributeID,
value_out: *mut cudaLaunchAttributeValue,
) -> cudaError_t;
pub fn cudaGraphKernelNodeSetAttribute(
h_node: cudaGraphNode_t,
attr: cudaLaunchAttributeID,
value: *const cudaLaunchAttributeValue,
) -> cudaError_t;
// Graph memcpy-node entry points. Each add / set-params operation is declared
// in four shapes: a full `cudaMemcpy3DParms` struct, `ToSymbol`, `FromSymbol`,
// and `1D` (explicit `dst` / `src` / `count` / `kind`).
// Add functions begin with `p_graph_node`, `graph`, `p_dependencies`,
// `num_dependencies`; set-params functions begin with `node`.
pub fn cudaGraphAddMemcpyNode(
p_graph_node: *mut cudaGraphNode_t,
graph: cudaGraph_t,
p_dependencies: *const cudaGraphNode_t,
num_dependencies: usize,
p_copy_params: *const cudaMemcpy3DParms,
) -> cudaError_t;
pub fn cudaGraphAddMemcpyNodeToSymbol(
p_graph_node: *mut cudaGraphNode_t,
graph: cudaGraph_t,
p_dependencies: *const cudaGraphNode_t,
num_dependencies: usize,
symbol: *const c_void,
src: *const c_void,
count: usize,
offset: usize,
kind: cudaMemcpyKind,
) -> cudaError_t;
pub fn cudaGraphAddMemcpyNodeFromSymbol(
p_graph_node: *mut cudaGraphNode_t,
graph: cudaGraph_t,
p_dependencies: *const cudaGraphNode_t,
num_dependencies: usize,
dst: *mut c_void,
symbol: *const c_void,
count: usize,
offset: usize,
kind: cudaMemcpyKind,
) -> cudaError_t;
pub fn cudaGraphAddMemcpyNode1D(
p_graph_node: *mut cudaGraphNode_t,
graph: cudaGraph_t,
p_dependencies: *const cudaGraphNode_t,
num_dependencies: usize,
dst: *mut c_void,
src: *const c_void,
count: usize,
kind: cudaMemcpyKind,
) -> cudaError_t;
pub fn cudaGraphMemcpyNodeGetParams(
node: cudaGraphNode_t,
p_node_params: *mut cudaMemcpy3DParms,
) -> cudaError_t;
pub fn cudaGraphMemcpyNodeSetParams(
node: cudaGraphNode_t,
p_node_params: *const cudaMemcpy3DParms,
) -> cudaError_t;
pub fn cudaGraphMemcpyNodeSetParamsToSymbol(
node: cudaGraphNode_t,
symbol: *const c_void,
src: *const c_void,
count: usize,
offset: usize,
kind: cudaMemcpyKind,
) -> cudaError_t;
pub fn cudaGraphMemcpyNodeSetParamsFromSymbol(
node: cudaGraphNode_t,
dst: *mut c_void,
symbol: *const c_void,
count: usize,
offset: usize,
kind: cudaMemcpyKind,
) -> cudaError_t;
pub fn cudaGraphMemcpyNodeSetParams1D(
node: cudaGraphNode_t,
dst: *mut c_void,
src: *const c_void,
count: usize,
kind: cudaMemcpyKind,
) -> cudaError_t;
// Graph memset-node entry points: add, get-params, set-params, all using
// `cudaMemsetParams` (`*const` for inputs, `*mut` for the getter).
pub fn cudaGraphAddMemsetNode(
p_graph_node: *mut cudaGraphNode_t,
graph: cudaGraph_t,
p_dependencies: *const cudaGraphNode_t,
num_dependencies: usize,
p_memset_params: *const cudaMemsetParams,
) -> cudaError_t;
pub fn cudaGraphMemsetNodeGetParams(
node: cudaGraphNode_t,
p_node_params: *mut cudaMemsetParams,
) -> cudaError_t;
pub fn cudaGraphMemsetNodeSetParams(
node: cudaGraphNode_t,
p_node_params: *const cudaMemsetParams,
) -> cudaError_t;
// Graph host-node entry points: add, get-params, set-params, all using
// `cudaHostNodeParams` (`*const` for inputs, `*mut` for the getter).
pub fn cudaGraphAddHostNode(
p_graph_node: *mut cudaGraphNode_t,
graph: cudaGraph_t,
p_dependencies: *const cudaGraphNode_t,
num_dependencies: usize,
p_node_params: *const cudaHostNodeParams,
) -> cudaError_t;
pub fn cudaGraphHostNodeGetParams(
node: cudaGraphNode_t,
p_node_params: *mut cudaHostNodeParams,
) -> cudaError_t;
pub fn cudaGraphHostNodeSetParams(
node: cudaGraphNode_t,
p_node_params: *const cudaHostNodeParams,
) -> cudaError_t;
// Child-graph and empty-node entry points.
// The child graph is passed by value as a `cudaGraph_t` handle.
pub fn cudaGraphAddChildGraphNode(
p_graph_node: *mut cudaGraphNode_t,
graph: cudaGraph_t,
p_dependencies: *const cudaGraphNode_t,
num_dependencies: usize,
child_graph: cudaGraph_t,
) -> cudaError_t;
pub fn cudaGraphChildGraphNodeGetGraph(
node: cudaGraphNode_t,
p_graph: *mut cudaGraph_t,
) -> cudaError_t;
// Takes only the four common add-node parameters; there is no payload.
pub fn cudaGraphAddEmptyNode(
p_graph_node: *mut cudaGraphNode_t,
graph: cudaGraph_t,
p_dependencies: *const cudaGraphNode_t,
num_dependencies: usize,
) -> cudaError_t;
// Event record-node and wait-node entry points. The two families are
// declared with parallel signatures: add (event by value), get-event
// (`*mut cudaEvent_t`), and set-event (event by value).
pub fn cudaGraphAddEventRecordNode(
p_graph_node: *mut cudaGraphNode_t,
graph: cudaGraph_t,
p_dependencies: *const cudaGraphNode_t,
num_dependencies: usize,
event: cudaEvent_t,
) -> cudaError_t;
pub fn cudaGraphEventRecordNodeGetEvent(
node: cudaGraphNode_t,
event_out: *mut cudaEvent_t,
) -> cudaError_t;
pub fn cudaGraphEventRecordNodeSetEvent(
node: cudaGraphNode_t,
event: cudaEvent_t,
) -> cudaError_t;
pub fn cudaGraphAddEventWaitNode(
p_graph_node: *mut cudaGraphNode_t,
graph: cudaGraph_t,
p_dependencies: *const cudaGraphNode_t,
num_dependencies: usize,
event: cudaEvent_t,
) -> cudaError_t;
pub fn cudaGraphEventWaitNodeGetEvent(
node: cudaGraphNode_t,
event_out: *mut cudaEvent_t,
) -> cudaError_t;
pub fn cudaGraphEventWaitNodeSetEvent(
node: cudaGraphNode_t,
event: cudaEvent_t,
) -> cudaError_t;
// External-semaphore signal-node and wait-node entry points. The two families
// have parallel signatures and differ only in the params struct
// (`cudaExternalSemaphoreSignalNodeParams` vs
// `cudaExternalSemaphoreWaitNodeParams`).
pub fn cudaGraphAddExternalSemaphoresSignalNode(
p_graph_node: *mut cudaGraphNode_t,
graph: cudaGraph_t,
p_dependencies: *const cudaGraphNode_t,
num_dependencies: usize,
node_params: *const cudaExternalSemaphoreSignalNodeParams,
) -> cudaError_t;
pub fn cudaGraphExternalSemaphoresSignalNodeGetParams(
h_node: cudaGraphNode_t,
params_out: *mut cudaExternalSemaphoreSignalNodeParams,
) -> cudaError_t;
pub fn cudaGraphExternalSemaphoresSignalNodeSetParams(
h_node: cudaGraphNode_t,
node_params: *const cudaExternalSemaphoreSignalNodeParams,
) -> cudaError_t;
pub fn cudaGraphAddExternalSemaphoresWaitNode(
p_graph_node: *mut cudaGraphNode_t,
graph: cudaGraph_t,
p_dependencies: *const cudaGraphNode_t,
num_dependencies: usize,
node_params: *const cudaExternalSemaphoreWaitNodeParams,
) -> cudaError_t;
pub fn cudaGraphExternalSemaphoresWaitNodeGetParams(
h_node: cudaGraphNode_t,
params_out: *mut cudaExternalSemaphoreWaitNodeParams,
) -> cudaError_t;
pub fn cudaGraphExternalSemaphoresWaitNodeSetParams(
h_node: cudaGraphNode_t,
node_params: *const cudaExternalSemaphoreWaitNodeParams,
) -> cudaError_t;
// Graph memory alloc/free nodes and per-device graph-memory attributes.
// Here `node_params` is `*mut cudaMemAllocNodeParams`.
// NOTE(review): presumably the callee writes results back into the struct —
// confirm against the CUDA Runtime API reference.
pub fn cudaGraphAddMemAllocNode(
p_graph_node: *mut cudaGraphNode_t,
graph: cudaGraph_t,
p_dependencies: *const cudaGraphNode_t,
num_dependencies: usize,
node_params: *mut cudaMemAllocNodeParams,
) -> cudaError_t;
pub fn cudaGraphMemAllocNodeGetParams(
node: cudaGraphNode_t,
params_out: *mut cudaMemAllocNodeParams,
) -> cudaError_t;
pub fn cudaGraphAddMemFreeNode(
p_graph_node: *mut cudaGraphNode_t,
graph: cudaGraph_t,
p_dependencies: *const cudaGraphNode_t,
num_dependencies: usize,
dptr: *mut c_void,
) -> cudaError_t;
// `dptr_out` is declared as a single-indirection `*mut c_void`.
// NOTE(review): presumably it points at storage that receives a device
// pointer, mirroring the C header's `void *` — confirm before dereferencing.
pub fn cudaGraphMemFreeNodeGetParams(
node: cudaGraphNode_t,
dptr_out: *mut c_void,
) -> cudaError_t;
pub fn cudaDeviceGraphMemTrim(device: c_int) -> cudaError_t;
// Attribute get/set share one signature; `value` is an untyped `*mut c_void`.
pub fn cudaDeviceGetGraphMemAttribute(
device: c_int,
attr: cudaGraphMemAttributeType,
value: *mut c_void,
) -> cudaError_t;
pub fn cudaDeviceSetGraphMemAttribute(
device: c_int,
attr: cudaGraphMemAttributeType,
value: *mut c_void,
) -> cudaError_t;
pub fn cudaGraphClone(
p_graph_clone: *mut cudaGraph_t,
original_graph: cudaGraph_t,
) -> cudaError_t;
pub fn cudaGraphNodeFindInClone(
p_node: *mut cudaGraphNode_t,
original_node: cudaGraphNode_t,
cloned_graph: cudaGraph_t,
) -> cudaError_t;
pub fn cudaGraphNodeGetType(
node: cudaGraphNode_t,
p_type: *mut cudaGraphNodeType,
) -> cudaError_t;
pub fn cudaGraphGetNodes(
graph: cudaGraph_t,
nodes: *mut cudaGraphNode_t,
num_nodes: *mut usize,
) -> cudaError_t;
pub fn cudaGraphGetRootNodes(
graph: cudaGraph_t,
p_root_nodes: *mut cudaGraphNode_t,
p_num_root_nodes: *mut usize,
) -> cudaError_t;
pub fn cudaGraphGetEdges(
graph: cudaGraph_t,
from: *mut cudaGraphNode_t,
to: *mut cudaGraphNode_t,
num_edges: *mut usize,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphGetEdges_v2`.
// Compared with the `cudaGraphGetEdges` declaration in this file, it has one extra parameter:
// `edge_data`, a mutable pointer to `cudaGraphEdgeData`, placed before `num_edges`.
// NOTE(review): presumably `from`, `to` and `edge_data` are parallel output arrays and
// `num_edges` is an in/out count — confirm against the CUDA Runtime API reference.
// Returns `cudaError_t`.
pub fn cudaGraphGetEdges_v2(
graph: cudaGraph_t,
from: *mut cudaGraphNode_t,
to: *mut cudaGraphNode_t,
edge_data: *mut cudaGraphEdgeData,
num_edges: *mut usize,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphNodeGetDependencies`.
// `node` is passed by value; `p_dependencies` is a mutable pointer to `cudaGraphNode_t` and
// `p_num_dependencies` a mutable pointer to `usize`.
// NOTE(review): presumably `p_dependencies` is an output array of the nodes `node` depends
// on, with `p_num_dependencies` as an in/out count — confirm against the CUDA Runtime API
// reference.
// Returns `cudaError_t`.
pub fn cudaGraphNodeGetDependencies(
node: cudaGraphNode_t,
p_dependencies: *mut cudaGraphNode_t,
p_num_dependencies: *mut usize,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphNodeGetDependencies_v2`.
// Compared with the `cudaGraphNodeGetDependencies` declaration in this file, it has one extra
// parameter: `edge_data`, a mutable pointer to `cudaGraphEdgeData`.
// NOTE(review): presumably `p_dependencies` and `edge_data` are parallel output arrays and
// `p_num_dependencies` is an in/out count — confirm against the CUDA Runtime API reference.
// Returns `cudaError_t`.
pub fn cudaGraphNodeGetDependencies_v2(
node: cudaGraphNode_t,
p_dependencies: *mut cudaGraphNode_t,
edge_data: *mut cudaGraphEdgeData,
p_num_dependencies: *mut usize,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphNodeGetDependentNodes`.
// `node` is passed by value; `p_dependent_nodes` is a mutable pointer to `cudaGraphNode_t`
// and `p_num_dependent_nodes` a mutable pointer to `usize`.
// NOTE(review): presumably `p_dependent_nodes` is an output array of the nodes that depend on
// `node`, with `p_num_dependent_nodes` as an in/out count — confirm against the CUDA Runtime
// API reference.
// Returns `cudaError_t`.
pub fn cudaGraphNodeGetDependentNodes(
node: cudaGraphNode_t,
p_dependent_nodes: *mut cudaGraphNode_t,
p_num_dependent_nodes: *mut usize,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphNodeGetDependentNodes_v2`.
// Compared with the `cudaGraphNodeGetDependentNodes` declaration in this file, it has one
// extra parameter: `edge_data`, a mutable pointer to `cudaGraphEdgeData`.
// NOTE(review): presumably `p_dependent_nodes` and `edge_data` are parallel output arrays and
// `p_num_dependent_nodes` is an in/out count — confirm against the CUDA Runtime API reference.
// Returns `cudaError_t`.
pub fn cudaGraphNodeGetDependentNodes_v2(
node: cudaGraphNode_t,
p_dependent_nodes: *mut cudaGraphNode_t,
edge_data: *mut cudaGraphEdgeData,
p_num_dependent_nodes: *mut usize,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphAddDependencies`.
// `graph` is passed by value; `from` and `to` are const pointers to `cudaGraphNode_t`, and
// `num_dependencies` is a `usize` count.
// NOTE(review): presumably `from`/`to` are parallel input arrays of `num_dependencies` entries
// describing edges to add — confirm against the CUDA Runtime API reference.
// Returns `cudaError_t`.
pub fn cudaGraphAddDependencies(
graph: cudaGraph_t,
from: *const cudaGraphNode_t,
to: *const cudaGraphNode_t,
num_dependencies: usize,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphAddDependencies_v2`.
// Compared with the `cudaGraphAddDependencies` declaration in this file, it has one extra
// parameter: `edge_data`, a const pointer to `cudaGraphEdgeData`, before `num_dependencies`.
// NOTE(review): presumably `from`, `to` and `edge_data` are parallel input arrays of
// `num_dependencies` entries — confirm against the CUDA Runtime API reference.
// Returns `cudaError_t`.
pub fn cudaGraphAddDependencies_v2(
graph: cudaGraph_t,
from: *const cudaGraphNode_t,
to: *const cudaGraphNode_t,
edge_data: *const cudaGraphEdgeData,
num_dependencies: usize,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphRemoveDependencies`.
// Its parameter list matches the `cudaGraphAddDependencies` declaration in this file: `graph`
// by value, const pointers `from` and `to`, and a `usize` count.
// NOTE(review): presumably `from`/`to` are parallel input arrays of `num_dependencies` entries
// describing edges to remove — confirm against the CUDA Runtime API reference.
// Returns `cudaError_t`.
pub fn cudaGraphRemoveDependencies(
graph: cudaGraph_t,
from: *const cudaGraphNode_t,
to: *const cudaGraphNode_t,
num_dependencies: usize,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphRemoveDependencies_v2`.
// Compared with the `cudaGraphRemoveDependencies` declaration in this file, it has one extra
// parameter: `edge_data`, a const pointer to `cudaGraphEdgeData`, before `num_dependencies`.
// NOTE(review): presumably `from`, `to` and `edge_data` are parallel input arrays of
// `num_dependencies` entries — confirm against the CUDA Runtime API reference.
// Returns `cudaError_t`.
pub fn cudaGraphRemoveDependencies_v2(
graph: cudaGraph_t,
from: *const cudaGraphNode_t,
to: *const cudaGraphNode_t,
edge_data: *const cudaGraphEdgeData,
num_dependencies: usize,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphDestroyNode`, taking a `cudaGraphNode_t` by
// value and returning `cudaError_t`.
// NOTE(review): presumably `node` must not be used after a successful call — confirm against
// the CUDA Runtime API reference.
pub fn cudaGraphDestroyNode(node: cudaGraphNode_t) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphInstantiate`.
// `p_graph_exec` is a mutable pointer to a `cudaGraphExec_t`, `graph` is passed by value, and
// `flags` is a `c_ulonglong`.
// NOTE(review): this three-argument form presumably corresponds to the CUDA 12 signature, and
// the executable graph handle is presumably written through `p_graph_exec` — confirm against
// the CUDA Runtime API reference for the targeted toolkit version.
// Returns `cudaError_t`.
pub fn cudaGraphInstantiate(
p_graph_exec: *mut cudaGraphExec_t,
graph: cudaGraph_t,
flags: c_ulonglong,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphInstantiateWithFlags`.
// As declared in this file, its parameter list is identical to `cudaGraphInstantiate`:
// a mutable pointer to `cudaGraphExec_t`, a `cudaGraph_t` by value, and `c_ulonglong` flags.
// NOTE(review): the executable graph handle is presumably written through `p_graph_exec` —
// confirm against the CUDA Runtime API reference.
// Returns `cudaError_t`.
pub fn cudaGraphInstantiateWithFlags(
p_graph_exec: *mut cudaGraphExec_t,
graph: cudaGraph_t,
flags: c_ulonglong,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphInstantiateWithParams`.
// `p_graph_exec` is a mutable pointer to a `cudaGraphExec_t`, `graph` is passed by value, and
// `instantiate_params` is a mutable pointer to `cudaGraphInstantiateParams`.
// NOTE(review): `instantiate_params` being `*mut` suggests the callee may write result fields
// back into it as well as read inputs — confirm against the CUDA Runtime API reference.
// Returns `cudaError_t`.
pub fn cudaGraphInstantiateWithParams(
p_graph_exec: *mut cudaGraphExec_t,
graph: cudaGraph_t,
instantiate_params: *mut cudaGraphInstantiateParams,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphExecGetFlags`.
// `graph_exec` is a `cudaGraphExec_t` passed by value; `flags` is a mutable pointer to
// `c_ulonglong`.
// NOTE(review): presumably the flags the executable graph was instantiated with are written
// through `flags` — confirm against the CUDA Runtime API reference.
// Returns `cudaError_t`.
pub fn cudaGraphExecGetFlags(
graph_exec: cudaGraphExec_t,
flags: *mut c_ulonglong,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphExecKernelNodeSetParams`.
// `h_graph_exec` and `node` are passed by value; `p_node_params` is a const pointer to
// `cudaKernelNodeParams`.
// NOTE(review): presumably updates the kernel node's parameters inside the given executable
// graph — confirm against the CUDA Runtime API reference.
// Returns `cudaError_t`.
pub fn cudaGraphExecKernelNodeSetParams(
h_graph_exec: cudaGraphExec_t,
node: cudaGraphNode_t,
p_node_params: *const cudaKernelNodeParams,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphExecMemcpyNodeSetParams`.
// `h_graph_exec` and `node` are passed by value; `p_node_params` is a const pointer to
// `cudaMemcpy3DParms`.
// NOTE(review): presumably updates the memcpy node's parameters inside the given executable
// graph — confirm against the CUDA Runtime API reference.
// Returns `cudaError_t`.
pub fn cudaGraphExecMemcpyNodeSetParams(
h_graph_exec: cudaGraphExec_t,
node: cudaGraphNode_t,
p_node_params: *const cudaMemcpy3DParms,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphExecMemcpyNodeSetParamsToSymbol`.
// `h_graph_exec` and `node` are passed by value; `symbol` and `src` are const untyped
// pointers; `count` and `offset` are `usize`; `kind` is a `cudaMemcpyKind`.
// NOTE(review): presumably reconfigures the node to copy `count` bytes from `src` to `symbol`
// at byte `offset` — the byte units are an assumption; confirm against the CUDA Runtime API
// reference.
// Returns `cudaError_t`.
pub fn cudaGraphExecMemcpyNodeSetParamsToSymbol(
h_graph_exec: cudaGraphExec_t,
node: cudaGraphNode_t,
symbol: *const c_void,
src: *const c_void,
count: usize,
offset: usize,
kind: cudaMemcpyKind,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphExecMemcpyNodeSetParamsFromSymbol`.
// `h_graph_exec` and `node` are passed by value; `dst` is a mutable untyped pointer and
// `symbol` a const untyped pointer; `count` and `offset` are `usize`; `kind` is a
// `cudaMemcpyKind`.
// NOTE(review): presumably reconfigures the node to copy `count` bytes from `symbol` at byte
// `offset` into `dst` — the byte units are an assumption; confirm against the CUDA Runtime
// API reference.
// Returns `cudaError_t`.
pub fn cudaGraphExecMemcpyNodeSetParamsFromSymbol(
h_graph_exec: cudaGraphExec_t,
node: cudaGraphNode_t,
dst: *mut c_void,
symbol: *const c_void,
count: usize,
offset: usize,
kind: cudaMemcpyKind,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphExecMemcpyNodeSetParams1D`.
// `h_graph_exec` and `node` are passed by value; `dst` is a mutable untyped pointer, `src` a
// const untyped pointer, `count` a `usize`, and `kind` a `cudaMemcpyKind`.
// NOTE(review): presumably reconfigures the node as a linear copy of `count` bytes from `src`
// to `dst` — the byte units are an assumption; confirm against the CUDA Runtime API reference.
// Returns `cudaError_t`.
pub fn cudaGraphExecMemcpyNodeSetParams1D(
h_graph_exec: cudaGraphExec_t,
node: cudaGraphNode_t,
dst: *mut c_void,
src: *const c_void,
count: usize,
kind: cudaMemcpyKind,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphExecMemsetNodeSetParams`.
// `h_graph_exec` and `node` are passed by value; `p_node_params` is a const pointer to
// `cudaMemsetParams`.
// NOTE(review): presumably updates the memset node's parameters inside the given executable
// graph — confirm against the CUDA Runtime API reference.
// Returns `cudaError_t`.
pub fn cudaGraphExecMemsetNodeSetParams(
h_graph_exec: cudaGraphExec_t,
node: cudaGraphNode_t,
p_node_params: *const cudaMemsetParams,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphExecHostNodeSetParams`.
// `h_graph_exec` and `node` are passed by value; `p_node_params` is a const pointer to
// `cudaHostNodeParams`.
// NOTE(review): presumably updates the host node's parameters inside the given executable
// graph — confirm against the CUDA Runtime API reference.
// Returns `cudaError_t`.
pub fn cudaGraphExecHostNodeSetParams(
h_graph_exec: cudaGraphExec_t,
node: cudaGraphNode_t,
p_node_params: *const cudaHostNodeParams,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphExecChildGraphNodeSetParams`.
// All three arguments are passed by value: the executable graph, the node, and a
// `cudaGraph_t` named `child_graph`.
// NOTE(review): presumably replaces the child graph used by `node` inside the executable
// graph — confirm against the CUDA Runtime API reference.
// Returns `cudaError_t`.
pub fn cudaGraphExecChildGraphNodeSetParams(
h_graph_exec: cudaGraphExec_t,
node: cudaGraphNode_t,
child_graph: cudaGraph_t,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphExecEventRecordNodeSetEvent`.
// All three arguments are passed by value: the executable graph, the node, and a
// `cudaEvent_t`.
// NOTE(review): presumably changes which event the record node `h_node` records inside the
// executable graph — confirm against the CUDA Runtime API reference.
// Returns `cudaError_t`.
pub fn cudaGraphExecEventRecordNodeSetEvent(
h_graph_exec: cudaGraphExec_t,
h_node: cudaGraphNode_t,
event: cudaEvent_t,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphExecEventWaitNodeSetEvent`.
// All three arguments are passed by value: the executable graph, the node, and a
// `cudaEvent_t`.
// NOTE(review): presumably changes which event the wait node `h_node` waits on inside the
// executable graph — confirm against the CUDA Runtime API reference.
// Returns `cudaError_t`.
pub fn cudaGraphExecEventWaitNodeSetEvent(
h_graph_exec: cudaGraphExec_t,
h_node: cudaGraphNode_t,
event: cudaEvent_t,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphExecExternalSemaphoresSignalNodeSetParams`.
// `h_graph_exec` and `h_node` are passed by value; `node_params` is a const pointer to
// `cudaExternalSemaphoreSignalNodeParams`.
// NOTE(review): presumably updates the signal node's parameters inside the given executable
// graph — confirm against the CUDA Runtime API reference.
// Returns `cudaError_t`.
pub fn cudaGraphExecExternalSemaphoresSignalNodeSetParams(
h_graph_exec: cudaGraphExec_t,
h_node: cudaGraphNode_t,
node_params: *const cudaExternalSemaphoreSignalNodeParams,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphExecExternalSemaphoresWaitNodeSetParams`.
// `h_graph_exec` and `h_node` are passed by value; `node_params` is a const pointer to
// `cudaExternalSemaphoreWaitNodeParams`.
// NOTE(review): presumably updates the wait node's parameters inside the given executable
// graph — confirm against the CUDA Runtime API reference.
// Returns `cudaError_t`.
pub fn cudaGraphExecExternalSemaphoresWaitNodeSetParams(
h_graph_exec: cudaGraphExec_t,
h_node: cudaGraphNode_t,
node_params: *const cudaExternalSemaphoreWaitNodeParams,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphNodeSetEnabled`.
// `h_graph_exec` and `h_node` are passed by value; `is_enabled` is a `c_uint`, not a `bool`.
// NOTE(review): presumably zero disables and non-zero enables `h_node` within the executable
// graph — confirm against the CUDA Runtime API reference.
// Returns `cudaError_t`.
pub fn cudaGraphNodeSetEnabled(
h_graph_exec: cudaGraphExec_t,
h_node: cudaGraphNode_t,
is_enabled: c_uint,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphNodeGetEnabled`.
// `h_graph_exec` and `h_node` are passed by value; `is_enabled` is a mutable pointer to
// `c_uint`.
// NOTE(review): presumably the node's enabled state is written through `is_enabled` as an
// integer flag — confirm against the CUDA Runtime API reference.
// Returns `cudaError_t`.
pub fn cudaGraphNodeGetEnabled(
h_graph_exec: cudaGraphExec_t,
h_node: cudaGraphNode_t,
is_enabled: *mut c_uint,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphExecUpdate`.
// `h_graph_exec` and `h_graph` are passed by value; `result_info` is a mutable pointer to
// `cudaGraphExecUpdateResultInfo`.
// NOTE(review): this three-argument form presumably corresponds to the CUDA 12 signature,
// with details of the update outcome written through `result_info` — confirm against the
// CUDA Runtime API reference for the targeted toolkit version.
// Returns `cudaError_t`.
pub fn cudaGraphExecUpdate(
h_graph_exec: cudaGraphExec_t,
h_graph: cudaGraph_t,
result_info: *mut cudaGraphExecUpdateResultInfo,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphUpload`, taking an executable graph and a
// stream by value and returning `cudaError_t`.
// NOTE(review): presumably uploads the graph to the device on `stream` without launching it —
// confirm against the CUDA Runtime API reference.
pub fn cudaGraphUpload(graph_exec: cudaGraphExec_t, stream: cudaStream_t) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphLaunch`, taking an executable graph and a
// stream by value and returning `cudaError_t`.
// NOTE(review): presumably enqueues the graph on `stream`, so the returned status may not
// reflect completion of the work — confirm against the CUDA Runtime API reference.
pub fn cudaGraphLaunch(graph_exec: cudaGraphExec_t, stream: cudaStream_t) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphExecDestroy`, taking a `cudaGraphExec_t` by
// value and returning `cudaError_t`.
// NOTE(review): presumably `graph_exec` must not be used after a successful call — confirm
// against the CUDA Runtime API reference.
pub fn cudaGraphExecDestroy(graph_exec: cudaGraphExec_t) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphDestroy`, taking a `cudaGraph_t` by value and
// returning `cudaError_t`.
// NOTE(review): presumably `graph` must not be used after a successful call — confirm against
// the CUDA Runtime API reference.
pub fn cudaGraphDestroy(graph: cudaGraph_t) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphDebugDotPrint`.
// `graph` is passed by value; `path` is a const pointer to `c_char`; `flags` is a `c_uint`.
// NOTE(review): presumably `path` is a NUL-terminated file path to which a DOT description of
// the graph is written — confirm against the CUDA Runtime API reference.
// Returns `cudaError_t`.
pub fn cudaGraphDebugDotPrint(
graph: cudaGraph_t,
path: *const c_char,
flags: c_uint,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaUserObjectCreate`.
// `object_out` is a mutable pointer to `cudaUserObject_t`; `ptr` is an untyped mutable
// pointer; `destroy` is a `cudaHostFn_t`; `initial_refcount` and `flags` are `c_uint`.
// NOTE(review): presumably `destroy` is invoked with `ptr` once the reference count reaches
// zero, and the new handle is written through `object_out` — confirm against the CUDA Runtime
// API reference.
// Returns `cudaError_t`.
pub fn cudaUserObjectCreate(
object_out: *mut cudaUserObject_t,
ptr: *mut c_void,
destroy: cudaHostFn_t,
initial_refcount: c_uint,
flags: c_uint,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaUserObjectRetain`, taking a `cudaUserObject_t` and a
// `c_uint` count by value and returning `cudaError_t`.
// NOTE(review): presumably adds `count` references to `object`; the C header may default
// `count`, but this Rust declaration always requires it — confirm against the CUDA Runtime
// API reference.
pub fn cudaUserObjectRetain(object: cudaUserObject_t, count: c_uint) -> cudaError_t;
// Declares the `extern "C"` function `cudaUserObjectRelease`, taking a `cudaUserObject_t` and
// a `c_uint` count by value and returning `cudaError_t`.
// NOTE(review): presumably drops `count` references from `object` — confirm against the CUDA
// Runtime API reference.
pub fn cudaUserObjectRelease(object: cudaUserObject_t, count: c_uint) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphRetainUserObject`.
// All arguments are passed by value: `graph`, `object`, and the `c_uint` values `count` and
// `flags`.
// NOTE(review): presumably makes `graph` hold `count` references to `object` — confirm
// against the CUDA Runtime API reference.
// Returns `cudaError_t`.
pub fn cudaGraphRetainUserObject(
graph: cudaGraph_t,
object: cudaUserObject_t,
count: c_uint,
flags: c_uint,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphReleaseUserObject`.
// All arguments are passed by value: `graph`, `object`, and a `c_uint` `count`. Unlike the
// `cudaGraphRetainUserObject` declaration in this file, there is no `flags` parameter.
// NOTE(review): presumably drops `count` of the references `graph` holds on `object` —
// confirm against the CUDA Runtime API reference.
// Returns `cudaError_t`.
pub fn cudaGraphReleaseUserObject(
graph: cudaGraph_t,
object: cudaUserObject_t,
count: c_uint,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphAddNode`.
// Parameters: `p_graph_node` (mutable pointer to a `cudaGraphNode_t`), `graph` (by value),
// `p_dependencies` (const pointer to `cudaGraphNode_t`) with its count `num_dependencies`, and
// `node_params` (mutable pointer to `cudaGraphNodeParams`).
// NOTE(review): presumably a generic node-creation entry point where `node_params` selects
// the node kind, and may be written to by the callee since it is `*mut` — confirm against the
// CUDA Runtime API reference.
// Returns `cudaError_t`.
pub fn cudaGraphAddNode(
p_graph_node: *mut cudaGraphNode_t,
graph: cudaGraph_t,
p_dependencies: *const cudaGraphNode_t,
num_dependencies: usize,
node_params: *mut cudaGraphNodeParams,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphAddNode_v2`.
// Compared with the `cudaGraphAddNode` declaration in this file, it has one extra parameter:
// `dependency_data`, a const pointer to `cudaGraphEdgeData`, placed between `p_dependencies`
// and `num_dependencies`.
// NOTE(review): presumably `p_dependencies` and `dependency_data` are parallel input arrays
// of `num_dependencies` entries — confirm against the CUDA Runtime API reference.
// Returns `cudaError_t`.
pub fn cudaGraphAddNode_v2(
p_graph_node: *mut cudaGraphNode_t,
graph: cudaGraph_t,
p_dependencies: *const cudaGraphNode_t,
dependency_data: *const cudaGraphEdgeData,
num_dependencies: usize,
node_params: *mut cudaGraphNodeParams,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphNodeSetParams`.
// `node` is passed by value; `node_params` is a mutable pointer to `cudaGraphNodeParams`.
// NOTE(review): presumably replaces the parameters of `node` in its (non-instantiated) graph
// — confirm against the CUDA Runtime API reference.
// Returns `cudaError_t`.
pub fn cudaGraphNodeSetParams(
node: cudaGraphNode_t,
node_params: *mut cudaGraphNodeParams,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphExecNodeSetParams`.
// `graph_exec` and `node` are passed by value; `node_params` is a mutable pointer to
// `cudaGraphNodeParams`.
// NOTE(review): presumably updates the parameters of `node` inside the given executable graph
// — confirm against the CUDA Runtime API reference.
// Returns `cudaError_t`.
pub fn cudaGraphExecNodeSetParams(
graph_exec: cudaGraphExec_t,
node: cudaGraphNode_t,
node_params: *mut cudaGraphNodeParams,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGraphConditionalHandleCreate`.
// `p_handle_out` is a mutable pointer to `cudaGraphConditionalHandle`; `graph` is passed by
// value; `default_launch_value` and `flags` are `c_uint`.
// NOTE(review): presumably creates a handle for conditional nodes in `graph` and writes it
// through `p_handle_out`; how `default_launch_value` interacts with `flags` is not visible
// here — confirm against the CUDA Runtime API reference.
// Returns `cudaError_t`.
pub fn cudaGraphConditionalHandleCreate(
p_handle_out: *mut cudaGraphConditionalHandle,
graph: cudaGraph_t,
default_launch_value: c_uint,
flags: c_uint,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGetDriverEntryPoint`.
// `symbol` is a const pointer to `c_char`; `func_ptr` is a mutable pointer to an untyped
// mutable pointer; `flags` is a `c_ulonglong`; `driver_status` is a mutable pointer to
// `cudaDriverEntryPointQueryResult`.
// NOTE(review): presumably `symbol` is a NUL-terminated driver function name whose address is
// written through `func_ptr`, and `driver_status` may be allowed to be null — confirm against
// the CUDA Runtime API reference.
// Returns `cudaError_t`.
pub fn cudaGetDriverEntryPoint(
symbol: *const c_char,
func_ptr: *mut *mut c_void,
flags: c_ulonglong,
driver_status: *mut cudaDriverEntryPointQueryResult,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGetExportTable`.
// `pp_export_table` is a mutable pointer to a const untyped pointer; `p_export_table_id` is a
// const pointer to `cudaUUID_t`.
// NOTE(review): presumably looks up an internal table identified by the UUID and writes its
// address through `pp_export_table`; this entry point does not appear to be covered by the
// public CUDA documentation, so its contract should be verified against the toolkit headers.
// Returns `cudaError_t`.
pub fn cudaGetExportTable(
pp_export_table: *mut *const c_void,
p_export_table_id: *const cudaUUID_t,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGetFuncBySymbol`.
// `function_ptr` is a mutable pointer to `cudaFunction_t`; `symbol_ptr` is a const untyped
// pointer.
// NOTE(review): presumably resolves the device function associated with `symbol_ptr` and
// writes its handle through `function_ptr` — confirm against the CUDA Runtime API reference.
// Returns `cudaError_t`.
pub fn cudaGetFuncBySymbol(
function_ptr: *mut cudaFunction_t,
symbol_ptr: *const c_void,
) -> cudaError_t;
// Declares the `extern "C"` function `cudaGetKernel`.
// `kernel_ptr` is a mutable pointer to `cudaKernel_t`; `entry_func_addr` is a const untyped
// pointer.
// NOTE(review): presumably resolves the kernel handle for the entry function at
// `entry_func_addr` and writes it through `kernel_ptr` — confirm against the CUDA Runtime API
// reference.
// Returns `cudaError_t`.
pub fn cudaGetKernel(
kernel_ptr: *mut cudaKernel_t,
entry_func_addr: *const c_void,
) -> cudaError_t;
}
}
pub use api::RuntimeApi;