* Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This software is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PSL v2 for more details.
*/
use std::{
cell::UnsafeCell,
ffi::{c_char, c_int, c_uint, c_ulonglong, c_void, CStr},
sync::OnceLock,
};
use dashmap::DashMap;
use nohash_hasher::BuildNoHashHasher;
use once_cell::sync::Lazy;
use smallvec::{smallvec, SmallVec};
use tinyvec::ArrayVec;
use tracing::{debug, error, info, trace, warn};
use cudax::runtime::*;
use xgpu_common::{
api_name::ApiFuncName,
ipc::message::{Argument, ArgumentFlag, Request},
sys::dynlib,
};
use crate::{
agent::Agent, fault_guard::virt, hook::runtime::RuntimeApi, hook_impl::native::dl, thread,
};
const ADDR_SIZE: usize = std::mem::size_of::<usize>();
fn get_func_offset(func: *const c_void) -> isize {
static LOADED_LIBS: Lazy<DashMap<usize, (), BuildNoHashHasher<usize>>> =
Lazy::new(|| DashMap::with_hasher(BuildNoHashHasher::default()));
let dl_info = dynlib::dladdr(func).expect("dladdr failed");
let base_addr = dl_info.base_addr().expect("base_addr is null").as_ptr() as usize;
LOADED_LIBS.entry(base_addr).or_insert_with(|| {
let file_name = dl_info.file_name().expect("file_name failed");
debug!("Loading new library '{:?}'...", file_name);
let req = Request::with_args(
ApiFuncName::Loaddynlibrary as u64,
smallvec![Argument::from_slice(
file_name.to_bytes_with_nul(),
ArgumentFlag::ARG_IN,
)],
);
let ret = Agent::get_instance()
.invoke_api::<u32>(req)
.expect("call invoke_api failed");
if ret != 0 {
panic!(
"server load so failed, so_name:{:?}, res:{}",
file_name, ret
);
}
});
(func as usize).wrapping_sub(base_addr) as isize
}
#[derive(Default, Debug)]
struct KernelParamMetadata {
size: usize,
vhandles: OnceLock<SmallVec<[usize; 8]>>,
}
struct KernelMetadata {
offset: isize,
params: ArrayVec<[KernelParamMetadata; KernelMetadata::MAX_PARAM_NUM]>,
}
impl KernelMetadata {
const MAX_PARAM_NUM: usize = 32;
#[cold]
fn resolve(func: *const c_void) -> &'static Self {
let offset = get_func_offset(func);
let mut params = ArrayVec::new();
for index in 0..Self::MAX_PARAM_NUM {
let mut offset: usize = 0;
let mut size: usize = 0;
let ret = unsafe { cudaFuncGetParamInfo(func, index, &mut offset, &mut size) };
if ret != cudaSuccess {
break;
}
trace!("Kernel {:#x}: param[{}], size={}", offset, index, size);
params.push(KernelParamMetadata {
size,
vhandles: OnceLock::new(),
});
}
Box::leak(Box::new(Self { offset, params }))
}
}
fn get_kernel_metadata(func: *const c_void) -> &'static KernelMetadata {
const TLB_SIZE: usize = 8;
type TlbEntry = (usize, Option<&'static KernelMetadata>);
type TlbCache = ArrayVec<[TlbEntry; TLB_SIZE]>;
static KERNEL_CACHE: Lazy<DashMap<usize, &'static KernelMetadata, BuildNoHashHasher<usize>>> =
Lazy::new(|| DashMap::with_hasher(BuildNoHashHasher::default()));
thread_local! {
static KERNEL_TLB: UnsafeCell<TlbCache> = UnsafeCell::new(ArrayVec::new());
}
let key = func as usize;
let meta = KERNEL_TLB.with(|cell| {
let cache = unsafe { &mut *cell.get() };
for (idx, (func, kernel)) in cache.iter().enumerate() {
if *func != key {
continue;
}
let found = unsafe { kernel.unwrap_unchecked() };
if idx > 0 {
cache[0..=idx].rotate_right(1);
}
return Some(found);
}
None
});
if let Some(m) = meta {
return m;
}
let kernel = if let Some(entry) = KERNEL_CACHE.get(&key) {
*entry.value()
} else {
*KERNEL_CACHE
.entry(key)
.or_insert_with(|| KernelMetadata::resolve(func))
};
KERNEL_TLB.with(|cell| {
let cache = unsafe { &mut *cell.get() };
if cache.len() == cache.capacity() {
cache[TLB_SIZE - 1] = (key, Some(kernel));
} else {
cache.push((key, Some(kernel)));
}
cache.rotate_right(1);
});
kernel
}
pub struct RuntimeApiImpl;
#[allow(unused_variables)]
impl RuntimeApi for RuntimeApiImpl {
fn cudaDeviceReset(&self) -> cudaError_t {
info!("[Hooked] api_name: cudaDeviceReset");
let req = Request::empty(ApiFuncName::Cudadevicereset as u64);
let ret = Agent::get_instance()
.invoke_api::<cudaError_t>(req)
.expect("call invoke_api failed");
if ret != cudaSuccess {
thread::context().set_error(ret);
}
ret
}
fn cudaDeviceSynchronize(&self) -> cudaError_t {
info!("[Hooked] api_name: cudaDeviceSynchronize");
let req = Request::empty(ApiFuncName::Cudadevicesynchronize as u64);
let ret = Agent::get_instance()
.invoke_api::<cudaError_t>(req)
.expect("call invoke_api failed");
if ret != cudaSuccess {
thread::context().set_error(ret);
}
ret
}
fn cudaDeviceSetLimit(&self, limit: cudaLimit, value: usize) -> cudaError_t {
unreachable!()
}
fn cudaDeviceGetLimit(&self, p_value: *mut usize, limit: cudaLimit) -> cudaError_t {
unreachable!()
}
fn cudaDeviceGetTexture1DLinearMaxWidth(
&self,
max_width_in_elements: *mut usize,
fmt_desc: *const cudaChannelFormatDesc,
device: c_int,
) -> cudaError_t {
unreachable!()
}
fn cudaDeviceGetCacheConfig(&self, p_cache_config: *mut cudaFuncCache) -> cudaError_t {
unreachable!()
}
fn cudaDeviceGetStreamPriorityRange(
&self,
least_priority: *mut c_int,
greatest_priority: *mut c_int,
) -> cudaError_t {
info!("[Hooked] api_name: cudaDeviceGetStreamPriorityRange");
debug!(
"least:{}, greatest:{}",
unsafe { *least_priority },
unsafe { *greatest_priority }
);
let req = Request::with_args(
ApiFuncName::Cudadevicegetstreampriorityrange as u64,
smallvec![
unsafe { Argument::from_mut_ptr(least_priority, ArgumentFlag::ARG_OUT) },
unsafe { Argument::from_mut_ptr(greatest_priority, ArgumentFlag::ARG_OUT) },
],
);
let ret = Agent::get_instance()
.invoke_api::<cudaError_t>(req)
.expect("call invoke_api failed");
if ret != cudaSuccess {
thread::context().set_error(ret);
}
debug!(
"after least:{}, greatest:{}",
unsafe { *least_priority },
unsafe { *greatest_priority }
);
ret
}
fn cudaDeviceSetCacheConfig(&self, cache_config: cudaFuncCache) -> cudaError_t {
unreachable!()
}
fn cudaDeviceGetByPCIBusId(
&self,
device: *mut c_int,
pci_bus_id: *const c_char,
) -> cudaError_t {
unreachable!()
}
fn cudaDeviceGetPCIBusId(
&self,
pci_bus_id: *mut c_char,
len: c_int,
device: c_int,
) -> cudaError_t {
unreachable!()
}
fn cudaIpcGetEventHandle(
&self,
handle: *mut cudaIpcEventHandle_t,
event: cudaEvent_t,
) -> cudaError_t {
unreachable!()
}
fn cudaIpcOpenEventHandle(
&self,
event: *mut cudaEvent_t,
handle: cudaIpcEventHandle_t,
) -> cudaError_t {
unreachable!()
}
fn cudaIpcGetMemHandle(
&self,
handle: *mut cudaIpcMemHandle_t,
dev_ptr: *mut c_void,
) -> cudaError_t {
unreachable!()
}
fn cudaIpcOpenMemHandle(
&self,
dev_ptr: *mut *mut c_void,
handle: cudaIpcMemHandle_t,
flags: c_uint,
) -> cudaError_t {
unreachable!()
}
fn cudaIpcCloseMemHandle(&self, dev_ptr: *mut c_void) -> cudaError_t {
unreachable!()
}
fn cudaDeviceFlushGPUDirectRDMAWrites(
&self,
target: cudaFlushGPUDirectRDMAWritesTarget,
scope: cudaFlushGPUDirectRDMAWritesScope,
) -> cudaError_t {
unreachable!()
}
fn cudaDeviceRegisterAsyncNotification(
&self,
device: c_int,
callback_func: cudaAsyncCallback,
user_data: *mut c_void,
callback: *mut cudaAsyncCallbackHandle_t,
) -> cudaError_t {
unreachable!()
}
fn cudaDeviceUnregisterAsyncNotification(
&self,
device: c_int,
callback: cudaAsyncCallbackHandle_t,
) -> cudaError_t {
unreachable!()
}
fn cudaDeviceGetSharedMemConfig(&self, p_config: *mut cudaSharedMemConfig) -> cudaError_t {
unreachable!()
}
fn cudaDeviceSetSharedMemConfig(&self, config: cudaSharedMemConfig) -> cudaError_t {
unreachable!()
}
fn cudaThreadExit(&self) -> cudaError_t {
unreachable!()
}
fn cudaThreadSynchronize(&self) -> cudaError_t {
unreachable!()
}
fn cudaThreadSetLimit(&self, limit: cudaLimit, value: usize) -> cudaError_t {
unreachable!()
}
fn cudaThreadGetLimit(&self, p_value: *mut usize, limit: cudaLimit) -> cudaError_t {
unreachable!()
}
fn cudaThreadGetCacheConfig(&self, p_cache_config: *mut cudaFuncCache) -> cudaError_t {
unreachable!()
}
fn cudaThreadSetCacheConfig(&self, cache_config: cudaFuncCache) -> cudaError_t {
unreachable!()
}
fn cudaGetLastError(&self) -> cudaError_t {
thread::context().get_error()
}
fn cudaPeekAtLastError(&self) -> cudaError_t {
thread::context().peek_error()
}
fn cudaGetErrorName(&self, error: cudaError_t) -> *const c_char {
type CudaGetErrorNameFn = unsafe extern "C" fn(device: cudaError_t) -> *const c_char;
static GLOBAL_CUDA_GET_ERROR_NAME: Lazy<CudaGetErrorNameFn> = Lazy::new(|| {
let ptr = dl::find_original_symbol("cudaGetErrorName")
.expect("FATAL: Failed to get original function");
unsafe { ptr.cast() }
});
unsafe { GLOBAL_CUDA_GET_ERROR_NAME(error) }
}
fn cudaGetErrorString(&self, error: cudaError_t) -> *const c_char {
type CudaGetErrorStringFn = unsafe extern "C" fn(device: cudaError_t) -> *const c_char;
static GLOBAL_CUDA_GET_ERROR_STRING: Lazy<CudaGetErrorStringFn> = Lazy::new(|| {
let ptr = dl::find_original_symbol("cudaGetErrorString")
.expect("FATAL: Failed to get original function");
unsafe { ptr.cast() }
});
let ptr = unsafe { GLOBAL_CUDA_GET_ERROR_STRING(error) };
let cstr = unsafe { CStr::from_ptr(ptr) };
error!("CUDA ERROR: {cstr:?}, errno={error}");
ptr
}
fn cudaGetDeviceCount(&self, count: *mut c_int) -> cudaError_t {
info!("[Hooked] api_name: cudaGetDeviceCount");
debug!("before dev_count: {}", unsafe { *count });
let req = Request::with_args(
ApiFuncName::Cudagetdevicecount as u64,
smallvec![unsafe { Argument::from_mut_ptr(count, ArgumentFlag::ARG_OUT) }],
);
let ret = Agent::get_instance()
.invoke_api::<cudaError_t>(req)
.expect("call invoke_api failed");
if ret != cudaSuccess {
thread::context().set_error(ret);
}
debug!("after dev_count: {}, ret={}", unsafe { *count }, ret);
ret
}
fn cudaGetDeviceProperties_v2(
&self,
prop: *mut cudaDeviceProp,
mut device: c_int,
) -> cudaError_t {
info!("[Hooked] api_name: cudaGetDeviceProperties_v2");
let req = Request::with_args(
ApiFuncName::CudagetdevicepropertiesV2 as u64,
smallvec![
unsafe { Argument::from_mut_ptr(prop, ArgumentFlag::ARG_OUT) },
Argument::from_mut(&mut device, ArgumentFlag::ARG_IN),
],
);
let ret = Agent::get_instance()
.invoke_api::<cudaError_t>(req)
.expect("call invoke_api failed");
if ret != cudaSuccess {
thread::context().set_error(ret);
}
ret
}
fn cudaDeviceGetAttribute(
&self,
value: *mut c_int,
attr: cudaDeviceAttr,
device: c_int,
) -> cudaError_t {
info!("[Hooked] api_name: cudaDeviceGetAttribute");
let req = Request::with_args(
ApiFuncName::Cudadevicegetattribute as u64,
smallvec![
unsafe { Argument::from_mut_ptr(value, ArgumentFlag::ARG_OUT) },
Argument::from_ref(&attr, ArgumentFlag::ARG_IN),
Argument::from_ref(&device, ArgumentFlag::ARG_IN),
],
);
let ret = Agent::get_instance()
.invoke_api::<cudaError_t>(req)
.expect("call invoke_api failed");
if ret != cudaSuccess {
thread::context().set_error(ret);
}
ret
}
fn cudaDeviceGetDefaultMemPool(
&self,
mem_pool: *mut cudaMemPool_t,
device: c_int,
) -> cudaError_t {
unreachable!()
}
fn cudaDeviceSetMemPool(&self, device: c_int, mem_pool: cudaMemPool_t) -> cudaError_t {
unreachable!()
}
fn cudaDeviceGetMemPool(&self, mem_pool: *mut cudaMemPool_t, device: c_int) -> cudaError_t {
unreachable!()
}
fn cudaDeviceGetNvSciSyncAttributes(
&self,
nv_sci_sync_attr_list: *mut c_void,
device: c_int,
flags: c_int,
) -> cudaError_t {
unreachable!()
}
fn cudaDeviceGetP2PAttribute(
&self,
value: *mut c_int,
attr: cudaDeviceP2PAttr,
src_device: c_int,
dst_device: c_int,
) -> cudaError_t {
unreachable!()
}
fn cudaChooseDevice(&self, device: *mut c_int, prop: *const cudaDeviceProp) -> cudaError_t {
unreachable!()
}
fn cudaInitDevice(&self, device: c_int, device_flags: c_uint, flags: c_uint) -> cudaError_t {
unreachable!()
}
fn cudaSetDevice(&self, device: c_int) -> cudaError_t {
type CudaSetDeviceFn = unsafe extern "C" fn(device: c_int) -> cudaError_t;
static GLOBAL_CUDA_SET_DEVICE: Lazy<CudaSetDeviceFn> = Lazy::new(|| {
let ptr = dl::find_original_symbol("cudaSetDevice")
.expect("FATAL: Failed to get original function");
unsafe { ptr.cast() }
});
info!("[Hooked] api_name: cudaSetDevice");
let context = thread::context();
let device_id = context.get_device();
if device == device_id {
return cudaSuccess;
}
unsafe { GLOBAL_CUDA_SET_DEVICE(device) };
let req = Request::with_args(
ApiFuncName::Cudasetdevice as u64,
smallvec![Argument::from_ref(&device, ArgumentFlag::ARG_IN)],
);
let ret = Agent::get_instance()
.invoke_api::<cudaError_t>(req)
.expect("call invoke_api failed");
if ret != cudaSuccess {
context.set_error(ret);
} else {
context.set_device(device);
}
ret
}
fn cudaGetDevice(&self, device: *mut c_int) -> cudaError_t {
info!("[Hooked] api_name: cudaGetDevice");
let device_id = thread::context().get_device();
unsafe {
*device = device_id;
}
cudaSuccess
}
fn cudaSetValidDevices(&self, device_arr: *mut c_int, len: c_int) -> cudaError_t {
unreachable!()
}
fn cudaSetDeviceFlags(&self, flags: c_uint) -> cudaError_t {
unreachable!()
}
fn cudaGetDeviceFlags(&self, flags: *mut c_uint) -> cudaError_t {
unreachable!()
}
fn cudaStreamCreate(&self, p_stream: *mut cudaStream_t) -> cudaError_t {
unreachable!()
}
fn cudaStreamCreateWithFlags(&self, p_stream: *mut cudaStream_t, flags: c_uint) -> cudaError_t {
unreachable!()
}
fn cudaStreamCreateWithPriority(
&self,
p_stream: *mut cudaStream_t,
flags: c_uint,
priority: c_int,
) -> cudaError_t {
info!("[Hooked] api_name: cudaStreamCreateWithPriority");
debug!("before stream: {}", unsafe { *p_stream });
let mut p_stream_usize = unsafe { *p_stream };
let req = Request::with_args(
ApiFuncName::Cudastreamcreatewithpriority as u64,
smallvec![
Argument::from_mut(
&mut p_stream_usize,
ArgumentFlag::ARG_OUT | ArgumentFlag::ARG_VIRT,
),
Argument::from_ref(&flags, ArgumentFlag::ARG_IN),
Argument::from_ref(&priority, ArgumentFlag::ARG_IN),
],
);
let request_id = req.request_id();
let ret = Agent::get_instance()
.invoke_api::<cudaError_t>(req)
.expect("call invoke_api failed");
if ret == cudaSuccess {
thread::context().set_error(ret);
}
debug!("after stream: {:#x}", p_stream_usize);
unsafe {
*p_stream = p_stream_usize as cudaStream_t;
virt::handle_insert(*p_stream as *mut c_void, *p_stream as *mut c_void, 0)
.expect("handle_insert failed");
virt::req_id_vhandle_insert(request_id, *p_stream as *mut c_void)
.expect("req_id_vhandle_insert failed");
}
ret
}
fn cudaStreamGetPriority(&self, h_stream: cudaStream_t, priority: *mut c_int) -> cudaError_t {
unreachable!()
}
fn cudaStreamGetFlags(&self, h_stream: cudaStream_t, flags: *mut c_uint) -> cudaError_t {
unreachable!()
}
fn cudaStreamGetId(&self, h_stream: cudaStream_t, stream_id: *mut c_ulonglong) -> cudaError_t {
unreachable!()
}
fn cudaCtxResetPersistingL2Cache(&self) -> cudaError_t {
unreachable!()
}
fn cudaStreamCopyAttributes(&self, dst: cudaStream_t, src: cudaStream_t) -> cudaError_t {
unreachable!()
}
fn cudaStreamGetAttribute(
&self,
h_stream: cudaStream_t,
attr: cudaLaunchAttributeID,
value_out: *mut cudaLaunchAttributeValue,
) -> cudaError_t {
unreachable!()
}
fn cudaStreamSetAttribute(
&self,
h_stream: cudaStream_t,
attr: cudaLaunchAttributeID,
value: *const cudaLaunchAttributeValue,
) -> cudaError_t {
unreachable!()
}
fn cudaStreamDestroy(&self, stream: cudaStream_t) -> cudaError_t {
unreachable!()
}
fn cudaStreamWaitEvent(
&self,
stream: cudaStream_t,
event: cudaEvent_t,
flags: c_uint,
) -> cudaError_t {
info!("[Hooked] api_name: cudaStreamWaitEvent");
let stream_usize = virt::handle_map(stream as *mut c_void).expect("handle_map failed")
as cudaStream_t as usize;
let event_usize = virt::handle_map(event as *mut c_void).expect("handle_map failed")
as cudaEvent_t as usize;
let req = Request::with_args(
ApiFuncName::Cudastreamwaitevent as u64,
smallvec![
Argument::from_ref(&stream_usize, ArgumentFlag::ARG_IN | ArgumentFlag::ARG_VIRT),
Argument::from_ref(&event_usize, ArgumentFlag::ARG_IN | ArgumentFlag::ARG_VIRT),
Argument::from_ref(&flags, ArgumentFlag::ARG_IN),
],
);
let ret = Agent::get_instance()
.invoke_api::<cudaError_t>(req)
.expect("call invoke_api failed");
if ret == cudaSuccess {
thread::context().set_error(ret);
}
ret
}
fn cudaStreamAddCallback(
&self,
stream: cudaStream_t,
callback: cudaStreamCallback_t,
user_data: *mut c_void,
flags: c_uint,
) -> cudaError_t {
unreachable!()
}
fn cudaStreamSynchronize(&self, stream: cudaStream_t) -> cudaError_t {
info!("[Hooked] api_name: cudaStreamSynchronize");
let stream_usize = virt::handle_map(stream as *mut c_void).expect("handle_map failed")
as cudaStream_t as usize;
let req = Request::with_args(
ApiFuncName::Cudastreamsynchronize as u64,
smallvec![Argument::from_ref(
&stream_usize,
ArgumentFlag::ARG_IN | ArgumentFlag::ARG_VIRT,
)],
);
let ret = Agent::get_instance()
.invoke_api::<cudaError_t>(req)
.expect("call invoke_api failed");
if ret == cudaSuccess {
thread::context().set_error(ret);
}
ret
}
fn cudaStreamQuery(&self, stream: cudaStream_t) -> cudaError_t {
unreachable!()
}
fn cudaStreamAttachMemAsync(
&self,
stream: cudaStream_t,
dev_ptr: *mut c_void,
length: usize,
flags: c_uint,
) -> cudaError_t {
unreachable!()
}
fn cudaStreamBeginCapture(
&self,
stream: cudaStream_t,
mode: cudaStreamCaptureMode,
) -> cudaError_t {
unreachable!()
}
fn cudaStreamBeginCaptureToGraph(
&self,
stream: cudaStream_t,
graph: cudaGraph_t,
dependencies: *const cudaGraphNode_t,
dependency_data: *const cudaGraphEdgeData,
num_dependencies: usize,
mode: cudaStreamCaptureMode,
) -> cudaError_t {
unreachable!()
}
fn cudaThreadExchangeStreamCaptureMode(&self, mode: *mut cudaStreamCaptureMode) -> cudaError_t {
info!("[Hooked] api_name: cudaThreadExchangeStreamCaptureMode");
let req = Request::with_args(
ApiFuncName::Cudathreadexchangestreamcapturemode as u64,
smallvec![unsafe { Argument::from_mut_ptr(mode, ArgumentFlag::ARG_OUT) }],
);
let ret = Agent::get_instance()
.invoke_api::<cudaError_t>(req)
.expect("call invoke_api failed");
if ret == cudaSuccess {
thread::context().set_error(ret);
}
ret
}
fn cudaStreamEndCapture(&self, stream: cudaStream_t, p_graph: *mut cudaGraph_t) -> cudaError_t {
unreachable!()
}
fn cudaStreamIsCapturing(
&self,
stream: cudaStream_t,
p_capture_status: *mut cudaStreamCaptureStatus,
) -> cudaError_t {
info!("[Hooked] api_name: cudaStreamIsCapturing");
let stream_usize = virt::handle_map(stream as *mut c_void).expect("handle_map failed")
as cudaStream_t as usize;
let req = Request::with_args(
ApiFuncName::Cudastreamiscapturing as u64,
smallvec![
Argument::from_ref(&stream_usize, ArgumentFlag::ARG_IN | ArgumentFlag::ARG_VIRT),
unsafe { Argument::from_mut_ptr(p_capture_status, ArgumentFlag::ARG_OUT) },
],
);
let ret = Agent::get_instance()
.invoke_api::<cudaError_t>(req)
.expect("call invoke_api failed");
if ret == cudaSuccess {
thread::context().set_error(ret);
}
ret
}
fn cudaStreamGetCaptureInfo_v2(
&self,
stream: cudaStream_t,
capture_status_out: *mut cudaStreamCaptureStatus,
id_out: *mut c_ulonglong,
graph_out: *mut cudaGraph_t,
dependencies_out: *mut *const cudaGraphNode_t,
num_dependencies_out: *mut usize,
) -> cudaError_t {
unreachable!()
}
fn cudaStreamGetCaptureInfo_v3(
&self,
stream: cudaStream_t,
capture_status_out: *mut cudaStreamCaptureStatus,
id_out: *mut c_ulonglong,
graph_out: *mut cudaGraph_t,
dependencies_out: *mut *const cudaGraphNode_t,
edge_data_out: *mut *const cudaGraphEdgeData,
num_dependencies_out: *mut usize,
) -> cudaError_t {
unreachable!()
}
fn cudaStreamUpdateCaptureDependencies(
&self,
stream: cudaStream_t,
dependencies: *mut cudaGraphNode_t,
num_dependencies: usize,
flags: c_uint,
) -> cudaError_t {
unreachable!()
}
fn cudaStreamUpdateCaptureDependencies_v2(
&self,
stream: cudaStream_t,
dependencies: *mut cudaGraphNode_t,
dependency_data: *const cudaGraphEdgeData,
num_dependencies: usize,
flags: c_uint,
) -> cudaError_t {
unreachable!()
}
fn cudaEventCreate(&self, event: *mut cudaEvent_t) -> cudaError_t {
unreachable!()
}
fn cudaEventCreateWithFlags(&self, event: *mut cudaEvent_t, flags: c_uint) -> cudaError_t {
info!("[Hooked] api_name: cudaEventCreateWithFlags");
let mut event_usize = unsafe { *event };
let req = Request::with_args(
ApiFuncName::Cudaeventcreatewithflags as u64,
smallvec![
Argument::from_mut(
&mut event_usize,
ArgumentFlag::ARG_OUT | ArgumentFlag::ARG_VIRT,
),
Argument::from_ref(&flags, ArgumentFlag::ARG_IN),
],
);
let request_id = req.request_id();
let ret = Agent::get_instance()
.invoke_api::<cudaError_t>(req)
.expect("call invoke_api failed");
if ret == cudaSuccess {
thread::context().set_error(ret);
}
unsafe {
*event = event_usize as cudaEvent_t;
virt::handle_insert(*event as *mut c_void, *event as *mut c_void, 0)
.expect("handle_insert failed");
virt::req_id_vhandle_insert(request_id, *event as *mut c_void)
.expect("req_id_vhandle_insert failed");
}
ret
}
fn cudaEventRecord(&self, event: cudaEvent_t, stream: cudaStream_t) -> cudaError_t {
info!("[Hooked] api_name: cudaEventRecord");
let event_usize = virt::handle_map(event as *mut c_void).expect("handle_map failed")
as cudaEvent_t as usize;
let stream_usize = virt::handle_map(stream as *mut c_void).expect("handle_map failed")
as cudaStream_t as usize;
let req = Request::with_args(
ApiFuncName::Cudaeventrecord as u64,
smallvec![
Argument::from_ref(&event_usize, ArgumentFlag::ARG_IN | ArgumentFlag::ARG_VIRT),
Argument::from_ref(&stream_usize, ArgumentFlag::ARG_IN | ArgumentFlag::ARG_VIRT),
],
);
let ret = Agent::get_instance()
.invoke_api::<cudaError_t>(req)
.expect("call invoke_api failed");
if ret == cudaSuccess {
thread::context().set_error(ret);
}
ret
}
fn cudaEventRecordWithFlags(
&self,
event: cudaEvent_t,
stream: cudaStream_t,
flags: c_uint,
) -> cudaError_t {
info!("[Hooked] api_name: cudaEventRecordWithFlags");
let stream_usize = virt::handle_map(stream as *mut c_void).expect("handle_map failed")
as cudaStream_t as usize;
let event_usize = virt::handle_map(event as *mut c_void).expect("handle_map failed")
as cudaEvent_t as usize;
let req = Request::with_args(
ApiFuncName::Cudaeventrecordwithflags as u64,
smallvec![
Argument::from_ref(&event_usize, ArgumentFlag::ARG_IN | ArgumentFlag::ARG_VIRT),
Argument::from_ref(&stream_usize, ArgumentFlag::ARG_IN | ArgumentFlag::ARG_VIRT),
Argument::from_ref(&flags, ArgumentFlag::ARG_IN),
],
);
let ret = Agent::get_instance()
.invoke_api::<cudaError_t>(req)
.expect("call invoke_api failed");
if ret == cudaSuccess {
thread::context().set_error(ret);
}
ret
}
fn cudaEventQuery(&self, event: cudaEvent_t) -> cudaError_t {
info!("[Hooked] api_name: cudaEventQuery");
let event_usize = virt::handle_map(event as *mut c_void).expect("handle_map failed")
as cudaEvent_t as usize;
let req = Request::with_args(
ApiFuncName::Cudaeventquery as u64,
smallvec![Argument::from_ref(
&event_usize,
ArgumentFlag::ARG_IN | ArgumentFlag::ARG_VIRT,
)],
);
let ret = Agent::get_instance()
.invoke_api::<cudaError_t>(req)
.expect("call invoke_api failed");
if ret == cudaSuccess {
thread::context().set_error(ret);
}
ret
}
fn cudaEventSynchronize(&self, event: cudaEvent_t) -> cudaError_t {
info!("[Hooked] api_name: cudaEventSynchronize");
let event_usize = virt::handle_map(event as *mut c_void).expect("handle_map failed")
as cudaEvent_t as usize;
let req = Request::with_args(
ApiFuncName::Cudaeventsynchronize as u64,
smallvec![Argument::from_ref(
&event_usize,
ArgumentFlag::ARG_IN | ArgumentFlag::ARG_VIRT,
)],
);
let ret = Agent::get_instance()
.invoke_api::<cudaError_t>(req)
.expect("call invoke_api failed");
if ret == cudaSuccess {
thread::context().set_error(ret);
}
ret
}
fn cudaEventDestroy(&self, event: cudaEvent_t) -> cudaError_t {
info!("[Hooked] api_name: cudaEventDestroy");
let event_usize = virt::handle_map(event as *mut c_void).expect("handle_map failed")
as cudaEvent_t as usize;
let req = Request::with_args(
ApiFuncName::Cudaeventdestroy as u64,
smallvec![Argument::from_ref(
&event_usize,
ArgumentFlag::ARG_IN | ArgumentFlag::ARG_VIRT,
)],
);
let ret = Agent::get_instance()
.invoke_api::<cudaError_t>(req)
.expect("call invoke_api failed");
if ret == cudaSuccess {
thread::context().set_error(ret);
}
ret
}
fn cudaEventElapsedTime(
&self,
ms: *mut f32,
start: cudaEvent_t,
end: cudaEvent_t,
) -> cudaError_t {
info!("[Hooked] api_name: cudaEventElapsedTime");
let start_usize = virt::handle_map(start as *mut c_void).expect("handle_map failed")
as cudaEvent_t as usize;
let end_usize = virt::handle_map(end as *mut c_void).expect("handle_map failed")
as cudaEvent_t as usize;
let req = Request::with_args(
ApiFuncName::Cudaeventelapsedtime as u64,
smallvec![
unsafe { Argument::from_mut_ptr(ms, ArgumentFlag::ARG_OUT) },
Argument::from_ref(&start_usize, ArgumentFlag::ARG_IN | ArgumentFlag::ARG_VIRT),
Argument::from_ref(&end_usize, ArgumentFlag::ARG_IN | ArgumentFlag::ARG_VIRT),
],
);
let ret = Agent::get_instance()
.invoke_api::<cudaError_t>(req)
.expect("call invoke_api failed");
if ret == cudaSuccess {
thread::context().set_error(ret);
}
ret
}
fn cudaImportExternalMemory(
&self,
ext_mem_out: *mut cudaExternalMemory_t,
mem_handle_desc: *const cudaExternalMemoryHandleDesc,
) -> cudaError_t {
unreachable!()
}
fn cudaExternalMemoryGetMappedBuffer(
&self,
dev_ptr: *mut *mut c_void,
ext_mem: cudaExternalMemory_t,
buffer_desc: *const cudaExternalMemoryBufferDesc,
) -> cudaError_t {
unreachable!()
}
fn cudaExternalMemoryGetMappedMipmappedArray(
&self,
mipmap: *mut cudaMipmappedArray_t,
ext_mem: cudaExternalMemory_t,
mipmap_desc: *const cudaExternalMemoryMipmappedArrayDesc,
) -> cudaError_t {
unreachable!()
}
fn cudaDestroyExternalMemory(&self, ext_mem: cudaExternalMemory_t) -> cudaError_t {
unreachable!()
}
fn cudaImportExternalSemaphore(
&self,
ext_sem_out: *mut cudaExternalSemaphore_t,
sem_handle_desc: *const cudaExternalSemaphoreHandleDesc,
) -> cudaError_t {
unreachable!()
}
fn cudaSignalExternalSemaphoresAsync_v2(
&self,
ext_sem_array: *const cudaExternalSemaphore_t,
params_array: *const cudaExternalSemaphoreSignalParams,
num_ext_sems: c_uint,
stream: cudaStream_t,
) -> cudaError_t {
unreachable!()
}
fn cudaWaitExternalSemaphoresAsync_v2(
&self,
ext_sem_array: *const cudaExternalSemaphore_t,
params_array: *const cudaExternalSemaphoreWaitParams,
num_ext_sems: c_uint,
stream: cudaStream_t,
) -> cudaError_t {
unreachable!()
}
fn cudaDestroyExternalSemaphore(&self, ext_sem: cudaExternalSemaphore_t) -> cudaError_t {
unreachable!()
}
fn cudaLaunchKernel(
&self,
func: *const c_void,
grid_dim: dim3,
block_dim: dim3,
args: *mut *mut c_void,
shared_mem: usize,
stream: cudaStream_t,
) -> cudaError_t {
info!("[Hooked] api_name: cudaLaunchKernel");
let kernel = get_kernel_metadata(func);
debug!(
"--new--launchkernel-func: {:p}, offset:{:x}, param_sizes:{:?}",
func, kernel.offset, kernel.params
);
let stream_usize = virt::handle_map(stream as *mut c_void).expect("handle_map failed")
as cudaStream_t as usize;
debug!(
"cudaLaunchKernel: gridDim=({},{},{}), blockDim=({},{},{}), sharedMem={}, stream_usize: 0x{:x}",
grid_dim.x, grid_dim.y, grid_dim.z, block_dim.x, block_dim.y, block_dim.z, shared_mem, stream_usize
);
let mut req_args: SmallVec<[Argument; 32]> = SmallVec::new();
req_args.push(Argument::from_ref(&kernel.offset, ArgumentFlag::ARG_IN));
req_args.push(Argument::from_ref(&grid_dim, ArgumentFlag::ARG_IN));
req_args.push(Argument::from_ref(&block_dim, ArgumentFlag::ARG_IN));
req_args.push(Argument::from_ref(&shared_mem, ArgumentFlag::ARG_IN));
req_args.push(Argument::from_ref(
&stream_usize,
ArgumentFlag::ARG_IN | ArgumentFlag::ARG_VIRT,
));
let args = unsafe { std::slice::from_raw_parts(args, kernel.params.len()) };
for (idx, metadata) in kernel.params.iter().enumerate() {
let param_addr = args[idx];
let param_size = metadata.size;
debug!(
"[cudaLaunchKernel] args[{}]: param_addr={:p}, param_size={}",
idx, param_addr, param_size
);
let param_data =
unsafe { std::slice::from_raw_parts_mut(param_addr.cast::<u8>(), param_size) };
let vhandles = metadata.vhandles.get_or_init(|| {
let mut vhandles = SmallVec::new();
let slot_count = param_size / ADDR_SIZE;
for k in 0..slot_count {
let offset = k * ADDR_SIZE;
let chunk = ¶m_data[offset..offset + ADDR_SIZE];
let key = usize::from_ne_bytes(chunk.try_into().unwrap());
if virt::is_vhandle_valid(key as *mut c_void) {
vhandles.push(offset);
}
}
vhandles
});
let mut param_flag = ArgumentFlag::ARG_IN;
if !vhandles.is_empty() {
for &offset in vhandles {
let chunk = ¶m_data[offset..offset + ADDR_SIZE];
let key = usize::from_ne_bytes(chunk.try_into().unwrap());
let value = if virt::is_vhandle_valid(key as *mut c_void) {
virt::handle_map(key as *mut c_void).expect("handle_map failed") as usize
} else {
key
};
let value_bytes = value.to_ne_bytes();
param_data[offset..offset + ADDR_SIZE].copy_from_slice(&value_bytes);
}
param_flag |= ArgumentFlag::ARG_VIRT;
}
req_args.push(Argument::from_slice(param_data, param_flag));
}
let req = Request::with_args(ApiFuncName::Cudalaunchkernel as u64, req_args);
let ret = Agent::get_instance()
.invoke_api::<cudaError_t>(req)
.expect("call invoke_api failed");
if ret == cudaSuccess {
thread::context().set_error(ret);
}
ret
}
fn cudaLaunchKernelExC(
&self,
config: *const cudaLaunchConfig_t,
func: *const c_void,
args: *mut *mut c_void,
) -> cudaError_t {
unreachable!()
}
fn cudaLaunchCooperativeKernel(
&self,
func: *const c_void,
grid_dim: dim3,
block_dim: dim3,
args: *mut *mut c_void,
shared_mem: usize,
stream: cudaStream_t,
) -> cudaError_t {
unreachable!()
}
fn cudaLaunchCooperativeKernelMultiDevice(
&self,
launch_params_list: *mut cudaLaunchParams,
num_devices: c_uint,
flags: c_uint,
) -> cudaError_t {
unreachable!()
}
fn cudaFuncSetCacheConfig(
&self,
func: *const c_void,
cache_config: cudaFuncCache,
) -> cudaError_t {
unreachable!()
}
fn cudaFuncGetAttributes(
&self,
attr: *mut cudaFuncAttributes,
func: *const c_void,
) -> cudaError_t {
info!("[Hooked] api_name: cudaFuncGetAttributes-----");
let offset = get_func_offset(func);
debug!(
"cudaFuncGetAttributes-func: {:p}, offset:{:x}",
func, offset
);
let req = Request::with_args(
ApiFuncName::Cudafuncgetattributes as u64,
smallvec![
unsafe { Argument::from_mut_ptr(attr, ArgumentFlag::ARG_OUT) },
Argument::from_ref(&offset, ArgumentFlag::ARG_IN),
],
);
let ret = Agent::get_instance()
.invoke_api::<cudaError_t>(req)
.expect("call invoke_api failed");
if ret == cudaSuccess {
thread::context().set_error(ret);
}
debug!("after attr:{:?}", unsafe { *attr });
ret
}
fn cudaFuncSetAttribute(
&self,
func: *const c_void,
attr: cudaFuncAttribute,
value: c_int,
) -> cudaError_t {
info!("[Hooked] api_name: cudaFuncSetAttribute-----");
let offset = get_func_offset(func);
debug!("cudaFuncSetAttribute-func: {:p}, offset:{:x}", func, offset);
let req = Request::with_args(
ApiFuncName::Cudafuncsetattribute as u64,
smallvec![
Argument::from_ref(&offset, ArgumentFlag::ARG_IN),
Argument::from_ref(&attr, ArgumentFlag::ARG_IN),
Argument::from_ref(&value, ArgumentFlag::ARG_IN),
],
);
let ret = Agent::get_instance()
.invoke_api::<cudaError_t>(req)
.expect("call invoke_api failed");
if ret == cudaSuccess {
thread::context().set_error(ret);
}
ret
}
fn cudaFuncGetParamInfo(
&self,
func: *const c_void,
param_index: usize,
param_offset: *mut usize,
param_size: *mut usize,
) -> cudaError_t {
info!("[hooked] cudaFuncGetParamInfo");
type CudaFuncGetParamInfoFn = unsafe extern "C" fn(
func: *const c_void,
param_index: usize,
param_offset: *mut usize,
param_size: *mut usize,
) -> cudaError_t;
static GLOBAL_CUDA_FUNC_GET_PARAM_INFO: Lazy<CudaFuncGetParamInfoFn> = Lazy::new(|| {
let ptr = dl::find_original_symbol("cudaFuncGetParamInfo")
.expect("FATAL: Failed to get original function");
unsafe { ptr.cast() }
});
let res =
unsafe { GLOBAL_CUDA_FUNC_GET_PARAM_INFO(func, param_index, param_offset, param_size) };
debug!("real--cudaFuncGetParamInfo (return res: {})", res);
res
}
fn cudaSetDoubleForDevice(&self, d: *mut f64) -> cudaError_t {
unreachable!()
}
fn cudaSetDoubleForHost(&self, d: *mut f64) -> cudaError_t {
unreachable!()
}
fn cudaLaunchHostFunc(
&self,
stream: cudaStream_t,
fn_: cudaHostFn_t,
user_data: *mut c_void,
) -> cudaError_t {
unreachable!()
}
fn cudaFuncSetSharedMemConfig(
&self,
func: *const c_void,
config: cudaSharedMemConfig,
) -> cudaError_t {
unreachable!()
}
fn cudaOccupancyMaxActiveBlocksPerMultiprocessor(
&self,
num_blocks: *mut c_int,
func: *const c_void,
block_size: c_int,
dynamic_smem_size: usize,
) -> cudaError_t {
unreachable!()
}
fn cudaOccupancyAvailableDynamicSMemPerBlock(
&self,
dynamic_smem_size: *mut usize,
func: *const c_void,
num_blocks: c_int,
block_size: c_int,
) -> cudaError_t {
unreachable!()
}
fn cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
&self,
num_blocks: *mut c_int,
func: *const c_void,
block_size: c_int,
dynamic_smem_size: usize,
flags: c_uint,
) -> cudaError_t {
info!("[Hooked] api_name: cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags");
let offset = get_func_offset(func);
debug!("cudaFuncSetAttribute-func: {:p}, offset:{:x}", func, offset);
let req = Request::with_args(
ApiFuncName::Cudaoccupancymaxactiveblockspermultiprocessorwithflags as u64,
smallvec![
unsafe { Argument::from_mut_ptr(num_blocks, ArgumentFlag::ARG_OUT) },
Argument::from_ref(&offset, ArgumentFlag::ARG_IN),
Argument::from_ref(&block_size, ArgumentFlag::ARG_IN),
Argument::from_ref(&dynamic_smem_size, ArgumentFlag::ARG_IN),
Argument::from_ref(&flags, ArgumentFlag::ARG_IN),
],
);
let ret = Agent::get_instance()
.invoke_api::<cudaError_t>(req)
.expect("call invoke_api failed");
if ret == cudaSuccess {
thread::context().set_error(ret);
}
ret
}
fn cudaOccupancyMaxPotentialClusterSize(
&self,
cluster_size: *mut c_int,
func: *const c_void,
launch_config: *const cudaLaunchConfig_t,
) -> cudaError_t {
unreachable!()
}
fn cudaOccupancyMaxActiveClusters(
&self,
num_clusters: *mut c_int,
func: *const c_void,
launch_config: *const cudaLaunchConfig_t,
) -> cudaError_t {
unreachable!()
}
fn cudaMallocManaged(
&self,
dev_ptr: *mut *mut c_void,
size: usize,
flags: c_uint,
) -> cudaError_t {
unreachable!()
}
fn cudaMalloc(&self, dev_ptr: *mut *mut c_void, size: usize) -> cudaError_t {
info!("[Hooked] api_name: cudaMalloc");
debug!("dev_ptr:{:p}, *ptr:{:p}", dev_ptr, unsafe { *dev_ptr });
let mut dev_ptr_usize = unsafe { *dev_ptr } as usize;
let req = Request::with_args(
ApiFuncName::Cudamalloc as u64,
smallvec![
Argument::from_mut(
&mut dev_ptr_usize,
ArgumentFlag::ARG_OUT | ArgumentFlag::ARG_VIRT,
),
Argument::from_ref(&size, ArgumentFlag::ARG_IN),
],
);
let request_id = req.request_id();
let ret = Agent::get_instance()
.invoke_api::<cudaError_t>(req)
.expect("call invoke_api failed");
if ret == cudaSuccess {
thread::context().set_error(ret);
}
debug!("--2-dev_ptr:{:p}, *ptr:{:x}", dev_ptr, dev_ptr_usize);
unsafe {
*dev_ptr = dev_ptr_usize as *mut c_void;
virt::handle_insert(*dev_ptr, *dev_ptr, size).expect("handle_insert failed");
virt::req_id_vhandle_insert(request_id, *dev_ptr)
.expect("req_id_vhandle_insert failed");
}
ret
}
fn cudaMallocHost(&self, ptr: *mut *mut c_void, size: usize) -> cudaError_t {
unreachable!()
}
fn cudaMallocPitch(
&self,
dev_ptr: *mut *mut c_void,
pitch: *mut usize,
width: usize,
height: usize,
) -> cudaError_t {
unreachable!()
}
fn cudaMallocArray(
&self,
array: *mut cudaArray_t,
desc: *const cudaChannelFormatDesc,
width: usize,
height: usize,
flags: c_uint,
) -> cudaError_t {
unreachable!()
}
fn cudaFree(&self, dev_ptr: *mut c_void) -> cudaError_t {
info!("[Hooked] api_name: cudaFree");
let dev_ptr_usize = virt::handle_map(dev_ptr).expect("handle_map failed") as usize;
let req = Request::with_args(
ApiFuncName::Cudafree as u64,
smallvec![Argument::from_ref(
&dev_ptr_usize,
ArgumentFlag::ARG_IN | ArgumentFlag::ARG_VIRT,
)],
);
let ret = Agent::get_instance()
.invoke_api::<cudaError_t>(req)
.expect("call invoke_api failed");
if ret == cudaSuccess {
thread::context().set_error(ret);
}
warn!("cudaFree is not checked!");
ret
}
fn cudaFreeHost(&self, ptr: *mut c_void) -> cudaError_t {
unreachable!()
}
fn cudaFreeArray(&self, array: cudaArray_t) -> cudaError_t {
unreachable!()
}
fn cudaFreeMipmappedArray(&self, mipmapped_array: cudaMipmappedArray_t) -> cudaError_t {
unreachable!()
}
fn cudaHostAlloc(&self, p_host: *mut *mut c_void, size: usize, flags: c_uint) -> cudaError_t {
info!("[Hooked] api_name: cudaHostAlloc");
let mut dev_ptr_usize = unsafe { *p_host } as usize;
let req = Request::with_args(
ApiFuncName::Cudahostalloc as u64,
smallvec![
Argument::from_mut(
&mut dev_ptr_usize,
ArgumentFlag::ARG_OUT | ArgumentFlag::ARG_VIRT
),
Argument::from_ref(&size, ArgumentFlag::ARG_IN),
Argument::from_ref(&flags, ArgumentFlag::ARG_IN),
],
);
let request_id = req.request_id();
let ret = Agent::get_instance()
.invoke_api::<cudaError_t>(req)
.expect("call invoke_api failed");
if ret == cudaSuccess {
thread::context().set_error(ret);
}
type CudaHostAllocFn = unsafe extern "C" fn(
p_host: *mut *mut c_void,
size: usize,
flags: c_uint,
) -> cudaError_t;
static GLOBAL_CUDAHOSTALLOC: Lazy<CudaHostAllocFn> = Lazy::new(|| {
let ptr = dl::find_original_symbol("cudaHostAlloc")
.expect("FATAL: Failed to get original function");
unsafe { ptr.cast() }
});
info!("[hooked] cudaHostAlloc (original)");
let res = unsafe { GLOBAL_CUDAHOSTALLOC(p_host, size, flags) };
debug!(
"real--cudaHostAlloc (return res: {}), p_host:{:p}, *p_host:{:p}",
res,
p_host,
unsafe { *p_host }
);
unsafe {
virt::handle_insert(*p_host, dev_ptr_usize as *mut c_void, size)
.expect("handle_insert failed");
virt::handle_insert(
dev_ptr_usize as *mut c_void,
dev_ptr_usize as *mut c_void,
size,
)
.expect("handle_insert failed");
virt::req_id_vhandle_insert(request_id, dev_ptr_usize as *mut c_void)
.expect("req_id_vhandle_insert failed");
}
ret
}
fn cudaHostRegister(&self, ptr: *mut c_void, size: usize, flags: c_uint) -> cudaError_t {
unreachable!()
}
fn cudaHostUnregister(&self, ptr: *mut c_void) -> cudaError_t {
unreachable!()
}
fn cudaHostGetDevicePointer(
&self,
p_device: *mut *mut c_void,
p_host: *mut c_void,
flags: c_uint,
) -> cudaError_t {
unreachable!()
}
fn cudaHostGetFlags(&self, p_flags: *mut c_uint, p_host: *mut c_void) -> cudaError_t {
unreachable!()
}
fn cudaMalloc3D(&self, pitcheddev_ptr: *mut cudaPitchedPtr, extent: cudaExtent) -> cudaError_t {
unreachable!()
}
fn cudaMalloc3DArray(
&self,
array: *mut cudaArray_t,
desc: *const cudaChannelFormatDesc,
extent: cudaExtent,
flags: c_uint,
) -> cudaError_t {
unreachable!()
}
fn cudaMallocMipmappedArray(
&self,
mipmapped_array: *mut cudaMipmappedArray_t,
desc: *const cudaChannelFormatDesc,
extent: cudaExtent,
num_levels: c_uint,
flags: c_uint,
) -> cudaError_t {
unreachable!()
}
fn cudaGetMipmappedArrayLevel(
&self,
level_array: *mut cudaArray_t,
mipmapped_array: cudaMipmappedArray_const_t,
level: c_uint,
) -> cudaError_t {
unreachable!()
}
fn cudaMemcpy3D(&self, p: *const cudaMemcpy3DParms) -> cudaError_t {
unreachable!()
}
fn cudaMemcpy3DPeer(&self, p: *const cudaMemcpy3DPeerParms) -> cudaError_t {
unreachable!()
}
fn cudaMemcpy3DAsync(&self, p: *const cudaMemcpy3DParms, stream: cudaStream_t) -> cudaError_t {
unreachable!()
}
fn cudaMemcpy3DPeerAsync(
&self,
p: *const cudaMemcpy3DPeerParms,
stream: cudaStream_t,
) -> cudaError_t {
unreachable!()
}
fn cudaMemGetInfo(&self, free: *mut usize, total: *mut usize) -> cudaError_t {
unreachable!()
}
fn cudaArrayGetInfo(
&self,
desc: *mut cudaChannelFormatDesc,
extent: *mut cudaExtent,
flags: *mut c_uint,
array: cudaArray_t,
) -> cudaError_t {
unreachable!()
}
fn cudaArrayGetPlane(
&self,
p_plane_array: *mut cudaArray_t,
h_array: cudaArray_t,
plane_idx: c_uint,
) -> cudaError_t {
unreachable!()
}
fn cudaArrayGetMemoryRequirements(
&self,
memory_requirements: *mut cudaArrayMemoryRequirements,
array: cudaArray_t,
device: c_int,
) -> cudaError_t {
unreachable!()
}
fn cudaMipmappedArrayGetMemoryRequirements(
&self,
memory_requirements: *mut cudaArrayMemoryRequirements,
mipmap: cudaMipmappedArray_t,
device: c_int,
) -> cudaError_t {
unreachable!()
}
fn cudaArrayGetSparseProperties(
&self,
sparse_properties: *mut cudaArraySparseProperties,
array: cudaArray_t,
) -> cudaError_t {
unreachable!()
}
fn cudaMipmappedArrayGetSparseProperties(
&self,
sparse_properties: *mut cudaArraySparseProperties,
mipmap: cudaMipmappedArray_t,
) -> cudaError_t {
unreachable!()
}
fn cudaMemcpy(
&self,
dst: *mut c_void,
src: *const c_void,
count: usize,
kind: cudaMemcpyKind,
) -> cudaError_t {
unreachable!()
}
fn cudaMemcpyPeer(
&self,
dst: *mut c_void,
dst_device: c_int,
src: *const c_void,
src_device: c_int,
count: usize,
) -> cudaError_t {
unreachable!()
}
fn cudaMemcpy2D(
&self,
dst: *mut c_void,
dpitch: usize,
src: *const c_void,
spitch: usize,
width: usize,
height: usize,
kind: cudaMemcpyKind,
) -> cudaError_t {
unreachable!()
}
fn cudaMemcpy2DToArray(
&self,
dst: cudaArray_t,
w_offset: usize,
h_offset: usize,
src: *const c_void,
spitch: usize,
width: usize,
height: usize,
kind: cudaMemcpyKind,
) -> cudaError_t {
unreachable!()
}
fn cudaMemcpy2DFromArray(
&self,
dst: *mut c_void,
dpitch: usize,
src: cudaArray_const_t,
w_offset: usize,
h_offset: usize,
width: usize,
height: usize,
kind: cudaMemcpyKind,
) -> cudaError_t {
unreachable!()
}
fn cudaMemcpy2DArrayToArray(
&self,
dst: cudaArray_t,
w_offset_dst: usize,
h_offset_dst: usize,
src: cudaArray_const_t,
w_offset_src: usize,
h_offset_src: usize,
width: usize,
height: usize,
kind: cudaMemcpyKind,
) -> cudaError_t {
unreachable!()
}
fn cudaMemcpyToSymbol(
&self,
symbol: *const c_void,
src: *const c_void,
count: usize,
offset: usize,
kind: cudaMemcpyKind,
) -> cudaError_t {
unreachable!()
}
fn cudaMemcpyFromSymbol(
&self,
dst: *mut c_void,
symbol: *const c_void,
count: usize,
offset: usize,
kind: cudaMemcpyKind,
) -> cudaError_t {
unreachable!()
}
fn cudaMemcpyAsync(
&self,
dst: *mut c_void,
src: *const c_void,
count: usize,
kind: cudaMemcpyKind,
stream: cudaStream_t,
) -> cudaError_t {
info!("[Hooked] api_name: cudaMemcpyAsync");
let dst_flag: ArgumentFlag;
let dst_arg: Argument;
let mut dst_usize: usize;
let src_flag: ArgumentFlag;
let src_arg: Argument;
let mut src_usize: usize;
let arg_x: Argument;
if kind == cudaMemcpyDeviceToHost {
src_usize = virt::handle_map(src as *mut c_void).expect("handle_map failed")
as *const c_void as usize;
dst_flag = ArgumentFlag::ARG_OUT;
src_flag = ArgumentFlag::ARG_IN | ArgumentFlag::ARG_VIRT;
let slice = unsafe { std::slice::from_raw_parts_mut(dst.cast::<u8>(), count) };
debug!("D2H: count:{}, dst:{:p}", count, dst);
debug!("D2H: src:{:p}", src);
dst_arg = Argument::from_mut_slice(slice, dst_flag);
src_arg = Argument::from_ref(&src_usize, src_flag);
arg_x = match virt::handle_map(dst) {
Ok(p) => {
dst_usize = p as usize;
debug!("D2H: mapped dst_usize:{:x}", dst_usize);
match virt::handle_map(dst_usize as *mut c_void) {
Ok(p2) => {
debug!("D2H: double mapped dst_usize:{:x}", p2 as usize);
dst_usize = p2 as usize;
Argument::from_ref(
&dst_usize,
ArgumentFlag::ARG_IN | ArgumentFlag::ARG_VIRT,
)
}
Err(_) => Argument::empty(),
}
}
Err(_) => Argument::empty(),
};
} else if kind == cudaMemcpyHostToDevice {
dst_usize = virt::handle_map(dst).expect("handle_map failed") as usize;
dst_flag = ArgumentFlag::ARG_IN | ArgumentFlag::ARG_VIRT;
src_flag = ArgumentFlag::ARG_IN;
dst_arg = Argument::from_ref(&dst_usize, dst_flag);
let slice = unsafe { std::slice::from_raw_parts(src.cast::<u8>(), count) };
debug!("H2D: count:{} src:{:p}", count, src);
debug!("H2D: dst:{:p}, dst-as-usize:{:x}", dst, dst as usize);
src_arg = Argument::from_slice(slice, src_flag);
arg_x = match virt::handle_map(src as *mut c_void) {
Ok(p) => {
src_usize = p as usize;
debug!("H2D: mapped src_usize:{:x}", src_usize);
match virt::handle_map(src_usize as *mut c_void) {
Ok(p2) => {
debug!("H2D: double mapped src_usize:{:x}", p2 as usize);
src_usize = p2 as usize;
Argument::from_ref(
&src_usize,
ArgumentFlag::ARG_IN | ArgumentFlag::ARG_VIRT,
)
}
Err(_) => Argument::empty(),
}
}
Err(_) => Argument::empty(),
};
} else if kind == cudaMemcpyDeviceToDevice {
dst_usize = virt::handle_map(dst).expect("handle_map failed") as usize;
src_usize = virt::handle_map(src as *mut c_void).expect("handle_map failed")
as *const c_void as usize;
dst_flag = ArgumentFlag::ARG_IN | ArgumentFlag::ARG_VIRT;
src_flag = ArgumentFlag::ARG_IN | ArgumentFlag::ARG_VIRT;
dst_arg = Argument::from_ref(&dst_usize, dst_flag);
src_arg = Argument::from_ref(&src_usize, src_flag);
debug!("D2D: src:{:p}->dst:{:p}, count:{}", src, dst, count);
arg_x = Argument::empty();
} else {
panic!("cudaMemcpyAsync::host_to_host copy is not implemented!");
}
let stream_usize = virt::handle_map(stream as *mut c_void).expect("handle_map failed")
as cudaStream_t as usize;
let req = Request::with_args(
ApiFuncName::Cudamemcpyasync as u64,
smallvec![
dst_arg,
src_arg,
Argument::from_ref(&count, ArgumentFlag::ARG_IN),
Argument::from_ref(&kind, ArgumentFlag::ARG_IN),
Argument::from_ref(&stream_usize, ArgumentFlag::ARG_IN | ArgumentFlag::ARG_VIRT),
arg_x,
],
);
let ret = Agent::get_instance()
.invoke_api::<cudaError_t>(req)
.expect("call invoke_api failed");
if ret == cudaSuccess {
thread::context().set_error(ret);
}
if kind == cudaMemcpyDeviceToHost {
let slice = unsafe { std::slice::from_raw_parts_mut(dst.cast::<u8>(), count) };
debug!(
"D2H: after count:{}, dst:{:p}, slice:{:?}",
count, dst, slice
);
debug!("D2H: after src:{:p}", src);
}
ret
}
fn cudaMemcpyPeerAsync(
&self,
dst: *mut c_void,
dst_device: c_int,
src: *const c_void,
src_device: c_int,
count: usize,
stream: cudaStream_t,
) -> cudaError_t {
unreachable!()
}
fn cudaMemcpy2DAsync(
&self,
dst: *mut c_void,
dpitch: usize,
src: *const c_void,
spitch: usize,
width: usize,
height: usize,
kind: cudaMemcpyKind,
stream: cudaStream_t,
) -> cudaError_t {
unreachable!()
}
fn cudaMemcpy2DToArrayAsync(
&self,
dst: cudaArray_t,
w_offset: usize,
h_offset: usize,
src: *const c_void,
spitch: usize,
width: usize,
height: usize,
kind: cudaMemcpyKind,
stream: cudaStream_t,
) -> cudaError_t {
unreachable!()
}
fn cudaMemcpy2DFromArrayAsync(
&self,
dst: *mut c_void,
dpitch: usize,
src: cudaArray_const_t,
w_offset: usize,
h_offset: usize,
width: usize,
height: usize,
kind: cudaMemcpyKind,
stream: cudaStream_t,
) -> cudaError_t {
unreachable!()
}
fn cudaMemcpyToSymbolAsync(
&self,
symbol: *const c_void,
src: *const c_void,
count: usize,
offset: usize,
kind: cudaMemcpyKind,
stream: cudaStream_t,
) -> cudaError_t {
unreachable!()
}
fn cudaMemcpyFromSymbolAsync(
&self,
dst: *mut c_void,
symbol: *const c_void,
count: usize,
offset: usize,
kind: cudaMemcpyKind,
stream: cudaStream_t,
) -> cudaError_t {
unreachable!()
}
fn cudaMemset(&self, dev_ptr: *mut c_void, value: c_int, count: usize) -> cudaError_t {
info!("[Hooked] api_name: cudaMemset");
let dev_ptr_usize = virt::handle_map(dev_ptr).expect("handle_map failed") as usize;
let req = Request::with_args(
ApiFuncName::Cudamemset as u64,
smallvec![
Argument::from_ref(
&dev_ptr_usize,
ArgumentFlag::ARG_IN | ArgumentFlag::ARG_VIRT
),
Argument::from_ref(&value, ArgumentFlag::ARG_IN),
Argument::from_ref(&count, ArgumentFlag::ARG_IN),
],
);
let ret = Agent::get_instance()
.invoke_api::<cudaError_t>(req)
.expect("call invoke_api failed");
if ret == cudaSuccess {
thread::context().set_error(ret);
}
ret
}
fn cudaMemset2D(
&self,
dev_ptr: *mut c_void,
pitch: usize,
value: c_int,
width: usize,
height: usize,
) -> cudaError_t {
unreachable!()
}
fn cudaMemset3D(
&self,
pitcheddev_ptr: cudaPitchedPtr,
value: c_int,
extent: cudaExtent,
) -> cudaError_t {
unreachable!()
}
fn cudaMemsetAsync(
&self,
dev_ptr: *mut c_void,
value: c_int,
count: usize,
stream: cudaStream_t,
) -> cudaError_t {
info!("[Hooked] api_name: cudaMemsetAsync");
let dev_ptr_usize = virt::handle_map(dev_ptr).expect("handle_map failed") as usize;
let stream_usize = virt::handle_map(stream as *mut c_void).expect("handle_map failed")
as cudaStream_t as usize;
let req = Request::with_args(
ApiFuncName::Cudamemsetasync as u64,
smallvec![
Argument::from_ref(
&dev_ptr_usize,
ArgumentFlag::ARG_IN | ArgumentFlag::ARG_VIRT
),
Argument::from_ref(&value, ArgumentFlag::ARG_IN),
Argument::from_ref(&count, ArgumentFlag::ARG_IN),
Argument::from_ref(&stream_usize, ArgumentFlag::ARG_IN | ArgumentFlag::ARG_VIRT),
],
);
let ret = Agent::get_instance()
.invoke_api::<cudaError_t>(req)
.expect("call invoke_api failed");
if ret == cudaSuccess {
thread::context().set_error(ret);
}
ret
}
fn cudaMemset2DAsync(
&self,
dev_ptr: *mut c_void,
pitch: usize,
value: c_int,
width: usize,
height: usize,
stream: cudaStream_t,
) -> cudaError_t {
unreachable!()
}
fn cudaMemset3DAsync(
&self,
pitcheddev_ptr: cudaPitchedPtr,
value: c_int,
extent: cudaExtent,
stream: cudaStream_t,
) -> cudaError_t {
unreachable!()
}
fn cudaGetSymbolAddress(
&self,
dev_ptr: *mut *mut c_void,
symbol: *const c_void,
) -> cudaError_t {
unreachable!()
}
fn cudaGetSymbolSize(&self, size: *mut usize, symbol: *const c_void) -> cudaError_t {
unreachable!()
}
fn cudaMemPrefetchAsync(
&self,
dev_ptr: *const c_void,
count: usize,
dst_device: c_int,
stream: cudaStream_t,
) -> cudaError_t {
unreachable!()
}
fn cudaMemPrefetchAsync_v2(
&self,
dev_ptr: *const c_void,
count: usize,
location: cudaMemLocation,
flags: c_uint,
stream: cudaStream_t,
) -> cudaError_t {
unreachable!()
}
fn cudaMemAdvise(
&self,
dev_ptr: *const c_void,
count: usize,
advice: cudaMemoryAdvise,
device: c_int,
) -> cudaError_t {
unreachable!()
}
fn cudaMemAdvise_v2(
&self,
dev_ptr: *const c_void,
count: usize,
advice: cudaMemoryAdvise,
location: cudaMemLocation,
) -> cudaError_t {
unreachable!()
}
fn cudaMemRangeGetAttribute(
&self,
data: *mut c_void,
data_size: usize,
attribute: cudaMemRangeAttribute,
dev_ptr: *const c_void,
count: usize,
) -> cudaError_t {
unreachable!()
}
fn cudaMemRangeGetAttributes(
&self,
data: *mut *mut c_void,
data_sizes: *mut usize,
attributes: *mut cudaMemRangeAttribute,
num_attributes: usize,
dev_ptr: *const c_void,
count: usize,
) -> cudaError_t {
unreachable!()
}
fn cudaMemcpyToArray(
&self,
dst: cudaArray_t,
w_offset: usize,
h_offset: usize,
src: *const c_void,
count: usize,
kind: cudaMemcpyKind,
) -> cudaError_t {
unreachable!()
}
fn cudaMemcpyFromArray(
&self,
dst: *mut c_void,
src: cudaArray_const_t,
w_offset: usize,
h_offset: usize,
count: usize,
kind: cudaMemcpyKind,
) -> cudaError_t {
unreachable!()
}
fn cudaMemcpyArrayToArray(
&self,
dst: cudaArray_t,
w_offset_dst: usize,
h_offset_dst: usize,
src: cudaArray_const_t,
w_offset_src: usize,
h_offset_src: usize,
count: usize,
kind: cudaMemcpyKind,
) -> cudaError_t {
unreachable!()
}
fn cudaMemcpyToArrayAsync(
&self,
dst: cudaArray_t,
w_offset: usize,
h_offset: usize,
src: *const c_void,
count: usize,
kind: cudaMemcpyKind,
stream: cudaStream_t,
) -> cudaError_t {
unreachable!()
}
fn cudaMemcpyFromArrayAsync(
&self,
dst: *mut c_void,
src: cudaArray_const_t,
w_offset: usize,
h_offset: usize,
count: usize,
kind: cudaMemcpyKind,
stream: cudaStream_t,
) -> cudaError_t {
unreachable!()
}
fn cudaMallocAsync(
&self,
dev_ptr: *mut *mut c_void,
size: usize,
h_stream: cudaStream_t,
) -> cudaError_t {
unreachable!()
}
fn cudaFreeAsync(&self, dev_ptr: *mut c_void, h_stream: cudaStream_t) -> cudaError_t {
unreachable!()
}
fn cudaMemPoolTrimTo(&self, mem_pool: cudaMemPool_t, min_bytes_to_keep: usize) -> cudaError_t {
unreachable!()
}
fn cudaMemPoolSetAttribute(
&self,
mem_pool: cudaMemPool_t,
attr: cudaMemPoolAttr,
value: *mut c_void,
) -> cudaError_t {
unreachable!()
}
fn cudaMemPoolGetAttribute(
&self,
mem_pool: cudaMemPool_t,
attr: cudaMemPoolAttr,
value: *mut c_void,
) -> cudaError_t {
unreachable!()
}
fn cudaMemPoolSetAccess(
&self,
mem_pool: cudaMemPool_t,
desc_list: *const cudaMemAccessDesc,
count: usize,
) -> cudaError_t {
unreachable!()
}
fn cudaMemPoolGetAccess(
&self,
flags: *mut cudaMemAccessFlags,
mem_pool: cudaMemPool_t,
location: *mut cudaMemLocation,
) -> cudaError_t {
unreachable!()
}
fn cudaMemPoolCreate(
&self,
mem_pool: *mut cudaMemPool_t,
pool_props: *const cudaMemPoolProps,
) -> cudaError_t {
unreachable!()
}
fn cudaMemPoolDestroy(&self, mem_pool: cudaMemPool_t) -> cudaError_t {
unreachable!()
}
fn cudaMallocFromPoolAsync(
&self,
ptr: *mut *mut c_void,
size: usize,
mem_pool: cudaMemPool_t,
stream: cudaStream_t,
) -> cudaError_t {
unreachable!()
}
fn cudaMemPoolExportToShareableHandle(
&self,
shareable_handle: *mut c_void,
mem_pool: cudaMemPool_t,
handle_type: cudaMemAllocationHandleType,
flags: c_uint,
) -> cudaError_t {
unreachable!()
}
fn cudaMemPoolImportFromShareableHandle(
&self,
mem_pool: *mut cudaMemPool_t,
shareable_handle: *mut c_void,
handle_type: cudaMemAllocationHandleType,
flags: c_uint,
) -> cudaError_t {
unreachable!()
}
fn cudaMemPoolExportPointer(
&self,
export_data: *mut cudaMemPoolPtrExportData,
ptr: *mut c_void,
) -> cudaError_t {
unreachable!()
}
fn cudaMemPoolImportPointer(
&self,
ptr: *mut *mut c_void,
mem_pool: cudaMemPool_t,
export_data: *mut cudaMemPoolPtrExportData,
) -> cudaError_t {
unreachable!()
}
fn cudaPointerGetAttributes(
&self,
attributes: *mut cudaPointerAttributes,
ptr: *const c_void,
) -> cudaError_t {
info!("[Hooked] api_name: cudaPointerGetAttributes");
let ptr_usize = match virt::handle_map(ptr as *mut c_void) {
Ok(p) => p as usize,
Err(_) => ptr as usize,
};
let req = Request::with_args(
ApiFuncName::Cudapointergetattributes as u64,
smallvec![
unsafe { Argument::from_mut_ptr(attributes, ArgumentFlag::ARG_OUT) },
Argument::from_ref(&ptr_usize, ArgumentFlag::ARG_IN | ArgumentFlag::ARG_VIRT),
],
);
let ret = Agent::get_instance()
.invoke_api::<cudaError_t>(req)
.expect("call invoke_api failed");
if ret == cudaSuccess {
thread::context().set_error(ret);
}
ret
}
fn cudaDeviceCanAccessPeer(
&self,
can_access_peer: *mut c_int,
device: c_int,
peer_device: c_int,
) -> cudaError_t {
unreachable!()
}
fn cudaDeviceEnablePeerAccess(&self, peer_device: c_int, flags: c_uint) -> cudaError_t {
unreachable!()
}
fn cudaDeviceDisablePeerAccess(&self, peer_device: c_int) -> cudaError_t {
unreachable!()
}
fn cudaGraphicsUnregisterResource(&self, resource: cudaGraphicsResource_t) -> cudaError_t {
unreachable!()
}
fn cudaGraphicsResourceSetMapFlags(
&self,
resource: cudaGraphicsResource_t,
flags: c_uint,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphicsMapResources(
&self,
count: c_int,
resources: *mut cudaGraphicsResource_t,
stream: cudaStream_t,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphicsUnmapResources(
&self,
count: c_int,
resources: *mut cudaGraphicsResource_t,
stream: cudaStream_t,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphicsResourceGetMappedPointer(
&self,
dev_ptr: *mut *mut c_void,
size: *mut usize,
resource: cudaGraphicsResource_t,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphicsSubResourceGetMappedArray(
&self,
array: *mut cudaArray_t,
resource: cudaGraphicsResource_t,
array_index: c_uint,
mip_level: c_uint,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphicsResourceGetMappedMipmappedArray(
&self,
mipmapped_array: *mut cudaMipmappedArray_t,
resource: cudaGraphicsResource_t,
) -> cudaError_t {
unreachable!()
}
fn cudaGetChannelDesc(
&self,
desc: *mut cudaChannelFormatDesc,
array: cudaArray_const_t,
) -> cudaError_t {
unreachable!()
}
fn cudaCreateChannelDesc(
&self,
x: c_int,
y: c_int,
z: c_int,
w: c_int,
f: cudaChannelFormatKind,
) -> cudaChannelFormatDesc {
unreachable!()
}
fn cudaCreateTextureObject(
&self,
p_tex_object: *mut cudaTextureObject_t,
p_res_desc: *const cudaResourceDesc,
p_tex_desc: *const cudaTextureDesc,
p_res_view_desc: *const cudaResourceViewDesc,
) -> cudaError_t {
unreachable!()
}
fn cudaDestroyTextureObject(&self, tex_object: cudaTextureObject_t) -> cudaError_t {
unreachable!()
}
fn cudaGetTextureObjectResourceDesc(
&self,
p_res_desc: *mut cudaResourceDesc,
tex_object: cudaTextureObject_t,
) -> cudaError_t {
unreachable!()
}
fn cudaGetTextureObjectTextureDesc(
&self,
p_tex_desc: *mut cudaTextureDesc,
tex_object: cudaTextureObject_t,
) -> cudaError_t {
unreachable!()
}
fn cudaGetTextureObjectResourceViewDesc(
&self,
p_res_view_desc: *mut cudaResourceViewDesc,
tex_object: cudaTextureObject_t,
) -> cudaError_t {
unreachable!()
}
fn cudaCreateSurfaceObject(
&self,
p_surf_object: *mut cudaSurfaceObject_t,
p_res_desc: *const cudaResourceDesc,
) -> cudaError_t {
unreachable!()
}
fn cudaDestroySurfaceObject(&self, surf_object: cudaSurfaceObject_t) -> cudaError_t {
unreachable!()
}
fn cudaGetSurfaceObjectResourceDesc(
&self,
p_res_desc: *mut cudaResourceDesc,
surf_object: cudaSurfaceObject_t,
) -> cudaError_t {
unreachable!()
}
fn cudaDriverGetVersion(&self, driver_version: *mut c_int) -> cudaError_t {
unreachable!()
}
fn cudaRuntimeGetVersion(&self, runtime_version: *mut c_int) -> cudaError_t {
unreachable!()
}
fn cudaGraphCreate(&self, p_graph: *mut cudaGraph_t, flags: c_uint) -> cudaError_t {
unreachable!()
}
fn cudaGraphAddKernelNode(
&self,
p_graph_node: *mut cudaGraphNode_t,
graph: cudaGraph_t,
p_dependencies: *const cudaGraphNode_t,
num_dependencies: usize,
p_node_params: *const cudaKernelNodeParams,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphKernelNodeGetParams(
&self,
node: cudaGraphNode_t,
p_node_params: *mut cudaKernelNodeParams,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphKernelNodeSetParams(
&self,
node: cudaGraphNode_t,
p_node_params: *const cudaKernelNodeParams,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphKernelNodeCopyAttributes(
&self,
h_src: cudaGraphNode_t,
h_dst: cudaGraphNode_t,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphKernelNodeGetAttribute(
&self,
h_node: cudaGraphNode_t,
attr: cudaLaunchAttributeID,
value_out: *mut cudaLaunchAttributeValue,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphKernelNodeSetAttribute(
&self,
h_node: cudaGraphNode_t,
attr: cudaLaunchAttributeID,
value: *const cudaLaunchAttributeValue,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphAddMemcpyNode(
&self,
p_graph_node: *mut cudaGraphNode_t,
graph: cudaGraph_t,
p_dependencies: *const cudaGraphNode_t,
num_dependencies: usize,
p_copy_params: *const cudaMemcpy3DParms,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphAddMemcpyNodeToSymbol(
&self,
p_graph_node: *mut cudaGraphNode_t,
graph: cudaGraph_t,
p_dependencies: *const cudaGraphNode_t,
num_dependencies: usize,
symbol: *const c_void,
src: *const c_void,
count: usize,
offset: usize,
kind: cudaMemcpyKind,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphAddMemcpyNodeFromSymbol(
&self,
p_graph_node: *mut cudaGraphNode_t,
graph: cudaGraph_t,
p_dependencies: *const cudaGraphNode_t,
num_dependencies: usize,
dst: *mut c_void,
symbol: *const c_void,
count: usize,
offset: usize,
kind: cudaMemcpyKind,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphAddMemcpyNode1D(
&self,
p_graph_node: *mut cudaGraphNode_t,
graph: cudaGraph_t,
p_dependencies: *const cudaGraphNode_t,
num_dependencies: usize,
dst: *mut c_void,
src: *const c_void,
count: usize,
kind: cudaMemcpyKind,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphMemcpyNodeGetParams(
&self,
node: cudaGraphNode_t,
p_node_params: *mut cudaMemcpy3DParms,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphMemcpyNodeSetParams(
&self,
node: cudaGraphNode_t,
p_node_params: *const cudaMemcpy3DParms,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphMemcpyNodeSetParamsToSymbol(
&self,
node: cudaGraphNode_t,
symbol: *const c_void,
src: *const c_void,
count: usize,
offset: usize,
kind: cudaMemcpyKind,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphMemcpyNodeSetParamsFromSymbol(
&self,
node: cudaGraphNode_t,
dst: *mut c_void,
symbol: *const c_void,
count: usize,
offset: usize,
kind: cudaMemcpyKind,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphMemcpyNodeSetParams1D(
&self,
node: cudaGraphNode_t,
dst: *mut c_void,
src: *const c_void,
count: usize,
kind: cudaMemcpyKind,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphAddMemsetNode(
&self,
p_graph_node: *mut cudaGraphNode_t,
graph: cudaGraph_t,
p_dependencies: *const cudaGraphNode_t,
num_dependencies: usize,
p_memset_params: *const cudaMemsetParams,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphMemsetNodeGetParams(
&self,
node: cudaGraphNode_t,
p_node_params: *mut cudaMemsetParams,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphMemsetNodeSetParams(
&self,
node: cudaGraphNode_t,
p_node_params: *const cudaMemsetParams,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphAddHostNode(
&self,
p_graph_node: *mut cudaGraphNode_t,
graph: cudaGraph_t,
p_dependencies: *const cudaGraphNode_t,
num_dependencies: usize,
p_node_params: *const cudaHostNodeParams,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphHostNodeGetParams(
&self,
node: cudaGraphNode_t,
p_node_params: *mut cudaHostNodeParams,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphHostNodeSetParams(
&self,
node: cudaGraphNode_t,
p_node_params: *const cudaHostNodeParams,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphAddChildGraphNode(
&self,
p_graph_node: *mut cudaGraphNode_t,
graph: cudaGraph_t,
p_dependencies: *const cudaGraphNode_t,
num_dependencies: usize,
child_graph: cudaGraph_t,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphChildGraphNodeGetGraph(
&self,
node: cudaGraphNode_t,
p_graph: *mut cudaGraph_t,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphAddEmptyNode(
&self,
p_graph_node: *mut cudaGraphNode_t,
graph: cudaGraph_t,
p_dependencies: *const cudaGraphNode_t,
num_dependencies: usize,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphAddEventRecordNode(
&self,
p_graph_node: *mut cudaGraphNode_t,
graph: cudaGraph_t,
p_dependencies: *const cudaGraphNode_t,
num_dependencies: usize,
event: cudaEvent_t,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphEventRecordNodeGetEvent(
&self,
node: cudaGraphNode_t,
event_out: *mut cudaEvent_t,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphEventRecordNodeSetEvent(
&self,
node: cudaGraphNode_t,
event: cudaEvent_t,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphAddEventWaitNode(
&self,
p_graph_node: *mut cudaGraphNode_t,
graph: cudaGraph_t,
p_dependencies: *const cudaGraphNode_t,
num_dependencies: usize,
event: cudaEvent_t,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphEventWaitNodeGetEvent(
&self,
node: cudaGraphNode_t,
event_out: *mut cudaEvent_t,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphEventWaitNodeSetEvent(
&self,
node: cudaGraphNode_t,
event: cudaEvent_t,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphAddExternalSemaphoresSignalNode(
&self,
p_graph_node: *mut cudaGraphNode_t,
graph: cudaGraph_t,
p_dependencies: *const cudaGraphNode_t,
num_dependencies: usize,
node_params: *const cudaExternalSemaphoreSignalNodeParams,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphExternalSemaphoresSignalNodeGetParams(
&self,
h_node: cudaGraphNode_t,
params_out: *mut cudaExternalSemaphoreSignalNodeParams,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphExternalSemaphoresSignalNodeSetParams(
&self,
h_node: cudaGraphNode_t,
node_params: *const cudaExternalSemaphoreSignalNodeParams,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphAddExternalSemaphoresWaitNode(
&self,
p_graph_node: *mut cudaGraphNode_t,
graph: cudaGraph_t,
p_dependencies: *const cudaGraphNode_t,
num_dependencies: usize,
node_params: *const cudaExternalSemaphoreWaitNodeParams,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphExternalSemaphoresWaitNodeGetParams(
&self,
h_node: cudaGraphNode_t,
params_out: *mut cudaExternalSemaphoreWaitNodeParams,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphExternalSemaphoresWaitNodeSetParams(
&self,
h_node: cudaGraphNode_t,
node_params: *const cudaExternalSemaphoreWaitNodeParams,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphAddMemAllocNode(
&self,
p_graph_node: *mut cudaGraphNode_t,
graph: cudaGraph_t,
p_dependencies: *const cudaGraphNode_t,
num_dependencies: usize,
node_params: *mut cudaMemAllocNodeParams,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphMemAllocNodeGetParams(
&self,
node: cudaGraphNode_t,
params_out: *mut cudaMemAllocNodeParams,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphAddMemFreeNode(
&self,
p_graph_node: *mut cudaGraphNode_t,
graph: cudaGraph_t,
p_dependencies: *const cudaGraphNode_t,
num_dependencies: usize,
dptr: *mut c_void,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphMemFreeNodeGetParams(
&self,
node: cudaGraphNode_t,
dptr_out: *mut c_void,
) -> cudaError_t {
unreachable!()
}
fn cudaDeviceGraphMemTrim(&self, device: c_int) -> cudaError_t {
unreachable!()
}
fn cudaDeviceGetGraphMemAttribute(
&self,
device: c_int,
attr: cudaGraphMemAttributeType,
value: *mut c_void,
) -> cudaError_t {
unreachable!()
}
fn cudaDeviceSetGraphMemAttribute(
&self,
device: c_int,
attr: cudaGraphMemAttributeType,
value: *mut c_void,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphClone(
&self,
p_graph_clone: *mut cudaGraph_t,
original_graph: cudaGraph_t,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphNodeFindInClone(
&self,
p_node: *mut cudaGraphNode_t,
original_node: cudaGraphNode_t,
cloned_graph: cudaGraph_t,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphNodeGetType(
&self,
node: cudaGraphNode_t,
p_type: *mut cudaGraphNodeType,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphGetNodes(
&self,
graph: cudaGraph_t,
nodes: *mut cudaGraphNode_t,
num_nodes: *mut usize,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphGetRootNodes(
&self,
graph: cudaGraph_t,
p_root_nodes: *mut cudaGraphNode_t,
p_num_root_nodes: *mut usize,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphGetEdges(
&self,
graph: cudaGraph_t,
from: *mut cudaGraphNode_t,
to: *mut cudaGraphNode_t,
num_edges: *mut usize,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphGetEdges_v2(
&self,
graph: cudaGraph_t,
from: *mut cudaGraphNode_t,
to: *mut cudaGraphNode_t,
edge_data: *mut cudaGraphEdgeData,
num_edges: *mut usize,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphNodeGetDependencies(
&self,
node: cudaGraphNode_t,
p_dependencies: *mut cudaGraphNode_t,
p_num_dependencies: *mut usize,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphNodeGetDependencies_v2(
&self,
node: cudaGraphNode_t,
p_dependencies: *mut cudaGraphNode_t,
edge_data: *mut cudaGraphEdgeData,
p_num_dependencies: *mut usize,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphNodeGetDependentNodes(
&self,
node: cudaGraphNode_t,
p_dependent_nodes: *mut cudaGraphNode_t,
p_num_dependent_nodes: *mut usize,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphNodeGetDependentNodes_v2(
&self,
node: cudaGraphNode_t,
p_dependent_nodes: *mut cudaGraphNode_t,
edge_data: *mut cudaGraphEdgeData,
p_num_dependent_nodes: *mut usize,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphAddDependencies(
&self,
graph: cudaGraph_t,
from: *const cudaGraphNode_t,
to: *const cudaGraphNode_t,
num_dependencies: usize,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphAddDependencies_v2(
&self,
graph: cudaGraph_t,
from: *const cudaGraphNode_t,
to: *const cudaGraphNode_t,
edge_data: *const cudaGraphEdgeData,
num_dependencies: usize,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphRemoveDependencies(
&self,
graph: cudaGraph_t,
from: *const cudaGraphNode_t,
to: *const cudaGraphNode_t,
num_dependencies: usize,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphRemoveDependencies_v2(
&self,
graph: cudaGraph_t,
from: *const cudaGraphNode_t,
to: *const cudaGraphNode_t,
edge_data: *const cudaGraphEdgeData,
num_dependencies: usize,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphDestroyNode(&self, node: cudaGraphNode_t) -> cudaError_t {
unreachable!()
}
fn cudaGraphInstantiate(
&self,
p_graph_exec: *mut cudaGraphExec_t,
graph: cudaGraph_t,
flags: c_ulonglong,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphInstantiateWithFlags(
&self,
p_graph_exec: *mut cudaGraphExec_t,
graph: cudaGraph_t,
flags: c_ulonglong,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphInstantiateWithParams(
&self,
p_graph_exec: *mut cudaGraphExec_t,
graph: cudaGraph_t,
instantiate_params: *mut cudaGraphInstantiateParams,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphExecGetFlags(
&self,
graph_exec: cudaGraphExec_t,
flags: *mut c_ulonglong,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphExecKernelNodeSetParams(
&self,
h_graph_exec: cudaGraphExec_t,
node: cudaGraphNode_t,
p_node_params: *const cudaKernelNodeParams,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphExecMemcpyNodeSetParams(
&self,
h_graph_exec: cudaGraphExec_t,
node: cudaGraphNode_t,
p_node_params: *const cudaMemcpy3DParms,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphExecMemcpyNodeSetParamsToSymbol(
&self,
h_graph_exec: cudaGraphExec_t,
node: cudaGraphNode_t,
symbol: *const c_void,
src: *const c_void,
count: usize,
offset: usize,
kind: cudaMemcpyKind,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphExecMemcpyNodeSetParamsFromSymbol(
&self,
h_graph_exec: cudaGraphExec_t,
node: cudaGraphNode_t,
dst: *mut c_void,
symbol: *const c_void,
count: usize,
offset: usize,
kind: cudaMemcpyKind,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphExecMemcpyNodeSetParams1D(
&self,
h_graph_exec: cudaGraphExec_t,
node: cudaGraphNode_t,
dst: *mut c_void,
src: *const c_void,
count: usize,
kind: cudaMemcpyKind,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphExecMemsetNodeSetParams(
&self,
h_graph_exec: cudaGraphExec_t,
node: cudaGraphNode_t,
p_node_params: *const cudaMemsetParams,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphExecHostNodeSetParams(
&self,
h_graph_exec: cudaGraphExec_t,
node: cudaGraphNode_t,
p_node_params: *const cudaHostNodeParams,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphExecChildGraphNodeSetParams(
&self,
h_graph_exec: cudaGraphExec_t,
node: cudaGraphNode_t,
child_graph: cudaGraph_t,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphExecEventRecordNodeSetEvent(
&self,
h_graph_exec: cudaGraphExec_t,
h_node: cudaGraphNode_t,
event: cudaEvent_t,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphExecEventWaitNodeSetEvent(
&self,
h_graph_exec: cudaGraphExec_t,
h_node: cudaGraphNode_t,
event: cudaEvent_t,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphExecExternalSemaphoresSignalNodeSetParams(
&self,
h_graph_exec: cudaGraphExec_t,
h_node: cudaGraphNode_t,
node_params: *const cudaExternalSemaphoreSignalNodeParams,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphExecExternalSemaphoresWaitNodeSetParams(
&self,
h_graph_exec: cudaGraphExec_t,
h_node: cudaGraphNode_t,
node_params: *const cudaExternalSemaphoreWaitNodeParams,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphNodeSetEnabled(
&self,
h_graph_exec: cudaGraphExec_t,
h_node: cudaGraphNode_t,
is_enabled: c_uint,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphNodeGetEnabled(
&self,
h_graph_exec: cudaGraphExec_t,
h_node: cudaGraphNode_t,
is_enabled: *mut c_uint,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphExecUpdate(
&self,
h_graph_exec: cudaGraphExec_t,
h_graph: cudaGraph_t,
result_info: *mut cudaGraphExecUpdateResultInfo,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphUpload(&self, graph_exec: cudaGraphExec_t, stream: cudaStream_t) -> cudaError_t {
unreachable!()
}
fn cudaGraphLaunch(&self, graph_exec: cudaGraphExec_t, stream: cudaStream_t) -> cudaError_t {
unreachable!()
}
fn cudaGraphExecDestroy(&self, graph_exec: cudaGraphExec_t) -> cudaError_t {
unreachable!()
}
fn cudaGraphDestroy(&self, graph: cudaGraph_t) -> cudaError_t {
unreachable!()
}
fn cudaGraphDebugDotPrint(
&self,
graph: cudaGraph_t,
path: *const c_char,
flags: c_uint,
) -> cudaError_t {
unreachable!()
}
fn cudaUserObjectCreate(
&self,
object_out: *mut cudaUserObject_t,
ptr: *mut c_void,
destroy: cudaHostFn_t,
initial_refcount: c_uint,
flags: c_uint,
) -> cudaError_t {
unreachable!()
}
fn cudaUserObjectRetain(&self, object: cudaUserObject_t, count: c_uint) -> cudaError_t {
unreachable!()
}
fn cudaUserObjectRelease(&self, object: cudaUserObject_t, count: c_uint) -> cudaError_t {
unreachable!()
}
fn cudaGraphRetainUserObject(
&self,
graph: cudaGraph_t,
object: cudaUserObject_t,
count: c_uint,
flags: c_uint,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphReleaseUserObject(
&self,
graph: cudaGraph_t,
object: cudaUserObject_t,
count: c_uint,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphAddNode(
&self,
p_graph_node: *mut cudaGraphNode_t,
graph: cudaGraph_t,
p_dependencies: *const cudaGraphNode_t,
num_dependencies: usize,
node_params: *mut cudaGraphNodeParams,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphAddNode_v2(
&self,
p_graph_node: *mut cudaGraphNode_t,
graph: cudaGraph_t,
p_dependencies: *const cudaGraphNode_t,
dependency_data: *const cudaGraphEdgeData,
num_dependencies: usize,
node_params: *mut cudaGraphNodeParams,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphNodeSetParams(
&self,
node: cudaGraphNode_t,
node_params: *mut cudaGraphNodeParams,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphExecNodeSetParams(
&self,
graph_exec: cudaGraphExec_t,
node: cudaGraphNode_t,
node_params: *mut cudaGraphNodeParams,
) -> cudaError_t {
unreachable!()
}
fn cudaGraphConditionalHandleCreate(
&self,
p_handle_out: *mut cudaGraphConditionalHandle,
graph: cudaGraph_t,
default_launch_value: c_uint,
flags: c_uint,
) -> cudaError_t {
unreachable!()
}
fn cudaGetDriverEntryPoint(
&self,
symbol: *const c_char,
func_ptr: *mut *mut c_void,
flags: c_ulonglong,
driver_status: *mut cudaDriverEntryPointQueryResult,
) -> cudaError_t {
unreachable!()
}
fn cudaGetExportTable(
&self,
pp_export_table: *mut *const c_void,
p_export_table_id: *const cudaUUID_t,
) -> cudaError_t {
unreachable!()
}
fn cudaGetFuncBySymbol(
&self,
function_ptr: *mut cudaFunction_t,
symbol_ptr: *const c_void,
) -> cudaError_t {
unreachable!()
}
fn cudaGetKernel(
&self,
kernel_ptr: *mut cudaKernel_t,
entry_func_addr: *const c_void,
) -> cudaError_t {
unreachable!()
}
}