asc_stwt
产品支持情况
功能说明
将指定数据存储到Global Memory的地址address中,并缓存至Data Cache和L2 Cache。
函数原型
__simt_callee__ inline void asc_stwt(__gm__ long int* address, long int val)
__simt_callee__ inline void asc_stwt(__gm__ unsigned long int* address, unsigned long int val)
__simt_callee__ inline void asc_stwt(__gm__ long long int* address, long long int val)
__simt_callee__ inline void asc_stwt(__gm__ unsigned long long int* address, unsigned long long int val)
__simt_callee__ inline void asc_stwt(__gm__ long2* address, long2 val)
__simt_callee__ inline void asc_stwt(__gm__ ulong2* address, ulong2 val)
__simt_callee__ inline void asc_stwt(__gm__ long4* address, long4 val)
__simt_callee__ inline void asc_stwt(__gm__ ulong4* address, ulong4 val)
__simt_callee__ inline void asc_stwt(__gm__ longlong2* address, longlong2 val)
__simt_callee__ inline void asc_stwt(__gm__ ulonglong2* address, ulonglong2 val)
__simt_callee__ inline void asc_stwt(__gm__ longlong4* address, longlong4 val)
__simt_callee__ inline void asc_stwt(__gm__ ulonglong4* address, ulonglong4 val)
__simt_callee__ inline void asc_stwt(__gm__ signed char* address, signed char val)
__simt_callee__ inline void asc_stwt(__gm__ unsigned char* address, unsigned char val)
__simt_callee__ inline void asc_stwt(__gm__ char2* address, char2 val)
__simt_callee__ inline void asc_stwt(__gm__ uchar2* address, uchar2 val)
__simt_callee__ inline void asc_stwt(__gm__ char4* address, char4 val)
__simt_callee__ inline void asc_stwt(__gm__ uchar4* address, uchar4 val)
__simt_callee__ inline void asc_stwt(__gm__ short* address, short val)
__simt_callee__ inline void asc_stwt(__gm__ unsigned short* address, unsigned short val)
__simt_callee__ inline void asc_stwt(__gm__ short2* address, short2 val)
__simt_callee__ inline void asc_stwt(__gm__ ushort2* address, ushort2 val)
__simt_callee__ inline void asc_stwt(__gm__ short4* address, short4 val)
__simt_callee__ inline void asc_stwt(__gm__ ushort4* address, ushort4 val)
__simt_callee__ inline void asc_stwt(__gm__ int* address, int val)
__simt_callee__ inline void asc_stwt(__gm__ unsigned int* address, unsigned int val)
__simt_callee__ inline void asc_stwt(__gm__ int2* address, int2 val)
__simt_callee__ inline void asc_stwt(__gm__ uint2* address, uint2 val)
__simt_callee__ inline void asc_stwt(__gm__ int4* address, int4 val)
__simt_callee__ inline void asc_stwt(__gm__ uint4* address, uint4 val)
__simt_callee__ inline void asc_stwt(__gm__ float* address, float val)
__simt_callee__ inline void asc_stwt(__gm__ float2* address, float2 val)
__simt_callee__ inline void asc_stwt(__gm__ float4* address, float4 val)
__simt_callee__ inline void asc_stwt(__gm__ bfloat16_t* address, bfloat16_t val)
__simt_callee__ inline void asc_stwt(__gm__ bfloat16x2_t* address, bfloat16x2_t val)
__simt_callee__ inline void asc_stwt(__gm__ half* address, half val)
__simt_callee__ inline void asc_stwt(__gm__ half2* address, half2 val)
参数说明
表 1 参数说明
address:Global Memory上的目的地址,数据将被存储到该地址。
val:待存储到address的数据。
返回值说明
无
约束说明
无
需要包含的头文件
使用除half、half2、bfloat16_t、bfloat16x2_t类型之外的接口需要包含"simt_api/device_functions.h"头文件,使用half和half2类型接口需要包含"simt_api/asc_fp16.h"头文件,使用bfloat16_t和bfloat16x2_t类型接口需要包含"simt_api/asc_bf16.h"头文件。
#include "simt_api/device_functions.h"
#include "simt_api/asc_fp16.h"
#include "simt_api/asc_bf16.h"
调用示例
// Example kernel: each thread stores one float from `val` into `src` through asc_stwt.
// Note that, despite its name, `src` is the store destination here and `val` is the data source.
__simt_vf__ __launch_bounds__(1024) inline void kernel_asc_stwt(__gm__ float* src, __gm__ float* val)
{
    // Per-thread element index: block offset plus the thread's position inside its block.
    const int elem = blockIdx.x * blockDim.x + threadIdx.x;

    // No bounds check is performed; presumably the launch covers exactly the valid
    // elements of both buffers -- confirm against the caller.
    asc_stwt(&src[elem], val[elem]);
}