asc_stwt

产品支持情况

产品

是否支持

Ascend 950PR/Ascend 950DT

√

Atlas A3 训练系列产品/Atlas A3 推理系列产品

x

Atlas A2 训练系列产品/Atlas A2 推理系列产品

x

功能说明

将指定数据存储到Global Memory的地址address中,并缓存至Data Cache和L2 Cache。

函数原型

__simt_callee__ inline void asc_stwt(__gm__ long int* address, long int val)
__simt_callee__ inline void asc_stwt(__gm__ unsigned long int* address, unsigned long int val)
__simt_callee__ inline void asc_stwt(__gm__ long long int* address, long long int val)
__simt_callee__ inline void asc_stwt(__gm__ unsigned long long int* address, unsigned long long int val)
__simt_callee__ inline void asc_stwt(__gm__ long2* address, long2 val)
__simt_callee__ inline void asc_stwt(__gm__ ulong2* address, ulong2 val)
__simt_callee__ inline void asc_stwt(__gm__ long4* address, long4 val)
__simt_callee__ inline void asc_stwt(__gm__ ulong4* address, ulong4 val)
__simt_callee__ inline void asc_stwt(__gm__ longlong2* address, longlong2 val)
__simt_callee__ inline void asc_stwt(__gm__ ulonglong2* address, ulonglong2 val)
__simt_callee__ inline void asc_stwt(__gm__ longlong4* address, longlong4 val)
__simt_callee__ inline void asc_stwt(__gm__ ulonglong4* address, ulonglong4 val)
__simt_callee__ inline void asc_stwt(__gm__ signed char* address, signed char val)
__simt_callee__ inline void asc_stwt(__gm__ unsigned char* address, unsigned char val)
__simt_callee__ inline void asc_stwt(__gm__ char2* address, char2 val)
__simt_callee__ inline void asc_stwt(__gm__ uchar2* address, uchar2 val)
__simt_callee__ inline void asc_stwt(__gm__ char4* address, char4 val)
__simt_callee__ inline void asc_stwt(__gm__ uchar4* address, uchar4 val)
__simt_callee__ inline void asc_stwt(__gm__ short* address, short val)
__simt_callee__ inline void asc_stwt(__gm__ unsigned short* address, unsigned short val)
__simt_callee__ inline void asc_stwt(__gm__ short2* address, short2 val)
__simt_callee__ inline void asc_stwt(__gm__ ushort2* address, ushort2 val)
__simt_callee__ inline void asc_stwt(__gm__ short4* address, short4 val)
__simt_callee__ inline void asc_stwt(__gm__ ushort4* address, ushort4 val)
__simt_callee__ inline void asc_stwt(__gm__ int* address, int val)
__simt_callee__ inline void asc_stwt(__gm__ unsigned int* address, unsigned int val)
__simt_callee__ inline void asc_stwt(__gm__ int2* address, int2 val)
__simt_callee__ inline void asc_stwt(__gm__ uint2* address, uint2 val)
__simt_callee__ inline void asc_stwt(__gm__ int4* address, int4 val)
__simt_callee__ inline void asc_stwt(__gm__ uint4* address, uint4 val)
__simt_callee__ inline void asc_stwt(__gm__ float* address, float val)
__simt_callee__ inline void asc_stwt(__gm__ float2* address, float2 val)
__simt_callee__ inline void asc_stwt(__gm__ float4* address, float4 val)
__simt_callee__ inline void asc_stwt(__gm__ bfloat16_t* address, bfloat16_t val)
__simt_callee__ inline void asc_stwt(__gm__ bfloat16x2_t* address, bfloat16x2_t val)
__simt_callee__ inline void asc_stwt(__gm__ half* address, half val)
__simt_callee__ inline void asc_stwt(__gm__ half2* address, half2 val)

参数说明

表 1 参数说明

参数名

输入/输出

描述

address

输入

Global Memory的地址。

val

输入

源操作数。

返回值说明

无

约束说明

需要包含的头文件

使用除half、half2、bfloat16_t、bfloat16x2_t类型之外的接口需要包含"simt_api/device_functions.h"头文件,使用half和half2类型接口需要包含"simt_api/asc_fp16.h"头文件,使用bfloat16_t和bfloat16x2_t类型接口需要包含"simt_api/asc_bf16.h"头文件。

#include "simt_api/device_functions.h"
#include "simt_api/asc_fp16.h"
#include "simt_api/asc_bf16.h"

调用示例

// Example SIMT kernel: each thread stores one float from val into src via asc_stwt.
// Note that, despite its name, src is the store destination; val supplies the data.
__simt_vf__ __launch_bounds__(1024) inline void kernel_asc_stwt(__gm__ float* src, __gm__ float* val)
{
    // Flat per-thread element index derived from the block and thread coordinates.
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    // Value this thread is responsible for writing.
    const float value = val[tid];
    // Store the value at the Global Memory address of element tid of src.
    asc_stwt(&src[tid], value);
}