/**
* Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
/*!
* \file main.asc
* \brief Host-side sample that launches two AI CPU kernels and hello_world_do on separate streams.
*/
#include <cstdio>
#include "kernel_args.h"
#include "securec.h"
#include "acl/acl.h"
// AI CPU kernel entry points, declared here and defined outside this section.
// main() launches each of them with the <<<...>>> syntax on the AI CPU stream.
extern __global__ __aicpu__ uint32_t MyAicpuKernel1(void *arg);
extern __global__ __aicpu__ uint32_t MyAicpuKernel2(void *arg);
// C-linkage launcher defined outside this section; main() passes it a block count,
// a stream handle and two device buffers.
// NOTE(review): presumably wraps the AI Core kernel launch - confirm against its definition.
extern "C" void hello_world_do(uint32_t numBlocks, void *stream, uint8_t*m, uint8_t *ti);
int32_t main(int argc, char const *argv[])
{
aclInit(nullptr);
int32_t deviceId = 0;
aclrtSetDevice(deviceId);
aclrtStream stream = nullptr; // aicpu stream
aclrtStream stream2 = nullptr; // aicore stream
aclrtCreateStream(&stream);
aclrtCreateStream(&stream2);
aclrtEvent event1;
aclrtEvent event2;
aclrtCreateEventExWithFlag(&event1, ACL_EVENT_SYNC);
aclrtCreateEventExWithFlag(&event2, ACL_EVENT_SYNC);
void* zDevice;
void* ti;
aclrtMalloc((void**)&zDevice, 4096, ACL_MEM_MALLOC_HUGE_FIRST);
aclrtMalloc((void**)&ti, 4096, ACL_MEM_MALLOC_HUGE_FIRST);
aclrtMemset((void*)ti, 4096, 0, 4096);
void *zHost = malloc(4096);
memset(zHost, 0, 4096);
aclrtMemcpy(zDevice, 4096, zHost, 4096, ACL_MEMCPY_HOST_TO_DEVICE);
struct KernelInfo::KernelArgs args = {0};
args.xDevice = (uint32_t *)zDevice;
args.yDevice = args.xDevice + 1;
args.zDevice = args.yDevice + 1;
args.ti = (KernelInfo::TilingInfo *)ti;
aclrtRecordEvent(event1, stream2);
aclrtStreamWaitEvent(stream, event1);
MyAicpuKernel1<<<1, nullptr, stream>>>(&args, sizeof(KernelInfo::KernelArgs));
hello_world_do(1, stream2, (uint8_t *)zDevice, (uint8_t *)ti);
aclrtRecordEvent(event2, stream);
aclrtStreamWaitEvent(stream2, event2);
args.ti = (KernelInfo::TilingInfo *)ti;
aclrtRecordEvent(event1, stream2);
aclrtStreamWaitEvent(stream, event1);
MyAicpuKernel2<<<1, nullptr, stream>>>(&args, sizeof(KernelInfo::KernelArgs));
hello_world_do(1, stream2, (uint8_t *)zDevice, (uint8_t *)ti);
aclrtRecordEvent(event2, stream);
aclrtStreamWaitEvent(stream2, event2);
aclrtSynchronizeStreamWithTimeout(stream, 10000);
aclrtFree(zDevice);
free(zHost);
aclrtDestroyStream(stream);
aclrtResetDevice(deviceId);
aclFinalize();
return 0;
}