/**
* Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/


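/* !
 * \file main.asc
 * \brief Host entry point that launches two AICPU kernels and the hello_world_do kernel launcher on two
 *        separate streams, ordering the work across the streams with events.
 */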

#include <cstdio>
#include "kernel_args.h"
#include "securec.h"
#include "acl/acl.h"

// AICPU kernel entry points defined outside this file. main() launches each of them with the
// <<<...>>> syntax, passing a pointer to a KernelInfo::KernelArgs and its size.
extern __global__ __aicpu__ uint32_t MyAicpuKernel1(void *arg);
extern __global__ __aicpu__ uint32_t MyAicpuKernel2(void *arg);

// Launcher defined outside this file. main() calls it with a block count, a stream, and two device
// buffers (m, ti). NOTE(review): presumably enqueues the AI Core kernel on the given stream -- confirm
// against its definition.
extern "C" void hello_world_do(uint32_t numBlocks, void *stream, uint8_t*m, uint8_t *ti);

int32_t main(int argc, char const *argv[])
{
    aclInit(nullptr);
    int32_t deviceId = 0;
    aclrtSetDevice(deviceId);

    aclrtStream stream = nullptr;  // aicpu stream
    aclrtStream stream2 = nullptr;  // aicore stream
    aclrtCreateStream(&stream);
    aclrtCreateStream(&stream2);

    aclrtEvent event1;
    aclrtEvent event2;
    aclrtCreateEventExWithFlag(&event1, ACL_EVENT_SYNC);
    aclrtCreateEventExWithFlag(&event2, ACL_EVENT_SYNC);

    void* zDevice;
    void* ti;
    aclrtMalloc((void**)&zDevice, 4096, ACL_MEM_MALLOC_HUGE_FIRST);
    aclrtMalloc((void**)&ti, 4096, ACL_MEM_MALLOC_HUGE_FIRST);
    aclrtMemset((void*)ti, 4096, 0, 4096);
    void *zHost = malloc(4096);
    memset(zHost, 0, 4096);
    aclrtMemcpy(zDevice, 4096, zHost, 4096, ACL_MEMCPY_HOST_TO_DEVICE);

    struct KernelInfo::KernelArgs args = {0};
    args.xDevice = (uint32_t *)zDevice;
    args.yDevice = args.xDevice + 1;
    args.zDevice = args.yDevice + 1;
    args.ti = (KernelInfo::TilingInfo *)ti;

    aclrtRecordEvent(event1, stream2);
    aclrtStreamWaitEvent(stream, event1);
    MyAicpuKernel1<<<1, nullptr, stream>>>(&args, sizeof(KernelInfo::KernelArgs));
    hello_world_do(1, stream2, (uint8_t *)zDevice, (uint8_t *)ti);
    aclrtRecordEvent(event2, stream);
    aclrtStreamWaitEvent(stream2, event2);

    args.ti = (KernelInfo::TilingInfo *)ti;
    aclrtRecordEvent(event1, stream2);
    aclrtStreamWaitEvent(stream, event1);
    MyAicpuKernel2<<<1, nullptr, stream>>>(&args, sizeof(KernelInfo::KernelArgs));
    hello_world_do(1, stream2, (uint8_t *)zDevice, (uint8_t *)ti);
    aclrtRecordEvent(event2, stream);
    aclrtStreamWaitEvent(stream2, event2);

    aclrtSynchronizeStreamWithTimeout(stream, 10000);
    aclrtFree(zDevice);
    free(zHost);
    aclrtDestroyStream(stream);
    aclrtResetDevice(deviceId);
    aclFinalize();
    return 0;
}