/**
 * Copyright (c) 2025 Huawei Technologies Co., Ltd.
 * This program is free software, you can redistribute it and/or modify it under the terms and conditions of 
 * CANN Open Software License Agreement Version 2.0 (the "License").
 * Please refer to the License for details. You may not use this file except in compliance with the License.
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, 
 * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
 * See LICENSE in the root of the software repository for the full text of the License.
 */

#include <benchmark/benchmark.h>
#include <memory>
#include <string>
#include <vector>
#include <graph_utils.h>
#include "graph/node.h"
#include "graph/compute_graph.h"
#include "graph/fast_graph/execute_graph.h"
#include "graph/normal_graph/compute_graph_impl.h"
#include "fast_graph/fast_graph_impl.h"
#include "fast_node_utils.h"
#include "node_utils.h"
#include "graph/op_desc.h"

using namespace ge;

#define GRAPH_CHECK_RET true

// Fluent helper used by the benchmarks: collects a name, a type and the number of
// input/output tensor descriptors, then creates an OpDesc and adds it to an ExecuteGraph.
class NodeBuilder {
 public:
  // name and type are forwarded to the OpDesc constructor in Build();
  // owner_graph is the graph the node is added to.
  NodeBuilder(std::string name, std::string type, const std::shared_ptr<ExecuteGraph> &owner_graph)
      : name_(std::move(name)), type_(std::move(type)), owner_graph_(owner_graph) {}

  // Sets how many input descriptors Build() attaches. Returns *this for chaining.
  NodeBuilder &InputNum(int64_t num) {
    input_num_ = num;
    return *this;
  }

  // Sets how many output descriptors Build() attaches. Returns *this for chaining.
  NodeBuilder &OutputNum(int64_t num) {
    output_num_ = num;
    return *this;
  }

  // Convenience wrapper that sets both counts in one call.
  NodeBuilder &IoNum(int64_t input_num, int64_t output_num) {
    return InputNum(input_num).OutputNum(output_num);
  }

  // Creates the OpDesc, attaches input_num_ input and output_num_ output descriptors
  // (all default-constructed GeTensorDesc), and returns the result of ExecuteGraph::AddNode.
  // Returns nullptr when no owner graph was supplied; callers already treat nullptr as failure.
  FastNode *Build() const {
    if (owner_graph_ == nullptr) {
      return nullptr;
    }
    auto op_desc = std::make_shared<OpDesc>(name_, type_);
    auto td = GeTensorDesc();
    for (int64_t i = 0; i < input_num_; ++i) {
      op_desc->AddInputDesc(td);
    }
    for (int64_t i = 0; i < output_num_; ++i) {
      op_desc->AddOutputDesc(td);
    }
    return owner_graph_->AddNode(op_desc);
  }

 private:
  std::string name_;
  std::string type_;
  std::shared_ptr<ExecuteGraph> owner_graph_;
  int64_t input_num_ = 0;
  int64_t output_num_ = 0;
};

void OpDescCreate(int64_t node_num, std::shared_ptr<OpDesc> *op_desc, int64_t io_num) {
  for (int64_t j = 0; j < node_num; j++) {
    op_desc[j] = std::make_shared<OpDesc>("op", "op");
    auto td = GeTensorDesc();

    for (int64_t i = 0; i < io_num; ++i) {
      op_desc[j]->AddInputDesc(td);
    }
    for (int64_t i = 0; i < io_num; ++i) {
      op_desc[j]->AddOutputDesc(td);
    }
  }
}

// Times constructing one ComputeGraph through std::make_shared per iteration; the graph
// goes out of scope at the end of each iteration, so its release is timed as well.
static void OLD_Graph_Creation(benchmark::State &state) {
  for (auto _ : state) {
    std::shared_ptr<ComputeGraph> graph = std::make_shared<ComputeGraph>("graph");
    // The graph is otherwise unused; stop the optimizer from discarding its creation.
    benchmark::DoNotOptimize(graph);
    benchmark::ClobberMemory();
  }
}
BENCHMARK(OLD_Graph_Creation);

// Times constructing one ExecuteGraph through std::make_shared per iteration; the graph
// goes out of scope at the end of each iteration, so its release is timed as well.
static void NEW_Graph_Creation(benchmark::State &state) {
  for (auto _ : state) {
    std::shared_ptr<ExecuteGraph> graph = std::make_shared<ExecuteGraph>("graph");
    // The graph is otherwise unused; stop the optimizer from discarding its creation.
    benchmark::DoNotOptimize(graph);
    benchmark::ClobberMemory();
  }
}
BENCHMARK(NEW_Graph_Creation);

// Times std::hash<std::string> on a fixed string: "hello, world." followed by 1000 'a's.
static void TEST_HASH_TIME(benchmark::State &state) {
  constexpr std::size_t kPaddingLength = 1000;
  std::string payload = "hello, world.";
  payload.append(kPaddingLength, 'a');

  for (auto _ : state) {
    auto digest = std::hash<std::string>{}(payload);
    benchmark::DoNotOptimize(digest);
    benchmark::ClobberMemory();
  }
}
BENCHMARK(TEST_HASH_TIME);

// Times adding one node to a ComputeGraph and removing it again while the graph already
// holds node_num - 1 other nodes.
//   range(0): number of input and of output descriptors per OpDesc
//   range(1): total number of OpDescs created (must be > 0)
static void OLD_Graph_AddAndRemoveSingleNode(benchmark::State &state) {
  auto compute_graph = std::make_shared<ComputeGraph>("graph0");
  const int64_t io_num = state.range(0);
  const int64_t node_num = state.range(1);
  if (node_num <= 0) {
    state.SkipWithError("Graph_AddNode Error: node_num must be positive");
    return;
  }
  // Heap storage instead of a variable-length array: VLAs are a compiler extension and
  // place node_num shared_ptrs on the stack.
  std::vector<std::shared_ptr<OpDesc>> op_desc(static_cast<std::size_t>(node_num));
  OpDescCreate(node_num, op_desc.data(), io_num);
  // Pre-populate with every descriptor except index 0, which the timed loop adds/removes.
  for (int64_t j = 1; j < node_num; ++j) {
    compute_graph->AddNode(op_desc[j]);
  }

  ge::NodePtr node;
  for (auto _ : state) {
    node = compute_graph->AddNode(op_desc[0]);
#if GRAPH_CHECK_RET
    if (node == nullptr) {
      // Mark the run as failed so the framework does not report timings for an aborted loop.
      state.SkipWithError("Graph_AddNode Error");
      return;
    }
#endif
    GraphUtils::RemoveJustNode(compute_graph, node);
    benchmark::DoNotOptimize(node);
    benchmark::ClobberMemory();
  }
}
BENCHMARK(OLD_Graph_AddAndRemoveSingleNode)->Args({20, 10})->Args({20, 100})->Args({20, 1000})->Args({20, 10000});

// Times adding one node to an ExecuteGraph and removing it again while the graph already
// holds node_num - 1 other nodes.
//   range(0): number of input and of output descriptors per OpDesc
//   range(1): total number of OpDescs created (must be > 0)
static void NEW_Graph_AddAndRemoveSingleNode(benchmark::State &state) {
  auto compute_graph = std::make_shared<ge::ExecuteGraph>("graph1");
  const int64_t io_num = state.range(0);
  const int64_t node_num = state.range(1);
  if (node_num <= 0) {
    state.SkipWithError("Graph_AddNode Error: node_num must be positive");
    return;
  }
  // Heap storage instead of a variable-length array: VLAs are a compiler extension and
  // place node_num shared_ptrs on the stack.
  std::vector<std::shared_ptr<OpDesc>> op_desc(static_cast<std::size_t>(node_num));
  OpDescCreate(node_num, op_desc.data(), io_num);
  // Pre-populate with every descriptor except index 0, which the timed loop adds/removes.
  for (int64_t j = 1; j < node_num; ++j) {
    compute_graph->AddNode(op_desc[j]);
  }

  FastNode *node = nullptr;
  for (auto _ : state) {
    node = compute_graph->AddNode(op_desc[0]);
#if GRAPH_CHECK_RET
    if (node == nullptr) {
      // Mark the run as failed so the framework does not report timings for an aborted loop.
      state.SkipWithError("Graph_AddNode Error");
      return;
    }
#endif
    compute_graph->RemoveJustNode(node);

    benchmark::DoNotOptimize(node);
    benchmark::ClobberMemory();
  }
}
BENCHMARK(NEW_Graph_AddAndRemoveSingleNode)->Args({20, 10})->Args({20, 100})->Args({20, 1000})->Args({20, 10000});

// Times adding node_num nodes to an initially empty ExecuteGraph and then removing all
// of them, once per iteration.
//   range(0): number of input and of output descriptors per OpDesc
//   range(1): number of nodes added and removed per iteration (must be > 0)
static void NEW_Graph_AddAndRemoveMultiNode(benchmark::State &state) {
  auto compute_graph = std::make_shared<ge::ExecuteGraph>("graph2");
  const int64_t io_num = state.range(0);
  const int64_t node_num = state.range(1);
  if (node_num <= 0) {
    state.SkipWithError("NEW_Graph_AddAndRemoveMultiNode Error: node_num must be positive");
    return;
  }
  // Up to 100000 elements: kept on the heap rather than in stack-allocated VLAs.
  std::vector<std::shared_ptr<OpDesc>> op_desc(static_cast<std::size_t>(node_num));
  OpDescCreate(node_num, op_desc.data(), io_num);

  std::vector<FastNode *> node(op_desc.size());
  for (auto _ : state) {
    for (int64_t j = 0; j < node_num; j++) {
      node[j] = compute_graph->AddNode(op_desc[j]);
    }

    for (int64_t j = 0; j < node_num; j++) {
      compute_graph->RemoveJustNode(node[j]);
    }
    benchmark::DoNotOptimize(node[0]);
    benchmark::ClobberMemory();
  }
}
BENCHMARK(NEW_Graph_AddAndRemoveMultiNode)->Args({20, 10000})->Args({20, 100000});

// Times adding node_num nodes to an initially empty ComputeGraph and then removing all
// of them, once per iteration.
//   range(0): number of input and of output descriptors per OpDesc
//   range(1): number of nodes added and removed per iteration
static void OLD_Graph_AddAndRemoveMultiNode(benchmark::State &state) {
  auto compute_graph = std::make_shared<ComputeGraph>("graph01");
  const int64_t io_num = state.range(0);
  const int64_t node_num = state.range(1);
  if (node_num < 0) {
    state.SkipWithError("OLD_Graph_AddAndRemoveMultiNode Error: node_num must not be negative");
    return;
  }
  // Up to 100000 elements: kept on the heap rather than in stack-allocated VLAs.
  std::vector<std::shared_ptr<OpDesc>> op_desc(static_cast<std::size_t>(node_num));
  OpDescCreate(node_num, op_desc.data(), io_num);

  std::vector<ge::NodePtr> node(op_desc.size());
  for (auto _ : state) {
    for (int64_t j = 0; j < node_num; j++) {
      node[j] = compute_graph->AddNode(op_desc[j]);
    }

    for (int64_t j = 0; j < node_num; j++) {
      GraphUtils::RemoveJustNode(compute_graph, node[j]);
    }
    benchmark::ClobberMemory();
  }
}
BENCHMARK(OLD_Graph_AddAndRemoveMultiNode)->Args({20, 10000})->Args({20, 100000});

// For each of node_num OpDescs, per iteration: create a node in root_graph2, detach it,
// re-add the existing node object to root_graph via ComputeGraph::AddNode(NodePtr), and
// detach it again.
//   range(0): number of input and of output descriptors per OpDesc
//   range(1): number of nodes processed per iteration
static void OLD_Graph_ADD_NODE_WITH_NODE(benchmark::State &state) {
  const int64_t io_num = state.range(0);
  const int64_t node_num = state.range(1);
  if (node_num < 0) {
    state.SkipWithError("OLD_Graph_ADD_NODE_WITH_NODE Error: node_num must not be negative");
    return;
  }
  auto root_graph2 = std::make_shared<ComputeGraph>("root_graph2");
  auto root_graph = std::make_shared<ComputeGraph>("root_graph");
  // Heap storage instead of variable-length arrays (non-standard, stack-hungry).
  std::vector<std::shared_ptr<OpDesc>> op_desc(static_cast<std::size_t>(node_num));
  OpDescCreate(node_num, op_desc.data(), io_num);

  std::vector<NodePtr> node(op_desc.size());
  for (auto _ : state) {
    for (int64_t i = 0; i < node_num; i++) {
      node[i] = root_graph2->AddNode(op_desc[i]);
#if GRAPH_CHECK_RET
      if (node[i] == nullptr) {
        // SkipWithError marks the run as failed; a bare return would let the framework
        // report timings for an aborted loop.
        state.SkipWithError("OLD_Graph_ADD_NODE_WITH_NODE AddNode Error");
        return;
      }
#endif
      auto ret = GraphUtils::RemoveJustNode(root_graph2, node[i]);
#if GRAPH_CHECK_RET
      if (ret != GRAPH_SUCCESS) {
        state.SkipWithError("OLD_Graph_ADD_NODE_WITH_NODE RemoveJustNode Error");
        return;
      }
#endif

      node[i] = root_graph->AddNode(node[i]);
#if GRAPH_CHECK_RET
      if (node[i] == nullptr) {
        state.SkipWithError("OLD_Graph_ADD_NODE_WITH_NODE AddNode Error");
        return;
      }
#endif

      ret = GraphUtils::RemoveJustNode(root_graph, node[i]);
#if GRAPH_CHECK_RET
      if (ret != GRAPH_SUCCESS) {
        state.SkipWithError("OLD_Graph_ADD_NODE_WITH_NODE RemoveJustNode Error");
        return;
      }
#endif
    }
  }
}
BENCHMARK(OLD_Graph_ADD_NODE_WITH_NODE)->Args({20, 100});
BENCHMARK(OLD_Graph_ADD_NODE_WITH_NODE)->Args({20, 1000});
BENCHMARK(OLD_Graph_ADD_NODE_WITH_NODE)->Args({20, 10000});
BENCHMARK(OLD_Graph_ADD_NODE_WITH_NODE)->Args({20, 50000});

// For each of node_num OpDescs, per iteration: create a node in new_root_graph2, detach
// it, re-add the existing FastNode to new_root_graph via ExecuteGraph::AddNode(FastNode *),
// and detach it again.
//   range(0): number of input and of output descriptors per OpDesc
//   range(1): number of nodes processed per iteration
static void NEW_Graph_ADD_NODE_WITH_NODE(benchmark::State &state) {
  auto new_root_graph2 = std::make_shared<ExecuteGraph>("new_graph2");
  auto new_root_graph = std::make_shared<ExecuteGraph>("new_graph");
  const int64_t io_num = state.range(0);
  const int64_t node_num = state.range(1);
  if (node_num < 0) {
    state.SkipWithError("NEW_Graph_ADD_NODE_WITH_NODE Error: node_num must not be negative");
    return;
  }
  // Heap storage instead of variable-length arrays (non-standard, stack-hungry).
  std::vector<FastNode *> node(static_cast<std::size_t>(node_num));
  std::vector<std::shared_ptr<OpDesc>> op_desc(node.size());
  OpDescCreate(node_num, op_desc.data(), io_num);

  for (auto _ : state) {
    for (int64_t i = 0; i < node_num; i++) {
      node[i] = new_root_graph2->AddNode(op_desc[i]);
#if GRAPH_CHECK_RET
      if (node[i] == nullptr) {
        // SkipWithError marks the run as failed; a bare return would let the framework
        // report timings for an aborted loop.
        state.SkipWithError("NEW_Graph_ADD_NODE_WITH_NODE Add Node Error");
        return;
      }
#endif
      auto ret = new_root_graph2->RemoveJustNode(node[i]);
#if GRAPH_CHECK_RET
      if (ret != GRAPH_SUCCESS) {
        state.SkipWithError("NEW_Graph_ADD_NODE_WITH_NODE Remove Node Error");
        return;
      }
#endif

      node[i] = new_root_graph->AddNode(node[i]);
#if GRAPH_CHECK_RET
      if (node[i] == nullptr) {
        state.SkipWithError("NEW_Graph_ADD_NODE_WITH_NODE Add Node Error");
        return;
      }
#endif

      ret = new_root_graph->RemoveJustNode(node[i]);
#if GRAPH_CHECK_RET
      if (ret != GRAPH_SUCCESS) {
        state.SkipWithError("NEW_Graph_ADD_NODE_WITH_NODE Remove Node Error");
        return;
      }
#endif
    }
  }
}
BENCHMARK(NEW_Graph_ADD_NODE_WITH_NODE)->Args({20, 100});
BENCHMARK(NEW_Graph_ADD_NODE_WITH_NODE)->Args({20, 1000});
BENCHMARK(NEW_Graph_ADD_NODE_WITH_NODE)->Args({20, 10000});
BENCHMARK(NEW_Graph_ADD_NODE_WITH_NODE)->Args({20, 50000});

// Times ComputeGraph::GetDirectNode() on a graph pre-populated with node_num nodes.
//   range(0): number of input and of output descriptors per OpDesc
//   range(1): number of nodes in the graph
static void OLD_Graph_GetDirectNode(benchmark::State &state) {
  auto compute_graph = std::make_shared<ComputeGraph>("graph");
  const int64_t io_num = state.range(0);
  const int64_t node_num = state.range(1);
  if (node_num < 0) {
    state.SkipWithError("OLD GetDirectNode Error: node_num must not be negative");
    return;
  }
  // Heap storage instead of a variable-length array (non-standard, stack-hungry).
  std::vector<std::shared_ptr<OpDesc>> op_desc(static_cast<std::size_t>(node_num));
  OpDescCreate(node_num, op_desc.data(), io_num);

  for (int64_t j = 0; j < node_num; j++) {
    compute_graph->AddNode(op_desc[j]);
  }

  for (auto _ : state) {
    auto ret = compute_graph->GetDirectNode();
    if (ret.size() == 0) {
      // Mark the run as failed so no timings are reported for an aborted loop.
      state.SkipWithError("OLD GetDirectNode Error ");
      return;
    }

    benchmark::DoNotOptimize(ret);
    benchmark::ClobberMemory();
  }
}
BENCHMARK(OLD_Graph_GetDirectNode)->Args({20, 10})->Args({20, 100})->Args({20, 1000})->Args({20, 10000})->Iterations(1);

// Times ExecuteGraph::GetDirectNode() on a graph pre-populated with node_num nodes.
//   range(0): number of input and of output descriptors per OpDesc
//   range(1): number of nodes in the graph
static void New_Graph_GetDirectNode(benchmark::State &state) {
  auto compute_graph = std::make_shared<ExecuteGraph>(std::string("graph"));
  const int64_t io_num = state.range(0);
  const int64_t node_num = state.range(1);
  if (node_num < 0) {
    state.SkipWithError("NEW GetDirectNode Error: node_num must not be negative");
    return;
  }
  // Heap storage instead of a variable-length array (non-standard, stack-hungry).
  std::vector<std::shared_ptr<OpDesc>> op_desc(static_cast<std::size_t>(node_num));
  OpDescCreate(node_num, op_desc.data(), io_num);
  for (int64_t j = 0; j < node_num; j++) {
    (void)compute_graph->AddNode(op_desc[j]);
  }

  for (auto _ : state) {
    auto ret = compute_graph->GetDirectNode();
    if (ret.size() == 0) {
      // The original message said "OLD"; this is the ExecuteGraph (new) variant.
      state.SkipWithError("NEW GetDirectNode Error ");
      return;
    }

    benchmark::DoNotOptimize(ret);
    benchmark::ClobberMemory();
  }
}
BENCHMARK(New_Graph_GetDirectNode)->Args({20, 10})->Args({20, 100})->Args({20, 1000})->Args({20, 10000})->Iterations(1);

// Times adding `num` edges between two ExecuteGraph nodes (vec[1] output i -> vec[0]
// input i) and then removing them all, once per iteration.
//   range(0): number of nodes built in the graph (at least 2 are required)
//   range(1): number of inputs/outputs per node, and number of edges added per iteration
static void Graph_AddAndRemoveEdge(benchmark::State &state) {
  auto compute_graph = std::make_shared<ExecuteGraph>("graph");
  const int num = static_cast<int>(state.range(1));
  const int vec_size = static_cast<int>(state.range(0));
  if ((vec_size < 2) || (num < 0)) {
    // vec[0] and vec[1] are both used below, so fewer than two nodes cannot work.
    state.SkipWithError("Graph_AddAndRemoveEdge Error: need at least two nodes and a non-negative edge count");
    return;
  }
  std::vector<FastNode *> vec(static_cast<std::size_t>(vec_size));

  for (int i = 0; i < vec_size; i++) {
    vec[i] = NodeBuilder("Node" + std::to_string(i), "Node", compute_graph).IoNum(num, num).Build();
#if GRAPH_CHECK_RET
    if (vec[i] == nullptr) {
      const std::string msg = "Graph_AddEdge vec[i] Error " + std::to_string(i);
      state.SkipWithError(msg.c_str());
      return;
    }
#endif
  }

  // Heap storage instead of a variable-length array (non-standard, stack-hungry).
  std::vector<FastEdge *> edge(static_cast<std::size_t>(num));
  for (auto _ : state) {
    for (int i = 0; i < num; i++) {
      edge[i] = compute_graph->AddEdge(vec[1], i, vec[0], i);
#if GRAPH_CHECK_RET
      if (edge[i] == nullptr) {
        // Mark the run as failed so no timings are reported for an aborted loop.
        state.SkipWithError("Graph_AddEdge Error ");
        return;
      }
#endif
    }

    for (int i = 0; i < num; i++) {
      auto ret = compute_graph->RemoveEdge(edge[i]);
#if GRAPH_CHECK_RET
      if (ret != GRAPH_SUCCESS) {
        const std::string msg = "Graph_AddAndRemoveEdge RemoveEdge Error " + std::to_string(i);
        state.SkipWithError(msg.c_str());
        return;
      }
#endif
    }
    benchmark::ClobberMemory();
  }
}
BENCHMARK(Graph_AddAndRemoveEdge)->Args({20, 20})->Args({20, 100})->Args({20, 1000})->Args({20, 10000})->Iterations(1);

// Times linking a chain of ComputeGraph nodes (node i output 0 -> node i-1 input 0) with
// GraphUtils::AddEdge and then unlinking it with GraphUtils::RemoveEdge, once per iteration.
//   range(0): number of input and of output descriptors per OpDesc
//   range(1): number of nodes in the chain
static void OLD_Graph_AddAndRemoveEdge(benchmark::State &state) {
  auto compute_graph = std::make_shared<ComputeGraph>("graph");
  const int64_t io_num = state.range(0);
  const int64_t node_num = state.range(1);
  if (node_num < 0) {
    state.SkipWithError("OLD_Graph_AddAndRemoveEdge Error: node_num must not be negative");
    return;
  }
  // Raw pointers only; the nodes themselves are added to (and held by) compute_graph below.
  std::vector<Node *> nodes(static_cast<std::size_t>(node_num));

  // Heap storage instead of a variable-length array (non-standard, stack-hungry).
  std::vector<std::shared_ptr<OpDesc>> op_desc(nodes.size());
  OpDescCreate(node_num, op_desc.data(), io_num);
  for (int64_t j = 0; j < node_num; j++) {
    auto tmp = compute_graph->AddNode(op_desc[j]);
#if GRAPH_CHECK_RET
    if (tmp == nullptr) {
      // nodes[j] is dereferenced in the timed loop, so a failed AddNode must stop here.
      const std::string msg = "OLD_Graph_AddAndRemoveEdge AddNode Error " + std::to_string(j);
      state.SkipWithError(msg.c_str());
      return;
    }
#endif
    nodes[j] = tmp.get();
  }

  for (auto _ : state) {
    for (int64_t i = 1; i < node_num; i++) {
      auto ret = GraphUtils::AddEdge(nodes[i]->GetOutDataAnchor(0), nodes[i - 1]->GetInDataAnchor(0));
#if GRAPH_CHECK_RET
      if (ret != GRAPH_SUCCESS) {
        const std::string msg = "OLD_Graph_AddAndRemoveEdge AddEdge Error " + std::to_string(i);
        state.SkipWithError(msg.c_str());
        return;
      }
#endif
    }

    for (int64_t i = 1; i < node_num; i++) {
      auto ret = GraphUtils::RemoveEdge(nodes[i]->GetOutDataAnchor(0), nodes[i - 1]->GetInDataAnchor(0));
#if GRAPH_CHECK_RET
      if (ret != GRAPH_SUCCESS) {
        const std::string msg = "OLD_Graph_AddAndRemoveEdge RemoveEdge Error " + std::to_string(i);
        state.SkipWithError(msg.c_str());
        return;
      }
#endif
    }
    benchmark::ClobberMemory();
  }
}
BENCHMARK(OLD_Graph_AddAndRemoveEdge)
    ->Args({20, 20})
    ->Args({20, 100})
    ->Args({20, 1000})
    ->Args({20, 10000})
    ->Iterations(1);

// Times ExecuteGraph::GetAllEdges() on a two-node graph joined by edge_num edges
// (node 1 output i -> node 0 input i).
//   range(0): number of edges, also the number of inputs/outputs per node
static void Graph_GetAllEdge(benchmark::State &state) {
  auto compute_graph = std::make_shared<ExecuteGraph>("graph");
  constexpr int kNodeNum = 2;
  const int edge_num = static_cast<int>(state.range(0));
  std::vector<FastNode *> vec(static_cast<std::size_t>(kNodeNum));

  for (int i = 0; i < kNodeNum; i++) {
    vec[i] = NodeBuilder("Node" + std::to_string(i), "Node", compute_graph).IoNum(edge_num, edge_num).Build();
#if GRAPH_CHECK_RET
    if (vec[i] == nullptr) {
      const std::string msg = "Graph_GetAllEdge Build Error " + std::to_string(i);
      state.SkipWithError(msg.c_str());
      return;
    }
#endif
  }

  // The edge pointers are not needed afterwards, so no array is kept for them.
  for (int i = 0; i < edge_num; i++) {
    FastEdge *edge = compute_graph->AddEdge(vec[1], i, vec[0], i);
#if GRAPH_CHECK_RET
    if (edge == nullptr) {
      const std::string msg = "Graph_GetAllEdge AddEdge Error " + std::to_string(i);
      state.SkipWithError(msg.c_str());
      return;
    }
#else
    (void)edge;
#endif
  }

  for (auto _ : state) {
    auto ret = compute_graph->GetAllEdges();
#if GRAPH_CHECK_RET
    if (ret.size() == 0) {
      // Mark the run as failed so no timings are reported for an aborted loop.
      state.SkipWithError("Graph_GetAllEdge Error ");
      return;
    }
#endif

    benchmark::DoNotOptimize(ret);
    benchmark::ClobberMemory();
  }
}
BENCHMARK(Graph_GetAllEdge)->Arg(20)->Arg(100)->Arg(1000)->Arg(10000);

// Times adding one sub-graph to an ExecuteGraph and removing it again while the root
// graph already holds subgraph_num - 1 other sub-graphs.
//   range(0): total number of sub-graphs created (must be > 0)
static void Graph_AddAndRemoveSubgraph(benchmark::State &state) {
  auto root_graph = std::make_shared<ExecuteGraph>("root_graph");
  const auto subgraph_num = state.range(0);
  if (subgraph_num <= 0) {
    // The timed loop uses the last sub-graph, so at least one must exist.
    state.SkipWithError("Graph_AddAndRemoveSubgraph Error: subgraph_num must be positive");
    return;
  }
  const auto count = static_cast<std::size_t>(subgraph_num);
  const int64_t edge_num = 5;
  // Heap storage instead of variable-length arrays (non-standard, stack-hungry).
  std::vector<FastNode *> node(count);
  std::vector<std::shared_ptr<OpDesc>> op_desc(count);
  OpDescCreate(subgraph_num, op_desc.data(), edge_num);

  // One parent node in the root graph per sub-graph.
  for (std::size_t i = 0; i < count; ++i) {
    node[i] = root_graph->AddNode(op_desc[i]);
  }

  std::vector<std::shared_ptr<ExecuteGraph>> sub_graph(count);
  for (std::size_t i = 0; i < count; i++) {
    sub_graph[i] = std::make_shared<ExecuteGraph>("subgraph_" + std::to_string(i));
    sub_graph[i]->SetParentGraph(root_graph.get());
    sub_graph[i]->SetParentNode(node[i]);
  }

  // Register all but the last sub-graph up front.
  for (std::size_t i = 0; i + 1 < count; i++) {
    auto ret = root_graph->AddSubGraph(sub_graph[i]);
#if GRAPH_CHECK_RET
    if (ret == nullptr) {
      state.SkipWithError("Graph_AddAndRemoveSubgraph AddSubGraph Error");
      return;
    }
#endif
  }

  for (auto _ : state) {
    auto ret = root_graph->AddSubGraph(sub_graph[count - 1]);
#if GRAPH_CHECK_RET
    if (ret == nullptr) {
      // Mark the run as failed so no timings are reported for an aborted loop.
      state.SkipWithError("Graph_AddAndRemoveSubgraph AddSubGraph Error");
      return;
    }
#endif
    root_graph->RemoveSubGraph(ret);

    benchmark::DoNotOptimize(ret);
    benchmark::ClobberMemory();
  }
}
BENCHMARK(Graph_AddAndRemoveSubgraph)->Arg(10)->Arg(100)->Arg(1000)->Arg(10000);

// Times adding one sub-graph to a ComputeGraph (AddSubgraph) and removing it again
// (RemoveSubGraph) while the root graph already holds subgraph_num - 1 other sub-graphs.
//   range(0): total number of sub-graphs created (must be > 0)
static void OLD_Graph_AddAndRemoveSubgraph(benchmark::State &state) {
  auto root_graph = std::make_shared<ComputeGraph>("root_graph");
  const auto subgraph_num = state.range(0);
  if (subgraph_num <= 0) {
    // The timed loop uses the last sub-graph, so at least one must exist.
    state.SkipWithError("OLD_Graph_AddAndRemoveSubgraph Error: subgraph_num must be positive");
    return;
  }
  const auto count = static_cast<std::size_t>(subgraph_num);
  const int64_t edge_num = 5;
  // Heap storage instead of variable-length arrays (non-standard, stack-hungry).
  std::vector<NodePtr> node(count);
  std::vector<std::shared_ptr<OpDesc>> op_desc(count);
  std::vector<ComputeGraphPtr> sub_graph(count);

  OpDescCreate(subgraph_num, op_desc.data(), edge_num);
  // One parent node in the root graph per sub-graph.
  for (std::size_t i = 0; i < count; ++i) {
    node[i] = root_graph->AddNode(op_desc[i]);
  }
  for (std::size_t i = 0; i < count; i++) {
    sub_graph[i] = std::make_shared<ComputeGraph>("subgraph_" + std::to_string(i));
    sub_graph[i]->SetParentGraph(root_graph);
    sub_graph[i]->SetParentNode(node[i]);
  }
  // Register all but the last sub-graph up front.
  for (std::size_t i = 0; i + 1 < count; i++) {
    auto ret = root_graph->AddSubgraph(sub_graph[i]);
#if GRAPH_CHECK_RET
    if (ret != GRAPH_SUCCESS) {
      state.SkipWithError("OLD_Graph_AddAndRemoveSubgraph 0 Error");
      return;
    }
#endif
  }

  for (auto _ : state) {
    auto ret = root_graph->AddSubgraph(sub_graph[count - 1]);
#if GRAPH_CHECK_RET
    if (ret != GRAPH_SUCCESS) {
      // Mark the run as failed so no timings are reported for an aborted loop.
      state.SkipWithError("OLD_Graph_AddAndRemoveSubgraph 1 Error");
      return;
    }
#endif
    root_graph->RemoveSubGraph(sub_graph[count - 1]);

    benchmark::DoNotOptimize(ret);
    benchmark::ClobberMemory();
  }
}
BENCHMARK(OLD_Graph_AddAndRemoveSubgraph)->Arg(10)->Arg(100)->Arg(1000)->Arg(10000);

// Times ExecuteGraph::TopologicalSortingGraph() on a chain of node_num nodes
// (node j-1 output 1 -> node j input 0).
//   range(0): number of inputs/outputs per node
//   range(1): number of nodes in the chain
static void Graph_Sort(benchmark::State &state) {
  auto compute_graph = std::make_shared<ExecuteGraph>("graph");
  const int io_num = static_cast<int>(state.range(0));
  const int node_num = static_cast<int>(state.range(1));
  if (node_num < 0) {
    state.SkipWithError("Graph_Sort Error: node_num must not be negative");
    return;
  }
  std::vector<FastNode *> vec(static_cast<std::size_t>(node_num));

  for (int i = 0; i < node_num; i++) {
    vec[i] = NodeBuilder("Node" + std::to_string(i), "Node", compute_graph).IoNum(io_num, io_num).Build();
#if GRAPH_CHECK_RET
    if (vec[i] == nullptr) {
      state.SkipWithError("Graph_Sort Error.");
      return;
    }
#endif
  }

  // The edge pointers are never read again, so no per-edge array (previously a VLA of up
  // to 200000 pointers on the stack) is kept.
  for (int j = 1; j < node_num; j++) {
    FastEdge *edge = compute_graph->AddEdge(vec[j - 1], 1, vec[j], 0);
#if GRAPH_CHECK_RET
    if (edge == nullptr) {
      state.SkipWithError("Graph_Sort Error.");
      return;
    }
#else
    (void)edge;
#endif
  }

  for (auto _ : state) {
    auto ret = compute_graph->TopologicalSortingGraph(compute_graph.get(), true);
#if GRAPH_CHECK_RET
    if (ret != GRAPH_SUCCESS) {
      // Mark the run as failed so no timings are reported for an aborted loop.
      const std::string msg = "Graph_Sort Error: " + std::to_string(ret);
      state.SkipWithError(msg.c_str());
      return;
    }
#endif
    benchmark::DoNotOptimize(ret);
    benchmark::ClobberMemory();
  }
}
BENCHMARK(Graph_Sort)->Args({20, 10000})->Args({20, 50000})->Args({20, 100000})->Args({20, 200000})->Iterations(1);

// Times ComputeGraph::TopologicalSortingGraph() on a chain of node_num nodes
// (node j-1 output 1 -> node j input 0).
//   range(0): number of input and of output descriptors per OpDesc
//   range(1): number of nodes in the chain
static void OLD_Graph_Sort(benchmark::State &state) {
  auto compute_graph = std::make_shared<ComputeGraph>("graph");
  const int io_num = static_cast<int>(state.range(0));
  const int node_num = static_cast<int>(state.range(1));
  if (node_num < 0) {
    state.SkipWithError("OLD_Graph_Sort Error: node_num must not be negative");
    return;
  }
  std::vector<NodePtr> vec(static_cast<std::size_t>(node_num));

  // Heap storage instead of a variable-length array: with 200000 nodes the VLA put
  // 200000 shared_ptrs on the stack.
  std::vector<std::shared_ptr<OpDesc>> op_desc(vec.size());
  OpDescCreate(node_num, op_desc.data(), io_num);
  for (int i = 0; i < node_num; i++) {
    vec[i] = compute_graph->AddNode(op_desc[i]);
#if GRAPH_CHECK_RET
    if (vec[i] == nullptr) {
      state.SkipWithError("OLD_Graph_Sort Error: 0");
      return;
    }
#endif
  }

  for (int j = 1; j < node_num; j++) {
    auto ret = GraphUtils::AddEdge(vec[j - 1]->GetOutDataAnchor(1), vec[j]->GetInDataAnchor(0));
#if GRAPH_CHECK_RET
    if (ret != GRAPH_SUCCESS) {
      state.SkipWithError("OLD_Graph_Sort Error: 1");
      return;
    }
#endif
  }

  for (auto _ : state) {
    auto ret = compute_graph->TopologicalSortingGraph(true);
#if GRAPH_CHECK_RET
    if (ret != GRAPH_SUCCESS) {
      // Mark the run as failed so no timings are reported for an aborted loop.
      state.SkipWithError("OLD_Graph_Sort Error: 2");
      return;
    }
#endif

    benchmark::DoNotOptimize(ret);
    benchmark::ClobberMemory();
  }
}
BENCHMARK(OLD_Graph_Sort)->Args({20, 10000})->Args({20, 50000})->Args({20, 100000})->Args({20, 200000})->Iterations(1);

// End-to-end ExecuteGraph scenario. Per iteration: add node_num nodes to the root graph,
// chain them with edges (node i output 1 -> node i-1 input 0), build subgraph_num
// sub-graphs of subgraph_node_num chained nodes each, attach them to the root graph, run
// a topological sort, then remove the root edges, root nodes and sub-graphs again.
//   range(0): number of input and of output descriptors per OpDesc
//   range(1): number of nodes in the root graph
//   range(2): number of sub-graphs
//   range(3): number of nodes per sub-graph
static void Graph_ALL_RUN(benchmark::State &state) {
  const int node_num = static_cast<int>(state.range(1));
  const int edge_num = static_cast<int>(state.range(0));
  const auto subgraph_num = state.range(2);
  const auto subgraph_node_num = state.range(3);
  if ((node_num < 0) || (subgraph_num < 0) || (subgraph_node_num < 0)) {
    state.SkipWithError("Graph_ALL_RUN Error: sizes must not be negative");
    return;
  }
  const auto node_cnt = static_cast<std::size_t>(node_num);
  const auto sub_cnt = static_cast<std::size_t>(subgraph_num);
  const auto sub_node_cnt = static_cast<std::size_t>(subgraph_node_num);

  // All working storage lives on the heap; the previous variable-length arrays
  // (including two-dimensional ones) are a compiler extension and were stack-allocated.
  std::vector<std::shared_ptr<OpDesc>> op_desc(node_cnt);
  OpDescCreate(node_num, op_desc.data(), edge_num);

  std::vector<std::vector<std::shared_ptr<OpDesc>>> sub_op_desc(
      sub_cnt, std::vector<std::shared_ptr<OpDesc>>(sub_node_cnt));
  for (auto &descs : sub_op_desc) {
    OpDescCreate(subgraph_node_num, descs.data(), edge_num);
  }

  std::vector<std::shared_ptr<ExecuteGraph>> sub_graph(sub_cnt);
  std::vector<FastNode *> node(node_cnt);
  std::vector<FastEdge *> edge(node_cnt);
  std::vector<ExecuteGraph *> quick_graph(sub_cnt);
  // Allocated once outside the timed loop; every element is overwritten before it is
  // read in each iteration.
  std::vector<std::vector<FastNode *>> sub_node(sub_cnt, std::vector<FastNode *>(sub_node_cnt));

  auto root_graph = std::make_shared<ExecuteGraph>("root_graph");
  for (auto _ : state) {
    for (int i = 0; i < node_num; i++) {
      node[i] = root_graph->AddNode(op_desc[i]);
#if GRAPH_CHECK_RET
      if (node[i] == nullptr) {
        // SkipWithError marks the run as failed; a bare return would let the framework
        // report timings for an aborted loop.
        state.SkipWithError("0 Graph_ALL_RUN Error");
        return;
      }
#endif
    }

    for (int i = 1; i < node_num; i++) {
      edge[i] = root_graph->AddEdge(node[i], 1, node[i - 1], 0);
#if GRAPH_CHECK_RET
      if (edge[i] == nullptr) {
        const std::string msg = "1 Graph_ALL_RUN Add Edge Error " + std::to_string(i);
        state.SkipWithError(msg.c_str());
        return;
      }
#endif
    }

    for (int64_t i = 0; i < subgraph_num; i++) {
      sub_graph[i] = std::make_shared<ExecuteGraph>("subgraph_" + std::to_string(i));
      for (int64_t j = 0; j < subgraph_node_num; j++) {
        sub_node[i][j] = sub_graph[i]->AddNode(sub_op_desc[i][j]);
#if GRAPH_CHECK_RET
        if (sub_node[i][j] == nullptr) {
          state.SkipWithError("Graph_ALL_RUN add subgraph node error.");
          return;
        }
#endif
      }
    }

    for (int64_t i = 0; i < subgraph_num; i++) {
      for (int64_t j = 1; j < subgraph_node_num; j++) {
        auto ret = sub_graph[i]->AddEdge(sub_node[i][j], 1, sub_node[i][j - 1], 0);
#if GRAPH_CHECK_RET
        if (ret == nullptr) {
          const std::string msg = "1 Graph_ALL_RUN sub graph edge Error " + std::to_string(j);
          state.SkipWithError(msg.c_str());
          return;
        }
#endif
      }
    }

    for (int64_t i = 0; i < subgraph_num; ++i) {
      quick_graph[i] = root_graph->AddSubGraph(sub_graph[i]);
#if GRAPH_CHECK_RET
      if (quick_graph[i] == nullptr) {
        state.SkipWithError("2 Graph_ALL_RUN add subgraph Error");
        return;
      }
#endif
    }

    root_graph->TopologicalSortingGraph(root_graph.get(), true);

    // Tear everything down again so the next iteration starts from an empty root graph.
    for (int i = 1; i < node_num; i++) {
      root_graph->RemoveEdge(edge[i]);
    }

    for (int i = 0; i < node_num; i++) {
      root_graph->RemoveJustNode(node[i]);
    }

    for (int64_t i = 0; i < subgraph_num; ++i) {
      root_graph->RemoveSubGraph(quick_graph[i]);
    }

    benchmark::ClobberMemory();
  }
}
BENCHMARK(Graph_ALL_RUN)->Args({20, 2000, 1000, 10})->Iterations(1);
BENCHMARK(Graph_ALL_RUN)->Args({20, 4000, 1000, 10})->Iterations(1);
BENCHMARK(Graph_ALL_RUN)->Args({20, 6000, 1000, 10})->Iterations(1);
BENCHMARK(Graph_ALL_RUN)->Args({20, 8000, 1000, 10})->Iterations(1);

// End-to-end ComputeGraph scenario. Per iteration: add node_num nodes to the root graph,
// chain them with edges (node i output 1 -> node i-1 input 0), build subgraph_num
// sub-graphs of subgraph_node_num chained nodes each, attach them to the root graph, run
// a topological sort, then remove the root edges, root nodes and sub-graphs again.
//   range(0): number of input and of output descriptors per OpDesc
//   range(1): number of nodes in the root graph
//   range(2): number of sub-graphs
//   range(3): number of nodes per sub-graph
static void OLD_Graph_ALL_RUN(benchmark::State &state) {
  const auto subgraph_num = state.range(2);
  const int node_num = static_cast<int>(state.range(1));
  const int edge_num = static_cast<int>(state.range(0));
  const auto subgraph_node_num = state.range(3);
  if ((node_num < 0) || (subgraph_num < 0) || (subgraph_node_num < 0)) {
    state.SkipWithError("OLD_Graph_ALL_RUN Error: sizes must not be negative");
    return;
  }
  const auto node_cnt = static_cast<std::size_t>(node_num);
  const auto sub_cnt = static_cast<std::size_t>(subgraph_num);
  const auto sub_node_cnt = static_cast<std::size_t>(subgraph_node_num);

  // All working storage lives on the heap; the previous variable-length arrays
  // (including two-dimensional ones) are a compiler extension and were stack-allocated.
  std::vector<std::shared_ptr<OpDesc>> op_desc(node_cnt);
  OpDescCreate(node_num, op_desc.data(), edge_num);

  std::vector<ComputeGraphPtr> sub_graph(sub_cnt);
  std::vector<std::vector<std::shared_ptr<OpDesc>>> sub_op_desc(
      sub_cnt, std::vector<std::shared_ptr<OpDesc>>(sub_node_cnt));
  for (auto &descs : sub_op_desc) {
    OpDescCreate(subgraph_node_num, descs.data(), edge_num);
  }

  std::vector<NodePtr> node(node_cnt);
  std::vector<ComputeGraphPtr> quick_graph(sub_cnt);
  // Allocated once outside the timed loop; every element is overwritten before it is
  // read in each iteration.
  std::vector<std::vector<NodePtr>> sub_graph_node(sub_cnt, std::vector<NodePtr>(sub_node_cnt));

  auto old_root_graph = std::make_shared<ComputeGraph>("root_graph");
  for (auto _ : state) {
    for (int i = 0; i < node_num; i++) {
      node[i] = old_root_graph->AddNode(op_desc[i]);
#if GRAPH_CHECK_RET
      if (node[i] == nullptr) {
        // SkipWithError marks the run as failed; a bare return would let the framework
        // report timings for an aborted loop.
        state.SkipWithError("OLD_Graph_ALL_RUN add node error.");
        return;
      }
#endif
    }

    for (int i = 1; i < node_num; i++) {
      auto ret = GraphUtils::AddEdge(node[i]->GetOutDataAnchor(1), node[i - 1]->GetInDataAnchor(0));
#if GRAPH_CHECK_RET
      if (ret != GRAPH_SUCCESS) {
        state.SkipWithError("OLD_Graph_ALL_RUN add edge error");
        return;
      }
#endif
    }

    for (int64_t i = 0; i < subgraph_num; i++) {
      sub_graph[i] = std::make_shared<ComputeGraph>("subgraph_" + std::to_string(i));
      for (int64_t j = 0; j < subgraph_node_num; j++) {
        sub_graph_node[i][j] = sub_graph[i]->AddNode(sub_op_desc[i][j]);
#if GRAPH_CHECK_RET
        if (sub_graph_node[i][j] == nullptr) {
          state.SkipWithError("OLD_Graph_ALL_RUN add node error.");
          return;
        }
#endif
      }
    }

    for (int64_t i = 0; i < subgraph_num; i++) {
      for (int64_t j = 1; j < subgraph_node_num; j++) {
        GraphUtils::AddEdge(sub_graph_node[i][j]->GetOutDataAnchor(1), sub_graph_node[i][j - 1]->GetInDataAnchor(0));
      }
    }

    for (int64_t i = 0; i < subgraph_num; ++i) {
      quick_graph[i] = old_root_graph->AddSubGraph(sub_graph[i]);
#if GRAPH_CHECK_RET
      if (quick_graph[i] == nullptr) {
        state.SkipWithError("2 OLD_Graph_ALL_RUN Error");
        return;
      }
#endif
    }

    old_root_graph->TopologicalSortingGraph(true);

    // Tear everything down again so the next iteration starts from an empty root graph.
    for (int i = 1; i < node_num; i++) {
      auto ret = GraphUtils::RemoveEdge(node[i]->GetOutDataAnchor(1), node[i - 1]->GetInDataAnchor(0));
#if GRAPH_CHECK_RET
      if (ret != GRAPH_SUCCESS) {
        state.SkipWithError("0 OLD_Graph_ALL_RUN Error");
        return;
      }
#endif
    }

    for (int i = 0; i < node_num; i++) {
      auto ret = GraphUtils::RemoveJustNode(old_root_graph, node[i]);
#if GRAPH_CHECK_RET
      if (ret != GRAPH_SUCCESS) {
        state.SkipWithError("OLD_Graph_ALL_RUN remove node error.");
        return;
      }
#endif
    }

    for (int64_t i = 0; i < subgraph_num; ++i) {
      old_root_graph->RemoveSubGraph(quick_graph[i]);
    }

    benchmark::ClobberMemory();
  }
}
BENCHMARK(OLD_Graph_ALL_RUN)->Args({20, 2000, 1000, 10})->Iterations(1);
BENCHMARK(OLD_Graph_ALL_RUN)->Args({20, 4000, 1000, 10})->Iterations(1);
BENCHMARK(OLD_Graph_ALL_RUN)->Args({20, 6000, 1000, 10})->Iterations(1);
BENCHMARK(OLD_Graph_ALL_RUN)->Args({20, 8000, 1000, 10})->Iterations(1);

// Times adding subgraph_num sub-graphs to an ExecuteGraph and then removing all of them,
// once per iteration.
//   range(0): number of sub-graphs
static void Graph_AddAndRemoveSubgraph_Multi(benchmark::State &state) {
  auto root_graph = std::make_shared<ExecuteGraph>("root_graph");
  const auto subgraph_num = state.range(0);
  if (subgraph_num < 0) {
    state.SkipWithError("Graph_AddAndRemoveSubgraph_Multi Error: subgraph_num must not be negative");
    return;
  }
  const auto count = static_cast<std::size_t>(subgraph_num);

  const int64_t edge_num = 5;
  // Heap storage instead of variable-length arrays (non-standard, stack-hungry).
  std::vector<FastNode *> node(count);
  std::vector<std::shared_ptr<OpDesc>> op_desc(count);
  OpDescCreate(subgraph_num, op_desc.data(), edge_num);
  // One parent node in the root graph per sub-graph.
  for (std::size_t i = 0; i < count; ++i) {
    node[i] = root_graph->AddNode(op_desc[i]);
  }

  // Each sub-graph is created exactly once; the earlier code built a first set that was
  // immediately replaced by this one.
  std::vector<std::shared_ptr<ExecuteGraph>> sub_graph(count);
  for (std::size_t i = 0; i < count; i++) {
    sub_graph[i] = std::make_shared<ExecuteGraph>("subgraph_" + std::to_string(i));
    sub_graph[i]->SetParentGraph(root_graph.get());
    sub_graph[i]->SetParentNode(node[i]);
  }

  std::vector<ExecuteGraph *> new_sub_graph(count);
  for (auto _ : state) {
    for (std::size_t i = 0; i < count; ++i) {
      new_sub_graph[i] = root_graph->AddSubGraph(sub_graph[i]);
#if GRAPH_CHECK_RET
      if (new_sub_graph[i] == nullptr) {
        // Mark the run as failed so no timings are reported for an aborted loop.
        state.SkipWithError("Graph_AddAndRemoveSubgraph_Multi AddSubGraph Error");
        return;
      }
#endif
    }
    for (std::size_t i = 0; i < count; ++i) {
      root_graph->RemoveSubGraph(new_sub_graph[i]);
    }

    benchmark::ClobberMemory();
  }
}
BENCHMARK(Graph_AddAndRemoveSubgraph_Multi)->Arg(10)->Arg(100)->Arg(1000)->Arg(10000);

// Times adding subgraph_num sub-graphs to a ComputeGraph and then removing all of them,
// once per iteration.
//   range(0): number of sub-graphs
static void Graph_AddAndRemoveSubgraph_Multi_OLD(benchmark::State &state) {
  auto root_graph = std::make_shared<ComputeGraph>("root_graph");
  const auto subgraph_num = state.range(0);
  if (subgraph_num < 0) {
    state.SkipWithError("Graph_AddAndRemoveSubgraph_Multi_OLD Error: subgraph_num must not be negative");
    return;
  }
  const auto count = static_cast<std::size_t>(subgraph_num);
  const int64_t edge_num = 5;
  // Heap storage instead of variable-length arrays (non-standard, stack-hungry).
  std::vector<NodePtr> node(count);
  std::vector<std::shared_ptr<OpDesc>> op_desc(count);
  OpDescCreate(subgraph_num, op_desc.data(), edge_num);
  // One parent node in the root graph per sub-graph.
  for (std::size_t i = 0; i < count; ++i) {
    node[i] = root_graph->AddNode(op_desc[i]);
  }

  std::vector<ComputeGraphPtr> sub_graph(count);
  for (std::size_t i = 0; i < count; i++) {
    sub_graph[i] = std::make_shared<ComputeGraph>("subgraph_" + std::to_string(i));
    sub_graph[i]->SetParentGraph(root_graph);
    sub_graph[i]->SetParentNode(node[i]);
  }

  for (auto _ : state) {
    for (std::size_t i = 0; i < count; ++i) {
      auto ret = root_graph->AddSubGraph(sub_graph[i]);
#if GRAPH_CHECK_RET
      if (ret == nullptr) {
        // Mark the run as failed so no timings are reported for an aborted loop.
        state.SkipWithError("Graph_AddAndRemoveSubgraph_Multi_OLD AddSubGraph Error");
        return;
      }
#endif
    }
    for (std::size_t i = 0; i < count; ++i) {
      root_graph->RemoveSubGraph(sub_graph[i]);
    }

    benchmark::ClobberMemory();
  }
}
BENCHMARK(Graph_AddAndRemoveSubgraph_Multi_OLD)->Arg(10)->Arg(100)->Arg(1000)->Arg(10000);

// Times Node::GetAllInAnchors() plus Node::GetAllOutAnchors() on a ComputeGraph node
// whose OpDesc has edge_num input and edge_num output descriptors.
//   range(0): number of input and of output descriptors per OpDesc
static void TEST_ANCHOR(benchmark::State &state) {
  auto root_graph = std::make_shared<ComputeGraph>("root_graph");
  const auto edge_num = state.range(0);
  // Compile-time size, so these are ordinary arrays rather than variable-length ones.
  constexpr int64_t kNodeNum = 2;
  NodePtr node[kNodeNum];

  std::shared_ptr<OpDesc> op_desc[kNodeNum];
  OpDescCreate(kNodeNum, op_desc, edge_num);
  for (int64_t i = 0; i < kNodeNum; ++i) {
    node[i] = root_graph->AddNode(op_desc[i]);
    if (node[i] == nullptr) {
      // node[0] is dereferenced in the timed loop.
      state.SkipWithError("TEST_ANCHOR AddNode Error");
      return;
    }
  }

  for (auto _ : state) {
    // Keep both results observable so the calls cannot be optimized away.
    auto in_anchors = node[0]->GetAllInAnchors();
    benchmark::DoNotOptimize(in_anchors);
    auto out_anchors = node[0]->GetAllOutAnchors();
    benchmark::DoNotOptimize(out_anchors);
    benchmark::ClobberMemory();
  }
}
BENCHMARK(TEST_ANCHOR)->Arg(10)->Arg(100)->Arg(1000)->Iterations(1);

// Times OutDataAnchor::GetPeerAnchors() on output anchor 0 of node 0 after it has been
// connected to anchor_num input anchors of node 1.
//   range(0): number of input/output descriptors per OpDesc and number of edges added
static void TEST_ANCHOR_PEER_GET(benchmark::State &state) {
  auto root_graph = std::make_shared<ComputeGraph>("root_graph");
  const auto anchor_num = state.range(0);
  // Compile-time size, so these are ordinary arrays rather than variable-length ones.
  constexpr int64_t kNodeNum = 2;
  NodePtr node[kNodeNum];
  std::shared_ptr<OpDesc> op_desc[kNodeNum];

  OpDescCreate(kNodeNum, op_desc, anchor_num);
  for (int64_t i = 0; i < kNodeNum; ++i) {
    node[i] = root_graph->AddNode(op_desc[i]);
    if (node[i] == nullptr) {
      // Both nodes are dereferenced below.
      state.SkipWithError("TEST_ANCHOR_PEER_GET AddNode Error");
      return;
    }
  }

  // Fan out: the single output anchor 0 of node 0 feeds every input anchor of node 1.
  for (int j = 0; j < anchor_num; j++) {
    GraphUtils::AddEdge(node[0]->GetOutDataAnchor(0), node[1]->GetInDataAnchor(j));
  }

  for (auto _ : state) {
    auto ret = node[0]->GetOutDataAnchor(0)->GetPeerAnchors();
    benchmark::DoNotOptimize(ret);
    benchmark::ClobberMemory();
  }
}
BENCHMARK(TEST_ANCHOR_PEER_GET)->Arg(10)->Arg(100)->Arg(1000);

// Times Node::GetOutNodes() on node 0 of a 1001-node ComputeGraph after output anchor j
// of node 0 has been connected to input anchor j of node j for every j < anchor_num.
// Note: despite the benchmark's name, the timed call is GetOutNodes().
//   range(0): number of input/output descriptors per OpDesc and number of edges added;
//             must not exceed the node count.
static void TEST_GET_IN_NODES(benchmark::State &state) {
  auto root_graph = std::make_shared<ComputeGraph>("root_graph");
  const auto anchor_num = state.range(0);
  constexpr int64_t kNodeNum = 1001;
  if ((anchor_num < 0) || (anchor_num > kNodeNum)) {
    // node[j] is indexed with j < anchor_num below.
    state.SkipWithError("TEST_GET_IN_NODES Error: anchor_num out of range");
    return;
  }
  std::vector<NodePtr> node(static_cast<std::size_t>(kNodeNum));

  for (int64_t j = 0; j < kNodeNum; j++) {
    OpDescPtr op_desc = std::make_shared<OpDesc>("op", "op");
    auto td = GeTensorDesc();
    for (int64_t i = 0; i < anchor_num; ++i) {
      op_desc->AddInputDesc(td);
    }
    for (int64_t i = 0; i < anchor_num; ++i) {
      op_desc->AddOutputDesc(td);
    }
    node[j] = root_graph->AddNode(op_desc);
    if (node[j] == nullptr) {
      state.SkipWithError("TEST_GET_IN_NODES AddNode Error");
      return;
    }
  }

  // NOTE(review): j == 0 connects node 0 to itself, exactly as before — confirm intended.
  for (int j = 0; j < anchor_num; j++) {
    GraphUtils::AddEdge(node[0]->GetOutDataAnchor(j), node[j]->GetInDataAnchor(j));
  }

  for (auto _ : state) {
    auto ret = node[0]->GetOutNodes();
    benchmark::DoNotOptimize(ret);
    benchmark::ClobberMemory();
  }
}
BENCHMARK(TEST_GET_IN_NODES)->Arg(10)->Arg(100)->Arg(1000);

// Benchmarks a full build/sort/tear-down cycle on a legacy ComputeGraph.
// Each iteration adds node_num nodes, chains them with two data edges per
// neighbouring pair, runs TopologicalSortingGraph, then removes all edges and
// nodes again.
//
// Arguments: range(0) = edge_num (forwarded to OpDescCreate),
//            range(1) = node_num.
// range(2) / range(3) are accepted for argument-list compatibility but are
// not used. The previous sub-graph descriptor setup was dead code, and with
// the registered arguments ({.., 1, 0}) it asked OpDescCreate for one
// descriptor in a zero-length row, i.e. an out-of-bounds write if
// OpDescCreate fills `count` entries (its body is defined elsewhere in this
// file). That setup has been removed.
static void TEST_OLD_Graph_ALL_RUN(benchmark::State &state) {
  const int edge_num = static_cast<int>(state.range(0));
  const int node_num = static_cast<int>(state.range(1));

  // Heap-backed storage instead of variable-length arrays (non-standard C++).
  std::vector<std::shared_ptr<OpDesc>> op_desc(node_num);
  std::vector<NodePtr> node(node_num);
  OpDescCreate(node_num, op_desc.data(), edge_num);

  auto root_graph = std::make_shared<ComputeGraph>("root_graph");
  for (auto _ : state) {
    for (int i = 0; i < node_num; i++) {
      node[i] = root_graph->AddNode(op_desc[i]);
#if GRAPH_CHECK_RET
      if (node[i] == nullptr) {
        // SkipWithError marks the run as failed; a bare return from inside the
        // benchmark loop is reported by the library as a premature exit.
        state.SkipWithError("0 OLD_Graph_ALL_RUN Error");
        return;
      }
#endif
    }

    for (int i = 1; i < node_num; i++) {
      auto ret = GraphUtils::AddEdge(node[i]->GetOutDataAnchor(0), node[i - 1]->GetInDataAnchor(0));
#if GRAPH_CHECK_RET
      if (ret != GRAPH_SUCCESS) {
        state.SkipWithError("0 OLD_Graph_ALL_RUN Error");
        return;
      }
#endif
      ret = GraphUtils::AddEdge(node[i]->GetOutDataAnchor(1), node[i - 1]->GetInDataAnchor(1));
#if GRAPH_CHECK_RET
      if (ret != GRAPH_SUCCESS) {
        state.SkipWithError("0 OLD_Graph_ALL_RUN Error");
        return;
      }
#endif
    }

    root_graph->TopologicalSortingGraph(true);

    for (int i = 1; i < node_num; i++) {
      auto ret = GraphUtils::RemoveEdge(node[i]->GetOutDataAnchor(0), node[i - 1]->GetInDataAnchor(0));
#if GRAPH_CHECK_RET
      if (ret != GRAPH_SUCCESS) {
        state.SkipWithError("0 OLD_Graph_ALL_RUN Error");
        return;
      }
#endif
      ret = GraphUtils::RemoveEdge(node[i]->GetOutDataAnchor(1), node[i - 1]->GetInDataAnchor(1));
#if GRAPH_CHECK_RET
      // Previously unchecked; verified like the first edge so a failed removal
      // cannot silently distort the measurement.
      if (ret != GRAPH_SUCCESS) {
        state.SkipWithError("0 OLD_Graph_ALL_RUN Error");
        return;
      }
#endif
    }

    for (int i = 0; i < node_num; i++) {
      auto ret = GraphUtils::RemoveJustNode(root_graph, node[i]);
#if GRAPH_CHECK_RET
      if (ret != GRAPH_SUCCESS) {
        state.SkipWithError("TEST_OLD_Graph_ALL_RUN remove node error.");
        return;
      }
#endif
    }

    benchmark::ClobberMemory();
  }
}
BENCHMARK(TEST_OLD_Graph_ALL_RUN)->Args({20, 500, 1, 0})->Iterations(100);

// Benchmarks GraphUtils::CopyComputeGraph() on a legacy ComputeGraph.
//
// Arguments: range(0) = number of input and output descriptors per op,
//            range(1) = number of nodes in the root graph,
//            range(2) = number of sub-graphs attached to the root graph,
//            range(3) = number of nodes per sub-graph.
// Root nodes (and the nodes of each sub-graph) are chained: output anchor 1
// of node i feeds input anchor 0 of node i - 1.
static void OLD_GRAPH_DEEPCOPY(benchmark::State &state) {
  using OpDescVec = std::vector<std::shared_ptr<OpDesc>>;

  const auto io_num = state.range(0);
  const int node_num = static_cast<int>(state.range(1));
  const auto subgraph_num = state.range(2);
  const auto subgraph_node_num = state.range(3);

  // Builds one op descriptor with io_num inputs and io_num outputs.
  const auto make_op_desc = [io_num]() {
    auto desc = std::make_shared<OpDesc>("op", "op");
    auto td = GeTensorDesc();
    for (int64_t i = 0; i < io_num; ++i) {
      desc->AddInputDesc(td);
    }
    for (int64_t i = 0; i < io_num; ++i) {
      desc->AddOutputDesc(td);
    }
    return desc;
  };

  // All bookkeeping lives on the heap: with the registered arguments the
  // former variable-length arrays held well over a megabyte of shared_ptr
  // objects on the stack, and VLAs are a non-standard extension in C++.
  OpDescVec op_desc(node_num);
  for (auto &desc : op_desc) {
    desc = make_op_desc();
  }

  std::vector<OpDescVec> sub_op_desc(subgraph_num, OpDescVec(subgraph_node_num));
  for (auto &row : sub_op_desc) {
    for (auto &desc : row) {
      desc = make_op_desc();
    }
  }

  std::vector<NodePtr> node(node_num);
  std::vector<ComputeGraphPtr> sub_graph(subgraph_num);
  std::vector<ComputeGraphPtr> quick_graph(subgraph_num);
  auto root_graph = std::make_shared<ComputeGraph>("root_graph");

  for (int i = 0; i < node_num; i++) {
    node[i] = root_graph->AddNode(op_desc[i]);
#if GRAPH_CHECK_RET
    if (node[i] == nullptr) {
      // Report through the benchmark framework instead of returning silently.
      state.SkipWithError("0 OLD_Graph_ALL_RUN Error");
      return;
    }
#endif
  }

  for (int i = 1; i < node_num; i++) {
    auto ret = GraphUtils::AddEdge(node[i]->GetOutDataAnchor(1), node[i - 1]->GetInDataAnchor(0));
#if GRAPH_CHECK_RET
    if (ret != GRAPH_SUCCESS) {
      state.SkipWithError("0 OLD_Graph_ALL_RUN Error");
      return;
    }
#endif
  }

  for (int i = 0; i < subgraph_num; i++) {
    sub_graph[i] = std::make_shared<ComputeGraph>("subgraph_" + std::to_string(i));
    std::vector<NodePtr> sub_graph_node(subgraph_node_num);
    for (int j = 0; j < subgraph_node_num; j++) {
      sub_graph_node[j] = sub_graph[i]->AddNode(sub_op_desc[i][j]);
#if GRAPH_CHECK_RET
      if (sub_graph_node[j] == nullptr) {
        state.SkipWithError("1111 Graph_ALL_RUN Error");
        return;
      }
#endif
    }
    for (int j = 1; j < subgraph_node_num; j++) {
      GraphUtils::AddEdge(sub_graph_node[j]->GetOutDataAnchor(1), sub_graph_node[j - 1]->GetInDataAnchor(0));
    }
  }

  for (int64_t i = 0; i < subgraph_num; ++i) {
    quick_graph[i] = root_graph->AddSubGraph(sub_graph[i]);
#if GRAPH_CHECK_RET
    if (quick_graph[i] == nullptr) {
      state.SkipWithError("2 OLD_Graph_ALL_RUN Error");
      return;
    }
#endif
  }

  auto test_graph = std::make_shared<ComputeGraph>("test");

  for (auto _ : state) {
    GraphUtils::CopyComputeGraph(root_graph, test_graph);
    benchmark::ClobberMemory();
  }
}
BENCHMARK(OLD_GRAPH_DEEPCOPY)->Args({20, 10000, 1, 10})->Iterations(1);
BENCHMARK(OLD_GRAPH_DEEPCOPY)->Args({20, 50000, 10, 10})->Iterations(1);
BENCHMARK(OLD_GRAPH_DEEPCOPY)->Args({20, 50000, 100, 10})->Iterations(1);
BENCHMARK(OLD_GRAPH_DEEPCOPY)->Args({20, 50000, 1000, 10})->Iterations(1);

// Benchmarks ExecuteGraph::CompleteCopy() -- the ExecuteGraph counterpart of
// OLD_GRAPH_DEEPCOPY.
//
// Arguments: range(0) = number of input and output descriptors per op,
//            range(1) = number of nodes in the root graph,
//            range(2) = number of sub-graphs attached to the root graph,
//            range(3) = number of nodes per sub-graph.
// Root nodes (and the nodes of each sub-graph) are chained: output 1 of
// node i feeds input 0 of node i - 1.
static void NEW_GRAPH_DEEPCOPY(benchmark::State &state) {
  using OpDescVec = std::vector<std::shared_ptr<OpDesc>>;

  const auto io_num = state.range(0);
  const int node_num = static_cast<int>(state.range(1));
  const auto subgraph_num = state.range(2);
  const auto subgraph_node_num = state.range(3);

  // Builds one op descriptor with io_num inputs and io_num outputs.
  const auto make_op_desc = [io_num]() {
    auto desc = std::make_shared<OpDesc>("op", "op");
    auto td = GeTensorDesc();
    for (int64_t i = 0; i < io_num; ++i) {
      desc->AddInputDesc(td);
    }
    for (int64_t i = 0; i < io_num; ++i) {
      desc->AddOutputDesc(td);
    }
    return desc;
  };

  // Heap-backed storage: the former variable-length arrays put more than a
  // megabyte on the stack for the registered arguments and are a
  // non-standard extension in C++.
  OpDescVec op_desc(node_num);
  for (auto &desc : op_desc) {
    desc = make_op_desc();
  }

  std::vector<std::shared_ptr<ExecuteGraph>> sub_graph(subgraph_num);
  std::vector<FastNode *> node(node_num, nullptr);
  std::vector<FastEdge *> edge(node_num, nullptr);
  std::vector<ExecuteGraph *> quick_graph(subgraph_num, nullptr);

  std::vector<OpDescVec> sub_op_desc(subgraph_num, OpDescVec(subgraph_node_num));
  for (auto &row : sub_op_desc) {
    for (auto &desc : row) {
      desc = make_op_desc();
    }
  }

  auto root_graph = std::make_shared<ExecuteGraph>("root_graph");
  for (int i = 0; i < node_num; i++) {
    node[i] = root_graph->AddNode(op_desc[i]);
#if GRAPH_CHECK_RET
    if (node[i] == nullptr) {
      // Report through the benchmark framework instead of returning silently.
      state.SkipWithError("0 Graph_ALL_RUN Error");
      return;
    }
#endif
  }

  for (int i = 1; i < node_num; i++) {
    edge[i] = root_graph->AddEdge(node[i], 1, node[i - 1], 0);
#if GRAPH_CHECK_RET
    if (edge[i] == nullptr) {
      const std::string msg = "1 Graph_ALL_RUN Add Edge Error " + std::to_string(i);
      state.SkipWithError(msg.c_str());
      return;
    }
#endif
  }

  for (int i = 0; i < subgraph_num; i++) {
    sub_graph[i] = std::make_shared<ExecuteGraph>("subgraph_" + std::to_string(i));
    std::vector<FastNode *> sub_graph_node(subgraph_node_num, nullptr);
    for (int j = 0; j < subgraph_node_num; j++) {
      sub_graph_node[j] = sub_graph[i]->AddNode(sub_op_desc[i][j]);
#if GRAPH_CHECK_RET
      if (sub_graph_node[j] == nullptr) {
        state.SkipWithError("1111 Graph_ALL_RUN Error");
        return;
      }
#endif
    }
    for (int j = 1; j < subgraph_node_num; j++) {
      auto ret = sub_graph[i]->AddEdge(sub_graph_node[j], 1, sub_graph_node[j - 1], 0);
#if GRAPH_CHECK_RET
      if (ret == nullptr) {
        const std::string msg = "1 Graph_ALL_RUN sub graph Error" + std::to_string(j);
        state.SkipWithError(msg.c_str());
        return;
      }
#endif
    }
  }

  for (int i = 0; i < subgraph_num; ++i) {
    quick_graph[i] = root_graph->AddSubGraph(sub_graph[i]);
#if GRAPH_CHECK_RET
    if (quick_graph[i] == nullptr) {
      state.SkipWithError("2 Graph_ALL_RUN Error");
      return;
    }
#endif
  }

  auto test1_graph = std::make_shared<ExecuteGraph>("root_graph");
  for (auto _ : state) {
    test1_graph->CompleteCopy(*root_graph);
    benchmark::ClobberMemory();
  }
}
BENCHMARK(NEW_GRAPH_DEEPCOPY)->Args({20, 10000, 1, 10})->Iterations(1);
BENCHMARK(NEW_GRAPH_DEEPCOPY)->Args({20, 50000, 10, 10})->Iterations(1);
BENCHMARK(NEW_GRAPH_DEEPCOPY)->Args({20, 50000, 100, 10})->Iterations(1);
BENCHMARK(NEW_GRAPH_DEEPCOPY)->Args({20, 50000, 1000, 10})->Iterations(1);

// Benchmarks ComputeGraph::RemoveNode() by removing every node of a chained
// legacy graph.
//
// Arguments: range(0) = edge_num (forwarded to OpDescCreate),
//            range(1) = number of nodes.
// range(2) / range(3) are part of the registered argument lists but are not
// read by this benchmark.
// The graph is built once before the timed loop, so the benchmark is only
// meaningful for a single iteration (as registered below).
static void TEST_REMOVE_NODE(benchmark::State &state) {
  const int node_num = static_cast<int>(state.range(1));
  const int edge_num = static_cast<int>(state.range(0));
  // Heap-backed storage instead of variable-length arrays (non-standard C++,
  // and sized by a runtime argument).
  std::vector<std::shared_ptr<OpDesc>> op_desc(node_num);
  OpDescCreate(node_num, op_desc.data(), edge_num);

  std::vector<NodePtr> node(node_num);
  auto old_root_graph = std::make_shared<ComputeGraph>("root_graph");

  for (int i = 0; i < node_num; i++) {
    node[i] = old_root_graph->AddNode(op_desc[i]);
#if GRAPH_CHECK_RET
    if (node[i] == nullptr) {
      // Report through the benchmark framework instead of returning silently.
      state.SkipWithError("OLD_Graph_ALL_RUN add node error.");
      return;
    }
#endif
  }

  // Chain the nodes: output anchor 1 of node i feeds input anchor 0 of i - 1.
  for (int i = 1; i < node_num; i++) {
    auto ret = GraphUtils::AddEdge(node[i]->GetOutDataAnchor(1), node[i - 1]->GetInDataAnchor(0));
#if GRAPH_CHECK_RET
    if (ret != GRAPH_SUCCESS) {
      state.SkipWithError("OLD_Graph_ALL_RUN add edge error");
      return;
    }
#endif
  }

  for (auto _ : state) {
    for (int i = 0; i < node_num; i++) {
      auto ret = old_root_graph->RemoveNode(node[i]);
#if GRAPH_CHECK_RET
      if (ret != GRAPH_SUCCESS) {
        state.SkipWithError("OLD_Graph_ALL_RUN remove node error.");
        return;
      }
#endif
    }

    benchmark::ClobberMemory();
  }
}
BENCHMARK(TEST_REMOVE_NODE)->Args({20, 10000, 2000, 10})->Iterations(1);
BENCHMARK(TEST_REMOVE_NODE)->Args({20, 10000, 4000, 10})->Iterations(1);
BENCHMARK(TEST_REMOVE_NODE)->Args({20, 10000, 6000, 10})->Iterations(1);
BENCHMARK(TEST_REMOVE_NODE)->Args({20, 10000, 10000, 10})->Iterations(1);

// Benchmarks ComputeGraph::GetAllSubgraphs() on a legacy root graph.
//
// Arguments: range(0) = number of nodes in the root graph,
//            range(1) = number of sub-graphs registered on the root graph.
// Sub-graph i uses root node i as its parent node, so the sub-graph count
// must not exceed the node count.
static void TEST_GetSubGraph(benchmark::State &state) {
  auto root_graph = std::make_shared<ComputeGraph>("root_graph");
  const int node_num = static_cast<int>(state.range(0));
  const size_t subgraph_num = state.range(1);
  const int edge_num = 5;
  // Heap-backed storage instead of variable-length arrays (non-standard C++).
  std::vector<NodePtr> node(node_num);
  for (int i = 0; i < node_num; ++i) {
    auto op_desc = std::make_shared<OpDesc>("op", "op");
    auto td = GeTensorDesc();
    for (int j = 0; j < edge_num; ++j) {
      op_desc->AddInputDesc(td);
      op_desc->AddOutputDesc(td);
    }
    node[i] = root_graph->AddNode(op_desc);
  }

  // node[i] is indexed by the sub-graph index below; reject argument
  // combinations that would read past the end of the node list.
  if (subgraph_num > node.size()) {
    state.SkipWithError("TEST_GetSubGraph: subgraph_num exceeds node_num");
    return;
  }

  std::vector<std::shared_ptr<ComputeGraph>> sub_graph(subgraph_num);
  for (size_t i = 0; i < subgraph_num; i++) {
    sub_graph[i] = std::make_shared<ComputeGraph>("subgraph_" + std::to_string(i));
    sub_graph[i]->SetParentGraph(root_graph);
    sub_graph[i]->SetParentNode(node[i]);
  }

  for (size_t i = 0; i < subgraph_num; i++) {
    std::string name = "subgraph_" + std::to_string(i);
    root_graph->AddSubgraph(name, sub_graph[i]);
  }

  for (auto _ : state) {
    auto subgraphs = root_graph->GetAllSubgraphs();
#if GRAPH_CHECK_RET
    // A wrong sub-graph count aborts the whole benchmark binary.
    if (subgraphs.size() != subgraph_num) {
      std::cout << "0 TEST_GetSubGraph Error" << std::endl;
      exit(1);
    }
#endif
    benchmark::DoNotOptimize(subgraphs);
    benchmark::ClobberMemory();
  }
}
BENCHMARK(TEST_GetSubGraph)->Args({10000, 2000});
BENCHMARK(TEST_GetSubGraph)->Args({10000, 4000});
BENCHMARK(TEST_GetSubGraph)->Args({10000, 6000});
BENCHMARK(TEST_GetSubGraph)->Args({10000, 8000});

// Benchmarks FastNodeUtils::AppendInputEdgeInfo() on a single ExecuteGraph
// node. The op descriptor is created via OpDescCreate with an I/O count of 10;
// state.range(0) is the value passed to AppendInputEdgeInfo.
static void New_Graph_AddNodeAndUpdateIo(benchmark::State &state) {
  constexpr int kNodeCount = 1;
  constexpr int kInitialIoNum = 10;
  const int target_num = state.range(0);

  auto graph = std::make_shared<ExecuteGraph>("graph0");
  std::shared_ptr<OpDesc> descs[kNodeCount] = {nullptr};
  OpDescCreate(kNodeCount, descs, kInitialIoNum);
  FastNode *fast_node = graph->AddNode(descs[0]);

  for (auto _ : state) {
    auto status = FastNodeUtils::AppendInputEdgeInfo(fast_node, target_num);
    benchmark::DoNotOptimize(status);
    benchmark::ClobberMemory();
  }
}
BENCHMARK(New_Graph_AddNodeAndUpdateIo)->Arg(11)->Arg(21)->Arg(110)->Arg(1010)->Iterations(1);

// Benchmarks growing the output count of an ExecuteGraph node one step at a
// time: FastNode::UpdateDataOutNum(i) is called for every i in
// [10, state.range(0)). Before timing, the op descriptor's outputs are
// extended until it has state.range(0) output descriptors.
static void New_Graph_AddNodeAndUpdateOutput_Step(benchmark::State &state) {
  constexpr int kNodeCount = 1;
  constexpr int kInitialIoNum = 10;
  const int target_num = state.range(0);

  auto graph = std::make_shared<ExecuteGraph>("graph0");
  std::shared_ptr<OpDesc> descs[kNodeCount] = {nullptr};
  OpDescCreate(kNodeCount, descs, kInitialIoNum);
  FastNode *fast_node = graph->AddNode(descs[0]);

  // Top up the descriptor's outputs so it covers the largest requested count.
  const GeTensorDesc extra_desc(GeShape(), FORMAT_ND, DT_FLOAT);
  const auto &desc = descs[0];
  for (int idx = desc->GetOutputsSize(); idx < target_num; ++idx) {
    desc->AddOutputDesc(extra_desc);
  }

  for (auto _ : state) {
    for (int out_num = kInitialIoNum; out_num < target_num; ++out_num) {
      fast_node->UpdateDataOutNum(out_num);
    }
    benchmark::ClobberMemory();
  }
}
BENCHMARK(New_Graph_AddNodeAndUpdateOutput_Step)->Arg(20)->Arg(110)->Arg(1010)->Iterations(1);

// Benchmarks growing the input count of an ExecuteGraph node one step at a
// time: FastNode::UpdateDataInNum(i) is called for every i in
// [10, state.range(0)).
//
// Before timing, the op descriptor's *inputs* are extended up to
// state.range(0). The previous setup extended the output descriptors instead
// (copied from the output-step benchmark), which left the input descriptors
// at their initial count while the input count was being raised.
static void New_Graph_AddNodeAndUpdateInput_Step(benchmark::State &state) {
  auto compute_graph = std::make_shared<ExecuteGraph>("graph0");
  int node_num = 1;
  int io_num = 10;
  int new_num = state.range(0);
  std::shared_ptr<OpDesc> op_desc[node_num] = {nullptr};
  OpDescCreate(node_num, op_desc, io_num);
  FastNode *node  = compute_graph->AddNode(op_desc[0]);

  // Mirror the output-step benchmark, but on the input side.
  const GeTensorDesc data_desc(GeShape(), FORMAT_ND, DT_FLOAT);
  for (int i = op_desc[0]->GetAllInputsSize(); i < new_num; ++i) {
    op_desc[0]->AddInputDesc(data_desc);
  }

  for (auto _ : state) {
    for (int i = io_num; i < new_num; ++i) {
      node->UpdateDataInNum(i);
    }
    benchmark::ClobberMemory();
  }
}
BENCHMARK(New_Graph_AddNodeAndUpdateInput_Step)->Arg(20)->Arg(110)->Arg(1010)->Iterations(1);

// Benchmarks NodeUtils::AppendInputAnchor() on a single legacy ComputeGraph
// node. The op descriptor is created via OpDescCreate with an I/O count of 10;
// state.range(0) is the value passed to AppendInputAnchor.
static void OLD_Graph_AddNodeAndUpdateIo(benchmark::State &state) {
  constexpr int kNodeCount = 1;
  constexpr int kInitialIoNum = 10;

  auto graph = std::make_shared<ComputeGraph>("graph0");
  std::shared_ptr<OpDesc> descs[kNodeCount] = {nullptr};
  OpDescCreate(kNodeCount, descs, kInitialIoNum);
  NodePtr legacy_node = graph->AddNode(descs[0]);
  const int target_num = state.range(0);

  for (auto _ : state) {
    auto status = NodeUtils::AppendInputAnchor(legacy_node, target_num);
    benchmark::DoNotOptimize(status);
    benchmark::ClobberMemory();
  }
}
BENCHMARK(OLD_Graph_AddNodeAndUpdateIo)->Arg(11)->Arg(21)->Arg(110)->Arg(1010)->Iterations(1);

// Benchmarks moving every node of a legacy ComputeGraph to a second graph and
// back again. state.range(0) is the number of nodes; neighbouring nodes are
// linked by one data edge (anchor 0 -> anchor 0) and one control edge.
// Each iteration adds all nodes to `graph1` while removing them from the
// original graph via RemoveJustNode, then performs the reverse move.
static void OLD_ChangeEdgeAndNodeOwner(benchmark::State &state) {
  auto compute_graph = std::make_shared<ge::ComputeGraph>("graph");
  const int node_num = static_cast<int>(state.range(0));
  const int edge_num = 5;
  // Heap-backed storage instead of variable-length arrays (non-standard C++).
  std::vector<NodePtr> node(node_num);
  std::vector<std::shared_ptr<OpDesc>> op_desc(node_num);
  OpDescCreate(node_num, op_desc.data(), edge_num);

  for (int i = 0; i < node_num; ++i) {
    node[i] = compute_graph->AddNode(op_desc[i]);
    if (node[i] == nullptr) {
      // Previously a silent return; surface the setup failure in the report.
      state.SkipWithError("OLD_ChangeEdgeAndNodeOwner add node error.");
      return;
    }
  }

  for (int i = 1; i < node_num; ++i) {
    GraphUtils::AddEdge(node[i - 1]->GetOutDataAnchor(0), node[i]->GetInDataAnchor(0));
    GraphUtils::AddEdge(node[i - 1]->GetOutControlAnchor(), node[i]->GetInControlAnchor());
  }

  auto graph1 = std::make_shared<ge::ComputeGraph>("graph1");
  for (auto _ : state) {
    // Move all nodes over to graph1 ...
    for (int i = 0; i < node_num; ++i) {
      graph1->AddNode(node[i]);
      GraphUtils::RemoveJustNode(compute_graph, node[i]);
    }

    // ... and back, so every iteration starts from the same state.
    for (int i = 0; i < node_num; ++i) {
      compute_graph->AddNode(node[i]);
      GraphUtils::RemoveJustNode(graph1, node[i]);
    }

    benchmark::ClobberMemory();
  }
}
BENCHMARK(OLD_ChangeEdgeAndNodeOwner)->Arg(10)->Arg(100)->Arg(1000);

// Benchmarks moving every node and its input data edges of an ExecuteGraph to
// a second graph and back again -- the ExecuteGraph counterpart of
// OLD_ChangeEdgeAndNodeOwner. state.range(0) is the number of nodes;
// neighbouring nodes are linked by one data edge (index 0 -> index 0) and one
// control edge.
// NOTE(review): only the edges returned by GetAllInDataEdgesRef() are moved;
// the control edges created during setup are not passed to MoveEdgeToGraph --
// confirm this is intended.
static void NEW_ChangeEdgeAndNodeOwner(benchmark::State &state) {
  auto compute_graph = std::make_shared<ge::ExecuteGraph>("graph");
  const int node_num = static_cast<int>(state.range(0));
  const int edge_num = 5;
  // Heap-backed storage instead of variable-length arrays (non-standard C++).
  std::vector<FastNode *> node(node_num, nullptr);
  std::vector<std::shared_ptr<OpDesc>> op_desc(node_num);
  OpDescCreate(node_num, op_desc.data(), edge_num);

  for (int i = 0; i < node_num; ++i) {
    node[i] = compute_graph->AddNode(op_desc[i]);
    if (node[i] == nullptr) {
      // Previously a silent return; surface the setup failure in the report.
      state.SkipWithError("NEW_ChangeEdgeAndNodeOwner add node error.");
      return;
    }
  }

  // Renamed from `edge` so the loop variable below no longer shadows it.
  std::vector<FastEdge *> data_edge(node_num, nullptr);
  std::vector<FastEdge *> ctrl_edge(node_num, nullptr);
  for (int i = 1; i < node_num; ++i) {
    data_edge[i] = compute_graph->AddEdge(node[i - 1], 0, node[i], 0);
    ctrl_edge[i] = compute_graph->AddEdge(node[i - 1], kControlEdgeIndex, node[i], kControlEdgeIndex);
  }

  // Hands every non-null input data edge of every node to `target`.
  const auto move_in_data_edges_to = [&node](ge::ExecuteGraph &target) {
    for (FastNode *cur : node) {
      auto &edges = cur->GetAllInDataEdgesRef();
      for (auto in_edge : edges) {
        if (in_edge != nullptr) {
          target.MoveEdgeToGraph(in_edge);
        }
      }
    }
  };

  auto graph1 = std::make_shared<ge::ExecuteGraph>("graph1");
  for (auto _ : state) {
    // Move nodes first, then their edges, over to graph1 ...
    for (int i = 0; i < node_num; ++i) {
      graph1->AddNode(node[i]);
    }
    move_in_data_edges_to(*graph1);

    // ... and back, so every iteration starts from the same state.
    for (int i = 0; i < node_num; ++i) {
      compute_graph->AddNode(node[i]);
    }
    move_in_data_edges_to(*compute_graph);

    benchmark::ClobberMemory();
  }
}
BENCHMARK(NEW_ChangeEdgeAndNodeOwner)->Arg(10)->Arg(100)->Arg(1000);

// Google Benchmark entry point: expands to a main() that runs the benchmarks
// registered with BENCHMARK(...) in this file.
BENCHMARK_MAIN();