* Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved.
* gazelle is licensed under the Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* http://license.coscl.org.cn/MulanPSL2
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
* PURPOSE.
* See the Mulan PSL v2 for more details.
*/
#include <rte_eal.h>
#include <rte_version.h>
#if RTE_VERSION < RTE_VERSION_NUM(23, 11, 0, 0)
#include <rte_kni.h>
#endif
#include <rte_ethdev.h>
#include <lwip/etharp.h>
#include <lwip/ethip6.h>
#include <lwip/lwipgz_posix_api.h>
#include <netif/ethernet.h>
#include <arch/sys_arch.h>
#include <securec.h>
#include "common/dpdk_common.h"
#include "lstack_cfg.h"
#include "lstack_vdev.h"
#include "lstack_stack_stat.h"
#include "lstack_log.h"
#include "lstack_dpdk.h"
#include "lstack_protocol_stack.h"
#include "lstack_thread_rpc.h"
#include "lstack_flow.h"
#include "lstack_tx_cache.h"
#include "lstack_virtio.h"
#include "lstack_ethdev.h"
#include "lstack_mempool.h"
* so that it can have the arp table */
static void stack_broadcast_arp(struct rte_mbuf *mbuf, struct protocol_stack *cur_stack)
{
struct protocol_stack_group *stack_group = get_protocol_stack_group();
struct rte_mbuf *mbuf_copy = NULL;
struct protocol_stack *stack = NULL;
int32_t ret;
for (int32_t i = 0; i < stack_group->stack_num; i++) {
stack = stack_group->stacks[i];
if (cur_stack == stack) {
continue;
}
if (stack == NULL || !(netif_is_up(&stack->netif))) {
continue;
}
ret = mem_get_mbuf_bulk(stack->stack_idx, &mbuf_copy, 1, true);
if (ret == 0) {
stack->stats.rx_allocmbuf_fail++;
return;
}
copy_mbuf(mbuf_copy, mbuf);
ret = rpc_call_arp(stack->stack_idx, mbuf_copy);
if (ret != 0) {
mem_put_mbuf_bulk(&mbuf_copy, 1);
return;
}
}
#if RTE_VERSION < RTE_VERSION_NUM(23, 11, 0, 0)
if (get_global_cfg_params()->kni_switch) {
ret = mem_get_mbuf_bulk(cur_stack->stack_idx, &mbuf_copy, 1, true);
if (ret == 0) {
cur_stack->stats.rx_allocmbuf_fail++;
return;
}
copy_mbuf(mbuf_copy, mbuf);
kni_handle_tx(mbuf_copy);
}
#endif
if (get_global_cfg_params()->flow_bifurcation) {
ret = mem_get_mbuf_bulk(cur_stack->stack_idx, &mbuf_copy, 1, true);
if (ret == 0) {
cur_stack->stats.rx_allocmbuf_fail++;
return;
}
copy_mbuf(mbuf_copy, mbuf);
virtio_tap_process_tx(cur_stack->queue_id, mbuf_copy);
}
return;
}
void eth_dev_recv(struct rte_mbuf *mbuf, struct protocol_stack *stack)
{
int ret;
struct pbuf *next = NULL;
struct pbuf *prev = NULL;
struct pbuf *head = NULL;
struct rte_mbuf *m = mbuf;
uint16_t len, pkt_len;
struct rte_mbuf *next_m = NULL;
pkt_len = (uint16_t)rte_pktmbuf_pkt_len(m);
while (m != NULL) {
len = (uint16_t)rte_pktmbuf_data_len(m);
next = mbuf_to_pbuf(m);
mem_init_pbuf(next, PBUF_RAW, pkt_len, len, PBUF_POOL);
pkt_len -= len;
if (head == NULL) {
head = next;
}
if (prev != NULL) {
prev->next = next;
}
prev = next;
next_m = m->next;
m->next = NULL;
m = next_m;
mem_preput_pbuf(next);
}
if (head != NULL) {
ret = stack->netif.input(head, &stack->netif);
if (ret != ERR_OK) {
LSTACK_LOG(ERR, LSTACK, "eth_dev_recv: failed to handle rx pbuf ret=%d\n", ret);
stack->stats.rx_drop++;
}
}
}
#if RTE_VERSION < RTE_VERSION_NUM(23, 11, 0, 0)
void kni_handle_rx(uint16_t port_id)
{
struct rte_mbuf *pkts_burst[GAZELLE_PACKET_READ_SIZE];
struct rte_kni* kni = get_gazelle_kni();
uint32_t nb_kni_rx = 0;
if (kni) {
nb_kni_rx = rte_kni_rx_burst(kni, pkts_burst, GAZELLE_PACKET_READ_SIZE);
}
if (nb_kni_rx > 0) {
uint16_t nb_rx = rte_eth_tx_burst(port_id, 0, pkts_burst, nb_kni_rx);
for (uint16_t i = nb_rx; i < nb_kni_rx; ++i) {
rte_pktmbuf_free(pkts_burst[i]);
}
}
return;
}
void kni_handle_tx(struct rte_mbuf *mbuf)
{
if (!get_global_cfg_params()->kni_switch ||
!get_kni_started()) {
rte_pktmbuf_free(mbuf);
return;
}
struct rte_ipv4_hdr *ipv4_hdr;
uint16_t l3_offset = mbuf->l2_len;
ipv4_hdr = (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(mbuf, char*) +
l3_offset);
if (mbuf->nb_segs > 1) {
ipv4_hdr->hdr_checksum = 0;
ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr);
}
if (!rte_kni_tx_burst(get_gazelle_kni(), &mbuf, 1)) {
rte_pktmbuf_free(mbuf);
}
}
#endif
#define IS_ARP_PKT(ptype) ((ptype & RTE_PTYPE_L2_ETHER_ARP) == RTE_PTYPE_L2_ETHER_ARP)
#define IS_IPV4_TCP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \
((ptype & RTE_PTYPE_L4_TCP) == RTE_PTYPE_L4_TCP) && \
((ptype & RTE_PTYPE_L4_FRAG) != RTE_PTYPE_L4_FRAG) && \
(RTE_ETH_IS_TUNNEL_PKT(ptype) == 0))
#define IS_IPV6_TCP_PKT(ptype) (RTE_ETH_IS_IPV6_HDR(ptype) && \
((ptype & RTE_PTYPE_L4_TCP) == RTE_PTYPE_L4_TCP) && \
((ptype & RTE_PTYPE_L4_FRAG) != RTE_PTYPE_L4_FRAG) && \
(RTE_ETH_IS_TUNNEL_PKT(ptype) == 0))
#define IS_IPV4_UDP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \
((ptype & RTE_PTYPE_L4_UDP) == RTE_PTYPE_L4_UDP) && \
(RTE_ETH_IS_TUNNEL_PKT(ptype) == 0))
#define IS_IPV6_UDP_PKT(ptype) (RTE_ETH_IS_IPV6_HDR(ptype) && \
((ptype & RTE_PTYPE_L4_UDP) == RTE_PTYPE_L4_UDP) && \
(RTE_ETH_IS_TUNNEL_PKT(ptype) == 0))
#define IS_ICMPV6_PKT(ptype) (RTE_ETH_IS_IPV6_HDR(ptype) && \
((ptype & RTE_PTYPE_L4_ICMP) == RTE_PTYPE_L4_ICMP) && \
(RTE_ETH_IS_TUNNEL_PKT(ptype) == 0))
static uint16_t eth_dev_get_dst_port(struct rte_mbuf *pkt)
{
uint16_t dst_port = VIRTIO_PORT_INVALID;
uint32_t packet_type = pkt->packet_type;
void *l4_hdr = rte_pktmbuf_mtod_offset(pkt, void *, pkt->l2_len + pkt->l3_len);
if (IS_IPV4_TCP_PKT(packet_type) || IS_IPV6_TCP_PKT(packet_type)) {
dst_port = rte_be_to_cpu_16(((struct rte_tcp_hdr *)l4_hdr)->dst_port);
} else if (IS_IPV4_UDP_PKT(packet_type) || IS_IPV6_UDP_PKT(packet_type)) {
dst_port = rte_be_to_cpu_16(((struct rte_udp_hdr *)l4_hdr)->dst_port);
}
return dst_port;
}
int32_t eth_dev_poll(void)
{
uint32_t nr_pkts;
struct cfg_params *cfg = get_global_cfg_params();
struct protocol_stack *stack = get_protocol_stack();
nr_pkts = stack->dev_ops.rx_poll(stack, stack->pkts, cfg->nic_read_number);
if (nr_pkts == 0) {
return 0;
}
if (!use_ltran() && get_protocol_stack_group()->latency_start) {
uint64_t time_stamp = sys_now_us();
time_stamp_into_mbuf(nr_pkts, stack->pkts, time_stamp);
}
for (uint32_t i = 0; i < nr_pkts; i++) {
int transfer_type = TRANSFER_CURRENT_THREAD;
if (!use_ltran()) {
if (unlikely(IS_ARP_PKT(stack->pkts[i]->packet_type)) ||
unlikely(IS_ICMPV6_PKT(stack->pkts[i]->packet_type))) {
stack_broadcast_arp(stack->pkts[i], stack);
transfer_arp_to_other_process(stack->pkts[i]);
} else {
if (get_global_cfg_params()->tuple_filter && stack->queue_id == 0) {
transfer_type = distribute_pakages(stack->pkts[i]);
}
if (get_global_cfg_params()->flow_bifurcation) {
uint16_t dst_port = eth_dev_get_dst_port(stack->pkts[i]);
if (virtio_distribute_pkg_to_kernel(dst_port)) {
transfer_type = TRANSFER_KERNEL;
}
}
}
}
if (likely(transfer_type == TRANSFER_CURRENT_THREAD)) {
eth_dev_recv(stack->pkts[i], stack);
} else if (transfer_type == TRANSFER_KERNEL) {
if (get_global_cfg_params()->flow_bifurcation) {
virtio_tap_process_tx(stack->queue_id, stack->pkts[i]);
} else {
#if RTE_VERSION < RTE_VERSION_NUM(23, 11, 0, 0)
kni_handle_tx(stack->pkts[i]);
#else
rte_pktmbuf_free(stack->pkts[i]);
#endif
}
} else {
}
}
stack->stats.rx += nr_pkts;
return nr_pkts;
}
static err_t eth_dev_output(struct netif *netif, struct pbuf *pbuf)
{
struct protocol_stack *stack = get_protocol_stack();
struct rte_mbuf *pre_mbuf = NULL;
struct rte_mbuf *first_mbuf = NULL;
struct rte_mbuf *mbuf;
void *buf_addr;
while (likely(pbuf != NULL)) {
mbuf = pbuf_to_mbuf(pbuf);
rte_mbuf_refcnt_update(mbuf, 1);
mbuf->data_len = pbuf->len;
mbuf->pkt_len = pbuf->tot_len;
mbuf->next = NULL;
* |rte_mbuf | mbuf_private | data_off | data |
* ^ ^
* buf_addr payload
* m->buf_addr pointer pbuf->payload
*/
buf_addr = rte_pktmbuf_mtod(mbuf, void *);
mbuf->data_off += (uint8_t *)pbuf->payload - (uint8_t *)buf_addr;
if (first_mbuf == NULL) {
first_mbuf = mbuf;
first_mbuf->nb_segs = 1;
} else {
first_mbuf->nb_segs++;
pre_mbuf->next = mbuf;
}
pre_mbuf = mbuf;
if (get_protocol_stack_group()->latency_start)
calculate_lstack_latency(stack->stack_idx, &pbuf, 1, GAZELLE_LATENCY_WRITE_LSTACK, 0);
pbuf = pbuf->next;
}
uint32_t sent_pkts = stack->dev_ops.tx_xmit(stack, &first_mbuf, 1);
stack->stats.tx += sent_pkts;
if (sent_pkts < 1) {
mem_put_mbuf_bulk(&first_mbuf, 1);
stack->stats.tx_drop++;
return ERR_MEM;
}
return ERR_OK;
}
static err_t eth_dev_init(struct netif *netif)
{
struct cfg_params *cfg = get_global_cfg_params();
netif->name[0] = 'e';
netif->name[1] = 't';
netif->flags |= NETIF_FLAG_BROADCAST | NETIF_FLAG_ETHARP | NETIF_FLAG_IGMP | NETIF_FLAG_MLD6;
netif->mtu = GAZELLE_IP_MTU;
netif->output = etharp_output;
netif->linkoutput = eth_dev_output;
netif->output_ip6 = ethip6_output;
int32_t ret;
ret = memcpy_s(netif->hwaddr, sizeof(netif->hwaddr), cfg->mac_addr, ETHER_ADDR_LEN);
if (ret != EOK) {
LSTACK_LOG(ERR, LSTACK, "memcpy_s fail ret=%d\n", ret);
return ERR_MEM;
}
netif->hwaddr_len = ETHER_ADDR_LEN;
if (xdp_eth_enabled()) {
netif_set_rxol_flags(netif, RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
RTE_ETH_RX_OFFLOAD_IPV4_CKSUM);
netif_set_txol_flags(netif, RTE_ETH_TX_OFFLOAD_TCP_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_TSO);
netif_set_max_pbuf_frags(netif, OFFLOAD_TX_TSO_4K_FRAGS);
} else {
netif_set_rxol_flags(netif, get_protocol_stack_group()->rx_offload);
netif_set_txol_flags(netif, get_protocol_stack_group()->tx_offload);
netif_set_max_pbuf_frags(netif, OFFLOAD_TX_TSO_MTU_FRAGS);
}
return ERR_OK;
}
int32_t ethdev_init(struct protocol_stack *stack)
{
struct cfg_params *cfg = get_global_cfg_params();
int ret = 0;
vdev_dev_ops_init(&stack->dev_ops);
if (cfg->send_cache_mode) {
ret = tx_cache_init(stack->queue_id, stack, &stack->dev_ops);
if (ret < 0) {
return ret;
}
}
if (use_ltran()) {
stack->rx_ring_used = 0;
int32_t ret = fill_mbuf_to_ring(stack->stack_idx, stack->rx_ring, RING_SIZE(VDEV_RX_QUEUE_SZ));
if (ret != 0) {
LSTACK_LOG(ERR, LSTACK, "fill mbuf to rx_ring failed ret=%d\n", ret);
return ret;
}
} else {
if (cfg->tuple_filter && stack->queue_id == 0) {
flow_init();
}
}
netif_set_default(&stack->netif);
struct netif *netif;
if (!ip4_addr_isany(&cfg->host_addr)) {
netif = netif_add(&stack->netif, &cfg->host_addr, &cfg->netmask,
&cfg->gateway_addr, NULL, eth_dev_init, ethernet_input);
} else {
netif = netif_add(&stack->netif, NULL, NULL, NULL, NULL, eth_dev_init, ethernet_input);
}
if (netif == NULL) {
LSTACK_LOG(ERR, LSTACK, "netif_add failed\n");
return ERR_IF;
}
if (!ip6_addr_isany(&cfg->host_addr6)) {
netif_ip6_addr_set(&stack->netif, 0, &cfg->host_addr6);
netif_ip6_addr_set_state(&stack->netif, 0, IP6_ADDR_VALID);
}
if (get_global_cfg_params()->vlan_mode >= 0 && get_global_cfg_params()->vlan_mode <= 4094) {
netif_set_vlan_tci(&stack->netif, (u16_t)get_global_cfg_params()->vlan_mode);
}
netif_set_link_up(&stack->netif);
netif_set_up(&stack->netif);
return 0;
}