From 04de7dda7394fa9c2b0fc9cec65661d9b4f0d04d Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Sun, 11 Nov 2018 22:24:03 +0200 Subject: [PATCH] net/mlx5e: Infrastructure for duplicated offloading of TC flows Under uplink LAG or multipath schemes, traffic that matches one flow might arrive on both uplink ports and transmitted through both as part of supporting aggregation and high-availability. To cope with the fact that the SW model might use logical SW port (e.g uplink team or bond) but we have two HW ports with e-switch on each, there are cases where in order to offload a SW TC rule we need to duplicate it to two HW flows. Since each HW rule has its own counter we also aggregate the counter of both rules when a flow stats query is executed from user-space. Introduce the changes for the different elements (add/delete/stats), currently nothing is duplicated. Signed-off-by: Roi Dayan Signed-off-by: Aviv Heller Signed-off-by: Shahar Klein Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 174 +++++++++++++++++- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 2 + .../mellanox/mlx5/core/eswitch_offloads.c | 8 +- 3 files changed, 176 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 0921213561cb98..eacccac05dda59 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -52,6 +52,7 @@ #include "fs_core.h" #include "en/port.h" #include "en/tc_tun.h" +#include "lib/devcom.h" struct mlx5_nic_flow_attr { u32 action; @@ -74,6 +75,7 @@ enum { MLX5E_TC_FLOW_HAIRPIN = BIT(MLX5E_TC_FLOW_BASE + 3), MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 4), MLX5E_TC_FLOW_SLOW = BIT(MLX5E_TC_FLOW_BASE + 5), + MLX5E_TC_FLOW_DUP = BIT(MLX5E_TC_FLOW_BASE + 6), }; #define MLX5E_TC_MAX_SPLITS 1 @@ -111,8 +113,10 @@ struct mlx5e_tc_flow { * destinations. */ struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS]; + struct mlx5e_tc_flow *peer_flow; struct list_head mod_hdr; /* flows sharing the same mod hdr ID */ struct list_head hairpin; /* flows sharing the same hairpin */ + struct list_head peer; /* flows with peer flow */ union { struct mlx5_esw_flow_attr esw_attr[0]; struct mlx5_nic_flow_attr nic_attr[0]; @@ -1249,13 +1253,48 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv, } } +static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) +{ + struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch; + + if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) || + !(flow->flags & MLX5E_TC_FLOW_DUP)) + return; + + mutex_lock(&esw->offloads.peer_mutex); + list_del(&flow->peer); + mutex_unlock(&esw->offloads.peer_mutex); + + flow->flags &= ~MLX5E_TC_FLOW_DUP; + + mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow); + kvfree(flow->peer_flow); + flow->peer_flow = NULL; +} + +static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) +{ + struct mlx5_core_dev *dev = flow->priv->mdev; + struct mlx5_devcom *devcom = dev->priv.devcom; + struct mlx5_eswitch *peer_esw; + + peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + if (!peer_esw) + return; + + __mlx5e_tc_del_fdb_peer_flow(flow); + mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); +} + static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { + mlx5e_tc_del_fdb_peer_flow(flow); mlx5e_tc_del_fdb_flow(priv, flow); - else + } else { mlx5e_tc_del_nic_flow(priv, flow); + } } @@ -2660,6 +2699,11 @@ static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv) return &priv->fs.tc.ht; } +static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) +{ + return false; +} + static int mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, struct tc_cls_flower_offload *f, u16 flow_flags, @@ -2693,11 +2737,13 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, } static int -mlx5e_add_fdb_flow(struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, - u16 flow_flags, - struct net_device *filter_dev, - struct mlx5e_tc_flow **__flow) +__mlx5e_add_fdb_flow(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f, + u16 flow_flags, + struct net_device *filter_dev, + struct mlx5_eswitch_rep *in_rep, + struct mlx5_core_dev *in_mdev, + struct mlx5e_tc_flow **__flow) { struct netlink_ext_ack *extack = f->common.extack; struct mlx5e_tc_flow_parse_attr *parse_attr; @@ -2723,6 +2769,8 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv, if (err) goto err_free; + flow->esw_attr->in_rep = in_rep; + flow->esw_attr->in_mdev = in_mdev; err = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow, extack); if (err) goto err_free; @@ -2738,6 +2786,87 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv, return err; } +static int mlx5e_tc_add_fdb_peer_flow(struct tc_cls_flower_offload *f, + struct mlx5e_tc_flow *flow) +{ + struct mlx5e_priv *priv = flow->priv, *peer_priv; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw; + struct mlx5_devcom *devcom = priv->mdev->priv.devcom; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5e_rep_priv *peer_urpriv; + struct mlx5e_tc_flow *peer_flow; + struct mlx5_core_dev *in_mdev; + int err = 0; + + peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + if (!peer_esw) + return -ENODEV; + + peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH); + peer_priv = netdev_priv(peer_urpriv->netdev); + + /* in_mdev is assigned of which the packet originated from. + * So packets redirected to uplink use the same mdev of the + * original flow and packets redirected from uplink use the + * peer mdev. + */ + if (flow->esw_attr->in_rep->vport == FDB_UPLINK_VPORT) + in_mdev = peer_priv->mdev; + else + in_mdev = priv->mdev; + + parse_attr = flow->esw_attr->parse_attr; + err = __mlx5e_add_fdb_flow(peer_priv, f, flow->flags, + parse_attr->filter_dev, + flow->esw_attr->in_rep, in_mdev, &peer_flow); + if (err) + goto out; + + flow->peer_flow = peer_flow; + flow->flags |= MLX5E_TC_FLOW_DUP; + mutex_lock(&esw->offloads.peer_mutex); + list_add_tail(&flow->peer, &esw->offloads.peer_flows); + mutex_unlock(&esw->offloads.peer_mutex); + +out: + mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + return err; +} + +static int +mlx5e_add_fdb_flow(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f, + u16 flow_flags, + struct net_device *filter_dev, + struct mlx5e_tc_flow **__flow) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *in_rep = rpriv->rep; + struct mlx5_core_dev *in_mdev = priv->mdev; + struct mlx5e_tc_flow *flow; + int err; + + err = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep, + in_mdev, &flow); + if (err) + goto out; + + if (is_peer_flow_needed(flow)) { + err = mlx5e_tc_add_fdb_peer_flow(f, flow); + if (err) { + mlx5e_tc_del_fdb_flow(priv, flow); + goto out; + } + } + + *__flow = flow; + + return 0; + +out: + return err; +} + static int mlx5e_add_nic_flow(struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, @@ -2882,7 +3011,9 @@ int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, int flags) { + struct mlx5_devcom *devcom = priv->mdev->priv.devcom; struct rhashtable *tc_ht = get_tc_ht(priv); + struct mlx5_eswitch *peer_esw; struct mlx5e_tc_flow *flow; struct mlx5_fc *counter; u64 bytes; @@ -2902,6 +3033,27 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); + peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + if (!peer_esw) + goto out; + + if ((flow->flags & MLX5E_TC_FLOW_DUP) && + (flow->peer_flow->flags & MLX5E_TC_FLOW_OFFLOADED)) { + u64 bytes2; + u64 packets2; + u64 lastuse2; + + counter = mlx5e_tc_get_counter(flow->peer_flow); + mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2); + + bytes += bytes2; + packets += packets2; + lastuse = max_t(u64, lastuse, lastuse2); + } + + mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + +out: tcf_exts_stats_update(f->exts, bytes, packets, lastuse); return 0; @@ -3014,3 +3166,11 @@ int mlx5e_tc_num_filters(struct mlx5e_priv *priv) return atomic_read(&tc_ht->nelems); } + +void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw) +{ + struct mlx5e_tc_flow *flow, *tmp; + + list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer) + __mlx5e_tc_del_fdb_peer_flow(flow); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 9dba6ad5744d20..4d048f7e703b96 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -167,6 +167,8 @@ struct mlx5_esw_offload { struct mlx5_flow_table *ft_offloads; struct mlx5_flow_group *vport_rx_group; struct mlx5_eswitch_rep *vport_reps; + struct list_head peer_flows; + struct mutex peer_mutex; DECLARE_HASHTABLE(encap_tbl, 8); DECLARE_HASHTABLE(mod_hdr_tbl, 8); u8 inline_mode; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index a6927ca3d4cab9..76cb5720247433 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -563,7 +563,7 @@ static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev, dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest->vport.num = 0; dest->vport.vhca_id = MLX5_CAP_GEN(peer_dev, vhca_id); - dest->vport.vhca_id_valid = 1; + dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; } static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, @@ -1313,8 +1313,11 @@ static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw, return 0; } +void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw); + static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw) { + mlx5e_tc_clean_fdb_peer_flows(esw); esw_del_fdb_peer_miss_rules(esw); } @@ -1365,6 +1368,9 @@ static void esw_offloads_devcom_init(struct mlx5_eswitch *esw) { struct mlx5_devcom *devcom = esw->dev->priv.devcom; + INIT_LIST_HEAD(&esw->offloads.peer_flows); + mutex_init(&esw->offloads.peer_mutex); + if (!MLX5_CAP_ESW(esw->dev, merged_eswitch)) return;