Skip to content

Commit

Permalink
net/mlx5e: Infrastructure for duplicated offloading of TC flows
Browse files Browse the repository at this point in the history
Under uplink LAG or multipath schemes, traffic that matches one flow
might arrive on both uplink ports and transmitted through both
as part of supporting aggregation and high-availability.

To cope with the fact that the SW model might use logical SW port
(e.g uplink team or bond) but we have two HW ports with e-switch on
each, there are cases where in order to offload a SW TC rule we
need to duplicate it to two HW flows.

Since each HW rule has its own counter we also aggregate the counter
of both rules when a flow stats query is executed from user-space.

Introduce the changes for the different elements (add/delete/stats),
currently nothing is duplicated.

Signed-off-by: Roi Dayan <[email protected]>
Signed-off-by: Aviv Heller <[email protected]>
Signed-off-by: Shahar Klein <[email protected]>
Signed-off-by: Saeed Mahameed <[email protected]>
  • Loading branch information
roidayan authored and Saeed Mahameed committed Dec 14, 2018
1 parent ac004b8 commit 04de7dd
Show file tree
Hide file tree
Showing 3 changed files with 176 additions and 8 deletions.
174 changes: 167 additions & 7 deletions drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
#include "fs_core.h"
#include "en/port.h"
#include "en/tc_tun.h"
#include "lib/devcom.h"

struct mlx5_nic_flow_attr {
u32 action;
Expand All @@ -74,6 +75,7 @@ enum {
MLX5E_TC_FLOW_HAIRPIN = BIT(MLX5E_TC_FLOW_BASE + 3),
MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 4),
MLX5E_TC_FLOW_SLOW = BIT(MLX5E_TC_FLOW_BASE + 5),
MLX5E_TC_FLOW_DUP = BIT(MLX5E_TC_FLOW_BASE + 6),
};

#define MLX5E_TC_MAX_SPLITS 1
Expand Down Expand Up @@ -111,8 +113,10 @@ struct mlx5e_tc_flow {
* destinations.
*/
struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
struct mlx5e_tc_flow *peer_flow;
struct list_head mod_hdr; /* flows sharing the same mod hdr ID */
struct list_head hairpin; /* flows sharing the same hairpin */
struct list_head peer; /* flows with peer flow */
union {
struct mlx5_esw_flow_attr esw_attr[0];
struct mlx5_nic_flow_attr nic_attr[0];
Expand Down Expand Up @@ -1249,13 +1253,48 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv,
}
}

static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
{
struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;

if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) ||
!(flow->flags & MLX5E_TC_FLOW_DUP))
return;

mutex_lock(&esw->offloads.peer_mutex);
list_del(&flow->peer);
mutex_unlock(&esw->offloads.peer_mutex);

flow->flags &= ~MLX5E_TC_FLOW_DUP;

mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
kvfree(flow->peer_flow);
flow->peer_flow = NULL;
}

static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
{
struct mlx5_core_dev *dev = flow->priv->mdev;
struct mlx5_devcom *devcom = dev->priv.devcom;
struct mlx5_eswitch *peer_esw;

peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
if (!peer_esw)
return;

__mlx5e_tc_del_fdb_peer_flow(flow);
mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
}

static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow)
{
if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
mlx5e_tc_del_fdb_peer_flow(flow);
mlx5e_tc_del_fdb_flow(priv, flow);
else
} else {
mlx5e_tc_del_nic_flow(priv, flow);
}
}


Expand Down Expand Up @@ -2660,6 +2699,11 @@ static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv)
return &priv->fs.tc.ht;
}

static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
{
return false;
}

static int
mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
struct tc_cls_flower_offload *f, u16 flow_flags,
Expand Down Expand Up @@ -2693,11 +2737,13 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
}

static int
mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f,
u16 flow_flags,
struct net_device *filter_dev,
struct mlx5e_tc_flow **__flow)
__mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f,
u16 flow_flags,
struct net_device *filter_dev,
struct mlx5_eswitch_rep *in_rep,
struct mlx5_core_dev *in_mdev,
struct mlx5e_tc_flow **__flow)
{
struct netlink_ext_ack *extack = f->common.extack;
struct mlx5e_tc_flow_parse_attr *parse_attr;
Expand All @@ -2723,6 +2769,8 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
if (err)
goto err_free;

flow->esw_attr->in_rep = in_rep;
flow->esw_attr->in_mdev = in_mdev;
err = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow, extack);
if (err)
goto err_free;
Expand All @@ -2738,6 +2786,87 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
return err;
}

static int mlx5e_tc_add_fdb_peer_flow(struct tc_cls_flower_offload *f,
struct mlx5e_tc_flow *flow)
{
struct mlx5e_priv *priv = flow->priv, *peer_priv;
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5e_rep_priv *peer_urpriv;
struct mlx5e_tc_flow *peer_flow;
struct mlx5_core_dev *in_mdev;
int err = 0;

peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
if (!peer_esw)
return -ENODEV;

peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
peer_priv = netdev_priv(peer_urpriv->netdev);

/* in_mdev is assigned of which the packet originated from.
* So packets redirected to uplink use the same mdev of the
* original flow and packets redirected from uplink use the
* peer mdev.
*/
if (flow->esw_attr->in_rep->vport == FDB_UPLINK_VPORT)
in_mdev = peer_priv->mdev;
else
in_mdev = priv->mdev;

parse_attr = flow->esw_attr->parse_attr;
err = __mlx5e_add_fdb_flow(peer_priv, f, flow->flags,
parse_attr->filter_dev,
flow->esw_attr->in_rep, in_mdev, &peer_flow);
if (err)
goto out;

flow->peer_flow = peer_flow;
flow->flags |= MLX5E_TC_FLOW_DUP;
mutex_lock(&esw->offloads.peer_mutex);
list_add_tail(&flow->peer, &esw->offloads.peer_flows);
mutex_unlock(&esw->offloads.peer_mutex);

out:
mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
return err;
}

static int
mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f,
u16 flow_flags,
struct net_device *filter_dev,
struct mlx5e_tc_flow **__flow)
{
struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5_eswitch_rep *in_rep = rpriv->rep;
struct mlx5_core_dev *in_mdev = priv->mdev;
struct mlx5e_tc_flow *flow;
int err;

err = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
in_mdev, &flow);
if (err)
goto out;

if (is_peer_flow_needed(flow)) {
err = mlx5e_tc_add_fdb_peer_flow(f, flow);
if (err) {
mlx5e_tc_del_fdb_flow(priv, flow);
goto out;
}
}

*__flow = flow;

return 0;

out:
return err;
}

static int
mlx5e_add_nic_flow(struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f,
Expand Down Expand Up @@ -2882,7 +3011,9 @@ int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f, int flags)
{
struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
struct rhashtable *tc_ht = get_tc_ht(priv);
struct mlx5_eswitch *peer_esw;
struct mlx5e_tc_flow *flow;
struct mlx5_fc *counter;
u64 bytes;
Expand All @@ -2902,6 +3033,27 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,

mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);

peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
if (!peer_esw)
goto out;

if ((flow->flags & MLX5E_TC_FLOW_DUP) &&
(flow->peer_flow->flags & MLX5E_TC_FLOW_OFFLOADED)) {
u64 bytes2;
u64 packets2;
u64 lastuse2;

counter = mlx5e_tc_get_counter(flow->peer_flow);
mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);

bytes += bytes2;
packets += packets2;
lastuse = max_t(u64, lastuse, lastuse2);
}

mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);

out:
tcf_exts_stats_update(f->exts, bytes, packets, lastuse);

return 0;
Expand Down Expand Up @@ -3014,3 +3166,11 @@ int mlx5e_tc_num_filters(struct mlx5e_priv *priv)

return atomic_read(&tc_ht->nelems);
}

void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
{
struct mlx5e_tc_flow *flow, *tmp;

list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
__mlx5e_tc_del_fdb_peer_flow(flow);
}
2 changes: 2 additions & 0 deletions drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,8 @@ struct mlx5_esw_offload {
struct mlx5_flow_table *ft_offloads;
struct mlx5_flow_group *vport_rx_group;
struct mlx5_eswitch_rep *vport_reps;
struct list_head peer_flows;
struct mutex peer_mutex;
DECLARE_HASHTABLE(encap_tbl, 8);
DECLARE_HASHTABLE(mod_hdr_tbl, 8);
u8 inline_mode;
Expand Down
8 changes: 7 additions & 1 deletion drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
Original file line number Diff line number Diff line change
Expand Up @@ -563,7 +563,7 @@ static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev,
dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
dest->vport.num = 0;
dest->vport.vhca_id = MLX5_CAP_GEN(peer_dev, vhca_id);
dest->vport.vhca_id_valid = 1;
dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
}

static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
Expand Down Expand Up @@ -1313,8 +1313,11 @@ static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
return 0;
}

void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw);

static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw)
{
mlx5e_tc_clean_fdb_peer_flows(esw);
esw_del_fdb_peer_miss_rules(esw);
}

Expand Down Expand Up @@ -1365,6 +1368,9 @@ static void esw_offloads_devcom_init(struct mlx5_eswitch *esw)
{
struct mlx5_devcom *devcom = esw->dev->priv.devcom;

INIT_LIST_HEAD(&esw->offloads.peer_flows);
mutex_init(&esw->offloads.peer_mutex);

if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
return;

Expand Down

0 comments on commit 04de7dd

Please sign in to comment.