#include "mlx5_ib.h"
#include "srq.h"
- static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq)
+ static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq, struct mlx5_eqe *eqe)
{
struct ib_cq *ibcq = &to_mibcq(cq)->ibcq;
case MLX5_CQE_SIG_ERR:
sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64;
- read_lock(&dev->mdev->priv.mkey_table.lock);
- mmkey = __mlx5_mr_lookup(dev->mdev,
- mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
+ xa_lock(&dev->mdev->priv.mkey_table);
+ mmkey = xa_load(&dev->mdev->priv.mkey_table,
+ mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
mr = to_mibmr(mmkey);
get_sig_err_item(sig_err_cqe, &mr->sig->err_item);
mr->sig->sig_err_exists = true;
mr->sig->err_item.expected,
mr->sig->err_item.actual);
- read_unlock(&dev->mdev->priv.mkey_table.lock);
+ xa_unlock(&dev->mdev->priv.mkey_table);
goto repoll;
}
cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}
-struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata)
{
+ struct ib_device *ibdev = ibcq->device;
int entries = attr->cqe;
int vector = attr->comp_vector;
struct mlx5_ib_dev *dev = to_mdev(ibdev);
- struct mlx5_ib_cq *cq;
+ struct mlx5_ib_cq *cq = to_mcq(ibcq);
+ u32 out[MLX5_ST_SZ_DW(create_cq_out)];
int uninitialized_var(index);
int uninitialized_var(inlen);
u32 *cqb = NULL;
if (entries < 0 ||
(entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))))
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (check_cq_create_flags(attr->flags))
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
entries = roundup_pow_of_two(entries + 1);
if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)))
- return ERR_PTR(-EINVAL);
-
- cq = kzalloc(sizeof(*cq), GFP_KERNEL);
- if (!cq)
- return ERR_PTR(-ENOMEM);
+ return -EINVAL;
cq->ibcq.cqe = entries - 1;
mutex_init(&cq->resize_mutex);
err = create_cq_user(dev, udata, cq, entries, &cqb, &cqe_size,
&index, &inlen);
if (err)
- goto err_create;
+ return err;
} else {
cqe_size = cache_line_size() == 128 ? 128 : 64;
err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb,
&index, &inlen);
if (err)
- goto err_create;
+ return err;
INIT_WORK(&cq->notify_work, notify_soft_wc_handler);
}
if (cq->create_flags & IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN)
MLX5_SET(cqc, cqc, oi, 1);
- err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen);
+ err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen, out, sizeof(out));
if (err)
goto err_cqb;
kvfree(cqb);
- return &cq->ibcq;
+ return 0;
err_cmd:
mlx5_core_destroy_cq(dev->mdev, &cq->mcq);
destroy_cq_user(cq, udata);
else
destroy_cq_kernel(dev, cq);
-
-err_create:
- kfree(cq);
-
- return ERR_PTR(err);
+ return err;
}
-int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
+void mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(cq->device);
struct mlx5_ib_cq *mcq = to_mcq(cq);
destroy_cq_user(mcq, udata);
else
destroy_cq_kernel(dev, mcq);
-
- kfree(mcq);
-
- return 0;
}
static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn)
return 0;
}
-static void un_resize_user(struct mlx5_ib_cq *cq)
-{
- ib_umem_release(cq->resize_umem);
-}
-
static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
int entries, int cqe_size)
{
return err;
}
-static void un_resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
-{
- free_cq_buf(dev, cq->resize_buf);
- cq->resize_buf = NULL;
-}
-
static int copy_resize_cqes(struct mlx5_ib_cq *cq)
{
struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
kvfree(in);
ex_resize:
- if (udata)
- un_resize_user(cq);
- else
- un_resize_kernel(dev, cq);
+ ib_umem_release(cq->resize_umem);
+ if (!udata) {
+ free_cq_buf(dev, cq->resize_buf);
+ cq->resize_buf = NULL;
+ }
ex:
mutex_unlock(&cq->resize_mutex);
return err;
int vport_index;
ibdev = mlx5_ib_get_uplink_ibdev(dev->priv.eswitch);
- vport_index = ibdev->free_port++;
+ vport_index = rep->vport_index;
ibdev->port[vport_index].rep = rep;
+ rep->rep_data[REP_IB].priv = ibdev;
write_lock(&ibdev->port[vport_index].roce.netdev_lock);
ibdev->port[vport_index].roce.netdev =
mlx5_ib_get_rep_netdev(dev->priv.eswitch, rep->vport);
}
ibdev->is_rep = true;
- vport_index = ibdev->free_port++;
+ vport_index = rep->vport_index;
ibdev->port[vport_index].rep = rep;
ibdev->port[vport_index].roce.netdev =
mlx5_ib_get_rep_netdev(dev->priv.eswitch, rep->vport);
static void
mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
{
- struct mlx5_ib_dev *dev;
-
- if (!rep->rep_data[REP_IB].priv ||
- rep->vport != MLX5_VPORT_UPLINK)
- return;
+ struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep);
+ struct mlx5_ib_port *port;
- dev = mlx5_ib_rep_to_dev(rep);
- __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
+ port = &dev->port[rep->vport_index]; /* port slot is selected by the rep's own vport_index */
+ write_lock(&port->roce.netdev_lock); /* clear the port's netdev under its netdev_lock */
+ port->roce.netdev = NULL;
+ write_unlock(&port->roce.netdev_lock);
rep->rep_data[REP_IB].priv = NULL;
+ port->rep = NULL; /* undo the port <-> rep association */
+
+ if (rep->vport == MLX5_VPORT_UPLINK) /* only the uplink rep removes the IB device itself */
+ __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
}
static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep)
}
struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
- int vport_index)
+ u16 vport_num)
{
- return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_IB);
+ return mlx5_eswitch_get_proto_dev(esw, vport_num, REP_IB);
}
struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
- int vport_index)
+ u16 vport_num)
{
- return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_ETH);
+ return mlx5_eswitch_get_proto_dev(esw, vport_num, REP_ETH);
}
struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw)
return mlx5_eswitch_uplink_get_proto_dev(esw, REP_IB);
}
-struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, int vport)
+struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
+ u16 vport_num)
{
- return mlx5_eswitch_vport_rep(esw, vport);
+ return mlx5_eswitch_vport_rep(esw, vport_num);
}
struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw);
struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
- int vport_index);
+ u16 vport_num);
struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw);
struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
- int vport_index);
+ u16 vport_num);
void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev);
void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev);
struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq,
u16 port);
struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
- int vport_index);
+ u16 vport_num);
#else /* CONFIG_MLX5_ESWITCH */
static inline u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
{
- return SRIOV_NONE;
+ return MLX5_ESWITCH_NONE;
}
static inline
struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
- int vport_index)
+ u16 vport_num)
{
return NULL;
}
static inline
struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
- int vport_index)
+ u16 vport_num)
{
return NULL;
}
static inline
struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
- int vport_index)
+ u16 vport_num)
{
return NULL;
}
#include <linux/mlx5/port.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
+#include <linux/mlx5/eswitch.h>
#include <linux/list.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
}
props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
if (MLX5_CAP_GEN(mdev, sho)) {
- props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER;
+ props->device_cap_flags |= IB_DEVICE_INTEGRITY_HANDOVER;
/* At this stage no support for signature handover */
props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 |
IB_PROT_T10DIF_TYPE_2 |
props->max_srq_sge = max_rq_sg - 1;
props->max_fast_reg_page_list_len =
1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size);
+ props->max_pi_fast_reg_page_list_len =
+ props->max_fast_reg_page_list_len / 2;
get_atomic_caps_qp(dev, props);
props->masked_atomic_cap = IB_ATOMIC_NONE;
props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
/* Allocation size must be a multiple of the basic block size
* and a power of 2.
*/
- act_size = roundup(attr->length, MLX5_SW_ICM_BLOCK_SIZE(dm_db->dev));
+ act_size = round_up(attr->length, MLX5_SW_ICM_BLOCK_SIZE(dm_db->dev));
act_size = roundup_pow_of_two(act_size);
dm->size = act_size;
}
}
- static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
- u32 *match_v, const union ib_flow_spec *ib_spec,
+ static int parse_flow_attr(struct mlx5_core_dev *mdev,
+ struct mlx5_flow_spec *spec,
+ const union ib_flow_spec *ib_spec,
const struct ib_flow_attr *flow_attr,
struct mlx5_flow_act *action, u32 prev_type)
{
+ struct mlx5_flow_context *flow_context = &spec->flow_context;
+ u32 *match_c = spec->match_criteria;
+ u32 *match_v = spec->match_value;
void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
misc_parameters);
void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
if (ib_spec->flow_tag.tag_id >= BIT(24))
return -EINVAL;
- action->flow_tag = ib_spec->flow_tag.tag_id;
- action->flags |= FLOW_ACT_HAS_TAG;
+ flow_context->flow_tag = ib_spec->flow_tag.tag_id;
+ flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
break;
case IB_FLOW_SPEC_ACTION_DROP:
if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
return VALID_SPEC_NA;
return is_crypto && is_ipsec &&
- (!egress || (!is_drop && !(flow_act->flags & FLOW_ACT_HAS_TAG))) ?
+ (!egress || (!is_drop &&
+ !(spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG))) ?
VALID_SPEC_VALID : VALID_SPEC_INVALID;
}
int max_table_size;
int num_entries;
int num_groups;
+ bool esw_encap;
u32 flags = 0;
int priority;
max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
log_max_ft_size));
+ esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
+ DEVLINK_ESWITCH_ENCAP_MODE_NONE;
if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
enum mlx5_flow_namespace_type fn_type;
if (ft_type == MLX5_IB_FT_RX) {
fn_type = MLX5_FLOW_NAMESPACE_BYPASS;
prio = &dev->flow_db->prios[priority];
- if (!dev->is_rep &&
+ if (!dev->is_rep && !esw_encap &&
MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
- if (!dev->is_rep &&
+ if (!dev->is_rep && !esw_encap &&
MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
reformat_l3_tunnel_to_l2))
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
log_max_ft_size));
fn_type = MLX5_FLOW_NAMESPACE_EGRESS;
prio = &dev->flow_db->egress_prios[priority];
- if (!dev->is_rep &&
+ if (!dev->is_rep && !esw_encap &&
MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
}
return ret;
}
+ static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_eswitch_rep *rep)
+ { /* Key @spec on @rep->vport: write the match value and set the same field to ones in match_criteria. */
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+ void *misc;
+
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { /* metadata matching: use metadata_reg_c_0 in misc_parameters_2 */
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters_2);
+
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(esw,
+ rep->vport));
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters_2);
+
+ MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0); /* whole register field set to ones in the criteria */
+ } else { /* otherwise use the source_port field in misc_parameters */
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+
+ MLX5_SET(fte_match_set_misc, misc, source_port, rep->vport);
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); /* whole source_port field set to ones in the criteria */
+ }
+ }
+
static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_prio *ft_prio,
const struct ib_flow_attr *flow_attr,
{
struct mlx5_flow_table *ft = ft_prio->flow_table;
struct mlx5_ib_flow_handler *handler;
- struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
+ struct mlx5_flow_act flow_act = {};
struct mlx5_flow_spec *spec;
struct mlx5_flow_destination dest_arr[2] = {};
struct mlx5_flow_destination *rule_dst = dest_arr;
}
for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
- err = parse_flow_attr(dev->mdev, spec->match_criteria,
- spec->match_value,
+ err = parse_flow_attr(dev->mdev, spec,
ib_flow, flow_attr, &flow_act,
prev_type);
if (err < 0)
set_underlay_qp(dev, spec, underlay_qpn);
if (dev->is_rep) {
- void *misc;
+ struct mlx5_eswitch_rep *rep;
- if (!dev->port[flow_attr->port - 1].rep) {
+ rep = dev->port[flow_attr->port - 1].rep;
+ if (!rep) {
err = -EINVAL;
goto free;
}
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- misc_parameters);
- MLX5_SET(fte_match_set_misc, misc, source_port,
- dev->port[flow_attr->port - 1].rep->vport);
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- misc_parameters);
- MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+ mlx5_ib_set_rule_source_port(dev, spec, rep);
}
spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
}
- if ((flow_act.flags & FLOW_ACT_HAS_TAG) &&
+ if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG) &&
(flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
- flow_act.flow_tag, flow_attr->type);
+ spec->flow_context.flow_tag, flow_attr->type);
err = -EINVAL;
goto free;
}
struct mlx5_flow_namespace *ns = NULL;
struct mlx5_ib_flow_prio *prio = NULL;
int max_table_size = 0;
+ bool esw_encap;
u32 flags = 0;
int priority;
else
priority = ib_prio_to_core_prio(fs_matcher->priority, false);
+ esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
+ DEVLINK_ESWITCH_ENCAP_MODE_NONE;
if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
log_max_ft_size));
- if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap)
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- reformat_l3_tunnel_to_l2))
+ reformat_l3_tunnel_to_l2) &&
+ !esw_encap)
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
} else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) {
max_table_size = BIT(
MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size));
- if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
+ if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) && !esw_encap)
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
} else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) {
max_table_size = BIT(
MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size));
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, reformat_l3_tunnel_to_l2) &&
+ esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
priority = FDB_BYPASS_PATH;
}
struct mlx5_ib_flow_prio *ft_prio,
struct mlx5_flow_destination *dst,
struct mlx5_ib_flow_matcher *fs_matcher,
+ struct mlx5_flow_context *flow_context,
struct mlx5_flow_act *flow_act,
void *cmd_in, int inlen,
int dst_num)
memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params,
fs_matcher->mask_len);
spec->match_criteria_enable = fs_matcher->match_criteria_enable;
+ spec->flow_context = *flow_context;
handler->rule = mlx5_add_flow_rules(ft, spec,
flow_act, dst, dst_num);
struct mlx5_ib_flow_handler *
mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_matcher *fs_matcher,
+ struct mlx5_flow_context *flow_context,
struct mlx5_flow_act *flow_act,
u32 counter_id,
void *cmd_in, int inlen, int dest_id,
dst_num++;
}
- handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, flow_act,
+ handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher,
+ flow_context, flow_act,
cmd_in, inlen, dst_num);
if (IS_ERR(handler)) {
* lock/unlock above locks. Now need to arm all involved CQs.
*/
list_for_each_entry(mcq, &cq_armed_list, reset_notify) {
- mcq->comp(mcq);
+ mcq->comp(mcq, NULL);
}
spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
}
if (ret)
goto error0;
- devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL);
- if (IS_ERR(devr->c0)) {
- ret = PTR_ERR(devr->c0);
+ devr->c0 = rdma_zalloc_drv_obj(ibdev, ib_cq);
+ if (!devr->c0) {
+ ret = -ENOMEM;
goto error1;
}
- devr->c0->device = &dev->ib_dev;
- devr->c0->uobject = NULL;
- devr->c0->comp_handler = NULL;
- devr->c0->event_handler = NULL;
- devr->c0->cq_context = NULL;
+
+ devr->c0->device = &dev->ib_dev;
atomic_set(&devr->c0->usecnt, 0);
+ ret = mlx5_ib_create_cq(devr->c0, &cq_attr, NULL);
+ if (ret)
+ goto err_create_cq;
+
devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL);
if (IS_ERR(devr->x0)) {
ret = PTR_ERR(devr->x0);
mlx5_ib_dealloc_xrcd(devr->x0, NULL);
error2:
mlx5_ib_destroy_cq(devr->c0, NULL);
+err_create_cq:
+ kfree(devr->c0);
error1:
mlx5_ib_dealloc_pd(devr->p0, NULL);
error0:
mlx5_ib_dealloc_xrcd(devr->x0, NULL);
mlx5_ib_dealloc_xrcd(devr->x1, NULL);
mlx5_ib_destroy_cq(devr->c0, NULL);
+ kfree(devr->c0);
mlx5_ib_dealloc_pd(devr->p0, NULL);
kfree(devr->p0);
if (mlx5_use_mad_ifc(dev))
get_ext_port_caps(dev);
- dev->ib_dev.owner = THIS_MODULE;
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
dev->ib_dev.local_dma_lkey = 0 /* not supported for now */;
dev->ib_dev.phys_port_cnt = dev->num_ports;
}
static const struct ib_device_ops mlx5_ib_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_MLX5,
+ .uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION,
+
.add_gid = mlx5_ib_add_gid,
.alloc_mr = mlx5_ib_alloc_mr,
+ .alloc_mr_integrity = mlx5_ib_alloc_mr_integrity,
.alloc_pd = mlx5_ib_alloc_pd,
.alloc_ucontext = mlx5_ib_alloc_ucontext,
.attach_mcast = mlx5_ib_mcg_attach,
.get_dma_mr = mlx5_ib_get_dma_mr,
.get_link_layer = mlx5_ib_port_link_layer,
.map_mr_sg = mlx5_ib_map_mr_sg,
+ .map_mr_sg_pi = mlx5_ib_map_mr_sg_pi,
.mmap = mlx5_ib_mmap,
.modify_cq = mlx5_ib_modify_cq,
.modify_device = mlx5_ib_modify_device,
.resize_cq = mlx5_ib_resize_cq,
INIT_RDMA_OBJ_SIZE(ib_ah, mlx5_ib_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_cq, mlx5_ib_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_pd, mlx5_ib_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_srq, mlx5_ib_srq, ibsrq),
INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx5_ib_ucontext, ibucontext),
struct mlx5_core_dev *mdev = dev->mdev;
int err;
- dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION;
dev->ib_dev.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
if (mlx5_accel_ipsec_device_caps(dev->mdev) &
MLX5_ACCEL_IPSEC_CAP_DEVICE)
ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_flow_ipsec_ops);
- dev->ib_dev.driver_id = RDMA_DRIVER_MLX5;
ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_ops);
if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
printk_once(KERN_INFO "%s", mlx5_version);
if (MLX5_ESWITCH_MANAGER(mdev) &&
- mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) {
+ mlx5_ib_eswitch_mode(mdev->priv.eswitch) == MLX5_ESWITCH_OFFLOADS) {
if (!mlx5_core_mp_enabled(mdev))
mlx5_ib_register_vport_reps(mdev);
return mdev;
int create_type;
- /* Store signature errors */
- bool signature_en;
-
struct list_head qps_list;
struct list_head cq_recv_list;
struct list_head cq_send_list;
void *descs;
dma_addr_t desc_map;
int ndescs;
+ int data_length;
+ int meta_ndescs;
+ int meta_length;
int max_descs;
int desc_size;
int access_mode;
int access_flags; /* Needed for rereg MR */
struct mlx5_ib_mr *parent;
+ /* Needed for IB_MR_TYPE_INTEGRITY */
+ struct mlx5_ib_mr *pi_mr;
+ struct mlx5_ib_mr *klm_mr;
+ struct mlx5_ib_mr *mtt_mr;
+ u64 data_iova;
+ u64 pi_iova;
+
atomic_t num_leaf_free;
wait_queue_head_t q_leaf_free;
struct mlx5_async_work cb_work;
u16 devx_whitelist_uid;
struct mlx5_srq_table srq_table;
struct mlx5_async_ctx async_ctx;
- int free_port;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
int buflen, size_t *bc);
int mlx5_ib_read_user_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index,
void *buffer, int buflen, size_t *bc);
-struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
-int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
+int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata);
+void mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
u32 max_num_sg, struct ib_udata *udata);
+struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd,
+ u32 max_num_sg,
+ u32 max_num_meta_sg);
int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
+int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
+ int data_sg_nents, unsigned int *data_sg_offset,
+ struct scatterlist *meta_sg, int meta_sg_nents,
+ unsigned int *meta_sg_offset);
int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const struct ib_mad_hdr *in, size_t in_mad_size,
struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr,
struct ib_udata *udata);
-int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
+void mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
u32 wq_attr_mask, struct ib_udata *udata);
struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
extern const struct uapi_definition mlx5_ib_flow_defs[];
struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add(
struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
+ struct mlx5_flow_context *flow_context,
struct mlx5_flow_act *flow_act, u32 counter_id,
void *cmd_in, int inlen, int dest_id, int dest_type);
bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type);
struct mlx5_cache_ent *ent = &cache->ent[c];
u8 key;
unsigned long flags;
- struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table;
+ struct xarray *mkeys = &dev->mdev->priv.mkey_table;
int err;
spin_lock_irqsave(&ent->lock, flags);
ent->size++;
spin_unlock_irqrestore(&ent->lock, flags);
- write_lock_irqsave(&table->lock, flags);
- err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmkey.key),
- &mr->mmkey);
+ xa_lock_irqsave(mkeys, flags);
+ err = xa_err(__xa_store(mkeys, mlx5_base_mkey(mr->mmkey.key),
+ &mr->mmkey, GFP_ATOMIC));
+ xa_unlock_irqrestore(mkeys, flags);
if (err)
pr_err("Error inserting to mkey tree. 0x%x\n", -err);
- write_unlock_irqrestore(&table->lock, flags);
if (!completion_done(&ent->compl))
complete(&ent->compl);
return 0;
err:
- if (mr->umem) {
- ib_umem_release(mr->umem);
- mr->umem = NULL;
- }
+ ib_umem_release(mr->umem);
+ mr->umem = NULL;
+
clean_mr(dev, mr);
return err;
}
synchronize_srcu(&dev->mr_srcu);
/* Destroy all page mappings */
if (umem_odp->page_list)
- mlx5_ib_invalidate_range(umem_odp, ib_umem_start(umem),
- ib_umem_end(umem));
+ mlx5_ib_invalidate_range(umem_odp,
+ ib_umem_start(umem_odp),
+ ib_umem_end(umem_odp));
else
mlx5_ib_free_implicit_mr(mr);
/*
* remove the DMA mapping.
*/
mlx5_mr_cache_free(dev, mr);
- if (umem) {
- ib_umem_release(umem);
+ ib_umem_release(umem);
+ if (umem)
atomic_sub(npages, &dev->mdev->priv.reg_pages);
- }
+
if (!mr->allocated_from_cache)
kfree(mr);
}
int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
- dereg_mr(to_mdev(ibmr->device), to_mmr(ibmr));
+ struct mlx5_ib_mr *mmr = to_mmr(ibmr);
+
+ if (ibmr->type == IB_MR_TYPE_INTEGRITY) { /* integrity MRs also carry mtt_mr and klm_mr; deregister those first */
+ dereg_mr(to_mdev(mmr->mtt_mr->ibmr.device), mmr->mtt_mr);
+ dereg_mr(to_mdev(mmr->klm_mr->ibmr.device), mmr->klm_mr);
+ }
+
+ dereg_mr(to_mdev(ibmr->device), mmr); /* then the MR itself */
+
return 0;
}
-struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata)
+static void mlx5_set_umr_free_mkey(struct ib_pd *pd, u32 *in, int ndescs,
+ int access_mode, int page_shift)
+{ /* Fill the memory_key_mkey_entry section of the create_mkey_in buffer @in. */
+ void *mkc;
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+
+ MLX5_SET(mkc, mkc, free, 1);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+ MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); /* pd number comes from the caller's @pd */
+ MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
+ MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3); /* low two bits of @access_mode */
+ MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7); /* next three bits of @access_mode */
+ MLX5_SET(mkc, mkc, umr_en, 1);
+ MLX5_SET(mkc, mkc, log_page_size, page_shift);
+}
+
+static int _mlx5_alloc_mkey_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
+ int ndescs, int desc_size, int page_shift,
+ int access_mode, u32 *in, int inlen)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ int err; /* returned as-is: 0 on success, otherwise the failing callee's error */
+
+ mr->access_mode = access_mode; /* record the requested layout on @mr before allocating */
+ mr->desc_size = desc_size;
+ mr->max_descs = ndescs;
+
+ err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, desc_size); /* released by mlx5_free_priv_descs() if mkey creation fails */
+ if (err)
+ return err;
+
+ mlx5_set_umr_free_mkey(pd, in, ndescs, access_mode, page_shift); /* populate the mkey entry inside @in */
+
+ err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
+ if (err)
+ goto err_free_descs;
+
+ mr->mmkey.type = MLX5_MKEY_MR;
+ mr->ibmr.lkey = mr->mmkey.key; /* lkey and rkey both take the new mkey's key */
+ mr->ibmr.rkey = mr->mmkey.key;
+
+ return 0;
+
+err_free_descs:
+ mlx5_free_priv_descs(mr);
+ return err;
+}
+
+static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd,
+ u32 max_num_sg, u32 max_num_meta_sg,
+ int desc_size, int access_mode)
+{
int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
- int ndescs = ALIGN(max_num_sg, 4);
+ int ndescs = ALIGN(max_num_sg + max_num_meta_sg, 4);
+ int page_shift = 0;
struct mlx5_ib_mr *mr;
- void *mkc;
u32 *in;
int err;
if (!mr)
return ERR_PTR(-ENOMEM);
+ mr->ibmr.pd = pd;
+ mr->ibmr.device = pd->device;
+
in = kzalloc(inlen, GFP_KERNEL);
if (!in) {
err = -ENOMEM;
goto err_free;
}
+ if (access_mode == MLX5_MKC_ACCESS_MODE_MTT)
+ page_shift = PAGE_SHIFT;
+
+ err = _mlx5_alloc_mkey_descs(pd, mr, ndescs, desc_size, page_shift,
+ access_mode, in, inlen);
+ if (err)
+ goto err_free_in;
+
+ mr->umem = NULL;
+ kfree(in);
+
+ return mr;
+
+err_free_in:
+ kfree(in);
+err_free:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
+static int mlx5_alloc_mem_reg_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
+ int ndescs, u32 *in, int inlen)
+{ /* MTT access mode: struct mlx5_mtt sized descriptors, PAGE_SHIFT as page shift. */
+ return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_mtt),
+ PAGE_SHIFT, MLX5_MKC_ACCESS_MODE_MTT, in,
+ inlen);
+}
+
+static int mlx5_alloc_sg_gaps_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
+ int ndescs, u32 *in, int inlen)
+{ /* KLMS access mode: struct mlx5_klm sized descriptors, page shift 0. */
+ return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_klm),
+ 0, MLX5_MKC_ACCESS_MODE_KLMS, in, inlen);
+}
+
+static int mlx5_alloc_integrity_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
+ int max_num_sg, int max_num_meta_sg,
+ u32 *in, int inlen)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ u32 psv_index[2];
+ void *mkc;
+ int err;
+
+ mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
+ if (!mr->sig)
+ return -ENOMEM;
+
+ /* create mem & wire PSVs */
+ err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn, 2, psv_index);
+ if (err)
+ goto err_free_sig;
+
+ mr->sig->psv_memory.psv_idx = psv_index[0];
+ mr->sig->psv_wire.psv_idx = psv_index[1];
+
+ mr->sig->sig_status_checked = true;
+ mr->sig->sig_err_exists = false;
+ /* Next UMR, Arm SIGERR */
+ ++mr->sig->sigerr_count;
+ mr->klm_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg,
+ sizeof(struct mlx5_klm),
+ MLX5_MKC_ACCESS_MODE_KLMS);
+ if (IS_ERR(mr->klm_mr)) {
+ err = PTR_ERR(mr->klm_mr);
+ goto err_destroy_psv;
+ }
+ mr->mtt_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg,
+ sizeof(struct mlx5_mtt),
+ MLX5_MKC_ACCESS_MODE_MTT);
+ if (IS_ERR(mr->mtt_mr)) {
+ err = PTR_ERR(mr->mtt_mr);
+ goto err_free_klm_mr;
+ }
+
+ /* Set bsf descriptors for mkey */
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
- MLX5_SET(mkc, mkc, free, 1);
- MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
- MLX5_SET(mkc, mkc, qpn, 0xffffff);
- MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
+ MLX5_SET(mkc, mkc, bsf_en, 1);
+ MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
- if (mr_type == IB_MR_TYPE_MEM_REG) {
- mr->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
- MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
- err = mlx5_alloc_priv_descs(pd->device, mr,
- ndescs, sizeof(struct mlx5_mtt));
- if (err)
- goto err_free_in;
+ err = _mlx5_alloc_mkey_descs(pd, mr, 4, sizeof(struct mlx5_klm), 0,
+ MLX5_MKC_ACCESS_MODE_KLMS, in, inlen);
+ if (err)
+ goto err_free_mtt_mr;
- mr->desc_size = sizeof(struct mlx5_mtt);
- mr->max_descs = ndescs;
- } else if (mr_type == IB_MR_TYPE_SG_GAPS) {
- mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
+ return 0;
- err = mlx5_alloc_priv_descs(pd->device, mr,
- ndescs, sizeof(struct mlx5_klm));
- if (err)
- goto err_free_in;
- mr->desc_size = sizeof(struct mlx5_klm);
- mr->max_descs = ndescs;
- } else if (mr_type == IB_MR_TYPE_SIGNATURE) {
- u32 psv_index[2];
-
- MLX5_SET(mkc, mkc, bsf_en, 1);
- MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
- mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
- if (!mr->sig) {
- err = -ENOMEM;
- goto err_free_in;
- }
+err_free_mtt_mr:
+ dereg_mr(to_mdev(mr->mtt_mr->ibmr.device), mr->mtt_mr);
+ mr->mtt_mr = NULL;
+err_free_klm_mr:
+ dereg_mr(to_mdev(mr->klm_mr->ibmr.device), mr->klm_mr);
+ mr->klm_mr = NULL;
+err_destroy_psv:
+ if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_memory.psv_idx))
+ mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
+ mr->sig->psv_memory.psv_idx);
+ if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx))
+ mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
+ mr->sig->psv_wire.psv_idx);
+err_free_sig:
+ kfree(mr->sig);
- /* create mem & wire PSVs */
- err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn,
- 2, psv_index);
- if (err)
- goto err_free_sig;
+ return err;
+}
+
+static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd,
+ enum ib_mr_type mr_type, u32 max_num_sg,
+ u32 max_num_meta_sg)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ int ndescs = ALIGN(max_num_sg, 4);
+ struct mlx5_ib_mr *mr;
+ u32 *in;
+ int err;
- mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
- mr->sig->psv_memory.psv_idx = psv_index[0];
- mr->sig->psv_wire.psv_idx = psv_index[1];
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
- mr->sig->sig_status_checked = true;
- mr->sig->sig_err_exists = false;
- /* Next UMR, Arm SIGERR */
- ++mr->sig->sigerr_count;
- } else {
+ in = kzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ mr->ibmr.device = pd->device;
+ mr->umem = NULL;
+
+ switch (mr_type) {
+ case IB_MR_TYPE_MEM_REG:
+ err = mlx5_alloc_mem_reg_descs(pd, mr, ndescs, in, inlen);
+ break;
+ case IB_MR_TYPE_SG_GAPS:
+ err = mlx5_alloc_sg_gaps_descs(pd, mr, ndescs, in, inlen);
+ break;
+ case IB_MR_TYPE_INTEGRITY:
+ err = mlx5_alloc_integrity_descs(pd, mr, max_num_sg,
+ max_num_meta_sg, in, inlen);
+ break;
+ default:
mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type);
err = -EINVAL;
- goto err_free_in;
}
- MLX5_SET(mkc, mkc, access_mode_1_0, mr->access_mode & 0x3);
- MLX5_SET(mkc, mkc, access_mode_4_2, (mr->access_mode >> 2) & 0x7);
- MLX5_SET(mkc, mkc, umr_en, 1);
-
- mr->ibmr.device = pd->device;
- err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
if (err)
- goto err_destroy_psv;
+ goto err_free_in;
- mr->mmkey.type = MLX5_MKEY_MR;
- mr->ibmr.lkey = mr->mmkey.key;
- mr->ibmr.rkey = mr->mmkey.key;
- mr->umem = NULL;
kfree(in);
return &mr->ibmr;
-err_destroy_psv:
- if (mr->sig) {
- if (mlx5_core_destroy_psv(dev->mdev,
- mr->sig->psv_memory.psv_idx))
- mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
- mr->sig->psv_memory.psv_idx);
- if (mlx5_core_destroy_psv(dev->mdev,
- mr->sig->psv_wire.psv_idx))
- mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
- mr->sig->psv_wire.psv_idx);
- }
- mlx5_free_priv_descs(mr);
-err_free_sig:
- kfree(mr->sig);
err_free_in:
kfree(in);
err_free:
return ERR_PTR(err);
}
+struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_num_sg, struct ib_udata *udata)
+{ /* Thin wrapper: @udata is not used here, and max_num_meta_sg is passed as 0. */
+ return __mlx5_ib_alloc_mr(pd, mr_type, max_num_sg, 0);
+}
+
+struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd,
+ u32 max_num_sg, u32 max_num_meta_sg)
+{ /* Thin wrapper: always requests IB_MR_TYPE_INTEGRITY and forwards both SG limits. */
+ return __mlx5_ib_alloc_mr(pd, IB_MR_TYPE_INTEGRITY, max_num_sg,
+ max_num_meta_sg);
+}
+
struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
struct ib_udata *udata)
{
return ret;
}
+/*
+ * Describe the data buffer (and optionally the metadata buffer) directly by
+ * DMA address, without building a descriptor list. This is only possible
+ * when the data SG list has exactly one entry; the metadata is taken into
+ * account only when its SG list also has exactly one entry.
+ *
+ * The lengths and DMA addresses are recorded in the MR. Returns the number
+ * of SG entries that were consumed: 0, 1 or 2.
+ */
+static int
+mlx5_ib_map_pa_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
+			int data_sg_nents, unsigned int *data_sg_offset,
+			struct scatterlist *meta_sg, int meta_sg_nents,
+			unsigned int *meta_sg_offset)
+{
+	struct mlx5_ib_mr *mr = to_mmr(ibmr);
+	unsigned int off;
+	int mapped = 1;
+
+	mr->meta_length = 0;
+
+	/* Anything other than a single data entry cannot be mapped here. */
+	if (data_sg_nents != 1)
+		return 0;
+
+	off = data_sg_offset ? *data_sg_offset : 0;
+	mr->ndescs = 1;
+	mr->data_length = sg_dma_len(data_sg) - off;
+	mr->data_iova = sg_dma_address(data_sg) + off;
+
+	if (meta_sg_nents == 1) {
+		off = meta_sg_offset ? *meta_sg_offset : 0;
+		mr->meta_ndescs = 1;
+		mr->meta_length = sg_dma_len(meta_sg) - off;
+		mr->pi_iova = sg_dma_address(meta_sg) + off;
+		mapped++;
+	}
+
+	ibmr->length = mr->data_length + mr->meta_length;
+
+	return mapped;
+}
+
static int
mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
struct scatterlist *sgl,
unsigned short sg_nents,
- unsigned int *sg_offset_p)
+ unsigned int *sg_offset_p,
+ struct scatterlist *meta_sgl,
+ unsigned short meta_sg_nents,
+ unsigned int *meta_sg_offset_p)
{
struct scatterlist *sg = sgl;
struct mlx5_klm *klms = mr->descs;
unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
u32 lkey = mr->ibmr.pd->local_dma_lkey;
- int i;
+ int i, j = 0;
mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
mr->ibmr.length = 0;
sg_offset = 0;
}
- mr->ndescs = i;
if (sg_offset_p)
*sg_offset_p = sg_offset;
- return i;
+ mr->ndescs = i;
+ mr->data_length = mr->ibmr.length;
+
+ if (meta_sg_nents) {
+ sg = meta_sgl;
+ sg_offset = meta_sg_offset_p ? *meta_sg_offset_p : 0;
+ for_each_sg(meta_sgl, sg, meta_sg_nents, j) {
+ if (unlikely(i + j >= mr->max_descs))
+ break;
+ klms[i + j].va = cpu_to_be64(sg_dma_address(sg) +
+ sg_offset);
+ klms[i + j].bcount = cpu_to_be32(sg_dma_len(sg) -
+ sg_offset);
+ klms[i + j].key = cpu_to_be32(lkey);
+ mr->ibmr.length += sg_dma_len(sg) - sg_offset;
+
+ sg_offset = 0;
+ }
+ if (meta_sg_offset_p)
+ *meta_sg_offset_p = sg_offset;
+
+ mr->meta_ndescs = j;
+ mr->meta_length = mr->ibmr.length - mr->data_length;
+ }
+
+ return i + j;
}
static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
return 0;
}
+/*
+ * Page callback used for the metadata SG list: store one page address,
+ * tagged with the read/write enable bits, in the descriptor slot that
+ * follows the ndescs + meta_ndescs entries already in use.
+ *
+ * Returns 0 on success or -ENOMEM when all max_descs slots are taken.
+ */
+static int mlx5_set_page_pi(struct ib_mr *ibmr, u64 addr)
+{
+	struct mlx5_ib_mr *mr = to_mmr(ibmr);
+	__be64 *slots = mr->descs;
+
+	if (unlikely(mr->ndescs + mr->meta_ndescs == mr->max_descs))
+		return -ENOMEM;
+
+	slots[mr->ndescs + mr->meta_ndescs] =
+		cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
+	mr->meta_ndescs++;
+
+	return 0;
+}
+
+/*
+ * Map the data SG list and, when meta_sg_nents is non-zero, the metadata SG
+ * list into mr->mtt_mr by feeding them to ib_sg_to_pages() with the
+ * mlx5_set_page / mlx5_set_page_pi callbacks.
+ *
+ * Returns the number of SG entries mapped. If not every data entry could be
+ * mapped, that partial count is returned and the metadata is not processed.
+ */
+static int
+mlx5_ib_map_mtt_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
+ int data_sg_nents, unsigned int *data_sg_offset,
+ struct scatterlist *meta_sg, int meta_sg_nents,
+ unsigned int *meta_sg_offset)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ struct mlx5_ib_mr *pi_mr = mr->mtt_mr;
+ int n;
+
+ /* Start from an empty descriptor list on the internal MTT MR */
+ pi_mr->ndescs = 0;
+ pi_mr->meta_ndescs = 0;
+ pi_mr->meta_length = 0;
+
+ ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
+ pi_mr->desc_size * pi_mr->max_descs,
+ DMA_TO_DEVICE);
+
+ pi_mr->ibmr.page_size = ibmr->page_size;
+ n = ib_sg_to_pages(&pi_mr->ibmr, data_sg, data_sg_nents, data_sg_offset,
+ mlx5_set_page);
+ /*
+ * NOTE(review): this early return skips the
+ * ib_dma_sync_single_for_device() call at the end of the function -
+ * confirm that is intended.
+ */
+ if (n != data_sg_nents)
+ return n;
+
+ /* Data-only view; extended below when metadata is present */
+ pi_mr->data_iova = pi_mr->ibmr.iova;
+ pi_mr->data_length = pi_mr->ibmr.length;
+ pi_mr->ibmr.length = pi_mr->data_length;
+ ibmr->length = pi_mr->data_length;
+
+ if (meta_sg_nents) {
+ u64 page_mask = ~((u64)ibmr->page_size - 1);
+ u64 iova = pi_mr->data_iova;
+
+ n += ib_sg_to_pages(&pi_mr->ibmr, meta_sg, meta_sg_nents,
+ meta_sg_offset, mlx5_set_page_pi);
+
+ pi_mr->meta_length = pi_mr->ibmr.length;
+ /*
+ * PI address for the HW is the offset of the metadata address
+ * relative to the first data page address.
+ * It equals the first data page address + size of data pages +
+ * metadata offset at the first metadata page
+ */
+ pi_mr->pi_iova = (iova & page_mask) +
+ pi_mr->ndescs * ibmr->page_size +
+ (pi_mr->ibmr.iova & ~page_mask);
+ /*
+ * In order to use one MTT MR for data and metadata, we register
+ * also the gaps between the end of the data and the start of
+ * the metadata (the sig MR will verify that the HW will access
+ * the right addresses). This mapping is safe because we use
+ * internal mkey for the registration.
+ */
+ pi_mr->ibmr.length = pi_mr->pi_iova + pi_mr->meta_length - iova;
+ pi_mr->ibmr.iova = iova;
+ ibmr->length += pi_mr->meta_length;
+ }
+
+ ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
+ pi_mr->desc_size * pi_mr->max_descs,
+ DMA_TO_DEVICE);
+
+ return n;
+}
+
+/*
+ * Map the data and metadata SG lists into mr->klm_mr with
+ * mlx5_ib_sg_to_klms(), then present the result as a zero-based region:
+ * the data starts at iova 0 and the metadata starts right after the data.
+ *
+ * Returns the number of SG entries mapped, as reported by
+ * mlx5_ib_sg_to_klms().
+ */
+static int
+mlx5_ib_map_klm_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
+ int data_sg_nents, unsigned int *data_sg_offset,
+ struct scatterlist *meta_sg, int meta_sg_nents,
+ unsigned int *meta_sg_offset)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ struct mlx5_ib_mr *pi_mr = mr->klm_mr;
+ int n;
+
+ /* Start from an empty descriptor list on the internal KLM MR */
+ pi_mr->ndescs = 0;
+ pi_mr->meta_ndescs = 0;
+ pi_mr->meta_length = 0;
+
+ ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
+ pi_mr->desc_size * pi_mr->max_descs,
+ DMA_TO_DEVICE);
+
+ n = mlx5_ib_sg_to_klms(pi_mr, data_sg, data_sg_nents, data_sg_offset,
+ meta_sg, meta_sg_nents, meta_sg_offset);
+
+ ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
+ pi_mr->desc_size * pi_mr->max_descs,
+ DMA_TO_DEVICE);
+
+ /* This is zero-based memory region */
+ pi_mr->data_iova = 0;
+ pi_mr->ibmr.iova = 0;
+ pi_mr->pi_iova = pi_mr->data_length;
+ ibmr->length = pi_mr->ibmr.length;
+
+ return n;
+}
+
+/*
+ * Map data and metadata SG lists for an integrity MR.
+ *
+ * Three mappings are tried in order, and the first one that covers all
+ * data_sg_nents + meta_sg_nents entries is kept: the direct PA mapping
+ * (no internal MR), the MTT MR, then the KLM MR.
+ *
+ * On success the chosen internal MR (NULL for the PA case) is stored in
+ * mr->pi_mr, the metadata length is published in ibmr->sig_attrs and 0 is
+ * returned. Returns -ENOMEM when even the KLM mapping is incomplete.
+ */
+int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
+ int data_sg_nents, unsigned int *data_sg_offset,
+ struct scatterlist *meta_sg, int meta_sg_nents,
+ unsigned int *meta_sg_offset)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ struct mlx5_ib_mr *pi_mr = NULL;
+ int n;
+
+ /* Only warns on a non-integrity MR; execution continues regardless */
+ WARN_ON(ibmr->type != IB_MR_TYPE_INTEGRITY);
+
+ mr->ndescs = 0;
+ mr->data_length = 0;
+ mr->data_iova = 0;
+ mr->meta_ndescs = 0;
+ mr->pi_iova = 0;
+ /*
+ * As a performance optimization, if possible, there is no need to
+ * perform UMR operation to register the data/metadata buffers.
+ * First try to map the sg lists to PA descriptors with local_dma_lkey.
+ * Fallback to UMR only in case of a failure.
+ */
+ n = mlx5_ib_map_pa_mr_sg_pi(ibmr, data_sg, data_sg_nents,
+ data_sg_offset, meta_sg, meta_sg_nents,
+ meta_sg_offset);
+ if (n == data_sg_nents + meta_sg_nents)
+ goto out;
+ /*
+ * As a performance optimization, if possible, there is no need to map
+ * the sg lists to KLM descriptors. First try to map the sg lists to MTT
+ * descriptors and fallback to KLM only in case of a failure.
+ * It's more efficient for the HW to work with MTT descriptors
+ * (especially in high load).
+ * Use KLM (indirect access) only if it's mandatory.
+ */
+ pi_mr = mr->mtt_mr;
+ n = mlx5_ib_map_mtt_mr_sg_pi(ibmr, data_sg, data_sg_nents,
+ data_sg_offset, meta_sg, meta_sg_nents,
+ meta_sg_offset);
+ if (n == data_sg_nents + meta_sg_nents)
+ goto out;
+
+ /* Last resort: KLM descriptors */
+ pi_mr = mr->klm_mr;
+ n = mlx5_ib_map_klm_mr_sg_pi(ibmr, data_sg, data_sg_nents,
+ data_sg_offset, meta_sg, meta_sg_nents,
+ meta_sg_offset);
+ if (unlikely(n != data_sg_nents + meta_sg_nents))
+ return -ENOMEM;
+
+out:
+ /* This is zero-based memory region */
+ ibmr->iova = 0;
+ mr->pi_mr = pi_mr;
+ /* pi_mr is NULL only when the PA mapping above succeeded */
+ if (pi_mr)
+ ibmr->sig_attrs->meta_length = pi_mr->meta_length;
+ else
+ ibmr->sig_attrs->meta_length = mr->meta_length;
+
+ return 0;
+}
+
int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset)
{
DMA_TO_DEVICE);
if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
- n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset);
+ n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset, NULL, 0,
+ NULL);
else
n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
mlx5_set_page);
if (!rb)
goto not_found;
odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb);
- if (ib_umem_start(&odp->umem) > start + length)
+ if (ib_umem_start(odp) > start + length)
goto not_found;
}
not_found:
static void mr_leaf_free_action(struct work_struct *work)
{
struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work);
- int idx = ib_umem_start(&odp->umem) >> MLX5_IMR_MTT_SHIFT;
+ int idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT;
struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent;
mr->parent = NULL;
const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT /
sizeof(struct mlx5_mtt)) - 1;
u64 idx = 0, blk_start_idx = 0;
- struct ib_umem *umem;
int in_block = 0;
u64 addr;
pr_err("invalidation called on NULL umem or non-ODP umem\n");
return;
}
- umem = &umem_odp->umem;
mr = umem_odp->private;
if (!mr || !mr->ibmr.pd)
return;
- start = max_t(u64, ib_umem_start(umem), start);
- end = min_t(u64, ib_umem_end(umem), end);
+ start = max_t(u64, ib_umem_start(umem_odp), start);
+ end = min_t(u64, ib_umem_end(umem_odp), end);
/*
* Iteration one - zap the HW's MTTs. The notifiers_count ensures that
* but they will write 0s as well, so no difference in the end result.
*/
- for (addr = start; addr < end; addr += BIT(umem->page_shift)) {
- idx = (addr - ib_umem_start(umem)) >> umem->page_shift;
+ for (addr = start; addr < end; addr += BIT(umem_odp->page_shift)) {
+ idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
/*
* Strive to write the MTTs in chunks, but avoid overwriting
* non-existing MTTs. The huristic here can be improved to
void *cookie)
{
struct mlx5_ib_mr *mr = umem_odp->private, *imr = cookie;
- struct ib_umem *umem = &umem_odp->umem;
if (mr->parent != imr)
return 0;
- ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem),
- ib_umem_end(umem));
+ ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
+ ib_umem_end(umem_odp));
if (umem_odp->dying)
return 0;
}
next_mr:
- size = min_t(size_t, bcnt, ib_umem_end(&odp->umem) - io_virt);
+ size = min_t(size_t, bcnt, ib_umem_end(odp) - io_virt);
- page_shift = mr->umem->page_shift;
+ page_shift = odp->page_shift;
page_mask = ~(BIT(page_shift) - 1);
start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift;
access_mask = ODP_READ_ALLOWED_BIT;
bcnt -= *bytes_committed;
next_mr:
- mmkey = __mlx5_mr_lookup(dev->mdev, mlx5_base_mkey(key));
+ mmkey = xa_load(&dev->mdev->priv.mkey_table, mlx5_base_mkey(key));
if (!mkey_is_eq(mmkey, key)) {
mlx5_ib_dbg(dev, "failed to find mkey %x\n", key);
ret = -EFAULT;
eq->irq_nb.notifier_call = mlx5_ib_eq_pf_int;
param = (struct mlx5_eq_param) {
.irq_index = 0,
- .mask = 1 << MLX5_EVENT_TYPE_PAGE_FAULT,
.nent = MLX5_IB_NUM_PF_EQE,
};
+ param.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_FAULT;
eq->core = mlx5_eq_create_generic(dev->mdev, ¶m);
if (IS_ERR(eq->core)) {
err = PTR_ERR(eq->core);
struct mlx5_core_mkey *mmkey;
struct mlx5_ib_mr *mr;
- mmkey = __mlx5_mr_lookup(dev->mdev,
- mlx5_base_mkey(sg_list[i].lkey));
+ mmkey = xa_load(&dev->mdev->priv.mkey_table,
+ mlx5_base_mkey(sg_list[i].lkey));
mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
atomic_dec(&mr->num_pending_prefetch);
}
struct mlx5_core_mkey *mmkey;
struct mlx5_ib_mr *mr;
- mmkey = __mlx5_mr_lookup(dev->mdev,
- mlx5_base_mkey(sg_list[i].lkey));
+ mmkey = xa_load(&dev->mdev->priv.mkey_table,
+ mlx5_base_mkey(sg_list[i].lkey));
if (!mmkey || mmkey->key != sg_list[i].lkey) {
ret = false;
break;
}
size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg);
- if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN &&
+ if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN &&
ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB) < MLX5_SIG_WQE_SIZE)
- return MLX5_SIG_WQE_SIZE;
+ return MLX5_SIG_WQE_SIZE;
else
return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB);
}
sizeof(struct mlx5_wqe_inline_seg);
attr->cap.max_inline_data = qp->max_inline_data;
- if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN)
- qp->signature_en = true;
-
wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size);
qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB;
if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) {
atomic_dec(&dev->delay_drop.rqs_cnt);
mlx5_ib_db_unmap_user(context, &rwq->db);
- if (rwq->umem)
- ib_umem_release(rwq->umem);
+ ib_umem_release(rwq->umem);
}
static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
kvfree(*in);
err_umem:
- if (ubuffer->umem)
- ib_umem_release(ubuffer->umem);
+ ib_umem_release(ubuffer->umem);
err_bfreg:
if (bfregn != MLX5_IB_INVALID_BFREG)
ibucontext);
mlx5_ib_db_unmap_user(context, &qp->db);
- if (base->ubuffer.umem)
- ib_umem_release(base->ubuffer.umem);
+ ib_umem_release(base->ubuffer.umem);
/*
* Free only the BFREGs which are handled by the kernel.
void *qpc;
int err;
- if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN |
+ if (init_attr->create_flags & ~(IB_QP_CREATE_INTEGRITY_EN |
IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
IB_QP_CREATE_IPOIB_UD_LSO |
IB_QP_CREATE_NETIF_QP |
}
static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
- struct mlx5_ib_mr *mr, bool umr_inline)
+ struct mlx5_ib_mr *mr, u8 flags)
{
- int size = mr->ndescs * mr->desc_size;
+ int size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size;
memset(umr, 0, sizeof(*umr));
- umr->flags = MLX5_UMR_CHECK_NOT_FREE;
- if (umr_inline)
- umr->flags |= MLX5_UMR_INLINE;
+ umr->flags = flags;
umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size));
umr->mkey_mask = frwr_mkey_mask();
}
struct mlx5_ib_mr *mr,
u32 key, int access)
{
- int ndescs = ALIGN(mr->ndescs, 8) >> 1;
+ int ndescs = ALIGN(mr->ndescs + mr->meta_ndescs, 8) >> 1;
memset(seg, 0, sizeof(*seg));
struct mlx5_ib_mr *mr,
struct mlx5_ib_pd *pd)
{
- int bcount = mr->desc_size * mr->ndescs;
+ int bcount = mr->desc_size * (mr->ndescs + mr->meta_ndescs);
dseg->addr = cpu_to_be64(mr->desc_map);
dseg->byte_count = cpu_to_be32(ALIGN(bcount, 64));
return 0;
}
-static int set_sig_data_segment(const struct ib_sig_handover_wr *wr,
- struct mlx5_ib_qp *qp, void **seg,
- int *size, void **cur_edge)
+static int set_sig_data_segment(const struct ib_send_wr *send_wr,
+ struct ib_mr *sig_mr,
+ struct ib_sig_attrs *sig_attrs,
+ struct mlx5_ib_qp *qp, void **seg, int *size,
+ void **cur_edge)
{
- struct ib_sig_attrs *sig_attrs = wr->sig_attrs;
- struct ib_mr *sig_mr = wr->sig_mr;
struct mlx5_bsf *bsf;
- u32 data_len = wr->wr.sg_list->length;
- u32 data_key = wr->wr.sg_list->lkey;
- u64 data_va = wr->wr.sg_list->addr;
+ u32 data_len;
+ u32 data_key;
+ u64 data_va;
+ u32 prot_len = 0;
+ u32 prot_key = 0;
+ u64 prot_va = 0;
+ bool prot = false;
int ret;
int wqe_size;
+ struct mlx5_ib_mr *mr = to_mmr(sig_mr);
+ struct mlx5_ib_mr *pi_mr = mr->pi_mr;
+
+ data_len = pi_mr->data_length;
+ data_key = pi_mr->ibmr.lkey;
+ data_va = pi_mr->data_iova;
+ if (pi_mr->meta_ndescs) {
+ prot_len = pi_mr->meta_length;
+ prot_key = pi_mr->ibmr.lkey;
+ prot_va = pi_mr->pi_iova;
+ prot = true;
+ }
- if (!wr->prot ||
- (data_key == wr->prot->lkey &&
- data_va == wr->prot->addr &&
- data_len == wr->prot->length)) {
+ if (!prot || (data_key == prot_key && data_va == prot_va &&
+ data_len == prot_len)) {
/**
* Source domain doesn't contain signature information
* or data and protection are interleaved in memory.
struct mlx5_stride_block_ctrl_seg *sblock_ctrl;
struct mlx5_stride_block_entry *data_sentry;
struct mlx5_stride_block_entry *prot_sentry;
- u32 prot_key = wr->prot->lkey;
- u64 prot_va = wr->prot->addr;
u16 block_size = sig_attrs->mem.sig.dif.pi_interval;
int prot_size;
}
static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
- const struct ib_sig_handover_wr *wr, u32 size,
- u32 length, u32 pdn)
+ struct ib_mr *sig_mr, int access_flags,
+ u32 size, u32 length, u32 pdn)
{
- struct ib_mr *sig_mr = wr->sig_mr;
u32 sig_key = sig_mr->rkey;
u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1;
memset(seg, 0, sizeof(*seg));
- seg->flags = get_umr_flags(wr->access_flags) |
- MLX5_MKC_ACCESS_MODE_KLMS;
+ seg->flags = get_umr_flags(access_flags) | MLX5_MKC_ACCESS_MODE_KLMS;
seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00);
seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
MLX5_MKEY_BSF_EN | pdn);
umr->mkey_mask = sig_mkey_mask();
}
-
-static int set_sig_umr_wr(const struct ib_send_wr *send_wr,
- struct mlx5_ib_qp *qp, void **seg, int *size,
- void **cur_edge)
+static int set_pi_umr_wr(const struct ib_send_wr *send_wr,
+ struct mlx5_ib_qp *qp, void **seg, int *size,
+ void **cur_edge)
{
- const struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr);
- struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr);
+ const struct ib_reg_wr *wr = reg_wr(send_wr);
+ struct mlx5_ib_mr *sig_mr = to_mmr(wr->mr);
+ struct mlx5_ib_mr *pi_mr = sig_mr->pi_mr;
+ struct ib_sig_attrs *sig_attrs = sig_mr->ibmr.sig_attrs;
u32 pdn = get_pd(qp)->pdn;
u32 xlt_size;
int region_len, ret;
- if (unlikely(wr->wr.num_sge != 1) ||
- unlikely(wr->access_flags & IB_ACCESS_REMOTE_ATOMIC) ||
- unlikely(!sig_mr->sig) || unlikely(!qp->signature_en) ||
+ if (unlikely(send_wr->num_sge != 0) ||
+ unlikely(wr->access & IB_ACCESS_REMOTE_ATOMIC) ||
+ unlikely(!sig_mr->sig) || unlikely(!qp->ibqp.integrity_en) ||
unlikely(!sig_mr->sig->sig_status_checked))
return -EINVAL;
/* length of the protected region, data + protection */
- region_len = wr->wr.sg_list->length;
- if (wr->prot &&
- (wr->prot->lkey != wr->wr.sg_list->lkey ||
- wr->prot->addr != wr->wr.sg_list->addr ||
- wr->prot->length != wr->wr.sg_list->length))
- region_len += wr->prot->length;
+ region_len = pi_mr->ibmr.length;
/**
* KLM octoword size - if protection was provided
* then we use strided block format (3 octowords),
* else we use single KLM (1 octoword)
**/
- xlt_size = wr->prot ? 0x30 : sizeof(struct mlx5_klm);
+ if (sig_attrs->mem.sig_type != IB_SIG_TYPE_NONE)
+ xlt_size = 0x30;
+ else
+ xlt_size = sizeof(struct mlx5_klm);
set_sig_umr_segment(*seg, xlt_size);
*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
- set_sig_mkey_segment(*seg, wr, xlt_size, region_len, pdn);
+ set_sig_mkey_segment(*seg, wr->mr, wr->access, xlt_size, region_len,
+ pdn);
*seg += sizeof(struct mlx5_mkey_seg);
*size += sizeof(struct mlx5_mkey_seg) / 16;
handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
- ret = set_sig_data_segment(wr, qp, seg, size, cur_edge);
+ ret = set_sig_data_segment(send_wr, wr->mr, sig_attrs, qp, seg, size,
+ cur_edge);
if (ret)
return ret;
static int set_reg_wr(struct mlx5_ib_qp *qp,
const struct ib_reg_wr *wr,
- void **seg, int *size, void **cur_edge)
+ void **seg, int *size, void **cur_edge,
+ bool check_not_free)
{
struct mlx5_ib_mr *mr = to_mmr(wr->mr);
struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd);
- size_t mr_list_size = mr->ndescs * mr->desc_size;
+ int mr_list_size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size;
bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD;
+ u8 flags = 0;
if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) {
mlx5_ib_warn(to_mdev(qp->ibqp.device),
return -EINVAL;
}
- set_reg_umr_seg(*seg, mr, umr_inline);
+ if (check_not_free)
+ flags |= MLX5_UMR_CHECK_NOT_FREE;
+ if (umr_inline)
+ flags |= MLX5_UMR_INLINE;
+
+ set_reg_umr_seg(*seg, mr, flags);
*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
struct mlx5_core_dev *mdev = dev->mdev;
+ struct ib_reg_wr reg_pi_wr;
struct mlx5_ib_qp *qp;
struct mlx5_ib_mr *mr;
+ struct mlx5_ib_mr *pi_mr;
+ struct mlx5_ib_mr pa_pi_mr;
+ struct ib_sig_attrs *sig_attrs;
struct mlx5_wqe_xrc_seg *xrc;
struct mlx5_bf *bf;
void *cur_edge;
goto out;
}
- if (wr->opcode == IB_WR_REG_MR) {
+ if (wr->opcode == IB_WR_REG_MR ||
+ wr->opcode == IB_WR_REG_MR_INTEGRITY) {
fence = dev->umr_fence;
next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
} else {
qp->sq.wr_data[idx] = IB_WR_REG_MR;
ctrl->imm = cpu_to_be32(reg_wr(wr)->key);
err = set_reg_wr(qp, reg_wr(wr), &seg, &size,
- &cur_edge);
+ &cur_edge, true);
if (err) {
*bad_wr = wr;
goto out;
num_sge = 0;
break;
- case IB_WR_REG_SIG_MR:
- qp->sq.wr_data[idx] = IB_WR_REG_SIG_MR;
- mr = to_mmr(sig_handover_wr(wr)->sig_mr);
-
+ case IB_WR_REG_MR_INTEGRITY:
+ qp->sq.wr_data[idx] = IB_WR_REG_MR_INTEGRITY;
+
+ mr = to_mmr(reg_wr(wr)->mr);
+ pi_mr = mr->pi_mr;
+
+ if (pi_mr) {
+ memset(®_pi_wr, 0,
+ sizeof(struct ib_reg_wr));
+
+ reg_pi_wr.mr = &pi_mr->ibmr;
+ reg_pi_wr.access = reg_wr(wr)->access;
+ reg_pi_wr.key = pi_mr->ibmr.rkey;
+
+ ctrl->imm = cpu_to_be32(reg_pi_wr.key);
+ /* UMR for data + prot registration */
+ err = set_reg_wr(qp, ®_pi_wr, &seg,
+ &size, &cur_edge,
+ false);
+ if (err) {
+ *bad_wr = wr;
+ goto out;
+ }
+ finish_wqe(qp, ctrl, seg, size,
+ cur_edge, idx, wr->wr_id,
+ nreq, fence,
+ MLX5_OPCODE_UMR);
+
+ err = begin_wqe(qp, &seg, &ctrl, wr,
+ &idx, &size, &cur_edge,
+ nreq);
+ if (err) {
+ mlx5_ib_warn(dev, "\n");
+ err = -ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+ } else {
+ memset(&pa_pi_mr, 0,
+ sizeof(struct mlx5_ib_mr));
+ /* No UMR, use local_dma_lkey */
+ pa_pi_mr.ibmr.lkey =
+ mr->ibmr.pd->local_dma_lkey;
+
+ pa_pi_mr.ndescs = mr->ndescs;
+ pa_pi_mr.data_length = mr->data_length;
+ pa_pi_mr.data_iova = mr->data_iova;
+ if (mr->meta_ndescs) {
+ pa_pi_mr.meta_ndescs =
+ mr->meta_ndescs;
+ pa_pi_mr.meta_length =
+ mr->meta_length;
+ pa_pi_mr.pi_iova = mr->pi_iova;
+ }
+
+ pa_pi_mr.ibmr.length = mr->ibmr.length;
+ mr->pi_mr = &pa_pi_mr;
+ }
ctrl->imm = cpu_to_be32(mr->ibmr.rkey);
- err = set_sig_umr_wr(wr, qp, &seg, &size,
- &cur_edge);
+ /* UMR for sig MR */
+ err = set_pi_umr_wr(wr, qp, &seg, &size,
+ &cur_edge);
if (err) {
mlx5_ib_warn(dev, "\n");
*bad_wr = wr;
goto out;
}
-
finish_wqe(qp, ctrl, seg, size, cur_edge, idx,
wr->wr_id, nreq, fence,
MLX5_OPCODE_UMR);
+
/*
* SET_PSV WQEs are not signaled and solicited
* on error
*/
+ sig_attrs = mr->ibmr.sig_attrs;
err = __begin_wqe(qp, &seg, &ctrl, wr, &idx,
&size, &cur_edge, nreq, false,
true);
*bad_wr = wr;
goto out;
}
-
- err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->mem,
- mr->sig->psv_memory.psv_idx, &seg,
- &size);
+ err = set_psv_wr(&sig_attrs->mem,
+ mr->sig->psv_memory.psv_idx,
+ &seg, &size);
if (err) {
mlx5_ib_warn(dev, "\n");
*bad_wr = wr;
goto out;
}
-
finish_wqe(qp, ctrl, seg, size, cur_edge, idx,
- wr->wr_id, nreq, fence,
+ wr->wr_id, nreq, next_fence,
MLX5_OPCODE_SET_PSV);
+
err = __begin_wqe(qp, &seg, &ctrl, wr, &idx,
&size, &cur_edge, nreq, false,
true);
*bad_wr = wr;
goto out;
}
-
- err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->wire,
- mr->sig->psv_wire.psv_idx, &seg,
- &size);
+ err = set_psv_wr(&sig_attrs->wire,
+ mr->sig->psv_wire.psv_idx,
+ &seg, &size);
if (err) {
mlx5_ib_warn(dev, "\n");
*bad_wr = wr;
goto out;
}
-
finish_wqe(qp, ctrl, seg, size, cur_edge, idx,
- wr->wr_id, nreq, fence,
+ wr->wr_id, nreq, next_fence,
MLX5_OPCODE_SET_PSV);
- qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
+
+ qp->next_fence =
+ MLX5_FENCE_MODE_INITIATOR_SMALL;
num_sge = 0;
goto skip_psv;
return ERR_PTR(err);
}
-int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata)
+void mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(wq->device);
struct mlx5_ib_rwq *rwq = to_mrwq(wq);
mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp);
destroy_user_rq(dev, wq->pd, rwq, udata);
kfree(rwq);
-
- return 0;
}
struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
/* Run the CQ handler - this makes sure that the drain WR will
* be processed if wasn't processed yet.
*/
- mcq->mcq.comp(&mcq->mcq);
+ mcq->mcq.comp(&mcq->mcq, NULL);
}
wait_for_completion(&sdrain->done);
}
EXPORT_SYMBOL(mlx5_unregister_interface);
+/*
+ * Report whether the first registered interface matching @protocol has a
+ * device context for @mdev with MLX5_INTERFACE_ADDED set.
+ *
+ * Must be called with intf_mutex held.
+ */
+static bool mlx5_has_added_dev_by_protocol(struct mlx5_core_dev *mdev, int protocol)
+{
+	struct mlx5_interface *intf;
+
+	list_for_each_entry(intf, &intf_list, list) {
+		struct mlx5_device_context *ctx;
+
+		if (intf->protocol != protocol)
+			continue;
+
+		/* Only the first interface with this protocol is inspected */
+		ctx = mlx5_get_device(intf, &mdev->priv);
+		return ctx && test_bit(MLX5_INTERFACE_ADDED, &ctx->state);
+	}
+
+	return false;
+}
+
+/*
+ * Remove and re-add the device for @protocol under mlx5_intf_mutex, but
+ * only if mlx5_has_added_dev_by_protocol() reports it as currently added.
+ */
void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol)
{
 mutex_lock(&mlx5_intf_mutex);
- mlx5_remove_dev_by_protocol(mdev, protocol);
- mlx5_add_dev_by_protocol(mdev, protocol);
+ if (mlx5_has_added_dev_by_protocol(mdev, protocol)) {
+ mlx5_remove_dev_by_protocol(mdev, protocol);
+ mlx5_add_dev_by_protocol(mdev, protocol);
+ }
 mutex_unlock(&mlx5_intf_mutex);
}
/* Must be called with intf_mutex held */
struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
{
- u32 pci_id = mlx5_gen_pci_id(dev);
struct mlx5_core_dev *res = NULL;
struct mlx5_core_dev *tmp_dev;
struct mlx5_priv *priv;
+ u32 pci_id;
+ if (!mlx5_core_is_pf(dev))
+ return NULL;
+
+ pci_id = mlx5_gen_pci_id(dev);
list_for_each_entry(priv, &mlx5_dev_list, dev_list) {
tmp_dev = container_of(priv, struct mlx5_core_dev, priv);
+ if (!mlx5_core_is_pf(tmp_dev))
+ continue;
+
if ((dev != tmp_dev) && (mlx5_gen_pci_id(tmp_dev) == pci_id)) {
res = tmp_dev;
break;
/* control path */
struct mlx5_wq_ctrl wq_ctrl;
struct mlx5e_channel *channel;
+ int ch_ix;
int txq_ix;
u32 rate_limit;
struct work_struct recover_work;
struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more);
void mlx5e_trigger_irq(struct mlx5e_icosq *sq);
- void mlx5e_completion_event(struct mlx5_core_cq *mcq);
+ void mlx5e_completion_event(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe);
void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event);
int mlx5e_napi_poll(struct napi_struct *napi, int budget);
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
netdev_features_t mlx5e_features_check(struct sk_buff *skb,
struct net_device *netdev,
netdev_features_t features);
+int mlx5e_set_features(struct net_device *netdev, netdev_features_t features);
#ifdef CONFIG_MLX5_ESWITCH
int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac);
int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, int max_tx_rate);
sq->clock = &mdev->clock;
sq->mkey_be = c->mkey_be;
sq->channel = c;
+ sq->ch_ix = c->ix;
sq->txq_ix = txq_ix;
sq->uar_map = mdev->mlx5e_res.bfreg.map;
sq->min_inline_mode = params->tx_min_inline_mode;
static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
{
+ u32 out[MLX5_ST_SZ_DW(create_cq_out)];
struct mlx5_core_dev *mdev = cq->mdev;
struct mlx5_core_cq *mcq = &cq->mcq;
MLX5_ADAPTER_PAGE_SHIFT);
MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
- err = mlx5_core_create_cq(mdev, mcq, in, inlen);
+ err = mlx5_core_create_cq(mdev, mcq, in, inlen, out, sizeof(out));
kvfree(in);
return 0;
}
-static int mlx5e_set_features(struct net_device *netdev,
- netdev_features_t features)
+int mlx5e_set_features(struct net_device *netdev, netdev_features_t features)
{
netdev_features_t oper_features = netdev->features;
int err = 0;
netdev_warn(netdev, "Disabling LRO, not supported in legacy RQ\n");
}
+ if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
+ features &= ~NETIF_F_RXHASH;
+ if (netdev->features & NETIF_F_RXHASH)
+ netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n");
+ }
+
mutex_unlock(&priv->state_lock);
return features;
memcpy(&priv->tstamp, &config, sizeof(config));
mutex_unlock(&priv->state_lock);
+ /* might need to fix some features */
+ netdev_update_features(priv->netdev);
+
return copy_to_user(ifr->ifr_data, &config,
sizeof(config)) ? -EFAULT : 0;
}
{
struct mlx5e_priv *priv = netdev_priv(netdev);
- mlx5_query_nic_vport_mac_address(priv->mdev, 0, netdev->dev_addr);
+ mlx5_query_mac_address(priv->mdev, netdev->dev_addr);
if (is_zero_ether_addr(netdev->dev_addr) &&
!MLX5_CAP_GEN(priv->mdev, vport_group_manager)) {
eth_hw_addr_random(netdev);
if (!priv->channels.params.scatter_fcs_en)
netdev->features &= ~NETIF_F_RXFCS;
+ /* prefer CQE compression over rxhash */
+ if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS))
+ netdev->features &= ~NETIF_F_RXHASH;
+
#define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f)
if (FT_CAP(flow_modify_en) &&
FT_CAP(modify_root) &&
struct mlx5e_priv *priv = vpriv;
struct net_device *netdev = priv->netdev;
+#ifdef CONFIG_MLX5_ESWITCH
+ if (MLX5_ESWITCH_MANAGER(mdev) && vpriv == mdev)
+ return;
+#endif
+
if (!netif_device_present(netdev))
return;
#ifdef CONFIG_MLX5_ESWITCH
if (MLX5_ESWITCH_MANAGER(mdev) &&
- mlx5_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) {
+ mlx5_eswitch_mode(mdev->priv.eswitch) == MLX5_ESWITCH_OFFLOADS) {
mlx5e_rep_register_vport_reps(mdev);
return mdev;
}
+/*
+ * Fill the ethtool driver info for a representor netdev: driver name,
+ * version (UTS_RELEASE) and the firmware version string built from the
+ * firmware revision numbers and the board id of the underlying device.
+ */
static void mlx5e_rep_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *drvinfo)
{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
strlcpy(drvinfo->driver, mlx5e_rep_driver_name,
sizeof(drvinfo->driver));
strlcpy(drvinfo->version, UTS_RELEASE, sizeof(drvinfo->version));
+ /* "<major>.<minor>.<4-digit sub> (<board id, at most 16 chars>)" */
+ snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+ "%d.%d.%04d (%.16s)",
+ fw_rev_maj(mdev), fw_rev_min(mdev),
+ fw_rev_sub(mdev), mdev->board_id);
+}
+
+/*
+ * get_drvinfo variant that fills the common representor info through
+ * mlx5e_rep_get_drvinfo() and additionally reports the PCI name of the
+ * underlying device as bus_info.
+ */
+static void mlx5e_uplink_rep_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *drvinfo)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ mlx5e_rep_get_drvinfo(dev, drvinfo);
+ strlcpy(drvinfo->bus_info, pci_name(priv->mdev->pdev),
+ sizeof(drvinfo->bus_info));
}
static const struct counter_desc sw_rep_stats_desc[] = {
};
static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = {
- .get_drvinfo = mlx5e_rep_get_drvinfo,
+ .get_drvinfo = mlx5e_uplink_rep_get_drvinfo,
.get_link = ethtool_op_get_link,
.get_strings = mlx5e_rep_get_strings,
.get_sset_count = mlx5e_rep_get_sset_count,
struct mlx5e_priv *uplink_priv = NULL;
struct net_device *uplink_dev;
- if (esw->mode == SRIOV_NONE)
+ if (esw->mode == MLX5_ESWITCH_NONE)
return -EOPNOTSUPP;
uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
struct mlx5e_rep_sq *rep_sq, *tmp;
struct mlx5e_rep_priv *rpriv;
- if (esw->mode != SRIOV_OFFLOADS)
+ if (esw->mode != MLX5_ESWITCH_OFFLOADS)
return;
rpriv = mlx5e_rep_to_rep_priv(rep);
int err;
int i;
- if (esw->mode != SRIOV_OFFLOADS)
+ if (esw->mode != MLX5_ESWITCH_OFFLOADS)
return 0;
rpriv = mlx5e_rep_to_rep_priv(rep);
struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
if (!mlx5e_tc_tun_device_to_offload(priv, netdev) &&
- !is_vlan_dev(netdev))
+ !(is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev))
return NOTIFY_OK;
switch (event) {
.ndo_get_vf_stats = mlx5e_get_vf_stats,
.ndo_set_vf_vlan = mlx5e_uplink_rep_set_vf_vlan,
.ndo_get_port_parent_id = mlx5e_rep_get_port_parent_id,
+ .ndo_set_features = mlx5e_set_features,
};
bool mlx5e_eswitch_rep(struct net_device *netdev)
SET_NETDEV_DEV(netdev, mdev->device);
netdev->netdev_ops = &mlx5e_netdev_ops_uplink_rep;
/* we want a persistent mac for the uplink rep */
- mlx5_query_nic_vport_mac_address(mdev, 0, netdev->dev_addr);
+ mlx5_query_mac_address(mdev, netdev->dev_addr);
netdev->ethtool_ops = &mlx5e_uplink_rep_ethtool_ops;
#ifdef CONFIG_MLX5_CORE_EN_DCB
if (MLX5_CAP_GEN(mdev, qos))
netdev->watchdog_timeo = 15 * HZ;
+ netdev->features |= NETIF_F_NETNS_LOCAL;
- netdev->features |= NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL;
- netdev->hw_features |= NETIF_F_HW_TC;
-
+ netdev->hw_features |= NETIF_F_HW_TC;
netdev->hw_features |= NETIF_F_SG;
netdev->hw_features |= NETIF_F_IP_CSUM;
netdev->hw_features |= NETIF_F_IPV6_CSUM;
netdev->hw_features |= NETIF_F_TSO6;
netdev->hw_features |= NETIF_F_RXCSUM;
- if (rep->vport != MLX5_VPORT_UPLINK)
+ if (rep->vport == MLX5_VPORT_UPLINK)
+ netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
+ else
netdev->features |= NETIF_F_VLAN_CHALLENGED;
netdev->features |= netdev->hw_features;
struct mlx5e_tc_flow *flow,
struct netlink_ext_ack *extack)
{
+ struct mlx5_flow_context *flow_context = &parse_attr->spec.flow_context;
struct mlx5_nic_flow_attr *attr = flow->nic_attr;
struct mlx5_core_dev *dev = priv->mdev;
struct mlx5_flow_destination dest[2] = {};
struct mlx5_flow_act flow_act = {
.action = attr->action,
- .flow_tag = attr->flow_tag,
.reformat_id = 0,
- .flags = FLOW_ACT_HAS_TAG | FLOW_ACT_NO_APPEND,
+ .flags = FLOW_ACT_NO_APPEND,
};
struct mlx5_fc *counter = NULL;
bool table_created = false;
int err, dest_ix = 0;
+ flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
+ flow_context->flow_tag = attr->flow_tag;
+
if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) {
err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
if (err) {
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
struct flow_match_vlan match;
- flow_rule_match_vlan(rule, &match);
+ flow_rule_match_cvlan(rule, &match);
if (match.mask->vlan_id ||
match.mask->vlan_priority ||
match.mask->vlan_tpid) {
offsetof(struct pedit_headers, field) + (off), \
MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)}
+/* SAME_VAL_MASK - compare a (value, mask) pair against a match (value, mask)
+ * pair, dereferencing all four pointers as @type.
+ *
+ * Evaluates to true only when both conditions hold:
+ *  - the masked values are the same:
+ *    (val & mask) == (matchval & matchmask);
+ *  - there are no rewrites that do not have a match: no bit is set in the
+ *    rewrite mask that is not also set in the match mask.
+ */
+#define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \
+ type matchmaskx = *(type *)(matchmaskp); \
+ type matchvalx = *(type *)(matchvalp); \
+ type maskx = *(type *)(maskp); \
+ type valx = *(type *)(valp); \
+ \
+ (valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \
+ matchmaskx)); \
+})
+
static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp,
void *matchmaskp, int size)
{
switch (size) {
case sizeof(u8):
- same = ((*(u8 *)valp) & (*(u8 *)maskp)) ==
- ((*(u8 *)matchvalp) & (*(u8 *)matchmaskp));
+ same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp);
break;
case sizeof(u16):
- same = ((*(u16 *)valp) & (*(u16 *)maskp)) ==
- ((*(u16 *)matchvalp) & (*(u16 *)matchmaskp));
+ same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp);
break;
case sizeof(u32):
- same = ((*(u32 *)valp) & (*(u32 *)maskp)) ==
- ((*(u32 *)matchvalp) & (*(u32 *)matchmaskp));
+ same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp);
break;
}
/* in case all pedit actions are skipped, remove the MOD_HDR
* flag.
*/
- if (parse_attr->num_mod_hdr_actions == 0)
+ if (parse_attr->num_mod_hdr_actions == 0) {
action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ kfree(parse_attr->mod_hdr_actions);
+ }
}
attr->action = action;
if (!flow_action_has_entries(flow_action))
return -EINVAL;
- attr->in_rep = rpriv->rep;
- attr->in_mdev = priv->mdev;
-
flow_action_for_each(i, act, flow_action) {
switch (act->id) {
case FLOW_ACTION_DROP:
*/
if (parse_attr->num_mod_hdr_actions == 0) {
action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ kfree(parse_attr->mod_hdr_actions);
if (!((action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
(action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)))
attr->split_count = 0;
if (!tc_can_offload_extack(priv->netdev, f->common.extack))
return -EOPNOTSUPP;
- if (esw && esw->mode == SRIOV_OFFLOADS)
+ if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
err = mlx5e_add_fdb_flow(priv, f, flow_flags,
filter_dev, flow);
else
return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
}
+ /* Non-static wrapper around modify_esw_vport_context_cmd(): forwards the
+  * caller-built command buffer @in (@inlen bytes) for @vport using the core
+  * device of @esw, and returns that helper's result unchanged.
+  */
+ int mlx5_eswitch_modify_esw_vport_context(struct mlx5_eswitch *esw, u16 vport,
+ void *in, int inlen)
+ {
+ return modify_esw_vport_context_cmd(esw->dev, vport, in, inlen);
+ }
+
+ /* Build and execute a QUERY_ESW_VPORT_CONTEXT command for @vport.
+  *
+  * @dev:    core device the command is executed on
+  * @vport:  vport number placed in the command's vport_number field
+  * @out:    caller-provided output buffer
+  * @outlen: size of @out in bytes
+  *
+  * other_vport is always set to 1. Returns the result of mlx5_cmd_exec().
+  */
+ static int query_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport,
+ 				       void *out, int outlen)
+ {
+ 	u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {};
+
+ 	MLX5_SET(query_esw_vport_context_in, in, opcode,
+ 		 MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT);
+ 	/* Address the fields through the query layout that @in is sized for,
+ 	 * rather than borrowing the modify command's layout.
+ 	 */
+ 	MLX5_SET(query_esw_vport_context_in, in, vport_number, vport);
+ 	MLX5_SET(query_esw_vport_context_in, in, other_vport, 1);
+ 	return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+ }
+
+ /* Non-static wrapper around query_esw_vport_context_cmd(): queries the
+  * e-switch vport context of @vport on the core device of @esw into the
+  * caller-provided buffer @out (@outlen bytes) and returns the helper's result.
+  */
+ int mlx5_eswitch_query_esw_vport_context(struct mlx5_eswitch *esw, u16 vport,
+ void *out, int outlen)
+ {
+ return query_esw_vport_context_cmd(esw->dev, vport, out, outlen);
+ }
+
static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport,
u16 vlan, u8 qos, u8 set_flags)
{
fdb_add:
/* SRIOV is enabled: Forward UC MAC to vport */
- if (esw->fdb_table.legacy.fdb && esw->mode == SRIOV_LEGACY)
+ if (esw->fdb_table.legacy.fdb && esw->mode == MLX5_ESWITCH_LEGACY)
vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport);
esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM fr(%p)\n",
struct mlx5_eswitch *esw = dev->priv.eswitch;
u8 mac[ETH_ALEN];
- mlx5_query_nic_vport_mac_address(dev, vport->vport, mac);
+ mlx5_query_nic_vport_mac_address(dev, vport->vport, true, mac);
esw_debug(dev, "vport[%d] Context Changed: perm mac: %pM\n",
vport->vport, mac);
vport->vport, MLX5_CAP_ESW_EGRESS_ACL(dev, log_max_ft_size));
root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_EGRESS,
- vport->vport);
+ mlx5_eswitch_vport_num_to_index(esw, vport->vport));
if (!root_ns) {
esw_warn(dev, "Failed to get E-Switch egress flow namespace for vport (%d)\n", vport->vport);
return -EOPNOTSUPP;
vport->vport, MLX5_CAP_ESW_INGRESS_ACL(dev, log_max_ft_size));
root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS,
- vport->vport);
+ mlx5_eswitch_vport_num_to_index(esw, vport->vport));
if (!root_ns) {
esw_warn(dev, "Failed to get E-Switch ingress flow namespace for vport (%d)\n", vport->vport);
return -EOPNOTSUPP;
vport->ingress.drop_rule = NULL;
vport->ingress.allow_rule = NULL;
+
+ esw_vport_del_ingress_acl_modify_metadata(esw, vport);
}
void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw,
static void esw_apply_vport_conf(struct mlx5_eswitch *esw,
struct mlx5_vport *vport)
{
- int vport_num = vport->vport;
+ u16 vport_num = vport->vport;
+ int flags;
if (esw->manager_vport == vport_num)
return;
vport->info.node_guid);
}
+ flags = (vport->info.vlan || vport->info.qos) ?
+ SET_VLAN_STRIP | SET_VLAN_INSERT : 0;
modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, vport->info.qos,
- (vport->info.vlan || vport->info.qos));
+ flags);
/* Only legacy mode needs ACLs */
- if (esw->mode == SRIOV_LEGACY) {
+ if (esw->mode == MLX5_ESWITCH_LEGACY) {
esw_vport_ingress_config(esw, vport);
esw_vport_egress_config(esw, vport);
}
esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num);
/* Create steering drop counters for ingress and egress ACLs */
- if (vport_num && esw->mode == SRIOV_LEGACY)
+ if (vport_num && esw->mode == MLX5_ESWITCH_LEGACY)
esw_vport_create_drop_counters(vport);
/* Restore old vport configuration */
vport->enabled_events = 0;
esw_vport_disable_qos(esw, vport);
if (esw->manager_vport != vport_num &&
- esw->mode == SRIOV_LEGACY) {
+ esw->mode == MLX5_ESWITCH_LEGACY) {
mlx5_modify_vport_admin_state(esw->dev,
MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
vport_num, 1,
return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
}
+ /* Register the EQ notifiers used by the e-switch.
+  *
+  * The NIC_VPORT_CHANGE notifier is always registered. The
+  * ESW_FUNCTIONS_CHANGED notifier is registered in addition only when the
+  * e-switch is in offloads mode and mlx5_eswitch_is_funcs_handler() is true
+  * for the device.
+  */
+ static void mlx5_eswitch_event_handlers_register(struct mlx5_eswitch *esw)
+ {
+ 	struct mlx5_core_dev *dev = esw->dev;
+
+ 	MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE);
+ 	mlx5_eq_notifier_register(dev, &esw->nb);
+
+ 	/* Nothing more to do unless this device handles function changes. */
+ 	if (esw->mode != MLX5_ESWITCH_OFFLOADS ||
+ 	    !mlx5_eswitch_is_funcs_handler(dev))
+ 		return;
+
+ 	MLX5_NB_INIT(&esw->esw_funcs.nb, mlx5_esw_funcs_changed_handler,
+ 		     ESW_FUNCTIONS_CHANGED);
+ 	mlx5_eq_notifier_register(dev, &esw->esw_funcs.nb);
+ }
+
+ /* Unregister the EQ notifiers used by the e-switch, then flush its workqueue.
+  *
+  * The ESW_FUNCTIONS_CHANGED notifier is removed first, under the same
+  * condition it is registered with (offloads mode and
+  * mlx5_eswitch_is_funcs_handler()), followed by the vport-change notifier.
+  */
+ static void mlx5_eswitch_event_handlers_unregister(struct mlx5_eswitch *esw)
+ {
+ 	struct mlx5_core_dev *dev = esw->dev;
+ 	bool funcs_nb_in_use = esw->mode == MLX5_ESWITCH_OFFLOADS &&
+ 			       mlx5_eswitch_is_funcs_handler(dev);
+
+ 	if (funcs_nb_in_use)
+ 		mlx5_eq_notifier_unregister(dev, &esw->esw_funcs.nb);
+
+ 	mlx5_eq_notifier_unregister(dev, &esw->nb);
+
+ 	/* Presumably drains work queued by the notifiers before the caller
+ 	 * goes on tearing down vports - confirm against the handlers.
+ 	 */
+ 	flush_workqueue(esw->work_queue);
+ }
+
/* Public E-Switch API */
#define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev))
- int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
+ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode)
{
struct mlx5_vport *vport;
- int total_nvports = 0;
int err;
int i, enabled_events;
if (!ESW_ALLOWED(esw) ||
!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) {
- esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n");
+ esw_warn(esw->dev, "FDB is not supported, aborting ...\n");
return -EOPNOTSUPP;
}
if (!MLX5_CAP_ESW_INGRESS_ACL(esw->dev, ft_support))
- esw_warn(esw->dev, "E-Switch ingress ACL is not supported by FW\n");
+ esw_warn(esw->dev, "ingress ACL is not supported by FW\n");
if (!MLX5_CAP_ESW_EGRESS_ACL(esw->dev, ft_support))
- esw_warn(esw->dev, "E-Switch engress ACL is not supported by FW\n");
-
- esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode);
-
- if (mode == SRIOV_OFFLOADS) {
- if (mlx5_core_is_ecpf_esw_manager(esw->dev))
- total_nvports = esw->total_vports;
- else
- total_nvports = nvfs + MLX5_SPECIAL_VPORTS(esw->dev);
- }
+ esw_warn(esw->dev, "engress ACL is not supported by FW\n");
esw->mode = mode;
mlx5_lag_update(esw->dev);
- if (mode == SRIOV_LEGACY) {
+ if (mode == MLX5_ESWITCH_LEGACY) {
err = esw_create_legacy_table(esw);
if (err)
goto abort;
} else {
mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH);
mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
- err = esw_offloads_init(esw, nvfs, total_nvports);
+ err = esw_offloads_init(esw);
}
if (err)
if (err)
esw_warn(esw->dev, "Failed to create eswitch TSAR");
- /* Don't enable vport events when in SRIOV_OFFLOADS mode, since:
- * 1. L2 table (MPFS) is programmed by PF/VF representors netdevs set_rx_mode
- * 2. FDB/Eswitch is programmed by user space tools
- */
- enabled_events = (mode == SRIOV_LEGACY) ? SRIOV_VPORT_EVENTS : 0;
+ enabled_events = (mode == MLX5_ESWITCH_LEGACY) ? SRIOV_VPORT_EVENTS :
+ UC_ADDR_CHANGE;
/* Enable PF vport */
vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF);
}
/* Enable VF vports */
- mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs)
+ mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs)
esw_enable_vport(esw, vport, enabled_events);
- if (mode == SRIOV_LEGACY) {
- MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE);
- mlx5_eq_notifier_register(esw->dev, &esw->nb);
- }
+ mlx5_eswitch_event_handlers_register(esw);
+
+ esw_info(esw->dev, "Enable: mode(%s), nvfs(%d), active vports(%d)\n",
+ mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS",
+ esw->esw_funcs.num_vfs, esw->enabled_vports);
- esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n",
- esw->enabled_vports);
return 0;
abort:
- esw->mode = SRIOV_NONE;
+ esw->mode = MLX5_ESWITCH_NONE;
- if (mode == SRIOV_OFFLOADS) {
+ if (mode == MLX5_ESWITCH_OFFLOADS) {
mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH);
}
return err;
}
- void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
+ void mlx5_eswitch_disable(struct mlx5_eswitch *esw)
{
struct esw_mc_addr *mc_promisc;
struct mlx5_vport *vport;
int old_mode;
int i;
- if (!ESW_ALLOWED(esw) || esw->mode == SRIOV_NONE)
+ if (!ESW_ALLOWED(esw) || esw->mode == MLX5_ESWITCH_NONE)
return;
- esw_info(esw->dev, "disable SRIOV: active vports(%d) mode(%d)\n",
- esw->enabled_vports, esw->mode);
+ esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), active vports(%d)\n",
+ esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS",
+ esw->esw_funcs.num_vfs, esw->enabled_vports);
mc_promisc = &esw->mc_promisc;
-
- if (esw->mode == SRIOV_LEGACY)
- mlx5_eq_notifier_unregister(esw->dev, &esw->nb);
+ mlx5_eswitch_event_handlers_unregister(esw);
mlx5_esw_for_all_vports(esw, i, vport)
esw_disable_vport(esw, vport);
esw_destroy_tsar(esw);
- if (esw->mode == SRIOV_LEGACY)
+ if (esw->mode == MLX5_ESWITCH_LEGACY)
esw_destroy_legacy_table(esw);
- else if (esw->mode == SRIOV_OFFLOADS)
+ else if (esw->mode == MLX5_ESWITCH_OFFLOADS)
esw_offloads_cleanup(esw);
old_mode = esw->mode;
- esw->mode = SRIOV_NONE;
+ esw->mode = MLX5_ESWITCH_NONE;
mlx5_lag_update(esw->dev);
- if (old_mode == SRIOV_OFFLOADS) {
+ if (old_mode == MLX5_ESWITCH_OFFLOADS) {
mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH);
}
esw->dev = dev;
esw->manager_vport = mlx5_eswitch_manager_vport(dev);
+ esw->first_host_vport = mlx5_eswitch_first_host_vport_num(dev);
esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq");
if (!esw->work_queue) {
}
esw->enabled_vports = 0;
- esw->mode = SRIOV_NONE;
+ esw->mode = MLX5_ESWITCH_NONE;
esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE;
if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) &&
MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))
/* Vport Administration */
int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
- int vport, u8 mac[ETH_ALEN])
+ u16 vport, u8 mac[ETH_ALEN])
{
struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
u64 node_guid;
ether_addr_copy(evport->info.mac, mac);
evport->info.node_guid = node_guid;
- if (evport->enabled && esw->mode == SRIOV_LEGACY)
+ if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY)
err = esw_vport_ingress_config(esw, evport);
unlock:
}
int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
- int vport, int link_state)
+ u16 vport, int link_state)
{
struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
int err = 0;
}
int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
- int vport, struct ifla_vf_info *ivi)
+ u16 vport, struct ifla_vf_info *ivi)
{
struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
}
int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
- int vport, u16 vlan, u8 qos, u8 set_flags)
+ u16 vport, u16 vlan, u8 qos, u8 set_flags)
{
struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
int err = 0;
evport->info.vlan = vlan;
evport->info.qos = qos;
- if (evport->enabled && esw->mode == SRIOV_LEGACY) {
+ if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) {
err = esw_vport_ingress_config(esw, evport);
if (err)
goto unlock;
}
int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
- int vport, u16 vlan, u8 qos)
+ u16 vport, u16 vlan, u8 qos)
{
u8 set_flags = 0;
}
int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
- int vport, bool spoofchk)
+ u16 vport, bool spoofchk)
{
struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
bool pschk;
mlx5_core_warn(esw->dev,
"Spoofchk in set while MAC is invalid, vport(%d)\n",
evport->vport);
- if (evport->enabled && esw->mode == SRIOV_LEGACY)
+ if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY)
err = esw_vport_ingress_config(esw, evport);
if (err)
evport->info.spoofchk = pschk;
return -EPERM;
mutex_lock(&esw->state_lock);
- if (esw->mode != SRIOV_LEGACY) {
+ if (esw->mode != MLX5_ESWITCH_LEGACY) {
err = -EOPNOTSUPP;
goto out;
}
return -EPERM;
mutex_lock(&esw->state_lock);
- if (esw->mode != SRIOV_LEGACY) {
+ if (esw->mode != MLX5_ESWITCH_LEGACY) {
err = -EOPNOTSUPP;
goto out;
}
}
int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
- int vport, bool setting)
+ u16 vport, bool setting)
{
struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
return 0;
}
-int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, int vport,
+int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport,
u32 max_rate, u32 min_rate)
{
struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
u64 bytes = 0;
int err = 0;
- if (!vport->enabled || esw->mode != SRIOV_LEGACY)
+ if (!vport->enabled || esw->mode != MLX5_ESWITCH_LEGACY)
return 0;
if (vport->egress.drop_counter)
}
int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
- int vport_num,
+ u16 vport_num,
struct ifla_vf_stats *vf_stats)
{
struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw)
{
- return ESW_ALLOWED(esw) ? esw->mode : SRIOV_NONE;
+ return ESW_ALLOWED(esw) ? esw->mode : MLX5_ESWITCH_NONE;
}
EXPORT_SYMBOL_GPL(mlx5_eswitch_mode);
bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1)
{
- if ((dev0->priv.eswitch->mode == SRIOV_NONE &&
- dev1->priv.eswitch->mode == SRIOV_NONE) ||
- (dev0->priv.eswitch->mode == SRIOV_OFFLOADS &&
- dev1->priv.eswitch->mode == SRIOV_OFFLOADS))
+ if ((dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
+ dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE) ||
+ (dev0->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS &&
+ dev1->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS))
return true;
return false;
bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0,
struct mlx5_core_dev *dev1)
{
- return (dev0->priv.eswitch->mode == SRIOV_OFFLOADS &&
- dev1->priv.eswitch->mode == SRIOV_OFFLOADS);
+ return (dev0->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS &&
+ dev1->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS);
+ }
+
+ /* Refresh esw->esw_funcs.num_vfs.
+  *
+  * Warns once if the e-switch mode is not MLX5_ESWITCH_NONE. When the device
+  * is not the ECPF e-switch manager the caller-supplied @num_vfs is stored.
+  * Otherwise the count is read from host_params_context.host_num_of_vfs in
+  * the mlx5_esw_query_functions() output; if that query fails the stored
+  * value is left untouched.
+  */
+ void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs)
+ {
+ 	u32 out[MLX5_ST_SZ_DW(query_esw_functions_out)] = {};
+
+ 	WARN_ON_ONCE(esw->mode != MLX5_ESWITCH_NONE);
+
+ 	if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+ 		if (mlx5_esw_query_functions(esw->dev, out, sizeof(out)))
+ 			return;
+
+ 		esw->esw_funcs.num_vfs =
+ 			MLX5_GET(query_esw_functions_out, out,
+ 				 host_params_context.host_num_of_vfs);
+ 	} else {
+ 		esw->esw_funcs.num_vfs = num_vfs;
+ 	}
}
struct mlx5_flow_group *allow_spoofchk_only_grp;
struct mlx5_flow_group *allow_untagged_only_grp;
struct mlx5_flow_group *drop_grp;
+ int modify_metadata_id;
+ struct mlx5_flow_handle *modify_metadata_rule;
struct mlx5_flow_handle *allow_rule;
struct mlx5_flow_handle *drop_rule;
struct mlx5_fc *drop_counter;
u16 num_vfs;
};
+ enum {
+ MLX5_ESWITCH_VPORT_MATCH_METADATA = BIT(0),
+ };
+
struct mlx5_eswitch {
struct mlx5_core_dev *dev;
struct mlx5_nb nb;
struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE];
struct workqueue_struct *work_queue;
struct mlx5_vport *vports;
+ u32 flags;
int total_vports;
int enabled_vports;
/* Synchronize between vport change events
int mode;
int nvports;
u16 manager_vport;
+ u16 first_host_vport;
struct mlx5_esw_functions esw_funcs;
};
void esw_offloads_cleanup(struct mlx5_eswitch *esw);
- int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports,
- int total_nvports);
+ int esw_offloads_init(struct mlx5_eswitch *esw);
void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw);
int esw_offloads_init_reps(struct mlx5_eswitch *esw);
void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw,
struct mlx5_vport *vport);
void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw,
struct mlx5_vport *vport);
+ void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport);
/* E-Switch API */
int mlx5_eswitch_init(struct mlx5_core_dev *dev);
void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw);
- int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode);
- void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw);
+ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode);
+ void mlx5_eswitch_disable(struct mlx5_eswitch *esw);
int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
- int vport, u8 mac[ETH_ALEN]);
+ u16 vport, u8 mac[ETH_ALEN]);
int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
- int vport, int link_state);
+ u16 vport, int link_state);
int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
- int vport, u16 vlan, u8 qos);
+ u16 vport, u16 vlan, u8 qos);
int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
- int vport, bool spoofchk);
+ u16 vport, bool spoofchk);
int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
- int vport_num, bool setting);
-int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, int vport,
+ u16 vport_num, bool setting);
+int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport,
u32 max_rate, u32 min_rate);
int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting);
int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting);
int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
- int vport, struct ifla_vf_info *ivi);
+ u16 vport, struct ifla_vf_info *ivi);
int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
- int vport,
+ u16 vport,
struct ifla_vf_stats *vf_stats);
void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule);
+ int mlx5_eswitch_modify_esw_vport_context(struct mlx5_eswitch *esw, u16 vport,
+ void *in, int inlen);
+ int mlx5_eswitch_query_esw_vport_context(struct mlx5_eswitch *esw, u16 vport,
+ void *out, int outlen);
+
struct mlx5_flow_spec;
struct mlx5_esw_flow_attr;
mlx5_eswitch_get_chain_range(struct mlx5_eswitch *esw);
struct mlx5_flow_handle *
-mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport,
+mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport,
struct mlx5_flow_destination *dest);
enum {
int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
struct netlink_ext_ack *extack);
int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode);
- int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode);
+ int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode);
int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
enum devlink_eswitch_encap_mode encap,
struct netlink_ext_ack *extack);
int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
struct mlx5_esw_flow_attr *attr);
int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
- int vport, u16 vlan, u8 qos, u8 set_flags);
+ u16 vport, u16 vlan, u8 qos, u8 set_flags);
static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev,
u8 vlan_depth)
MLX5_VPORT_ECPF : MLX5_VPORT_PF;
}
+ /* First vport number belonging to the host: MLX5_VPORT_PF when @dev is the
+  * ECPF e-switch manager, MLX5_VPORT_FIRST_VF otherwise.
+  */
+ static inline u16 mlx5_eswitch_first_host_vport_num(struct mlx5_core_dev *dev)
+ {
+ 	if (mlx5_core_is_ecpf_esw_manager(dev))
+ 		return MLX5_VPORT_PF;
+
+ 	return MLX5_VPORT_FIRST_VF;
+ }
+
static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev)
{
/* Ideally device should have the functions changed supported
return vport_num;
}
-static inline int mlx5_eswitch_index_to_vport_num(struct mlx5_eswitch *esw,
+static inline u16 mlx5_eswitch_index_to_vport_num(struct mlx5_eswitch *esw,
int index)
{
if (index == mlx5_eswitch_ecpf_idx(esw) &&
#define mlx5_esw_for_each_vf_vport_num_reverse(esw, vport, nvfs) \
for ((vport) = (nvfs); (vport) >= MLX5_VPORT_FIRST_VF; (vport)--)
+ /* Host function iterators.
+  *
+  * All four macros walk the range [(esw)->first_host_vport, nvfs], both ends
+  * inclusive. Includes host PF (vport 0) if it's not esw manager.
+  *
+  * The *_rep forms use @i as the index into (esw)->offloads.vport_reps and
+  * point @rep at the current entry. @rep is assigned before the bound is
+  * tested, so once the loop has finished it holds the address one step past
+  * the last visited entry and must not be dereferenced.
+  */
+ #define mlx5_esw_for_each_host_func_rep(esw, i, rep, nvfs) \
+ for ((i) = (esw)->first_host_vport; \
+ (rep) = &(esw)->offloads.vport_reps[i], \
+ (i) <= (nvfs); (i)++)
+
+ /* Same range as above, walked from @nvfs down to first_host_vport.
+  * NOTE(review): when first_host_vport is 0 the ">=" test can only fail for
+  * a signed @i - confirm callers pass a signed iterator.
+  */
+ #define mlx5_esw_for_each_host_func_rep_reverse(esw, i, rep, nvfs) \
+ for ((i) = (nvfs); \
+ (rep) = &(esw)->offloads.vport_reps[i], \
+ (i) >= (esw)->first_host_vport; (i)--)
+
+ /* Iterate @vport over the host function range in ascending order. */
+ #define mlx5_esw_for_each_host_func_vport(esw, vport, nvfs) \
+ for ((vport) = (esw)->first_host_vport; \
+ (vport) <= (nvfs); (vport)++)
+
+ /* Iterate @vport over the host function range in descending order.
+  * NOTE(review): same signed-iterator caveat as the _rep_reverse form when
+  * first_host_vport is 0.
+  */
+ #define mlx5_esw_for_each_host_func_vport_reverse(esw, vport, nvfs) \
+ for ((vport) = (nvfs); \
+ (vport) >= (esw)->first_host_vport; (vport)--)
+
struct mlx5_vport *__must_check
mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num);
+ bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num);
+
+ void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs);
+ int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data);
+
#else /* CONFIG_MLX5_ESWITCH */
/* eswitch API stubs */
static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {}
- static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; }
- static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {}
+ static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) { return 0; }
+ static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw) {}
static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; }
static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; }
static inline int
return -EOPNOTSUPP;
}
+ static inline void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs) {}
+
#define FDB_MAX_CHAIN 1
#define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1)
#define FDB_MAX_PRIO 1
static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw,
u16 vport_num)
{
- u16 idx = mlx5_eswitch_vport_num_to_index(esw, vport_num);
+ int idx = mlx5_eswitch_vport_num_to_index(esw, vport_num);
WARN_ON(idx > esw->total_vports - 1);
return &esw->offloads.vport_reps[idx];
return 1;
}
+ /* Fill in the source-port part of @spec for a rule described by @attr.
+  *
+  * With vport metadata matching enabled, the rule matches metadata_reg_c_0
+  * against mlx5_eswitch_get_vport_metadata_for_match() for
+  * attr->in_rep->vport on the e-switch of attr->in_mdev. Otherwise it matches
+  * the misc source_port field, plus source_eswitch_owner_vhca_id when the
+  * merged_eswitch capability is set.
+  *
+  * Bits are OR-ed into spec->match_criteria_enable, so whatever is already
+  * enabled there is preserved.
+  */
+ static void
+ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_esw_flow_attr *attr)
+ {
+ void *misc2;
+ void *misc;
+
+ /* Use metadata matching because vport is not represented by single
+ * VHCA in dual-port RoCE mode, and matching on source vport may fail.
+ */
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ /* Match value: reg_c_0 equals the metadata of the ingress rep's vport. */
+ misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(attr->in_mdev->priv.eswitch,
+ attr->in_rep->vport));
+
+ /* Match criteria: every bit of reg_c_0. */
+ misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
+ MLX5_SET_TO_ONES(fte_match_set_misc2, misc2, metadata_reg_c_0);
+
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
+ /* Nothing is written to the misc section in this branch; enable it
+ * only if its criteria already contain a non-zero byte.
+ */
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ if (memchr_inv(misc, 0, MLX5_ST_SZ_BYTES(fte_match_set_misc)))
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+ } else {
+ /* Match value: source vport, qualified by the owning VHCA id when
+ * the merged_eswitch capability is set.
+ */
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport);
+
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ MLX5_SET(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(attr->in_mdev, vhca_id));
+
+ /* Match criteria: all bits of the fields set above. */
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id);
+
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+ }
+
+ /* Mark the flow source as uplink when the device reports the flow_source
+ * capability and the ingress rep is the uplink vport.
+ */
+ if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source) &&
+ attr->in_rep->vport == MLX5_VPORT_UPLINK)
+ spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
+ }
+
struct mlx5_flow_handle *
mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_spec *spec,
struct mlx5_flow_handle *rule;
struct mlx5_flow_table *fdb;
int j, i = 0;
- void *misc;
- if (esw->mode != SRIOV_OFFLOADS)
+ if (esw->mode != MLX5_ESWITCH_OFFLOADS)
return ERR_PTR(-EOPNOTSUPP);
flow_act.action = attr->action;
i++;
}
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
- MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport);
-
- if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
- MLX5_SET(fte_match_set_misc, misc,
- source_eswitch_owner_vhca_id,
- MLX5_CAP_GEN(attr->in_mdev, vhca_id));
-
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
- MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
- if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
- MLX5_SET_TO_ONES(fte_match_set_misc, misc,
- source_eswitch_owner_vhca_id);
+ mlx5_eswitch_set_rule_source_port(esw, spec, attr);
- spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) {
if (attr->tunnel_match_level != MLX5_MATCH_NONE)
spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
struct mlx5_flow_table *fast_fdb;
struct mlx5_flow_table *fwd_fdb;
struct mlx5_flow_handle *rule;
- void *misc;
int i;
fast_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 0);
dest[i].ft = fwd_fdb,
i++;
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
- MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport);
-
- if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
- MLX5_SET(fte_match_set_misc, misc,
- source_eswitch_owner_vhca_id,
- MLX5_CAP_GEN(attr->in_mdev, vhca_id));
-
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
- MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
- if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
- MLX5_SET_TO_ONES(fte_match_set_misc, misc,
- source_eswitch_owner_vhca_id);
+ mlx5_eswitch_set_rule_source_port(esw, spec, attr);
- if (attr->match_level == MLX5_MATCH_NONE)
- spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
- else
- spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS |
- MLX5_MATCH_MISC_PARAMETERS;
+ if (attr->match_level != MLX5_MATCH_NONE)
+ spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i);
static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val)
{
struct mlx5_eswitch_rep *rep;
- int vf_vport, err = 0;
+ int i, err = 0;
esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none");
- for (vf_vport = 1; vf_vport < esw->enabled_vports; vf_vport++) {
- rep = &esw->offloads.vport_reps[vf_vport];
+ mlx5_esw_for_each_host_func_rep(esw, i, rep, esw->esw_funcs.num_vfs) {
if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED)
continue;
}
struct mlx5_flow_handle *
-mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn)
+mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, u16 vport,
+ u32 sqn)
{
struct mlx5_flow_act flow_act = {0};
struct mlx5_flow_destination dest = {};
mlx5_del_flow_rules(rule);
}
- static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev,
+ /* Set or clear MLX5_FDB_TO_VPORT_REG_C_0 in the manager vport's
+  * fdb_to_vport_reg_c_id.
+  *
+  * The current value is queried first so that only the REG_C_0 bit changes;
+  * the modify command then selects just this field through field_select.
+  *
+  * @esw:    e-switch whose manager vport context is updated
+  * @enable: true to set the bit, false to clear it
+  *
+  * Returns the query error if the query fails, otherwise the result of the
+  * modify command.
+  */
+ static int mlx5_eswitch_set_passing_vport_metadata(struct mlx5_eswitch *esw,
+ 						     bool enable)
+ {
+ 	u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {};
+ 	u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {};
+ 	u8 fdb_to_vport_reg_c_id;
+ 	int err;
+
+ 	err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport,
+ 						   out, sizeof(out));
+ 	if (err)
+ 		return err;
+
+ 	fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out,
+ 					 esw_vport_context.fdb_to_vport_reg_c_id);
+
+ 	if (enable)
+ 		fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0;
+ 	else
+ 		fdb_to_vport_reg_c_id &= ~MLX5_FDB_TO_VPORT_REG_C_0;
+
+ 	MLX5_SET(modify_esw_vport_context_in, in,
+ 		 esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id);
+
+ 	MLX5_SET(modify_esw_vport_context_in, in,
+ 		 field_select.fdb_to_vport_reg_c_id, 1);
+
+ 	return mlx5_eswitch_modify_esw_vport_context(esw, esw->manager_vport,
+ 						     in, sizeof(in));
+ }
+
+ /* Turn on passing of REG_C_0 metadata on the manager vport. */
+ static int mlx5_eswitch_enable_passing_vport_metadata(struct mlx5_eswitch *esw)
+ {
+ 	return mlx5_eswitch_set_passing_vport_metadata(esw, true);
+ }
+
+ /* Turn off passing of REG_C_0 metadata on the manager vport. */
+ static int mlx5_eswitch_disable_passing_vport_metadata(struct mlx5_eswitch *esw)
+ {
+ 	return mlx5_eswitch_set_passing_vport_metadata(esw, false);
+ }
+
+ static void peer_miss_rules_setup(struct mlx5_eswitch *esw,
+ struct mlx5_core_dev *peer_dev,
struct mlx5_flow_spec *spec,
struct mlx5_flow_destination *dest)
{
- void *misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- misc_parameters);
+ void *misc;
- MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
- MLX5_CAP_GEN(peer_dev, vhca_id));
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters_2);
+ MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0);
- spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+ } else {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- misc_parameters);
- MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
- MLX5_SET_TO_ONES(fte_match_set_misc, misc,
- source_eswitch_owner_vhca_id);
+ MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(peer_dev, vhca_id));
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id);
+ }
dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
dest->vport.num = peer_dev->priv.eswitch->manager_vport;
dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
}
+ static void esw_set_peer_miss_rule_source_port(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch *peer_esw,
+ struct mlx5_flow_spec *spec,
+ u16 vport)
+ {
+ void *misc;
+ /*
+  * Store the source-vport match value of a peer miss rule in
+  * spec->match_value. If vport match metadata is enabled on esw, the
+  * value is the metadata computed for vport on peer_esw and goes into
+  * misc_parameters_2.metadata_reg_c_0; otherwise vport itself goes into
+  * misc_parameters.source_port. Only match_value is written here; this
+  * helper sets no match criteria.
+  */
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(peer_esw,
+ vport));
+ } else {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port, vport);
+ }
+ }
+
static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
struct mlx5_core_dev *peer_dev)
{
if (!spec)
return -ENOMEM;
- peer_miss_rules_setup(peer_dev, spec, &dest);
+ peer_miss_rules_setup(esw, peer_dev, spec, &dest);
flows = kvzalloc(nvports * sizeof(*flows), GFP_KERNEL);
if (!flows) {
misc_parameters);
if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
- MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_PF);
+ esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch,
+ spec, MLX5_VPORT_PF);
+
flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
spec, &flow_act, &dest, 1);
if (IS_ERR(flow)) {
}
mlx5_esw_for_each_vf_vport_num(esw, i, mlx5_core_max_vfs(esw->dev)) {
- MLX5_SET(fte_match_set_misc, misc, source_port, i);
+ esw_set_peer_miss_rule_source_port(esw,
+ peer_dev->priv.eswitch,
+ spec, i);
+
flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
spec, &flow_act, &dest, 1);
if (IS_ERR(flow)) {
#define MAX_PF_SQ 256
#define MAX_SQ_NVPORTS 32
+ static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw,
+ u32 *flow_group_in)
+ {
+ void *match_criteria = MLX5_ADDR_OF(create_flow_group_in,
+ flow_group_in,
+ match_criteria);
+ /*
+  * Prepare a create_flow_group_in buffer for a group keyed on the source
+  * vport: set match_criteria_enable and set the chosen criteria field to
+  * all ones. With vport match metadata enabled the field is
+  * misc_parameters_2.metadata_reg_c_0, otherwise it is
+  * misc_parameters.source_port. Other fields of flow_group_in are not
+  * touched.
+  */
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS_2);
+
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters_2.metadata_reg_c_0);
+ } else {
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS);
+
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters.source_port);
+ }
+ }
+
static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
{
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
/* create peer esw miss group */
memset(flow_group_in, 0, inlen);
- MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
- MLX5_MATCH_MISC_PARAMETERS);
- match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
- match_criteria);
+ esw_set_flow_group_source_port(esw, flow_group_in);
+
+ if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in,
+ flow_group_in,
+ match_criteria);
- MLX5_SET_TO_ONES(fte_match_param, match_criteria,
- misc_parameters.source_port);
- MLX5_SET_TO_ONES(fte_match_param, match_criteria,
- misc_parameters.source_eswitch_owner_vhca_id);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters.source_eswitch_owner_vhca_id);
+
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ source_eswitch_owner_vhca_id_valid, 1);
+ }
- MLX5_SET(create_flow_group_in, flow_group_in,
- source_eswitch_owner_vhca_id_valid, 1);
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
ix + esw->total_vports - 1);
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
struct mlx5_flow_group *g;
u32 *flow_group_in;
- void *match_criteria, *misc;
int err = 0;
nvports = nvports + MLX5_ESW_MISS_FLOWS;
/* create vport rx group */
memset(flow_group_in, 0, inlen);
- MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
- MLX5_MATCH_MISC_PARAMETERS);
- match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
- misc = MLX5_ADDR_OF(fte_match_param, match_criteria, misc_parameters);
- MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ esw_set_flow_group_source_port(esw, flow_group_in);
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, nvports - 1);
}
struct mlx5_flow_handle *
-mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport,
+mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport,
struct mlx5_flow_destination *dest)
{
struct mlx5_flow_act flow_act = {0};
goto out;
}
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
- MLX5_SET(fte_match_set_misc, misc, source_port, vport);
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(esw, vport));
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
- MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
+ MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0);
- spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+ } else {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port, vport);
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ }
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec,
static int esw_offloads_start(struct mlx5_eswitch *esw,
struct netlink_ext_ack *extack)
{
- int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs;
+ int err, err1;
- if (esw->mode != SRIOV_LEGACY &&
+ if (esw->mode != MLX5_ESWITCH_LEGACY &&
!mlx5_core_is_ecpf_esw_manager(esw->dev)) {
NL_SET_ERR_MSG_MOD(extack,
"Can't set offloads mode, SRIOV legacy not enabled");
return -EINVAL;
}
- mlx5_eswitch_disable_sriov(esw);
- err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS);
+ mlx5_eswitch_disable(esw);
+ mlx5_eswitch_update_num_of_vfs(esw, esw->dev->priv.sriov.num_vfs);
+ err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS);
if (err) {
NL_SET_ERR_MSG_MOD(extack,
"Failed setting eswitch to offloads");
- err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY);
+ err1 = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY);
if (err1) {
NL_SET_ERR_MSG_MOD(extack,
"Failed setting eswitch back to legacy");
}
if (esw->offloads.inline_mode == MLX5_INLINE_MODE_NONE) {
if (mlx5_eswitch_inline_mode_get(esw,
- num_vfs,
&esw->offloads.inline_mode)) {
esw->offloads.inline_mode = MLX5_INLINE_MODE_L2;
NL_SET_ERR_MSG_MOD(extack,
struct mlx5_core_dev *dev = esw->dev;
struct mlx5_eswitch_rep *rep;
u8 hw_id[ETH_ALEN], rep_type;
- int vport;
+ int vport_index;
esw->offloads.vport_reps = kcalloc(total_vports,
sizeof(struct mlx5_eswitch_rep),
if (!esw->offloads.vport_reps)
return -ENOMEM;
- mlx5_query_nic_vport_mac_address(dev, 0, hw_id);
+ mlx5_query_mac_address(dev, hw_id);
- mlx5_esw_for_all_reps(esw, vport, rep) {
- rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport);
+ mlx5_esw_for_all_reps(esw, vport_index, rep) {
+ rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport_index);
+ rep->vport_index = vport_index;
ether_addr_copy(rep->hw_id, hw_id);
for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++)
__unload_reps_vf_vport(esw, nvports, rep_type);
}
- static void __unload_reps_all_vport(struct mlx5_eswitch *esw, int nvports,
- u8 rep_type)
+ static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type)
{
- __unload_reps_vf_vport(esw, nvports, rep_type);
+ __unload_reps_vf_vport(esw, esw->esw_funcs.num_vfs, rep_type);
/* Special vports must be the last to unload. */
__unload_reps_special_vport(esw, rep_type);
}
- static void esw_offloads_unload_all_reps(struct mlx5_eswitch *esw, int nvports)
+ static void esw_offloads_unload_all_reps(struct mlx5_eswitch *esw)
{
u8 rep_type = NUM_REP_TYPES;
while (rep_type-- > 0)
- __unload_reps_all_vport(esw, nvports, rep_type);
+ __unload_reps_all_vport(esw, rep_type);
}
static int __esw_offloads_load_rep(struct mlx5_eswitch *esw,
return err;
}
+ static int __load_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type)
+ {
+ int err;
+ /*
+  * Load the reps of rep_type for the special vports and then for
+  * esw->esw_funcs.num_vfs VF vports. Returns 0 on success; if the VF
+  * step fails, the special vport reps loaded here are unloaded again and
+  * that error is returned.
+  */
+ /* Special vports must be loaded first, uplink rep creates mdev resource. */
+ err = __load_reps_special_vport(esw, rep_type);
+ if (err)
+ return err;
+
+ err = __load_reps_vf_vport(esw, esw->esw_funcs.num_vfs, rep_type);
+ if (err)
+ goto err_vfs;
+
+ return 0;
+
+ err_vfs:
+ __unload_reps_special_vport(esw, rep_type);
+ return err;
+ }
+
static int esw_offloads_load_vf_reps(struct mlx5_eswitch *esw, int nvports)
{
u8 rep_type = 0;
return err;
}
- static int esw_offloads_load_special_vport(struct mlx5_eswitch *esw)
+ static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw)
{
u8 rep_type = 0;
int err;
for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) {
- err = __load_reps_special_vport(esw, rep_type);
+ err = __load_reps_all_vport(esw, rep_type);
if (err)
goto err_reps;
}
err_reps:
while (rep_type-- > 0)
- __unload_reps_special_vport(esw, rep_type);
+ __unload_reps_all_vport(esw, rep_type);
return err;
}
switch (event) {
case ESW_OFFLOADS_DEVCOM_PAIR:
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw) !=
+ mlx5_eswitch_vport_match_metadata_enabled(peer_esw))
+ break;
+
err = mlx5_esw_offloads_pair(esw, peer_esw);
if (err)
goto err_out;
static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw,
struct mlx5_vport *vport)
{
- struct mlx5_core_dev *dev = esw->dev;
struct mlx5_flow_act flow_act = {0};
struct mlx5_flow_spec *spec;
int err = 0;
/* For prio tag mode, there is only 1 FTEs:
- * 1) Untagged packets - push prio tag VLAN, allow
+ * 1) Untagged packets - push prio tag VLAN and modify metadata if
+ * required, allow
* Unmatched traffic is allowed by default
*/
- if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support))
- return -EOPNOTSUPP;
-
- esw_vport_cleanup_ingress_rules(esw, vport);
-
- err = esw_vport_enable_ingress_acl(esw, vport);
- if (err) {
- mlx5_core_warn(esw->dev,
- "failed to enable prio tag ingress acl (%d) on vport[%d]\n",
- err, vport->vport);
- return err;
- }
-
- esw_debug(esw->dev,
- "vport[%d] configure ingress rules\n", vport->vport);
-
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
if (!spec) {
err = -ENOMEM;
flow_act.vlan[0].ethtype = ETH_P_8021Q;
flow_act.vlan[0].vid = 0;
flow_act.vlan[0].prio = 0;
+
+ if (vport->ingress.modify_metadata_rule) {
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ flow_act.modify_id = vport->ingress.modify_metadata_id;
+ }
+
vport->ingress.allow_rule =
mlx5_add_flow_rules(vport->ingress.acl, spec,
&flow_act, NULL, 0);
return err;
}
+ static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+ {
+ u8 action[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)] = {};
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_spec spec = {};
+ int err = 0;
+ /*
+  * Install a rule on the vport's ingress ACL that writes the vport's
+  * match metadata into metadata register C0. Builds one SET action
+  * carrying mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport).
+  * Returns 0 on success or the error of the failing step.
+  */
+ MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
+ MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_0);
+ MLX5_SET(set_action_in, action, data,
+ mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport));
+ /* Allocate a modify header holding that single action; its id is kept in vport->ingress.modify_metadata_id. */
+ err = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+ 1, action, &vport->ingress.modify_metadata_id);
+ if (err) {
+ esw_warn(esw->dev,
+ "failed to alloc modify header for vport %d ingress acl (%d)\n",
+ vport->vport, err);
+ return err;
+ }
+ /* The rule uses the zero-initialized spec (presumably matching every packet - confirm) with modify-header + allow. */
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ flow_act.modify_id = vport->ingress.modify_metadata_id;
+ vport->ingress.modify_metadata_rule = mlx5_add_flow_rules(vport->ingress.acl,
+ &spec, &flow_act, NULL, 0);
+ if (IS_ERR(vport->ingress.modify_metadata_rule)) {
+ err = PTR_ERR(vport->ingress.modify_metadata_rule);
+ esw_warn(esw->dev,
+ "failed to add setting metadata rule for vport %d ingress acl, err(%d)\n",
+ vport->vport, err);
+ vport->ingress.modify_metadata_rule = NULL;
+ goto out;
+ }
+ /* Reached on success as well; the modify header is released only when adding the rule failed. */
+ out:
+ if (err)
+ mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id);
+ return err;
+ }
+ /*
+  * Undo esw_vport_add_ingress_acl_modify_metadata(): if the vport has a
+  * modify-metadata rule, delete the rule, release its modify header and
+  * reset the rule pointer to NULL. Does nothing when no rule is installed,
+  * so it can be called repeatedly.
+  */
+ void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+ {
+ if (vport->ingress.modify_metadata_rule) {
+ mlx5_del_flow_rules(vport->ingress.modify_metadata_rule);
+ mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id);
+
+ vport->ingress.modify_metadata_rule = NULL;
+ }
+ }
+
static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw,
struct mlx5_vport *vport)
{
struct mlx5_flow_spec *spec;
int err = 0;
+ if (!MLX5_CAP_GEN(esw->dev, prio_tag_required))
+ return 0;
+
/* For prio tag mode, there is only 1 FTEs:
* 1) prio tag packets - pop the prio tag VLAN, allow
* Unmatched traffic is allowed by default
return err;
}
- static int esw_prio_tag_acls_config(struct mlx5_eswitch *esw, int nvports)
+ static int esw_vport_ingress_common_config(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
{
- struct mlx5_vport *vport = NULL;
- int i, j;
int err;
- mlx5_esw_for_each_vf_vport(esw, i, vport, nvports) {
+ if (!mlx5_eswitch_vport_match_metadata_enabled(esw) &&
+ !MLX5_CAP_GEN(esw->dev, prio_tag_required))
+ return 0;
+ /*
+  * Per-vport ingress ACL setup for offloads mode. The early return
+  * above leaves the vport untouched when neither vport match metadata
+  * nor prio tagging is in use. Otherwise: drop the existing ingress
+  * rules, enable the ingress ACL, then add the metadata rule and/or the
+  * prio tag rule. Returns 0 on success or the first error.
+  */
+ esw_vport_cleanup_ingress_rules(esw, vport);
+
+ err = esw_vport_enable_ingress_acl(esw, vport);
+ if (err) {
+ esw_warn(esw->dev,
+ "failed to enable ingress acl (%d) on vport[%d]\n",
+ err, vport->vport);
+ return err;
+ }
+
+ esw_debug(esw->dev,
+ "vport[%d] configure ingress rules\n", vport->vport);
+ /* Metadata rule goes in first: the prio tag config checks modify_metadata_rule to reuse its modify header id. */
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ err = esw_vport_add_ingress_acl_modify_metadata(esw, vport);
+ if (err)
+ goto out;
+ }
+ /* The prio tag rule is only installed on VF vports. */
+ if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
+ mlx5_eswitch_is_vf_vport(esw, vport->vport)) {
err = esw_vport_ingress_prio_tag_config(esw, vport);
if (err)
- goto err_ingress;
- err = esw_vport_egress_prio_tag_config(esw, vport);
+ goto out;
+ }
+ /* Also reached on success; a failure after the ACL was enabled disables the ingress ACL again. */
+ out:
+ if (err)
+ esw_vport_disable_ingress_acl(esw, vport);
+ return err;
+ }
+ /*
+  * Report whether source-vport matching via metadata can be used on this
+  * eswitch. All of the following must hold: the esw_uplink_ingress_acl
+  * capability is set, the fdb_to_vport_reg_c_id flow table capability
+  * includes MLX5_FDB_TO_VPORT_REG_C_0, the flow_source flow table
+  * capability is set, and the device is neither an ECPF eswitch manager
+  * nor has an ECPF vport.
+  */
+ static bool
+ esw_check_vport_match_metadata_supported(const struct mlx5_eswitch *esw)
+ {
+ if (!MLX5_CAP_ESW(esw->dev, esw_uplink_ingress_acl))
+ return false;
+
+ if (!(MLX5_CAP_ESW_FLOWTABLE(esw->dev, fdb_to_vport_reg_c_id) &
+ MLX5_FDB_TO_VPORT_REG_C_0))
+ return false;
+
+ if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source))
+ return false;
+ /* Metadata matching is ruled out whenever an ECPF is involved. */
+ if (mlx5_core_is_ecpf_esw_manager(esw->dev) ||
+ mlx5_ecpf_vport_exists(esw->dev))
+ return false;
+
+ return true;
+ }
+ /*
+  * Configure the offloads-mode ACLs of every vport: the common ingress
+  * config for all vports, plus the egress prio tag config for VF vports.
+  * Sets MLX5_ESWITCH_VPORT_MATCH_METADATA in esw->flags first when the
+  * device supports it. Returns 0 on success; on failure the ACLs of the
+  * vports handled so far are disabled and the error is returned.
+  */
+ static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw)
+ {
+ struct mlx5_vport *vport;
+ int i, j;
+ int err;
+ /* NOTE(review): the error paths below do not clear this flag again - confirm the callers cope with it staying set. */
+ if (esw_check_vport_match_metadata_supported(esw))
+ esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA;
+
+ mlx5_esw_for_all_vports(esw, i, vport) {
+ err = esw_vport_ingress_common_config(esw, vport);
if (err)
- goto err_egress;
+ goto err_ingress;
+ /* Egress prio tag config applies to VF vports only. */
+ if (mlx5_eswitch_is_vf_vport(esw, vport->vport)) {
+ err = esw_vport_egress_prio_tag_config(esw, vport);
+ if (err)
+ goto err_egress;
+ }
}
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw))
+ esw_info(esw->dev, "Use metadata reg_c as source vport to match\n");
+
return 0;
err_egress:
esw_vport_disable_ingress_acl(esw, vport);
err_ingress:
- mlx5_esw_for_each_vf_vport_reverse(esw, j, vport, i - 1) {
+ for (j = MLX5_VPORT_PF; j < i; j++) {
+ vport = &esw->vports[j];
esw_vport_disable_egress_acl(esw, vport);
esw_vport_disable_ingress_acl(esw, vport);
}
return err;
}
- static void esw_prio_tag_acls_cleanup(struct mlx5_eswitch *esw)
+ static void esw_destroy_offloads_acl_tables(struct mlx5_eswitch *esw)
{
struct mlx5_vport *vport;
int i;
- mlx5_esw_for_each_vf_vport(esw, i, vport, esw->dev->priv.sriov.num_vfs) {
+ mlx5_esw_for_all_vports(esw, i, vport) {
esw_vport_disable_egress_acl(esw, vport);
esw_vport_disable_ingress_acl(esw, vport);
}
+ /* With the egress and ingress ACLs of every vport disabled, drop the vport match metadata flag again. */
+ esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA;
}
- static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int vf_nvports,
- int nvports)
+ static int esw_offloads_steering_init(struct mlx5_eswitch *esw)
{
+ int num_vfs = esw->esw_funcs.num_vfs;
+ int total_vports;
int err;
+ if (mlx5_core_is_ecpf_esw_manager(esw->dev))
+ total_vports = esw->total_vports;
+ else
+ total_vports = num_vfs + MLX5_SPECIAL_VPORTS(esw->dev);
+
memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb));
mutex_init(&esw->fdb_table.offloads.fdb_prio_lock);
- if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) {
- err = esw_prio_tag_acls_config(esw, vf_nvports);
- if (err)
- return err;
- }
-
- err = esw_create_offloads_fdb_tables(esw, nvports);
+ err = esw_create_offloads_acl_tables(esw);
if (err)
return err;
- err = esw_create_offloads_table(esw, nvports);
+ err = esw_create_offloads_fdb_tables(esw, total_vports);
+ if (err)
+ goto create_fdb_err;
+
+ err = esw_create_offloads_table(esw, total_vports);
if (err)
goto create_ft_err;
- err = esw_create_vport_rx_group(esw, nvports);
+ err = esw_create_vport_rx_group(esw, total_vports);
if (err)
goto create_fg_err;
create_ft_err:
esw_destroy_offloads_fdb_tables(esw);
+ create_fdb_err:
+ esw_destroy_offloads_acl_tables(esw);
+
return err;
}
esw_destroy_vport_rx_group(esw);
esw_destroy_offloads_table(esw);
esw_destroy_offloads_fdb_tables(esw);
- if (MLX5_CAP_GEN(esw->dev, prio_tag_required))
- esw_prio_tag_acls_cleanup(esw);
+ esw_destroy_offloads_acl_tables(esw);
}
static void esw_functions_changed_event_handler(struct work_struct *work)
u32 out[MLX5_ST_SZ_DW(query_esw_functions_out)] = {};
struct mlx5_host_work *host_work;
struct mlx5_eswitch *esw;
+ bool host_pf_disabled;
u16 num_vfs = 0;
int err;
err = mlx5_esw_query_functions(esw->dev, out, sizeof(out));
num_vfs = MLX5_GET(query_esw_functions_out, out,
host_params_context.host_num_of_vfs);
- if (err || num_vfs == esw->esw_funcs.num_vfs)
+ host_pf_disabled = MLX5_GET(query_esw_functions_out, out,
+ host_params_context.host_pf_disabled);
+ if (err || host_pf_disabled || num_vfs == esw->esw_funcs.num_vfs)
goto out;
/* Number of VFs can only change from "0 to x" or "x to 0". */
kfree(host_work);
}
- static void esw_emulate_event_handler(struct work_struct *work)
- {
- struct mlx5_host_work *host_work =
- container_of(work, struct mlx5_host_work, work);
- struct mlx5_eswitch *esw = host_work->esw;
- int err;
-
- if (esw->esw_funcs.num_vfs) {
- err = esw_offloads_load_vf_reps(esw, esw->esw_funcs.num_vfs);
- if (err)
- esw_warn(esw->dev, "Load vf reps err=%d\n", err);
- }
- kfree(host_work);
- }
-
- static int esw_functions_changed_event(struct notifier_block *nb,
- unsigned long type, void *data)
+ int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data)
{
struct mlx5_esw_functions *esw_funcs;
struct mlx5_host_work *host_work;
host_work->esw = esw;
- if (mlx5_eswitch_is_funcs_handler(esw->dev))
- INIT_WORK(&host_work->work,
- esw_functions_changed_event_handler);
- else
- INIT_WORK(&host_work->work, esw_emulate_event_handler);
+ INIT_WORK(&host_work->work, esw_functions_changed_event_handler);
queue_work(esw->work_queue, &host_work->work);
return NOTIFY_OK;
}
- static void esw_functions_changed_event_init(struct mlx5_eswitch *esw,
- u16 vf_nvports)
- {
- if (mlx5_eswitch_is_funcs_handler(esw->dev)) {
- esw->esw_funcs.num_vfs = 0;
- MLX5_NB_INIT(&esw->esw_funcs.nb, esw_functions_changed_event,
- ESW_FUNCTIONS_CHANGED);
- mlx5_eq_notifier_register(esw->dev, &esw->esw_funcs.nb);
- } else {
- esw->esw_funcs.num_vfs = vf_nvports;
- }
- }
-
- static void esw_functions_changed_event_cleanup(struct mlx5_eswitch *esw)
- {
- if (!mlx5_eswitch_is_funcs_handler(esw->dev))
- return;
-
- mlx5_eq_notifier_unregister(esw->dev, &esw->esw_funcs.nb);
- flush_workqueue(esw->work_queue);
- }
-
- int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports,
- int total_nvports)
+ int esw_offloads_init(struct mlx5_eswitch *esw)
{
int err;
- err = esw_offloads_steering_init(esw, vf_nvports, total_nvports);
+ err = esw_offloads_steering_init(esw);
if (err)
return err;
- /* Only load special vports reps. VF reps will be loaded in
- * context of functions_changed event handler through real
- * or emulated event.
- */
- err = esw_offloads_load_special_vport(esw);
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ err = mlx5_eswitch_enable_passing_vport_metadata(esw);
+ if (err)
+ goto err_vport_metadata;
+ }
+
+ err = esw_offloads_load_all_reps(esw);
if (err)
goto err_reps;
esw_offloads_devcom_init(esw);
- esw_functions_changed_event_init(esw, vf_nvports);
-
mlx5_rdma_enable_roce(esw->dev);
- /* Call esw_functions_changed event to load VF reps:
- * 1. HW does not support the event then emulate it
- * Or
- * 2. The event was already notified when num_vfs changed
- * and eswitch was in legacy mode
- */
- esw_functions_changed_event(&esw->esw_funcs.nb.nb,
- MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED,
- NULL);
-
return 0;
err_reps:
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw))
+ mlx5_eswitch_disable_passing_vport_metadata(esw);
+ err_vport_metadata:
esw_offloads_steering_cleanup(esw);
return err;
}
static int esw_offloads_stop(struct mlx5_eswitch *esw,
struct netlink_ext_ack *extack)
{
- int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs;
+ int err, err1;
- mlx5_eswitch_disable_sriov(esw);
- err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY);
+ mlx5_eswitch_disable(esw);
+ err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY);
if (err) {
NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy");
- err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS);
+ err1 = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS);
if (err1) {
NL_SET_ERR_MSG_MOD(extack,
"Failed setting eswitch back to offloads");
void esw_offloads_cleanup(struct mlx5_eswitch *esw)
{
- esw_functions_changed_event_cleanup(esw);
mlx5_rdma_disable_roce(esw->dev);
esw_offloads_devcom_cleanup(esw);
- esw_offloads_unload_all_reps(esw, esw->esw_funcs.num_vfs);
+ esw_offloads_unload_all_reps(esw);
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw))
+ mlx5_eswitch_disable_passing_vport_metadata(esw);
esw_offloads_steering_cleanup(esw);
}
{
switch (mode) {
case DEVLINK_ESWITCH_MODE_LEGACY:
- *mlx5_mode = SRIOV_LEGACY;
+ *mlx5_mode = MLX5_ESWITCH_LEGACY;
break;
case DEVLINK_ESWITCH_MODE_SWITCHDEV:
- *mlx5_mode = SRIOV_OFFLOADS;
+ *mlx5_mode = MLX5_ESWITCH_OFFLOADS;
break;
default:
return -EINVAL;
static int esw_mode_to_devlink(u16 mlx5_mode, u16 *mode)
{
switch (mlx5_mode) {
- case SRIOV_LEGACY:
+ case MLX5_ESWITCH_LEGACY:
*mode = DEVLINK_ESWITCH_MODE_LEGACY;
break;
- case SRIOV_OFFLOADS:
+ case MLX5_ESWITCH_OFFLOADS:
*mode = DEVLINK_ESWITCH_MODE_SWITCHDEV;
break;
default:
if(!MLX5_ESWITCH_MANAGER(dev))
return -EPERM;
- if (dev->priv.eswitch->mode == SRIOV_NONE &&
+ if (dev->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
!mlx5_core_is_ecpf_esw_manager(dev))
return -EOPNOTSUPP;
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
struct mlx5_eswitch *esw = dev->priv.eswitch;
- int err, vport;
+ int err, vport, num_vport;
u8 mlx5_mode;
err = mlx5_devlink_eswitch_check(devlink);
if (err)
goto out;
- for (vport = 1; vport < esw->enabled_vports; vport++) {
+ mlx5_esw_for_each_host_func_vport(esw, vport, esw->esw_funcs.num_vfs) {
err = mlx5_modify_nic_vport_min_inline(dev, vport, mlx5_mode);
if (err) {
NL_SET_ERR_MSG_MOD(extack,
return 0;
revert_inline_mode:
- while (--vport > 0)
+ num_vport = --vport;
+ mlx5_esw_for_each_host_func_vport_reverse(esw, vport, num_vport)
mlx5_modify_nic_vport_min_inline(dev,
vport,
esw->offloads.inline_mode);
return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode);
}
- int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode)
+ int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode)
{
u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2;
struct mlx5_core_dev *dev = esw->dev;
if (!MLX5_CAP_GEN(dev, vport_group_manager))
return -EOPNOTSUPP;
- if (esw->mode == SRIOV_NONE)
+ if (esw->mode == MLX5_ESWITCH_NONE)
return -EOPNOTSUPP;
switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
}
query_vports:
- for (vport = 1; vport <= nvfs; vport++) {
+ mlx5_query_nic_vport_min_inline(dev, esw->first_host_vport, &prev_mlx5_mode);
+ mlx5_esw_for_each_host_func_vport(esw, vport, esw->esw_funcs.num_vfs) {
mlx5_query_nic_vport_min_inline(dev, vport, &mlx5_mode);
- if (vport > 1 && prev_mlx5_mode != mlx5_mode)
+ if (prev_mlx5_mode != mlx5_mode)
return -EINVAL;
prev_mlx5_mode = mlx5_mode;
}
if (encap && encap != DEVLINK_ESWITCH_ENCAP_MODE_BASIC)
return -EOPNOTSUPP;
- if (esw->mode == SRIOV_LEGACY) {
+ if (esw->mode == MLX5_ESWITCH_LEGACY) {
esw->offloads.encap = encap;
return 0;
}
void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type)
{
- u16 max_vf = mlx5_core_max_vfs(esw->dev);
struct mlx5_eswitch_rep *rep;
int i;
- if (esw->mode == SRIOV_OFFLOADS)
- __unload_reps_all_vport(esw, max_vf, rep_type);
+ if (esw->mode == MLX5_ESWITCH_OFFLOADS)
+ __unload_reps_all_vport(esw, rep_type);
mlx5_esw_for_all_reps(esw, i, rep)
atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED);
}
void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
- int vport,
+ u16 vport,
u8 rep_type)
{
struct mlx5_eswitch_rep *rep;
EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev);
struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
- int vport)
+ u16 vport)
{
return mlx5_eswitch_get_rep(esw, vport);
}
EXPORT_SYMBOL(mlx5_eswitch_vport_rep);
+ /*
+  * Return true when vport_num lies in the VF vport range, i.e. between
+  * MLX5_VPORT_FIRST_VF and the device's sriov.max_vfs inclusive. The upper
+  * bound is max_vfs, not the number of VFs currently in use.
+  */
+ bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num)
+ {
+ return vport_num >= MLX5_VPORT_FIRST_VF &&
+ vport_num <= esw->dev->priv.sriov.max_vfs;
+ }
+ /* Return true when MLX5_ESWITCH_VPORT_MATCH_METADATA is set in esw->flags. */
+ bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw)
+ {
+ return !!(esw->flags & MLX5_ESWITCH_VPORT_MATCH_METADATA);
+ }
+ EXPORT_SYMBOL(mlx5_eswitch_vport_match_metadata_enabled);
+ /*
+  * Build the 32-bit metadata value that identifies vport_num on this
+  * eswitch: the low 16 bits of the device's vhca_id capability go into
+  * bits 31..16 and vport_num into bits 15..0.
+  */
+ u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw,
+ u16 vport_num)
+ {
+ return ((MLX5_CAP_GEN(esw->dev, vhca_id) & 0xffff) << 16) | vport_num;
+ }
+ EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match);
}
static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft,
- u32 *match_value,
+ struct mlx5_flow_spec *spec,
struct mlx5_flow_act *flow_act)
{
struct mlx5_flow_steering *steering = get_steering(&ft->node);
if (!fte)
return ERR_PTR(-ENOMEM);
- memcpy(fte->val, match_value, sizeof(fte->val));
+ memcpy(fte->val, &spec->match_value, sizeof(fte->val));
fte->node.type = FS_TYPE_FLOW_ENTRY;
fte->action = *flow_act;
+ fte->flow_context = spec->flow_context;
tree_init_node(&fte->node, NULL, del_sw_fte);
if ((d1->type == MLX5_FLOW_DESTINATION_TYPE_VPORT &&
d1->vport.num == d2->vport.num &&
d1->vport.flags == d2->vport.flags &&
+ ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_VHCA_ID) ?
+ (d1->vport.vhca_id == d2->vport.vhca_id) : true) &&
((d1->vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) ?
(d1->vport.reformat_id == d2->vport.reformat_id) : true)) ||
(d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
return false;
}
- static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act *flow_act)
+ static int check_conflicting_ftes(struct fs_fte *fte,
+ const struct mlx5_flow_context *flow_context,
+ const struct mlx5_flow_act *flow_act)
{
if (check_conflicting_actions(flow_act->action, fte->action.action)) {
mlx5_core_warn(get_dev(&fte->node),
return -EEXIST;
}
- if ((flow_act->flags & FLOW_ACT_HAS_TAG) &&
- fte->action.flow_tag != flow_act->flow_tag) {
+ if ((flow_context->flags & FLOW_CONTEXT_HAS_TAG) &&
+ fte->flow_context.flow_tag != flow_context->flow_tag) {
mlx5_core_warn(get_dev(&fte->node),
"FTE flow tag %u already exists with different flow tag %u\n",
- fte->action.flow_tag,
- flow_act->flow_tag);
+ fte->flow_context.flow_tag,
+ flow_context->flow_tag);
return -EEXIST;
}
}
static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
- u32 *match_value,
+ struct mlx5_flow_spec *spec,
struct mlx5_flow_act *flow_act,
struct mlx5_flow_destination *dest,
int dest_num,
int i;
int ret;
- ret = check_conflicting_ftes(fte, flow_act);
+ ret = check_conflicting_ftes(fte, &spec->flow_context, flow_act);
if (ret)
return ERR_PTR(ret);
u64 version;
int err;
- fte = alloc_fte(ft, spec->match_value, flow_act);
+ fte = alloc_fte(ft, spec, flow_act);
if (IS_ERR(fte))
return ERR_PTR(-ENOMEM);
fte_tmp = lookup_fte_locked(g, spec->match_value, take_write);
if (!fte_tmp)
continue;
- rule = add_rule_fg(g, spec->match_value,
- flow_act, dest, dest_num, fte_tmp);
+ rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte_tmp);
up_write_ref_node(&fte_tmp->node, false);
tree_put_node(&fte_tmp->node, false);
kmem_cache_free(steering->ftes_cache, fte);
nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD);
up_write_ref_node(&g->node, false);
- rule = add_rule_fg(g, spec->match_value,
- flow_act, dest, dest_num, fte);
+ rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte);
up_write_ref_node(&fte->node, false);
tree_put_node(&fte->node, false);
return rule;
if (err)
goto err_release_fg;
- fte = alloc_fte(ft, spec->match_value, flow_act);
+ fte = alloc_fte(ft, spec, flow_act);
if (IS_ERR(fte)) {
err = PTR_ERR(fte);
goto err_release_fg;
nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD);
up_write_ref_node(&g->node, false);
- rule = add_rule_fg(g, spec->match_value, flow_act, dest,
- dest_num, fte);
+ rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte);
up_write_ref_node(&fte->node, false);
tree_put_node(&fte->node, false);
tree_put_node(&g->node, false);
cmds = mlx5_fs_cmd_get_default_ipsec_fpga_cmds(table_type);
/* Create the root namespace */
- root_ns = kvzalloc(sizeof(*root_ns), GFP_KERNEL);
+ root_ns = kzalloc(sizeof(*root_ns), GFP_KERNEL);
if (!root_ns)
return NULL;
cleanup_root_ns(steering->esw_egress_root_ns[i]);
kfree(steering->esw_egress_root_ns);
+ steering->esw_egress_root_ns = NULL;
}
static void cleanup_ingress_acls_root_ns(struct mlx5_core_dev *dev)
cleanup_root_ns(steering->esw_ingress_root_ns[i]);
kfree(steering->esw_ingress_root_ns);
+ steering->esw_ingress_root_ns = NULL;
}
void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
/* Create single prio */
prio = fs_create_prio(&steering->sniffer_tx_root_ns->ns, 0, 1);
- if (IS_ERR(prio)) {
- cleanup_root_ns(steering->sniffer_tx_root_ns);
- return PTR_ERR(prio);
- }
- return 0;
+ return PTR_ERR_OR_ZERO(prio);
}
static int init_sniffer_rx_root_ns(struct mlx5_flow_steering *steering)
/* Create single prio */
prio = fs_create_prio(&steering->sniffer_rx_root_ns->ns, 0, 1);
- if (IS_ERR(prio)) {
- cleanup_root_ns(steering->sniffer_rx_root_ns);
- return PTR_ERR(prio);
- }
- return 0;
+ return PTR_ERR_OR_ZERO(prio);
}
static int init_rdma_rx_root_ns(struct mlx5_flow_steering *steering)
/* Create single prio */
prio = fs_create_prio(&steering->rdma_rx_root_ns->ns, 0, 1);
- if (IS_ERR(prio)) {
- cleanup_root_ns(steering->rdma_rx_root_ns);
- return PTR_ERR(prio);
- }
- return 0;
+ return PTR_ERR_OR_ZERO(prio);
}
static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
{
for (i--; i >= 0; i--)
cleanup_root_ns(steering->esw_egress_root_ns[i]);
kfree(steering->esw_egress_root_ns);
+ steering->esw_egress_root_ns = NULL;
return err;
}
for (i--; i >= 0; i--)
cleanup_root_ns(steering->esw_ingress_root_ns[i]);
kfree(steering->esw_ingress_root_ns);
+ steering->esw_ingress_root_ns = NULL;
return err;
}
struct mlx5_priv *priv = &dev->priv;
int err = 0;
- priv->pci_dev_data = id->driver_data;
-
+ mutex_init(&dev->pci_status_mutex);
pci_set_drvdata(dev->pdev, dev);
dev->bar_addr = pci_resource_start(pdev, 0);
err = mlx5_core_set_hca_defaults(dev);
if (err) {
mlx5_core_err(dev, "Failed to set hca defaults\n");
- goto err_fs;
+ goto err_sriov;
}
err = mlx5_sriov_attach(dev);
INIT_LIST_HEAD(&priv->ctx_list);
spin_lock_init(&priv->ctx_lock);
- mutex_init(&dev->pci_status_mutex);
mutex_init(&dev->intf_state_mutex);
mutex_init(&priv->bfregs.reg_head.lock);
dev->device = &pdev->dev;
dev->pdev = pdev;
+ dev->coredev_type = id->driver_data & MLX5_PCI_DEV_IS_VF ?
+ MLX5_COREDEV_VF : MLX5_COREDEV_PF;
+
err = mlx5_mdev_init(dev, prof_sel);
if (err)
goto mdev_init_err;
#define MLX5_ESWITCH_MANAGER(mdev) MLX5_CAP_GEN(mdev, eswitch_manager)
enum {
- SRIOV_NONE,
- SRIOV_LEGACY,
- SRIOV_OFFLOADS
+ MLX5_ESWITCH_NONE,
+ MLX5_ESWITCH_LEGACY,
+ MLX5_ESWITCH_OFFLOADS
};
enum {
u16 vport;
u8 hw_id[ETH_ALEN];
u16 vlan;
+ /* Only IB rep is using vport_index */
+ u16 vport_index;
u32 vlan_refcount;
};
u8 rep_type);
void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type);
void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
- int vport,
+ u16 vport_num,
u8 rep_type);
struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
- int vport);
+ u16 vport_num);
void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type);
u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw);
struct mlx5_flow_handle *
mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw,
- int vport, u32 sqn);
+ u16 vport_num, u32 sqn);
#ifdef CONFIG_MLX5_ESWITCH
enum devlink_eswitch_encap_mode
mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev);
+
+ bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw);
+ u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw,
+ u16 vport_num);
#else /* CONFIG_MLX5_ESWITCH */
static inline enum devlink_eswitch_encap_mode
mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev)
{
return DEVLINK_ESWITCH_ENCAP_MODE_NONE;
}
+
+ /*
+  * Stub for builds without CONFIG_MLX5_ESWITCH: always reports vport
+  * metadata matching as disabled. @esw is ignored.
+  */
+ static inline bool
+ mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw)
+ {
+ 	return false;
+ }
+
+ /*
+  * Stub for builds without CONFIG_MLX5_ESWITCH: always returns 0.
+  * Both @esw and @vport_num are ignored. The parameter types mirror the
+  * prototype used when CONFIG_MLX5_ESWITCH is set (u16 vport_num), so
+  * the signature is the same in both configurations.
+  */
+ static inline u32
+ mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw,
+ 					  u16 vport_num)
+ {
+ 	return 0;
+ }
#endif /* CONFIG_MLX5_ESWITCH */
#endif
#include <linux/mlx5/driver.h>
#define MLX5_INVALID_LKEY 0x100
-#define MLX5_SIG_WQE_SIZE (MLX5_SEND_WQE_BB * 5)
+/* UMR (3 WQE_BB's) + SIG (3 WQE_BB's) + PSV (mem) + PSV (wire) */
+#define MLX5_SIG_WQE_SIZE (MLX5_SEND_WQE_BB * 8)
#define MLX5_DIF_SIZE 8
#define MLX5_STRIDE_BLOCK_OP 0x400
#define MLX5_CPY_GRD_MASK 0xc0
return radix_tree_lookup(&dev->priv.qp_table.tree, qpn);
}
- static inline struct mlx5_core_mkey *__mlx5_mr_lookup(struct mlx5_core_dev *dev, u32 key)
- {
- return radix_tree_lookup(&dev->priv.mkey_table.tree, key);
- }
-
int mlx5_core_create_dct(struct mlx5_core_dev *dev,
struct mlx5_core_dct *qp,
u32 *in, int inlen,