From: Jakub Kicinski Date: Tue, 31 Aug 2021 16:06:04 +0000 (-0700) Subject: Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net X-Git-Tag: v5.15-rc1~157^2 X-Git-Url: https://repo.jachan.dev/linux.git/commitdiff_plain/29ce8f9701072fc221d9c38ad952de1a9578f95c?hp=-c Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net include/linux/netdevice.h net/socket.c d0efb16294d1 ("net: don't unconditionally copy_from_user a struct ifreq for socket ioctls") 876f0bf9d0d5 ("net: socket: simplify dev_ifconf handling") 29c4964822aa ("net: socket: rework compat_ifreq_ioctl()") Signed-off-by: Jakub Kicinski --- 29ce8f9701072fc221d9c38ad952de1a9578f95c diff --combined drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index cdeece459c14,f26d03735619..dee9ff74d6d6 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@@ -119,10 -119,16 +119,10 @@@ static int aq_pci_func_init(struct pci_ { int err; - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); - if (!err) - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (err) + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (err) { - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); - if (!err) - err = pci_set_consistent_dma_mask(pdev, - DMA_BIT_MASK(32)); - } - if (err != 0) { err = -ENOSR; goto err_exit; } @@@ -411,6 -417,9 +411,9 @@@ static int atl_resume_common(struct dev pci_restore_state(pdev); if (deep) { + /* Reinitialize Nic/Vecs objects */ + aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol); + ret = aq_nic_init(nic); if (ret) goto err_exit; diff --combined drivers/net/ethernet/intel/ice/ice_main.c index 60d55d043a94,a8bd512d5b45..0d6c143f6653 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@@ -5122,6 -5122,7 +5122,7 @@@ static int ice_set_mac_address(struct n struct ice_hw *hw = &pf->hw; struct sockaddr *addr = pi; enum ice_status status; + u8 old_mac[ETH_ALEN]; u8 flags = 0; int err = 0; u8 *mac; @@@ -5144,8 -5145,13 +5145,13 @@@ } netif_addr_lock_bh(netdev); + ether_addr_copy(old_mac, netdev->dev_addr); + /* change the netdev's MAC address */ + memcpy(netdev->dev_addr, mac, netdev->addr_len); + netif_addr_unlock_bh(netdev); + /* Clean up old MAC filter. Not an error if old filter doesn't exist */ - status = ice_fltr_remove_mac(vsi, netdev->dev_addr, ICE_FWD_TO_VSI); + status = ice_fltr_remove_mac(vsi, old_mac, ICE_FWD_TO_VSI); if (status && status != ICE_ERR_DOES_NOT_EXIST) { err = -EADDRNOTAVAIL; goto err_update_filters; @@@ -5168,13 -5174,12 +5174,12 @@@ err_update_filters if (err) { netdev_err(netdev, "can't set MAC %pM. filter update failed\n", mac); + netif_addr_lock_bh(netdev); + ether_addr_copy(netdev->dev_addr, old_mac); netif_addr_unlock_bh(netdev); return err; } - /* change the netdev's MAC address */ - memcpy(netdev->dev_addr, mac, netdev->addr_len); - netif_addr_unlock_bh(netdev); netdev_dbg(vsi->netdev, "updated MAC address to %pM\n", netdev->dev_addr); @@@ -6570,12 -6575,12 +6575,12 @@@ event_after } /** - * ice_do_ioctl - Access the hwtstamp interface + * ice_eth_ioctl - Access the hwtstamp interface * @netdev: network interface device structure * @ifr: interface request data * @cmd: ioctl command */ -static int ice_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) +static int ice_eth_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_pf *pf = np->vsi->back; @@@ -7241,7 -7246,7 +7246,7 @@@ static const struct net_device_ops ice_ .ndo_change_mtu = ice_change_mtu, .ndo_get_stats64 = ice_get_stats64, .ndo_set_tx_maxrate = ice_set_tx_maxrate, - .ndo_do_ioctl = ice_do_ioctl, + .ndo_eth_ioctl = ice_eth_ioctl, .ndo_set_vf_spoofchk = ice_set_vf_spoofchk, .ndo_set_vf_mac = ice_set_vf_mac, .ndo_get_vf_config = ice_get_vf_cfg, diff --combined drivers/net/ethernet/mellanox/mlx5/core/dev.c index ff6b03dc7e32,20bb37266254..e8093c4e09d4 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@@ -53,7 -53,7 +53,7 @@@ static bool is_eth_rep_supported(struc return true; } -static bool is_eth_supported(struct mlx5_core_dev *dev) +bool mlx5_eth_supported(struct mlx5_core_dev *dev) { if (!IS_ENABLED(CONFIG_MLX5_CORE_EN)) return false; @@@ -105,18 -105,7 +105,18 @@@ return true; } -static bool is_vnet_supported(struct mlx5_core_dev *dev) +static bool is_eth_enabled(struct mlx5_core_dev *dev) +{ + union devlink_param_value val; + int err; + + err = devlink_param_driverinit_value_get(priv_to_devlink(dev), + DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH, + &val); + return err ? false : val.vbool; +} + +bool mlx5_vnet_supported(struct mlx5_core_dev *dev) { if (!IS_ENABLED(CONFIG_MLX5_VDPA_NET)) return false; @@@ -138,17 -127,6 +138,17 @@@ return true; } +static bool is_vnet_enabled(struct mlx5_core_dev *dev) +{ + union devlink_param_value val; + int err; + + err = devlink_param_driverinit_value_get(priv_to_devlink(dev), + DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET, + &val); + return err ? false : val.vbool; +} + static bool is_ib_rep_supported(struct mlx5_core_dev *dev) { if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND)) @@@ -192,7 -170,7 +192,7 @@@ static bool is_mp_supported(struct mlx5 return true; } -static bool is_ib_supported(struct mlx5_core_dev *dev) +bool mlx5_rdma_supported(struct mlx5_core_dev *dev) { if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND)) return false; @@@ -209,17 -187,6 +209,17 @@@ return true; } +static bool is_ib_enabled(struct mlx5_core_dev *dev) +{ + union devlink_param_value val; + int err; + + err = devlink_param_driverinit_value_get(priv_to_devlink(dev), + DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA, + &val); + return err ? false : val.vbool; +} + enum { MLX5_INTERFACE_PROTOCOL_ETH, MLX5_INTERFACE_PROTOCOL_ETH_REP, @@@ -234,17 -201,13 +234,17 @@@ static const struct mlx5_adev_device { const char *suffix; bool (*is_supported)(struct mlx5_core_dev *dev); + bool (*is_enabled)(struct mlx5_core_dev *dev); } mlx5_adev_devices[] = { [MLX5_INTERFACE_PROTOCOL_VNET] = { .suffix = "vnet", - .is_supported = &is_vnet_supported }, + .is_supported = &mlx5_vnet_supported, + .is_enabled = &is_vnet_enabled }, [MLX5_INTERFACE_PROTOCOL_IB] = { .suffix = "rdma", - .is_supported = &is_ib_supported }, + .is_supported = &mlx5_rdma_supported, + .is_enabled = &is_ib_enabled }, [MLX5_INTERFACE_PROTOCOL_ETH] = { .suffix = "eth", - .is_supported = &is_eth_supported }, + .is_supported = &mlx5_eth_supported, + .is_enabled = &is_eth_enabled }, [MLX5_INTERFACE_PROTOCOL_ETH_REP] = { .suffix = "eth-rep", .is_supported = &is_eth_rep_supported }, [MLX5_INTERFACE_PROTOCOL_IB_REP] = { .suffix = "rdma-rep", @@@ -345,14 -308,6 +345,14 @@@ int mlx5_attach_device(struct mlx5_core if (!priv->adev[i]) { bool is_supported = false; + if (mlx5_adev_devices[i].is_enabled) { + bool enabled; + + enabled = mlx5_adev_devices[i].is_enabled(dev); + if (!enabled) + continue; + } + if (mlx5_adev_devices[i].is_supported) is_supported = mlx5_adev_devices[i].is_supported(dev); @@@ -405,14 -360,6 +405,14 @@@ void mlx5_detach_device(struct mlx5_cor if (!priv->adev[i]) continue; + if (mlx5_adev_devices[i].is_enabled) { + bool enabled; + + enabled = mlx5_adev_devices[i].is_enabled(dev); + if (!enabled) + goto skip_suspend; + } + adev = &priv->adev[i]->adev; /* Auxiliary driver was unbind manually through sysfs */ if (!adev->dev.driver) @@@ -450,7 -397,7 +450,7 @@@ int mlx5_register_device(struct mlx5_co void mlx5_unregister_device(struct mlx5_core_dev *dev) { mutex_lock(&mlx5_intf_mutex); - dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV; + dev->priv.flags = MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV; mlx5_rescan_drivers_locked(dev); mutex_unlock(&mlx5_intf_mutex); } @@@ -500,21 -447,12 +500,21 @@@ static void delete_drivers(struct mlx5_ if (!priv->adev[i]) continue; + if (mlx5_adev_devices[i].is_enabled) { + bool enabled; + + enabled = mlx5_adev_devices[i].is_enabled(dev); + if (!enabled) + goto del_adev; + } + if (mlx5_adev_devices[i].is_supported && !delete_all) is_supported = mlx5_adev_devices[i].is_supported(dev); if (is_supported) continue; +del_adev: del_adev(&priv->adev[i]->adev); priv->adev[i] = NULL; } diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 6603d9c823a3,6eba574c5a36..ba8164792016 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@@ -34,20 -34,25 +34,20 @@@ #include #include #include -#include -#include #include #include #include #include #include -#include -#include -#include #include #include -#include #include #include #include #include #include #include "en.h" +#include "en/tc/post_act.h" #include "en_rep.h" #include "en/rep/tc.h" #include "en/rep/neigh.h" @@@ -61,7 -66,7 +61,7 @@@ #include "en/mod_hdr.h" #include "en/tc_priv.h" #include "en/tc_tun_encap.h" -#include "esw/sample.h" +#include "en/tc/sample.h" #include "lib/devcom.h" #include "lib/geneve.h" #include "lib/fs_chains.h" @@@ -98,7 -103,7 +98,7 @@@ struct mlx5e_tc_attr_to_reg_mapping mlx [MARK_TO_REG] = mark_to_reg_ct, [LABELS_TO_REG] = labels_to_reg_ct, [FTEID_TO_REG] = fteid_to_reg_ct, - /* For NIC rules we store the retore metadata directly + /* For NIC rules we store the restore metadata directly * into reg_b that is passed to SW since we don't * jump between steering domains. */ @@@ -247,7 -252,7 +247,7 @@@ get_ct_priv(struct mlx5e_priv *priv } #if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) -static struct mlx5_esw_psample * +static struct mlx5e_tc_psample * get_sample_priv(struct mlx5e_priv *priv) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; @@@ -258,7 -263,7 +258,7 @@@ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); uplink_priv = &uplink_rpriv->uplink_priv; - return uplink_priv->esw_psample; + return uplink_priv->tc_psample; } return NULL; @@@ -335,12 -340,12 +335,12 @@@ struct mlx5e_hairpin struct mlx5_core_dev *func_mdev; struct mlx5e_priv *func_priv; u32 tdn; - u32 tirn; + struct mlx5e_tir direct_tir; int num_channels; struct mlx5e_rqt indir_rqt; - u32 indir_tirn[MLX5E_NUM_INDIR_TIRS]; - struct mlx5e_ttc_table ttc; + struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; + struct mlx5_ttc_table *ttc; }; struct mlx5e_hairpin_entry { @@@ -477,101 -482,126 +477,101 @@@ struct mlx5_core_dev *mlx5e_hairpin_get static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp) { - u32 in[MLX5_ST_SZ_DW(create_tir_in)] = {}; - void *tirc; + struct mlx5e_tir_builder *builder; int err; + builder = mlx5e_tir_builder_alloc(false); + if (!builder) + return -ENOMEM; + err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn); if (err) - goto alloc_tdn_err; - - tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); - - MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT); - MLX5_SET(tirc, tirc, inline_rqn, hp->pair->rqn[0]); - MLX5_SET(tirc, tirc, transport_domain, hp->tdn); + goto out; - err = mlx5_core_create_tir(hp->func_mdev, in, &hp->tirn); + mlx5e_tir_builder_build_inline(builder, hp->tdn, hp->pair->rqn[0]); + err = mlx5e_tir_init(&hp->direct_tir, builder, hp->func_mdev, false); if (err) goto create_tir_err; - return 0; +out: + mlx5e_tir_builder_free(builder); + return err; create_tir_err: mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn); -alloc_tdn_err: - return err; + + goto out; } static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp) { - mlx5_core_destroy_tir(hp->func_mdev, hp->tirn); + mlx5e_tir_destroy(&hp->direct_tir); mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn); } -static int mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc) -{ - struct mlx5e_priv *priv = hp->func_priv; - int i, ix, sz = MLX5E_INDIR_RQT_SIZE; - u32 *indirection_rqt, rqn; - - indirection_rqt = kcalloc(sz, sizeof(*indirection_rqt), GFP_KERNEL); - if (!indirection_rqt) - return -ENOMEM; - - mlx5e_build_default_indir_rqt(indirection_rqt, sz, - hp->num_channels); - - for (i = 0; i < sz; i++) { - ix = i; - if (priv->rss_params.hfunc == ETH_RSS_HASH_XOR) - ix = mlx5e_bits_invert(i, ilog2(sz)); - ix = indirection_rqt[ix]; - rqn = hp->pair->rqn[ix]; - MLX5_SET(rqtc, rqtc, rq_num[i], rqn); - } - - kfree(indirection_rqt); - return 0; -} - static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp) { - int inlen, err, sz = MLX5E_INDIR_RQT_SIZE; struct mlx5e_priv *priv = hp->func_priv; struct mlx5_core_dev *mdev = priv->mdev; - void *rqtc; - u32 *in; + struct mlx5e_rss_params_indir *indir; + int err; - inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz; - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) + indir = kvmalloc(sizeof(*indir), GFP_KERNEL); + if (!indir) return -ENOMEM; - rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); + mlx5e_rss_params_indir_init_uniform(indir, hp->num_channels); + err = mlx5e_rqt_init_indir(&hp->indir_rqt, mdev, hp->pair->rqn, hp->num_channels, + mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc, + indir); - MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); - MLX5_SET(rqtc, rqtc, rqt_max_size, sz); - - err = mlx5e_hairpin_fill_rqt_rqns(hp, rqtc); - if (err) - goto out; - - err = mlx5_core_create_rqt(mdev, in, inlen, &hp->indir_rqt.rqtn); - if (!err) - hp->indir_rqt.enabled = true; - -out: - kvfree(in); + kvfree(indir); return err; } static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp) { struct mlx5e_priv *priv = hp->func_priv; - u32 in[MLX5_ST_SZ_DW(create_tir_in)]; - int tt, i, err; - void *tirc; + struct mlx5e_rss_params_hash rss_hash; + enum mlx5_traffic_types tt, max_tt; + struct mlx5e_tir_builder *builder; + int err = 0; + + builder = mlx5e_tir_builder_alloc(false); + if (!builder) + return -ENOMEM; + + rss_hash = mlx5e_rx_res_get_current_hash(priv->rx_res); for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - struct mlx5e_tirc_config ttconfig = mlx5e_tirc_get_default_config(tt); + struct mlx5e_rss_params_traffic_type rss_tt; - memset(in, 0, MLX5_ST_SZ_BYTES(create_tir_in)); - tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); + rss_tt = mlx5e_rss_get_default_tt_config(tt); - MLX5_SET(tirc, tirc, transport_domain, hp->tdn); - MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); - MLX5_SET(tirc, tirc, indirect_table, hp->indir_rqt.rqtn); - mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, &ttconfig, tirc, false); + mlx5e_tir_builder_build_rqt(builder, hp->tdn, + mlx5e_rqt_get_rqtn(&hp->indir_rqt), + false); + mlx5e_tir_builder_build_rss(builder, &rss_hash, &rss_tt, false); - err = mlx5_core_create_tir(hp->func_mdev, in, - &hp->indir_tirn[tt]); + err = mlx5e_tir_init(&hp->indir_tir[tt], builder, hp->func_mdev, false); if (err) { mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err); goto err_destroy_tirs; } + + mlx5e_tir_builder_clear(builder); } - return 0; -err_destroy_tirs: - for (i = 0; i < tt; i++) - mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[i]); +out: + mlx5e_tir_builder_free(builder); return err; + +err_destroy_tirs: + max_tt = tt; + for (tt = 0; tt < max_tt; tt++) + mlx5e_tir_destroy(&hp->indir_tir[tt]); + + goto out; } static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp) @@@ -579,7 -609,7 +579,7 @@@ int tt; for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[tt]); + mlx5e_tir_destroy(&hp->indir_tir[tt]); } static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp, @@@ -590,16 -620,12 +590,16 @@@ memset(ttc_params, 0, sizeof(*ttc_params)); - ttc_params->any_tt_tirn = hp->tirn; - - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - ttc_params->indir_tirn[tt] = hp->indir_tirn[tt]; + ttc_params->ns = mlx5_get_flow_namespace(hp->func_mdev, + MLX5_FLOW_NAMESPACE_KERNEL); + for (tt = 0; tt < MLX5_NUM_TT; tt++) { + ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR; + ttc_params->dests[tt].tir_num = + tt == MLX5_TT_ANY ? + mlx5e_tir_get_tirn(&hp->direct_tir) : + mlx5e_tir_get_tirn(&hp->indir_tir[tt]); + } - ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE; ft_attr->level = MLX5E_TC_TTC_FT_LEVEL; ft_attr->prio = MLX5E_TC_PRIO; } @@@ -619,31 -645,30 +619,31 @@@ static int mlx5e_hairpin_rss_init(struc goto err_create_indirect_tirs; mlx5e_hairpin_set_ttc_params(hp, &ttc_params); - err = mlx5e_create_ttc_table(priv, &ttc_params, &hp->ttc); - if (err) + hp->ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params); + if (IS_ERR(hp->ttc)) { + err = PTR_ERR(hp->ttc); goto err_create_ttc_table; + } netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n", - hp->num_channels, hp->ttc.ft.t->id); + hp->num_channels, + mlx5_get_ttc_flow_table(priv->fs.ttc)->id); return 0; err_create_ttc_table: mlx5e_hairpin_destroy_indirect_tirs(hp); err_create_indirect_tirs: - mlx5e_destroy_rqt(priv, &hp->indir_rqt); + mlx5e_rqt_destroy(&hp->indir_rqt); return err; } static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp) { - struct mlx5e_priv *priv = hp->func_priv; - - mlx5e_destroy_ttc_table(priv, &hp->ttc); + mlx5_destroy_ttc_table(hp->ttc); mlx5e_hairpin_destroy_indirect_tirs(hp); - mlx5e_destroy_rqt(priv, &hp->indir_rqt); + mlx5e_rqt_destroy(&hp->indir_rqt); } static struct mlx5e_hairpin * @@@ -878,17 -903,16 +878,17 @@@ static int mlx5e_hairpin_flow_add(struc } netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n", - hp->tirn, hp->pair->rqn[0], + mlx5e_tir_get_tirn(&hp->direct_tir), hp->pair->rqn[0], dev_name(hp->pair->peer_mdev->device), hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets); attach_flow: if (hpe->hp->num_channels > 1) { flow_flag_set(flow, HAIRPIN_RSS); - flow->attr->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t; + flow->attr->nic_attr->hairpin_ft = + mlx5_get_ttc_flow_table(hpe->hp->ttc); } else { - flow->attr->nic_attr->hairpin_tirn = hpe->hp->tirn; + flow->attr->nic_attr->hairpin_tirn = mlx5e_tir_get_tirn(&hpe->hp->direct_tir); } flow->hpe = hpe; @@@ -1032,17 -1056,15 +1032,17 @@@ err_ft_get static int mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, - struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { + struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5_flow_attr *attr = flow->attr; struct mlx5_core_dev *dev = priv->mdev; - struct mlx5_fc *counter = NULL; + struct mlx5_fc *counter; int err; + parse_attr = attr->parse_attr; + if (flow_flag_test(flow, HAIRPIN)) { err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack); if (err) @@@ -1148,8 -1170,7 +1148,8 @@@ mlx5e_tc_offload_fdb_rules(struct mlx5_ mod_hdr_acts); #if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) } else if (flow_flag_test(flow, SAMPLE)) { - rule = mlx5_esw_sample_offload(get_sample_priv(flow->priv), spec, attr); + rule = mlx5e_tc_sample_offload(get_sample_priv(flow->priv), spec, attr, + mlx5e_tc_get_flow_tun_id(flow)); #endif } else { rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); @@@ -1188,7 -1209,7 +1188,7 @@@ void mlx5e_tc_unoffload_fdb_rules(struc #if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) if (flow_flag_test(flow, SAMPLE)) { - mlx5_esw_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr); + mlx5e_tc_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr); return; } #endif @@@ -1317,6 -1338,7 +1317,7 @@@ bool mlx5e_tc_is_vf_tunnel(struct net_d int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport) { struct mlx5e_priv *out_priv, *route_priv; + struct mlx5_devcom *devcom = NULL; struct mlx5_core_dev *route_mdev; struct mlx5_eswitch *esw; u16 vhca_id; @@@ -1328,7 -1350,24 +1329,24 @@@ route_mdev = route_priv->mdev; vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id); + if (mlx5_lag_is_active(out_priv->mdev)) { + /* In lag case we may get devices from different eswitch instances. + * If we failed to get vport num, it means, mostly, that we on the wrong + * eswitch. + */ + err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport); + if (err != -ENOENT) + return err; + + devcom = out_priv->mdev->priv.devcom; + esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + if (!esw) + return -ENODEV; + } + err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport); + if (devcom) + mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); return err; } @@@ -1363,9 -1402,9 +1381,9 @@@ mlx5e_tc_add_fdb_flow(struct mlx5e_pri bool vf_tun = false, encap_valid = true; struct net_device *encap_dev = NULL; struct mlx5_esw_flow_attr *esw_attr; - struct mlx5_fc *counter = NULL; struct mlx5e_rep_priv *rpriv; struct mlx5e_priv *out_priv; + struct mlx5_fc *counter; u32 max_prio, max_chain; int err = 0; int out_index; @@@ -1552,7 -1591,6 +1570,7 @@@ static void mlx5e_tc_del_fdb_flow(struc else mlx5e_detach_mod_hdr(priv, flow); } + kfree(attr->sample_attr); kvfree(attr->parse_attr); kvfree(attr->esw_attr->rx_tun_attr); @@@ -1562,6 -1600,7 +1580,6 @@@ if (flow_flag_test(flow, L3_TO_L2_DECAP)) mlx5e_detach_decap(priv, flow); - kfree(flow->attr->esw_attr->sample); kfree(flow->attr); } @@@ -1626,22 -1665,17 +1644,22 @@@ static void mlx5e_tc_del_flow(struct ml } } -static int flow_has_tc_fwd_action(struct flow_cls_offload *f) +static bool flow_requires_tunnel_mapping(u32 chain, struct flow_cls_offload *f) { struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct flow_action *flow_action = &rule->action; const struct flow_action_entry *act; int i; + if (chain) + return false; + flow_action_for_each(i, act, flow_action) { switch (act->id) { case FLOW_ACTION_GOTO: return true; + case FLOW_ACTION_SAMPLE: + return true; default: continue; } @@@ -1882,7 -1916,7 +1900,7 @@@ static int parse_tunnel_attr(struct mlx return -EOPNOTSUPP; needs_mapping = !!flow->attr->chain; - sets_mapping = !flow->attr->chain && flow_has_tc_fwd_action(f); + sets_mapping = flow_requires_tunnel_mapping(flow->attr->chain, f); *match_inner = !needs_mapping; if ((needs_mapping || sets_mapping) && @@@ -2455,7 -2489,7 +2473,7 @@@ static int __parse_cls_flower(struct ml spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3; } } - /* Currenlty supported only for MPLS over UDP */ + /* Currently supported only for MPLS over UDP */ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS) && !netif_is_bareudp(filter_dev)) { NL_SET_ERR_MSG_MOD(extack, @@@ -2709,9 -2743,7 +2727,9 @@@ static int offload_pedit_fields(struct if (s_mask && a_mask) { NL_SET_ERR_MSG_MOD(extack, "can't set and add to the same HW field"); - printk(KERN_WARNING "mlx5: can't set and add to the same HW field (%x)\n", f->field); + netdev_warn(priv->netdev, + "mlx5: can't set and add to the same HW field (%x)\n", + f->field); return -EOPNOTSUPP; } @@@ -2750,9 -2782,8 +2768,9 @@@ if (first < next_z && next_z < last) { NL_SET_ERR_MSG_MOD(extack, "rewrite of few sub-fields isn't supported"); - printk(KERN_WARNING "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n", - mask); + netdev_warn(priv->netdev, + "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n", + mask); return -EOPNOTSUPP; } @@@ -3339,10 -3370,10 +3357,10 @@@ static int validate_goto_chain(struct m static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct flow_action *flow_action, - struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { + struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5_flow_attr *attr = flow->attr; struct pedit_headers_action hdrs[2] = {}; const struct flow_action_entry *act; @@@ -3358,8 -3389,8 +3376,8 @@@ return -EOPNOTSUPP; nic_attr = attr->nic_attr; - nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; + parse_attr = attr->parse_attr; flow_action_for_each(i, act, flow_action) { switch (act->id) { @@@ -3368,8 -3399,10 +3386,8 @@@ MLX5_FLOW_CONTEXT_ACTION_COUNT; break; case FLOW_ACTION_DROP: - action |= MLX5_FLOW_CONTEXT_ACTION_DROP; - if (MLX5_CAP_FLOWTABLE(priv->mdev, - flow_table_properties_nic_receive.flow_counter)) - action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + action |= MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_COUNT; break; case FLOW_ACTION_MANGLE: case FLOW_ACTION_ADD: @@@ -3410,7 -3443,7 +3428,7 @@@ "device is not on same HW, can't offload"); netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n", peer_dev->name); - return -EINVAL; + return -EOPNOTSUPP; } } break; @@@ -3420,7 -3453,7 +3438,7 @@@ if (mark & ~MLX5E_TC_FLOW_ID_MASK) { NL_SET_ERR_MSG_MOD(extack, "Bad flow mark - only 16 bit is supported"); - return -EINVAL; + return -EOPNOTSUPP; } nic_attr->flow_tag = mark; @@@ -3717,19 -3750,20 +3735,19 @@@ static int verify_uplink_forwarding(str static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct flow_action *flow_action, struct mlx5e_tc_flow *flow, - struct netlink_ext_ack *extack, - struct net_device *filter_dev) + struct netlink_ext_ack *extack) { struct pedit_headers_action hdrs[2] = {}; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5e_sample_attr sample_attr = {}; const struct ip_tunnel_info *info = NULL; struct mlx5_flow_attr *attr = flow->attr; int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS]; bool ft_flow = mlx5e_is_ft_flow(flow); const struct flow_action_entry *act; struct mlx5_esw_flow_attr *esw_attr; - struct mlx5_sample_attr sample = {}; bool encap = false, decap = false; u32 action = attr->action; int err, i, if_count = 0; @@@ -3782,7 -3816,7 +3800,7 @@@ "mpls pop supported only as first action"); return -EOPNOTSUPP; } - if (!netif_is_bareudp(filter_dev)) { + if (!netif_is_bareudp(parse_attr->filter_dev)) { NL_SET_ERR_MSG_MOD(extack, "mpls pop supported only on bareudp devices"); return -EOPNOTSUPP; @@@ -3931,7 -3965,7 +3949,7 @@@ "devices %s %s not on same switch HW, can't offload forwarding\n", priv->netdev->name, out_dev->name); - return -EINVAL; + return -EOPNOTSUPP; } } break; @@@ -4000,10 -4034,10 +4018,10 @@@ NL_SET_ERR_MSG_MOD(extack, "Sample action with connection tracking is not supported"); return -EOPNOTSUPP; } - sample.rate = act->sample.rate; - sample.group_num = act->sample.psample_group->group_num; + sample_attr.rate = act->sample.rate; + sample_attr.group_num = act->sample.psample_group->group_num; if (act->sample.truncate) - sample.trunc_size = act->sample.trunc_size; + sample_attr.trunc_size = act->sample.trunc_size; flow_flag_set(flow, SAMPLE); break; default: @@@ -4088,10 -4122,10 +4106,10 @@@ * no errors after parsing. */ if (flow_flag_test(flow, SAMPLE)) { - esw_attr->sample = kzalloc(sizeof(*esw_attr->sample), GFP_KERNEL); - if (!esw_attr->sample) + attr->sample_attr = kzalloc(sizeof(*attr->sample_attr), GFP_KERNEL); + if (!attr->sample_attr) return -ENOMEM; - *esw_attr->sample = sample; + *attr->sample_attr = sample_attr; } return 0; @@@ -4284,7 -4318,7 +4302,7 @@@ __mlx5e_add_fdb_flow(struct mlx5e_priv if (err) goto err_free; - err = parse_tc_fdb_actions(priv, &rule->action, flow, extack, filter_dev); + err = parse_tc_fdb_actions(priv, &rule->action, flow, extack); if (err) goto err_free; @@@ -4430,11 -4464,11 +4448,11 @@@ mlx5e_add_nic_flow(struct mlx5e_priv *p if (err) goto err_free; - err = parse_tc_nic_actions(priv, &rule->action, parse_attr, flow, extack); + err = parse_tc_nic_actions(priv, &rule->action, flow, extack); if (err) goto err_free; - err = mlx5e_tc_add_nic_flow(priv, parse_attr, flow, extack); + err = mlx5e_tc_add_nic_flow(priv, flow, extack); if (err) goto err_free; @@@ -4689,7 -4723,7 +4707,7 @@@ static int apply_police_params(struct m rate_mbps = max_t(u32, rate, 1); } - err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps); + err = mlx5_esw_qos_modify_vport_rate(esw, vport_num, rate_mbps); if (err) NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware"); @@@ -4861,7 -4895,6 +4879,7 @@@ int mlx5e_tc_nic_init(struct mlx5e_pri struct mlx5_core_dev *dev = priv->mdev; struct mapping_ctx *chains_mapping; struct mlx5_chains_attr attr = {}; + u64 mapping_id; int err; mlx5e_mod_hdr_tbl_init(&tc->mod_hdr); @@@ -4875,12 -4908,8 +4893,12 @@@ lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key); - chains_mapping = mapping_create(sizeof(struct mlx5_mapped_obj), - MLX5E_TC_TABLE_CHAIN_TAG_MASK, true); + mapping_id = mlx5_query_nic_system_image_guid(dev); + + chains_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN, + sizeof(struct mlx5_mapped_obj), + MLX5E_TC_TABLE_CHAIN_TAG_MASK, true); + if (IS_ERR(chains_mapping)) { err = PTR_ERR(chains_mapping); goto err_mapping; @@@ -4902,9 -4931,8 +4920,9 @@@ goto err_chains; } + tc->post_act = mlx5e_tc_post_act_init(priv, tc->chains, MLX5_FLOW_NAMESPACE_KERNEL); tc->ct = mlx5_tc_ct_init(priv, tc->chains, &priv->fs.tc.mod_hdr, - MLX5_FLOW_NAMESPACE_KERNEL); + MLX5_FLOW_NAMESPACE_KERNEL, tc->post_act); tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event; err = register_netdevice_notifier_dev_net(priv->netdev, @@@ -4920,7 -4948,6 +4938,7 @@@ err_reg: mlx5_tc_ct_clean(tc->ct); + mlx5e_tc_post_act_destroy(tc->post_act); mlx5_chains_destroy(tc->chains); err_chains: mapping_destroy(chains_mapping); @@@ -4959,7 -4986,6 +4977,7 @@@ void mlx5e_tc_nic_cleanup(struct mlx5e_ mutex_destroy(&tc->t_lock); mlx5_tc_ct_clean(tc->ct); + mlx5e_tc_post_act_destroy(tc->post_act); mapping_destroy(tc->mapping); mlx5_chains_destroy(tc->chains); } @@@ -4972,7 -4998,6 +4990,7 @@@ int mlx5e_tc_esw_init(struct rhashtabl struct mapping_ctx *mapping; struct mlx5_eswitch *esw; struct mlx5e_priv *priv; + u64 mapping_id; int err = 0; uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht); @@@ -4980,24 -5005,17 +4998,24 @@@ priv = netdev_priv(rpriv->netdev); esw = priv->mdev->priv.eswitch; + uplink_priv->post_act = mlx5e_tc_post_act_init(priv, esw_chains(esw), + MLX5_FLOW_NAMESPACE_FDB); uplink_priv->ct_priv = mlx5_tc_ct_init(netdev_priv(priv->netdev), esw_chains(esw), &esw->offloads.mod_hdr, - MLX5_FLOW_NAMESPACE_FDB); + MLX5_FLOW_NAMESPACE_FDB, + uplink_priv->post_act); #if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) - uplink_priv->esw_psample = mlx5_esw_sample_init(netdev_priv(priv->netdev)); + uplink_priv->tc_psample = mlx5e_tc_sample_init(esw, uplink_priv->post_act); #endif - mapping = mapping_create(sizeof(struct tunnel_match_key), - TUNNEL_INFO_BITS_MASK, true); + mapping_id = mlx5_query_nic_system_image_guid(esw->dev); + + mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL, + sizeof(struct tunnel_match_key), + TUNNEL_INFO_BITS_MASK, true); + if (IS_ERR(mapping)) { err = PTR_ERR(mapping); goto err_tun_mapping; @@@ -5005,8 -5023,7 +5023,8 @@@ uplink_priv->tunnel_mapping = mapping; /* 0xFFF is reserved for stack devices slow path table mark */ - mapping = mapping_create(sz_enc_opts, ENC_OPTS_BITS_MASK - 1, true); + mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL_ENC_OPTS, + sz_enc_opts, ENC_OPTS_BITS_MASK - 1, true); if (IS_ERR(mapping)) { err = PTR_ERR(mapping); goto err_enc_opts_mapping; @@@ -5035,12 -5052,11 +5053,12 @@@ err_enc_opts_mapping mapping_destroy(uplink_priv->tunnel_mapping); err_tun_mapping: #if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) - mlx5_esw_sample_cleanup(uplink_priv->esw_psample); + mlx5e_tc_sample_cleanup(uplink_priv->tc_psample); #endif mlx5_tc_ct_clean(uplink_priv->ct_priv); netdev_warn(priv->netdev, "Failed to initialize tc (eswitch), err: %d", err); + mlx5e_tc_post_act_destroy(uplink_priv->post_act); return err; } @@@ -5057,10 -5073,9 +5075,10 @@@ void mlx5e_tc_esw_cleanup(struct rhasht mapping_destroy(uplink_priv->tunnel_mapping); #if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) - mlx5_esw_sample_cleanup(uplink_priv->esw_psample); + mlx5e_tc_sample_cleanup(uplink_priv->tc_psample); #endif mlx5_tc_ct_clean(uplink_priv->ct_priv); + mlx5e_tc_post_act_destroy(uplink_priv->post_act); } int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags) diff --combined drivers/net/ethernet/mellanox/mlx5/core/lag.c index f4dfa55c8c7e,40ef60f562b4..49ca57c6d31d --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@@ -32,9 -32,7 +32,9 @@@ #include #include +#include #include +#include "lib/devcom.h" #include "mlx5_core.h" #include "eswitch.h" #include "lag.h" @@@ -47,7 -45,7 +47,7 @@@ static DEFINE_SPINLOCK(lag_lock); static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1, - u8 remap_port2) + u8 remap_port2, bool shared_fdb) { u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {}; void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx); @@@ -56,7 -54,6 +56,7 @@@ MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1); MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2); + MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb); return mlx5_cmd_exec_in(dev, create_lag, in); } @@@ -227,59 -224,35 +227,59 @@@ void mlx5_modify_lag(struct mlx5_lag *l } static int mlx5_create_lag(struct mlx5_lag *ldev, - struct lag_tracker *tracker) + struct lag_tracker *tracker, + bool shared_fdb) { struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; + u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; int err; mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1], &ldev->v2p_map[MLX5_LAG_P2]); - mlx5_core_info(dev0, "lag map port 1:%d port 2:%d", - ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2]); + mlx5_core_info(dev0, "lag map port 1:%d port 2:%d shared_fdb:%d", + ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2], + shared_fdb); err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1], - ldev->v2p_map[MLX5_LAG_P2]); - if (err) + ldev->v2p_map[MLX5_LAG_P2], shared_fdb); + if (err) { mlx5_core_err(dev0, "Failed to create LAG (%d)\n", err); + return err; + } + + if (shared_fdb) { + err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch, + dev1->priv.eswitch); + if (err) + mlx5_core_err(dev0, "Can't enable single FDB mode\n"); + else + mlx5_core_info(dev0, "Operation mode is single FDB\n"); + } + + if (err) { + MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG); + if (mlx5_cmd_exec_in(dev0, destroy_lag, in)) + mlx5_core_err(dev0, + "Failed to deactivate RoCE LAG; driver restart required\n"); + } + return err; } int mlx5_activate_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker, - u8 flags) + u8 flags, + bool shared_fdb) { bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE); struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; int err; - err = mlx5_create_lag(ldev, tracker); + err = mlx5_create_lag(ldev, tracker, shared_fdb); if (err) { if (roce_lag) { mlx5_core_err(dev0, @@@ -293,7 -266,6 +293,7 @@@ } ldev->flags |= flags; + ldev->shared_fdb = shared_fdb; return 0; } @@@ -305,13 -277,8 +305,14 @@@ static int mlx5_deactivate_lag(struct m int err; ldev->flags &= ~MLX5_LAG_MODE_FLAGS; + mlx5_lag_mp_reset(ldev); + if (ldev->shared_fdb) { + mlx5_eswitch_offloads_destroy_single_fdb(ldev->pf[MLX5_LAG_P1].dev->priv.eswitch, + ldev->pf[MLX5_LAG_P2].dev->priv.eswitch); + ldev->shared_fdb = false; + } + MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG); err = mlx5_cmd_exec_in(dev0, destroy_lag, in); if (err) { @@@ -367,10 -334,6 +368,10 @@@ static void mlx5_lag_remove_devices(str if (!ldev->pf[i].dev) continue; + if (ldev->pf[i].dev->priv.flags & + MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV) + continue; + ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(ldev->pf[i].dev); } @@@ -380,15 -343,12 +381,15 @@@ static void mlx5_disable_lag(struct mlx { struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; + bool shared_fdb = ldev->shared_fdb; bool roce_lag; int err; roce_lag = __mlx5_lag_is_roce(ldev); - if (roce_lag) { + if (shared_fdb) { + mlx5_lag_remove_devices(ldev); + } else if (roce_lag) { if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) { dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(dev0); @@@ -400,34 -360,8 +401,34 @@@ if (err) return; - if (roce_lag) + if (shared_fdb || roce_lag) mlx5_lag_add_devices(ldev); + + if (shared_fdb) { + if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) + mlx5_eswitch_reload_reps(dev0->priv.eswitch); + if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) + mlx5_eswitch_reload_reps(dev1->priv.eswitch); + } +} + +static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; + + if (is_mdev_switchdev_mode(dev0) && + is_mdev_switchdev_mode(dev1) && + mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) && + mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) && + mlx5_devcom_is_paired(dev0->priv.devcom, + MLX5_DEVCOM_ESW_OFFLOADS) && + MLX5_CAP_GEN(dev1, lag_native_fdb_selection) && + MLX5_CAP_ESW(dev1, root_ft_on_other_esw) && + MLX5_CAP_ESW(dev0, esw_shared_ingress_acl)) + return true; + + return false; } static void mlx5_do_bond(struct mlx5_lag *ldev) @@@ -438,17 -372,14 +439,17 @@@ bool do_bond, roce_lag; int err; - if (!mlx5_lag_is_ready(ldev)) - return; - - tracker = ldev->tracker; + if (!mlx5_lag_is_ready(ldev)) { + do_bond = false; + } else { + tracker = ldev->tracker; - do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev); + do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev); + } if (do_bond && !__mlx5_lag_is_active(ldev)) { + bool shared_fdb = mlx5_shared_fdb_supported(ldev); + roce_lag = !mlx5_sriov_is_enabled(dev0) && !mlx5_sriov_is_enabled(dev1); @@@ -458,40 -389,23 +459,40 @@@ dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE; #endif - if (roce_lag) + if (shared_fdb || roce_lag) mlx5_lag_remove_devices(ldev); err = mlx5_activate_lag(ldev, &tracker, roce_lag ? MLX5_LAG_FLAG_ROCE : - MLX5_LAG_FLAG_SRIOV); + MLX5_LAG_FLAG_SRIOV, + shared_fdb); if (err) { - if (roce_lag) + if (shared_fdb || roce_lag) mlx5_lag_add_devices(ldev); return; - } - - if (roce_lag) { + } else if (roce_lag) { dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(dev0); mlx5_nic_vport_enable_roce(dev1); + } else if (shared_fdb) { + dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(dev0); + + err = mlx5_eswitch_reload_reps(dev0->priv.eswitch); + if (!err) + err = mlx5_eswitch_reload_reps(dev1->priv.eswitch); + + if (err) { + dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(dev0); + mlx5_deactivate_lag(ldev); + mlx5_lag_add_devices(ldev); + mlx5_eswitch_reload_reps(dev0->priv.eswitch); + mlx5_eswitch_reload_reps(dev1->priv.eswitch); + mlx5_core_err(dev0, "Failed to enable lag\n"); + return; + } } } else if (do_bond && __mlx5_lag_is_active(ldev)) { mlx5_modify_lag(ldev, &tracker); @@@ -505,48 -419,21 +506,48 @@@ static void mlx5_queue_bond_work(struc queue_delayed_work(ldev->wq, &ldev->bond_work, delay); } +static void mlx5_lag_lock_eswitches(struct mlx5_core_dev *dev0, + struct mlx5_core_dev *dev1) +{ + if (dev0) + mlx5_esw_lock(dev0->priv.eswitch); + if (dev1) + mlx5_esw_lock(dev1->priv.eswitch); +} + +static void mlx5_lag_unlock_eswitches(struct mlx5_core_dev *dev0, + struct mlx5_core_dev *dev1) +{ + if (dev1) + mlx5_esw_unlock(dev1->priv.eswitch); + if (dev0) + mlx5_esw_unlock(dev0->priv.eswitch); +} + static void mlx5_do_bond_work(struct work_struct *work) { struct delayed_work *delayed_work = to_delayed_work(work); struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag, bond_work); + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; int status; status = mlx5_dev_list_trylock(); if (!status) { - /* 1 sec delay. */ mlx5_queue_bond_work(ldev, HZ); return; } + if (ldev->mode_changes_in_progress) { + mlx5_dev_list_unlock(); + mlx5_queue_bond_work(ldev, HZ); + return; + } + + mlx5_lag_lock_eswitches(dev0, dev1); mlx5_do_bond(ldev); + mlx5_lag_unlock_eswitches(dev0, dev1); mlx5_dev_list_unlock(); } @@@ -744,7 -631,7 +745,7 @@@ static void mlx5_ldev_remove_mdev(struc } /* Must be called with intf_mutex held */ -static void __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev) +static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev = NULL; struct mlx5_core_dev *tmp_dev; @@@ -752,7 -639,7 +753,7 @@@ if (!MLX5_CAP_GEN(dev, vport_group_manager) || !MLX5_CAP_GEN(dev, lag_master) || MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS) - return; + return 0; tmp_dev = mlx5_get_next_phys_dev(dev); if (tmp_dev) @@@ -762,17 -649,15 +763,17 @@@ ldev = mlx5_lag_dev_alloc(dev); if (!ldev) { mlx5_core_err(dev, "Failed to alloc lag dev\n"); - return; + return 0; } } else { + if (ldev->mode_changes_in_progress) + return -EAGAIN; mlx5_ldev_get(ldev); } mlx5_ldev_add_mdev(ldev, dev); - return; + return 0; } void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev) @@@ -783,13 -668,7 +784,13 @@@ if (!ldev) return; +recheck: mlx5_dev_list_lock(); + if (ldev->mode_changes_in_progress) { + mlx5_dev_list_unlock(); + msleep(100); + goto recheck; + } mlx5_ldev_remove_mdev(ldev, dev); mlx5_dev_list_unlock(); mlx5_ldev_put(ldev); @@@ -797,16 -676,8 +798,16 @@@ void mlx5_lag_add_mdev(struct mlx5_core_dev *dev) { + int err; + +recheck: mlx5_dev_list_lock(); - __mlx5_lag_dev_add_mdev(dev); + err = __mlx5_lag_dev_add_mdev(dev); + if (err) { + mlx5_dev_list_unlock(); + msleep(100); + goto recheck; + } mlx5_dev_list_unlock(); } @@@ -820,11 -691,11 +821,11 @@@ void mlx5_lag_remove_netdev(struct mlx5 if (!ldev) return; - if (__mlx5_lag_is_active(ldev)) - mlx5_disable_lag(ldev); - mlx5_ldev_remove_netdev(ldev, netdev); ldev->flags &= ~MLX5_LAG_FLAG_READY; + + if (__mlx5_lag_is_active(ldev)) + mlx5_queue_bond_work(ldev, 0); } /* Must be called with intf_mutex held */ @@@ -846,7 -717,6 +847,7 @@@ void mlx5_lag_add_netdev(struct mlx5_co if (i >= MLX5_MAX_PORTS) ldev->flags |= MLX5_LAG_FLAG_READY; + mlx5_queue_bond_work(ldev, 0); } bool mlx5_lag_is_roce(struct mlx5_core_dev *dev) @@@ -877,21 -747,6 +878,21 @@@ bool mlx5_lag_is_active(struct mlx5_cor } EXPORT_SYMBOL(mlx5_lag_is_active); +bool mlx5_lag_is_master(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + bool res; + + spin_lock(&lag_lock); + ldev = mlx5_lag_dev(dev); + res = ldev && __mlx5_lag_is_active(ldev) && + dev == ldev->pf[MLX5_LAG_P1].dev; + spin_unlock(&lag_lock); + + return res; +} +EXPORT_SYMBOL(mlx5_lag_is_master); + bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; @@@ -906,50 -761,19 +907,50 @@@ } EXPORT_SYMBOL(mlx5_lag_is_sriov); -void mlx5_lag_update(struct mlx5_core_dev *dev) +bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + bool res; + + spin_lock(&lag_lock); + ldev = mlx5_lag_dev(dev); + res = ldev && __mlx5_lag_is_sriov(ldev) && ldev->shared_fdb; + spin_unlock(&lag_lock); + + return res; +} +EXPORT_SYMBOL(mlx5_lag_is_shared_fdb); + +void mlx5_lag_disable_change(struct mlx5_core_dev *dev) { + struct mlx5_core_dev *dev0; + struct mlx5_core_dev *dev1; struct mlx5_lag *ldev; mlx5_dev_list_lock(); + ldev = mlx5_lag_dev(dev); - if (!ldev) - goto unlock; + dev0 = ldev->pf[MLX5_LAG_P1].dev; + dev1 = ldev->pf[MLX5_LAG_P2].dev; - mlx5_do_bond(ldev); + ldev->mode_changes_in_progress++; + if (__mlx5_lag_is_active(ldev)) { + mlx5_lag_lock_eswitches(dev0, dev1); + mlx5_disable_lag(ldev); + mlx5_lag_unlock_eswitches(dev0, dev1); + } + mlx5_dev_list_unlock(); +} -unlock: +void mlx5_lag_enable_change(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + + mlx5_dev_list_lock(); + ldev = mlx5_lag_dev(dev); + ldev->mode_changes_in_progress--; mlx5_dev_list_unlock(); + mlx5_queue_bond_work(ldev, 0); } struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) @@@ -1004,26 -828,6 +1005,26 @@@ unlock } EXPORT_SYMBOL(mlx5_lag_get_slave_port); +struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev) +{ + struct mlx5_core_dev *peer_dev = NULL; + struct mlx5_lag *ldev; + + spin_lock(&lag_lock); + ldev = mlx5_lag_dev(dev); + if (!ldev) + goto unlock; + + peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ? + ldev->pf[MLX5_LAG_P2].dev : + ldev->pf[MLX5_LAG_P1].dev; + +unlock: + spin_unlock(&lag_lock); + return peer_dev; +} +EXPORT_SYMBOL(mlx5_lag_get_peer_mdev); + int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, u64 *values, int num_counters, diff --combined drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c index 011b639b29bf,516bfc2bd797..f239b352a58a --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c @@@ -161,7 -161,7 +161,7 @@@ static void mlx5_lag_fib_route_event(st struct lag_tracker tracker; tracker = ldev->tracker; - mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH); + mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH, false); } mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY); @@@ -302,6 -302,14 +302,14 @@@ static int mlx5_lag_fib_event(struct no return NOTIFY_DONE; } + void mlx5_lag_mp_reset(struct mlx5_lag *ldev) + { + /* Clear mfi, as it might become stale when a route delete event + * has been missed, see mlx5_lag_fib_route_event(). + */ + ldev->lag_mp.mfi = NULL; + } + int mlx5_lag_mp_init(struct mlx5_lag *ldev) { struct lag_mp *mp = &ldev->lag_mp; diff --combined drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c index a1c8ac0ecc23,ffdfb5a94b14..aca80efc28fa --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c @@@ -81,7 -81,6 +81,7 @@@ dr_rule_create_collision_entry(struct m } ste->ste_chain_location = orig_ste->ste_chain_location; + ste->htbl->pointing_ste = orig_ste->htbl->pointing_ste; /* In collision entry, all members share the same miss_list_head */ ste->htbl->miss_list = mlx5dr_ste_get_miss_list(orig_ste); @@@ -186,9 -185,6 +186,9 @@@ dr_rule_rehash_handle_collision(struct if (!new_ste) return NULL; + /* Update collision pointing STE */ + new_ste->htbl->pointing_ste = col_ste->htbl->pointing_ste; + /* In collision entry, all members share the same miss_list_head */ new_ste->htbl->miss_list = mlx5dr_ste_get_miss_list(col_ste); @@@ -216,7 -212,7 +216,7 @@@ static void dr_rule_rehash_copy_ste_ctr new_ste->next_htbl = cur_ste->next_htbl; new_ste->ste_chain_location = cur_ste->ste_chain_location; - if (!mlx5dr_ste_is_last_in_rule(nic_matcher, new_ste->ste_chain_location)) + if (new_ste->next_htbl) new_ste->next_htbl->pointing_ste = new_ste; /* We need to copy the refcount since this ste @@@ -224,8 -220,10 +224,8 @@@ */ new_ste->refcount = cur_ste->refcount; - /* Link old STEs rule_mem list to the new ste */ - mlx5dr_rule_update_rule_member(cur_ste, new_ste); - INIT_LIST_HEAD(&new_ste->rule_list); - list_splice_tail_init(&cur_ste->rule_list, &new_ste->rule_list); + /* Link old STEs rule to the new ste */ + mlx5dr_rule_set_last_member(cur_ste->rule_rx_tx, new_ste, false); } static struct mlx5dr_ste * @@@ -406,7 -404,7 +406,7 @@@ dr_rule_rehash_htbl(struct mlx5dr_rule info.miss_icm_addr = nic_matcher->e_anchor->chunk->icm_addr; mlx5dr_ste_set_formatted_ste(dmn->ste_ctx, dmn->info.caps.gvmi, - nic_dmn, + nic_dmn->type, new_htbl, formatted_ste, &info); @@@ -583,66 -581,34 +583,66 @@@ free_action_members return -ENOMEM; } -/* While the pointer of ste is no longer valid, like while moving ste to be - * the first in the miss_list, and to be in the origin table, - * all rule-members that are attached to this ste should update their ste member - * to the new pointer - */ -void mlx5dr_rule_update_rule_member(struct mlx5dr_ste *ste, - struct mlx5dr_ste *new_ste) +void mlx5dr_rule_set_last_member(struct mlx5dr_rule_rx_tx *nic_rule, + struct mlx5dr_ste *ste, + bool force) +{ + /* Update rule member is usually done for the last STE or during rule + * creation to recover from mid-creation failure (for this peruse the + * force flag is used) + */ + if (ste->next_htbl && !force) + return; + + /* Update is required since each rule keeps track of its last STE */ + ste->rule_rx_tx = nic_rule; + nic_rule->last_rule_ste = ste; +} + +static struct mlx5dr_ste *dr_rule_get_pointed_ste(struct mlx5dr_ste *curr_ste) +{ + struct mlx5dr_ste *first_ste; + + first_ste = list_first_entry(mlx5dr_ste_get_miss_list(curr_ste), + struct mlx5dr_ste, miss_list_node); + + return first_ste->htbl->pointing_ste; +} + +int mlx5dr_rule_get_reverse_rule_members(struct mlx5dr_ste **ste_arr, + struct mlx5dr_ste *curr_ste, + int *num_of_stes) { - struct mlx5dr_rule_member *rule_mem; + bool first = false; + + *num_of_stes = 0; + + if (!curr_ste) + return -ENOENT; + + /* Iterate from last to first */ + while (!first) { + first = curr_ste->ste_chain_location == 1; + ste_arr[*num_of_stes] = curr_ste; + *num_of_stes += 1; + curr_ste = dr_rule_get_pointed_ste(curr_ste); + } - list_for_each_entry(rule_mem, &ste->rule_list, use_ste_list) - rule_mem->ste = new_ste; + return 0; } static void dr_rule_clean_rule_members(struct mlx5dr_rule *rule, struct mlx5dr_rule_rx_tx *nic_rule) { - struct mlx5dr_rule_member *rule_mem; - struct mlx5dr_rule_member *tmp_mem; + struct mlx5dr_ste *ste_arr[DR_RULE_MAX_STES + DR_ACTION_MAX_STES]; + struct mlx5dr_ste *curr_ste = nic_rule->last_rule_ste; + int i; - if (list_empty(&nic_rule->rule_members_list)) + if (mlx5dr_rule_get_reverse_rule_members(ste_arr, curr_ste, &i)) return; - list_for_each_entry_safe(rule_mem, tmp_mem, &nic_rule->rule_members_list, list) { - list_del(&rule_mem->list); - list_del(&rule_mem->use_ste_list); - mlx5dr_ste_put(rule_mem->ste, rule->matcher, nic_rule->nic_matcher); - kvfree(rule_mem); - } + + while (i--) + mlx5dr_ste_put(ste_arr[i], rule->matcher, nic_rule->nic_matcher); } static u16 dr_get_bits_per_mask(u16 byte_mask) @@@ -662,25 -628,43 +662,25 @@@ static bool dr_rule_need_enlarge_hash(s struct mlx5dr_domain_rx_tx *nic_dmn) { struct mlx5dr_ste_htbl_ctrl *ctrl = &htbl->ctrl; + int threshold; if (dmn->info.max_log_sw_icm_sz <= htbl->chunk_size) return false; - if (!ctrl->may_grow) + if (!mlx5dr_ste_htbl_may_grow(htbl)) return false; if (dr_get_bits_per_mask(htbl->byte_mask) * BITS_PER_BYTE <= htbl->chunk_size) return false; - if (ctrl->num_of_collisions >= ctrl->increase_threshold && - (ctrl->num_of_valid_entries - ctrl->num_of_collisions) >= ctrl->increase_threshold) + threshold = mlx5dr_ste_htbl_increase_threshold(htbl); + if (ctrl->num_of_collisions >= threshold && + (ctrl->num_of_valid_entries - ctrl->num_of_collisions) >= threshold) return true; return false; } -static int dr_rule_add_member(struct mlx5dr_rule_rx_tx *nic_rule, - struct mlx5dr_ste *ste) -{ - struct mlx5dr_rule_member *rule_mem; - - rule_mem = kvzalloc(sizeof(*rule_mem), GFP_KERNEL); - if (!rule_mem) - return -ENOMEM; - - INIT_LIST_HEAD(&rule_mem->list); - INIT_LIST_HEAD(&rule_mem->use_ste_list); - - rule_mem->ste = ste; - list_add_tail(&rule_mem->list, &nic_rule->rule_members_list); - - list_add_tail(&rule_mem->use_ste_list, &ste->rule_list); - - return 0; -} - static int dr_rule_handle_action_stes(struct mlx5dr_rule *rule, struct mlx5dr_rule_rx_tx *nic_rule, struct list_head *send_ste_list, @@@ -695,13 -679,15 +695,13 @@@ struct mlx5dr_domain *dmn = matcher->tbl->dmn; u8 *curr_hw_ste, *prev_hw_ste; struct mlx5dr_ste *action_ste; - int i, k, ret; + int i, k; /* Two cases: * 1. num_of_builders is equal to new_hw_ste_arr_sz, the action in the ste * 2. num_of_builders is less then new_hw_ste_arr_sz, new ste was added * to support the action. */ - if (num_of_builders == new_hw_ste_arr_sz) - return 0; for (i = num_of_builders, k = 0; i < new_hw_ste_arr_sz; i++, k++) { curr_hw_ste = hw_ste_arr + i * DR_STE_SIZE; @@@ -714,10 -700,6 +714,10 @@@ mlx5dr_ste_get(action_ste); + action_ste->htbl->pointing_ste = last_ste; + last_ste->next_htbl = action_ste->htbl; + last_ste = action_ste; + /* While free ste we go over the miss list, so add this ste to the list */ list_add_tail(&action_ste->miss_list_node, mlx5dr_ste_get_miss_list(action_ste)); @@@ -731,19 -713,21 +731,19 @@@ mlx5dr_ste_set_hit_addr_by_next_htbl(dmn->ste_ctx, prev_hw_ste, action_ste->htbl); - ret = dr_rule_add_member(nic_rule, action_ste); - if (ret) { - mlx5dr_dbg(dmn, "Failed adding rule member\n"); - goto free_ste_info; - } + + mlx5dr_rule_set_last_member(nic_rule, action_ste, true); + mlx5dr_send_fill_and_append_ste_send_info(action_ste, DR_STE_SIZE, 0, curr_hw_ste, ste_info_arr[k], send_ste_list, false); } + last_ste->next_htbl = NULL; + return 0; -free_ste_info: - kfree(ste_info_arr[k]); err_exit: mlx5dr_ste_put(action_ste, matcher, nic_matcher); return -ENOMEM; @@@ -862,9 -846,9 +862,9 @@@ again new_htbl = dr_rule_rehash(rule, nic_rule, cur_htbl, ste_location, send_ste_list); if (!new_htbl) { - mlx5dr_htbl_put(cur_htbl); mlx5dr_err(dmn, "Failed creating rehash table, htbl-log_size: %d\n", cur_htbl->chunk_size); + mlx5dr_htbl_put(cur_htbl); } else { cur_htbl = new_htbl; } @@@ -1031,12 -1015,12 +1031,12 @@@ static enum mlx5dr_ipv dr_rule_get_ipv( } static bool dr_rule_skip(enum mlx5dr_domain_type domain, - enum mlx5dr_ste_entry_type ste_type, + enum mlx5dr_domain_nic_type nic_type, struct mlx5dr_match_param *mask, struct mlx5dr_match_param *value, u32 flow_source) { - bool rx = ste_type == MLX5DR_STE_TYPE_RX; + bool rx = nic_type == DR_DOMAIN_NIC_TYPE_RX; if (domain != MLX5DR_DOMAIN_TYPE_FDB) return false; @@@ -1081,7 -1065,9 +1081,7 @@@ dr_rule_create_rule_nic(struct mlx5dr_r nic_matcher = nic_rule->nic_matcher; nic_dmn = nic_matcher->nic_tbl->nic_dmn; - INIT_LIST_HEAD(&nic_rule->rule_members_list); - - if (dr_rule_skip(dmn->type, nic_dmn->ste_type, &matcher->mask, param, + if (dr_rule_skip(dmn->type, nic_dmn->type, &matcher->mask, param, rule->flow_source)) return 0; @@@ -1135,8 -1121,14 +1135,8 @@@ cur_htbl = ste->next_htbl; - /* Keep all STEs in the rule struct */ - ret = dr_rule_add_member(nic_rule, ste); - if (ret) { - mlx5dr_dbg(dmn, "Failed adding rule member index %d\n", i); - goto free_ste; - } - mlx5dr_ste_get(ste); + mlx5dr_rule_set_last_member(nic_rule, ste, true); } /* Connect actions */ @@@ -1161,6 -1153,8 +1161,6 @@@ return 0; -free_ste: - mlx5dr_ste_put(ste, matcher, nic_matcher); free_rule: dr_rule_clean_rule_members(rule, nic_rule); /* Clean all ste_info's */ diff --combined drivers/net/phy/marvell10g.c index 0b7cae118ad7,f4d758f8a1ee..bd310e8d5e43 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@@ -28,7 -28,6 +28,7 @@@ #include #include #include +#include #define MV_PHY_ALASKA_NBT_QUIRK_MASK 0xfffffffe #define MV_PHY_ALASKA_NBT_QUIRK_REV (MARVELL_PHY_ID_88X3310 | 0xa) @@@ -105,16 -104,6 +105,16 @@@ enum MV_V2_33X0_PORT_CTRL_MACTYPE_10GBASER_NO_SGMII_AN = 0x5, MV_V2_33X0_PORT_CTRL_MACTYPE_10GBASER_RATE_MATCH = 0x6, MV_V2_33X0_PORT_CTRL_MACTYPE_USXGMII = 0x7, + MV_V2_PORT_INTR_STS = 0xf040, + MV_V2_PORT_INTR_MASK = 0xf043, + MV_V2_PORT_INTR_STS_WOL_EN = BIT(8), + MV_V2_MAGIC_PKT_WORD0 = 0xf06b, + MV_V2_MAGIC_PKT_WORD1 = 0xf06c, + MV_V2_MAGIC_PKT_WORD2 = 0xf06d, + /* Wake on LAN registers */ + MV_V2_WOL_CTRL = 0xf06e, + MV_V2_WOL_CTRL_CLEAR_STS = BIT(15), + MV_V2_WOL_CTRL_MAGIC_PKT_EN = BIT(0), /* Temperature control/read registers (88X3310 only) */ MV_V2_TEMP_CTRL = 0xf08a, MV_V2_TEMP_CTRL_MASK = 0xc000, @@@ -998,11 -987,19 +998,19 @@@ static int mv3310_get_number_of_ports(s static int mv3310_match_phy_device(struct phy_device *phydev) { + if ((phydev->c45_ids.device_ids[MDIO_MMD_PMAPMD] & + MARVELL_PHY_ID_MASK) != MARVELL_PHY_ID_88X3310) + return 0; + return mv3310_get_number_of_ports(phydev) == 1; } static int mv3340_match_phy_device(struct phy_device *phydev) { + if ((phydev->c45_ids.device_ids[MDIO_MMD_PMAPMD] & + MARVELL_PHY_ID_MASK) != MARVELL_PHY_ID_88X3310) + return 0; + return mv3310_get_number_of_ports(phydev) == 4; } @@@ -1031,80 -1028,6 +1039,80 @@@ static int mv2111_match_phy_device(stru return mv211x_match_phy_device(phydev, false); } +static void mv3110_get_wol(struct phy_device *phydev, + struct ethtool_wolinfo *wol) +{ + int ret; + + wol->supported = WAKE_MAGIC; + wol->wolopts = 0; + + ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, MV_V2_WOL_CTRL); + if (ret < 0) + return; + + if (ret & MV_V2_WOL_CTRL_MAGIC_PKT_EN) + wol->wolopts |= WAKE_MAGIC; +} + +static int mv3110_set_wol(struct phy_device *phydev, + struct ethtool_wolinfo *wol) +{ + int ret; + + if (wol->wolopts & WAKE_MAGIC) { + /* Enable the WOL interrupt */ + ret = phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, + MV_V2_PORT_INTR_MASK, + MV_V2_PORT_INTR_STS_WOL_EN); + if (ret < 0) + return ret; + + /* Store the device address for the magic packet */ + ret = phy_write_mmd(phydev, MDIO_MMD_VEND2, + MV_V2_MAGIC_PKT_WORD2, + ((phydev->attached_dev->dev_addr[5] << 8) | + phydev->attached_dev->dev_addr[4])); + if (ret < 0) + return ret; + + ret = phy_write_mmd(phydev, MDIO_MMD_VEND2, + MV_V2_MAGIC_PKT_WORD1, + ((phydev->attached_dev->dev_addr[3] << 8) | + phydev->attached_dev->dev_addr[2])); + if (ret < 0) + return ret; + + ret = phy_write_mmd(phydev, MDIO_MMD_VEND2, + MV_V2_MAGIC_PKT_WORD0, + ((phydev->attached_dev->dev_addr[1] << 8) | + phydev->attached_dev->dev_addr[0])); + if (ret < 0) + return ret; + + /* Clear WOL status and enable magic packet matching */ + ret = phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, + MV_V2_WOL_CTRL, + MV_V2_WOL_CTRL_MAGIC_PKT_EN | + MV_V2_WOL_CTRL_CLEAR_STS); + if (ret < 0) + return ret; + } else { + /* Disable magic packet matching & reset WOL status bit */ + ret = phy_modify_mmd(phydev, MDIO_MMD_VEND2, + MV_V2_WOL_CTRL, + MV_V2_WOL_CTRL_MAGIC_PKT_EN, + MV_V2_WOL_CTRL_CLEAR_STS); + if (ret < 0) + return ret; + } + + /* Reset the clear WOL status bit as it does not self-clear */ + return phy_clear_bits_mmd(phydev, MDIO_MMD_VEND2, + MV_V2_WOL_CTRL, + MV_V2_WOL_CTRL_CLEAR_STS); +} + static struct phy_driver mv3310_drivers[] = { { .phy_id = MARVELL_PHY_ID_88X3310, @@@ -1124,8 -1047,6 +1132,8 @@@ .set_tunable = mv3310_set_tunable, .remove = mv3310_remove, .set_loopback = genphy_c45_loopback, + .get_wol = mv3110_get_wol, + .set_wol = mv3110_set_wol, }, { .phy_id = MARVELL_PHY_ID_88X3310, @@@ -1163,8 -1084,6 +1171,8 @@@ .set_tunable = mv3310_set_tunable, .remove = mv3310_remove, .set_loopback = genphy_c45_loopback, + .get_wol = mv3110_get_wol, + .set_wol = mv3110_set_wol, }, { .phy_id = MARVELL_PHY_ID_88E2110, diff --combined include/linux/netdevice.h index 6fd3a4d42668,d65ce093e5a7..7c41593c1d6a --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@@ -47,7 -47,6 +47,7 @@@ #include #include #include +#include struct netpoll_info; struct device; @@@ -209,7 -208,6 +209,7 @@@ struct sk_buff struct netdev_hw_addr { struct list_head list; + struct rb_node node; unsigned char addr[MAX_ADDR_LEN]; unsigned char type; #define NETDEV_HW_ADDR_T_LAN 1 @@@ -226,9 -224,6 +226,9 @@@ struct netdev_hw_addr_list { struct list_head list; int count; + + /* Auxiliary tree for faster lookup on addition and deletion */ + struct rb_root tree; }; #define netdev_hw_addr_list_count(l) ((l)->count) @@@ -300,6 -295,18 +300,6 @@@ enum netdev_state_t }; -/* - * This structure holds boot-time configured netdevice settings. They - * are then used in the device probing. - */ -struct netdev_boot_setup { - char name[IFNAMSIZ]; - struct ifmap map; -}; -#define NETDEV_BOOT_SETUP_MAX 8 - -int __init netdev_boot_setup(char *str); - struct gro_list { struct list_head list; int count; @@@ -727,13 -734,13 +727,13 @@@ bool rps_may_expire_flow(struct net_dev /* This structure contains an instance of an RX queue. */ struct netdev_rx_queue { + struct xdp_rxq_info xdp_rxq; #ifdef CONFIG_RPS struct rps_map __rcu *rps_map; struct rps_dev_flow_table __rcu *rps_flow_table; #endif struct kobject kobj; struct net_device *dev; - struct xdp_rxq_info xdp_rxq; #ifdef CONFIG_XDP_SOCKETS struct xsk_buff_pool *pool; #endif @@@ -1079,18 -1086,9 +1079,18 @@@ struct netdev_net_notifier * Test if Media Access Control address is valid for the device. * * int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); - * Called when a user requests an ioctl which can't be handled by - * the generic interface code. If not defined ioctls return - * not supported error code. + * Old-style ioctl entry point. This is used internally by the + * appletalk and ieee802154 subsystems but is no longer called by + * the device ioctl handler. + * + * int (*ndo_siocbond)(struct net_device *dev, struct ifreq *ifr, int cmd); + * Used by the bonding driver for its device specific ioctls: + * SIOCBONDENSLAVE, SIOCBONDRELEASE, SIOCBONDSETHWADDR, SIOCBONDCHANGEACTIVE, + * SIOCBONDSLAVEINFOQUERY, and SIOCBONDINFOQUERY + * + * * int (*ndo_eth_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); + * Called for ethernet specific ioctls: SIOCGMIIPHY, SIOCGMIIREG, + * SIOCSMIIREG, SIOCSHWTSTAMP and SIOCGHWTSTAMP. * * int (*ndo_set_config)(struct net_device *dev, struct ifmap *map); * Used to set network devices bus interface parameters. This interface @@@ -1323,9 -1321,6 +1323,9 @@@ * that got dropped are freed/returned via xdp_return_frame(). * Returns negative number, means general error invoking ndo, meaning * no frames were xmit'ed and core-caller will free all frames. + * struct net_device *(*ndo_xdp_get_xmit_slave)(struct net_device *dev, + * struct xdp_buff *xdp); + * Get the xmit slave of master device based on the xdp_buff. * int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags); * This function is used to wake up the softirq, ksoftirqd or kthread * responsible for sending and/or receiving packets on a specific @@@ -1366,15 -1361,6 +1366,15 @@@ struct net_device_ops int (*ndo_validate_addr)(struct net_device *dev); int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); + int (*ndo_eth_ioctl)(struct net_device *dev, + struct ifreq *ifr, int cmd); + int (*ndo_siocbond)(struct net_device *dev, + struct ifreq *ifr, int cmd); + int (*ndo_siocwandev)(struct net_device *dev, + struct if_settings *ifs); + int (*ndo_siocdevprivate)(struct net_device *dev, + struct ifreq *ifr, + void __user *data, int cmd); int (*ndo_set_config)(struct net_device *dev, struct ifmap *map); int (*ndo_change_mtu)(struct net_device *dev, @@@ -1553,8 -1539,6 +1553,8 @@@ int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp, u32 flags); + struct net_device * (*ndo_xdp_get_xmit_slave)(struct net_device *dev, + struct xdp_buff *xdp); int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags); struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev); @@@ -1821,7 -1805,6 +1821,7 @@@ enum netdev_ml_priv_type * @ieee802154_ptr: IEEE 802.15.4 low-rate Wireless Personal Area Network * device struct * @mpls_ptr: mpls_dev struct pointer + * @mctp_ptr: MCTP specific data * * @dev_addr: Hw address (before bcast, * because most packets are unicast) @@@ -2109,9 -2092,6 +2109,9 @@@ struct net_device #if IS_ENABLED(CONFIG_MPLS_ROUTING) struct mpls_dev __rcu *mpls_ptr; #endif +#if IS_ENABLED(CONFIG_MCTP) + struct mctp_dev __rcu *mctp_ptr; +#endif /* * Cache lines mostly used on receive path (including eth_type_trans()) @@@ -2937,6 -2917,7 +2937,6 @@@ static inline struct net_device *first_ } int netdev_boot_setup_check(struct net_device *dev); -unsigned long netdev_boot_base(const char *prefix, int unit); struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, const char *hwaddr); struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type); @@@ -3308,6 -3289,14 +3308,6 @@@ static inline bool dev_has_header(cons return dev->header_ops && dev->header_ops->create; } -typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, - int len, int size); -int register_gifconf(unsigned int family, gifconf_func_t *gifconf); -static inline int unregister_gifconf(unsigned int family) -{ - return register_gifconf(family, NULL); -} - #ifdef CONFIG_NET_FLOW_LIMIT #define FLOW_LIMIT_HISTORY (1 << 7) /* must be ^2 and !overflow buckets */ struct sd_flow_limit { @@@ -3926,8 -3915,6 +3926,8 @@@ static inline int netif_set_real_num_rx return 0; } #endif +int netif_set_real_num_queues(struct net_device *dev, + unsigned int txq, unsigned int rxq); static inline struct netdev_rx_queue * __netif_get_rx_queue(struct net_device *dev, unsigned int rxq) @@@ -3961,7 -3948,7 +3961,7 @@@ void __dev_kfree_skb_any(struct sk_buf /* * It is not allowed to call kfree_skb() or consume_skb() from hardware * interrupt context or with hardware interrupts being disabled. - * (in_irq() || irqs_disabled()) + * (in_hardirq() || irqs_disabled()) * * We provide four helpers that can be used in following contexts : * @@@ -3997,8 -3984,6 +3997,8 @@@ static inline void dev_consume_skb_any( __dev_kfree_skb_any(skb, SKB_REASON_CONSUMED); } +u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp, + struct bpf_prog *xdp_prog); void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog); int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb); int netif_rx(struct sk_buff *skb); @@@ -4027,12 -4012,14 +4027,16 @@@ int netdev_rx_handler_register(struct n void netdev_rx_handler_unregister(struct net_device *dev); bool dev_valid_name(const char *name); + static inline bool is_socket_ioctl_cmd(unsigned int cmd) + { + return _IOC_TYPE(cmd) == SOCK_IOC_TYPE; + } +int get_user_ifreq(struct ifreq *ifr, void __user **ifrdata, void __user *arg); +int put_user_ifreq(struct ifreq *ifr, void __user *arg); int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, - bool *need_copyout); -int dev_ifconf(struct net *net, struct ifconf *, int); -int dev_ethtool(struct net *net, struct ifreq *); + void __user *data, bool *need_copyout); +int dev_ifconf(struct net *net, struct ifconf __user *ifc); +int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *userdata); unsigned int dev_get_flags(const struct net_device *); int __dev_change_flags(struct net_device *dev, unsigned int flags, struct netlink_ext_ack *extack); @@@ -4086,7 -4073,6 +4090,7 @@@ typedef int (*bpf_op_t)(struct net_devi int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, int fd, int expected_fd, u32 flags); int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); +u8 dev_xdp_prog_count(struct net_device *dev); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); @@@ -4154,13 -4140,11 +4158,13 @@@ void netdev_run_todo(void) */ static inline void dev_put(struct net_device *dev) { + if (dev) { #ifdef CONFIG_PCPU_DEV_REFCNT - this_cpu_dec(*dev->pcpu_refcnt); + this_cpu_dec(*dev->pcpu_refcnt); #else - refcount_dec(&dev->dev_refcnt); + refcount_dec(&dev->dev_refcnt); #endif + } } /** @@@ -4171,13 -4155,11 +4175,13 @@@ */ static inline void dev_hold(struct net_device *dev) { + if (dev) { #ifdef CONFIG_PCPU_DEV_REFCNT - this_cpu_inc(*dev->pcpu_refcnt); + this_cpu_inc(*dev->pcpu_refcnt); #else - refcount_inc(&dev->dev_refcnt); + refcount_inc(&dev->dev_refcnt); #endif + } } /* Carrier loss detection, dial on demand. The functions netif_carrier_on diff --combined net/socket.c index 3c10504e46d9,8808b3617dac..83e7ac902f5e --- a/net/socket.c +++ b/net/socket.c @@@ -212,7 -212,6 +212,7 @@@ static const char * const pf_family_nam [PF_QIPCRTR] = "PF_QIPCRTR", [PF_SMC] = "PF_SMC", [PF_XDP] = "PF_XDP", + [PF_MCTP] = "PF_MCTP", }; /* @@@ -1065,13 -1064,9 +1065,13 @@@ static ssize_t sock_write_iter(struct k */ static DEFINE_MUTEX(br_ioctl_mutex); -static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg); +static int (*br_ioctl_hook)(struct net *net, struct net_bridge *br, + unsigned int cmd, struct ifreq *ifr, + void __user *uarg); -void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) +void brioctl_set(int (*hook)(struct net *net, struct net_bridge *br, + unsigned int cmd, struct ifreq *ifr, + void __user *uarg)) { mutex_lock(&br_ioctl_mutex); br_ioctl_hook = hook; @@@ -1079,22 -1074,6 +1079,22 @@@ } EXPORT_SYMBOL(brioctl_set); +int br_ioctl_call(struct net *net, struct net_bridge *br, unsigned int cmd, + struct ifreq *ifr, void __user *uarg) +{ + int err = -ENOPKG; + + if (!br_ioctl_hook) + request_module("bridge"); + + mutex_lock(&br_ioctl_mutex); + if (br_ioctl_hook) + err = br_ioctl_hook(net, br, cmd, ifr, uarg); + mutex_unlock(&br_ioctl_mutex); + + return err; +} + static DEFINE_MUTEX(vlan_ioctl_mutex); static int (*vlan_ioctl_hook) (struct net *, void __user *arg); @@@ -1109,11 -1088,8 +1109,11 @@@ EXPORT_SYMBOL(vlan_ioctl_set) static long sock_do_ioctl(struct net *net, struct socket *sock, unsigned int cmd, unsigned long arg) { + struct ifreq ifr; + bool need_copyout; int err; void __user *argp = (void __user *)arg; + void __user *data; err = sock->ops->ioctl(sock, cmd, arg); @@@ -1124,13 -1100,27 +1124,16 @@@ if (err != -ENOIOCTLCMD) return err; - if (cmd == SIOCGIFCONF) { - struct ifconf ifc; - if (copy_from_user(&ifc, argp, sizeof(struct ifconf))) - return -EFAULT; - rtnl_lock(); - err = dev_ifconf(net, &ifc, sizeof(struct ifreq)); - rtnl_unlock(); - if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf))) - err = -EFAULT; - } else if (is_socket_ioctl_cmd(cmd)) { - struct ifreq ifr; - bool need_copyout; - if (copy_from_user(&ifr, argp, sizeof(struct ifreq))) ++ if (!is_socket_ioctl_cmd(cmd)) ++ return -ENOTTY; ++ + if (get_user_ifreq(&ifr, &data, argp)) + return -EFAULT; + err = dev_ioctl(net, cmd, &ifr, data, &need_copyout); + if (!err && need_copyout) + if (put_user_ifreq(&ifr, argp)) return -EFAULT; - err = dev_ioctl(net, cmd, &ifr, &need_copyout); - if (!err && need_copyout) - if (copy_to_user(argp, &ifr, sizeof(struct ifreq))) - return -EFAULT; - } else { - err = -ENOTTY; - } + return err; } @@@ -1152,13 -1142,12 +1155,13 @@@ static long sock_ioctl(struct file *fil net = sock_net(sk); if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) { struct ifreq ifr; + void __user *data; bool need_copyout; - if (copy_from_user(&ifr, argp, sizeof(struct ifreq))) + if (get_user_ifreq(&ifr, &data, argp)) return -EFAULT; - err = dev_ioctl(net, cmd, &ifr, &need_copyout); + err = dev_ioctl(net, cmd, &ifr, data, &need_copyout); if (!err && need_copyout) - if (copy_to_user(argp, &ifr, sizeof(struct ifreq))) + if (put_user_ifreq(&ifr, argp)) return -EFAULT; } else #ifdef CONFIG_WEXT_CORE @@@ -1183,7 -1172,14 +1186,7 @@@ case SIOCSIFBR: case SIOCBRADDBR: case SIOCBRDELBR: - err = -ENOPKG; - if (!br_ioctl_hook) - request_module("bridge"); - - mutex_lock(&br_ioctl_mutex); - if (br_ioctl_hook) - err = br_ioctl_hook(net, cmd, argp); - mutex_unlock(&br_ioctl_mutex); + err = br_ioctl_call(net, NULL, cmd, NULL, argp); break; case SIOCGIFVLAN: case SIOCSIFVLAN: @@@ -1223,11 -1219,6 +1226,11 @@@ cmd == SIOCGSTAMP_NEW, false); break; + + case SIOCGIFCONF: + err = dev_ifconf(net, argp); + break; + default: err = sock_do_ioctl(net, sock, cmd, arg); break; @@@ -3137,55 -3128,154 +3140,55 @@@ void socket_seq_show(struct seq_file *s } #endif /* CONFIG_PROC_FS */ -#ifdef CONFIG_COMPAT -static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) +/* Handle the fact that while struct ifreq has the same *layout* on + * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data, + * which are handled elsewhere, it still has different *size* due to + * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit, + * resulting in struct ifreq being 32 and 40 bytes respectively). + * As a result, if the struct happens to be at the end of a page and + * the next page isn't readable/writable, we get a fault. To prevent + * that, copy back and forth to the full size. + */ +int get_user_ifreq(struct ifreq *ifr, void __user **ifrdata, void __user *arg) { - struct compat_ifconf ifc32; - struct ifconf ifc; - int err; + if (in_compat_syscall()) { + struct compat_ifreq *ifr32 = (struct compat_ifreq *)ifr; - if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf))) - return -EFAULT; + memset(ifr, 0, sizeof(*ifr)); + if (copy_from_user(ifr32, arg, sizeof(*ifr32))) + return -EFAULT; - ifc.ifc_len = ifc32.ifc_len; - ifc.ifc_req = compat_ptr(ifc32.ifcbuf); + if (ifrdata) + *ifrdata = compat_ptr(ifr32->ifr_data); - rtnl_lock(); - err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq)); - rtnl_unlock(); - if (err) - return err; + return 0; + } - ifc32.ifc_len = ifc.ifc_len; - if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf))) + if (copy_from_user(ifr, arg, sizeof(*ifr))) return -EFAULT; + if (ifrdata) + *ifrdata = ifr->ifr_data; + return 0; } +EXPORT_SYMBOL(get_user_ifreq); -static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) +int put_user_ifreq(struct ifreq *ifr, void __user *arg) { - struct compat_ethtool_rxnfc __user *compat_rxnfc; - bool convert_in = false, convert_out = false; - size_t buf_size = 0; - struct ethtool_rxnfc __user *rxnfc = NULL; - struct ifreq ifr; - u32 rule_cnt = 0, actual_rule_cnt; - u32 ethcmd; - u32 data; - int ret; + size_t size = sizeof(*ifr); - if (get_user(data, &ifr32->ifr_ifru.ifru_data)) - return -EFAULT; - - compat_rxnfc = compat_ptr(data); + if (in_compat_syscall()) + size = sizeof(struct compat_ifreq); - if (get_user(ethcmd, &compat_rxnfc->cmd)) + if (copy_to_user(arg, ifr, size)) return -EFAULT; - /* Most ethtool structures are defined without padding. - * Unfortunately struct ethtool_rxnfc is an exception. - */ - switch (ethcmd) { - default: - break; - case ETHTOOL_GRXCLSRLALL: - /* Buffer size is variable */ - if (get_user(rule_cnt, &compat_rxnfc->rule_cnt)) - return -EFAULT; - if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32)) - return -ENOMEM; - buf_size += rule_cnt * sizeof(u32); - fallthrough; - case ETHTOOL_GRXRINGS: - case ETHTOOL_GRXCLSRLCNT: - case ETHTOOL_GRXCLSRULE: - case ETHTOOL_SRXCLSRLINS: - convert_out = true; - fallthrough; - case ETHTOOL_SRXCLSRLDEL: - buf_size += sizeof(struct ethtool_rxnfc); - convert_in = true; - rxnfc = compat_alloc_user_space(buf_size); - break; - } - - if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ)) - return -EFAULT; - - ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc; - - if (convert_in) { - /* We expect there to be holes between fs.m_ext and - * fs.ring_cookie and at the end of fs, but nowhere else. - */ - BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) + - sizeof(compat_rxnfc->fs.m_ext) != - offsetof(struct ethtool_rxnfc, fs.m_ext) + - sizeof(rxnfc->fs.m_ext)); - BUILD_BUG_ON( - offsetof(struct compat_ethtool_rxnfc, fs.location) - - offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) != - offsetof(struct ethtool_rxnfc, fs.location) - - offsetof(struct ethtool_rxnfc, fs.ring_cookie)); - - if (copy_in_user(rxnfc, compat_rxnfc, - (void __user *)(&rxnfc->fs.m_ext + 1) - - (void __user *)rxnfc) || - copy_in_user(&rxnfc->fs.ring_cookie, - &compat_rxnfc->fs.ring_cookie, - (void __user *)(&rxnfc->fs.location + 1) - - (void __user *)&rxnfc->fs.ring_cookie)) - return -EFAULT; - if (ethcmd == ETHTOOL_GRXCLSRLALL) { - if (put_user(rule_cnt, &rxnfc->rule_cnt)) - return -EFAULT; - } else if (copy_in_user(&rxnfc->rule_cnt, - &compat_rxnfc->rule_cnt, - sizeof(rxnfc->rule_cnt))) - return -EFAULT; - } - - ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL); - if (ret) - return ret; - - if (convert_out) { - if (copy_in_user(compat_rxnfc, rxnfc, - (const void __user *)(&rxnfc->fs.m_ext + 1) - - (const void __user *)rxnfc) || - copy_in_user(&compat_rxnfc->fs.ring_cookie, - &rxnfc->fs.ring_cookie, - (const void __user *)(&rxnfc->fs.location + 1) - - (const void __user *)&rxnfc->fs.ring_cookie) || - copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt, - sizeof(rxnfc->rule_cnt))) - return -EFAULT; - - if (ethcmd == ETHTOOL_GRXCLSRLALL) { - /* As an optimisation, we only copy the actual - * number of rules that the underlying - * function returned. Since Mallory might - * change the rule count in user memory, we - * check that it is less than the rule count - * originally given (as the user buffer size), - * which has been range-checked. - */ - if (get_user(actual_rule_cnt, &rxnfc->rule_cnt)) - return -EFAULT; - if (actual_rule_cnt < rule_cnt) - rule_cnt = actual_rule_cnt; - if (copy_in_user(&compat_rxnfc->rule_locs[0], - &rxnfc->rule_locs[0], - rule_cnt * sizeof(u32))) - return -EFAULT; - } - } - return 0; } +EXPORT_SYMBOL(put_user_ifreq); +#ifdef CONFIG_COMPAT static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32) { compat_uptr_t uptr32; @@@ -3193,7 -3283,7 +3196,7 @@@ void __user *saved; int err; - if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq))) + if (get_user_ifreq(&ifr, NULL, uifr32)) return -EFAULT; if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu)) @@@ -3202,10 -3292,10 +3205,10 @@@ saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc; ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32); - err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL); + err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL, NULL); if (!err) { ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved; - if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq))) + if (put_user_ifreq(&ifr, uifr32)) err = -EFAULT; } return err; @@@ -3216,13 -3306,99 +3219,15 @@@ static int compat_ifr_data_ioctl(struc struct compat_ifreq __user *u_ifreq32) { struct ifreq ifreq; - u32 data32; + void __user *data; + if (!is_socket_ioctl_cmd(cmd)) + return -ENOTTY; - if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ)) - return -EFAULT; - if (get_user(data32, &u_ifreq32->ifr_data)) - return -EFAULT; - ifreq.ifr_data = compat_ptr(data32); - - return dev_ioctl(net, cmd, &ifreq, NULL); -} - -static int compat_ifreq_ioctl(struct net *net, struct socket *sock, - unsigned int cmd, - struct compat_ifreq __user *uifr32) -{ - struct ifreq __user *uifr; - int err; - - /* Handle the fact that while struct ifreq has the same *layout* on - * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data, - * which are handled elsewhere, it still has different *size* due to - * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit, - * resulting in struct ifreq being 32 and 40 bytes respectively). - * As a result, if the struct happens to be at the end of a page and - * the next page isn't readable/writable, we get a fault. To prevent - * that, copy back and forth to the full size. - */ - - uifr = compat_alloc_user_space(sizeof(*uifr)); - if (copy_in_user(uifr, uifr32, sizeof(*uifr32))) - return -EFAULT; - - err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr); - - if (!err) { - switch (cmd) { - case SIOCGIFFLAGS: - case SIOCGIFMETRIC: - case SIOCGIFMTU: - case SIOCGIFMEM: - case SIOCGIFHWADDR: - case SIOCGIFINDEX: - case SIOCGIFADDR: - case SIOCGIFBRDADDR: - case SIOCGIFDSTADDR: - case SIOCGIFNETMASK: - case SIOCGIFPFLAGS: - case SIOCGIFTXQLEN: - case SIOCGMIIPHY: - case SIOCGMIIREG: - case SIOCGIFNAME: - if (copy_in_user(uifr32, uifr, sizeof(*uifr32))) - err = -EFAULT; - break; - } - } - return err; -} - -static int compat_sioc_ifmap(struct net *net, unsigned int cmd, - struct compat_ifreq __user *uifr32) -{ - struct ifreq ifr; - struct compat_ifmap __user *uifmap32; - int err; - - uifmap32 = &uifr32->ifr_ifru.ifru_map; - err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name)); - err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); - err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); - err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); - err |= get_user(ifr.ifr_map.irq, &uifmap32->irq); - err |= get_user(ifr.ifr_map.dma, &uifmap32->dma); - err |= get_user(ifr.ifr_map.port, &uifmap32->port); - if (err) + if (get_user_ifreq(&ifreq, &data, u_ifreq32)) return -EFAULT; + ifreq.ifr_data = data; - err = dev_ioctl(net, cmd, &ifr, NULL); - - if (cmd == SIOCGIFMAP && !err) { - err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name)); - err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); - err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); - err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); - err |= put_user(ifr.ifr_map.irq, &uifmap32->irq); - err |= put_user(ifr.ifr_map.dma, &uifmap32->dma); - err |= put_user(ifr.ifr_map.port, &uifmap32->port); - if (err) - err = -EFAULT; - } - return err; + return dev_ioctl(net, cmd, &ifreq, data, NULL); } /* Since old style bridge ioctl's endup using SIOCDEVPRIVATE @@@ -3248,14 -3424,21 +3253,14 @@@ static int compat_sock_ioctl_trans(stru struct net *net = sock_net(sk); if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) - return compat_ifr_data_ioctl(net, cmd, argp); + return sock_ioctl(file, cmd, (unsigned long)argp); switch (cmd) { case SIOCSIFBR: case SIOCGIFBR: return old_bridge_ioctl(argp); - case SIOCGIFCONF: - return compat_dev_ifconf(net, argp); - case SIOCETHTOOL: - return ethtool_ioctl(net, argp); case SIOCWANDEV: return compat_siocwandev(net, argp); - case SIOCGIFMAP: - case SIOCSIFMAP: - return compat_sioc_ifmap(net, cmd, argp); case SIOCGSTAMP_OLD: case SIOCGSTAMPNS_OLD: if (!sock->ops->gettstamp) @@@ -3263,7 -3446,6 +3268,7 @@@ return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD, !COMPAT_USE_64BIT_TIME); + case SIOCETHTOOL: case SIOCBONDSLAVEINFOQUERY: case SIOCBONDINFOQUERY: case SIOCSHWTSTAMP: @@@ -3281,13 -3463,10 +3286,13 @@@ case SIOCGSKNS: case SIOCGSTAMP_NEW: case SIOCGSTAMPNS_NEW: + case SIOCGIFCONF: return sock_ioctl(file, cmd, arg); case SIOCGIFFLAGS: case SIOCSIFFLAGS: + case SIOCGIFMAP: + case SIOCSIFMAP: case SIOCGIFMETRIC: case SIOCSIFMETRIC: case SIOCGIFMTU: @@@ -3324,6 -3503,8 +3329,6 @@@ case SIOCBONDRELEASE: case SIOCBONDSETHWADDR: case SIOCBONDCHANGEACTIVE: - return compat_ifreq_ioctl(net, sock, cmd, argp); - case SIOCSARP: case SIOCGARP: case SIOCDARP: