Git Repo - linux.git/commitdiff
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
author David S. Miller <davem@davemloft.net>
Tue, 4 Aug 2020 01:27:40 +0000 (18:27 -0700)
committer David S. Miller <davem@davemloft.net>
Tue, 4 Aug 2020 01:27:40 +0000 (18:27 -0700)
Daniel Borkmann says:

====================
pull-request: bpf-next 2020-08-04

The following pull-request contains BPF updates for your *net-next* tree.

We've added 73 non-merge commits during the last 9 day(s) which contain
a total of 135 files changed, 4603 insertions(+), 1013 deletions(-).

The main changes are:

1) Implement bpf_link support for XDP. Also add a LINK_DETACH operation for the BPF
   syscall, allowing a process holding a BPF link FD to force-detach the link, from
   Andrii Nakryiko. (A user space usage sketch follows the sign-off below.)

2) Add a BPF iterator for map elements and another to iterate all BPF programs, for
   efficient in-kernel inspection, from Yonghong Song and Alexei Starovoitov. (A
   BPF-side sketch accompanies the kernel/bpf/hashtab.c hunks below.)

3) Separate bpf_get_{stack,stackid}() helpers for perf events in BPF to avoid
   unwinder errors, from Song Liu.

4) Allow cgroup local storage map to be shared between programs on the same
   cgroup. Also extend BPF selftests with coverage, from YiFei Zhu.

5) Add BPF exception tables to ARM64 JIT in order to be able to JIT BPF_PROBE_MEM
   load instructions, from Jean-Philippe Brucker.

6) Follow-up fixes on BPF socket lookup in combination with reuseport group
   handling. Also add related BPF selftests, from Jakub Sitnicki.

7) Allow use of socket storage in BPF_PROG_TYPE_CGROUP_SOCK-typed programs for the
   socket create/release as well as bind hooks, from Stanislav Fomichev.

8) Fix an info leak in xsk_getsockopt() when retrieving XDP stats via old struct
   xdp_statistics, from Peilin Ye.

9) Fix PT_REGS_RC{,_CORE}() macros in libbpf for MIPS arch, from Jerry Crunchtime.

10) Extend BPF kernel test infra with skb->family and skb->{local,remote}_ip{4,6}
    fields and allow user space to specify skb->dev via ifindex, from Dmitry Yakunin.

11) Fix a bpftool segfault due to a missing program type name, and harden the type
    name lookup so that such gaps for future program types cannot crash it, from
    Quentin Monnet.

12) Consolidate cgroup helper functions across selftests and fix a v6 localhost
    resolver issue, from John Fastabend.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
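
For point 1 above, a minimal user space sketch of the bpf_link-based XDP flow,
using the libbpf APIs added by this series (bpf_program__attach_xdp() and
bpf_link__detach()). The object file name "prog.o" and the section title "xdp"
are hypothetical; error handling is abbreviated.

	#include <bpf/libbpf.h>
	#include <net/if.h>

	int attach_then_force_detach(const char *ifname)
	{
		int ifindex = if_nametoindex(ifname);
		struct bpf_program *prog;
		struct bpf_object *obj;
		struct bpf_link *link;
		int err = -1;

		obj = bpf_object__open_file("prog.o", NULL);
		if (libbpf_get_error(obj))
			return -1;
		if (bpf_object__load(obj))
			goto out;

		prog = bpf_object__find_program_by_title(obj, "xdp");
		if (!prog)
			goto out;

		link = bpf_program__attach_xdp(prog, ifindex);
		if (libbpf_get_error(link))
			goto out;

		/* LINK_DETACH: break the attachment immediately, even though
		 * the link FD (and thus the link object) still exists.
		 */
		err = bpf_link__detach(link);

		bpf_link__destroy(link);
	out:
		bpf_object__close(obj);
		return err;
	}
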
15 files changed:
drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
drivers/net/ethernet/intel/ice/ice_main.c
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
drivers/net/ethernet/marvell/mvneta.c
drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/sfc/efx.c
drivers/net/tun.c
include/linux/netdevice.h
kernel/bpf/btf.c
kernel/bpf/hashtab.c
net/core/dev.c
net/core/rtnetlink.c
net/ipv6/route.c

diff --combined drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 50f52fe2012f3677b5991d975b3e22409ef428ab,17f6bcafc944e25c68e3da6a61da7786a893e24f..83b1e974bff06d7813faebe051326a25f74cd39e
@@@ -2077,14 -2077,9 +2077,9 @@@ out_err
  
  static int dpaa2_eth_xdp(struct net_device *dev, struct netdev_bpf *xdp)
  {
-       struct dpaa2_eth_priv *priv = netdev_priv(dev);
        switch (xdp->command) {
        case XDP_SETUP_PROG:
                return setup_xdp(dev, xdp->prog);
-       case XDP_QUERY_PROG:
-               xdp->prog_id = priv->xdp_prog ? priv->xdp_prog->aux->id : 0;
-               break;
        default:
                return -EINVAL;
        }
@@@ -2330,7 -2325,7 +2325,7 @@@ static void cdan_cb(struct dpaa2_io_not
        /* Update NAPI statistics */
        ch->stats.cdan++;
  
 -      napi_schedule_irqoff(&ch->napi);
 +      napi_schedule(&ch->napi);
  }
  
  /* Allocate and configure a DPCON object */
diff --combined drivers/net/ethernet/intel/ice/ice_main.c
index 22e3d32463f19e3065490048c1426fb0138cda8e,231f4b6e93d02ffc154ab2b30535ce58f65c09f2..8437d72795b02ed12d5f6e71d512aef46765120e
@@@ -369,7 -369,6 +369,7 @@@ static int ice_vsi_sync_fltr(struct ice
                                                ~IFF_PROMISC;
                                        goto out_promisc;
                                }
 +                              ice_cfg_vlan_pruning(vsi, false, false);
                        }
                } else {
                        /* Clear Rx filter to remove traffic from wire */
                                                IFF_PROMISC;
                                        goto out_promisc;
                                }
 +                              if (vsi->num_vlan > 1)
 +                                      ice_cfg_vlan_pruning(vsi, true, false);
                        }
                }
        }
@@@ -769,100 -766,6 +769,100 @@@ static void ice_vsi_link_event(struct i
        }
  }
  
 +/**
 + * ice_set_dflt_mib - send a default config MIB to the FW
 + * @pf: private PF struct
 + *
 + * This function sends a default configuration MIB to the FW.
 + *
 + * If this function errors out at any point, the driver is still able to
 + * function.  The main impact is that LFC may not operate as expected.
 + * Therefore an error state in this function should be treated with a DBG
 + * message and continue on with driver rebuild/reenable.
 + */
 +static void ice_set_dflt_mib(struct ice_pf *pf)
 +{
 +      struct device *dev = ice_pf_to_dev(pf);
 +      u8 mib_type, *buf, *lldpmib = NULL;
 +      u16 len, typelen, offset = 0;
 +      struct ice_lldp_org_tlv *tlv;
 +      struct ice_hw *hw;
 +      u32 ouisubtype;
 +
 +      if (!pf) {
 +              dev_dbg(dev, "%s NULL pf pointer\n", __func__);
 +              return;
 +      }
 +
 +      hw = &pf->hw;
 +      mib_type = SET_LOCAL_MIB_TYPE_LOCAL_MIB;
 +      lldpmib = kzalloc(ICE_LLDPDU_SIZE, GFP_KERNEL);
 +      if (!lldpmib) {
 +              dev_dbg(dev, "%s Failed to allocate MIB memory\n",
 +                      __func__);
 +              return;
 +      }
 +
 +      /* Add ETS CFG TLV */
 +      tlv = (struct ice_lldp_org_tlv *)lldpmib;
 +      typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
 +                 ICE_IEEE_ETS_TLV_LEN);
 +      tlv->typelen = htons(typelen);
 +      ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
 +                    ICE_IEEE_SUBTYPE_ETS_CFG);
 +      tlv->ouisubtype = htonl(ouisubtype);
 +
 +      buf = tlv->tlvinfo;
 +      buf[0] = 0;
 +
 +      /* ETS CFG all UPs map to TC 0. Next 4 (1 - 4) Octets = 0.
 +       * Octets 5 - 12 are BW values, set octet 5 to 100% BW.
 +       * Octets 13 - 20 are TSA values - leave as zeros
 +       */
 +      buf[5] = 0x64;
 +      len = (typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S;
 +      offset += len + 2;
 +      tlv = (struct ice_lldp_org_tlv *)
 +              ((char *)tlv + sizeof(tlv->typelen) + len);
 +
 +      /* Add ETS REC TLV */
 +      buf = tlv->tlvinfo;
 +      tlv->typelen = htons(typelen);
 +
 +      ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
 +                    ICE_IEEE_SUBTYPE_ETS_REC);
 +      tlv->ouisubtype = htonl(ouisubtype);
 +
 +      /* First octet of buf is reserved
 +       * Octets 1 - 4 map UP to TC - all UPs map to zero
 +       * Octets 5 - 12 are BW values - set TC 0 to 100%.
 +       * Octets 13 - 20 are TSA value - leave as zeros
 +       */
 +      buf[5] = 0x64;
 +      offset += len + 2;
 +      tlv = (struct ice_lldp_org_tlv *)
 +              ((char *)tlv + sizeof(tlv->typelen) + len);
 +
 +      /* Add PFC CFG TLV */
 +      typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
 +                 ICE_IEEE_PFC_TLV_LEN);
 +      tlv->typelen = htons(typelen);
 +
 +      ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
 +                    ICE_IEEE_SUBTYPE_PFC_CFG);
 +      tlv->ouisubtype = htonl(ouisubtype);
 +
 +      /* Octet 1 left as all zeros - PFC disabled */
 +      buf[0] = 0x08;
 +      len = (typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S;
 +      offset += len + 2;
 +
 +      if (ice_aq_set_lldp_mib(hw, mib_type, (void *)lldpmib, offset, NULL))
 +              dev_dbg(dev, "%s Failed to set default LLDP MIB\n", __func__);
 +
 +      kfree(lldpmib);
 +}
 +
  /**
   * ice_link_event - process the link event
   * @pf: PF that the link event is associated with
@@@ -897,12 -800,6 +897,12 @@@ ice_link_event(struct ice_pf *pf, struc
                dev_dbg(dev, "Failed to update link status and re-enable link events for port %d\n",
                        pi->lport);
  
 +      /* Check if the link state is up after updating link info, and treat
 +       * this event as an UP event since the link is actually UP now.
 +       */
 +      if (phy_info->link_info.link_info & ICE_AQ_LINK_UP)
 +              link_up = true;
 +
        vsi = ice_get_main_vsi(pf);
        if (!vsi || !vsi->port_info)
                return -EINVAL;
        if (link_up == old_link && link_speed == old_link_speed)
                return result;
  
 -      ice_dcb_rebuild(pf);
 +      if (ice_is_dcb_active(pf)) {
 +              if (test_bit(ICE_FLAG_DCB_ENA, pf->flags))
 +                      ice_dcb_rebuild(pf);
 +      } else {
 +              if (link_up)
 +                      ice_set_dflt_mib(pf);
 +      }
        ice_vsi_link_event(vsi, link_up);
        ice_print_link_msg(vsi, link_up);
  
@@@ -1023,151 -914,6 +1023,151 @@@ ice_handle_link_event(struct ice_pf *pf
        return status;
  }
  
 +enum ice_aq_task_state {
 +      ICE_AQ_TASK_WAITING = 0,
 +      ICE_AQ_TASK_COMPLETE,
 +      ICE_AQ_TASK_CANCELED,
 +};
 +
 +struct ice_aq_task {
 +      struct hlist_node entry;
 +
 +      u16 opcode;
 +      struct ice_rq_event_info *event;
 +      enum ice_aq_task_state state;
 +};
 +
 +/**
  + * ice_aq_wait_for_event - Wait for an AdminQ event from firmware
 + * @pf: pointer to the PF private structure
 + * @opcode: the opcode to wait for
 + * @timeout: how long to wait, in jiffies
 + * @event: storage for the event info
 + *
 + * Waits for a specific AdminQ completion event on the ARQ for a given PF. The
 + * current thread will be put to sleep until the specified event occurs or
 + * until the given timeout is reached.
 + *
 + * To obtain only the descriptor contents, pass an event without an allocated
 + * msg_buf. If the complete data buffer is desired, allocate the
 + * event->msg_buf with enough space ahead of time.
 + *
 + * Returns: zero on success, or a negative error code on failure.
 + */
 +int ice_aq_wait_for_event(struct ice_pf *pf, u16 opcode, unsigned long timeout,
 +                        struct ice_rq_event_info *event)
 +{
 +      struct ice_aq_task *task;
 +      long ret;
 +      int err;
 +
 +      task = kzalloc(sizeof(*task), GFP_KERNEL);
 +      if (!task)
 +              return -ENOMEM;
 +
 +      INIT_HLIST_NODE(&task->entry);
 +      task->opcode = opcode;
 +      task->event = event;
 +      task->state = ICE_AQ_TASK_WAITING;
 +
 +      spin_lock_bh(&pf->aq_wait_lock);
 +      hlist_add_head(&task->entry, &pf->aq_wait_list);
 +      spin_unlock_bh(&pf->aq_wait_lock);
 +
 +      ret = wait_event_interruptible_timeout(pf->aq_wait_queue, task->state,
 +                                             timeout);
 +      switch (task->state) {
 +      case ICE_AQ_TASK_WAITING:
 +              err = ret < 0 ? ret : -ETIMEDOUT;
 +              break;
 +      case ICE_AQ_TASK_CANCELED:
 +              err = ret < 0 ? ret : -ECANCELED;
 +              break;
 +      case ICE_AQ_TASK_COMPLETE:
 +              err = ret < 0 ? ret : 0;
 +              break;
 +      default:
 +              WARN(1, "Unexpected AdminQ wait task state %u", task->state);
 +              err = -EINVAL;
 +              break;
 +      }
 +
 +      spin_lock_bh(&pf->aq_wait_lock);
 +      hlist_del(&task->entry);
 +      spin_unlock_bh(&pf->aq_wait_lock);
 +      kfree(task);
 +
 +      return err;
 +}
 +
 +/**
 + * ice_aq_check_events - Check if any thread is waiting for an AdminQ event
 + * @pf: pointer to the PF private structure
 + * @opcode: the opcode of the event
 + * @event: the event to check
 + *
 + * Loops over the current list of pending threads waiting for an AdminQ event.
 + * For each matching task, copy the contents of the event into the task
 + * structure and wake up the thread.
 + *
 + * If multiple threads wait for the same opcode, they will all be woken up.
 + *
 + * Note that event->msg_buf will only be duplicated if the event has a buffer
 + * with enough space already allocated. Otherwise, only the descriptor and
 + * message length will be copied.
 + */
 +static void ice_aq_check_events(struct ice_pf *pf, u16 opcode,
 +                              struct ice_rq_event_info *event)
 +{
 +      struct ice_aq_task *task;
 +      bool found = false;
 +
 +      spin_lock_bh(&pf->aq_wait_lock);
 +      hlist_for_each_entry(task, &pf->aq_wait_list, entry) {
 +              if (task->state || task->opcode != opcode)
 +                      continue;
 +
 +              memcpy(&task->event->desc, &event->desc, sizeof(event->desc));
 +              task->event->msg_len = event->msg_len;
 +
 +              /* Only copy the data buffer if a destination was set */
 +              if (task->event->msg_buf &&
 +                  task->event->buf_len > event->buf_len) {
 +                      memcpy(task->event->msg_buf, event->msg_buf,
 +                             event->buf_len);
 +                      task->event->buf_len = event->buf_len;
 +              }
 +
 +              task->state = ICE_AQ_TASK_COMPLETE;
 +              found = true;
 +      }
 +      spin_unlock_bh(&pf->aq_wait_lock);
 +
 +      if (found)
 +              wake_up(&pf->aq_wait_queue);
 +}
 +
 +/**
 + * ice_aq_cancel_waiting_tasks - Immediately cancel all waiting tasks
 + * @pf: the PF private structure
 + *
 + * Set all waiting tasks to ICE_AQ_TASK_CANCELED, and wake up their threads.
 + * This will then cause ice_aq_wait_for_event to exit with -ECANCELED.
 + */
 +static void ice_aq_cancel_waiting_tasks(struct ice_pf *pf)
 +{
 +      struct ice_aq_task *task;
 +
 +      spin_lock_bh(&pf->aq_wait_lock);
 +      hlist_for_each_entry(task, &pf->aq_wait_list, entry)
 +              task->state = ICE_AQ_TASK_CANCELED;
 +      spin_unlock_bh(&pf->aq_wait_lock);
 +
 +      wake_up(&pf->aq_wait_queue);
 +}
 +
  /**
   * __ice_clean_ctrlq - helper function to clean controlq rings
   * @pf: ptr to struct ice_pf
@@@ -1264,9 -1010,6 +1264,9 @@@ static int __ice_clean_ctrlq(struct ice
  
                opcode = le16_to_cpu(event.desc.opcode);
  
 +              /* Notify any thread that might be waiting for this event */
 +              ice_aq_check_events(pf, opcode, &event);
 +
                switch (opcode) {
                case ice_aqc_opc_get_link_status:
                        if (ice_handle_link_event(pf, &event))
@@@ -2549,9 -2292,6 +2549,6 @@@ static int ice_xdp(struct net_device *d
        switch (xdp->command) {
        case XDP_SETUP_PROG:
                return ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack);
-       case XDP_QUERY_PROG:
-               xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0;
-               return 0;
        case XDP_SETUP_XSK_UMEM:
                return ice_xsk_umem_setup(vsi, xdp->xsk.umem,
                                          xdp->xsk.queue_id);
@@@ -3346,10 -3086,6 +3343,10 @@@ static int ice_init_pf(struct ice_pf *p
        mutex_init(&pf->sw_mutex);
        mutex_init(&pf->tc_mutex);
  
 +      INIT_HLIST_HEAD(&pf->aq_wait_list);
 +      spin_lock_init(&pf->aq_wait_lock);
 +      init_waitqueue_head(&pf->aq_wait_queue);
 +
        /* setup service timer and periodic service task */
        timer_setup(&pf->serv_tmr, ice_service_timer, 0);
        pf->serv_tmr_period = HZ;
@@@ -3586,60 -3322,6 +3583,60 @@@ done
        return err;
  }
  
 +/**
 + * ice_set_safe_mode_vlan_cfg - configure PF VSI to allow all VLANs in safe mode
 + * @pf: PF to configure
 + *
 + * No VLAN offloads/filtering are advertised in safe mode so make sure the PF
 + * VSI can still Tx/Rx VLAN tagged packets.
 + */
 +static void ice_set_safe_mode_vlan_cfg(struct ice_pf *pf)
 +{
 +      struct ice_vsi *vsi = ice_get_main_vsi(pf);
 +      struct ice_vsi_ctx *ctxt;
 +      enum ice_status status;
 +      struct ice_hw *hw;
 +
 +      if (!vsi)
 +              return;
 +
 +      ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
 +      if (!ctxt)
 +              return;
 +
 +      hw = &pf->hw;
 +      ctxt->info = vsi->info;
 +
 +      ctxt->info.valid_sections =
 +              cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID |
 +                          ICE_AQ_VSI_PROP_SECURITY_VALID |
 +                          ICE_AQ_VSI_PROP_SW_VALID);
 +
 +      /* disable VLAN anti-spoof */
 +      ctxt->info.sec_flags &= ~(ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
 +                                ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S);
 +
 +      /* disable VLAN pruning and keep all other settings */
 +      ctxt->info.sw_flags2 &= ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
 +
 +      /* allow all VLANs on Tx and don't strip on Rx */
 +      ctxt->info.vlan_flags = ICE_AQ_VSI_VLAN_MODE_ALL |
 +              ICE_AQ_VSI_VLAN_EMOD_NOTHING;
 +
 +      status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
 +      if (status) {
 +              dev_err(ice_pf_to_dev(vsi->back), "Failed to update VSI for safe mode VLANs, err %s aq_err %s\n",
 +                      ice_stat_str(status),
 +                      ice_aq_str(hw->adminq.sq_last_status));
 +      } else {
 +              vsi->info.sec_flags = ctxt->info.sec_flags;
 +              vsi->info.sw_flags2 = ctxt->info.sw_flags2;
 +              vsi->info.vlan_flags = ctxt->info.vlan_flags;
 +      }
 +
 +      kfree(ctxt);
 +}
 +
  /**
   * ice_log_pkg_init - log result of DDP package load
   * @hw: pointer to hardware info
@@@ -4137,7 -3819,7 +4134,7 @@@ ice_probe(struct pci_dev *pdev, const s
        if (err) {
                dev_err(dev, "probe failed sending driver version %s. error: %d\n",
                        UTS_RELEASE, err);
 -              goto err_alloc_sw_unroll;
 +              goto err_send_version_unroll;
        }
  
        /* since everything is good, start the service timer */
        err = ice_init_link_events(pf->hw.port_info);
        if (err) {
                dev_err(dev, "ice_init_link_events failed: %d\n", err);
 -              goto err_alloc_sw_unroll;
 +              goto err_send_version_unroll;
        }
  
        err = ice_init_nvm_phy_type(pf->hw.port_info);
        if (err) {
                dev_err(dev, "ice_init_nvm_phy_type failed: %d\n", err);
 -              goto err_alloc_sw_unroll;
 +              goto err_send_version_unroll;
        }
  
        err = ice_update_link_info(pf->hw.port_info);
        if (err) {
                dev_err(dev, "ice_update_link_info failed: %d\n", err);
 -              goto err_alloc_sw_unroll;
 +              goto err_send_version_unroll;
        }
  
        ice_init_link_dflt_override(pf->hw.port_info);
                err = ice_init_phy_user_cfg(pf->hw.port_info);
                if (err) {
                        dev_err(dev, "ice_init_phy_user_cfg failed: %d\n", err);
 -                      goto err_alloc_sw_unroll;
 +                      goto err_send_version_unroll;
                }
  
                if (!test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags)) {
        /* Disable WoL at init, wait for user to enable */
        device_set_wakeup_enable(dev, false);
  
 -      /* If no DDP driven features have to be setup, we are done with probe */
 -      if (ice_is_safe_mode(pf))
 +      if (ice_is_safe_mode(pf)) {
 +              ice_set_safe_mode_vlan_cfg(pf);
                goto probe_done;
 +      }
  
        /* initialize DDP driven features */
  
@@@ -4223,8 -3904,6 +4220,8 @@@ probe_done
        clear_bit(__ICE_DOWN, pf->state);
        return 0;
  
 +err_send_version_unroll:
 +      ice_vsi_release_all(pf);
  err_alloc_sw_unroll:
        ice_devlink_destroy_port(pf);
        set_bit(__ICE_SERVICE_DIS, pf->state);
@@@ -4335,8 -4014,6 +4332,8 @@@ static void ice_remove(struct pci_dev *
        set_bit(__ICE_DOWN, pf->state);
        ice_service_task_stop(pf);
  
 +      ice_aq_cancel_waiting_tasks(pf);
 +
        mutex_destroy(&(&pf->hw)->fdir_fltr_lock);
        if (!ice_is_safe_mode(pf))
                ice_remove_arfs(pf);
@@@ -4470,7 -4147,7 +4467,7 @@@ err_reinit
   * Power Management callback to quiesce the device and prepare
   * for D3 transition.
   */
 -static int ice_suspend(struct device *dev)
 +static int __maybe_unused ice_suspend(struct device *dev)
  {
        struct pci_dev *pdev = to_pci_dev(dev);
        struct ice_pf *pf;
   * ice_resume - PM callback for waking up from D3
   * @dev: generic device information structure
   */
 -static int ice_resume(struct device *dev)
 +static int __maybe_unused ice_resume(struct device *dev)
  {
        struct pci_dev *pdev = to_pci_dev(dev);
        enum ice_reset_req reset_type;
@@@ -4683,8 -4360,6 +4680,8 @@@ static void ice_pci_err_resume(struct p
                return;
        }
  
 +      ice_restore_all_vfs_msi_state(pdev);
 +
        ice_do_reset(pf, ICE_RESET_PFR);
        ice_service_task_restart(pf);
        mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period));
@@@ -5293,7 -4968,6 +5290,7 @@@ static void ice_update_vsi_ring_stats(s
        vsi->tx_linearize = 0;
        vsi->rx_buf_failed = 0;
        vsi->rx_page_failed = 0;
 +      vsi->rx_gro_dropped = 0;
  
        rcu_read_lock();
  
                vsi_stats->rx_bytes += bytes;
                vsi->rx_buf_failed += ring->rx_stats.alloc_buf_failed;
                vsi->rx_page_failed += ring->rx_stats.alloc_page_failed;
 +              vsi->rx_gro_dropped += ring->rx_stats.gro_dropped;
        }
  
        /* update XDP Tx rings counters */
@@@ -5340,7 -5013,7 +5337,7 @@@ void ice_update_vsi_stats(struct ice_vs
        ice_update_eth_stats(vsi);
  
        cur_ns->tx_errors = cur_es->tx_errors;
 -      cur_ns->rx_dropped = cur_es->rx_discards;
 +      cur_ns->rx_dropped = cur_es->rx_discards + vsi->rx_gro_dropped;
        cur_ns->tx_dropped = cur_es->tx_discards;
        cur_ns->multicast = cur_es->rx_multicast;
  
@@@ -5983,6 -5656,10 +5980,6 @@@ static void ice_rebuild(struct ice_pf *
        if (err)
                goto err_sched_init_port;
  
 -      err = ice_update_link_info(hw->port_info);
 -      if (err)
 -              dev_err(dev, "Get link status error %d\n", err);
 -
        /* start misc vector */
        err = ice_req_irq_msix_misc(pf);
        if (err) {
diff --combined drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index e339edd0b593e7074d5e6a0732e4b5184cddf441,6f32b1706ab95ab2f1e25e63b2cd37f27897d9f6..2f8a4cfc5fa1ffe6a8fa77c3add02010b0b5c438
@@@ -6877,20 -6877,32 +6877,20 @@@ int ixgbe_close(struct net_device *netd
        return 0;
  }
  
 -#ifdef CONFIG_PM
 -static int ixgbe_resume(struct pci_dev *pdev)
 +static int __maybe_unused ixgbe_resume(struct device *dev_d)
  {
 +      struct pci_dev *pdev = to_pci_dev(dev_d);
        struct ixgbe_adapter *adapter = pci_get_drvdata(pdev);
        struct net_device *netdev = adapter->netdev;
        u32 err;
  
        adapter->hw.hw_addr = adapter->io_addr;
 -      pci_set_power_state(pdev, PCI_D0);
 -      pci_restore_state(pdev);
 -      /*
 -       * pci_restore_state clears dev->state_saved so call
 -       * pci_save_state to restore it.
 -       */
 -      pci_save_state(pdev);
  
 -      err = pci_enable_device_mem(pdev);
 -      if (err) {
 -              e_dev_err("Cannot enable PCI device from suspend\n");
 -              return err;
 -      }
        smp_mb__before_atomic();
        clear_bit(__IXGBE_DISABLED, &adapter->state);
        pci_set_master(pdev);
  
 -      pci_wake_from_d3(pdev, false);
 +      device_wakeup_disable(dev_d);
  
        ixgbe_reset(adapter);
  
  
        return err;
  }
 -#endif /* CONFIG_PM */
  
  static int __ixgbe_shutdown(struct pci_dev *pdev, bool *enable_wake)
  {
        struct ixgbe_hw *hw = &adapter->hw;
        u32 ctrl;
        u32 wufc = adapter->wol;
 -#ifdef CONFIG_PM
 -      int retval = 0;
 -#endif
  
        rtnl_lock();
        netif_device_detach(netdev);
        ixgbe_clear_interrupt_scheme(adapter);
        rtnl_unlock();
  
 -#ifdef CONFIG_PM
 -      retval = pci_save_state(pdev);
 -      if (retval)
 -              return retval;
 -
 -#endif
        if (hw->mac.ops.stop_link_on_d3)
                hw->mac.ops.stop_link_on_d3(hw);
  
        return 0;
  }
  
 -#ifdef CONFIG_PM
 -static int ixgbe_suspend(struct pci_dev *pdev, pm_message_t state)
 +static int __maybe_unused ixgbe_suspend(struct device *dev_d)
  {
 +      struct pci_dev *pdev = to_pci_dev(dev_d);
        int retval;
        bool wake;
  
        retval = __ixgbe_shutdown(pdev, &wake);
 -      if (retval)
 -              return retval;
  
 -      if (wake) {
 -              pci_prepare_to_sleep(pdev);
 -      } else {
 -              pci_wake_from_d3(pdev, false);
 -              pci_set_power_state(pdev, PCI_D3hot);
 -      }
 +      device_set_wakeup_enable(dev_d, wake);
  
 -      return 0;
 +      return retval;
  }
 -#endif /* CONFIG_PM */
  
  static void ixgbe_shutdown(struct pci_dev *pdev)
  {
@@@ -10160,10 -10190,6 +10160,6 @@@ static int ixgbe_xdp(struct net_device 
        switch (xdp->command) {
        case XDP_SETUP_PROG:
                return ixgbe_xdp_setup(dev, xdp->prog);
-       case XDP_QUERY_PROG:
-               xdp->prog_id = adapter->xdp_prog ?
-                       adapter->xdp_prog->aux->id : 0;
-               return 0;
        case XDP_SETUP_XSK_UMEM:
                return ixgbe_xsk_umem_setup(adapter, xdp->xsk.umem,
                                            xdp->xsk.queue_id);
@@@ -11353,15 -11379,16 +11349,15 @@@ static const struct pci_error_handlers 
        .resume = ixgbe_io_resume,
  };
  
 +static SIMPLE_DEV_PM_OPS(ixgbe_pm_ops, ixgbe_suspend, ixgbe_resume);
 +
  static struct pci_driver ixgbe_driver = {
 -      .name     = ixgbe_driver_name,
 -      .id_table = ixgbe_pci_tbl,
 -      .probe    = ixgbe_probe,
 -      .remove   = ixgbe_remove,
 -#ifdef CONFIG_PM
 -      .suspend  = ixgbe_suspend,
 -      .resume   = ixgbe_resume,
 -#endif
 -      .shutdown = ixgbe_shutdown,
 +      .name      = ixgbe_driver_name,
 +      .id_table  = ixgbe_pci_tbl,
 +      .probe     = ixgbe_probe,
 +      .remove    = ixgbe_remove,
 +      .driver.pm = &ixgbe_pm_ops,
 +      .shutdown  = ixgbe_shutdown,
        .sriov_configure = ixgbe_pci_sriov_configure,
        .err_handler = &ixgbe_err_handler
  };
diff --combined drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index c3d26cc0cf51459bee657470e8547cadb88ab298,a6267569bfa909e8d6b3d9e7ec0cf199f34c0eb3..a428113e6d54088d3385a54633389930ea524274
@@@ -4297,10 -4297,13 +4297,10 @@@ static int ixgbevf_change_mtu(struct ne
        return 0;
  }
  
 -static int ixgbevf_suspend(struct pci_dev *pdev, pm_message_t state)
 +static int __maybe_unused ixgbevf_suspend(struct device *dev_d)
  {
 -      struct net_device *netdev = pci_get_drvdata(pdev);
 +      struct net_device *netdev = dev_get_drvdata(dev_d);
        struct ixgbevf_adapter *adapter = netdev_priv(netdev);
 -#ifdef CONFIG_PM
 -      int retval = 0;
 -#endif
  
        rtnl_lock();
        netif_device_detach(netdev);
        ixgbevf_clear_interrupt_scheme(adapter);
        rtnl_unlock();
  
 -#ifdef CONFIG_PM
 -      retval = pci_save_state(pdev);
 -      if (retval)
 -              return retval;
 -
 -#endif
 -      if (!test_and_set_bit(__IXGBEVF_DISABLED, &adapter->state))
 -              pci_disable_device(pdev);
 -
        return 0;
  }
  
 -#ifdef CONFIG_PM
 -static int ixgbevf_resume(struct pci_dev *pdev)
 +static int __maybe_unused ixgbevf_resume(struct device *dev_d)
  {
 +      struct pci_dev *pdev = to_pci_dev(dev_d);
        struct net_device *netdev = pci_get_drvdata(pdev);
        struct ixgbevf_adapter *adapter = netdev_priv(netdev);
        u32 err;
  
 -      pci_restore_state(pdev);
 -      /* pci_restore_state clears dev->state_saved so call
 -       * pci_save_state to restore it.
 -       */
 -      pci_save_state(pdev);
 -
 -      err = pci_enable_device_mem(pdev);
 -      if (err) {
 -              dev_err(&pdev->dev, "Cannot enable PCI device from suspend\n");
 -              return err;
 -      }
 -
        adapter->hw.hw_addr = adapter->io_addr;
        smp_mb__before_atomic();
        clear_bit(__IXGBEVF_DISABLED, &adapter->state);
        return err;
  }
  
 -#endif /* CONFIG_PM */
  static void ixgbevf_shutdown(struct pci_dev *pdev)
  {
 -      ixgbevf_suspend(pdev, PMSG_SUSPEND);
 +      ixgbevf_suspend(&pdev->dev);
  }
  
  static void ixgbevf_get_tx_ring_stats(struct rtnl_link_stats64 *stats,
@@@ -4477,15 -4502,9 +4477,9 @@@ static int ixgbevf_xdp_setup(struct net
  
  static int ixgbevf_xdp(struct net_device *dev, struct netdev_bpf *xdp)
  {
-       struct ixgbevf_adapter *adapter = netdev_priv(dev);
        switch (xdp->command) {
        case XDP_SETUP_PROG:
                return ixgbevf_xdp_setup(dev, xdp->prog);
-       case XDP_QUERY_PROG:
-               xdp->prog_id = adapter->xdp_prog ?
-                              adapter->xdp_prog->aux->id : 0;
-               return 0;
        default:
                return -EINVAL;
        }
@@@ -4863,17 -4882,16 +4857,17 @@@ static const struct pci_error_handlers 
        .resume = ixgbevf_io_resume,
  };
  
 +static SIMPLE_DEV_PM_OPS(ixgbevf_pm_ops, ixgbevf_suspend, ixgbevf_resume);
 +
  static struct pci_driver ixgbevf_driver = {
        .name           = ixgbevf_driver_name,
        .id_table       = ixgbevf_pci_tbl,
        .probe          = ixgbevf_probe,
        .remove         = ixgbevf_remove,
 -#ifdef CONFIG_PM
 +
        /* Power Management Hooks */
 -      .suspend        = ixgbevf_suspend,
 -      .resume         = ixgbevf_resume,
 -#endif
 +      .driver.pm      = &ixgbevf_pm_ops,
 +
        .shutdown       = ixgbevf_shutdown,
        .err_handler    = &ixgbevf_err_handler
  };
diff --combined drivers/net/ethernet/marvell/mvneta.c
index c9b6b0f85bb081319357f40521c0c6592a467a65,6e3f9e2f883b29a8df0df1955f277d1b18f18614..832bbb8b05c80d814f8d3dee7c7c270a54d2e0f7
@@@ -3637,7 -3637,7 +3637,7 @@@ static void mvneta_start_dev(struct mvn
  
        phylink_start(pp->phylink);
  
 -      /* We may have called phy_speed_down before */
 +      /* We may have called phylink_speed_down before */
        phylink_speed_up(pp->phylink);
  
        netif_tx_start_all_queues(pp->dev);
@@@ -4442,14 -4442,9 +4442,9 @@@ static int mvneta_xdp_setup(struct net_
  
  static int mvneta_xdp(struct net_device *dev, struct netdev_bpf *xdp)
  {
-       struct mvneta_port *pp = netdev_priv(dev);
        switch (xdp->command) {
        case XDP_SETUP_PROG:
                return mvneta_xdp_setup(dev, xdp->prog, xdp->extack);
-       case XDP_QUERY_PROG:
-               xdp->prog_id = pp->xdp_prog ? pp->xdp_prog->aux->id : 0;
-               return 0;
        default:
                return -EINVAL;
        }
diff --combined drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 4298a029be5546d3996d384bdbb2f2fa1ffc72ad,cd5e9d60307ec0c3dcf5d6317a59481930a8d8de..2a8a5842eaefdf8d2d38795a3f9c26fb723a8ecd
@@@ -3508,7 -3508,6 +3508,7 @@@ static int mvpp2_rx(struct mvpp2_port *
                err = mvpp2_rx_refill(port, bm_pool, pp, pool);
                if (err) {
                        netdev_err(port->dev, "failed to refill BM pools\n");
 +                      dev_kfree_skb_any(skb);
                        goto err_drop_frame;
                }
  
@@@ -4657,9 -4656,6 +4657,6 @@@ static int mvpp2_xdp(struct net_device 
        switch (xdp->command) {
        case XDP_SETUP_PROG:
                return mvpp2_xdp_setup(port, xdp);
-       case XDP_QUERY_PROG:
-               xdp->prog_id = port->xdp_prog ? port->xdp_prog->aux->id : 0;
-               return 0;
        default:
                return -EINVAL;
        }
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 8f26cd951ff5405c474badcb041caa26f63739ca,aa4fb503dac359880e0fce0077598d7c9fb583b0..aebcf73f8546d054cb1e907a1a229066375e6b6e
@@@ -45,6 -45,7 +45,6 @@@
  #include "en_tc.h"
  #include "en_rep.h"
  #include "en_accel/ipsec.h"
 -#include "en_accel/ipsec_rxtx.h"
  #include "en_accel/en_accel.h"
  #include "en_accel/tls.h"
  #include "accel/ipsec.h"
@@@ -64,6 -65,7 +64,6 @@@
  #include "en/hv_vhca_stats.h"
  #include "en/devlink.h"
  #include "lib/mlx5.h"
 -#include "fpga/ipsec.h"
  
  bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
  {
@@@ -274,7 -276,7 +274,7 @@@ static int mlx5e_create_umr_mkey(struc
        MLX5_SET(mkc, mkc, lw, 1);
        MLX5_SET(mkc, mkc, lr, 1);
        MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
 -
 +      mlx5e_mkey_set_relaxed_ordering(mdev, mkc);
        MLX5_SET(mkc, mkc, qpn, 0xffffff);
        MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.pdn);
        MLX5_SET64(mkc, mkc, len, npages << page_shift);
@@@ -417,7 -419,7 +417,7 @@@ static int mlx5e_alloc_rq(struct mlx5e_
                err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->mpwqe.wq,
                                        &rq->wq_ctrl);
                if (err)
 -                      return err;
 +                      goto err_rq_wq_destroy;
  
                rq->mpwqe.wq.db = &rq->mpwqe.wq.db[MLX5_RCV_DBR];
  
                pool_size = MLX5_MPWRQ_PAGES_PER_WQE <<
                        mlx5e_mpwqe_get_log_rq_size(params, xsk);
  
 -              rq->post_wqes = mlx5e_post_rx_mpwqes;
 -              rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
 -
 -              rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe_mpwqe;
 -#ifdef CONFIG_MLX5_EN_IPSEC
 -              if (MLX5_IPSEC_DEV(mdev)) {
 -                      err = -EINVAL;
 -                      netdev_err(c->netdev, "MPWQE RQ with IPSec offload not supported\n");
 -                      goto err_rq_wq_destroy;
 -              }
 -#endif
 -              if (!rq->handle_rx_cqe) {
 -                      err = -EINVAL;
 -                      netdev_err(c->netdev, "RX handler of MPWQE RQ is not set, err %d\n", err);
 -                      goto err_rq_wq_destroy;
 -              }
 -
 -              rq->mpwqe.skb_from_cqe_mpwrq = xsk ?
 -                      mlx5e_xsk_skb_from_cqe_mpwrq_linear :
 -                      mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) ?
 -                              mlx5e_skb_from_cqe_mpwrq_linear :
 -                              mlx5e_skb_from_cqe_mpwrq_nonlinear;
 -
                rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
                rq->mpwqe.num_strides =
                        BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk));
                err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq,
                                         &rq->wq_ctrl);
                if (err)
 -                      return err;
 +                      goto err_rq_wq_destroy;
  
                rq->wqe.wq.db = &rq->wqe.wq.db[MLX5_RCV_DBR];
  
                if (err)
                        goto err_free;
  
 -              rq->post_wqes = mlx5e_post_rx_wqes;
 -              rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
 -
 -#ifdef CONFIG_MLX5_EN_IPSEC
 -              if ((mlx5_fpga_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE) &&
 -                  c->priv->ipsec)
 -                      rq->handle_rx_cqe = mlx5e_ipsec_handle_rx_cqe;
 -              else
 -#endif
 -                      rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe;
 -              if (!rq->handle_rx_cqe) {
 -                      err = -EINVAL;
 -                      netdev_err(c->netdev, "RX handler of RQ is not set, err %d\n", err);
 -                      goto err_free;
 -              }
 -
 -              rq->wqe.skb_from_cqe = xsk ?
 -                      mlx5e_xsk_skb_from_cqe_linear :
 -                      mlx5e_rx_is_linear_skb(params, NULL) ?
 -                              mlx5e_skb_from_cqe_linear :
 -                              mlx5e_skb_from_cqe_nonlinear;
                rq->mkey_be = c->mkey_be;
        }
  
 +      err = mlx5e_rq_set_handlers(rq, params, xsk);
 +      if (err)
 +              goto err_free;
 +
        if (xsk) {
                err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
                                                 MEM_TYPE_XSK_BUFF_POOL, NULL);
@@@ -3062,25 -3104,6 +3062,25 @@@ void mlx5e_timestamp_init(struct mlx5e_
        priv->tstamp.rx_filter = HWTSTAMP_FILTER_NONE;
  }
  
 +static void mlx5e_modify_admin_state(struct mlx5_core_dev *mdev,
 +                                   enum mlx5_port_status state)
 +{
 +      struct mlx5_eswitch *esw = mdev->priv.eswitch;
 +      int vport_admin_state;
 +
 +      mlx5_set_port_admin_status(mdev, state);
 +
 +      if (!MLX5_ESWITCH_MANAGER(mdev) ||  mlx5_eswitch_mode(esw) == MLX5_ESWITCH_OFFLOADS)
 +              return;
 +
 +      if (state == MLX5_PORT_UP)
 +              vport_admin_state = MLX5_VPORT_ADMIN_STATE_AUTO;
 +      else
 +              vport_admin_state = MLX5_VPORT_ADMIN_STATE_DOWN;
 +
 +      mlx5_eswitch_set_vport_state(esw, MLX5_VPORT_UPLINK, vport_admin_state);
 +}
 +
  int mlx5e_open_locked(struct net_device *netdev)
  {
        struct mlx5e_priv *priv = netdev_priv(netdev);
@@@ -3113,7 -3136,7 +3113,7 @@@ int mlx5e_open(struct net_device *netde
        mutex_lock(&priv->state_lock);
        err = mlx5e_open_locked(netdev);
        if (!err)
 -              mlx5_set_port_admin_status(priv->mdev, MLX5_PORT_UP);
 +              mlx5e_modify_admin_state(priv->mdev, MLX5_PORT_UP);
        mutex_unlock(&priv->state_lock);
  
        return err;
@@@ -3147,7 -3170,7 +3147,7 @@@ int mlx5e_close(struct net_device *netd
                return -ENODEV;
  
        mutex_lock(&priv->state_lock);
 -      mlx5_set_port_admin_status(priv->mdev, MLX5_PORT_DOWN);
 +      mlx5e_modify_admin_state(priv->mdev, MLX5_PORT_DOWN);
        err = mlx5e_close_locked(netdev);
        mutex_unlock(&priv->state_lock);
  
@@@ -4191,6 -4214,83 +4191,6 @@@ int mlx5e_get_vf_stats(struct net_devic
  }
  #endif
  
 -struct mlx5e_vxlan_work {
 -      struct work_struct      work;
 -      struct mlx5e_priv       *priv;
 -      u16                     port;
 -};
 -
 -static void mlx5e_vxlan_add_work(struct work_struct *work)
 -{
 -      struct mlx5e_vxlan_work *vxlan_work =
 -              container_of(work, struct mlx5e_vxlan_work, work);
 -      struct mlx5e_priv *priv = vxlan_work->priv;
 -      u16 port = vxlan_work->port;
 -
 -      mutex_lock(&priv->state_lock);
 -      mlx5_vxlan_add_port(priv->mdev->vxlan, port);
 -      mutex_unlock(&priv->state_lock);
 -
 -      kfree(vxlan_work);
 -}
 -
 -static void mlx5e_vxlan_del_work(struct work_struct *work)
 -{
 -      struct mlx5e_vxlan_work *vxlan_work =
 -              container_of(work, struct mlx5e_vxlan_work, work);
 -      struct mlx5e_priv *priv         = vxlan_work->priv;
 -      u16 port = vxlan_work->port;
 -
 -      mutex_lock(&priv->state_lock);
 -      mlx5_vxlan_del_port(priv->mdev->vxlan, port);
 -      mutex_unlock(&priv->state_lock);
 -      kfree(vxlan_work);
 -}
 -
 -static void mlx5e_vxlan_queue_work(struct mlx5e_priv *priv, u16 port, int add)
 -{
 -      struct mlx5e_vxlan_work *vxlan_work;
 -
 -      vxlan_work = kmalloc(sizeof(*vxlan_work), GFP_ATOMIC);
 -      if (!vxlan_work)
 -              return;
 -
 -      if (add)
 -              INIT_WORK(&vxlan_work->work, mlx5e_vxlan_add_work);
 -      else
 -              INIT_WORK(&vxlan_work->work, mlx5e_vxlan_del_work);
 -
 -      vxlan_work->priv = priv;
 -      vxlan_work->port = port;
 -      queue_work(priv->wq, &vxlan_work->work);
 -}
 -
 -void mlx5e_add_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti)
 -{
 -      struct mlx5e_priv *priv = netdev_priv(netdev);
 -
 -      if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
 -              return;
 -
 -      if (!mlx5_vxlan_allowed(priv->mdev->vxlan))
 -              return;
 -
 -      mlx5e_vxlan_queue_work(priv, be16_to_cpu(ti->port), 1);
 -}
 -
 -void mlx5e_del_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti)
 -{
 -      struct mlx5e_priv *priv = netdev_priv(netdev);
 -
 -      if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
 -              return;
 -
 -      if (!mlx5_vxlan_allowed(priv->mdev->vxlan))
 -              return;
 -
 -      mlx5e_vxlan_queue_work(priv, be16_to_cpu(ti->port), 0);
 -}
 -
  static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
                                                     struct sk_buff *skb,
                                                     netdev_features_t features)
@@@ -4418,29 -4518,11 +4418,11 @@@ unlock
        return err;
  }
  
- static u32 mlx5e_xdp_query(struct net_device *dev)
- {
-       struct mlx5e_priv *priv = netdev_priv(dev);
-       const struct bpf_prog *xdp_prog;
-       u32 prog_id = 0;
- 
-       mutex_lock(&priv->state_lock);
-       xdp_prog = priv->channels.params.xdp_prog;
-       if (xdp_prog)
-               prog_id = xdp_prog->aux->id;
-       mutex_unlock(&priv->state_lock);
- 
-       return prog_id;
- }
- 
  static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp)
  {
        switch (xdp->command) {
        case XDP_SETUP_PROG:
                return mlx5e_xdp_set(dev, xdp->prog);
-       case XDP_QUERY_PROG:
-               xdp->prog_id = mlx5e_xdp_query(dev);
-               return 0;
        case XDP_SETUP_XSK_UMEM:
                return mlx5e_xsk_setup_umem(dev, xdp->xsk.umem,
                                            xdp->xsk.queue_id);
@@@ -4520,8 -4602,8 +4502,8 @@@ const struct net_device_ops mlx5e_netde
        .ndo_change_mtu          = mlx5e_change_nic_mtu,
        .ndo_do_ioctl            = mlx5e_ioctl,
        .ndo_set_tx_maxrate      = mlx5e_set_tx_maxrate,
 -      .ndo_udp_tunnel_add      = mlx5e_add_vxlan_port,
 -      .ndo_udp_tunnel_del      = mlx5e_del_vxlan_port,
 +      .ndo_udp_tunnel_add      = udp_tunnel_nic_add_port,
 +      .ndo_udp_tunnel_del      = udp_tunnel_nic_del_port,
        .ndo_features_check      = mlx5e_features_check,
        .ndo_tx_timeout          = mlx5e_tx_timeout,
        .ndo_bpf                 = mlx5e_xdp,
@@@ -4792,39 -4874,6 +4774,39 @@@ static void mlx5e_set_netdev_dev_addr(s
        }
  }
  
 +static int mlx5e_vxlan_set_port(struct net_device *netdev, unsigned int table,
 +                              unsigned int entry, struct udp_tunnel_info *ti)
 +{
 +      struct mlx5e_priv *priv = netdev_priv(netdev);
 +
 +      return mlx5_vxlan_add_port(priv->mdev->vxlan, ntohs(ti->port));
 +}
 +
 +static int mlx5e_vxlan_unset_port(struct net_device *netdev, unsigned int table,
 +                                unsigned int entry, struct udp_tunnel_info *ti)
 +{
 +      struct mlx5e_priv *priv = netdev_priv(netdev);
 +
 +      return mlx5_vxlan_del_port(priv->mdev->vxlan, ntohs(ti->port));
 +}
 +
 +void mlx5e_vxlan_set_netdev_info(struct mlx5e_priv *priv)
 +{
 +      if (!mlx5_vxlan_allowed(priv->mdev->vxlan))
 +              return;
 +
 +      priv->nic_info.set_port = mlx5e_vxlan_set_port;
 +      priv->nic_info.unset_port = mlx5e_vxlan_unset_port;
 +      priv->nic_info.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP |
 +                              UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN;
 +      priv->nic_info.tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN;
 +      /* Don't count the space hard-coded to the IANA port */
 +      priv->nic_info.tables[0].n_entries =
 +              mlx5_vxlan_max_udp_ports(priv->mdev) - 1;
 +
 +      priv->netdev->udp_tunnel_nic_info = &priv->nic_info;
 +}
 +
  static void mlx5e_build_nic_netdev(struct net_device *netdev)
  {
        struct mlx5e_priv *priv = netdev_priv(netdev);
        netdev->hw_features      |= NETIF_F_HW_VLAN_CTAG_FILTER;
        netdev->hw_features      |= NETIF_F_HW_VLAN_STAG_TX;
  
 +      mlx5e_vxlan_set_netdev_info(priv);
 +
        if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev) ||
            mlx5e_any_tunnel_proto_supported(mdev)) {
                netdev->hw_enc_features |= NETIF_F_HW_CSUM;
@@@ -5154,7 -5201,7 +5136,7 @@@ static void mlx5e_nic_enable(struct mlx
  
        /* Marking the link as currently not needed by the Driver */
        if (!netif_running(netdev))
 -              mlx5_set_port_admin_status(mdev, MLX5_PORT_DOWN);
 +              mlx5e_modify_admin_state(mdev, MLX5_PORT_DOWN);
  
        mlx5e_set_netdev_mtu_boundaries(priv);
        mlx5e_set_dev_port_mtu(priv);
        rtnl_lock();
        if (netif_running(netdev))
                mlx5e_open(netdev);
 -      if (mlx5_vxlan_allowed(priv->mdev->vxlan))
 -              udp_tunnel_get_rx_info(netdev);
 +      udp_tunnel_nic_reset_ntf(priv->netdev);
        netif_device_attach(netdev);
        rtnl_unlock();
  }
@@@ -5190,6 -5238,8 +5172,6 @@@ static void mlx5e_nic_disable(struct ml
        rtnl_lock();
        if (netif_running(priv->netdev))
                mlx5e_close(priv->netdev);
 -      if (mlx5_vxlan_allowed(priv->mdev->vxlan))
 -              udp_tunnel_drop_rx_info(priv->netdev);
        netif_device_detach(priv->netdev);
        rtnl_unlock();
  
@@@ -5220,7 -5270,8 +5202,7 @@@ static const struct mlx5e_profile mlx5e
        .update_rx         = mlx5e_update_nic_rx,
        .update_stats      = mlx5e_update_ndo_stats,
        .update_carrier    = mlx5e_update_carrier,
 -      .rx_handlers.handle_rx_cqe       = mlx5e_handle_rx_cqe,
 -      .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
 +      .rx_handlers       = &mlx5e_rx_handlers_nic,
        .max_tc            = MLX5E_MAX_NUM_TC,
        .rq_groups         = MLX5E_NUM_RQ_GROUPS(XSK),
        .stats_grps        = mlx5e_nic_stats_grps,
@@@ -5358,8 -5409,6 +5340,8 @@@ err_cleanup_tx
        profile->cleanup_tx(priv);
  
  out:
 +      set_bit(MLX5E_STATE_DESTROYING, &priv->state);
 +      cancel_work_sync(&priv->update_stats_work);
        return err;
  }
  
diff --combined drivers/net/ethernet/sfc/efx.c
index f5aa1bd02f19af1ddab177bc845991c2fbeb701e,d60acaa3879d021e425ab3d6f82acab797aedbaf..e06fa89f2d7259feaaaad7ba69942f7ac907f22f
@@@ -25,7 -25,6 +25,7 @@@
  #include "efx.h"
  #include "efx_common.h"
  #include "efx_channels.h"
 +#include "ef100.h"
  #include "rx_common.h"
  #include "tx_common.h"
  #include "nic.h"
@@@ -654,15 -653,10 +654,10 @@@ static int efx_xdp_setup_prog(struct ef
  static int efx_xdp(struct net_device *dev, struct netdev_bpf *xdp)
  {
        struct efx_nic *efx = netdev_priv(dev);
-       struct bpf_prog *xdp_prog;
  
        switch (xdp->command) {
        case XDP_SETUP_PROG:
                return efx_xdp_setup_prog(efx, xdp->prog);
-       case XDP_QUERY_PROG:
-               xdp_prog = rtnl_dereference(efx->xdp_prog);
-               xdp->prog_id = xdp_prog ? xdp_prog->aux->id : 0;
-               return 0;
        default:
                return -EINVAL;
        }
@@@ -1361,14 -1355,8 +1356,14 @@@ static int __init efx_init_module(void
        if (rc < 0)
                goto err_pci;
  
 +      rc = pci_register_driver(&ef100_pci_driver);
 +      if (rc < 0)
 +              goto err_pci_ef100;
 +
        return 0;
  
 + err_pci_ef100:
 +      pci_unregister_driver(&efx_pci_driver);
   err_pci:
        efx_destroy_reset_workqueue();
   err_reset:
@@@ -1385,7 -1373,6 +1380,7 @@@ static void __exit efx_exit_module(void
  {
        printk(KERN_INFO "Solarflare NET driver unloading\n");
  
 +      pci_unregister_driver(&ef100_pci_driver);
        pci_unregister_driver(&efx_pci_driver);
        efx_destroy_reset_workqueue();
  #ifdef CONFIG_SFC_SRIOV
diff --combined drivers/net/tun.c
index 9b4b25358f9bb0458eb90dec4dd925e37ae179e4,061bebe25cb1a241dd3dfee5c22acb534ed8671d..3c11a77f5709f1b0218e44f2e5b1fd90256e0d73
@@@ -1184,26 -1184,11 +1184,11 @@@ static int tun_xdp_set(struct net_devic
        return 0;
  }
  
- static u32 tun_xdp_query(struct net_device *dev)
- {
-       struct tun_struct *tun = netdev_priv(dev);
-       const struct bpf_prog *xdp_prog;
- 
-       xdp_prog = rtnl_dereference(tun->xdp_prog);
-       if (xdp_prog)
-               return xdp_prog->aux->id;
- 
-       return 0;
- }
- 
  static int tun_xdp(struct net_device *dev, struct netdev_bpf *xdp)
  {
        switch (xdp->command) {
        case XDP_SETUP_PROG:
                return tun_xdp_set(dev, xdp->prog, xdp->extack);
-       case XDP_QUERY_PROG:
-               xdp->prog_id = tun_xdp_query(dev);
-               return 0;
        default:
                return -EINVAL;
        }
@@@ -2983,7 -2968,7 +2968,7 @@@ unlock
        return ret;
  }
  
 -static int tun_set_ebpf(struct tun_struct *tun, struct tun_prog **prog_p,
 +static int tun_set_ebpf(struct tun_struct *tun, struct tun_prog __rcu **prog_p,
                        void __user *data)
  {
        struct bpf_prog *prog;
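
With XDP_QUERY_PROG gone, drivers such as tun no longer report the attached
program themselves; the core now tracks the per-mode program (and its ID) in
struct net_device, as the include/linux/netdevice.h hunks below show, and
answers queries over rtnetlink. A user space sketch of the unchanged query
path, via libbpf's existing bpf_get_link_xdp_id():

	#include <bpf/libbpf.h>
	#include <linux/if_link.h>
	#include <net/if.h>
	#include <stdio.h>

	int show_xdp_prog_id(const char *ifname)
	{
		__u32 prog_id = 0;
		int err;

		/* Returns the ID of the program attached in native (driver)
		 * mode; prog_id stays 0 if nothing is attached.
		 */
		err = bpf_get_link_xdp_id(if_nametoindex(ifname), &prog_id,
					  XDP_FLAGS_DRV_MODE);
		if (err)
			return err;

		printf("%s: xdp prog id %u\n", ifname, prog_id);
		return 0;
	}
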
diff --combined include/linux/netdevice.h
index ba0fa6b22787a2a872329f80414824c74e5345f4,1046763cd0dcef4c798a0bb7a7365b637e3ef880..88d40b9abaa1fa84ba128dd4efc584a32f8bed3c
@@@ -876,8 -876,6 +876,6 @@@ enum bpf_netdev_command 
         */
        XDP_SETUP_PROG,
        XDP_SETUP_PROG_HW,
-       XDP_QUERY_PROG,
-       XDP_QUERY_PROG_HW,
        /* BPF program for offload callbacks, invoked at program load time. */
        BPF_OFFLOAD_MAP_ALLOC,
        BPF_OFFLOAD_MAP_FREE,
@@@ -888,6 -886,19 +886,19 @@@ struct bpf_prog_offload_ops
  struct netlink_ext_ack;
  struct xdp_umem;
  struct xdp_dev_bulk_queue;
+ struct bpf_xdp_link;
+ 
+ enum bpf_xdp_mode {
+       XDP_MODE_SKB = 0,
+       XDP_MODE_DRV = 1,
+       XDP_MODE_HW = 2,
+       __MAX_XDP_MODE
+ };
+ 
+ struct bpf_xdp_entity {
+       struct bpf_prog *prog;
+       struct bpf_xdp_link *link;
+ };
  
  struct netdev_bpf {
        enum bpf_netdev_command command;
                        struct bpf_prog *prog;
                        struct netlink_ext_ack *extack;
                };
-               /* XDP_QUERY_PROG, XDP_QUERY_PROG_HW */
-               struct {
-                       u32 prog_id;
-                       /* flags with which program was installed */
-                       u32 prog_flags;
-               };
                /* BPF_OFFLOAD_MAP_ALLOC, BPF_OFFLOAD_MAP_FREE */
                struct {
                        struct bpf_offloaded_map *offmap;
@@@ -2058,8 -2063,6 +2063,8 @@@ struct net_device 
        struct timer_list       watchdog_timer;
        int                     watchdog_timeo;
  
 +      u32                     proto_down_reason;
 +
        struct list_head        todo_list;
        int __percpu            *pcpu_refcnt;
  
  #endif
        const struct udp_tunnel_nic_info        *udp_tunnel_nic_info;
        struct udp_tunnel_nic   *udp_tunnel_nic;
+       /* protected by rtnl_lock */
+       struct bpf_xdp_entity   xdp_state[__MAX_XDP_MODE];
  };
  #define to_net_dev(d) container_of(d, struct net_device, dev)
  
@@@ -3812,8 -3818,6 +3820,8 @@@ int dev_get_port_parent_id(struct net_d
  bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b);
  int dev_change_proto_down(struct net_device *dev, bool proto_down);
  int dev_change_proto_down_generic(struct net_device *dev, bool proto_down);
 +void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask,
 +                                u32 value);
  struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again);
  struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
                                    struct netdev_queue *txq, int *ret);
  typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf);
  int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
                      int fd, int expected_fd, u32 flags);
- u32 __dev_xdp_query(struct net_device *dev, bpf_op_t xdp_op,
-                   enum bpf_netdev_command cmd);
+ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
+ u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode);
  int xdp_umem_query(struct net_device *dev, u16 queue_id);
  
  int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
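
The xdp_state[] array just added to struct net_device keeps one (prog, link)
pair per attach mode: XDP_MODE_SKB for generic XDP, XDP_MODE_DRV for native
driver XDP, XDP_MODE_HW for offload. User space still selects the mode with
the classic netlink attach flags; a small sketch (illustrative, not kernel
code):

	#include <bpf/libbpf.h>
	#include <linux/if_link.h>

	/* Attach prog_fd in generic (skb) mode; the kernel maps
	 * XDP_FLAGS_SKB_MODE onto the XDP_MODE_SKB slot, and the attach
	 * fails if that slot is already owned by a bpf_link.
	 */
	int attach_generic_xdp(int ifindex, int prog_fd)
	{
		return bpf_set_link_xdp_fd(ifindex, prog_fd,
					   XDP_FLAGS_SKB_MODE);
	}
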
diff --combined kernel/bpf/btf.c
index 4ff0de860ab754736394dbe94fd45a1e42a081a1,0fd6bb62be3ab2f170e1398989d732afe77e5477..91afdd4c82e33109f7284017bc6c3da55f6b7191
@@@ -3806,6 -3806,19 +3806,19 @@@ bool btf_ctx_access(int off, int size, 
                        btf_kind_str[BTF_INFO_KIND(t->info)]);
                return false;
        }
+ 
+       /* check for PTR_TO_RDONLY_BUF_OR_NULL or PTR_TO_RDWR_BUF_OR_NULL */
+       for (i = 0; i < prog->aux->ctx_arg_info_size; i++) {
+               const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i];
+ 
+               if (ctx_arg_info->offset == off &&
+                   (ctx_arg_info->reg_type == PTR_TO_RDONLY_BUF_OR_NULL ||
+                    ctx_arg_info->reg_type == PTR_TO_RDWR_BUF_OR_NULL)) {
+                       info->reg_type = ctx_arg_info->reg_type;
+                       return true;
+               }
+       }
+ 
        if (t->type == 0)
                /* This is a pointer to void.
                 * It is the same as scalar from the verifier safety pov.
@@@ -4082,7 -4095,7 +4095,7 @@@ int btf_resolve_helper_id(struct bpf_ve
  {
        int id;
  
 -      if (fn->arg_type[arg] != ARG_PTR_TO_BTF_ID)
 +      if (fn->arg_type[arg] != ARG_PTR_TO_BTF_ID || !btf_vmlinux)
                return -EINVAL;
        id = fn->btf_id[arg];
        if (!id || id > btf_vmlinux->nr_types)
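
The btf_ctx_access() change above is what lets the verifier type a map-element
iterator's ctx->key/ctx->value as PTR_TO_RDONLY_BUF_OR_NULL and
PTR_TO_RDWR_BUF_OR_NULL, forcing the NULL checks seen below; the hashtab.c
hunks that follow supply the seq_file plumbing behind it. A minimal BPF-side
sketch, assuming a vmlinux.h that provides struct bpf_iter__bpf_map_elem and a
hash map with u32 keys and u64 values (illustrative only):

	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_tracing.h>

	SEC("iter/bpf_map_elem")
	int dump_elem(struct bpf_iter__bpf_map_elem *ctx)
	{
		struct seq_file *seq = ctx->meta->seq;
		__u32 *key = ctx->key;
		__u64 *val = ctx->value;

		/* key/value are NULL on the final, post-iteration call */
		if (!key || !val)
			return 0;

		BPF_SEQ_PRINTF(seq, "key %u: value %llu\n", *key, *val);
		return 0;
	}

	char _license[] SEC("license") = "GPL";
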
diff --combined kernel/bpf/hashtab.c
index 7b136953f21414d4e2d9532852d03b7495b45a67,02427678705504d08d19df20463dfc31b3de1365..78dfff6a501b96c013b0e861ecc662be358d018e
@@@ -779,20 -779,15 +779,20 @@@ static void htab_elem_free_rcu(struct r
        htab_elem_free(htab, l);
  }
  
 -static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 +static void htab_put_fd_value(struct bpf_htab *htab, struct htab_elem *l)
  {
        struct bpf_map *map = &htab->map;
 +      void *ptr;
  
        if (map->ops->map_fd_put_ptr) {
 -              void *ptr = fd_htab_map_get_ptr(map, l);
 -
 +              ptr = fd_htab_map_get_ptr(map, l);
                map->ops->map_fd_put_ptr(ptr);
        }
 +}
 +
 +static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 +{
 +      htab_put_fd_value(htab, l);
  
        if (htab_is_prealloc(htab)) {
                __pcpu_freelist_push(&htab->freelist, &l->fnode);
@@@ -844,7 -839,6 +844,7 @@@ static struct htab_elem *alloc_htab_ele
                         */
                        pl_new = this_cpu_ptr(htab->extra_elems);
                        l_new = *pl_new;
 +                      htab_put_fd_value(htab, old_elem);
                        *pl_new = old_elem;
                } else {
                        struct pcpu_freelist_node *l;
@@@ -1618,6 -1612,196 +1618,196 @@@ htab_lru_map_lookup_and_delete_batch(st
                                                  true, false);
  }
  
+ struct bpf_iter_seq_hash_map_info {
+       struct bpf_map *map;
+       struct bpf_htab *htab;
+       void *percpu_value_buf; // non-zero means percpu hash
+       unsigned long flags;
+       u32 bucket_id;
+       u32 skip_elems;
+ };
+ static struct htab_elem *
+ bpf_hash_map_seq_find_next(struct bpf_iter_seq_hash_map_info *info,
+                          struct htab_elem *prev_elem)
+ {
+       const struct bpf_htab *htab = info->htab;
+       unsigned long flags = info->flags;
+       u32 skip_elems = info->skip_elems;
+       u32 bucket_id = info->bucket_id;
+       struct hlist_nulls_head *head;
+       struct hlist_nulls_node *n;
+       struct htab_elem *elem;
+       struct bucket *b;
+       u32 i, count;
+       if (bucket_id >= htab->n_buckets)
+               return NULL;
+       /* try to find next elem in the same bucket */
+       if (prev_elem) {
+               /* no update/deletion on this bucket, prev_elem should be still valid
+                * and we won't skip elements.
+                */
+               n = rcu_dereference_raw(hlist_nulls_next_rcu(&prev_elem->hash_node));
+               elem = hlist_nulls_entry_safe(n, struct htab_elem, hash_node);
+               if (elem)
+                       return elem;
+               /* not found, unlock and go to the next bucket */
+               b = &htab->buckets[bucket_id++];
+               htab_unlock_bucket(htab, b, flags);
+               skip_elems = 0;
+       }
+       for (i = bucket_id; i < htab->n_buckets; i++) {
+               b = &htab->buckets[i];
+               flags = htab_lock_bucket(htab, b);
+               count = 0;
+               head = &b->head;
+               hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) {
+                       if (count >= skip_elems) {
+                               info->flags = flags;
+                               info->bucket_id = i;
+                               info->skip_elems = count;
+                               return elem;
+                       }
+                       count++;
+               }
+               htab_unlock_bucket(htab, b, flags);
+               skip_elems = 0;
+       }
+       info->bucket_id = i;
+       info->skip_elems = 0;
+       return NULL;
+ }
+
+ static void *bpf_hash_map_seq_start(struct seq_file *seq, loff_t *pos)
+ {
+       struct bpf_iter_seq_hash_map_info *info = seq->private;
+       struct htab_elem *elem;
+       elem = bpf_hash_map_seq_find_next(info, NULL);
+       if (!elem)
+               return NULL;
+       if (*pos == 0)
+               ++*pos;
+       return elem;
+ }
+
+ static void *bpf_hash_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+ {
+       struct bpf_iter_seq_hash_map_info *info = seq->private;
+       ++*pos;
+       ++info->skip_elems;
+       return bpf_hash_map_seq_find_next(info, v);
+ }
+
+ static int __bpf_hash_map_seq_show(struct seq_file *seq, struct htab_elem *elem)
+ {
+       struct bpf_iter_seq_hash_map_info *info = seq->private;
+       u32 roundup_key_size, roundup_value_size;
+       struct bpf_iter__bpf_map_elem ctx = {};
+       struct bpf_map *map = info->map;
+       struct bpf_iter_meta meta;
+       int ret = 0, off = 0, cpu;
+       struct bpf_prog *prog;
+       void __percpu *pptr;
+       meta.seq = seq;
+       prog = bpf_iter_get_info(&meta, elem == NULL);
+       if (prog) {
+               ctx.meta = &meta;
+               ctx.map = info->map;
+               if (elem) {
+                       roundup_key_size = round_up(map->key_size, 8);
+                       ctx.key = elem->key;
+                       if (!info->percpu_value_buf) {
+                               ctx.value = elem->key + roundup_key_size;
+                       } else {
+                               roundup_value_size = round_up(map->value_size, 8);
+                               pptr = htab_elem_get_ptr(elem, map->key_size);
+                               for_each_possible_cpu(cpu) {
+                                       bpf_long_memcpy(info->percpu_value_buf + off,
+                                                       per_cpu_ptr(pptr, cpu),
+                                                       roundup_value_size);
+                                       off += roundup_value_size;
+                               }
+                               ctx.value = info->percpu_value_buf;
+                       }
+               }
+               ret = bpf_iter_run_prog(prog, &ctx);
+       }
+       return ret;
+ }
+
+ static int bpf_hash_map_seq_show(struct seq_file *seq, void *v)
+ {
+       return __bpf_hash_map_seq_show(seq, v);
+ }
+
+ static void bpf_hash_map_seq_stop(struct seq_file *seq, void *v)
+ {
+       struct bpf_iter_seq_hash_map_info *info = seq->private;
+       if (!v)
+               (void)__bpf_hash_map_seq_show(seq, NULL);
+       else
+               htab_unlock_bucket(info->htab,
+                                  &info->htab->buckets[info->bucket_id],
+                                  info->flags);
+ }
+
+ static int bpf_iter_init_hash_map(void *priv_data,
+                                 struct bpf_iter_aux_info *aux)
+ {
+       struct bpf_iter_seq_hash_map_info *seq_info = priv_data;
+       struct bpf_map *map = aux->map;
+       void *value_buf;
+       u32 buf_size;
+       if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+           map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
+               buf_size = round_up(map->value_size, 8) * num_possible_cpus();
+               value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN);
+               if (!value_buf)
+                       return -ENOMEM;
+               seq_info->percpu_value_buf = value_buf;
+       }
+       seq_info->map = map;
+       seq_info->htab = container_of(map, struct bpf_htab, map);
+       return 0;
+ }
+
+ static void bpf_iter_fini_hash_map(void *priv_data)
+ {
+       struct bpf_iter_seq_hash_map_info *seq_info = priv_data;
+       kfree(seq_info->percpu_value_buf);
+ }
+
+ static const struct seq_operations bpf_hash_map_seq_ops = {
+       .start  = bpf_hash_map_seq_start,
+       .next   = bpf_hash_map_seq_next,
+       .stop   = bpf_hash_map_seq_stop,
+       .show   = bpf_hash_map_seq_show,
+ };
+
+ static const struct bpf_iter_seq_info iter_seq_info = {
+       .seq_ops                = &bpf_hash_map_seq_ops,
+       .init_seq_private       = bpf_iter_init_hash_map,
+       .fini_seq_private       = bpf_iter_fini_hash_map,
+       .seq_priv_size          = sizeof(struct bpf_iter_seq_hash_map_info),
+ };
+
  static int htab_map_btf_id;
  const struct bpf_map_ops htab_map_ops = {
        .map_alloc_check = htab_map_alloc_check,
        BATCH_OPS(htab),
        .map_btf_name = "bpf_htab",
        .map_btf_id = &htab_map_btf_id,
+       .iter_seq_info = &iter_seq_info,
  };
  
  static int htab_lru_map_btf_id;
@@@ -1649,6 -1834,7 +1840,7 @@@ const struct bpf_map_ops htab_lru_map_o
        BATCH_OPS(htab_lru),
        .map_btf_name = "bpf_htab",
        .map_btf_id = &htab_lru_map_btf_id,
+       .iter_seq_info = &iter_seq_info,
  };
  
  /* Called from eBPF program */
@@@ -1766,6 -1952,7 +1958,7 @@@ const struct bpf_map_ops htab_percpu_ma
        BATCH_OPS(htab_percpu),
        .map_btf_name = "bpf_htab",
        .map_btf_id = &htab_percpu_map_btf_id,
+       .iter_seq_info = &iter_seq_info,
  };
  
  static int htab_lru_percpu_map_btf_id;
@@@ -1781,6 -1968,7 +1974,7 @@@ const struct bpf_map_ops htab_lru_percp
        BATCH_OPS(htab_lru_percpu),
        .map_btf_name = "bpf_htab",
        .map_btf_id = &htab_lru_percpu_map_btf_id,
+       .iter_seq_info = &iter_seq_info,
  };
  
  static int fd_htab_map_alloc_check(union bpf_attr *attr)
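
The iter_seq_info wired up above is what a map-element iterator program runs
against: bpf_hash_map_seq_show() fires once per element with key/value pointers
filled in (pointing into percpu_value_buf for the percpu variants), plus one
final call with a NULL element. As a hedged sketch of the consumer side,
modeled on the selftests added in this same series (the u32/u64 key/value
types, the program name, and the "bpf_iter.h" helper header are assumptions,
not part of this diff):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include "bpf_iter.h"   /* selftests-style header providing BPF_SEQ_PRINTF */

char _license[] SEC("license") = "GPL";

/* Invoked once per element; key/value are NULL on the final call
 * (elem == NULL in __bpf_hash_map_seq_show() above).
 */
SEC("iter/bpf_map_elem")
int dump_hash_elem(struct bpf_iter__bpf_map_elem *ctx)
{
	__u32 *key = ctx->key;    /* assumes a u32-keyed ... */
	__u64 *val = ctx->value;  /* ... u64-valued hash map */

	if (!key || !val)
		return 0;

	BPF_SEQ_PRINTF(ctx->meta->seq, "%u: %llu\n", *key, *val);
	return 0;
}
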
diff --combined net/core/dev.c
index f7ef0f5c556986890c8d65466aee19442fe2d9f4,c8b911b101879a6fde010b4c6a804361f1eaaec0..7df6c9617321a3c5a25d9fdb5f661315677c555c
@@@ -3454,9 -3454,10 +3454,9 @@@ static netdev_features_t net_mpls_featu
  static netdev_features_t harmonize_features(struct sk_buff *skb,
        netdev_features_t features)
  {
 -      int tmp;
        __be16 type;
  
 -      type = skb_network_protocol(skb, &tmp);
 +      type = skb_network_protocol(skb, NULL);
        features = net_mpls_features(skb, features, type);
  
        if (skb->ip_summed != CHECKSUM_NONE &&
@@@ -5467,10 -5468,6 +5467,6 @@@ static int generic_xdp_install(struct n
                }
                break;
  
-       case XDP_QUERY_PROG:
-               xdp->prog_id = old ? old->aux->id : 0;
-               break;
        default:
                ret = -EINVAL;
                break;
@@@ -8715,214 -8712,464 +8711,489 @@@ int dev_change_proto_down_generic(struc
  }
  EXPORT_SYMBOL(dev_change_proto_down_generic);
  
- u32 __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op,
-                   enum bpf_netdev_command cmd)
 +/**
 + *    dev_change_proto_down_reason - update the protodown reason for a device
 + *
 + *    @dev: device
 + *    @mask: bit mask selecting the reason bits to update; zero means
 + *           replace the whole reason value
 + *    @value: new value for the selected reason bits
 + */
 +void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask,
 +                                u32 value)
 +{
 +      int b;
 +
 +      if (!mask) {
 +              dev->proto_down_reason = value;
 +      } else {
 +              for_each_set_bit(b, &mask, 32) {
 +                      if (value & (1 << b))
 +                              dev->proto_down_reason |= BIT(b);
 +                      else
 +                              dev->proto_down_reason &= ~BIT(b);
 +              }
 +      }
 +}
 +EXPORT_SYMBOL(dev_change_proto_down_reason);
 +
+ struct bpf_xdp_link {
+       struct bpf_link link;
+       struct net_device *dev; /* protected by rtnl_lock, no refcnt held */
+       int flags;
+ };
+
+ static enum bpf_xdp_mode dev_xdp_mode(u32 flags)
  {
-       struct netdev_bpf xdp;
+       if (flags & XDP_FLAGS_HW_MODE)
+               return XDP_MODE_HW;
+       if (flags & XDP_FLAGS_DRV_MODE)
+               return XDP_MODE_DRV;
+       return XDP_MODE_SKB;
+ }
  
-       if (!bpf_op)
-               return 0;
+ static bpf_op_t dev_xdp_bpf_op(struct net_device *dev, enum bpf_xdp_mode mode)
+ {
+       switch (mode) {
+       case XDP_MODE_SKB:
+               return generic_xdp_install;
+       case XDP_MODE_DRV:
+       case XDP_MODE_HW:
+               return dev->netdev_ops->ndo_bpf;
+       default:
+               return NULL;
+       }
+ }
  
-       memset(&xdp, 0, sizeof(xdp));
-       xdp.command = cmd;
+ static struct bpf_xdp_link *dev_xdp_link(struct net_device *dev,
+                                        enum bpf_xdp_mode mode)
+ {
+       return dev->xdp_state[mode].link;
+ }
+
+ static struct bpf_prog *dev_xdp_prog(struct net_device *dev,
+                                    enum bpf_xdp_mode mode)
+ {
+       struct bpf_xdp_link *link = dev_xdp_link(dev, mode);
+       if (link)
+               return link->link.prog;
+       return dev->xdp_state[mode].prog;
+ }
+
+ u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode)
+ {
+       struct bpf_prog *prog = dev_xdp_prog(dev, mode);
  
-       /* Query must always succeed. */
-       WARN_ON(bpf_op(dev, &xdp) < 0 && cmd == XDP_QUERY_PROG);
+       return prog ? prog->aux->id : 0;
+ }
  
-       return xdp.prog_id;
+ static void dev_xdp_set_link(struct net_device *dev, enum bpf_xdp_mode mode,
+                            struct bpf_xdp_link *link)
+ {
+       dev->xdp_state[mode].link = link;
+       dev->xdp_state[mode].prog = NULL;
  }
  
- static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
-                          struct netlink_ext_ack *extack, u32 flags,
-                          struct bpf_prog *prog)
+ static void dev_xdp_set_prog(struct net_device *dev, enum bpf_xdp_mode mode,
+                            struct bpf_prog *prog)
+ {
+       dev->xdp_state[mode].link = NULL;
+       dev->xdp_state[mode].prog = prog;
+ }
+
+ static int dev_xdp_install(struct net_device *dev, enum bpf_xdp_mode mode,
+                          bpf_op_t bpf_op, struct netlink_ext_ack *extack,
+                          u32 flags, struct bpf_prog *prog)
  {
-       bool non_hw = !(flags & XDP_FLAGS_HW_MODE);
-       struct bpf_prog *prev_prog = NULL;
        struct netdev_bpf xdp;
        int err;
  
-       if (non_hw) {
-               prev_prog = bpf_prog_by_id(__dev_xdp_query(dev, bpf_op,
-                                                          XDP_QUERY_PROG));
-               if (IS_ERR(prev_prog))
-                       prev_prog = NULL;
-       }
        memset(&xdp, 0, sizeof(xdp));
-       if (flags & XDP_FLAGS_HW_MODE)
-               xdp.command = XDP_SETUP_PROG_HW;
-       else
-               xdp.command = XDP_SETUP_PROG;
+       xdp.command = mode == XDP_MODE_HW ? XDP_SETUP_PROG_HW : XDP_SETUP_PROG;
        xdp.extack = extack;
        xdp.flags = flags;
        xdp.prog = prog;
  
+       /* Drivers assume refcnt is already incremented (i.e., prog pointer is
+        * "moved" into driver), so they don't increment it on their own, but
+        * they do decrement refcnt when program is detached or replaced.
+        * Given net_device also owns link/prog, we need to bump refcnt here
+        * to prevent drivers from underflowing it.
+        */
+       if (prog)
+               bpf_prog_inc(prog);
        err = bpf_op(dev, &xdp);
-       if (!err && non_hw)
-               bpf_prog_change_xdp(prev_prog, prog);
+       if (err) {
+               if (prog)
+                       bpf_prog_put(prog);
+               return err;
+       }
  
-       if (prev_prog)
-               bpf_prog_put(prev_prog);
+       if (mode != XDP_MODE_HW)
+               bpf_prog_change_xdp(dev_xdp_prog(dev, mode), prog);
  
-       return err;
+       return 0;
  }
  
  static void dev_xdp_uninstall(struct net_device *dev)
  {
-       struct netdev_bpf xdp;
-       bpf_op_t ndo_bpf;
+       struct bpf_xdp_link *link;
+       struct bpf_prog *prog;
+       enum bpf_xdp_mode mode;
+       bpf_op_t bpf_op;
  
-       /* Remove generic XDP */
-       WARN_ON(dev_xdp_install(dev, generic_xdp_install, NULL, 0, NULL));
+       ASSERT_RTNL();
  
-       /* Remove from the driver */
-       ndo_bpf = dev->netdev_ops->ndo_bpf;
-       if (!ndo_bpf)
-               return;
+       for (mode = XDP_MODE_SKB; mode < __MAX_XDP_MODE; mode++) {
+               prog = dev_xdp_prog(dev, mode);
+               if (!prog)
+                       continue;
  
-       memset(&xdp, 0, sizeof(xdp));
-       xdp.command = XDP_QUERY_PROG;
-       WARN_ON(ndo_bpf(dev, &xdp));
-       if (xdp.prog_id)
-               WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags,
-                                       NULL));
+               bpf_op = dev_xdp_bpf_op(dev, mode);
+               if (!bpf_op)
+                       continue;
  
-       /* Remove HW offload */
-       memset(&xdp, 0, sizeof(xdp));
-       xdp.command = XDP_QUERY_PROG_HW;
-       if (!ndo_bpf(dev, &xdp) && xdp.prog_id)
-               WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags,
-                                       NULL));
+               WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL));
+               /* auto-detach link from net device */
+               link = dev_xdp_link(dev, mode);
+               if (link)
+                       link->dev = NULL;
+               else
+                       bpf_prog_put(prog);
+               dev_xdp_set_link(dev, mode, NULL);
+       }
  }
  
- /**
-  *    dev_change_xdp_fd - set or clear a bpf program for a device rx path
-  *    @dev: device
-  *    @extack: netlink extended ack
-  *    @fd: new program fd or negative value to clear
-  *    @expected_fd: old program fd that userspace expects to replace or clear
-  *    @flags: xdp-related flags
-  *
-  *    Set or clear a bpf program for a device
-  */
- int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
-                     int fd, int expected_fd, u32 flags)
+ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack,
+                         struct bpf_xdp_link *link, struct bpf_prog *new_prog,
+                         struct bpf_prog *old_prog, u32 flags)
  {
-       const struct net_device_ops *ops = dev->netdev_ops;
-       enum bpf_netdev_command query;
-       u32 prog_id, expected_id = 0;
-       bpf_op_t bpf_op, bpf_chk;
-       struct bpf_prog *prog;
-       bool offload;
+       struct bpf_prog *cur_prog;
+       enum bpf_xdp_mode mode;
+       bpf_op_t bpf_op;
        int err;
  
        ASSERT_RTNL();
  
-       offload = flags & XDP_FLAGS_HW_MODE;
-       query = offload ? XDP_QUERY_PROG_HW : XDP_QUERY_PROG;
+       /* either link or prog attachment, never both */
+       if (link && (new_prog || old_prog))
+               return -EINVAL;
+       /* link supports only XDP mode flags */
+       if (link && (flags & ~XDP_FLAGS_MODES)) {
+               NL_SET_ERR_MSG(extack, "Invalid XDP flags for BPF link attachment");
+               return -EINVAL;
+       }
+       /* just one XDP mode bit should be set, zero defaults to SKB mode */
+       if (hweight32(flags & XDP_FLAGS_MODES) > 1) {
+               NL_SET_ERR_MSG(extack, "Only one XDP mode flag can be set");
+               return -EINVAL;
+       }
+       /* old_prog != NULL implies XDP_FLAGS_REPLACE is set */
+       if (old_prog && !(flags & XDP_FLAGS_REPLACE)) {
+               NL_SET_ERR_MSG(extack, "XDP_FLAGS_REPLACE is not specified");
+               return -EINVAL;
+       }
  
-       bpf_op = bpf_chk = ops->ndo_bpf;
-       if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE))) {
-               NL_SET_ERR_MSG(extack, "underlying driver does not support XDP in native mode");
-               return -EOPNOTSUPP;
+       mode = dev_xdp_mode(flags);
+       /* can't replace attached link */
+       if (dev_xdp_link(dev, mode)) {
+               NL_SET_ERR_MSG(extack, "Can't replace active BPF XDP link");
+               return -EBUSY;
        }
-       if (!bpf_op || (flags & XDP_FLAGS_SKB_MODE))
-               bpf_op = generic_xdp_install;
-       if (bpf_op == bpf_chk)
-               bpf_chk = generic_xdp_install;
-       prog_id = __dev_xdp_query(dev, bpf_op, query);
-       if (flags & XDP_FLAGS_REPLACE) {
-               if (expected_fd >= 0) {
-                       prog = bpf_prog_get_type_dev(expected_fd,
-                                                    BPF_PROG_TYPE_XDP,
-                                                    bpf_op == ops->ndo_bpf);
-                       if (IS_ERR(prog))
-                               return PTR_ERR(prog);
-                       expected_id = prog->aux->id;
-                       bpf_prog_put(prog);
-               }
  
-               if (prog_id != expected_id) {
-                       NL_SET_ERR_MSG(extack, "Active program does not match expected");
-                       return -EEXIST;
-               }
+       cur_prog = dev_xdp_prog(dev, mode);
+       /* can't replace attached prog with link */
+       if (link && cur_prog) {
+               NL_SET_ERR_MSG(extack, "Can't replace active XDP program with BPF link");
+               return -EBUSY;
+       }
+       if ((flags & XDP_FLAGS_REPLACE) && cur_prog != old_prog) {
+               NL_SET_ERR_MSG(extack, "Active program does not match expected");
+               return -EEXIST;
+       }
+       if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && cur_prog) {
+               NL_SET_ERR_MSG(extack, "XDP program already attached");
+               return -EBUSY;
        }
-       if (fd >= 0) {
-               if (!offload && __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG)) {
-                       NL_SET_ERR_MSG(extack, "native and generic XDP can't be active at the same time");
-                       return -EEXIST;
-               }
  
-               if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && prog_id) {
-                       NL_SET_ERR_MSG(extack, "XDP program already attached");
-                       return -EBUSY;
-               }
+       /* put effective new program into new_prog */
+       if (link)
+               new_prog = link->link.prog;
  
-               prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
-                                            bpf_op == ops->ndo_bpf);
-               if (IS_ERR(prog))
-                       return PTR_ERR(prog);
+       if (new_prog) {
+               bool offload = mode == XDP_MODE_HW;
+               enum bpf_xdp_mode other_mode = mode == XDP_MODE_SKB
+                                              ? XDP_MODE_DRV : XDP_MODE_SKB;
  
-               if (!offload && bpf_prog_is_dev_bound(prog->aux)) {
-                       NL_SET_ERR_MSG(extack, "using device-bound program without HW_MODE flag is not supported");
-                       bpf_prog_put(prog);
+               if (!offload && dev_xdp_prog(dev, other_mode)) {
+                       NL_SET_ERR_MSG(extack, "Native and generic XDP can't be active at the same time");
+                       return -EEXIST;
+               }
+               if (!offload && bpf_prog_is_dev_bound(new_prog->aux)) {
+                       NL_SET_ERR_MSG(extack, "Using device-bound program without HW_MODE flag is not supported");
                        return -EINVAL;
                }
-               if (prog->expected_attach_type == BPF_XDP_DEVMAP) {
+               if (new_prog->expected_attach_type == BPF_XDP_DEVMAP) {
                        NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs can not be attached to a device");
-                       bpf_prog_put(prog);
                        return -EINVAL;
                }
-               if (prog->expected_attach_type == BPF_XDP_CPUMAP) {
-                       NL_SET_ERR_MSG(extack,
-                                      "BPF_XDP_CPUMAP programs can not be attached to a device");
-                       bpf_prog_put(prog);
+               if (new_prog->expected_attach_type == BPF_XDP_CPUMAP) {
+                       NL_SET_ERR_MSG(extack, "BPF_XDP_CPUMAP programs can not be attached to a device");
                        return -EINVAL;
                }
+       }
  
-               /* prog->aux->id may be 0 for orphaned device-bound progs */
-               if (prog->aux->id && prog->aux->id == prog_id) {
-                       bpf_prog_put(prog);
-                       return 0;
+       /* don't call drivers if the effective program didn't change */
+       if (new_prog != cur_prog) {
+               bpf_op = dev_xdp_bpf_op(dev, mode);
+               if (!bpf_op) {
+                       NL_SET_ERR_MSG(extack, "Underlying driver does not support XDP in native mode");
+                       return -EOPNOTSUPP;
+               }
+               err = dev_xdp_install(dev, mode, bpf_op, extack, flags, new_prog);
+               if (err)
+                       return err;
+       }
+       if (link)
+               dev_xdp_set_link(dev, mode, link);
+       else
+               dev_xdp_set_prog(dev, mode, new_prog);
+       if (cur_prog)
+               bpf_prog_put(cur_prog);
+       return 0;
+ }
+
+ static int dev_xdp_attach_link(struct net_device *dev,
+                              struct netlink_ext_ack *extack,
+                              struct bpf_xdp_link *link)
+ {
+       return dev_xdp_attach(dev, extack, link, NULL, NULL, link->flags);
+ }
+
+ static int dev_xdp_detach_link(struct net_device *dev,
+                              struct netlink_ext_ack *extack,
+                              struct bpf_xdp_link *link)
+ {
+       enum bpf_xdp_mode mode;
+       bpf_op_t bpf_op;
+       ASSERT_RTNL();
+       mode = dev_xdp_mode(link->flags);
+       if (dev_xdp_link(dev, mode) != link)
+               return -EINVAL;
+       bpf_op = dev_xdp_bpf_op(dev, mode);
+       WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL));
+       dev_xdp_set_link(dev, mode, NULL);
+       return 0;
+ }
+
+ static void bpf_xdp_link_release(struct bpf_link *link)
+ {
+       struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
+       rtnl_lock();
+       /* if racing with net_device's teardown, xdp_link->dev might be
+        * already NULL, in which case link was already auto-detached
+        */
+       if (xdp_link->dev) {
+               WARN_ON(dev_xdp_detach_link(xdp_link->dev, NULL, xdp_link));
+               xdp_link->dev = NULL;
+       }
+       rtnl_unlock();
+ }
+
+ static int bpf_xdp_link_detach(struct bpf_link *link)
+ {
+       bpf_xdp_link_release(link);
+       return 0;
+ }
+
+ static void bpf_xdp_link_dealloc(struct bpf_link *link)
+ {
+       struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
+       kfree(xdp_link);
+ }
+
+ static void bpf_xdp_link_show_fdinfo(const struct bpf_link *link,
+                                    struct seq_file *seq)
+ {
+       struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
+       u32 ifindex = 0;
+       rtnl_lock();
+       if (xdp_link->dev)
+               ifindex = xdp_link->dev->ifindex;
+       rtnl_unlock();
+       seq_printf(seq, "ifindex:\t%u\n", ifindex);
+ }
+
+ static int bpf_xdp_link_fill_link_info(const struct bpf_link *link,
+                                      struct bpf_link_info *info)
+ {
+       struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
+       u32 ifindex = 0;
+       rtnl_lock();
+       if (xdp_link->dev)
+               ifindex = xdp_link->dev->ifindex;
+       rtnl_unlock();
+       info->xdp.ifindex = ifindex;
+       return 0;
+ }
+
+ static int bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog,
+                              struct bpf_prog *old_prog)
+ {
+       struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
+       enum bpf_xdp_mode mode;
+       bpf_op_t bpf_op;
+       int err = 0;
+       rtnl_lock();
+       /* link might have been auto-released already, so fail */
+       if (!xdp_link->dev) {
+               err = -ENOLINK;
+               goto out_unlock;
+       }
+       if (old_prog && link->prog != old_prog) {
+               err = -EPERM;
+               goto out_unlock;
+       }
+       old_prog = link->prog;
+       if (old_prog == new_prog) {
+               /* no-op, don't disturb drivers */
+               bpf_prog_put(new_prog);
+               goto out_unlock;
+       }
+       mode = dev_xdp_mode(xdp_link->flags);
+       bpf_op = dev_xdp_bpf_op(xdp_link->dev, mode);
+       err = dev_xdp_install(xdp_link->dev, mode, bpf_op, NULL,
+                             xdp_link->flags, new_prog);
+       if (err)
+               goto out_unlock;
+       old_prog = xchg(&link->prog, new_prog);
+       bpf_prog_put(old_prog);
+ out_unlock:
+       rtnl_unlock();
+       return err;
+ }
+
+ static const struct bpf_link_ops bpf_xdp_link_lops = {
+       .release = bpf_xdp_link_release,
+       .dealloc = bpf_xdp_link_dealloc,
+       .detach = bpf_xdp_link_detach,
+       .show_fdinfo = bpf_xdp_link_show_fdinfo,
+       .fill_link_info = bpf_xdp_link_fill_link_info,
+       .update_prog = bpf_xdp_link_update,
+ };
+
+ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+ {
+       struct net *net = current->nsproxy->net_ns;
+       struct bpf_link_primer link_primer;
+       struct bpf_xdp_link *link;
+       struct net_device *dev;
+       int err, fd;
+       dev = dev_get_by_index(net, attr->link_create.target_ifindex);
+       if (!dev)
+               return -EINVAL;
+       link = kzalloc(sizeof(*link), GFP_USER);
+       if (!link) {
+               err = -ENOMEM;
+               goto out_put_dev;
+       }
+       bpf_link_init(&link->link, BPF_LINK_TYPE_XDP, &bpf_xdp_link_lops, prog);
+       link->dev = dev;
+       link->flags = attr->link_create.flags;
+       err = bpf_link_prime(&link->link, &link_primer);
+       if (err) {
+               kfree(link);
+               goto out_put_dev;
+       }
+       rtnl_lock();
+       err = dev_xdp_attach_link(dev, NULL, link);
+       rtnl_unlock();
+       if (err) {
+               bpf_link_cleanup(&link_primer);
+               goto out_put_dev;
+       }
+       fd = bpf_link_settle(&link_primer);
+       /* link itself doesn't hold dev's refcnt to not complicate shutdown */
+       dev_put(dev);
+       return fd;
+ out_put_dev:
+       dev_put(dev);
+       return err;
+ }
+
+ /**
+  *    dev_change_xdp_fd - set or clear a bpf program for a device rx path
+  *    @dev: device
+  *    @extack: netlink extended ack
+  *    @fd: new program fd or negative value to clear
+  *    @expected_fd: old program fd that userspace expects to replace or clear
+  *    @flags: xdp-related flags
+  *
+  *    Set or clear a bpf program for a device
+  */
+ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
+                     int fd, int expected_fd, u32 flags)
+ {
+       enum bpf_xdp_mode mode = dev_xdp_mode(flags);
+       struct bpf_prog *new_prog = NULL, *old_prog = NULL;
+       int err;
+       ASSERT_RTNL();
+       if (fd >= 0) {
+               new_prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
+                                                mode != XDP_MODE_SKB);
+               if (IS_ERR(new_prog))
+                       return PTR_ERR(new_prog);
+       }
+       if (expected_fd >= 0) {
+               old_prog = bpf_prog_get_type_dev(expected_fd, BPF_PROG_TYPE_XDP,
+                                                mode != XDP_MODE_SKB);
+               if (IS_ERR(old_prog)) {
+                       err = PTR_ERR(old_prog);
+                       old_prog = NULL;
+                       goto err_out;
                }
-       } else {
-               if (!prog_id)
-                       return 0;
-               prog = NULL;
        }
  
-       err = dev_xdp_install(dev, bpf_op, extack, flags, prog);
-       if (err < 0 && prog)
-               bpf_prog_put(prog);
+       err = dev_xdp_attach(dev, extack, NULL, new_prog, old_prog, flags);
  
+ err_out:
+       if (err && new_prog)
+               bpf_prog_put(new_prog);
+       if (old_prog)
+               bpf_prog_put(old_prog);
        return err;
  }
  
diff --combined net/core/rtnetlink.c
index a54c3e0f2ee1bc5ac0492c31659bd2fb7e851dc3,58c484a28395c6be03e67a705da2a93f1328e2be..68e0682450c60dc7da96bda94517551f72f53f6f
@@@ -1000,16 -1000,6 +1000,16 @@@ static size_t rtnl_prop_list_size(cons
        return size;
  }
  
 +static size_t rtnl_proto_down_size(const struct net_device *dev)
 +{
 +      size_t size = nla_total_size(1);
 +
 +      if (dev->proto_down_reason)
 +              size += nla_total_size(0) + nla_total_size(4);
 +
 +      return size;
 +}
 +
  static noinline size_t if_nlmsg_size(const struct net_device *dev,
                                     u32 ext_filter_mask)
  {
               + nla_total_size(4)  /* IFLA_EVENT */
               + nla_total_size(4)  /* IFLA_NEW_NETNSID */
               + nla_total_size(4)  /* IFLA_NEW_IFINDEX */
 -             + nla_total_size(1)  /* IFLA_PROTO_DOWN */
 +             + rtnl_proto_down_size(dev)  /* proto down */
               + nla_total_size(4)  /* IFLA_TARGET_NETNSID */
               + nla_total_size(4)  /* IFLA_CARRIER_UP_COUNT */
               + nla_total_size(4)  /* IFLA_CARRIER_DOWN_COUNT */
@@@ -1426,13 -1416,12 +1426,12 @@@ static u32 rtnl_xdp_prog_skb(struct net
  
  static u32 rtnl_xdp_prog_drv(struct net_device *dev)
  {
-       return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf, XDP_QUERY_PROG);
+       return dev_xdp_prog_id(dev, XDP_MODE_DRV);
  }
  
  static u32 rtnl_xdp_prog_hw(struct net_device *dev)
  {
-       return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf,
-                              XDP_QUERY_PROG_HW);
+       return dev_xdp_prog_id(dev, XDP_MODE_HW);
  }
  
  static int rtnl_xdp_report_one(struct sk_buff *skb, struct net_device *dev,
@@@ -1668,35 -1657,6 +1667,35 @@@ nest_cancel
        return ret;
  }
  
 +static int rtnl_fill_proto_down(struct sk_buff *skb,
 +                              const struct net_device *dev)
 +{
 +      struct nlattr *pr;
 +      u32 preason;
 +
 +      if (nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down))
 +              goto nla_put_failure;
 +
 +      preason = dev->proto_down_reason;
 +      if (!preason)
 +              return 0;
 +
 +      pr = nla_nest_start(skb, IFLA_PROTO_DOWN_REASON);
 +      if (!pr)
 +              return -EMSGSIZE;
 +
 +      if (nla_put_u32(skb, IFLA_PROTO_DOWN_REASON_VALUE, preason)) {
 +              nla_nest_cancel(skb, pr);
 +              goto nla_put_failure;
 +      }
 +
 +      nla_nest_end(skb, pr);
 +      return 0;
 +
 +nla_put_failure:
 +      return -EMSGSIZE;
 +}
 +
  static int rtnl_fill_ifinfo(struct sk_buff *skb,
                            struct net_device *dev, struct net *src_net,
                            int type, u32 pid, u32 seq, u32 change,
            nla_put_u32(skb, IFLA_CARRIER_CHANGES,
                        atomic_read(&dev->carrier_up_count) +
                        atomic_read(&dev->carrier_down_count)) ||
 -          nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down) ||
            nla_put_u32(skb, IFLA_CARRIER_UP_COUNT,
                        atomic_read(&dev->carrier_up_count)) ||
            nla_put_u32(skb, IFLA_CARRIER_DOWN_COUNT,
                        atomic_read(&dev->carrier_down_count)))
                goto nla_put_failure;
  
 +      if (rtnl_fill_proto_down(skb, dev))
 +              goto nla_put_failure;
 +
        if (event != IFLA_EVENT_NONE) {
                if (nla_put_u32(skb, IFLA_EVENT, event))
                        goto nla_put_failure;
@@@ -1875,7 -1833,6 +1874,7 @@@ static const struct nla_policy ifla_pol
        [IFLA_ALT_IFNAME]       = { .type = NLA_STRING,
                                    .len = ALTIFNAMSIZ - 1 },
        [IFLA_PERM_ADDRESS]     = { .type = NLA_REJECT },
 +      [IFLA_PROTO_DOWN_REASON] = { .type = NLA_NESTED },
  };
  
  static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@@ -2525,67 -2482,6 +2524,67 @@@ static int do_set_master(struct net_dev
        return 0;
  }
  
 +static const struct nla_policy ifla_proto_down_reason_policy[IFLA_PROTO_DOWN_REASON_VALUE + 1] = {
 +      [IFLA_PROTO_DOWN_REASON_MASK]   = { .type = NLA_U32 },
 +      [IFLA_PROTO_DOWN_REASON_VALUE]  = { .type = NLA_U32 },
 +};
 +
 +static int do_set_proto_down(struct net_device *dev,
 +                           struct nlattr *nl_proto_down,
 +                           struct nlattr *nl_proto_down_reason,
 +                           struct netlink_ext_ack *extack)
 +{
 +      struct nlattr *pdreason[IFLA_PROTO_DOWN_REASON_MAX + 1];
 +      const struct net_device_ops *ops = dev->netdev_ops;
 +      unsigned long mask = 0;
 +      u32 value;
 +      bool proto_down;
 +      int err;
 +
 +      if (!ops->ndo_change_proto_down) {
 +              NL_SET_ERR_MSG(extack, "Protodown not supported by device");
 +              return -EOPNOTSUPP;
 +      }
 +
 +      if (nl_proto_down_reason) {
 +              err = nla_parse_nested_deprecated(pdreason,
 +                                                IFLA_PROTO_DOWN_REASON_MAX,
 +                                                nl_proto_down_reason,
 +                                                ifla_proto_down_reason_policy,
 +                                                NULL);
 +              if (err < 0)
 +                      return err;
 +
 +              if (!pdreason[IFLA_PROTO_DOWN_REASON_VALUE]) {
 +                      NL_SET_ERR_MSG(extack, "Invalid protodown reason value");
 +                      return -EINVAL;
 +              }
 +
 +              value = nla_get_u32(pdreason[IFLA_PROTO_DOWN_REASON_VALUE]);
 +
 +              if (pdreason[IFLA_PROTO_DOWN_REASON_MASK])
 +                      mask = nla_get_u32(pdreason[IFLA_PROTO_DOWN_REASON_MASK]);
 +
 +              dev_change_proto_down_reason(dev, mask, value);
 +      }
 +
 +      if (nl_proto_down) {
 +              proto_down = nla_get_u8(nl_proto_down);
 +
 +              /* Don't turn off protodown if there are active reasons */
 +              if (!proto_down && dev->proto_down_reason) {
 +                      NL_SET_ERR_MSG(extack, "Cannot clear protodown, active reasons");
 +                      return -EBUSY;
 +              }
 +              err = dev_change_proto_down(dev,
 +                                          proto_down);
 +              if (err)
 +                      return err;
 +      }
 +
 +      return 0;
 +}
 +
  #define DO_SETLINK_MODIFIED   0x01
  /* notify flag means notify + modified. */
  #define DO_SETLINK_NOTIFY     0x03
@@@ -2874,9 -2770,9 +2873,9 @@@ static int do_setlink(const struct sk_b
        }
        err = 0;
  
 -      if (tb[IFLA_PROTO_DOWN]) {
 -              err = dev_change_proto_down(dev,
 -                                          nla_get_u8(tb[IFLA_PROTO_DOWN]));
 +      if (tb[IFLA_PROTO_DOWN] || tb[IFLA_PROTO_DOWN_REASON]) {
 +              err = do_set_proto_down(dev, tb[IFLA_PROTO_DOWN],
 +                                      tb[IFLA_PROTO_DOWN_REASON], extack);
                if (err)
                        goto errout;
                status |= DO_SETLINK_NOTIFY;
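
From user space, a protodown reason rides in the same RTM_SETLINK message as
IFLA_PROTO_DOWN itself, mirroring the do_set_proto_down() parsing above. A
hedged libmnl sketch (not from this series; iproute2 later grew an equivalent
"protodown_reason" keyword, and error handling is trimmed here):

#include <libmnl/libmnl.h>
#include <linux/if_link.h>
#include <linux/rtnetlink.h>
#include <sys/socket.h>

/* Mark ifindex protocol-down and record why. */
int set_protodown_reason(unsigned int ifindex, __u32 mask, __u32 value)
{
	char buf[MNL_SOCKET_BUFFER_SIZE];
	struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
	struct mnl_socket *nl;
	struct ifinfomsg *ifi;
	struct nlattr *nest;

	nlh->nlmsg_type = RTM_SETLINK;
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
	ifi = mnl_nlmsg_put_extra_header(nlh, sizeof(*ifi));
	ifi->ifi_family = AF_UNSPEC;
	ifi->ifi_index = ifindex;

	/* the protodown flag itself stays a plain u8 attribute */
	mnl_attr_put_u8(nlh, IFLA_PROTO_DOWN, 1);

	/* the reason travels in the new nested attribute; the kernel
	 * applies value under mask via dev_change_proto_down_reason()
	 */
	nest = mnl_attr_nest_start(nlh, IFLA_PROTO_DOWN_REASON);
	mnl_attr_put_u32(nlh, IFLA_PROTO_DOWN_REASON_MASK, mask);
	mnl_attr_put_u32(nlh, IFLA_PROTO_DOWN_REASON_VALUE, value);
	mnl_attr_nest_end(nlh, nest);

	nl = mnl_socket_open(NETLINK_ROUTE);
	if (!nl || mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0)
		return -1;
	if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0)
		return -1;
	mnl_socket_close(nl);
	return 0;
}
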
diff --combined net/ipv6/route.c
index 48d499d763fad9d2a19d8ebf620162a1c84da375,8bfc57b0802a9fe184b07d18521879e3f7a518c2..5e7e25e2523ac23b1613bbb01c742480814595fd
@@@ -3686,14 -3686,14 +3686,14 @@@ static struct fib6_info *ip6_route_info
        rt->fib6_src.plen = cfg->fc_src_len;
  #endif
        if (nh) {
 -              if (!nexthop_get(nh)) {
 -                      NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
 -                      goto out;
 -              }
                if (rt->fib6_src.plen) {
                        NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing");
                        goto out;
                }
 +              if (!nexthop_get(nh)) {
 +                      NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
 +                      goto out;
 +              }
                rt->nh = nh;
                fib6_nh = nexthop_fib6_nh(rt->nh);
        } else {
@@@ -6427,17 -6427,21 +6427,21 @@@ DEFINE_BPF_ITER_FUNC(ipv6_route, struc
  BTF_ID_LIST(btf_fib6_info_id)
  BTF_ID(struct, fib6_info)
  
- static struct bpf_iter_reg ipv6_route_reg_info = {
-       .target                 = "ipv6_route",
+ static const struct bpf_iter_seq_info ipv6_route_seq_info = {
        .seq_ops                = &ipv6_route_seq_ops,
        .init_seq_private       = bpf_iter_init_seq_net,
        .fini_seq_private       = bpf_iter_fini_seq_net,
        .seq_priv_size          = sizeof(struct ipv6_route_iter),
+ };
+
+ static struct bpf_iter_reg ipv6_route_reg_info = {
+       .target                 = "ipv6_route",
        .ctx_arg_info_size      = 1,
        .ctx_arg_info           = {
                { offsetof(struct bpf_iter__ipv6_route, rt),
                  PTR_TO_BTF_ID_OR_NULL },
        },
+       .seq_info               = &ipv6_route_seq_info,
  };
  
  static int __init bpf_iter_register(void)
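
Splitting seq_info out of bpf_iter_reg lets one target serve both a pinned
bpffs file and an anonymous iterator FD. A hedged sketch of the anonymous path
against this ipv6_route target (assumes an already-loaded
SEC("iter/ipv6_route") program; error paths are abbreviated):

#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include <stdio.h>
#include <unistd.h>

/* Stream the iterator's seq_file output to stdout. */
int cat_ipv6_routes(struct bpf_program *prog)
{
	struct bpf_link *link;
	char buf[4096];
	ssize_t n;
	int iter_fd;

	link = bpf_program__attach_iter(prog, NULL);    /* LINK_CREATE */
	if (libbpf_get_error(link))
		return -1;

	iter_fd = bpf_iter_create(bpf_link__fd(link));  /* BPF_ITER_CREATE */
	if (iter_fd < 0) {
		bpf_link__destroy(link);
		return -1;
	}

	/* each read() walks ipv6_route_seq_ops and runs the prog per route */
	while ((n = read(iter_fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);

	close(iter_fd);
	bpf_link__destroy(link);
	return 0;
}
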