Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
author     David S. Miller <[email protected]>
           Wed, 10 Feb 2021 21:30:12 +0000 (13:30 -0800)
committer  David S. Miller <[email protected]>
           Wed, 10 Feb 2021 21:30:12 +0000 (13:30 -0800)
23 files changed:
MAINTAINERS
Makefile
drivers/net/dsa/ocelot/felix.c
drivers/net/ethernet/amazon/ena/ena_netdev.c
drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
drivers/net/ethernet/ibm/ibmvnic.c
drivers/net/ethernet/mscc/ocelot.c
drivers/net/hyperv/netvsc.c
drivers/net/hyperv/rndis_filter.c
drivers/net/ipa/gsi.c
drivers/net/usb/qmi_wwan.c
include/linux/netdevice.h
include/net/switchdev.h
include/soc/mscc/ocelot.h
kernel/bpf/stackmap.c
kernel/bpf/verifier.c
net/bridge/br_mrp.c
net/core/dev.c
net/dsa/dsa2.c
net/netfilter/nf_flow_table_core.c
net/netfilter/nf_tables_api.c

diff --combined MAINTAINERS
index cbf4b94f89d4b7421f5e8e0757d3fe9965b8c89b,64c7169db617604f0b6d0490a9ef0371f14dd8ca..0bbd95b73c39392c1329119f27fa9871093610b6
@@@ -2616,8 -2616,8 +2616,8 @@@ S:      Maintaine
  F:    drivers/power/reset/keystone-reset.c
  
  ARM/TEXAS INSTRUMENTS K3 ARCHITECTURE
- M:    Tero Kristo <[email protected]>
  M:    Nishanth Menon <[email protected]>
+ M:    Tero Kristo <[email protected]>
  L:    [email protected] (moderated for non-subscribers)
  S:    Supported
  F:    Documentation/devicetree/bindings/arm/ti/k3.yaml
@@@ -2787,14 -2787,6 +2787,14 @@@ F:    arch/arm64
  F:    tools/testing/selftests/arm64/
  X:    arch/arm64/boot/dts/
  
 +ARROW SPEEDCHIPS XRS7000 SERIES ETHERNET SWITCH DRIVER
 +M:    George McCollister <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/net/dsa/arrow,xrs700x.yaml
 +F:    drivers/net/dsa/xrs700x/*
 +F:    net/dsa/tag_xrs700x.c
 +
  AS3645A LED FLASH CONTROLLER DRIVER
  M:    Sakari Ailus <[email protected]>
  L:    [email protected]
@@@ -3407,7 -3399,6 +3407,7 @@@ L:      [email protected] (sub
  S:    Supported
  F:    Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml
  F:    drivers/net/dsa/b53/*
 +F:    include/linux/dsa/brcm.h
  F:    include/linux/platform_data/b53.h
  
  BROADCOM BCM2711/BCM2835 ARM ARCHITECTURE
@@@ -3445,15 -3436,6 +3445,15 @@@ F:    Documentation/devicetree/bindings/mi
  F:    arch/mips/bcm47xx/*
  F:    arch/mips/include/asm/mach-bcm47xx/*
  
 +BROADCOM BCM4908 ETHERNET DRIVER
 +M:    Rafał Miłecki <[email protected]>
 +M:    [email protected]
 +L:    [email protected]
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/net/brcm,bcm4908enet.yaml
 +F:    drivers/net/ethernet/broadcom/bcm4908enet.*
 +F:    drivers/net/ethernet/broadcom/unimac.h
 +
  BROADCOM BCM5301X ARM ARCHITECTURE
  M:    Hauke Mehrtens <[email protected]>
  M:    Rafał Miłecki <[email protected]>
@@@ -3642,7 -3624,6 +3642,7 @@@ S:      Supporte
  F:    Documentation/devicetree/bindings/net/brcm,bcmgenet.txt
  F:    Documentation/devicetree/bindings/net/brcm,unimac-mdio.txt
  F:    drivers/net/ethernet/broadcom/genet/
 +F:    drivers/net/ethernet/broadcom/unimac.h
  F:    drivers/net/mdio/mdio-bcm-unimac.c
  F:    include/linux/platform_data/bcmgenet.h
  F:    include/linux/platform_data/mdio-bcm-unimac.h
@@@ -3676,15 -3657,6 +3676,15 @@@ N:    bcm8831
  N:    hr2
  N:    stingray
  
 +BROADCOM IPROC GBIT ETHERNET DRIVER
 +M:    Rafał Miłecki <[email protected]>
 +M:    [email protected]
 +L:    [email protected]
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/net/brcm,amac.txt
 +F:    drivers/net/ethernet/broadcom/bgmac*
 +F:    drivers/net/ethernet/broadcom/unimac.h
 +
  BROADCOM KONA GPIO DRIVER
  M:    Ray Jui <[email protected]>
  L:    [email protected]
@@@ -3764,7 -3736,6 +3764,7 @@@ L:      [email protected]
  L:    [email protected]
  S:    Supported
  F:    drivers/net/ethernet/broadcom/bcmsysport.*
 +F:    drivers/net/ethernet/broadcom/unimac.h
  
  BROADCOM TG3 GIGABIT ETHERNET DRIVER
  M:    Siva Reddy Kallam <[email protected]>
@@@ -3959,10 -3930,8 +3959,10 @@@ T:    git git://git.kernel.org/pub/scm/lin
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can-next.git
  F:    Documentation/devicetree/bindings/net/can/
  F:    drivers/net/can/
 +F:    include/linux/can/bittiming.h
  F:    include/linux/can/dev.h
  F:    include/linux/can/led.h
 +F:    include/linux/can/length.h
  F:    include/linux/can/platform/
  F:    include/linux/can/rx-offload.h
  F:    include/uapi/linux/can/error.h
@@@ -3978,7 -3947,6 +3978,7 @@@ W:      https://github.com/linux-ca
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can.git
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can-next.git
  F:    Documentation/networking/can.rst
 +F:    include/linux/can/can-ml.h
  F:    include/linux/can/core.h
  F:    include/linux/can/skb.h
  F:    include/net/netns/can.h
@@@ -4336,7 -4304,7 +4336,7 @@@ S:      Maintaine
  F:    .clang-format
  
  CLANG/LLVM BUILD SUPPORT
- M:    Nathan Chancellor <nat[email protected]>
+ M:    Nathan Chancellor <nat[email protected]>
  M:    Nick Desaulniers <[email protected]>
  L:    [email protected]
  S:    Supported
@@@ -6506,9 -6474,9 +6506,9 @@@ S:      Maintaine
  F:    drivers/edac/skx_*.[ch]
  
  EDAC-TI
- M:    Tero Kristo <[email protected]>
+ M:    Tero Kristo <[email protected]>
  L:    [email protected]
- S:    Maintained
+ S:    Odd Fixes
  F:    drivers/edac/ti_edac.c
  
  EDIROL UA-101/UA-1000 DRIVER
@@@ -9591,7 -9559,7 +9591,7 @@@ F:      Documentation/hwmon/k8temp.rs
  F:    drivers/hwmon/k8temp.c
  
  KASAN
- M:    Andrey Ryabinin <aryabinin@virtuozzo.com>
+ M:    Andrey Ryabinin <ryabinin.a.a@gmail.com>
  R:    Alexander Potapenko <[email protected]>
  R:    Dmitry Vyukov <[email protected]>
  L:    [email protected]
@@@ -12549,14 -12517,6 +12549,14 @@@ F: include/net/nfc
  F:    include/uapi/linux/nfc.h
  F:    net/nfc/
  
 +NFC VIRTUAL NCI DEVICE DRIVER
 +M:    Bongsu Jeon <[email protected]>
 +L:    [email protected]
 +L:    [email protected] (moderated for non-subscribers)
 +S:    Supported
 +F:    drivers/nfc/virtual_ncidev.c
 +F:    tools/testing/selftests/nci/
 +
  NFS, SUNRPC, AND LOCKD CLIENTS
  M:    Trond Myklebust <[email protected]>
  M:    Anna Schumaker <[email protected]>
@@@ -12868,7 -12828,6 +12868,7 @@@ F:   drivers/net/dsa/ocelot/
  F:    drivers/net/ethernet/mscc/
  F:    include/soc/mscc/ocelot*
  F:    net/dsa/tag_ocelot.c
 +F:    net/dsa/tag_ocelot_8021q.c
  F:    tools/testing/selftests/drivers/net/ocelot/*
  
  OCXL (Open Coherent Accelerator Processor Interface OpenCAPI) DRIVER
@@@ -17613,7 -17572,7 +17613,7 @@@ F:   include/linux/dma/k3-psil.
  
  TEXAS INSTRUMENTS' SYSTEM CONTROL INTERFACE (TISCI) PROTOCOL DRIVER
  M:    Nishanth Menon <[email protected]>
- M:    Tero Kristo <[email protected]>
+ M:    Tero Kristo <[email protected]>
  M:    Santosh Shilimkar <[email protected]>
  L:    [email protected]
  S:    Maintained
@@@ -17757,9 -17716,9 +17757,9 @@@ S:   Maintaine
  F:    drivers/clk/clk-cdce706.c
  
  TI CLOCK DRIVER
- M:    Tero Kristo <[email protected]>
+ M:    Tero Kristo <[email protected]>
  L:    [email protected]
- S:    Maintained
+ S:    Odd Fixes
  F:    drivers/clk/ti/
  F:    include/linux/clk/ti.h
  
@@@ -17885,7 -17844,7 +17885,7 @@@ M:   Dan Murphy <[email protected]
  L:    [email protected]
  S:    Maintained
  F:    Documentation/devicetree/bindings/net/can/tcan4x5x.txt
 -F:    drivers/net/can/m_can/tcan4x5x.c
 +F:    drivers/net/can/m_can/tcan4x5x*
  
  TI TRF7970A NFC DRIVER
  M:    Mark Greer <[email protected]>
diff --combined Makefile
index 54b1ae50c817ee6c7ddcc3dd7e9e9a794c8912ef,ade44ac4cc2ffa6b0d679e559d22a73907041e1a..b83df651018b065c9a5cda5484332afab1b08266
+++ b/Makefile
@@@ -2,7 -2,7 +2,7 @@@
  VERSION = 5
  PATCHLEVEL = 11
  SUBLEVEL = 0
- EXTRAVERSION = -rc6
+ EXTRAVERSION = -rc7
  NAME = Kleptomaniac Octopus
  
  # *DOCUMENTATION*
@@@ -452,7 -452,6 +452,6 @@@ AWK                = aw
  INSTALLKERNEL  := installkernel
  DEPMOD                = depmod
  PERL          = perl
- PYTHON                = python
  PYTHON3               = python3
  CHECK         = sparse
  BASH          = bash
@@@ -508,7 -507,7 +507,7 @@@ CLANG_FLAGS :
  
  export ARCH SRCARCH CONFIG_SHELL BASH HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE LD CC
  export CPP AR NM STRIP OBJCOPY OBJDUMP READELF PAHOLE RESOLVE_BTFIDS LEX YACC AWK INSTALLKERNEL
- export PERL PYTHON PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX
+ export PERL PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX
  export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ ZSTD
  export KBUILD_HOSTCXXFLAGS KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS LDFLAGS_MODULE
  
@@@ -649,8 -648,7 +648,8 @@@ ifeq ($(KBUILD_EXTMOD),
  core-y                := init/ usr/
  drivers-y     := drivers/ sound/
  drivers-$(CONFIG_SAMPLES) += samples/
 -drivers-y     += net/ virt/
 +drivers-$(CONFIG_NET) += net/
 +drivers-y     += virt/
  libs-y                := lib/
  endif # KBUILD_EXTMOD
  
@@@ -813,10 -811,12 +812,12 @@@ KBUILD_CFLAGS   += -ftrivial-auto-var-ini
  KBUILD_CFLAGS += -enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang
  endif
  
+ DEBUG_CFLAGS  :=
  # Workaround for GCC versions < 5.0
  # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61801
  ifdef CONFIG_CC_IS_GCC
- DEBUG_CFLAGS  := $(call cc-ifversion, -lt, 0500, $(call cc-option, -fno-var-tracking-assignments))
+ DEBUG_CFLAGS  += $(call cc-ifversion, -lt, 0500, $(call cc-option, -fno-var-tracking-assignments))
  endif
  
  ifdef CONFIG_DEBUG_INFO
@@@ -949,12 -949,6 +950,6 @@@ KBUILD_CFLAGS   += $(call cc-option,-We
  # change __FILE__ to the relative path from the srctree
  KBUILD_CPPFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
  
- # ensure -fcf-protection is disabled when using retpoline as it is
- # incompatible with -mindirect-branch=thunk-extern
- ifdef CONFIG_RETPOLINE
- KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none)
- endif
  # include additional Makefiles when needed
  include-y                     := scripts/Makefile.extrawarn
  include-$(CONFIG_KASAN)               += scripts/Makefile.kasan
index 1bd5aea12b252fb9f99a89875cdc09dfdbc0350f,45fdb1256dbfeb6160e1e3f6aafddc657677b786..386468e66c417c0ce62eca5e0ddc615861494da4
@@@ -1,5 -1,5 +1,5 @@@
  // SPDX-License-Identifier: GPL-2.0
 -/* Copyright 2019 NXP Semiconductors
 +/* Copyright 2019-2021 NXP Semiconductors
   *
   * This is an umbrella module for all network switches that are
   * register-compatible with Ocelot and that perform I/O to their host CPU
@@@ -13,7 -13,6 +13,7 @@@
  #include <soc/mscc/ocelot_ana.h>
  #include <soc/mscc/ocelot_ptp.h>
  #include <soc/mscc/ocelot.h>
 +#include <linux/dsa/8021q.h>
  #include <linux/platform_device.h>
  #include <linux/packing.h>
  #include <linux/module.h>
  #include <net/dsa.h>
  #include "felix.h"
  
 +static int felix_tag_8021q_rxvlan_add(struct felix *felix, int port, u16 vid,
 +                                    bool pvid, bool untagged)
 +{
 +      struct ocelot_vcap_filter *outer_tagging_rule;
 +      struct ocelot *ocelot = &felix->ocelot;
 +      struct dsa_switch *ds = felix->ds;
 +      int key_length, upstream, err;
 +
 +      /* We don't need to install the rxvlan into the other ports' filtering
 +       * tables, because we're just pushing the rxvlan when sending towards
 +       * the CPU
 +       */
 +      if (!pvid)
 +              return 0;
 +
 +      key_length = ocelot->vcap[VCAP_ES0].keys[VCAP_ES0_IGR_PORT].length;
 +      upstream = dsa_upstream_port(ds, port);
 +
 +      outer_tagging_rule = kzalloc(sizeof(struct ocelot_vcap_filter),
 +                                   GFP_KERNEL);
 +      if (!outer_tagging_rule)
 +              return -ENOMEM;
 +
 +      outer_tagging_rule->key_type = OCELOT_VCAP_KEY_ANY;
 +      outer_tagging_rule->prio = 1;
 +      outer_tagging_rule->id.cookie = port;
 +      outer_tagging_rule->id.tc_offload = false;
 +      outer_tagging_rule->block_id = VCAP_ES0;
 +      outer_tagging_rule->type = OCELOT_VCAP_FILTER_OFFLOAD;
 +      outer_tagging_rule->lookup = 0;
 +      outer_tagging_rule->ingress_port.value = port;
 +      outer_tagging_rule->ingress_port.mask = GENMASK(key_length - 1, 0);
 +      outer_tagging_rule->egress_port.value = upstream;
 +      outer_tagging_rule->egress_port.mask = GENMASK(key_length - 1, 0);
 +      outer_tagging_rule->action.push_outer_tag = OCELOT_ES0_TAG;
 +      outer_tagging_rule->action.tag_a_tpid_sel = OCELOT_TAG_TPID_SEL_8021AD;
 +      outer_tagging_rule->action.tag_a_vid_sel = 1;
 +      outer_tagging_rule->action.vid_a_val = vid;
 +
 +      err = ocelot_vcap_filter_add(ocelot, outer_tagging_rule, NULL);
 +      if (err)
 +              kfree(outer_tagging_rule);
 +
 +      return err;
 +}
 +
 +static int felix_tag_8021q_txvlan_add(struct felix *felix, int port, u16 vid,
 +                                    bool pvid, bool untagged)
 +{
 +      struct ocelot_vcap_filter *untagging_rule, *redirect_rule;
 +      struct ocelot *ocelot = &felix->ocelot;
 +      struct dsa_switch *ds = felix->ds;
 +      int upstream, err;
 +
 +      /* tag_8021q.c assumes we are implementing this via port VLAN
 +       * membership, which we aren't. So we don't need to add any VCAP filter
 +       * for the CPU port.
 +       */
 +      if (ocelot->ports[port]->is_dsa_8021q_cpu)
 +              return 0;
 +
 +      untagging_rule = kzalloc(sizeof(struct ocelot_vcap_filter), GFP_KERNEL);
 +      if (!untagging_rule)
 +              return -ENOMEM;
 +
 +      redirect_rule = kzalloc(sizeof(struct ocelot_vcap_filter), GFP_KERNEL);
 +      if (!redirect_rule) {
 +              kfree(untagging_rule);
 +              return -ENOMEM;
 +      }
 +
 +      upstream = dsa_upstream_port(ds, port);
 +
 +      untagging_rule->key_type = OCELOT_VCAP_KEY_ANY;
 +      untagging_rule->ingress_port_mask = BIT(upstream);
 +      untagging_rule->vlan.vid.value = vid;
 +      untagging_rule->vlan.vid.mask = VLAN_VID_MASK;
 +      untagging_rule->prio = 1;
 +      untagging_rule->id.cookie = port;
 +      untagging_rule->id.tc_offload = false;
 +      untagging_rule->block_id = VCAP_IS1;
 +      untagging_rule->type = OCELOT_VCAP_FILTER_OFFLOAD;
 +      untagging_rule->lookup = 0;
 +      untagging_rule->action.vlan_pop_cnt_ena = true;
 +      untagging_rule->action.vlan_pop_cnt = 1;
 +      untagging_rule->action.pag_override_mask = 0xff;
 +      untagging_rule->action.pag_val = port;
 +
 +      err = ocelot_vcap_filter_add(ocelot, untagging_rule, NULL);
 +      if (err) {
 +              kfree(untagging_rule);
 +              kfree(redirect_rule);
 +              return err;
 +      }
 +
 +      redirect_rule->key_type = OCELOT_VCAP_KEY_ANY;
 +      redirect_rule->ingress_port_mask = BIT(upstream);
 +      redirect_rule->pag = port;
 +      redirect_rule->prio = 1;
 +      redirect_rule->id.cookie = port;
 +      redirect_rule->id.tc_offload = false;
 +      redirect_rule->block_id = VCAP_IS2;
 +      redirect_rule->type = OCELOT_VCAP_FILTER_OFFLOAD;
 +      redirect_rule->lookup = 0;
 +      redirect_rule->action.mask_mode = OCELOT_MASK_MODE_REDIRECT;
 +      redirect_rule->action.port_mask = BIT(port);
 +
 +      err = ocelot_vcap_filter_add(ocelot, redirect_rule, NULL);
 +      if (err) {
 +              ocelot_vcap_filter_del(ocelot, untagging_rule);
 +              kfree(redirect_rule);
 +              return err;
 +      }
 +
 +      return 0;
 +}
 +
 +static int felix_tag_8021q_vlan_add(struct dsa_switch *ds, int port, u16 vid,
 +                                  u16 flags)
 +{
 +      bool untagged = flags & BRIDGE_VLAN_INFO_UNTAGGED;
 +      bool pvid = flags & BRIDGE_VLAN_INFO_PVID;
 +      struct ocelot *ocelot = ds->priv;
 +
 +      if (vid_is_dsa_8021q_rxvlan(vid))
 +              return felix_tag_8021q_rxvlan_add(ocelot_to_felix(ocelot),
 +                                                port, vid, pvid, untagged);
 +
 +      if (vid_is_dsa_8021q_txvlan(vid))
 +              return felix_tag_8021q_txvlan_add(ocelot_to_felix(ocelot),
 +                                                port, vid, pvid, untagged);
 +
 +      return 0;
 +}
 +
 +static int felix_tag_8021q_rxvlan_del(struct felix *felix, int port, u16 vid)
 +{
 +      struct ocelot_vcap_filter *outer_tagging_rule;
 +      struct ocelot_vcap_block *block_vcap_es0;
 +      struct ocelot *ocelot = &felix->ocelot;
 +
 +      block_vcap_es0 = &ocelot->block[VCAP_ES0];
 +
 +      outer_tagging_rule = ocelot_vcap_block_find_filter_by_id(block_vcap_es0,
 +                                                               port, false);
 +      /* In rxvlan_add, we had the "if (!pvid) return 0" logic to avoid
 +       * installing outer tagging ES0 rules where they weren't needed.
 +       * But in rxvlan_del, the API doesn't give us the "flags" anymore,
 +       * so that forces us to be slightly sloppy here, and just assume that
 +       * if we didn't find an outer_tagging_rule it means that there was
 +       * none in the first place, i.e. rxvlan_del is called on a non-pvid
 +       * port. This is most probably true though.
 +       */
 +      if (!outer_tagging_rule)
 +              return 0;
 +
 +      return ocelot_vcap_filter_del(ocelot, outer_tagging_rule);
 +}
 +
 +static int felix_tag_8021q_txvlan_del(struct felix *felix, int port, u16 vid)
 +{
 +      struct ocelot_vcap_filter *untagging_rule, *redirect_rule;
 +      struct ocelot_vcap_block *block_vcap_is1;
 +      struct ocelot_vcap_block *block_vcap_is2;
 +      struct ocelot *ocelot = &felix->ocelot;
 +      int err;
 +
 +      if (ocelot->ports[port]->is_dsa_8021q_cpu)
 +              return 0;
 +
 +      block_vcap_is1 = &ocelot->block[VCAP_IS1];
 +      block_vcap_is2 = &ocelot->block[VCAP_IS2];
 +
 +      untagging_rule = ocelot_vcap_block_find_filter_by_id(block_vcap_is1,
 +                                                           port, false);
 +      if (!untagging_rule)
 +              return 0;
 +
 +      err = ocelot_vcap_filter_del(ocelot, untagging_rule);
 +      if (err)
 +              return err;
 +
 +      redirect_rule = ocelot_vcap_block_find_filter_by_id(block_vcap_is2,
 +                                                          port, false);
 +      if (!redirect_rule)
 +              return 0;
 +
 +      return ocelot_vcap_filter_del(ocelot, redirect_rule);
 +}
 +
 +static int felix_tag_8021q_vlan_del(struct dsa_switch *ds, int port, u16 vid)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +
 +      if (vid_is_dsa_8021q_rxvlan(vid))
 +              return felix_tag_8021q_rxvlan_del(ocelot_to_felix(ocelot),
 +                                                port, vid);
 +
 +      if (vid_is_dsa_8021q_txvlan(vid))
 +              return felix_tag_8021q_txvlan_del(ocelot_to_felix(ocelot),
 +                                                port, vid);
 +
 +      return 0;
 +}
 +
 +static const struct dsa_8021q_ops felix_tag_8021q_ops = {
 +      .vlan_add       = felix_tag_8021q_vlan_add,
 +      .vlan_del       = felix_tag_8021q_vlan_del,
 +};
 +
 +/* Alternatively to using the NPI functionality, that same hardware MAC
 + * connected internally to the enetc or fman DSA master can be configured to
 + * use the software-defined tag_8021q frame format. As far as the hardware is
 + * concerned, it thinks it is a "dumb switch" - the queues of the CPU port
 + * module are now disconnected from it, but can still be accessed through
 + * register-based MMIO.
 + */
 +static void felix_8021q_cpu_port_init(struct ocelot *ocelot, int port)
 +{
 +      ocelot->ports[port]->is_dsa_8021q_cpu = true;
 +      ocelot->npi = -1;
 +
 +      /* Overwrite PGID_CPU with the non-tagging port */
 +      ocelot_write_rix(ocelot, BIT(port), ANA_PGID_PGID, PGID_CPU);
 +
 +      ocelot_apply_bridge_fwd_mask(ocelot);
 +}
 +
 +static void felix_8021q_cpu_port_deinit(struct ocelot *ocelot, int port)
 +{
 +      ocelot->ports[port]->is_dsa_8021q_cpu = false;
 +
 +      /* Restore PGID_CPU */
 +      ocelot_write_rix(ocelot, BIT(ocelot->num_phys_ports), ANA_PGID_PGID,
 +                       PGID_CPU);
 +
 +      ocelot_apply_bridge_fwd_mask(ocelot);
 +}
 +
 +static int felix_setup_tag_8021q(struct dsa_switch *ds, int cpu)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +      struct felix *felix = ocelot_to_felix(ocelot);
 +      unsigned long cpu_flood;
 +      int port, err;
 +
 +      felix_8021q_cpu_port_init(ocelot, cpu);
 +
 +      for (port = 0; port < ds->num_ports; port++) {
 +              if (dsa_is_unused_port(ds, port))
 +                      continue;
 +
 +              /* This overwrites ocelot_init():
 +               * Do not forward BPDU frames to the CPU port module,
 +               * for 2 reasons:
 +               * - When these packets are injected from the tag_8021q
 +               *   CPU port, we want them to go out, not loop back
 +               *   into the system.
 +               * - STP traffic ingressing on a user port should go to
 +               *   the tag_8021q CPU port, not to the hardware CPU
 +               *   port module.
 +               */
 +              ocelot_write_gix(ocelot,
 +                               ANA_PORT_CPU_FWD_BPDU_CFG_BPDU_REDIR_ENA(0),
 +                               ANA_PORT_CPU_FWD_BPDU_CFG, port);
 +      }
 +
 +      /* In tag_8021q mode, the CPU port module is unused. So we
 +       * want to disable flooding of any kind to the CPU port module,
 +       * since packets going there will end in a black hole.
 +       */
 +      cpu_flood = ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports));
 +      ocelot_rmw_rix(ocelot, 0, cpu_flood, ANA_PGID_PGID, PGID_UC);
 +      ocelot_rmw_rix(ocelot, 0, cpu_flood, ANA_PGID_PGID, PGID_MC);
 +
 +      felix->dsa_8021q_ctx = kzalloc(sizeof(*felix->dsa_8021q_ctx),
 +                                     GFP_KERNEL);
 +      if (!felix->dsa_8021q_ctx)
 +              return -ENOMEM;
 +
 +      felix->dsa_8021q_ctx->ops = &felix_tag_8021q_ops;
 +      felix->dsa_8021q_ctx->proto = htons(ETH_P_8021AD);
 +      felix->dsa_8021q_ctx->ds = ds;
 +
 +      err = dsa_8021q_setup(felix->dsa_8021q_ctx, true);
 +      if (err)
 +              goto out_free_dsa_8021_ctx;
 +
 +      return 0;
 +
 +out_free_dsa_8021_ctx:
 +      kfree(felix->dsa_8021q_ctx);
 +      return err;
 +}
 +
 +static void felix_teardown_tag_8021q(struct dsa_switch *ds, int cpu)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +      struct felix *felix = ocelot_to_felix(ocelot);
 +      int err, port;
 +
 +      err = dsa_8021q_setup(felix->dsa_8021q_ctx, false);
 +      if (err)
 +              dev_err(ds->dev, "dsa_8021q_setup returned %d", err);
 +
 +      kfree(felix->dsa_8021q_ctx);
 +
 +      for (port = 0; port < ds->num_ports; port++) {
 +              if (dsa_is_unused_port(ds, port))
 +                      continue;
 +
 +              /* Restore the logic from ocelot_init:
 +               * do not forward BPDU frames to the front ports.
 +               */
 +              ocelot_write_gix(ocelot,
 +                               ANA_PORT_CPU_FWD_BPDU_CFG_BPDU_REDIR_ENA(0xffff),
 +                               ANA_PORT_CPU_FWD_BPDU_CFG,
 +                               port);
 +      }
 +
 +      felix_8021q_cpu_port_deinit(ocelot, cpu);
 +}
 +
 +/* The CPU port module is connected to the Node Processor Interface (NPI). This
 + * is the mode through which frames can be injected from and extracted to an
 + * external CPU, over Ethernet. In NXP SoCs, the "external CPU" is the ARM CPU
 + * running Linux, and this forms a DSA setup together with the enetc or fman
 + * DSA master.
 + */
 +static void felix_npi_port_init(struct ocelot *ocelot, int port)
 +{
 +      ocelot->npi = port;
 +
 +      ocelot_write(ocelot, QSYS_EXT_CPU_CFG_EXT_CPUQ_MSK_M |
 +                   QSYS_EXT_CPU_CFG_EXT_CPU_PORT(port),
 +                   QSYS_EXT_CPU_CFG);
 +
 +      /* NPI port Injection/Extraction configuration */
 +      ocelot_fields_write(ocelot, port, SYS_PORT_MODE_INCL_XTR_HDR,
 +                          ocelot->npi_xtr_prefix);
 +      ocelot_fields_write(ocelot, port, SYS_PORT_MODE_INCL_INJ_HDR,
 +                          ocelot->npi_inj_prefix);
 +
 +      /* Disable transmission of pause frames */
 +      ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, 0);
 +}
 +
 +static void felix_npi_port_deinit(struct ocelot *ocelot, int port)
 +{
 +      /* Restore hardware defaults */
 +      int unused_port = ocelot->num_phys_ports + 2;
 +
 +      ocelot->npi = -1;
 +
 +      ocelot_write(ocelot, QSYS_EXT_CPU_CFG_EXT_CPU_PORT(unused_port),
 +                   QSYS_EXT_CPU_CFG);
 +
 +      ocelot_fields_write(ocelot, port, SYS_PORT_MODE_INCL_XTR_HDR,
 +                          OCELOT_TAG_PREFIX_DISABLED);
 +      ocelot_fields_write(ocelot, port, SYS_PORT_MODE_INCL_INJ_HDR,
 +                          OCELOT_TAG_PREFIX_DISABLED);
 +
 +      /* Enable transmission of pause frames */
 +      ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, 1);
 +}
 +
 +static int felix_setup_tag_npi(struct dsa_switch *ds, int cpu)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +      unsigned long cpu_flood;
 +
 +      felix_npi_port_init(ocelot, cpu);
 +
 +      /* Include the CPU port module (and indirectly, the NPI port)
 +       * in the forwarding mask for unknown unicast - the hardware
 +       * default value for ANA_FLOODING_FLD_UNICAST excludes
 +       * BIT(ocelot->num_phys_ports), and so does ocelot_init,
 +       * since Ocelot relies on whitelisting MAC addresses towards
 +       * PGID_CPU.
 +       * We do this because DSA does not yet perform RX filtering,
 +       * and the NPI port does not perform source address learning,
 +       * so traffic sent to Linux is effectively unknown from the
 +       * switch's perspective.
 +       */
 +      cpu_flood = ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports));
 +      ocelot_rmw_rix(ocelot, cpu_flood, cpu_flood, ANA_PGID_PGID, PGID_UC);
 +
 +      return 0;
 +}
 +
 +static void felix_teardown_tag_npi(struct dsa_switch *ds, int cpu)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +
 +      felix_npi_port_deinit(ocelot, cpu);
 +}
 +
 +static int felix_set_tag_protocol(struct dsa_switch *ds, int cpu,
 +                                enum dsa_tag_protocol proto)
 +{
 +      int err;
 +
 +      switch (proto) {
 +      case DSA_TAG_PROTO_OCELOT:
 +              err = felix_setup_tag_npi(ds, cpu);
 +              break;
 +      case DSA_TAG_PROTO_OCELOT_8021Q:
 +              err = felix_setup_tag_8021q(ds, cpu);
 +              break;
 +      default:
 +              err = -EPROTONOSUPPORT;
 +      }
 +
 +      return err;
 +}
 +
 +static void felix_del_tag_protocol(struct dsa_switch *ds, int cpu,
 +                                 enum dsa_tag_protocol proto)
 +{
 +      switch (proto) {
 +      case DSA_TAG_PROTO_OCELOT:
 +              felix_teardown_tag_npi(ds, cpu);
 +              break;
 +      case DSA_TAG_PROTO_OCELOT_8021Q:
 +              felix_teardown_tag_8021q(ds, cpu);
 +              break;
 +      default:
 +              break;
 +      }
 +}
 +
 +/* This always leaves the switch in a consistent state, because although the
 + * tag_8021q setup can fail, the NPI setup can't. So either the change is made,
 + * or the restoration is guaranteed to work.
 + */
 +static int felix_change_tag_protocol(struct dsa_switch *ds, int cpu,
 +                                   enum dsa_tag_protocol proto)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +      struct felix *felix = ocelot_to_felix(ocelot);
 +      enum dsa_tag_protocol old_proto = felix->tag_proto;
 +      int err;
 +
 +      if (proto != DSA_TAG_PROTO_OCELOT &&
 +          proto != DSA_TAG_PROTO_OCELOT_8021Q)
 +              return -EPROTONOSUPPORT;
 +
 +      felix_del_tag_protocol(ds, cpu, old_proto);
 +
 +      err = felix_set_tag_protocol(ds, cpu, proto);
 +      if (err) {
 +              felix_set_tag_protocol(ds, cpu, old_proto);
 +              return err;
 +      }
 +
 +      felix->tag_proto = proto;
 +
 +      return 0;
 +}
 +
  static enum dsa_tag_protocol felix_get_tag_protocol(struct dsa_switch *ds,
                                                    int port,
                                                    enum dsa_tag_protocol mp)
  {
 -      return DSA_TAG_PROTO_OCELOT;
 +      struct ocelot *ocelot = ds->priv;
 +      struct felix *felix = ocelot_to_felix(ocelot);
 +
 +      return felix->tag_proto;
  }
  
  static int felix_set_ageing_time(struct dsa_switch *ds,
@@@ -529,12 -65,19 +529,12 @@@ static int felix_fdb_del(struct dsa_swi
        return ocelot_fdb_del(ocelot, port, addr, vid);
  }
  
 -/* This callback needs to be present */
 -static int felix_mdb_prepare(struct dsa_switch *ds, int port,
 -                           const struct switchdev_obj_port_mdb *mdb)
 -{
 -      return 0;
 -}
 -
 -static void felix_mdb_add(struct dsa_switch *ds, int port,
 -                        const struct switchdev_obj_port_mdb *mdb)
 +static int felix_mdb_add(struct dsa_switch *ds, int port,
 +                       const struct switchdev_obj_port_mdb *mdb)
  {
        struct ocelot *ocelot = ds->priv;
  
 -      ocelot_port_mdb_add(ocelot, port, mdb);
 +      return ocelot_port_mdb_add(ocelot, port, mdb);
  }
  
  static int felix_mdb_del(struct dsa_switch *ds, int port,
@@@ -569,40 -112,12 +569,40 @@@ static void felix_bridge_leave(struct d
        ocelot_port_bridge_leave(ocelot, port, br);
  }
  
 +static int felix_lag_join(struct dsa_switch *ds, int port,
 +                        struct net_device *bond,
 +                        struct netdev_lag_upper_info *info)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +
 +      return ocelot_port_lag_join(ocelot, port, bond, info);
 +}
 +
 +static int felix_lag_leave(struct dsa_switch *ds, int port,
 +                         struct net_device *bond)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +
 +      ocelot_port_lag_leave(ocelot, port, bond);
 +
 +      return 0;
 +}
 +
 +static int felix_lag_change(struct dsa_switch *ds, int port)
 +{
 +      struct dsa_port *dp = dsa_to_port(ds, port);
 +      struct ocelot *ocelot = ds->priv;
 +
 +      ocelot_port_lag_change(ocelot, port, dp->lag_tx_enabled);
 +
 +      return 0;
 +}
 +
  static int felix_vlan_prepare(struct dsa_switch *ds, int port,
                              const struct switchdev_obj_port_vlan *vlan)
  {
        struct ocelot *ocelot = ds->priv;
 -      u16 vid, flags = vlan->flags;
 -      int err;
 +      u16 flags = vlan->flags;
  
        /* Ocelot switches copy frames as-is to the CPU, so the flags:
         * egress-untagged or not, pvid or not, make no difference. This
        if (port == ocelot->npi)
                return 0;
  
 -      for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) {
 -              err = ocelot_vlan_prepare(ocelot, port, vid,
 -                                        flags & BRIDGE_VLAN_INFO_PVID,
 -                                        flags & BRIDGE_VLAN_INFO_UNTAGGED);
 -              if (err)
 -                      return err;
 -      }
 -
 -      return 0;
 +      return ocelot_vlan_prepare(ocelot, port, vlan->vid,
 +                                 flags & BRIDGE_VLAN_INFO_PVID,
 +                                 flags & BRIDGE_VLAN_INFO_UNTAGGED);
  }
  
 -static int felix_vlan_filtering(struct dsa_switch *ds, int port, bool enabled,
 -                              struct switchdev_trans *trans)
 +static int felix_vlan_filtering(struct dsa_switch *ds, int port, bool enabled)
  {
        struct ocelot *ocelot = ds->priv;
  
 -      return ocelot_port_vlan_filtering(ocelot, port, enabled, trans);
 +      return ocelot_port_vlan_filtering(ocelot, port, enabled);
  }
  
 -static void felix_vlan_add(struct dsa_switch *ds, int port,
 -                         const struct switchdev_obj_port_vlan *vlan)
 +static int felix_vlan_add(struct dsa_switch *ds, int port,
 +                        const struct switchdev_obj_port_vlan *vlan)
  {
        struct ocelot *ocelot = ds->priv;
        u16 flags = vlan->flags;
 -      u16 vid;
        int err;
  
 -      for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) {
 -              err = ocelot_vlan_add(ocelot, port, vid,
 -                                    flags & BRIDGE_VLAN_INFO_PVID,
 -                                    flags & BRIDGE_VLAN_INFO_UNTAGGED);
 -              if (err) {
 -                      dev_err(ds->dev, "Failed to add VLAN %d to port %d: %d\n",
 -                              vid, port, err);
 -                      return;
 -              }
 -      }
 +      err = felix_vlan_prepare(ds, port, vlan);
 +      if (err)
 +              return err;
 +
 +      return ocelot_vlan_add(ocelot, port, vlan->vid,
 +                             flags & BRIDGE_VLAN_INFO_PVID,
 +                             flags & BRIDGE_VLAN_INFO_UNTAGGED);
  }
  
  static int felix_vlan_del(struct dsa_switch *ds, int port,
                          const struct switchdev_obj_port_vlan *vlan)
  {
        struct ocelot *ocelot = ds->priv;
 -      u16 vid;
 -      int err;
  
 -      for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) {
 -              err = ocelot_vlan_del(ocelot, port, vid);
 -              if (err) {
 -                      dev_err(ds->dev, "Failed to remove VLAN %d from port %d: %d\n",
 -                              vid, port, err);
 -                      return err;
 -              }
 -      }
 -      return 0;
 +      return ocelot_vlan_del(ocelot, port, vlan->vid);
  }
  
  static int felix_port_enable(struct dsa_switch *ds, int port,
@@@ -697,9 -233,24 +697,24 @@@ static void felix_phylink_mac_link_down
  {
        struct ocelot *ocelot = ds->priv;
        struct ocelot_port *ocelot_port = ocelot->ports[port];
+       int err;
+       ocelot_port_rmwl(ocelot_port, 0, DEV_MAC_ENA_CFG_RX_ENA,
+                        DEV_MAC_ENA_CFG);
  
-       ocelot_port_writel(ocelot_port, 0, DEV_MAC_ENA_CFG);
        ocelot_fields_write(ocelot, port, QSYS_SWITCH_PORT_MODE_PORT_ENA, 0);
+       err = ocelot_port_flush(ocelot, port);
+       if (err)
+               dev_err(ocelot->dev, "failed to flush port %d: %d\n",
+                       port, err);
+       /* Put the port in reset. */
+       ocelot_port_writel(ocelot_port,
+                          DEV_CLOCK_CFG_MAC_TX_RST |
+                          DEV_CLOCK_CFG_MAC_RX_RST |
+                          DEV_CLOCK_CFG_LINK_SPEED(OCELOT_SPEED_1000),
+                          DEV_CLOCK_CFG);
  }
  
  static void felix_phylink_mac_link_up(struct dsa_switch *ds, int port,
@@@ -792,7 -343,7 +807,7 @@@ static void felix_port_qos_map_init(str
                       ANA_PORT_QOS_CFG,
                       port);
  
 -      for (i = 0; i < FELIX_NUM_TC * 2; i++) {
 +      for (i = 0; i < OCELOT_NUM_TC * 2; i++) {
                ocelot_rmw_ix(ocelot,
                              (ANA_PORT_PCP_DEI_MAP_DP_PCP_DEI_VAL & i) |
                              ANA_PORT_PCP_DEI_MAP_QOS_PCP_DEI_VAL(i),
@@@ -915,12 -466,12 +930,12 @@@ static int felix_init_structs(struct fe
        ocelot->map             = felix->info->map;
        ocelot->stats_layout    = felix->info->stats_layout;
        ocelot->num_stats       = felix->info->num_stats;
 -      ocelot->shared_queue_sz = felix->info->shared_queue_sz;
        ocelot->num_mact_rows   = felix->info->num_mact_rows;
        ocelot->vcap            = felix->info->vcap;
        ocelot->ops             = felix->info->ops;
 -      ocelot->inj_prefix      = OCELOT_TAG_PREFIX_SHORT;
 -      ocelot->xtr_prefix      = OCELOT_TAG_PREFIX_SHORT;
 +      ocelot->npi_inj_prefix  = OCELOT_TAG_PREFIX_SHORT;
 +      ocelot->npi_xtr_prefix  = OCELOT_TAG_PREFIX_SHORT;
 +      ocelot->devlink         = felix->ds->devlink;
  
        port_phy_modes = kcalloc(num_phys_ports, sizeof(phy_interface_t),
                                 GFP_KERNEL);
        return 0;
  }
  
 -/* The CPU port module is connected to the Node Processor Interface (NPI). This
 - * is the mode through which frames can be injected from and extracted to an
 - * external CPU, over Ethernet.
 - */
 -static void felix_npi_port_init(struct ocelot *ocelot, int port)
 -{
 -      ocelot->npi = port;
 -
 -      ocelot_write(ocelot, QSYS_EXT_CPU_CFG_EXT_CPUQ_MSK_M |
 -                   QSYS_EXT_CPU_CFG_EXT_CPU_PORT(port),
 -                   QSYS_EXT_CPU_CFG);
 -
 -      /* NPI port Injection/Extraction configuration */
 -      ocelot_fields_write(ocelot, port, SYS_PORT_MODE_INCL_XTR_HDR,
 -                          ocelot->xtr_prefix);
 -      ocelot_fields_write(ocelot, port, SYS_PORT_MODE_INCL_INJ_HDR,
 -                          ocelot->inj_prefix);
 -
 -      /* Disable transmission of pause frames */
 -      ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, 0);
 -}
 -
  /* Hardware initialization done here so that we can allocate structures with
   * devm without fear of dsa_register_switch returning -EPROBE_DEFER and causing
   * us to allocate structures twice (leak memory) and map PCI memory twice
@@@ -1049,10 -622,10 +1064,10 @@@ static int felix_setup(struct dsa_switc
        }
  
        for (port = 0; port < ds->num_ports; port++) {
 -              ocelot_init_port(ocelot, port);
 +              if (dsa_is_unused_port(ds, port))
 +                      continue;
  
 -              if (dsa_is_cpu_port(ds, port))
 -                      felix_npi_port_init(ocelot, port);
 +              ocelot_init_port(ocelot, port);
  
                /* Set the default QoS Classification based on PCP and DEI
                 * bits of vlan tag.
                felix_port_qos_map_init(ocelot, port);
        }
  
 -      /* Include the CPU port module in the forwarding mask for unknown
 -       * unicast - the hardware default value for ANA_FLOODING_FLD_UNICAST
 -       * excludes BIT(ocelot->num_phys_ports), and so does ocelot_init, since
 -       * Ocelot relies on whitelisting MAC addresses towards PGID_CPU.
 -       */
 -      ocelot_write_rix(ocelot,
 -                       ANA_PGID_PGID_PGID(GENMASK(ocelot->num_phys_ports, 0)),
 -                       ANA_PGID_PGID, PGID_UC);
 +      err = ocelot_devlink_sb_register(ocelot);
 +      if (err)
 +              return err;
 +
 +      for (port = 0; port < ds->num_ports; port++) {
 +              if (!dsa_is_cpu_port(ds, port))
 +                      continue;
 +
 +              /* The initial tag protocol is NPI which always returns 0, so
 +               * there's no real point in checking for errors.
 +               */
 +              felix_set_tag_protocol(ds, port, felix->tag_proto);
 +      }
  
        ds->mtu_enforcement_ingress = true;
 -      ds->configure_vlan_while_not_filtering = true;
 +      ds->assisted_learning_on_cpu_port = true;
  
        return 0;
  }
@@@ -1086,22 -654,14 +1101,22 @@@ static void felix_teardown(struct dsa_s
        struct felix *felix = ocelot_to_felix(ocelot);
        int port;
  
 -      if (felix->info->mdio_bus_free)
 -              felix->info->mdio_bus_free(ocelot);
 +      for (port = 0; port < ds->num_ports; port++) {
 +              if (!dsa_is_cpu_port(ds, port))
 +                      continue;
  
 -      for (port = 0; port < ocelot->num_phys_ports; port++)
 -              ocelot_deinit_port(ocelot, port);
 +              felix_del_tag_protocol(ds, port, felix->tag_proto);
 +      }
 +
 +      ocelot_devlink_sb_unregister(ocelot);
        ocelot_deinit_timestamp(ocelot);
 -      /* stop workqueue thread */
        ocelot_deinit(ocelot);
 +
 +      for (port = 0; port < ocelot->num_phys_ports; port++)
 +              ocelot_deinit_port(ocelot, port);
 +
 +      if (felix->info->mdio_bus_free)
 +              felix->info->mdio_bus_free(ocelot);
  }
  
  static int felix_hwtstamp_get(struct dsa_switch *ds, int port,
@@@ -1235,160 -795,46 +1250,160 @@@ static int felix_port_setup_tc(struct d
                return -EOPNOTSUPP;
  }
  
 +static int felix_sb_pool_get(struct dsa_switch *ds, unsigned int sb_index,
 +                           u16 pool_index,
 +                           struct devlink_sb_pool_info *pool_info)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +
 +      return ocelot_sb_pool_get(ocelot, sb_index, pool_index, pool_info);
 +}
 +
 +static int felix_sb_pool_set(struct dsa_switch *ds, unsigned int sb_index,
 +                           u16 pool_index, u32 size,
 +                           enum devlink_sb_threshold_type threshold_type,
 +                           struct netlink_ext_ack *extack)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +
 +      return ocelot_sb_pool_set(ocelot, sb_index, pool_index, size,
 +                                threshold_type, extack);
 +}
 +
 +static int felix_sb_port_pool_get(struct dsa_switch *ds, int port,
 +                                unsigned int sb_index, u16 pool_index,
 +                                u32 *p_threshold)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +
 +      return ocelot_sb_port_pool_get(ocelot, port, sb_index, pool_index,
 +                                     p_threshold);
 +}
 +
 +static int felix_sb_port_pool_set(struct dsa_switch *ds, int port,
 +                                unsigned int sb_index, u16 pool_index,
 +                                u32 threshold, struct netlink_ext_ack *extack)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +
 +      return ocelot_sb_port_pool_set(ocelot, port, sb_index, pool_index,
 +                                     threshold, extack);
 +}
 +
 +static int felix_sb_tc_pool_bind_get(struct dsa_switch *ds, int port,
 +                                   unsigned int sb_index, u16 tc_index,
 +                                   enum devlink_sb_pool_type pool_type,
 +                                   u16 *p_pool_index, u32 *p_threshold)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +
 +      return ocelot_sb_tc_pool_bind_get(ocelot, port, sb_index, tc_index,
 +                                        pool_type, p_pool_index,
 +                                        p_threshold);
 +}
 +
 +static int felix_sb_tc_pool_bind_set(struct dsa_switch *ds, int port,
 +                                   unsigned int sb_index, u16 tc_index,
 +                                   enum devlink_sb_pool_type pool_type,
 +                                   u16 pool_index, u32 threshold,
 +                                   struct netlink_ext_ack *extack)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +
 +      return ocelot_sb_tc_pool_bind_set(ocelot, port, sb_index, tc_index,
 +                                        pool_type, pool_index, threshold,
 +                                        extack);
 +}
 +
 +static int felix_sb_occ_snapshot(struct dsa_switch *ds,
 +                               unsigned int sb_index)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +
 +      return ocelot_sb_occ_snapshot(ocelot, sb_index);
 +}
 +
 +static int felix_sb_occ_max_clear(struct dsa_switch *ds,
 +                                unsigned int sb_index)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +
 +      return ocelot_sb_occ_max_clear(ocelot, sb_index);
 +}
 +
 +static int felix_sb_occ_port_pool_get(struct dsa_switch *ds, int port,
 +                                    unsigned int sb_index, u16 pool_index,
 +                                    u32 *p_cur, u32 *p_max)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +
 +      return ocelot_sb_occ_port_pool_get(ocelot, port, sb_index, pool_index,
 +                                         p_cur, p_max);
 +}
 +
 +static int felix_sb_occ_tc_port_bind_get(struct dsa_switch *ds, int port,
 +                                       unsigned int sb_index, u16 tc_index,
 +                                       enum devlink_sb_pool_type pool_type,
 +                                       u32 *p_cur, u32 *p_max)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +
 +      return ocelot_sb_occ_tc_port_bind_get(ocelot, port, sb_index, tc_index,
 +                                            pool_type, p_cur, p_max);
 +}
 +
  const struct dsa_switch_ops felix_switch_ops = {
 -      .get_tag_protocol       = felix_get_tag_protocol,
 -      .setup                  = felix_setup,
 -      .teardown               = felix_teardown,
 -      .set_ageing_time        = felix_set_ageing_time,
 -      .get_strings            = felix_get_strings,
 -      .get_ethtool_stats      = felix_get_ethtool_stats,
 -      .get_sset_count         = felix_get_sset_count,
 -      .get_ts_info            = felix_get_ts_info,
 -      .phylink_validate       = felix_phylink_validate,
 -      .phylink_mac_config     = felix_phylink_mac_config,
 -      .phylink_mac_link_down  = felix_phylink_mac_link_down,
 -      .phylink_mac_link_up    = felix_phylink_mac_link_up,
 -      .port_enable            = felix_port_enable,
 -      .port_disable           = felix_port_disable,
 -      .port_fdb_dump          = felix_fdb_dump,
 -      .port_fdb_add           = felix_fdb_add,
 -      .port_fdb_del           = felix_fdb_del,
 -      .port_mdb_prepare       = felix_mdb_prepare,
 -      .port_mdb_add           = felix_mdb_add,
 -      .port_mdb_del           = felix_mdb_del,
 -      .port_bridge_join       = felix_bridge_join,
 -      .port_bridge_leave      = felix_bridge_leave,
 -      .port_stp_state_set     = felix_bridge_stp_state_set,
 -      .port_vlan_prepare      = felix_vlan_prepare,
 -      .port_vlan_filtering    = felix_vlan_filtering,
 -      .port_vlan_add          = felix_vlan_add,
 -      .port_vlan_del          = felix_vlan_del,
 -      .port_hwtstamp_get      = felix_hwtstamp_get,
 -      .port_hwtstamp_set      = felix_hwtstamp_set,
 -      .port_rxtstamp          = felix_rxtstamp,
 -      .port_txtstamp          = felix_txtstamp,
 -      .port_change_mtu        = felix_change_mtu,
 -      .port_max_mtu           = felix_get_max_mtu,
 -      .port_policer_add       = felix_port_policer_add,
 -      .port_policer_del       = felix_port_policer_del,
 -      .cls_flower_add         = felix_cls_flower_add,
 -      .cls_flower_del         = felix_cls_flower_del,
 -      .cls_flower_stats       = felix_cls_flower_stats,
 -      .port_setup_tc          = felix_port_setup_tc,
 +      .get_tag_protocol               = felix_get_tag_protocol,
 +      .change_tag_protocol            = felix_change_tag_protocol,
 +      .setup                          = felix_setup,
 +      .teardown                       = felix_teardown,
 +      .set_ageing_time                = felix_set_ageing_time,
 +      .get_strings                    = felix_get_strings,
 +      .get_ethtool_stats              = felix_get_ethtool_stats,
 +      .get_sset_count                 = felix_get_sset_count,
 +      .get_ts_info                    = felix_get_ts_info,
 +      .phylink_validate               = felix_phylink_validate,
 +      .phylink_mac_config             = felix_phylink_mac_config,
 +      .phylink_mac_link_down          = felix_phylink_mac_link_down,
 +      .phylink_mac_link_up            = felix_phylink_mac_link_up,
 +      .port_enable                    = felix_port_enable,
 +      .port_disable                   = felix_port_disable,
 +      .port_fdb_dump                  = felix_fdb_dump,
 +      .port_fdb_add                   = felix_fdb_add,
 +      .port_fdb_del                   = felix_fdb_del,
 +      .port_mdb_add                   = felix_mdb_add,
 +      .port_mdb_del                   = felix_mdb_del,
 +      .port_bridge_join               = felix_bridge_join,
 +      .port_bridge_leave              = felix_bridge_leave,
 +      .port_lag_join                  = felix_lag_join,
 +      .port_lag_leave                 = felix_lag_leave,
 +      .port_lag_change                = felix_lag_change,
 +      .port_stp_state_set             = felix_bridge_stp_state_set,
 +      .port_vlan_filtering            = felix_vlan_filtering,
 +      .port_vlan_add                  = felix_vlan_add,
 +      .port_vlan_del                  = felix_vlan_del,
 +      .port_hwtstamp_get              = felix_hwtstamp_get,
 +      .port_hwtstamp_set              = felix_hwtstamp_set,
 +      .port_rxtstamp                  = felix_rxtstamp,
 +      .port_txtstamp                  = felix_txtstamp,
 +      .port_change_mtu                = felix_change_mtu,
 +      .port_max_mtu                   = felix_get_max_mtu,
 +      .port_policer_add               = felix_port_policer_add,
 +      .port_policer_del               = felix_port_policer_del,
 +      .cls_flower_add                 = felix_cls_flower_add,
 +      .cls_flower_del                 = felix_cls_flower_del,
 +      .cls_flower_stats               = felix_cls_flower_stats,
 +      .port_setup_tc                  = felix_port_setup_tc,
 +      .devlink_sb_pool_get            = felix_sb_pool_get,
 +      .devlink_sb_pool_set            = felix_sb_pool_set,
 +      .devlink_sb_port_pool_get       = felix_sb_port_pool_get,
 +      .devlink_sb_port_pool_set       = felix_sb_port_pool_set,
 +      .devlink_sb_tc_pool_bind_get    = felix_sb_tc_pool_bind_get,
 +      .devlink_sb_tc_pool_bind_set    = felix_sb_tc_pool_bind_set,
 +      .devlink_sb_occ_snapshot        = felix_sb_occ_snapshot,
 +      .devlink_sb_occ_max_clear       = felix_sb_occ_max_clear,
 +      .devlink_sb_occ_port_pool_get   = felix_sb_occ_port_pool_get,
 +      .devlink_sb_occ_tc_port_bind_get= felix_sb_occ_tc_port_bind_get,
  };
  
  struct net_device *felix_port_to_netdev(struct ocelot *ocelot, int port)
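The felix.c hunks above make the tagging protocol switchable at runtime: felix_change_tag_protocol() tears down the old tagger, tries to install the new one, and reinstalls the old one if the new setup fails (only the tag_8021q path can fail; the NPI path cannot, which is what keeps the switch in a consistent state). The following is a minimal userspace sketch of that rollback pattern only; change_proto(), setup_proto(), teardown_proto() and the PROTO_* constants are hypothetical stand-ins, not kernel or driver APIs.

    #include <errno.h>
    #include <stdio.h>

    enum proto { PROTO_NPI, PROTO_8021Q };

    /* Stand-ins: the NPI-like setup always succeeds, the 8021q-like one may fail. */
    static int setup_proto(enum proto p)
    {
            return p == PROTO_8021Q ? -ENOMEM : 0;
    }

    static void teardown_proto(enum proto p)
    {
            (void)p;
    }

    static int change_proto(enum proto *cur, enum proto want)
    {
            int err;

            teardown_proto(*cur);           /* the old tagger can always be removed */
            err = setup_proto(want);        /* the new tagger may fail */
            if (err) {
                    setup_proto(*cur);      /* restore; this path cannot fail */
                    return err;
            }
            *cur = want;
            return 0;
    }

    int main(void)
    {
            enum proto cur = PROTO_NPI;
            int err = change_proto(&cur, PROTO_8021Q);

            printf("switch failed with %d, still using proto %d\n", err, cur);
            return 0;
    }

Either way the caller ends up with a working tagger, which is why the diff's comment can claim the switch is always left in a consistent state.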
index 1db6cfd2b55c6ebf3f4f1b98f27db2a14b3df3ef,a0596c073dddc10d03f0625b58c24f2edbd91b50..102f2c91fdb855311e164186a2fddda0492f3ea2
@@@ -404,6 -404,7 +404,7 @@@ static int ena_xdp_execute(struct ena_r
                if (unlikely(!xdpf)) {
                        trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
                        xdp_stat = &rx_ring->rx_stats.xdp_aborted;
+                       verdict = XDP_ABORTED;
                        break;
                }
  
                        xdp_stat = &rx_ring->rx_stats.xdp_redirect;
                        break;
                }
-               fallthrough;
+               trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
+               xdp_stat = &rx_ring->rx_stats.xdp_aborted;
+               verdict = XDP_ABORTED;
+               break;
        case XDP_ABORTED:
                trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
                xdp_stat = &rx_ring->rx_stats.xdp_aborted;
@@@ -1585,9 -1589,10 +1589,9 @@@ static int ena_xdp_handle_buff(struct e
        int ret;
  
        rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id];
 -      xdp->data = page_address(rx_info->page) + rx_info->page_offset;
 -      xdp_set_data_meta_invalid(xdp);
 -      xdp->data_hard_start = page_address(rx_info->page);
 -      xdp->data_end = xdp->data + rx_ring->ena_bufs[0].len;
 +      xdp_prepare_buff(xdp, page_address(rx_info->page),
 +                       rx_info->page_offset,
 +                       rx_ring->ena_bufs[0].len, false);
        /* If for some reason we received a bigger packet than
         * we expect, then we simply drop it
         */
@@@ -1633,7 -1638,8 +1637,7 @@@ static int ena_clean_rx_irq(struct ena_
        netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
                  "%s qid %d\n", __func__, rx_ring->qid);
        res_budget = budget;
 -      xdp.rxq = &rx_ring->xdp_rxq;
 -      xdp.frame_sz = ENA_PAGE_SIZE;
 +      xdp_init_buff(&xdp, ENA_PAGE_SIZE, &rx_ring->xdp_rxq);
  
        do {
                xdp_verdict = XDP_PASS;
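The ena change above is an accounting fix: when buffer-to-frame conversion or the redirect step fails inside the XDP_TX/XDP_REDIRECT branches, the local verdict is overwritten with XDP_ABORTED so the exception trace and the counter that gets bumped describe what actually happened. A standalone sketch of that idea, with hypothetical names rather than the ena driver's structures:

    #include <stdbool.h>
    #include <stdio.h>

    enum verdict { XDP_PASS, XDP_TX, XDP_REDIRECT, XDP_DROP, XDP_ABORTED };

    struct stats { unsigned long tx, redirect, aborted; };

    /* Return the verdict to report: a failed TX/REDIRECT step becomes ABORTED. */
    static enum verdict account(enum verdict v, bool step_failed, struct stats *s)
    {
            if ((v == XDP_TX || v == XDP_REDIRECT) && step_failed) {
                    s->aborted++;           /* count what actually happened */
                    return XDP_ABORTED;
            }
            if (v == XDP_TX)
                    s->tx++;
            else if (v == XDP_REDIRECT)
                    s->redirect++;
            return v;
    }

    int main(void)
    {
            struct stats s = { 0 };
            enum verdict v = account(XDP_TX, true, &s);

            printf("reported verdict %d, aborted count %lu\n", v, s.aborted);
            return 0;
    }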
index d8e568f6caf303de90bf560d0e012138592a41f9,6faa20bed48858c7785934b0c78c6246d9699f04..ccfe52a50a665bb10b7411207a18b6bab05f9673
@@@ -2180,8 -2180,10 +2180,10 @@@ static int dpaa_a050385_wa_xdpf(struct 
                                struct xdp_frame **init_xdpf)
  {
        struct xdp_frame *new_xdpf, *xdpf = *init_xdpf;
-       void *new_buff;
+       void *new_buff, *aligned_data;
        struct page *p;
+       u32 data_shift;
+       int headroom;
  
        /* Check the data alignment and make sure the headroom is large
         * enough to store the xdpf backpointer. Use an aligned headroom
         * byte frame headroom. If the XDP program uses all of it, copy the
         * data to a new buffer and make room for storing the backpointer.
         */
-       if (PTR_IS_ALIGNED(xdpf->data, DPAA_A050385_ALIGN) &&
+       if (PTR_IS_ALIGNED(xdpf->data, DPAA_FD_DATA_ALIGNMENT) &&
            xdpf->headroom >= priv->tx_headroom) {
                xdpf->headroom = priv->tx_headroom;
                return 0;
        }
  
+       /* Try to move the data inside the buffer just enough to align it and
+        * store the xdpf backpointer. If the available headroom isn't large
+        * enough, resort to allocating a new buffer and copying the data.
+        */
+       aligned_data = PTR_ALIGN_DOWN(xdpf->data, DPAA_FD_DATA_ALIGNMENT);
+       data_shift = xdpf->data - aligned_data;
+       /* The XDP frame's headroom needs to be large enough to accommodate
+        * shifting the data as well as storing the xdpf backpointer.
+        */
+       if (xdpf->headroom  >= data_shift + priv->tx_headroom) {
+               memmove(aligned_data, xdpf->data, xdpf->len);
+               xdpf->data = aligned_data;
+               xdpf->headroom = priv->tx_headroom;
+               return 0;
+       }
+       /* The new xdp_frame is stored in the new buffer. Reserve enough space
+        * in the headroom for storing it along with the driver's private
+        * info. The headroom needs to be aligned to DPAA_FD_DATA_ALIGNMENT to
+        * guarantee the data's alignment in the buffer.
+        */
+       headroom = ALIGN(sizeof(*new_xdpf) + priv->tx_headroom,
+                        DPAA_FD_DATA_ALIGNMENT);
+       /* Assure the extended headroom and data don't overflow the buffer,
+        * while maintaining the mandatory tailroom.
+        */
+       if (headroom + xdpf->len > DPAA_BP_RAW_SIZE -
+                       SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+               return -ENOMEM;
        p = dev_alloc_pages(0);
        if (unlikely(!p))
                return -ENOMEM;
  
        /* Copy the data to the new buffer at a properly aligned offset */
        new_buff = page_address(p);
-       memcpy(new_buff + priv->tx_headroom, xdpf->data, xdpf->len);
+       memcpy(new_buff + headroom, xdpf->data, xdpf->len);
  
        /* Create an XDP frame around the new buffer in a similar fashion
         * to xdp_convert_buff_to_frame.
         */
        new_xdpf = new_buff;
-       new_xdpf->data = new_buff + priv->tx_headroom;
+       new_xdpf->data = new_buff + headroom;
        new_xdpf->len = xdpf->len;
        new_xdpf->headroom = priv->tx_headroom;
        new_xdpf->frame_sz = DPAA_BP_RAW_SIZE;
@@@ -2532,10 -2566,12 +2566,10 @@@ static u32 dpaa_run_xdp(struct dpaa_pri
                return XDP_PASS;
        }
  
 -      xdp.data = vaddr + fd_off;
 -      xdp.data_meta = xdp.data;
 -      xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM;
 -      xdp.data_end = xdp.data + qm_fd_get_length(fd);
 -      xdp.frame_sz = DPAA_BP_RAW_SIZE - DPAA_TX_PRIV_DATA_SIZE;
 -      xdp.rxq = &dpaa_fq->xdp_rxq;
 +      xdp_init_buff(&xdp, DPAA_BP_RAW_SIZE - DPAA_TX_PRIV_DATA_SIZE,
 +                    &dpaa_fq->xdp_rxq);
 +      xdp_prepare_buff(&xdp, vaddr + fd_off - XDP_PACKET_HEADROOM,
 +                       XDP_PACKET_HEADROOM, qm_fd_get_length(fd), true);
  
        /* We reserve a fixed headroom of 256 bytes under the erratum and we
         * offer it all to XDP programs to use. If no room is left for the
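The dpaa workaround above avoids a full copy when it can: it first tries to memmove() the payload down to the previous DPAA_FD_DATA_ALIGNMENT boundary, which only works when the XDP frame's headroom covers both that shift and the driver's tx_headroom; otherwise it allocates a fresh page with an alignment-rounded headroom. A small userspace sketch of the same arithmetic follows; the constants and the ALIGN_DOWN macro are illustrative stand-ins, not the driver's definitions.

    #include <stdint.h>
    #include <stdio.h>

    #define DATA_ALIGNMENT  16u     /* stand-in for DPAA_FD_DATA_ALIGNMENT */
    #define TX_HEADROOM     64u     /* stand-in for priv->tx_headroom */
    #define ALIGN_DOWN(x, a)        ((x) & ~((uintptr_t)(a) - 1))

    int main(void)
    {
            uintptr_t data = 0x1000 + 200;  /* unaligned start of the payload */
            uintptr_t aligned = ALIGN_DOWN(data, DATA_ALIGNMENT);
            uintptr_t shift = data - aligned;       /* bytes the payload moves down */
            unsigned int headroom = 210;    /* headroom available in this frame */

            if (headroom >= shift + TX_HEADROOM)
                    printf("in place: memmove payload down by %lu bytes\n",
                           (unsigned long)shift);
            else
                    printf("not enough headroom: copy into a new page\n");
            return 0;
    }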
index 64a80a5933cb37873d3bd8d281543f8be1db413c,48549db23c5241e6bcd04d3584c0b837c6f981a0..7d81ffed4dc0f8bb2e284136bda765d6ed1a06c4
@@@ -24,7 -24,7 +24,7 @@@
  #include "hnae3.h"
  
  #define HCLGE_NAME                    "hclge"
 -#define HCLGE_STATS_READ(p, offset) (*((u64 *)((u8 *)(p) + (offset))))
 +#define HCLGE_STATS_READ(p, offset) (*(u64 *)((u8 *)(p) + (offset)))
  #define HCLGE_MAC_STATS_FIELD_OFF(f) (offsetof(struct hclge_mac_stats, f))
  
  #define HCLGE_BUF_SIZE_UNIT   256U
@@@ -55,6 -55,8 +55,6 @@@
  
  #define HCLGE_LINK_STATUS_MS  10
  
 -#define HCLGE_VF_VPORT_START_NUM      1
 -
  static int hclge_set_mac_mtu(struct hclge_dev *hdev, int new_mps);
  static int hclge_init_vlan_config(struct hclge_dev *hdev);
  static void hclge_sync_vlan_filter(struct hclge_dev *hdev);
@@@ -626,7 -628,7 +626,7 @@@ static u8 *hclge_tqps_get_strings(struc
        for (i = 0; i < kinfo->num_tqps; i++) {
                struct hclge_tqp *tqp = container_of(handle->kinfo.tqp[i],
                        struct hclge_tqp, q);
 -              snprintf(buff, ETH_GSTRING_LEN, "txq%d_pktnum_rcd",
 +              snprintf(buff, ETH_GSTRING_LEN, "txq%u_pktnum_rcd",
                         tqp->index);
                buff = buff + ETH_GSTRING_LEN;
        }
        for (i = 0; i < kinfo->num_tqps; i++) {
                struct hclge_tqp *tqp = container_of(kinfo->tqp[i],
                        struct hclge_tqp, q);
 -              snprintf(buff, ETH_GSTRING_LEN, "rxq%d_pktnum_rcd",
 +              snprintf(buff, ETH_GSTRING_LEN, "rxq%u_pktnum_rcd",
                         tqp->index);
                buff = buff + ETH_GSTRING_LEN;
        }
@@@ -928,7 -930,7 +928,7 @@@ static int hclge_query_pf_resource(stru
        return 0;
  }
  
 -static int hclge_parse_speed(int speed_cmd, int *speed)
 +static int hclge_parse_speed(u8 speed_cmd, u32 *speed)
  {
        switch (speed_cmd) {
        case 6:
@@@ -1371,8 -1373,6 +1371,8 @@@ static void hclge_set_default_dev_specs
        ae_dev->dev_specs.rss_key_size = HCLGE_RSS_KEY_SIZE;
        ae_dev->dev_specs.max_tm_rate = HCLGE_ETHER_MAX_RATE;
        ae_dev->dev_specs.max_int_gl = HCLGE_DEF_MAX_INT_GL;
 +      ae_dev->dev_specs.max_frm_size = HCLGE_MAC_MAX_FRAME;
 +      ae_dev->dev_specs.max_qset_num = HCLGE_MAX_QSET_NUM;
  }
  
  static void hclge_parse_dev_specs(struct hclge_dev *hdev,
        ae_dev->dev_specs.int_ql_max = le16_to_cpu(req0->int_ql_max);
        ae_dev->dev_specs.rss_key_size = le16_to_cpu(req0->rss_key_size);
        ae_dev->dev_specs.max_tm_rate = le32_to_cpu(req0->max_tm_rate);
 +      ae_dev->dev_specs.max_qset_num = le16_to_cpu(req1->max_qset_num);
        ae_dev->dev_specs.max_int_gl = le16_to_cpu(req1->max_int_gl);
 +      ae_dev->dev_specs.max_frm_size = le16_to_cpu(req1->max_frm_size);
  }
  
  static void hclge_check_dev_specs(struct hclge_dev *hdev)
                dev_specs->rss_key_size = HCLGE_RSS_KEY_SIZE;
        if (!dev_specs->max_tm_rate)
                dev_specs->max_tm_rate = HCLGE_ETHER_MAX_RATE;
 +      if (!dev_specs->max_qset_num)
 +              dev_specs->max_qset_num = HCLGE_MAX_QSET_NUM;
        if (!dev_specs->max_int_gl)
                dev_specs->max_int_gl = HCLGE_DEF_MAX_INT_GL;
 +      if (!dev_specs->max_frm_size)
 +              dev_specs->max_frm_size = HCLGE_MAC_MAX_FRAME;
  }
  
  static int hclge_query_dev_specs(struct hclge_dev *hdev)
@@@ -4243,6 -4237,11 +4243,6 @@@ static u32 hclge_get_rss_key_size(struc
        return HCLGE_RSS_KEY_SIZE;
  }
  
 -static u32 hclge_get_rss_indir_size(struct hnae3_handle *handle)
 -{
 -      return HCLGE_RSS_IND_TBL_SIZE;
 -}
 -
  static int hclge_set_rss_algo_key(struct hclge_dev *hdev,
                                  const u8 hfunc, const u8 *key)
  {
@@@ -4284,7 -4283,6 +4284,7 @@@ static int hclge_set_rss_indir_table(st
  {
        struct hclge_rss_indirection_table_cmd *req;
        struct hclge_desc desc;
 +      int rss_cfg_tbl_num;
        u8 rss_msb_oft;
        u8 rss_msb_val;
        int ret;
        u32 j;
  
        req = (struct hclge_rss_indirection_table_cmd *)desc.data;
 +      rss_cfg_tbl_num = hdev->ae_dev->dev_specs.rss_ind_tbl_size /
 +                        HCLGE_RSS_CFG_TBL_SIZE;
  
 -      for (i = 0; i < HCLGE_RSS_CFG_TBL_NUM; i++) {
 +      for (i = 0; i < rss_cfg_tbl_num; i++) {
                hclge_cmd_setup_basic_desc
                        (&desc, HCLGE_OPC_RSS_INDIR_TABLE, false);
  
@@@ -4402,7 -4398,6 +4402,7 @@@ static int hclge_set_rss_input_tuple(st
  static int hclge_get_rss(struct hnae3_handle *handle, u32 *indir,
                         u8 *key, u8 *hfunc)
  {
 +      struct hnae3_ae_dev *ae_dev = pci_get_drvdata(handle->pdev);
        struct hclge_vport *vport = hclge_get_vport(handle);
        int i;
  
  
        /* Get indirect table */
        if (indir)
 -              for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++)
 +              for (i = 0; i < ae_dev->dev_specs.rss_ind_tbl_size; i++)
                        indir[i] =  vport->rss_indirection_tbl[i];
  
        return 0;
  static int hclge_set_rss(struct hnae3_handle *handle, const u32 *indir,
                         const  u8 *key, const  u8 hfunc)
  {
 +      struct hnae3_ae_dev *ae_dev = pci_get_drvdata(handle->pdev);
        struct hclge_vport *vport = hclge_get_vport(handle);
        struct hclge_dev *hdev = vport->back;
        u8 hash_algo;
        }
  
        /* Update the shadow RSS table with user specified qids */
 -      for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++)
 +      for (i = 0; i < ae_dev->dev_specs.rss_ind_tbl_size; i++)
                vport->rss_indirection_tbl[i] = indir[i];
  
        /* Update the hardware */
@@@ -4709,15 -4703,14 +4709,15 @@@ void hclge_rss_indir_init_cfg(struct hc
        int i, j;
  
        for (j = 0; j < hdev->num_vmdq_vport + 1; j++) {
 -              for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++)
 +              for (i = 0; i < hdev->ae_dev->dev_specs.rss_ind_tbl_size; i++)
                        vport[j].rss_indirection_tbl[i] =
                                i % vport[j].alloc_rss_size;
        }
  }
  
 -static void hclge_rss_init_cfg(struct hclge_dev *hdev)
 +static int hclge_rss_init_cfg(struct hclge_dev *hdev)
  {
 +      u16 rss_ind_tbl_size = hdev->ae_dev->dev_specs.rss_ind_tbl_size;
        int i, rss_algo = HCLGE_RSS_HASH_ALGO_TOEPLITZ;
        struct hclge_vport *vport = hdev->vport;
  
                rss_algo = HCLGE_RSS_HASH_ALGO_SIMPLE;
  
        for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
 +              u16 *rss_ind_tbl;
 +
                vport[i].rss_tuple_sets.ipv4_tcp_en =
                        HCLGE_RSS_INPUT_TUPLE_OTHER;
                vport[i].rss_tuple_sets.ipv4_udp_en =
  
                vport[i].rss_algo = rss_algo;
  
 +              rss_ind_tbl = devm_kcalloc(&hdev->pdev->dev, rss_ind_tbl_size,
 +                                         sizeof(*rss_ind_tbl), GFP_KERNEL);
 +              if (!rss_ind_tbl)
 +                      return -ENOMEM;
 +
 +              vport[i].rss_indirection_tbl = rss_ind_tbl;
                memcpy(vport[i].rss_hash_key, hclge_hash_key,
                       HCLGE_RSS_KEY_SIZE);
        }
  
        hclge_rss_indir_init_cfg(hdev);
 +
 +      return 0;
  }
  
  int hclge_bind_ring_with_vector(struct hclge_vport *vport,
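
[Editor's note: the hclge hunks above replace the compile-time HCLGE_RSS_IND_TBL_SIZE with a size reported by the device (dev_specs.rss_ind_tbl_size) and allocate each vport's indirection table at runtime. A minimal userspace sketch of that pattern follows; the table and queue sizes are illustrative, not the driver's.]

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Allocate an indirection table whose size comes from the device and
     * spread the queues over it round-robin, as hclge_rss_indir_init_cfg()
     * does in the hunk above. */
    static uint16_t *rss_indir_init(size_t tbl_size, uint16_t rss_size)
    {
            uint16_t *tbl = calloc(tbl_size, sizeof(*tbl));

            if (!tbl)
                    return NULL;    /* the driver returns -ENOMEM here */
            for (size_t i = 0; i < tbl_size; i++)
                    tbl[i] = i % rss_size;
            return tbl;
    }

    int main(void)
    {
            uint16_t *tbl = rss_indir_init(512, 16);        /* illustrative sizes */

            if (tbl) {
                    printf("entry 0 = %d, entry 17 = %d\n", tbl[0], tbl[17]);
                    free(tbl);
            }
            return 0;
    }
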
@@@ -5595,7 -5578,7 +5595,7 @@@ static int hclge_fd_check_ext_tuple(str
                if (fs->m_ext.vlan_tci &&
                    be16_to_cpu(fs->h_ext.vlan_tci) >= VLAN_N_VID) {
                        dev_err(&hdev->pdev->dev,
 -                              "failed to config vlan_tci, invalid vlan_tci: %u, max is %u.\n",
 +                              "failed to config vlan_tci, invalid vlan_tci: %u, max is %d.\n",
                                ntohs(fs->h_ext.vlan_tci), VLAN_N_VID - 1);
                        return -EINVAL;
                }
@@@ -9681,7 -9664,7 +9681,7 @@@ int hclge_set_vport_mtu(struct hclge_vp
        /* HW supports 2 layer vlan */
        max_frm_size = new_mtu + ETH_HLEN + ETH_FCS_LEN + 2 * VLAN_HLEN;
        if (max_frm_size < HCLGE_MAC_MIN_FRAME ||
 -          max_frm_size > HCLGE_MAC_MAX_FRAME)
 +          max_frm_size > hdev->ae_dev->dev_specs.max_frm_size)
                return -EINVAL;
  
        max_frm_size = max(max_frm_size, HCLGE_MAC_DEFAULT_FRAME);
@@@ -9830,12 -9813,19 +9830,19 @@@ int hclge_reset_tqp(struct hnae3_handl
  
  void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id)
  {
+       struct hnae3_handle *handle = &vport->nic;
        struct hclge_dev *hdev = vport->back;
        int reset_try_times = 0;
        int reset_status;
        u16 queue_gid;
        int ret;
  
+       if (queue_id >= handle->kinfo.num_tqps) {
+               dev_warn(&hdev->pdev->dev, "Invalid vf queue id(%u)\n",
+                        queue_id);
+               return;
+       }
+
        queue_gid = hclge_covert_handle_qid_global(&vport->nic, queue_id);
  
        ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, true);
@@@ -10598,12 -10588,7 +10605,12 @@@ static int hclge_init_ae_dev(struct hna
                goto err_mdiobus_unreg;
        }
  
 -      hclge_rss_init_cfg(hdev);
 +      ret = hclge_rss_init_cfg(hdev);
 +      if (ret) {
 +              dev_err(&pdev->dev, "failed to init rss cfg, ret = %d\n", ret);
 +              goto err_mdiobus_unreg;
 +      }
 +
        ret = hclge_rss_init_hw(hdev);
        if (ret) {
                dev_err(&pdev->dev, "Rss init fail, ret =%d\n", ret);
@@@ -10831,7 -10816,7 +10838,7 @@@ static void hclge_reset_vf_rate(struct 
        }
  }
  
 -static int hclge_vf_rate_param_check(struct hclge_dev *hdev, int vf,
 +static int hclge_vf_rate_param_check(struct hclge_dev *hdev,
                                     int min_tx_rate, int max_tx_rate)
  {
        if (min_tx_rate != 0 ||
@@@ -10852,7 -10837,7 +10859,7 @@@ static int hclge_set_vf_rate(struct hna
        struct hclge_dev *hdev = vport->back;
        int ret;
  
 -      ret = hclge_vf_rate_param_check(hdev, vf, min_tx_rate, max_tx_rate);
 +      ret = hclge_vf_rate_param_check(hdev, min_tx_rate, max_tx_rate);
        if (ret)
                return ret;
  
@@@ -11094,7 -11079,6 +11101,7 @@@ static void hclge_get_tqps_and_rss_info
  static int hclge_set_channels(struct hnae3_handle *handle, u32 new_tqps_num,
                              bool rxfh_configured)
  {
 +      struct hnae3_ae_dev *ae_dev = pci_get_drvdata(handle->pdev);
        struct hclge_vport *vport = hclge_get_vport(handle);
        struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo;
        u16 tc_offset[HCLGE_MAX_TC_NUM] = {0};
                goto out;
  
        /* Reinitializes the rss indirect table according to the new RSS size */
 -      rss_indir = kcalloc(HCLGE_RSS_IND_TBL_SIZE, sizeof(u32), GFP_KERNEL);
 +      rss_indir = kcalloc(ae_dev->dev_specs.rss_ind_tbl_size, sizeof(u32),
 +                          GFP_KERNEL);
        if (!rss_indir)
                return -ENOMEM;
  
 -      for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++)
 +      for (i = 0; i < ae_dev->dev_specs.rss_ind_tbl_size; i++)
                rss_indir[i] = i % kinfo->rss_size;
  
        ret = hclge_set_rss(handle, rss_indir, NULL, 0);
@@@ -11823,6 -11806,7 +11830,6 @@@ static const struct hnae3_ae_ops hclge_
        .get_fec = hclge_get_fec,
        .set_fec = hclge_set_fec,
        .get_rss_key_size = hclge_get_rss_key_size,
 -      .get_rss_indir_size = hclge_get_rss_indir_size,
        .get_rss = hclge_get_rss,
        .set_rss = hclge_set_rss,
        .set_rss_tuple = hclge_set_rss_tuple,
        .enable_fd = hclge_enable_fd,
        .add_arfs_entry = hclge_add_fd_entry_by_arfs,
        .dbg_run_cmd = hclge_dbg_run_cmd,
 +      .dbg_read_cmd = hclge_dbg_read_cmd,
        .handle_hw_ras_error = hclge_handle_hw_ras_error,
        .get_hw_reset_stat = hclge_get_hw_reset_stat,
        .ae_dev_resetting = hclge_ae_dev_resetting,
index 52a3737225891e250343e76ca3b3dad21dbf6205,ffb416e088a978800a9582abd80ab5c77cec4fad..51a36e74f0881f61db54d8c5c4dafaf30634f57b
@@@ -56,7 -56,7 +56,7 @@@ static int hclge_gen_resp_to_vf(struct 
                resp_pf_to_vf->msg.resp_status = resp;
        } else {
                dev_warn(&hdev->pdev->dev,
 -                       "failed to send response to VF, response status %d is out-of-bound\n",
 +                       "failed to send response to VF, response status %u is out-of-bound\n",
                         resp);
                resp_pf_to_vf->msg.resp_status = EIO;
        }
@@@ -158,21 -158,31 +158,31 @@@ static int hclge_get_ring_chain_from_mb
                        struct hclge_vport *vport)
  {
        struct hnae3_ring_chain_node *cur_chain, *new_chain;
+       struct hclge_dev *hdev = vport->back;
        int ring_num;
-       int i = 0;
+       int i;
  
        ring_num = req->msg.ring_num;
  
        if (ring_num > HCLGE_MBX_MAX_RING_CHAIN_PARAM_NUM)
                return -ENOMEM;
  
+       for (i = 0; i < ring_num; i++) {
+               if (req->msg.param[i].tqp_index >= vport->nic.kinfo.rss_size) {
+                       dev_err(&hdev->pdev->dev, "tqp index(%u) is out of range(0-%u)\n",
+                               req->msg.param[i].tqp_index,
+                               vport->nic.kinfo.rss_size - 1);
+                       return -EINVAL;
+               }
+       }
+
        hnae3_set_bit(ring_chain->flag, HNAE3_RING_TYPE_B,
-                     req->msg.param[i].ring_type);
+                     req->msg.param[0].ring_type);
        ring_chain->tqp_index =
                hclge_get_queue_id(vport->nic.kinfo.tqp
-                                  [req->msg.param[i].tqp_index]);
+                                  [req->msg.param[0].tqp_index]);
        hnae3_set_field(ring_chain->int_gl_idx, HNAE3_RING_GL_IDX_M,
-                       HNAE3_RING_GL_IDX_S, req->msg.param[i].int_gl_index);
+                       HNAE3_RING_GL_IDX_S, req->msg.param[0].int_gl_index);
  
        cur_chain = ring_chain;
  
@@@ -597,6 -607,17 +607,17 @@@ static void hclge_get_rss_key(struct hc
  
        index = mbx_req->msg.data[0];
  
+       /* Check the query index of rss_hash_key from VF, make sure it does
+        * not exceed the size of rss_hash_key.
+        */
+       if (((index + 1) * HCLGE_RSS_MBX_RESP_LEN) >
+             sizeof(vport[0].rss_hash_key)) {
+               dev_warn(&hdev->pdev->dev,
+                        "failed to get the rss hash key, the index(%u) is invalid\n",
+                        index);
+               return;
+       }
+
        memcpy(resp_msg->data,
               &hdev->vport[0].rss_hash_key[index * HCLGE_RSS_MBX_RESP_LEN],
               HCLGE_RSS_MBX_RESP_LEN);
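
[Editor's note: both mailbox hunks above add bounds checks on VF-supplied indices (the ring's tqp_index and the RSS hash-key chunk index) before they are used to address PF memory. Below is a minimal sketch of the hash-key check only; KEY_LEN and CHUNK_LEN are illustrative values, not the driver's.]

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define KEY_LEN    40   /* illustrative key length */
    #define CHUNK_LEN   8   /* models HCLGE_RSS_MBX_RESP_LEN */

    /* Refuse chunk indices that would read past the end of the key; the
     * driver warns and bails out in the same situation. */
    static int copy_key_chunk(uint8_t *dst, const uint8_t key[KEY_LEN],
                              unsigned int index)
    {
            if ((index + 1u) * CHUNK_LEN > KEY_LEN) {
                    fprintf(stderr, "invalid rss key index %u\n", index);
                    return -1;
            }
            memcpy(dst, key + index * CHUNK_LEN, CHUNK_LEN);
            return 0;
    }

    int main(void)
    {
            uint8_t key[KEY_LEN] = { 0 }, out[CHUNK_LEN];

            printf("index 4: %d, index 5: %d\n",
                   copy_key_chunk(out, key, 4), copy_key_chunk(out, key, 5));
            return 0;
    }
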
index 481bcedb391a13933d4f50518bd97bff663938a0,a536fdbf05e196b58603c47c2c7de7c1a2afb9fe..a1579cd4bfe1bd316c176eee695ac925f7a18f79
@@@ -1384,10 -1384,10 +1384,10 @@@ static int ibmvnic_close(struct net_dev
  
  /**
   * build_hdr_data - creates L2/L3/L4 header data buffer
 - * @hdr_field - bitfield determining needed headers
 - * @skb - socket buffer
 - * @hdr_len - array of header lengths
 - * @tot_len - total length of data
 + * @hdr_field: bitfield determining needed headers
 + * @skb: socket buffer
 + * @hdr_len: array of header lengths
 + * @hdr_data: buffer to write the header to
   *
   * Reads hdr_field to determine which headers are needed by firmware.
   * Builds a buffer containing these headers.  Saves individual header
@@@ -1444,11 -1444,11 +1444,11 @@@ static int build_hdr_data(u8 hdr_field
  
  /**
   * create_hdr_descs - create header and header extension descriptors
 - * @hdr_field - bitfield determining needed headers
 - * @data - buffer containing header data
 - * @len - length of data buffer
 - * @hdr_len - array of individual header lengths
 - * @scrq_arr - descriptor array
 + * @hdr_field: bitfield determining needed headers
 + * @hdr_data: buffer containing header data
 + * @len: length of data buffer
 + * @hdr_len: array of individual header lengths
 + * @scrq_arr: descriptor array
   *
   * Creates header and, if needed, header extension descriptors and
   * places them in a descriptor array, scrq_arr
@@@ -1496,9 -1496,10 +1496,9 @@@ static int create_hdr_descs(u8 hdr_fiel
  
  /**
   * build_hdr_descs_arr - build a header descriptor array
 - * @skb - socket buffer
 - * @num_entries - number of descriptors to be sent
 - * @subcrq - first TX descriptor
 - * @hdr_field - bit field determining which headers will be sent
 + * @txbuff: tx buffer
 + * @num_entries: number of descriptors to be sent
 + * @hdr_field: bit field determining which headers will be sent
   *
   * This function will build a TX descriptor array with applicable
   * L2/L3/L4 packet header descriptors to be sent by send_subcrq_indirect.
@@@ -1924,7 -1925,93 +1924,7 @@@ static int ibmvnic_set_mac(struct net_d
        return rc;
  }
  
 -/**
 - * do_change_param_reset returns zero if we are able to keep processing reset
 - * events, or non-zero if we hit a fatal error and must halt.
 - */
 -static int do_change_param_reset(struct ibmvnic_adapter *adapter,
 -                               struct ibmvnic_rwi *rwi,
 -                               u32 reset_state)
 -{
 -      struct net_device *netdev = adapter->netdev;
 -      int i, rc;
 -
 -      netdev_dbg(adapter->netdev, "Change param resetting driver (%d)\n",
 -                 rwi->reset_reason);
 -
 -      netif_carrier_off(netdev);
 -      adapter->reset_reason = rwi->reset_reason;
 -
 -      ibmvnic_cleanup(netdev);
 -
 -      if (reset_state == VNIC_OPEN) {
 -              rc = __ibmvnic_close(netdev);
 -              if (rc)
 -                      goto out;
 -      }
 -
 -      release_resources(adapter);
 -      release_sub_crqs(adapter, 1);
 -      release_crq_queue(adapter);
 -
 -      adapter->state = VNIC_PROBED;
 -
 -      rc = init_crq_queue(adapter);
 -
 -      if (rc) {
 -              netdev_err(adapter->netdev,
 -                         "Couldn't initialize crq. rc=%d\n", rc);
 -              return rc;
 -      }
 -
 -      rc = ibmvnic_reset_init(adapter, true);
 -      if (rc) {
 -              rc = IBMVNIC_INIT_FAILED;
 -              goto out;
 -      }
 -
 -      /* If the adapter was in PROBE state prior to the reset,
 -       * exit here.
 -       */
 -      if (reset_state == VNIC_PROBED)
 -              goto out;
 -
 -      rc = ibmvnic_login(netdev);
 -      if (rc) {
 -              goto out;
 -      }
 -
 -      rc = init_resources(adapter);
 -      if (rc)
 -              goto out;
 -
 -      ibmvnic_disable_irqs(adapter);
 -
 -      adapter->state = VNIC_CLOSED;
 -
 -      if (reset_state == VNIC_CLOSED)
 -              return 0;
 -
 -      rc = __ibmvnic_open(netdev);
 -      if (rc) {
 -              rc = IBMVNIC_OPEN_FAILED;
 -              goto out;
 -      }
 -
 -      /* refresh device's multicast list */
 -      ibmvnic_set_multi(netdev);
 -
 -      /* kick napi */
 -      for (i = 0; i < adapter->req_rx_queues; i++)
 -              napi_schedule(&adapter->napi[i]);
 -
 -out:
 -      if (rc)
 -              adapter->state = reset_state;
 -      return rc;
 -}
 -
 -/**
 +/*
   * do_reset returns zero if we are able to keep processing reset events, or
   * non-zero if we hit a fatal error and must halt.
   */
@@@ -1941,11 -2028,7 +1941,11 @@@ static int do_reset(struct ibmvnic_adap
                   adapter->state, adapter->failover_pending,
                   rwi->reset_reason, reset_state);
  
 -      rtnl_lock();
 +      adapter->reset_reason = rwi->reset_reason;
 +      /* requestor of VNIC_RESET_CHANGE_PARAM already has the rtnl lock */
 +      if (!(adapter->reset_reason == VNIC_RESET_CHANGE_PARAM))
 +              rtnl_lock();
 +
        /*
         * Now that we have the rtnl lock, clear any pending failover.
         * This will ensure ibmvnic_open() has either completed or will
                adapter->failover_pending = false;
  
        netif_carrier_off(netdev);
 -      adapter->reset_reason = rwi->reset_reason;
  
        old_num_rx_queues = adapter->req_rx_queues;
        old_num_tx_queues = adapter->req_tx_queues;
        if (reset_state == VNIC_OPEN &&
            adapter->reset_reason != VNIC_RESET_MOBILITY &&
            adapter->reset_reason != VNIC_RESET_FAILOVER) {
 -              adapter->state = VNIC_CLOSING;
 +              if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) {
 +                      rc = __ibmvnic_close(netdev);
 +                      if (rc)
 +                              goto out;
 +              } else {
 +                      adapter->state = VNIC_CLOSING;
  
 -              /* Release the RTNL lock before link state change and
 -               * re-acquire after the link state change to allow
 -               * linkwatch_event to grab the RTNL lock and run during
 -               * a reset.
 -               */
 -              rtnl_unlock();
 -              rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
 -              rtnl_lock();
 -              if (rc)
 -                      goto out;
 +                      /* Release the RTNL lock before link state change and
 +                       * re-acquire after the link state change to allow
 +                       * linkwatch_event to grab the RTNL lock and run during
 +                       * a reset.
 +                       */
 +                      rtnl_unlock();
 +                      rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
 +                      rtnl_lock();
 +                      if (rc)
 +                              goto out;
  
 -              if (adapter->state != VNIC_CLOSING) {
 -                      rc = -1;
 -                      goto out;
 +                      if (adapter->state != VNIC_CLOSING) {
 +                              rc = -1;
 +                              goto out;
 +                      }
 +
 +                      adapter->state = VNIC_CLOSED;
                }
 +      }
  
 -              adapter->state = VNIC_CLOSED;
 +      if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) {
 +              release_resources(adapter);
 +              release_sub_crqs(adapter, 1);
 +              release_crq_queue(adapter);
        }
  
        if (adapter->reset_reason != VNIC_RESET_NON_FATAL) {
                 */
                adapter->state = VNIC_PROBED;
  
 -              if (adapter->reset_reason == VNIC_RESET_MOBILITY) {
 +              if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) {
 +                      rc = init_crq_queue(adapter);
 +              } else if (adapter->reset_reason == VNIC_RESET_MOBILITY) {
                        rc = ibmvnic_reenable_crq_queue(adapter);
                        release_sub_crqs(adapter, 1);
                } else {
                        goto out;
                }
  
 -              if (adapter->req_rx_queues != old_num_rx_queues ||
 +              if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) {
 +                      rc = init_resources(adapter);
 +                      if (rc)
 +                              goto out;
 +              } else if (adapter->req_rx_queues != old_num_rx_queues ||
                    adapter->req_tx_queues != old_num_tx_queues ||
                    adapter->req_rx_add_entries_per_subcrq !=
                    old_num_rx_slots ||
@@@ -2115,9 -2181,7 +2115,9 @@@ out
        /* restore the adapter state if reset failed */
        if (rc)
                adapter->state = reset_state;
 -      rtnl_unlock();
 +      /* requestor of VNIC_RESET_CHANGE_PARAM should still hold the rtnl lock */
 +      if (!(adapter->reset_reason == VNIC_RESET_CHANGE_PARAM))
 +              rtnl_unlock();
  
        netdev_dbg(adapter->netdev, "[S:%d FOP:%d] Reset done, rc %d\n",
                   adapter->state, adapter->failover_pending, rc);
@@@ -2248,7 -2312,10 +2248,7 @@@ static void __ibmvnic_reset(struct work
                }
                spin_unlock_irqrestore(&adapter->state_lock, flags);
  
 -              if (rwi->reset_reason == VNIC_RESET_CHANGE_PARAM) {
 -                      /* CHANGE_PARAM requestor holds rtnl_lock */
 -                      rc = do_change_param_reset(adapter, rwi, reset_state);
 -              } else if (adapter->force_reset_recovery) {
 +              if (adapter->force_reset_recovery) {
                        /*
                         * Since we are doing a hard reset now, clear the
                         * failover_pending flag so we don't ignore any
@@@ -2444,6 -2511,12 +2444,6 @@@ restart_poll
  
                if (!pending_scrq(adapter, rx_scrq))
                        break;
 -              /* The queue entry at the current index is peeked at above
 -               * to determine that there is a valid descriptor awaiting
 -               * processing. We want to be sure that the current slot
 -               * holds a valid descriptor before reading its contents.
 -               */
 -              dma_rmb();
                next = ibmvnic_next_scrq(adapter, rx_scrq);
                rx_buff =
                    (struct ibmvnic_rx_buff *)be64_to_cpu(next->
                if (napi_complete_done(napi, frames_processed)) {
                        enable_scrq_irq(adapter, rx_scrq);
                        if (pending_scrq(adapter, rx_scrq)) {
 -                              rmb();
                                if (napi_reschedule(napi)) {
                                        disable_scrq_irq(adapter, rx_scrq);
                                        goto restart_poll;
@@@ -3182,6 -3256,13 +3182,6 @@@ restart_loop
                int total_bytes = 0;
                int num_packets = 0;
  
 -              /* The queue entry at the current index is peeked at above
 -               * to determine that there is a valid descriptor awaiting
 -               * processing. We want to be sure that the current slot
 -               * holds a valid descriptor before reading its contents.
 -               */
 -              dma_rmb();
 -
                next = ibmvnic_next_scrq(adapter, scrq);
                for (i = 0; i < next->tx_comp.num_comps; i++) {
                        if (next->tx_comp.rcs[i])
@@@ -3555,16 -3636,11 +3555,16 @@@ static int pending_scrq(struct ibmvnic_
                        struct ibmvnic_sub_crq_queue *scrq)
  {
        union sub_crq *entry = &scrq->msgs[scrq->cur];
 +      int rc;
  
 -      if (entry->generic.first & IBMVNIC_CRQ_CMD_RSP)
 -              return 1;
 -      else
 -              return 0;
 +      rc = !!(entry->generic.first & IBMVNIC_CRQ_CMD_RSP);
 +
 +      /* Ensure that the SCRQ valid flag is loaded prior to loading the
 +       * contents of the SCRQ descriptor
 +       */
 +      dma_rmb();
 +
 +      return rc;
  }
  
  static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *adapter,
        }
        spin_unlock_irqrestore(&scrq->lock, flags);
  
 -      /* Ensure that the entire buffer descriptor has been
 -       * loaded before reading its contents
 +      /* Ensure that the SCRQ valid flag is loaded prior to loading the
 +       * contents of the SCRQ descriptor
         */
        dma_rmb();
  
@@@ -4842,7 -4918,22 +4842,22 @@@ static void ibmvnic_handle_crq(union ib
                                complete(&adapter->init_done);
                                adapter->init_done_rc = -EIO;
                        }
-                       ibmvnic_reset(adapter, VNIC_RESET_FAILOVER);
+                       rc = ibmvnic_reset(adapter, VNIC_RESET_FAILOVER);
+                       if (rc && rc != -EBUSY) {
+                               /* We were unable to schedule the failover
+                                * reset either because the adapter was still
+                                * probing (eg: during kexec) or we could not
+                                * allocate memory. Clear the failover_pending
+                                * flag since no one else will. We ignore
+                                * EBUSY because it means either FAILOVER reset
+                                * is already scheduled or the adapter is
+                                * being removed.
+                                */
+                               netdev_err(netdev,
+                                          "Error %ld scheduling failover reset\n",
+                                          rc);
+                               adapter->failover_pending = false;
+                       }
                        break;
                case IBMVNIC_CRQ_INIT_COMPLETE:
                        dev_info(dev, "Partner initialization complete\n");
index f8b85ab8be5d220542023089ab37b1dba6780637,c072eb5c07646b66db85d8392177074c0f9eac3c..1654a6e22a7df91038edcfaf6b2aecf12e7736b6
@@@ -221,20 -221,25 +221,20 @@@ static void ocelot_port_set_pvid(struc
  }
  
  int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port,
 -                             bool vlan_aware, struct switchdev_trans *trans)
 +                             bool vlan_aware)
  {
 +      struct ocelot_vcap_block *block = &ocelot->block[VCAP_IS1];
        struct ocelot_port *ocelot_port = ocelot->ports[port];
 +      struct ocelot_vcap_filter *filter;
        u32 val;
  
 -      if (switchdev_trans_ph_prepare(trans)) {
 -              struct ocelot_vcap_block *block = &ocelot->block[VCAP_IS1];
 -              struct ocelot_vcap_filter *filter;
 -
 -              list_for_each_entry(filter, &block->rules, list) {
 -                      if (filter->ingress_port_mask & BIT(port) &&
 -                          filter->action.vid_replace_ena) {
 -                              dev_err(ocelot->dev,
 -                                      "Cannot change VLAN state with vlan modify rules active\n");
 -                              return -EBUSY;
 -                      }
 +      list_for_each_entry(filter, &block->rules, list) {
 +              if (filter->ingress_port_mask & BIT(port) &&
 +                  filter->action.vid_replace_ena) {
 +                      dev_err(ocelot->dev,
 +                              "Cannot change VLAN state with vlan modify rules active\n");
 +                      return -EBUSY;
                }
 -
 -              return 0;
        }
  
        ocelot_port->vlan_aware = vlan_aware;
@@@ -370,6 -375,60 +370,60 @@@ static void ocelot_vlan_init(struct oce
        }
  }
  
+ static u32 ocelot_read_eq_avail(struct ocelot *ocelot, int port)
+ {
+       return ocelot_read_rix(ocelot, QSYS_SW_STATUS, port);
+ }
+
+ int ocelot_port_flush(struct ocelot *ocelot, int port)
+ {
+       int err, val;
+
+       /* Disable dequeuing from the egress queues */
+       ocelot_rmw_rix(ocelot, QSYS_PORT_MODE_DEQUEUE_DIS,
+                      QSYS_PORT_MODE_DEQUEUE_DIS,
+                      QSYS_PORT_MODE, port);
+
+       /* Disable flow control */
+       ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, 0);
+
+       /* Disable priority flow control */
+       ocelot_fields_write(ocelot, port,
+                           QSYS_SWITCH_PORT_MODE_TX_PFC_ENA, 0);
+
+       /* Wait at least the time it takes to receive a frame of maximum length
+        * at the port.
+        * Worst-case delays for 10 kilobyte jumbo frames are:
+        * 8 ms on a 10M port
+        * 800 μs on a 100M port
+        * 80 μs on a 1G port
+        * 32 μs on a 2.5G port
+        */
+       usleep_range(8000, 10000);
+
+       /* Disable half duplex backpressure. */
+       ocelot_rmw_rix(ocelot, 0, SYS_FRONT_PORT_MODE_HDX_MODE,
+                      SYS_FRONT_PORT_MODE, port);
+
+       /* Flush the queues associated with the port. */
+       ocelot_rmw_gix(ocelot, REW_PORT_CFG_FLUSH_ENA, REW_PORT_CFG_FLUSH_ENA,
+                      REW_PORT_CFG, port);
+
+       /* Enable dequeuing from the egress queues. */
+       ocelot_rmw_rix(ocelot, 0, QSYS_PORT_MODE_DEQUEUE_DIS, QSYS_PORT_MODE,
+                      port);
+
+       /* Wait until flushing is complete. */
+       err = read_poll_timeout(ocelot_read_eq_avail, val, !val,
+                               100, 2000000, false, ocelot, port);
+
+       /* Clear flushing again. */
+       ocelot_rmw_gix(ocelot, 0, REW_PORT_CFG_FLUSH_ENA, REW_PORT_CFG, port);
+
+       return err;
+ }
+ EXPORT_SYMBOL(ocelot_port_flush);
+
  void ocelot_adjust_link(struct ocelot *ocelot, int port,
                        struct phy_device *phydev)
  {
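
[Editor's note: ocelot_port_flush() above relies on read_poll_timeout() from <linux/iopoll.h> to wait until the egress queues drain. Below is a minimal userspace model of that poll-with-timeout pattern, returning -ETIMEDOUT on expiry as the kernel macro does; no real sleeping happens, only the bookkeeping, and fake_eq_avail() is a made-up stand-in for the register read.]

    #include <errno.h>
    #include <stdio.h>

    static int poll_until_zero(unsigned int (*read_op)(void *), void *ctx,
                               unsigned long sleep_us, unsigned long timeout_us)
    {
            unsigned long waited = 0;

            for (;;) {
                    if (read_op(ctx) == 0)
                            return 0;
                    if (waited >= timeout_us)
                            return -ETIMEDOUT;
                    waited += sleep_us;     /* the kernel macro sleeps here */
            }
    }

    /* Pretends to be the register read: reports a few queued frames, then an
     * empty queue. */
    static unsigned int fake_eq_avail(void *ctx)
    {
            unsigned int *remaining = ctx;

            return *remaining ? (*remaining)-- : 0;
    }

    int main(void)
    {
            unsigned int frames_left = 3;

            printf("flush result: %d\n",
                   poll_until_zero(fake_eq_avail, &frames_left, 100, 2000000));
            return 0;
    }
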
@@@ -889,102 -948,10 +943,102 @@@ int ocelot_get_ts_info(struct ocelot *o
  }
  EXPORT_SYMBOL(ocelot_get_ts_info);
  
 +static u32 ocelot_get_bond_mask(struct ocelot *ocelot, struct net_device *bond,
 +                              bool only_active_ports)
 +{
 +      u32 mask = 0;
 +      int port;
 +
 +      for (port = 0; port < ocelot->num_phys_ports; port++) {
 +              struct ocelot_port *ocelot_port = ocelot->ports[port];
 +
 +              if (!ocelot_port)
 +                      continue;
 +
 +              if (ocelot_port->bond == bond) {
 +                      if (only_active_ports && !ocelot_port->lag_tx_active)
 +                              continue;
 +
 +                      mask |= BIT(port);
 +              }
 +      }
 +
 +      return mask;
 +}
 +
 +static u32 ocelot_get_dsa_8021q_cpu_mask(struct ocelot *ocelot)
 +{
 +      u32 mask = 0;
 +      int port;
 +
 +      for (port = 0; port < ocelot->num_phys_ports; port++) {
 +              struct ocelot_port *ocelot_port = ocelot->ports[port];
 +
 +              if (!ocelot_port)
 +                      continue;
 +
 +              if (ocelot_port->is_dsa_8021q_cpu)
 +                      mask |= BIT(port);
 +      }
 +
 +      return mask;
 +}
 +
 +void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot)
 +{
 +      unsigned long cpu_fwd_mask;
 +      int port;
 +
 +      /* If a DSA tag_8021q CPU exists, it needs to be included in the
 +       * regular forwarding path of the front ports regardless of whether
 +       * those are bridged or standalone.
 +       * If DSA tag_8021q is not used, this returns 0, which is fine because
 +       * the hardware-based CPU port module can be a destination for packets
 +       * even if it isn't part of PGID_SRC.
 +       */
 +      cpu_fwd_mask = ocelot_get_dsa_8021q_cpu_mask(ocelot);
 +
 +      /* Apply FWD mask. The loop is needed to add/remove the current port as
 +       * a source for the other ports.
 +       */
 +      for (port = 0; port < ocelot->num_phys_ports; port++) {
 +              struct ocelot_port *ocelot_port = ocelot->ports[port];
 +              unsigned long mask;
 +
 +              if (!ocelot_port) {
 +                      /* Unused ports can't send anywhere */
 +                      mask = 0;
 +              } else if (ocelot_port->is_dsa_8021q_cpu) {
 +                      /* The DSA tag_8021q CPU ports need to be able to
 +                       * forward packets to all other ports except for
 +                       * themselves
 +                       */
 +                      mask = GENMASK(ocelot->num_phys_ports - 1, 0);
 +                      mask &= ~cpu_fwd_mask;
 +              } else if (ocelot->bridge_fwd_mask & BIT(port)) {
 +                      struct net_device *bond = ocelot_port->bond;
 +
 +                      mask = ocelot->bridge_fwd_mask & ~BIT(port);
 +                      if (bond) {
 +                              mask &= ~ocelot_get_bond_mask(ocelot, bond,
 +                                                            false);
 +                      }
 +              } else {
 +                      /* Standalone ports forward only to DSA tag_8021q CPU
 +                       * ports (if those exist), or to the hardware CPU port
 +                       * module otherwise.
 +                       */
 +                      mask = cpu_fwd_mask;
 +              }
 +
 +              ocelot_write_rix(ocelot, mask, ANA_PGID_PGID, PGID_SRC + port);
 +      }
 +}
 +EXPORT_SYMBOL(ocelot_apply_bridge_fwd_mask);
 +
  void ocelot_bridge_stp_state_set(struct ocelot *ocelot, int port, u8 state)
  {
        u32 port_cfg;
 -      int p, i;
  
        if (!(BIT(port) & ocelot->bridge_mask))
                return;
  
        ocelot_write_gix(ocelot, port_cfg, ANA_PORT_PORT_CFG, port);
  
 -      /* Apply FWD mask. The loop is needed to add/remove the current port as
 -       * a source for the other ports.
 -       */
 -      for (p = 0; p < ocelot->num_phys_ports; p++) {
 -              if (ocelot->bridge_fwd_mask & BIT(p)) {
 -                      unsigned long mask = ocelot->bridge_fwd_mask & ~BIT(p);
 -
 -                      for (i = 0; i < ocelot->num_phys_ports; i++) {
 -                              unsigned long bond_mask = ocelot->lags[i];
 -
 -                              if (!bond_mask)
 -                                      continue;
 -
 -                              if (bond_mask & BIT(p)) {
 -                                      mask &= ~bond_mask;
 -                                      break;
 -                              }
 -                      }
 -
 -                      ocelot_write_rix(ocelot, mask,
 -                                       ANA_PGID_PGID, PGID_SRC + p);
 -              } else {
 -                      ocelot_write_rix(ocelot, 0,
 -                                       ANA_PGID_PGID, PGID_SRC + p);
 -              }
 -      }
 +      ocelot_apply_bridge_fwd_mask(ocelot);
  }
  EXPORT_SYMBOL(ocelot_bridge_stp_state_set);
  
@@@ -1254,6 -1246,7 +1308,6 @@@ int ocelot_port_bridge_leave(struct oce
                             struct net_device *bridge)
  {
        struct ocelot_vlan pvid = {0}, native_vlan = {0};
 -      struct switchdev_trans trans;
        int ret;
  
        ocelot->bridge_mask &= ~BIT(port);
        if (!ocelot->bridge_mask)
                ocelot->hw_bridge_dev = NULL;
  
 -      trans.ph_prepare = true;
 -      ret = ocelot_port_vlan_filtering(ocelot, port, false, &trans);
 -      if (ret)
 -              return ret;
 -
 -      trans.ph_prepare = false;
 -      ret = ocelot_port_vlan_filtering(ocelot, port, false, &trans);
 +      ret = ocelot_port_vlan_filtering(ocelot, port, false);
        if (ret)
                return ret;
  
@@@ -1274,7 -1273,6 +1328,7 @@@ EXPORT_SYMBOL(ocelot_port_bridge_leave)
  
  static void ocelot_set_aggr_pgids(struct ocelot *ocelot)
  {
 +      unsigned long visited = GENMASK(ocelot->num_phys_ports - 1, 0);
        int i, port, lag;
  
        /* Reset destination and aggregation PGIDS */
                ocelot_write_rix(ocelot, GENMASK(ocelot->num_phys_ports - 1, 0),
                                 ANA_PGID_PGID, i);
  
 -      /* Now, set PGIDs for each LAG */
 +      /* The visited ports bitmask holds the list of ports offloading any
 +       * bonding interface. Initially we mark all these ports as unvisited,
 +       * then every time we visit a port in this bitmask, we know that it is
 +       * the lowest numbered port, i.e. the one whose logical ID == physical
 +       * port ID == LAG ID. So we mark as visited all further ports in the
 +       * bitmask that are offloading the same bonding interface. This way,
 +       * we set up the aggregation PGIDs only once per bonding interface.
 +       */
 +      for (port = 0; port < ocelot->num_phys_ports; port++) {
 +              struct ocelot_port *ocelot_port = ocelot->ports[port];
 +
 +              if (!ocelot_port || !ocelot_port->bond)
 +                      continue;
 +
 +              visited &= ~BIT(port);
 +      }
 +
 +      /* Now, set PGIDs for each active LAG */
        for (lag = 0; lag < ocelot->num_phys_ports; lag++) {
 +              struct net_device *bond = ocelot->ports[lag]->bond;
 +              int num_active_ports = 0;
                unsigned long bond_mask;
 -              int aggr_count = 0;
                u8 aggr_idx[16];
  
 -              bond_mask = ocelot->lags[lag];
 -              if (!bond_mask)
 +              if (!bond || (visited & BIT(lag)))
                        continue;
  
 +              bond_mask = ocelot_get_bond_mask(ocelot, bond, true);
 +
                for_each_set_bit(port, &bond_mask, ocelot->num_phys_ports) {
                        // Destination mask
                        ocelot_write_rix(ocelot, bond_mask,
                                         ANA_PGID_PGID, port);
 -                      aggr_idx[aggr_count] = port;
 -                      aggr_count++;
 +                      aggr_idx[num_active_ports++] = port;
                }
  
                for_each_aggr_pgid(ocelot, i) {
  
                        ac = ocelot_read_rix(ocelot, ANA_PGID_PGID, i);
                        ac &= ~bond_mask;
 -                      ac |= BIT(aggr_idx[i % aggr_count]);
 +                      /* Don't do division by zero if there was no active
 +                       * port. Just make all aggregation codes zero.
 +                       */
 +                      if (num_active_ports)
 +                              ac |= BIT(aggr_idx[i % num_active_ports]);
                        ocelot_write_rix(ocelot, ac, ANA_PGID_PGID, i);
                }
 -      }
 -}
  
 -static void ocelot_setup_lag(struct ocelot *ocelot, int lag)
 -{
 -      unsigned long bond_mask = ocelot->lags[lag];
 -      unsigned int p;
 -
 -      for_each_set_bit(p, &bond_mask, ocelot->num_phys_ports) {
 -              u32 port_cfg = ocelot_read_gix(ocelot, ANA_PORT_PORT_CFG, p);
 +              /* Mark all ports in the same LAG as visited to avoid applying
 +               * the same config again.
 +               */
 +              for (port = lag; port < ocelot->num_phys_ports; port++) {
 +                      struct ocelot_port *ocelot_port = ocelot->ports[port];
  
 -              port_cfg &= ~ANA_PORT_PORT_CFG_PORTID_VAL_M;
 +                      if (!ocelot_port)
 +                              continue;
  
 -              /* Use lag port as logical port for port i */
 -              ocelot_write_gix(ocelot, port_cfg |
 -                               ANA_PORT_PORT_CFG_PORTID_VAL(lag),
 -                               ANA_PORT_PORT_CFG, p);
 +                      if (ocelot_port->bond == bond)
 +                              visited |= BIT(port);
 +              }
        }
  }
  
 -int ocelot_port_lag_join(struct ocelot *ocelot, int port,
 -                       struct net_device *bond)
 +/* When offloading a bonding interface, the switch ports configured under the
 + * same bond must have the same logical port ID, equal to the physical port ID
 + * of the lowest numbered physical port in that bond. Otherwise, in standalone/
 + * bridged mode, each port has a logical port ID equal to its physical port ID.
 + */
 +static void ocelot_setup_logical_port_ids(struct ocelot *ocelot)
  {
 -      struct net_device *ndev;
 -      u32 bond_mask = 0;
 -      int lag, lp;
 +      int port;
  
 -      rcu_read_lock();
 -      for_each_netdev_in_bond_rcu(bond, ndev) {
 -              struct ocelot_port_private *priv = netdev_priv(ndev);
 +      for (port = 0; port < ocelot->num_phys_ports; port++) {
 +              struct ocelot_port *ocelot_port = ocelot->ports[port];
 +              struct net_device *bond;
  
 -              bond_mask |= BIT(priv->chip_port);
 -      }
 -      rcu_read_unlock();
 +              if (!ocelot_port)
 +                      continue;
  
 -      lp = __ffs(bond_mask);
 +              bond = ocelot_port->bond;
 +              if (bond) {
 +                      int lag = __ffs(ocelot_get_bond_mask(ocelot, bond,
 +                                                           false));
  
 -      /* If the new port is the lowest one, use it as the logical port from
 -       * now on
 -       */
 -      if (port == lp) {
 -              lag = port;
 -              ocelot->lags[port] = bond_mask;
 -              bond_mask &= ~BIT(port);
 -              if (bond_mask) {
 -                      lp = __ffs(bond_mask);
 -                      ocelot->lags[lp] = 0;
 +                      ocelot_rmw_gix(ocelot,
 +                                     ANA_PORT_PORT_CFG_PORTID_VAL(lag),
 +                                     ANA_PORT_PORT_CFG_PORTID_VAL_M,
 +                                     ANA_PORT_PORT_CFG, port);
 +              } else {
 +                      ocelot_rmw_gix(ocelot,
 +                                     ANA_PORT_PORT_CFG_PORTID_VAL(port),
 +                                     ANA_PORT_PORT_CFG_PORTID_VAL_M,
 +                                     ANA_PORT_PORT_CFG, port);
                }
 -      } else {
 -              lag = lp;
 -              ocelot->lags[lp] |= BIT(port);
        }
 +}
  
 -      ocelot_setup_lag(ocelot, lag);
 +int ocelot_port_lag_join(struct ocelot *ocelot, int port,
 +                       struct net_device *bond,
 +                       struct netdev_lag_upper_info *info)
 +{
 +      if (info->tx_type != NETDEV_LAG_TX_TYPE_HASH)
 +              return -EOPNOTSUPP;
 +
 +      ocelot->ports[port]->bond = bond;
 +
 +      ocelot_setup_logical_port_ids(ocelot);
 +      ocelot_apply_bridge_fwd_mask(ocelot);
        ocelot_set_aggr_pgids(ocelot);
  
        return 0;
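
[Editor's note: a toy of the aggregation-PGID balancing done in ocelot_set_aggr_pgids() above: each aggregation code is given to one active LAG member in round-robin fashion, and to none at all when the bond has no active ports (the division-by-zero case the new driver comment guards against). The PGID count and port numbers are illustrative.]

    #include <stdint.h>
    #include <stdio.h>

    #define NUM_AGGR_PGIDS 16       /* illustrative; not the hardware's count */

    static void set_aggr_pgids(uint32_t pgid[NUM_AGGR_PGIDS],
                               const uint8_t *active_ports, int num_active)
    {
            for (int i = 0; i < NUM_AGGR_PGIDS; i++) {
                    pgid[i] = 0;
                    if (num_active)
                            pgid[i] |= 1u << active_ports[i % num_active];
            }
    }

    int main(void)
    {
            uint32_t pgid[NUM_AGGR_PGIDS];
            uint8_t active[] = { 2, 5 };    /* two active members of the bond */

            set_aggr_pgids(pgid, active, 2);
            printf("pgid[0] = 0x%x, pgid[1] = 0x%x\n", pgid[0], pgid[1]);
            return 0;
    }
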
@@@ -1403,24 -1372,33 +1457,24 @@@ EXPORT_SYMBOL(ocelot_port_lag_join)
  void ocelot_port_lag_leave(struct ocelot *ocelot, int port,
                           struct net_device *bond)
  {
 -      u32 port_cfg;
 -      int i;
 +      ocelot->ports[port]->bond = NULL;
  
 -      /* Remove port from any lag */
 -      for (i = 0; i < ocelot->num_phys_ports; i++)
 -              ocelot->lags[i] &= ~BIT(port);
 -
 -      /* if it was the logical port of the lag, move the lag config to the
 -       * next port
 -       */
 -      if (ocelot->lags[port]) {
 -              int n = __ffs(ocelot->lags[port]);
 -
 -              ocelot->lags[n] = ocelot->lags[port];
 -              ocelot->lags[port] = 0;
 +      ocelot_setup_logical_port_ids(ocelot);
 +      ocelot_apply_bridge_fwd_mask(ocelot);
 +      ocelot_set_aggr_pgids(ocelot);
 +}
 +EXPORT_SYMBOL(ocelot_port_lag_leave);
  
 -              ocelot_setup_lag(ocelot, n);
 -      }
 +void ocelot_port_lag_change(struct ocelot *ocelot, int port, bool lag_tx_active)
 +{
 +      struct ocelot_port *ocelot_port = ocelot->ports[port];
  
 -      port_cfg = ocelot_read_gix(ocelot, ANA_PORT_PORT_CFG, port);
 -      port_cfg &= ~ANA_PORT_PORT_CFG_PORTID_VAL_M;
 -      ocelot_write_gix(ocelot, port_cfg | ANA_PORT_PORT_CFG_PORTID_VAL(port),
 -                       ANA_PORT_PORT_CFG, port);
 +      ocelot_port->lag_tx_active = lag_tx_active;
  
 +      /* Rebalance the LAGs */
        ocelot_set_aggr_pgids(ocelot);
  }
 -EXPORT_SYMBOL(ocelot_port_lag_leave);
 +EXPORT_SYMBOL(ocelot_port_lag_change);
  
  /* Configure the maximum SDU (L2 payload) on RX to the value specified in @sdu.
   * The length of VLAN tags is accounted for automatically via DEV_MAC_TAGS_CFG.
@@@ -1438,9 -1416,9 +1492,9 @@@ void ocelot_port_set_maxlen(struct ocel
        if (port == ocelot->npi) {
                maxlen += OCELOT_TAG_LEN;
  
 -              if (ocelot->inj_prefix == OCELOT_TAG_PREFIX_SHORT)
 +              if (ocelot->npi_inj_prefix == OCELOT_TAG_PREFIX_SHORT)
                        maxlen += OCELOT_SHORT_PREFIX_LEN;
 -              else if (ocelot->inj_prefix == OCELOT_TAG_PREFIX_LONG)
 +              else if (ocelot->npi_inj_prefix == OCELOT_TAG_PREFIX_LONG)
                        maxlen += OCELOT_LONG_PREFIX_LEN;
        }
  
                            pause_stop);
  
        /* Tail dropping watermarks */
 -      atop_tot = (ocelot->shared_queue_sz - 9 * maxlen) /
 +      atop_tot = (ocelot->packet_buffer_size - 9 * maxlen) /
                   OCELOT_BUFFER_CELL_SZ;
        atop = (9 * maxlen) / OCELOT_BUFFER_CELL_SZ;
        ocelot_write_rix(ocelot, ocelot->ops->wm_enc(atop), SYS_ATOP, port);
@@@ -1470,9 -1448,9 +1524,9 @@@ int ocelot_get_max_mtu(struct ocelot *o
        if (port == ocelot->npi) {
                max_mtu -= OCELOT_TAG_LEN;
  
 -              if (ocelot->inj_prefix == OCELOT_TAG_PREFIX_SHORT)
 +              if (ocelot->npi_inj_prefix == OCELOT_TAG_PREFIX_SHORT)
                        max_mtu -= OCELOT_SHORT_PREFIX_LEN;
 -              else if (ocelot->inj_prefix == OCELOT_TAG_PREFIX_LONG)
 +              else if (ocelot->npi_inj_prefix == OCELOT_TAG_PREFIX_LONG)
                        max_mtu -= OCELOT_LONG_PREFIX_LEN;
        }
  
@@@ -1557,9 -1535,9 +1611,9 @@@ static void ocelot_cpu_port_init(struc
        ocelot_fields_write(ocelot, cpu, QSYS_SWITCH_PORT_MODE_PORT_ENA, 1);
        /* CPU port Injection/Extraction configuration */
        ocelot_fields_write(ocelot, cpu, SYS_PORT_MODE_INCL_XTR_HDR,
 -                          ocelot->xtr_prefix);
 +                          OCELOT_TAG_PREFIX_NONE);
        ocelot_fields_write(ocelot, cpu, SYS_PORT_MODE_INCL_INJ_HDR,
 -                          ocelot->inj_prefix);
 +                          OCELOT_TAG_PREFIX_NONE);
  
        /* Configure the CPU port to be VLAN aware */
        ocelot_write_gix(ocelot, ANA_PORT_VLAN_CFG_VLAN_VID(0) |
                         ANA_PORT_VLAN_CFG, cpu);
  }
  
 +static void ocelot_detect_features(struct ocelot *ocelot)
 +{
 +      int mmgt, eq_ctrl;
 +
 +      /* For Ocelot, Felix, Seville, Serval etc, SYS:MMGT:MMGT:FREECNT holds
 +       * the number of 240-byte free memory words (aka 4-cell chunks) and not
 +       * 192 bytes as the documentation incorrectly says.
 +       */
 +      mmgt = ocelot_read(ocelot, SYS_MMGT);
 +      ocelot->packet_buffer_size = 240 * SYS_MMGT_FREECNT(mmgt);
 +
 +      eq_ctrl = ocelot_read(ocelot, QSYS_EQ_CTRL);
 +      ocelot->num_frame_refs = QSYS_MMGT_EQ_CTRL_FP_FREE_CNT(eq_ctrl);
 +}
 +
  int ocelot_init(struct ocelot *ocelot)
  {
        char queue_name[32];
                }
        }
  
 -      ocelot->lags = devm_kcalloc(ocelot->dev, ocelot->num_phys_ports,
 -                                  sizeof(u32), GFP_KERNEL);
 -      if (!ocelot->lags)
 -              return -ENOMEM;
 -
        ocelot->stats = devm_kcalloc(ocelot->dev,
                                     ocelot->num_phys_ports * ocelot->num_stats,
                                     sizeof(u64), GFP_KERNEL);
  
        INIT_LIST_HEAD(&ocelot->multicast);
        INIT_LIST_HEAD(&ocelot->pgids);
 +      ocelot_detect_features(ocelot);
        ocelot_mact_init(ocelot);
        ocelot_vlan_init(ocelot);
        ocelot_vcap_init(ocelot);
        ocelot_write(ocelot, ANA_AGGR_CFG_AC_SMAC_ENA |
                             ANA_AGGR_CFG_AC_DMAC_ENA |
                             ANA_AGGR_CFG_AC_IP4_SIPDIP_ENA |
 -                           ANA_AGGR_CFG_AC_IP4_TCPUDP_ENA, ANA_AGGR_CFG);
 +                           ANA_AGGR_CFG_AC_IP4_TCPUDP_ENA |
 +                           ANA_AGGR_CFG_AC_IP6_FLOW_LBL_ENA |
 +                           ANA_AGGR_CFG_AC_IP6_TCPUDP_ENA,
 +                           ANA_AGGR_CFG);
  
        /* Set MAC age time to default value. The entry is aged after
         * 2*AGE_PERIOD
index 9db1ea3affbb37018d4e0c2cbaf56661064ae0df,13bd48a75db7692fb5ab221ea587d60c088e325f..dc3f73c3b33ef61189a02436949f38f6919f18f5
@@@ -37,10 -37,6 +37,10 @@@ void netvsc_switch_datapath(struct net_
        struct netvsc_device *nv_dev = rtnl_dereference(net_device_ctx->nvdev);
        struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt;
  
 +      /* Block sending traffic to VF if it's about to be gone */
 +      if (!vf)
 +              net_device_ctx->data_path_is_vf = vf;
 +
        memset(init_pkt, 0, sizeof(struct nvsp_message));
        init_pkt->hdr.msg_type = NVSP_MSG4_TYPE_SWITCH_DATA_PATH;
        if (vf)
  
        vmbus_sendpacket(dev->channel, init_pkt,
                               sizeof(struct nvsp_message),
 -                             VMBUS_RQST_ID_NO_RESPONSE,
 -                             VM_PKT_DATA_INBAND, 0);
 +                             (unsigned long)init_pkt,
 +                             VM_PKT_DATA_INBAND,
 +                             VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 +      wait_for_completion(&nv_dev->channel_init_wait);
 +      net_device_ctx->data_path_is_vf = vf;
  }
  
  /* Worker to setup sub channels on initial setup
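
[Editor's note: the netvsc hunk above turns the data-path switch into a request/response exchange: the NVSP message is sent with a completion requested and the caller blocks on channel_init_wait until the host acknowledges it. The sketch below only models that handshake, with an atomic flag standing in for the kernel's struct completion; names here are made up for illustration.]

    #include <stdatomic.h>
    #include <stdio.h>

    /* toy_complete() models complete(), toy_wait_for_completion() models
     * wait_for_completion() (which sleeps instead of spinning). */
    struct toy_completion {
            atomic_int done;
    };

    static void toy_complete(struct toy_completion *c)
    {
            atomic_store(&c->done, 1);
    }

    static void toy_wait_for_completion(struct toy_completion *c)
    {
            while (!atomic_load(&c->done))
                    ;
    }

    int main(void)
    {
            struct toy_completion switch_done = { 0 };

            /* "Send" the switch-datapath request with a completion requested,
             * then pretend the host's acknowledgement arrived. */
            toy_complete(&switch_done);
            toy_wait_for_completion(&switch_done);
            printf("data path state recorded only after the host acknowledged\n");
            return 0;
    }
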
@@@ -131,7 -124,6 +131,7 @@@ static void free_netvsc_device(struct r
  
        for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
                xdp_rxq_info_unreg(&nvdev->chan_table[i].xdp_rxq);
 +              kfree(nvdev->chan_table[i].recv_buf);
                vfree(nvdev->chan_table[i].mrc.slots);
        }
  
@@@ -311,7 -303,7 +311,7 @@@ static int netvsc_init_buf(struct hv_de
        struct nvsp_message *init_packet;
        unsigned int buf_size;
        size_t map_words;
 -      int ret = 0;
 +      int i, ret = 0;
  
        /* Get receive buffer area. */
        buf_size = device_info->recv_sections * device_info->recv_section_size;
                goto cleanup;
        }
  
 +      for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
 +              struct netvsc_channel *nvchan = &net_device->chan_table[i];
 +
 +              nvchan->recv_buf = kzalloc(net_device->recv_section_size, GFP_KERNEL);
 +              if (nvchan->recv_buf == NULL) {
 +                      ret = -ENOMEM;
 +                      goto cleanup;
 +              }
 +      }
 +
        /* Setup receive completion ring.
         * Add 1 to the recv_section_cnt because at least one entry in a
         * ring buffer has to be empty.
@@@ -772,31 -754,8 +772,31 @@@ static void netvsc_send_completion(stru
                                   const struct vmpacket_descriptor *desc,
                                   int budget)
  {
 -      const struct nvsp_message *nvsp_packet = hv_pkt_data(desc);
 +      const struct nvsp_message *nvsp_packet;
        u32 msglen = hv_pkt_datalen(desc);
 +      struct nvsp_message *pkt_rqst;
 +      u64 cmd_rqst;
 +
 +      /* First check if this is a VMBUS completion without data payload */
 +      if (!msglen) {
 +              cmd_rqst = vmbus_request_addr(&incoming_channel->requestor,
 +                                            (u64)desc->trans_id);
 +              if (cmd_rqst == VMBUS_RQST_ERROR) {
 +                      netdev_err(ndev, "Invalid transaction id\n");
 +                      return;
 +              }
 +
 +              pkt_rqst = (struct nvsp_message *)(uintptr_t)cmd_rqst;
 +              switch (pkt_rqst->hdr.msg_type) {
 +              case NVSP_MSG4_TYPE_SWITCH_DATA_PATH:
 +                      complete(&net_device->channel_init_wait);
 +                      break;
 +
 +              default:
 +                      netdev_err(ndev, "Unexpected VMBUS completion!!\n");
 +              }
 +              return;
 +      }
  
        /* Ensure packet is big enough to read header fields */
        if (msglen < sizeof(struct nvsp_message_header)) {
                return;
        }
  
 +      nvsp_packet = hv_pkt_data(desc);
        switch (nvsp_packet->hdr.msg_type) {
        case NVSP_MSG_TYPE_INIT_COMPLETE:
                if (msglen < sizeof(struct nvsp_message_header) +
@@@ -929,7 -887,6 +929,7 @@@ static inline int netvsc_send_pkt
        int ret;
        u32 ring_avail = hv_get_avail_to_write_percent(&out_channel->outbound);
  
 +      memset(&nvmsg, 0, sizeof(struct nvsp_message));
        nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
        if (skb)
                rpkt->channel_type = 0;         /* 0 is RMC_DATA */
@@@ -1295,19 -1252,6 +1295,19 @@@ static int netvsc_receive(struct net_de
                        continue;
                }
  
 +              /* We're going to copy (sections of) the packet into nvchan->recv_buf;
 +               * make sure that nvchan->recv_buf is large enough to hold the packet.
 +               */
 +              if (unlikely(buflen > net_device->recv_section_size)) {
 +                      nvchan->rsc.cnt = 0;
 +                      status = NVSP_STAT_FAIL;
 +                      netif_err(net_device_ctx, rx_err, ndev,
 +                                "Packet too big: buflen=%u recv_section_size=%u\n",
 +                                buflen, net_device->recv_section_size);
 +
 +                      continue;
 +              }
 +
                data = recv_buf + offset;
  
                nvchan->rsc.is_last = (i == count - 1);
                ret = rndis_filter_receive(ndev, net_device,
                                           nvchan, data, buflen);
  
-               if (unlikely(ret != NVSP_STAT_SUCCESS))
+               if (unlikely(ret != NVSP_STAT_SUCCESS)) {
+                       /* Drop incomplete packet */
+                       nvchan->rsc.cnt = 0;
                        status = NVSP_STAT_FAIL;
+               }
        }
  
        enq_receive_complete(ndev, net_device, q_idx,
@@@ -1362,7 -1309,7 +1365,7 @@@ static void netvsc_send_table(struct ne
                         sizeof(union nvsp_6_message_uber);
  
        /* Boundary check for all versions */
 -      if (offset > msglen - count * sizeof(u32)) {
 +      if (msglen < count * sizeof(u32) || offset > msglen - count * sizeof(u32)) {
                netdev_err(ndev, "Received send-table offset too big:%u\n",
                           offset);
                return;
index 0c2ebe7ac6554377453dc39a81b972ef5adcb781,3aab2b867fc0d082374683800e55b9a5ce418872..123cc9d25f5ed52fa66698348aab827a263b4975
@@@ -127,89 -127,70 +127,89 @@@ static void put_rndis_request(struct rn
  }
  
  static void dump_rndis_message(struct net_device *netdev,
 -                             const struct rndis_message *rndis_msg)
 +                             const struct rndis_message *rndis_msg,
 +                             const void *data)
  {
        switch (rndis_msg->ndis_msg_type) {
        case RNDIS_MSG_PACKET:
 -              netdev_dbg(netdev, "RNDIS_MSG_PACKET (len %u, "
 -                         "data offset %u data len %u, # oob %u, "
 -                         "oob offset %u, oob len %u, pkt offset %u, "
 -                         "pkt len %u\n",
 -                         rndis_msg->msg_len,
 -                         rndis_msg->msg.pkt.data_offset,
 -                         rndis_msg->msg.pkt.data_len,
 -                         rndis_msg->msg.pkt.num_oob_data_elements,
 -                         rndis_msg->msg.pkt.oob_data_offset,
 -                         rndis_msg->msg.pkt.oob_data_len,
 -                         rndis_msg->msg.pkt.per_pkt_info_offset,
 -                         rndis_msg->msg.pkt.per_pkt_info_len);
 +              if (rndis_msg->msg_len - RNDIS_HEADER_SIZE >= sizeof(struct rndis_packet)) {
 +                      const struct rndis_packet *pkt = data + RNDIS_HEADER_SIZE;
 +                      netdev_dbg(netdev, "RNDIS_MSG_PACKET (len %u, "
 +                                 "data offset %u data len %u, # oob %u, "
 +                                 "oob offset %u, oob len %u, pkt offset %u, "
 +                                 "pkt len %u\n",
 +                                 rndis_msg->msg_len,
 +                                 pkt->data_offset,
 +                                 pkt->data_len,
 +                                 pkt->num_oob_data_elements,
 +                                 pkt->oob_data_offset,
 +                                 pkt->oob_data_len,
 +                                 pkt->per_pkt_info_offset,
 +                                 pkt->per_pkt_info_len);
 +              }
                break;
  
        case RNDIS_MSG_INIT_C:
 -              netdev_dbg(netdev, "RNDIS_MSG_INIT_C "
 -                      "(len %u, id 0x%x, status 0x%x, major %d, minor %d, "
 -                      "device flags %d, max xfer size 0x%x, max pkts %u, "
 -                      "pkt aligned %u)\n",
 -                      rndis_msg->msg_len,
 -                      rndis_msg->msg.init_complete.req_id,
 -                      rndis_msg->msg.init_complete.status,
 -                      rndis_msg->msg.init_complete.major_ver,
 -                      rndis_msg->msg.init_complete.minor_ver,
 -                      rndis_msg->msg.init_complete.dev_flags,
 -                      rndis_msg->msg.init_complete.max_xfer_size,
 -                      rndis_msg->msg.init_complete.
 -                         max_pkt_per_msg,
 -                      rndis_msg->msg.init_complete.
 -                         pkt_alignment_factor);
 +              if (rndis_msg->msg_len - RNDIS_HEADER_SIZE >=
 +                              sizeof(struct rndis_initialize_complete)) {
 +                      const struct rndis_initialize_complete *init_complete =
 +                              data + RNDIS_HEADER_SIZE;
 +                      netdev_dbg(netdev, "RNDIS_MSG_INIT_C "
 +                              "(len %u, id 0x%x, status 0x%x, major %d, minor %d, "
 +                              "device flags %d, max xfer size 0x%x, max pkts %u, "
 +                              "pkt aligned %u)\n",
 +                              rndis_msg->msg_len,
 +                              init_complete->req_id,
 +                              init_complete->status,
 +                              init_complete->major_ver,
 +                              init_complete->minor_ver,
 +                              init_complete->dev_flags,
 +                              init_complete->max_xfer_size,
 +                              init_complete->max_pkt_per_msg,
 +                              init_complete->pkt_alignment_factor);
 +              }
                break;
  
        case RNDIS_MSG_QUERY_C:
 -              netdev_dbg(netdev, "RNDIS_MSG_QUERY_C "
 -                      "(len %u, id 0x%x, status 0x%x, buf len %u, "
 -                      "buf offset %u)\n",
 -                      rndis_msg->msg_len,
 -                      rndis_msg->msg.query_complete.req_id,
 -                      rndis_msg->msg.query_complete.status,
 -                      rndis_msg->msg.query_complete.
 -                         info_buflen,
 -                      rndis_msg->msg.query_complete.
 -                         info_buf_offset);
 +              if (rndis_msg->msg_len - RNDIS_HEADER_SIZE >=
 +                              sizeof(struct rndis_query_complete)) {
 +                      const struct rndis_query_complete *query_complete =
 +                              data + RNDIS_HEADER_SIZE;
 +                      netdev_dbg(netdev, "RNDIS_MSG_QUERY_C "
 +                              "(len %u, id 0x%x, status 0x%x, buf len %u, "
 +                              "buf offset %u)\n",
 +                              rndis_msg->msg_len,
 +                              query_complete->req_id,
 +                              query_complete->status,
 +                              query_complete->info_buflen,
 +                              query_complete->info_buf_offset);
 +              }
                break;
  
        case RNDIS_MSG_SET_C:
 -              netdev_dbg(netdev,
 -                      "RNDIS_MSG_SET_C (len %u, id 0x%x, status 0x%x)\n",
 -                      rndis_msg->msg_len,
 -                      rndis_msg->msg.set_complete.req_id,
 -                      rndis_msg->msg.set_complete.status);
 +              if (rndis_msg->msg_len - RNDIS_HEADER_SIZE >= sizeof(struct rndis_set_complete)) {
 +                      const struct rndis_set_complete *set_complete =
 +                              data + RNDIS_HEADER_SIZE;
 +                      netdev_dbg(netdev,
 +                              "RNDIS_MSG_SET_C (len %u, id 0x%x, status 0x%x)\n",
 +                              rndis_msg->msg_len,
 +                              set_complete->req_id,
 +                              set_complete->status);
 +              }
                break;
  
        case RNDIS_MSG_INDICATE:
 -              netdev_dbg(netdev, "RNDIS_MSG_INDICATE "
 -                      "(len %u, status 0x%x, buf len %u, buf offset %u)\n",
 -                      rndis_msg->msg_len,
 -                      rndis_msg->msg.indicate_status.status,
 -                      rndis_msg->msg.indicate_status.status_buflen,
 -                      rndis_msg->msg.indicate_status.status_buf_offset);
 +              if (rndis_msg->msg_len - RNDIS_HEADER_SIZE >=
 +                              sizeof(struct rndis_indicate_status)) {
 +                      const struct rndis_indicate_status *indicate_status =
 +                              data + RNDIS_HEADER_SIZE;
 +                      netdev_dbg(netdev, "RNDIS_MSG_INDICATE "
 +                              "(len %u, status 0x%x, buf len %u, buf offset %u)\n",
 +                              rndis_msg->msg_len,
 +                              indicate_status->status,
 +                              indicate_status->status_buflen,
 +                              indicate_status->status_buf_offset);
 +              }
                break;
  
        default:
@@@ -265,20 -246,11 +265,20 @@@ static void rndis_set_link_state(struc
  {
        u32 link_status;
        struct rndis_query_complete *query_complete;
 +      u32 msg_len = request->response_msg.msg_len;
 +
 +      /* Ensure the packet is big enough to access its fields */
 +      if (msg_len - RNDIS_HEADER_SIZE < sizeof(struct rndis_query_complete))
 +              return;
  
        query_complete = &request->response_msg.msg.query_complete;
  
        if (query_complete->status == RNDIS_STATUS_SUCCESS &&
 -          query_complete->info_buflen == sizeof(u32)) {
 +          query_complete->info_buflen >= sizeof(u32) &&
 +          query_complete->info_buf_offset >= sizeof(*query_complete) &&
 +          msg_len - RNDIS_HEADER_SIZE >= query_complete->info_buf_offset &&
 +          msg_len - RNDIS_HEADER_SIZE - query_complete->info_buf_offset
 +                      >= query_complete->info_buflen) {
                memcpy(&link_status, (void *)((unsigned long)query_complete +
                       query_complete->info_buf_offset), sizeof(u32));
                rdev->link_state = link_status != 0;
  
  static void rndis_filter_receive_response(struct net_device *ndev,
                                          struct netvsc_device *nvdev,
 -                                        const struct rndis_message *resp)
 +                                        struct rndis_message *resp,
 +                                        void *data)
  {
 +      u32 *req_id = &resp->msg.init_complete.req_id;
        struct rndis_device *dev = nvdev->extension;
        struct rndis_request *request = NULL;
        bool found = false;
                return;
        }
  
 +      /* Copy the request ID into nvchan->recv_buf */
 +      *req_id = *(u32 *)(data + RNDIS_HEADER_SIZE);
 +
        spin_lock_irqsave(&dev->request_lock, flags);
        list_for_each_entry(request, &dev->req_list, list_ent) {
                /*
                 * All request/response message contains RequestId as the 1st
                 * field
                 */
 -              if (request->request_msg.msg.init_req.req_id
 -                  == resp->msg.init_complete.req_id) {
 +              if (request->request_msg.msg.init_req.req_id == *req_id) {
                        found = true;
                        break;
                }
        if (found) {
                if (resp->msg_len <=
                    sizeof(struct rndis_message) + RNDIS_EXT_LEN) {
 -                      memcpy(&request->response_msg, resp,
 -                             resp->msg_len);
 +                      memcpy(&request->response_msg, resp, RNDIS_HEADER_SIZE + sizeof(*req_id));
 +                      memcpy((void *)&request->response_msg + RNDIS_HEADER_SIZE + sizeof(*req_id),
 +                             data + RNDIS_HEADER_SIZE + sizeof(*req_id),
 +                             resp->msg_len - RNDIS_HEADER_SIZE - sizeof(*req_id));
                        if (request->request_msg.ndis_msg_type ==
                            RNDIS_MSG_QUERY && request->request_msg.msg.
                            query_req.oid == RNDIS_OID_GEN_MEDIA_CONNECT_STATUS)
                netdev_err(ndev,
                        "no rndis request found for this response "
                        "(id 0x%x res type 0x%x)\n",
 -                      resp->msg.init_complete.req_id,
 +                      *req_id,
                        resp->ndis_msg_type);
        }
  }
   */
  static inline void *rndis_get_ppi(struct net_device *ndev,
                                  struct rndis_packet *rpkt,
 -                                u32 rpkt_len, u32 type, u8 internal)
 +                                u32 rpkt_len, u32 type, u8 internal,
 +                                u32 ppi_size, void *data)
  {
        struct rndis_per_packet_info *ppi;
        int len;
                return NULL;
        }
  
 -      if (rpkt->per_pkt_info_len > rpkt_len - rpkt->per_pkt_info_offset) {
 +      if (rpkt->per_pkt_info_len < sizeof(*ppi) ||
 +          rpkt->per_pkt_info_len > rpkt_len - rpkt->per_pkt_info_offset) {
                netdev_err(ndev, "Invalid per_pkt_info_len: %u\n",
                           rpkt->per_pkt_info_len);
                return NULL;
  
        ppi = (struct rndis_per_packet_info *)((ulong)rpkt +
                rpkt->per_pkt_info_offset);
 +      /* Copy the PPIs into nvchan->recv_buf */
 +      memcpy(ppi, data + RNDIS_HEADER_SIZE + rpkt->per_pkt_info_offset, rpkt->per_pkt_info_len);
        len = rpkt->per_pkt_info_len;
  
        while (len > 0) {
                        continue;
                }
  
 -              if (ppi->type == type && ppi->internal == internal)
 +              if (ppi->type == type && ppi->internal == internal) {
 +                      /* ppi->size should be big enough to hold the returned object. */
 +                      if (ppi->size - ppi->ppi_offset < ppi_size) {
 +                              netdev_err(ndev, "Invalid ppi: size %u ppi_offset %u\n",
 +                                         ppi->size, ppi->ppi_offset);
 +                              continue;
 +                      }
                        return (void *)((ulong)ppi + ppi->ppi_offset);
 +              }
                len -= ppi->size;
                ppi = (struct rndis_per_packet_info *)((ulong)ppi + ppi->size);
        }
@@@ -447,29 -402,10 +447,29 @@@ void rsc_add_data(struct netvsc_channe
        if (cnt) {
                nvchan->rsc.pktlen += len;
        } else {
 -              nvchan->rsc.vlan = vlan;
 -              nvchan->rsc.csum_info = csum_info;
 +              /* The data/values pointed by vlan, csum_info and hash_info are shared
 +               * across the different 'fragments' of the RSC packet; store them into
 +               * the packet itself.
 +               */
 +              if (vlan != NULL) {
 +                      memcpy(&nvchan->rsc.vlan, vlan, sizeof(*vlan));
 +                      nvchan->rsc.ppi_flags |= NVSC_RSC_VLAN;
 +              } else {
 +                      nvchan->rsc.ppi_flags &= ~NVSC_RSC_VLAN;
 +              }
 +              if (csum_info != NULL) {
 +                      memcpy(&nvchan->rsc.csum_info, csum_info, sizeof(*csum_info));
 +                      nvchan->rsc.ppi_flags |= NVSC_RSC_CSUM_INFO;
 +              } else {
 +                      nvchan->rsc.ppi_flags &= ~NVSC_RSC_CSUM_INFO;
 +              }
                nvchan->rsc.pktlen = len;
 -              nvchan->rsc.hash_info = hash_info;
 +              if (hash_info != NULL) {
 +                      nvchan->rsc.hash_info = *hash_info;
 +                      nvchan->rsc.ppi_flags |= NVSC_RSC_HASH_INFO;
 +              } else {
 +                      nvchan->rsc.ppi_flags &= ~NVSC_RSC_HASH_INFO;
 +              }
        }
  
        nvchan->rsc.data[cnt] = data;
@@@ -481,7 -417,7 +481,7 @@@ static int rndis_filter_receive_data(st
                                     struct netvsc_device *nvdev,
                                     struct netvsc_channel *nvchan,
                                     struct rndis_message *msg,
 -                                   u32 data_buflen)
 +                                   void *data, u32 data_buflen)
  {
        struct rndis_packet *rndis_pkt = &msg->msg.pkt;
        const struct ndis_tcp_ip_checksum_info *csum_info;
        const struct rndis_pktinfo_id *pktinfo_id;
        const u32 *hash_info;
        u32 data_offset, rpkt_len;
 -      void *data;
        bool rsc_more = false;
        int ret;
  
                return NVSP_STAT_FAIL;
        }
  
 +      /* Copy the RNDIS packet into nvchan->recv_buf */
 +      memcpy(rndis_pkt, data + RNDIS_HEADER_SIZE, sizeof(*rndis_pkt));
 +
        /* Validate rndis_pkt offset */
        if (rndis_pkt->data_offset >= data_buflen - RNDIS_HEADER_SIZE) {
                netdev_err(ndev, "invalid rndis packet offset: %u\n",
                return NVSP_STAT_FAIL;
        }
  
 -      vlan = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, IEEE_8021Q_INFO, 0);
 -
 -      csum_info = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, TCPIP_CHKSUM_PKTINFO, 0);
 +      vlan = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, IEEE_8021Q_INFO, 0, sizeof(*vlan),
 +                           data);
  
 -      hash_info = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, NBL_HASH_VALUE, 0);
 +      csum_info = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, TCPIP_CHKSUM_PKTINFO, 0,
 +                                sizeof(*csum_info), data);
  
 -      pktinfo_id = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, RNDIS_PKTINFO_ID, 1);
 +      hash_info = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, NBL_HASH_VALUE, 0,
 +                                sizeof(*hash_info), data);
  
 -      data = (void *)msg + data_offset;
 +      pktinfo_id = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, RNDIS_PKTINFO_ID, 1,
 +                                 sizeof(*pktinfo_id), data);
  
        /* Identify RSC frags, drop erroneous packets */
        if (pktinfo_id && (pktinfo_id->flag & RNDIS_PKTINFO_SUBALLOC)) {
         * the data packet to the stack, without the rndis trailer padding
         */
        rsc_add_data(nvchan, vlan, csum_info, hash_info,
 -                   data, rndis_pkt->data_len);
 +                   data + data_offset, rndis_pkt->data_len);
  
        if (rsc_more)
                return NVSP_STAT_SUCCESS;
        return ret;
  
  drop:
-       /* Drop incomplete packet */
-       nvchan->rsc.cnt = 0;
        return NVSP_STAT_FAIL;
  }
  
@@@ -588,41 -518,33 +586,41 @@@ int rndis_filter_receive(struct net_dev
                         void *data, u32 buflen)
  {
        struct net_device_context *net_device_ctx = netdev_priv(ndev);
 -      struct rndis_message *rndis_msg = data;
 +      struct rndis_message *rndis_msg = nvchan->recv_buf;
  
 -      if (netif_msg_rx_status(net_device_ctx))
 -              dump_rndis_message(ndev, rndis_msg);
 +      if (buflen < RNDIS_HEADER_SIZE) {
 +              netdev_err(ndev, "Invalid rndis_msg (buflen: %u)\n", buflen);
 +              return NVSP_STAT_FAIL;
 +      }
 +
 +      /* Copy the RNDIS msg header into nvchan->recv_buf */
 +      memcpy(rndis_msg, data, RNDIS_HEADER_SIZE);
  
        /* Validate incoming rndis_message packet */
 -      if (buflen < RNDIS_HEADER_SIZE || rndis_msg->msg_len < RNDIS_HEADER_SIZE ||
 +      if (rndis_msg->msg_len < RNDIS_HEADER_SIZE ||
            buflen < rndis_msg->msg_len) {
                netdev_err(ndev, "Invalid rndis_msg (buflen: %u, msg_len: %u)\n",
                           buflen, rndis_msg->msg_len);
                return NVSP_STAT_FAIL;
        }
  
 +      if (netif_msg_rx_status(net_device_ctx))
 +              dump_rndis_message(ndev, rndis_msg, data);
 +
        switch (rndis_msg->ndis_msg_type) {
        case RNDIS_MSG_PACKET:
                return rndis_filter_receive_data(ndev, net_dev, nvchan,
 -                                               rndis_msg, buflen);
 +                                               rndis_msg, data, buflen);
        case RNDIS_MSG_INIT_C:
        case RNDIS_MSG_QUERY_C:
        case RNDIS_MSG_SET_C:
                /* completion msgs */
 -              rndis_filter_receive_response(ndev, net_dev, rndis_msg);
 +              rndis_filter_receive_response(ndev, net_dev, rndis_msg, data);
                break;
  
        case RNDIS_MSG_INDICATE:
                /* notification msgs */
 -              netvsc_linkstatus_callback(ndev, rndis_msg);
 +              netvsc_linkstatus_callback(ndev, rndis_msg, data);
                break;
        default:
                netdev_err(ndev,
@@@ -643,7 -565,6 +641,7 @@@ static int rndis_filter_query_device(st
        u32 inresult_size = *result_size;
        struct rndis_query_request *query;
        struct rndis_query_complete *query_complete;
 +      u32 msg_len;
        int ret = 0;
  
        if (!result)
  
        /* Copy the response back */
        query_complete = &request->response_msg.msg.query_complete;
 +      msg_len = request->response_msg.msg_len;
 +
 +      /* Ensure the packet is big enough to access its fields */
 +      if (msg_len - RNDIS_HEADER_SIZE < sizeof(struct rndis_query_complete)) {
 +              ret = -1;
 +              goto cleanup;
 +      }
  
 -      if (query_complete->info_buflen > inresult_size) {
 +      if (query_complete->info_buflen > inresult_size ||
 +          query_complete->info_buf_offset < sizeof(*query_complete) ||
 +          msg_len - RNDIS_HEADER_SIZE < query_complete->info_buf_offset ||
 +          msg_len - RNDIS_HEADER_SIZE - query_complete->info_buf_offset
 +                      < query_complete->info_buflen) {
                ret = -1;
                goto cleanup;
        }
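
Each completion case above (and rndis_filter_query_device) repeats the same guard: once the caller has verified msg_len >= RNDIS_HEADER_SIZE, the remaining msg_len - RNDIS_HEADER_SIZE bytes must be large enough to hold the typed body before any field is read. A compact standalone sketch of that pattern, using a made-up header size and body struct rather than the driver's real definitions:

#include <stdint.h>
#include <stdio.h>

#define MSG_HDR_SIZE 8u                 /* stand-in for RNDIS_HEADER_SIZE */

struct set_complete_body {              /* stand-in for struct rndis_set_complete */
        uint32_t req_id;
        uint32_t status;
};

/* Return a pointer to the typed body, or NULL if the message is too short
 * to contain it.  Assumes the caller already checked msg_len >= MSG_HDR_SIZE,
 * as rndis_filter_receive() does before dispatching on the message type.
 */
static const struct set_complete_body *get_set_complete(const void *msg,
                                                        uint32_t msg_len)
{
        if (msg_len - MSG_HDR_SIZE < sizeof(struct set_complete_body))
                return NULL;            /* truncated: do not read the fields */

        return (const struct set_complete_body *)((const char *)msg + MSG_HDR_SIZE);
}

int main(void)
{
        unsigned char buf[16] = { 0 };

        printf("%s\n", get_set_complete(buf, sizeof(buf)) ? "body ok" : "too short");
        printf("%s\n", get_set_complete(buf, 10) ? "body ok" : "too short");
        return 0;
}
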
diff --combined drivers/net/ipa/gsi.c
index 511c94f66036c176fbdbb9c46d9bec945103a349,b77f5fef7aecab8325ddd29304500cd0b23fc531..4402136461888efc59a831449152362494ede684
@@@ -89,9 -89,9 +89,9 @@@
  /* Delay period for interrupt moderation (in 32KHz IPA internal timer ticks) */
  #define GSI_EVT_RING_INT_MODT         (32 * 1) /* 1ms under 32KHz clock */
  
 -#define GSI_CMD_TIMEOUT                       5       /* seconds */
 +#define GSI_CMD_TIMEOUT                       50      /* milliseconds */
  
 -#define GSI_CHANNEL_STOP_RX_RETRIES   10
 +#define GSI_CHANNEL_STOP_RETRIES      10
  #define GSI_CHANNEL_MODEM_HALT_RETRIES        10
  
  #define GSI_MHI_EVENT_ID_START                10      /* 1st reserved event id */
@@@ -220,59 -220,7 +220,59 @@@ static void gsi_irq_teardown(struct gs
        /* Nothing to do */
  }
  
 -static void gsi_irq_ieob_enable(struct gsi *gsi, u32 evt_ring_id)
 +/* Event ring commands are performed one at a time.  Their completion
 + * is signaled by the event ring control GSI interrupt type, which is
 + * only enabled when we issue an event ring command.  Only the event
 + * ring being operated on has this interrupt enabled.
 + */
 +static void gsi_irq_ev_ctrl_enable(struct gsi *gsi, u32 evt_ring_id)
 +{
 +      u32 val = BIT(evt_ring_id);
 +
 +      /* There's a small chance that a previous command completed
 +       * after the interrupt was disabled, so make sure we have no
 +       * pending interrupts before we enable them.
 +       */
 +      iowrite32(~0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_CLR_OFFSET);
 +
 +      iowrite32(val, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET);
 +      gsi_irq_type_enable(gsi, GSI_EV_CTRL);
 +}
 +
 +/* Disable event ring control interrupts */
 +static void gsi_irq_ev_ctrl_disable(struct gsi *gsi)
 +{
 +      gsi_irq_type_disable(gsi, GSI_EV_CTRL);
 +      iowrite32(0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET);
 +}
 +
 +/* Channel commands are performed one at a time.  Their completion is
 + * signaled by the channel control GSI interrupt type, which is only
 + * enabled when we issue a channel command.  Only the channel being
 + * operated on has this interrupt enabled.
 + */
 +static void gsi_irq_ch_ctrl_enable(struct gsi *gsi, u32 channel_id)
 +{
 +      u32 val = BIT(channel_id);
 +
 +      /* There's a small chance that a previous command completed
 +       * after the interrupt was disabled, so make sure we have no
 +       * pending interrupts before we enable them.
 +       */
 +      iowrite32(~0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_CLR_OFFSET);
 +
 +      iowrite32(val, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET);
 +      gsi_irq_type_enable(gsi, GSI_CH_CTRL);
 +}
 +
 +/* Disable channel control interrupts */
 +static void gsi_irq_ch_ctrl_disable(struct gsi *gsi)
 +{
 +      gsi_irq_type_disable(gsi, GSI_CH_CTRL);
 +      iowrite32(0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET);
 +}
 +
 +static void gsi_irq_ieob_enable_one(struct gsi *gsi, u32 evt_ring_id)
  {
        bool enable_ieob = !gsi->ieob_enabled_bitmap;
        u32 val;
                gsi_irq_type_enable(gsi, GSI_IEOB);
  }
  
 -static void gsi_irq_ieob_disable(struct gsi *gsi, u32 evt_ring_id)
 +static void gsi_irq_ieob_disable(struct gsi *gsi, u32 event_mask)
  {
        u32 val;
  
 -      gsi->ieob_enabled_bitmap &= ~BIT(evt_ring_id);
 +      gsi->ieob_enabled_bitmap &= ~event_mask;
  
        /* Disable the interrupt type if this was the last enabled channel */
        if (!gsi->ieob_enabled_bitmap)
        iowrite32(val, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_MSK_OFFSET);
  }
  
 +static void gsi_irq_ieob_disable_one(struct gsi *gsi, u32 evt_ring_id)
 +{
 +      gsi_irq_ieob_disable(gsi, BIT(evt_ring_id));
 +}
 +
  /* Enable all GSI interrupt types */
  static void gsi_irq_enable(struct gsi *gsi)
  {
@@@ -364,13 -307,11 +364,13 @@@ static u32 gsi_ring_index(struct gsi_ri
  static bool
  gsi_command(struct gsi *gsi, u32 reg, u32 val, struct completion *completion)
  {
 +      unsigned long timeout = msecs_to_jiffies(GSI_CMD_TIMEOUT);
 +
        reinit_completion(completion);
  
        iowrite32(val, gsi->virt + reg);
  
 -      return !!wait_for_completion_timeout(completion, GSI_CMD_TIMEOUT * HZ);
 +      return !!wait_for_completion_timeout(completion, timeout);
  }
  
  /* Return the hardware's notion of the current state of an event ring */
@@@ -385,54 -326,68 +385,54 @@@ gsi_evt_ring_state(struct gsi *gsi, u3
  }
  
  /* Issue an event ring command and wait for it to complete */
 -static void evt_ring_command(struct gsi *gsi, u32 evt_ring_id,
 -                           enum gsi_evt_cmd_opcode opcode)
 +static void gsi_evt_ring_command(struct gsi *gsi, u32 evt_ring_id,
 +                               enum gsi_evt_cmd_opcode opcode)
  {
        struct gsi_evt_ring *evt_ring = &gsi->evt_ring[evt_ring_id];
        struct completion *completion = &evt_ring->completion;
        struct device *dev = gsi->dev;
 -      bool success;
 +      bool timeout;
        u32 val;
  
 -      /* We only perform one event ring command at a time, and event
 -       * control interrupts should only occur when such a command
 -       * is issued here.  Only permit *this* event ring to trigger
 -       * an interrupt, and only enable the event control IRQ type
 -       * when we expect it to occur.
 -       *
 -       * There's a small chance that a previous command completed
 -       * after the interrupt was disabled, so make sure we have no
 -       * pending interrupts before we enable them.
 -       */
 -      iowrite32(~0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_CLR_OFFSET);
 -
 -      val = BIT(evt_ring_id);
 -      iowrite32(val, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET);
 -      gsi_irq_type_enable(gsi, GSI_EV_CTRL);
 +      /* Enable the completion interrupt for the command */
 +      gsi_irq_ev_ctrl_enable(gsi, evt_ring_id);
  
        val = u32_encode_bits(evt_ring_id, EV_CHID_FMASK);
        val |= u32_encode_bits(opcode, EV_OPCODE_FMASK);
  
 -      success = gsi_command(gsi, GSI_EV_CH_CMD_OFFSET, val, completion);
 +      timeout = !gsi_command(gsi, GSI_EV_CH_CMD_OFFSET, val, completion);
  
 -      /* Disable the interrupt again */
 -      gsi_irq_type_disable(gsi, GSI_EV_CTRL);
 -      iowrite32(0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET);
 +      gsi_irq_ev_ctrl_disable(gsi);
  
 -      if (success)
 +      if (!timeout)
                return;
  
        dev_err(dev, "GSI command %u for event ring %u timed out, state %u\n",
 -              opcode, evt_ring_id, evt_ring->state);
 +              opcode, evt_ring_id, gsi_evt_ring_state(gsi, evt_ring_id));
  }
  
  /* Allocate an event ring in NOT_ALLOCATED state */
  static int gsi_evt_ring_alloc_command(struct gsi *gsi, u32 evt_ring_id)
  {
 -      struct gsi_evt_ring *evt_ring = &gsi->evt_ring[evt_ring_id];
 +      enum gsi_evt_ring_state state;
  
        /* Get initial event ring state */
 -      evt_ring->state = gsi_evt_ring_state(gsi, evt_ring_id);
 -      if (evt_ring->state != GSI_EVT_RING_STATE_NOT_ALLOCATED) {
 +      state = gsi_evt_ring_state(gsi, evt_ring_id);
 +      if (state != GSI_EVT_RING_STATE_NOT_ALLOCATED) {
                dev_err(gsi->dev, "event ring %u bad state %u before alloc\n",
 -                      evt_ring_id, evt_ring->state);
 +                      evt_ring_id, state);
                return -EINVAL;
        }
  
 -      evt_ring_command(gsi, evt_ring_id, GSI_EVT_ALLOCATE);
 +      gsi_evt_ring_command(gsi, evt_ring_id, GSI_EVT_ALLOCATE);
  
        /* If successful the event ring state will have changed */
 -      if (evt_ring->state == GSI_EVT_RING_STATE_ALLOCATED)
 +      state = gsi_evt_ring_state(gsi, evt_ring_id);
 +      if (state == GSI_EVT_RING_STATE_ALLOCATED)
                return 0;
  
        dev_err(gsi->dev, "event ring %u bad state %u after alloc\n",
 -              evt_ring_id, evt_ring->state);
 +              evt_ring_id, state);
  
        return -EIO;
  }
  /* Reset a GSI event ring in ALLOCATED or ERROR state. */
  static void gsi_evt_ring_reset_command(struct gsi *gsi, u32 evt_ring_id)
  {
 -      struct gsi_evt_ring *evt_ring = &gsi->evt_ring[evt_ring_id];
 -      enum gsi_evt_ring_state state = evt_ring->state;
 +      enum gsi_evt_ring_state state;
  
 +      state = gsi_evt_ring_state(gsi, evt_ring_id);
        if (state != GSI_EVT_RING_STATE_ALLOCATED &&
            state != GSI_EVT_RING_STATE_ERROR) {
                dev_err(gsi->dev, "event ring %u bad state %u before reset\n",
 -                      evt_ring_id, evt_ring->state);
 +                      evt_ring_id, state);
                return;
        }
  
 -      evt_ring_command(gsi, evt_ring_id, GSI_EVT_RESET);
 +      gsi_evt_ring_command(gsi, evt_ring_id, GSI_EVT_RESET);
  
        /* If successful the event ring state will have changed */
 -      if (evt_ring->state == GSI_EVT_RING_STATE_ALLOCATED)
 +      state = gsi_evt_ring_state(gsi, evt_ring_id);
 +      if (state == GSI_EVT_RING_STATE_ALLOCATED)
                return;
  
        dev_err(gsi->dev, "event ring %u bad state %u after reset\n",
 -              evt_ring_id, evt_ring->state);
 +              evt_ring_id, state);
  }
  
  /* Issue a hardware de-allocation request for an allocated event ring */
  static void gsi_evt_ring_de_alloc_command(struct gsi *gsi, u32 evt_ring_id)
  {
 -      struct gsi_evt_ring *evt_ring = &gsi->evt_ring[evt_ring_id];
 +      enum gsi_evt_ring_state state;
  
 -      if (evt_ring->state != GSI_EVT_RING_STATE_ALLOCATED) {
 +      state = gsi_evt_ring_state(gsi, evt_ring_id);
 +      if (state != GSI_EVT_RING_STATE_ALLOCATED) {
                dev_err(gsi->dev, "event ring %u state %u before dealloc\n",
 -                      evt_ring_id, evt_ring->state);
 +                      evt_ring_id, state);
                return;
        }
  
 -      evt_ring_command(gsi, evt_ring_id, GSI_EVT_DE_ALLOC);
 +      gsi_evt_ring_command(gsi, evt_ring_id, GSI_EVT_DE_ALLOC);
  
        /* If successful the event ring state will have changed */
 -      if (evt_ring->state == GSI_EVT_RING_STATE_NOT_ALLOCATED)
 +      state = gsi_evt_ring_state(gsi, evt_ring_id);
 +      if (state == GSI_EVT_RING_STATE_NOT_ALLOCATED)
                return;
  
        dev_err(gsi->dev, "event ring %u bad state %u after dealloc\n",
 -              evt_ring_id, evt_ring->state);
 +              evt_ring_id, state);
  }
  
  /* Fetch the current state of a channel from hardware */
@@@ -504,19 -456,34 +504,19 @@@ gsi_channel_command(struct gsi_channel 
        u32 channel_id = gsi_channel_id(channel);
        struct gsi *gsi = channel->gsi;
        struct device *dev = gsi->dev;
 -      bool success;
 +      bool timeout;
        u32 val;
  
 -      /* We only perform one channel command at a time, and channel
 -       * control interrupts should only occur when such a command is
 -       * issued here.  So we only permit *this* channel to trigger
 -       * an interrupt and only enable the channel control IRQ type
 -       * when we expect it to occur.
 -       *
 -       * There's a small chance that a previous command completed
 -       * after the interrupt was disabled, so make sure we have no
 -       * pending interrupts before we enable them.
 -       */
 -      iowrite32(~0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_CLR_OFFSET);
 -
 -      val = BIT(channel_id);
 -      iowrite32(val, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET);
 -      gsi_irq_type_enable(gsi, GSI_CH_CTRL);
 +      /* Enable the completion interrupt for the command */
 +      gsi_irq_ch_ctrl_enable(gsi, channel_id);
  
        val = u32_encode_bits(channel_id, CH_CHID_FMASK);
        val |= u32_encode_bits(opcode, CH_OPCODE_FMASK);
 -      success = gsi_command(gsi, GSI_CH_CMD_OFFSET, val, completion);
 +      timeout = !gsi_command(gsi, GSI_CH_CMD_OFFSET, val, completion);
  
 -      /* Disable the interrupt again */
 -      gsi_irq_type_disable(gsi, GSI_CH_CTRL);
 -      iowrite32(0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET);
 +      gsi_irq_ch_ctrl_disable(gsi);
  
 -      if (success)
 +      if (!timeout)
                return;
  
        dev_err(dev, "GSI command %u for channel %u timed out, state %u\n",
@@@ -622,8 -589,7 +622,8 @@@ static void gsi_channel_reset_command(s
        struct device *dev = channel->gsi->dev;
        enum gsi_channel_state state;
  
 -      msleep(1);      /* A short delay is required before a RESET command */
 +      /* A short delay is required before a RESET command */
 +      usleep_range(USEC_PER_MSEC, 2 * USEC_PER_MSEC);
  
        state = gsi_channel_state(channel);
        if (state != GSI_CHANNEL_STATE_STOPPED &&
@@@ -729,38 -695,22 +729,38 @@@ static void gsi_evt_ring_program(struc
        gsi_evt_ring_doorbell(gsi, evt_ring_id, 0);
  }
  
 -/* Return the last (most recent) transaction completed on a channel. */
 +/* Find the transaction whose completion indicates a channel is quiesced */
  static struct gsi_trans *gsi_channel_trans_last(struct gsi_channel *channel)
  {
        struct gsi_trans_info *trans_info = &channel->trans_info;
 +      const struct list_head *list;
        struct gsi_trans *trans;
  
        spin_lock_bh(&trans_info->spinlock);
  
 -      if (!list_empty(&trans_info->complete))
 -              trans = list_last_entry(&trans_info->complete,
 -                                      struct gsi_trans, links);
 -      else if (!list_empty(&trans_info->polled))
 -              trans = list_last_entry(&trans_info->polled,
 -                                      struct gsi_trans, links);
 -      else
 -              trans = NULL;
 +      /* There is a small chance a TX transaction got allocated just
 +       * before we disabled transmits, so check for that.
 +       */
 +      if (channel->toward_ipa) {
 +              list = &trans_info->alloc;
 +              if (!list_empty(list))
 +                      goto done;
 +              list = &trans_info->pending;
 +              if (!list_empty(list))
 +                      goto done;
 +      }
 +
 +      /* Otherwise (TX or RX) we want to wait for anything that
 +       * has completed, or has been polled but not released yet.
 +       */
 +      list = &trans_info->complete;
 +      if (!list_empty(list))
 +              goto done;
 +      list = &trans_info->polled;
 +      if (list_empty(list))
 +              list = NULL;
 +done:
 +      trans = list ? list_last_entry(list, struct gsi_trans, links) : NULL;
  
        /* Caller will wait for this, so take a reference */
        if (trans)
@@@ -784,6 -734,24 +784,6 @@@ static void gsi_channel_trans_quiesce(s
        }
  }
  
 -/* Stop channel activity.  Transactions may not be allocated until thawed. */
 -static void gsi_channel_freeze(struct gsi_channel *channel)
 -{
 -      gsi_channel_trans_quiesce(channel);
 -
 -      napi_disable(&channel->napi);
 -
 -      gsi_irq_ieob_disable(channel->gsi, channel->evt_ring_id);
 -}
 -
 -/* Allow transactions to be used on the channel again. */
 -static void gsi_channel_thaw(struct gsi_channel *channel)
 -{
 -      gsi_irq_ieob_enable(channel->gsi, channel->evt_ring_id);
 -
 -      napi_enable(&channel->napi);
 -}
 -
  /* Program a channel for use */
  static void gsi_channel_program(struct gsi_channel *channel, bool doorbell)
  {
@@@ -875,92 -843,51 +875,92 @@@ static void gsi_channel_deprogram(struc
        /* Nothing to do */
  }
  
 -/* Start an allocated GSI channel */
 -int gsi_channel_start(struct gsi *gsi, u32 channel_id)
 +static int __gsi_channel_start(struct gsi_channel *channel, bool start)
  {
 -      struct gsi_channel *channel = &gsi->channel[channel_id];
 +      struct gsi *gsi = channel->gsi;
        int ret;
  
 +      if (!start)
 +              return 0;
 +
        mutex_lock(&gsi->mutex);
  
        ret = gsi_channel_start_command(channel);
  
        mutex_unlock(&gsi->mutex);
  
 -      gsi_channel_thaw(channel);
 -
        return ret;
  }
  
 -/* Stop a started channel */
 -int gsi_channel_stop(struct gsi *gsi, u32 channel_id)
 +/* Start an allocated GSI channel */
 +int gsi_channel_start(struct gsi *gsi, u32 channel_id)
  {
        struct gsi_channel *channel = &gsi->channel[channel_id];
 -      u32 retries;
        int ret;
  
 -      gsi_channel_freeze(channel);
 +      /* Enable NAPI and the completion interrupt */
 +      napi_enable(&channel->napi);
 +      gsi_irq_ieob_enable_one(gsi, channel->evt_ring_id);
  
 -      /* RX channels might require a little time to enter STOPPED state */
 -      retries = channel->toward_ipa ? 0 : GSI_CHANNEL_STOP_RX_RETRIES;
 +      ret = __gsi_channel_start(channel, true);
 +      if (ret) {
 +              gsi_irq_ieob_disable_one(gsi, channel->evt_ring_id);
 +              napi_disable(&channel->napi);
 +      }
  
 -      mutex_lock(&gsi->mutex);
 +      return ret;
 +}
 +
 +static int gsi_channel_stop_retry(struct gsi_channel *channel)
 +{
 +      u32 retries = GSI_CHANNEL_STOP_RETRIES;
 +      int ret;
  
        do {
                ret = gsi_channel_stop_command(channel);
                if (ret != -EAGAIN)
                        break;
 -              msleep(1);
 +              usleep_range(3 * USEC_PER_MSEC, 5 * USEC_PER_MSEC);
        } while (retries--);
  
 +      return ret;
 +}
 +
 +static int __gsi_channel_stop(struct gsi_channel *channel, bool stop)
 +{
 +      struct gsi *gsi = channel->gsi;
 +      int ret;
 +
 +      /* Wait for any underway transactions to complete before stopping. */
 +      gsi_channel_trans_quiesce(channel);
 +
 +      if (!stop)
 +              return 0;
 +
 +      mutex_lock(&gsi->mutex);
 +
 +      ret = gsi_channel_stop_retry(channel);
 +
        mutex_unlock(&gsi->mutex);
  
 -      /* Thaw the channel if we need to retry (or on error) */
 +      return ret;
 +}
 +
 +/* Stop a started channel */
 +int gsi_channel_stop(struct gsi *gsi, u32 channel_id)
 +{
 +      struct gsi_channel *channel = &gsi->channel[channel_id];
 +      int ret;
 +
 +      ret = __gsi_channel_stop(channel, true);
        if (ret)
 -              gsi_channel_thaw(channel);
 +              return ret;
  
 -      return ret;
 +      /* Disable the completion interrupt and NAPI if successful */
 +      gsi_irq_ieob_disable_one(gsi, channel->evt_ring_id);
 +      napi_disable(&channel->napi);
 +
 +      return 0;
  }
  
  /* Reset and reconfigure a channel, (possibly) enabling the doorbell engine */
@@@ -985,14 -912,11 +985,14 @@@ void gsi_channel_reset(struct gsi *gsi
  int gsi_channel_suspend(struct gsi *gsi, u32 channel_id, bool stop)
  {
        struct gsi_channel *channel = &gsi->channel[channel_id];
 +      int ret;
  
 -      if (stop)
 -              return gsi_channel_stop(gsi, channel_id);
 +      ret = __gsi_channel_stop(channel, stop);
 +      if (ret)
 +              return ret;
  
 -      gsi_channel_freeze(channel);
 +      /* Ensure NAPI polling has finished. */
 +      napi_synchronize(&channel->napi);
  
        return 0;
  }
@@@ -1002,7 -926,12 +1002,7 @@@ int gsi_channel_resume(struct gsi *gsi
  {
        struct gsi_channel *channel = &gsi->channel[channel_id];
  
 -      if (start)
 -              return gsi_channel_start(gsi, channel_id);
 -
 -      gsi_channel_thaw(channel);
 -
 -      return 0;
 +      return __gsi_channel_start(channel, start);
  }
  
  /**
@@@ -1111,6 -1040,7 +1111,6 @@@ static void gsi_isr_evt_ctrl(struct gs
                event_mask ^= BIT(evt_ring_id);
  
                evt_ring = &gsi->evt_ring[evt_ring_id];
 -              evt_ring->state = gsi_evt_ring_state(gsi, evt_ring_id);
  
                complete(&evt_ring->completion);
        }
@@@ -1248,7 -1178,6 +1248,7 @@@ static void gsi_isr_ieob(struct gsi *gs
        u32 event_mask;
  
        event_mask = ioread32(gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_OFFSET);
 +      gsi_irq_ieob_disable(gsi, event_mask);
        iowrite32(event_mask, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_CLR_OFFSET);
  
        while (event_mask) {
  
                event_mask ^= BIT(evt_ring_id);
  
 -              gsi_irq_ieob_disable(gsi, evt_ring_id);
                napi_schedule(&gsi->evt_ring[evt_ring_id].channel->napi);
        }
  }
@@@ -1500,7 -1430,7 +1500,7 @@@ void gsi_channel_doorbell(struct gsi_ch
  }
  
  /* Consult hardware, move any newly completed transactions to completed list */
 -static void gsi_channel_update(struct gsi_channel *channel)
 +static struct gsi_trans *gsi_channel_update(struct gsi_channel *channel)
  {
        u32 evt_ring_id = channel->evt_ring_id;
        struct gsi *gsi = channel->gsi;
        offset = GSI_EV_CH_E_CNTXT_4_OFFSET(evt_ring_id);
        index = gsi_ring_index(ring, ioread32(gsi->virt + offset));
        if (index == ring->index % ring->count)
 -              return;
 +              return NULL;
  
        /* Get the transaction for the latest completed event.  Take a
         * reference to keep it from completing before we give the events
        gsi_evt_ring_doorbell(channel->gsi, channel->evt_ring_id, index);
  
        gsi_trans_free(trans);
 +
 +      return gsi_channel_trans_complete(channel);
  }
  
  /**
@@@ -1566,8 -1494,11 +1566,8 @@@ static struct gsi_trans *gsi_channel_po
  
        /* Get the first transaction from the completed list */
        trans = gsi_channel_trans_complete(channel);
 -      if (!trans) {
 -              /* List is empty; see if there's more to do */
 -              gsi_channel_update(channel);
 -              trans = gsi_channel_trans_complete(channel);
 -      }
 +      if (!trans)     /* List is empty; see if there's more to do */
 +              trans = gsi_channel_update(channel);
  
        if (trans)
                gsi_trans_move_polled(trans);
  static int gsi_channel_poll(struct napi_struct *napi, int budget)
  {
        struct gsi_channel *channel;
 -      int count = 0;
 +      int count;
  
        channel = container_of(napi, struct gsi_channel, napi);
 -      while (count < budget) {
 +      for (count = 0; count < budget; count++) {
                struct gsi_trans *trans;
  
 -              count++;
                trans = gsi_channel_poll_one(channel);
                if (!trans)
                        break;
                gsi_trans_complete(trans);
        }
  
 -      if (count < budget) {
 -              napi_complete(&channel->napi);
 -              gsi_irq_ieob_enable(channel->gsi, channel->evt_ring_id);
 -      }
 +      if (count < budget && napi_complete(napi))
 +              gsi_irq_ieob_enable_one(channel->gsi, channel->evt_ring_id);
  
        return count;
  }
@@@ -1693,7 -1627,7 +1693,7 @@@ static int gsi_generic_command(struct g
                               enum gsi_generic_cmd_opcode opcode)
  {
        struct completion *completion = &gsi->completion;
 -      bool success;
 +      bool timeout;
        u32 val;
  
        /* The error global interrupt type is always enabled (until we
        val |= u32_encode_bits(channel_id, GENERIC_CHID_FMASK);
        val |= u32_encode_bits(GSI_EE_MODEM, GENERIC_EE_FMASK);
  
 -      success = gsi_command(gsi, GSI_GENERIC_CMD_OFFSET, val, completion);
 +      timeout = !gsi_command(gsi, GSI_GENERIC_CMD_OFFSET, val, completion);
  
        /* Disable the GP_INT1 IRQ type again */
        iowrite32(BIT(ERROR_INT), gsi->virt + GSI_CNTXT_GLOB_IRQ_EN_OFFSET);
  
 -      if (success)
 +      if (!timeout)
                return gsi->result;
  
        dev_err(gsi->dev, "GSI generic command %u to channel %u timed out\n",
@@@ -1776,6 -1710,7 +1776,7 @@@ static int gsi_channel_setup(struct gs
                if (!channel->gsi)
                        continue;       /* Ignore uninitialized channels */
  
+               ret = -EINVAL;
                dev_err(gsi->dev, "channel %u not supported by hardware\n",
                        channel_id - 1);
                channel_id = gsi->channel_count;
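
The reworked stop path above (gsi_channel_stop_retry) bounds the number of -EAGAIN retries and sleeps a few milliseconds between attempts with usleep_range() instead of msleep(1). A small userspace sketch of the same bounded-retry shape; issue_stop() here is a hypothetical stand-in for the real channel-stop command, not a kernel interface:

#include <errno.h>
#include <stdio.h>
#include <unistd.h>

#define STOP_RETRIES 10                 /* mirrors GSI_CHANNEL_STOP_RETRIES */

/* Hypothetical command: pretends the hardware needs a couple of tries
 * before it reaches the stopped state.
 */
static int issue_stop(void)
{
        static int calls;

        return ++calls < 3 ? -EAGAIN : 0;
}

static int stop_with_retries(void)
{
        unsigned int retries = STOP_RETRIES;
        int ret;

        do {
                ret = issue_stop();
                if (ret != -EAGAIN)
                        break;          /* success, or an error worth reporting */
                usleep(4000);           /* roughly the 3-5 ms usleep_range() above */
        } while (retries--);

        return ret;
}

int main(void)
{
        printf("stop returned %d\n", stop_with_retries());
        return 0;
}
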
diff --combined drivers/net/usb/qmi_wwan.c
index c8b2b60d2183416eebde1d4a4b4aa933de1cbe18,5a05add9b4e690e10c5027e10d59bd21f867ef91..6c3d8c2abd385609d22a7d6e101a08684189b3f8
@@@ -57,7 -57,6 +57,7 @@@ struct qmi_wwan_state 
  enum qmi_wwan_flags {
        QMI_WWAN_FLAG_RAWIP = 1 << 0,
        QMI_WWAN_FLAG_MUX = 1 << 1,
 +      QMI_WWAN_FLAG_PASS_THROUGH = 1 << 2,
  };
  
  enum qmi_wwan_quirks {
@@@ -187,7 -186,7 +187,7 @@@ static int qmimux_rx_fixup(struct usbne
                net = qmimux_find_dev(dev, hdr->mux_id);
                if (!net)
                        goto skip;
 -              skbn = netdev_alloc_skb(net, pkt_len);
 +              skbn = netdev_alloc_skb(net, pkt_len + LL_MAX_HEADER);
                if (!skbn)
                        return 0;
                skbn->dev = net;
                        goto skip;
                }
  
 +              skb_reserve(skbn, LL_MAX_HEADER);
                skb_put_data(skbn, skb->data + offset + qmimux_hdr_sz, pkt_len);
                if (netif_rx(skbn) != NET_RX_SUCCESS) {
                        net->stats.rx_errors++;
@@@ -219,28 -217,6 +219,28 @@@ skip
        return 1;
  }
  
 +static ssize_t mux_id_show(struct device *d, struct device_attribute *attr, char *buf)
 +{
 +      struct net_device *dev = to_net_dev(d);
 +      struct qmimux_priv *priv;
 +
 +      priv = netdev_priv(dev);
 +
 +      return sysfs_emit(buf, "0x%02x\n", priv->mux_id);
 +}
 +
 +static DEVICE_ATTR_RO(mux_id);
 +
 +static struct attribute *qmi_wwan_sysfs_qmimux_attrs[] = {
 +      &dev_attr_mux_id.attr,
 +      NULL,
 +};
 +
 +static struct attribute_group qmi_wwan_sysfs_qmimux_attr_group = {
 +      .name = "qmap",
 +      .attrs = qmi_wwan_sysfs_qmimux_attrs,
 +};
 +
  static int qmimux_register_device(struct net_device *real_dev, u8 mux_id)
  {
        struct net_device *new_dev;
                goto out_free_newdev;
        }
  
 +      new_dev->sysfs_groups[0] = &qmi_wwan_sysfs_qmimux_attr_group;
 +
        err = register_netdevice(new_dev);
        if (err < 0)
                goto out_free_newdev;
@@@ -351,13 -325,6 +351,13 @@@ static ssize_t raw_ip_store(struct devi
        if (enable == (info->flags & QMI_WWAN_FLAG_RAWIP))
                return len;
  
 +      /* ip mode cannot be cleared when pass through mode is set */
 +      if (!enable && (info->flags & QMI_WWAN_FLAG_PASS_THROUGH)) {
 +              netdev_err(dev->net,
 +                         "Cannot clear ip mode on pass through device\n");
 +              return -EINVAL;
 +      }
 +
        if (!rtnl_trylock())
                return restart_syscall();
  
@@@ -488,59 -455,14 +488,59 @@@ err
        return ret;
  }
  
 +static ssize_t pass_through_show(struct device *d,
 +                               struct device_attribute *attr, char *buf)
 +{
 +      struct usbnet *dev = netdev_priv(to_net_dev(d));
 +      struct qmi_wwan_state *info;
 +
 +      info = (void *)&dev->data;
 +      return sprintf(buf, "%c\n",
 +                     info->flags & QMI_WWAN_FLAG_PASS_THROUGH ? 'Y' : 'N');
 +}
 +
 +static ssize_t pass_through_store(struct device *d,
 +                                struct device_attribute *attr,
 +                                const char *buf, size_t len)
 +{
 +      struct usbnet *dev = netdev_priv(to_net_dev(d));
 +      struct qmi_wwan_state *info;
 +      bool enable;
 +
 +      if (strtobool(buf, &enable))
 +              return -EINVAL;
 +
 +      info = (void *)&dev->data;
 +
 +      /* no change? */
 +      if (enable == (info->flags & QMI_WWAN_FLAG_PASS_THROUGH))
 +              return len;
 +
 +      /* pass through mode can be set for raw ip devices only */
 +      if (!(info->flags & QMI_WWAN_FLAG_RAWIP)) {
 +              netdev_err(dev->net,
 +                         "Cannot set pass through mode on non ip device\n");
 +              return -EINVAL;
 +      }
 +
 +      if (enable)
 +              info->flags |= QMI_WWAN_FLAG_PASS_THROUGH;
 +      else
 +              info->flags &= ~QMI_WWAN_FLAG_PASS_THROUGH;
 +
 +      return len;
 +}
 +
  static DEVICE_ATTR_RW(raw_ip);
  static DEVICE_ATTR_RW(add_mux);
  static DEVICE_ATTR_RW(del_mux);
 +static DEVICE_ATTR_RW(pass_through);
  
  static struct attribute *qmi_wwan_sysfs_attrs[] = {
        &dev_attr_raw_ip.attr,
        &dev_attr_add_mux.attr,
        &dev_attr_del_mux.attr,
 +      &dev_attr_pass_through.attr,
        NULL,
  };
  
@@@ -587,11 -509,6 +587,11 @@@ static int qmi_wwan_rx_fixup(struct usb
        if (info->flags & QMI_WWAN_FLAG_MUX)
                return qmimux_rx_fixup(dev, skb);
  
 +      if (info->flags & QMI_WWAN_FLAG_PASS_THROUGH) {
 +              skb->protocol = htons(ETH_P_MAP);
 +              return (netif_rx(skb) == NET_RX_SUCCESS);
 +      }
 +
        switch (skb->data[0] & 0xf0) {
        case 0x40:
                proto = htons(ETH_P_IP);
@@@ -1392,6 -1309,7 +1392,7 @@@ static const struct usb_device_id produ
        {QMI_FIXED_INTF(0x1e2d, 0x0082, 5)},    /* Cinterion PHxx,PXxx (2 RmNet) */
        {QMI_FIXED_INTF(0x1e2d, 0x0083, 4)},    /* Cinterion PHxx,PXxx (1 RmNet + USB Audio)*/
        {QMI_QUIRK_SET_DTR(0x1e2d, 0x00b0, 4)}, /* Cinterion CLS8 */
+       {QMI_FIXED_INTF(0x1e2d, 0x00b7, 0)},    /* Cinterion MV31 RmNet */
        {QMI_FIXED_INTF(0x413c, 0x81a2, 8)},    /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */
        {QMI_FIXED_INTF(0x413c, 0x81a3, 8)},    /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */
        {QMI_FIXED_INTF(0x413c, 0x81a4, 8)},    /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */
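
The new pass_through attribute above only flips a flag bit, but the store handlers enforce an interlock: pass-through can only be set on a raw-ip device, and raw-ip cannot be cleared while pass-through is set. A standalone sketch of that interlock with stand-in flag values and function names, without the driver's enum or sysfs plumbing:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define FLAG_RAWIP              (1u << 0)   /* stand-in for QMI_WWAN_FLAG_RAWIP        */
#define FLAG_PASS_THROUGH       (1u << 2)   /* stand-in for QMI_WWAN_FLAG_PASS_THROUGH */

static int set_pass_through(unsigned int *flags, bool enable)
{
        if (enable == !!(*flags & FLAG_PASS_THROUGH))
                return 0;                       /* nothing to change */
        if (!(*flags & FLAG_RAWIP))
                return -EINVAL;                 /* pass-through needs raw-ip mode */
        if (enable)
                *flags |= FLAG_PASS_THROUGH;
        else
                *flags &= ~FLAG_PASS_THROUGH;
        return 0;
}

static int clear_rawip(unsigned int *flags)
{
        if (*flags & FLAG_PASS_THROUGH)
                return -EINVAL;                 /* must drop pass-through first */
        *flags &= ~FLAG_RAWIP;
        return 0;
}

int main(void)
{
        unsigned int flags = 0;

        printf("%d\n", set_pass_through(&flags, true));    /* -22: not raw-ip yet   */
        flags |= FLAG_RAWIP;
        printf("%d\n", set_pass_through(&flags, true));    /* 0                     */
        printf("%d\n", clear_rawip(&flags));                /* -22: pass-through on  */
        return 0;
}
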
diff --combined include/linux/netdevice.h
index 1340327f7abf448a6eb78cd3a05754f941f940fe,5ff27c12ce68833a2c142be425c82bffe8eecbd0..a20310ff5083b7dcb7bb1c2c6268888894086579
@@@ -347,7 -347,6 +347,7 @@@ struct napi_struct 
        struct list_head        dev_list;
        struct hlist_node       napi_hash_node;
        unsigned int            napi_id;
 +      struct task_struct      *thread;
  };
  
  enum {
        NAPI_STATE_NO_BUSY_POLL,        /* Do not add in napi_hash, no busy polling */
        NAPI_STATE_IN_BUSY_POLL,        /* sk_busy_loop() owns this NAPI */
        NAPI_STATE_PREFER_BUSY_POLL,    /* prefer busy-polling over softirq processing*/
 +      NAPI_STATE_THREADED,            /* The poll is performed inside its own thread*/
  };
  
  enum {
        NAPIF_STATE_NO_BUSY_POLL        = BIT(NAPI_STATE_NO_BUSY_POLL),
        NAPIF_STATE_IN_BUSY_POLL        = BIT(NAPI_STATE_IN_BUSY_POLL),
        NAPIF_STATE_PREFER_BUSY_POLL    = BIT(NAPI_STATE_PREFER_BUSY_POLL),
 +      NAPIF_STATE_THREADED            = BIT(NAPI_STATE_THREADED),
  };
  
  enum gro_result {
        GRO_MERGED_FREE,
        GRO_HELD,
        GRO_NORMAL,
 -      GRO_DROP,
        GRO_CONSUMED,
  };
  typedef enum gro_result gro_result_t;
@@@ -497,8 -495,6 +497,8 @@@ static inline bool napi_complete(struc
        return napi_complete_done(n, 0);
  }
  
 +int dev_set_threaded(struct net_device *dev, bool threaded);
 +
  /**
   *    napi_disable - prevent NAPI from scheduling
   *    @n: NAPI context
   */
  void napi_disable(struct napi_struct *n);
  
 -/**
 - *    napi_enable - enable NAPI scheduling
 - *    @n: NAPI context
 - *
 - * Resume NAPI from being scheduled on this context.
 - * Must be paired with napi_disable.
 - */
 -static inline void napi_enable(struct napi_struct *n)
 -{
 -      BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
 -      smp_mb__before_atomic();
 -      clear_bit(NAPI_STATE_SCHED, &n->state);
 -      clear_bit(NAPI_STATE_NPSVC, &n->state);
 -}
 +void napi_enable(struct napi_struct *n);
  
  /**
   *    napi_synchronize - wait until NAPI is not running
@@@ -850,7 -859,6 +850,7 @@@ enum tc_setup_type 
        TC_SETUP_QDISC_ETS,
        TC_SETUP_QDISC_TBF,
        TC_SETUP_QDISC_FIFO,
 +      TC_SETUP_QDISC_HTB,
  };
  
  /* These structures hold the attributes of bpf state that are being passed
@@@ -1205,6 -1213,19 +1205,6 @@@ struct netdev_net_notifier 
   *                             struct netdev_phys_item_id *ppid)
   *    Called to get the parent ID of the physical port of this device.
   *
 - * void (*ndo_udp_tunnel_add)(struct net_device *dev,
 - *                          struct udp_tunnel_info *ti);
 - *    Called by UDP tunnel to notify a driver about the UDP port and socket
 - *    address family that a UDP tunnel is listnening to. It is called only
 - *    when a new port starts listening. The operation is protected by the
 - *    RTNL.
 - *
 - * void (*ndo_udp_tunnel_del)(struct net_device *dev,
 - *                          struct udp_tunnel_info *ti);
 - *    Called by UDP tunnel to notify the driver about a UDP port and socket
 - *    address family that the UDP tunnel is not listening to anymore. The
 - *    operation is protected by the RTNL.
 - *
   * void* (*ndo_dfwd_add_station)(struct net_device *pdev,
   *                             struct net_device *dev)
   *    Called by upper layer devices to accelerate switching or other
@@@ -1391,8 -1412,6 +1391,8 @@@ struct net_device_ops 
        struct net_device*      (*ndo_get_xmit_slave)(struct net_device *dev,
                                                      struct sk_buff *skb,
                                                      bool all_slaves);
 +      struct net_device*      (*ndo_sk_get_lower_dev)(struct net_device *dev,
 +                                                      struct sock *sk);
        netdev_features_t       (*ndo_fix_features)(struct net_device *dev,
                                                    netdev_features_t features);
        int                     (*ndo_set_features)(struct net_device *dev,
                                                          struct netdev_phys_item_id *ppid);
        int                     (*ndo_get_phys_port_name)(struct net_device *dev,
                                                          char *name, size_t len);
 -      void                    (*ndo_udp_tunnel_add)(struct net_device *dev,
 -                                                    struct udp_tunnel_info *ti);
 -      void                    (*ndo_udp_tunnel_del)(struct net_device *dev,
 -                                                    struct udp_tunnel_info *ti);
        void*                   (*ndo_dfwd_add_station)(struct net_device *pdev,
                                                        struct net_device *dev);
        void                    (*ndo_dfwd_del_station)(struct net_device *pdev,
@@@ -1819,8 -1842,6 +1819,8 @@@ enum netdev_priv_flags 
   *
   *    @wol_enabled:   Wake-on-LAN is enabled
   *
 + *    @threaded:      napi threaded mode is enabled
 + *
   *    @net_notifier_list:     List of per-net netdev notifier block
   *                            that follow this device when it is moved
   *                            to another network namespace.
@@@ -1852,6 -1873,7 +1852,6 @@@ struct net_device 
        unsigned long           mem_end;
        unsigned long           mem_start;
        unsigned long           base_addr;
 -      int                     irq;
  
        /*
         *      Some hardware also needs these fields (state,dev_list,
                struct list_head lower;
        } adj_list;
  
 +      /* Read-mostly cache-line for fast-path access */
 +      unsigned int            flags;
 +      unsigned int            priv_flags;
 +      const struct net_device_ops *netdev_ops;
 +      int                     ifindex;
 +      unsigned short          gflags;
 +      unsigned short          hard_header_len;
 +
 +      /* Note : dev->mtu is often read without holding a lock.
 +       * Writers usually hold RTNL.
 +       * It is recommended to use READ_ONCE() to annotate the reads,
 +       * and to use WRITE_ONCE() to annotate the writes.
 +       */
 +      unsigned int            mtu;
 +      unsigned short          needed_headroom;
 +      unsigned short          needed_tailroom;
 +
        netdev_features_t       features;
        netdev_features_t       hw_features;
        netdev_features_t       wanted_features;
        netdev_features_t       mpls_features;
        netdev_features_t       gso_partial_features;
  
 -      int                     ifindex;
 +      unsigned int            min_mtu;
 +      unsigned int            max_mtu;
 +      unsigned short          type;
 +      unsigned char           min_header_len;
 +      unsigned char           name_assign_type;
 +
        int                     group;
  
 -      struct net_device_stats stats;
 +      struct net_device_stats stats; /* not used by modern drivers */
  
        atomic_long_t           rx_dropped;
        atomic_long_t           tx_dropped;
        const struct iw_handler_def *wireless_handlers;
        struct iw_public_data   *wireless_data;
  #endif
 -      const struct net_device_ops *netdev_ops;
        const struct ethtool_ops *ethtool_ops;
  #ifdef CONFIG_NET_L3_MASTER_DEV
        const struct l3mdev_ops *l3mdev_ops;
  
        const struct header_ops *header_ops;
  
 -      unsigned int            flags;
 -      unsigned int            priv_flags;
 -
 -      unsigned short          gflags;
 -      unsigned short          padded;
 -
        unsigned char           operstate;
        unsigned char           link_mode;
  
        unsigned char           if_port;
        unsigned char           dma;
  
 -      /* Note : dev->mtu is often read without holding a lock.
 -       * Writers usually hold RTNL.
 -       * It is recommended to use READ_ONCE() to annotate the reads,
 -       * and to use WRITE_ONCE() to annotate the writes.
 -       */
 -      unsigned int            mtu;
 -      unsigned int            min_mtu;
 -      unsigned int            max_mtu;
 -      unsigned short          type;
 -      unsigned short          hard_header_len;
 -      unsigned char           min_header_len;
 -      unsigned char           name_assign_type;
 -
 -      unsigned short          needed_headroom;
 -      unsigned short          needed_tailroom;
 -
        /* Interface address info. */
        unsigned char           perm_addr[MAX_ADDR_LEN];
        unsigned char           addr_assign_type;
        unsigned short          neigh_priv_len;
        unsigned short          dev_id;
        unsigned short          dev_port;
 +      unsigned short          padded;
 +
        spinlock_t              addr_list_lock;
 +      int                     irq;
  
        struct netdev_hw_addr_list      uc;
        struct netdev_hw_addr_list      mc;
        struct lock_class_key   *qdisc_running_key;
        bool                    proto_down;
        unsigned                wol_enabled:1;
 +      unsigned                threaded:1;
  
        struct list_head        net_notifier_list;
  
@@@ -2614,7 -2633,6 +2614,7 @@@ enum netdev_lag_hash 
        NETDEV_LAG_HASH_L23,
        NETDEV_LAG_HASH_E23,
        NETDEV_LAG_HASH_E34,
 +      NETDEV_LAG_HASH_VLAN_SRCMAC,
        NETDEV_LAG_HASH_UNKNOWN,
  };
  
@@@ -2858,8 -2876,6 +2858,8 @@@ int init_dummy_netdev(struct net_devic
  struct net_device *netdev_get_xmit_slave(struct net_device *dev,
                                         struct sk_buff *skb,
                                         bool all_slaves);
 +struct net_device *netdev_sk_get_lowest_dev(struct net_device *dev,
 +                                          struct sock *sk);
  struct net_device *dev_get_by_index(struct net *net, int ifindex);
  struct net_device *__dev_get_by_index(struct net *net, int ifindex);
  struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
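
The newly declared netdev_sk_get_lowest_dev() walks the lower-device chain for a
given socket until nothing lower is reported. A minimal usage sketch (hypothetical
helper name and feature check, not part of this merge), e.g. for an offload path
that wants the device that will actually transmit a socket's traffic:

/* Sketch: resolve through stacked devices (bonding, VLAN, ...) and check the
 * transmitting device's feature bits. RCU protection is assumed here.
 */
static bool example_sk_can_hw_tls(struct net_device *dev, struct sock *sk)
{
        struct net_device *lowest;
        bool can;

        rcu_read_lock();
        lowest = netdev_sk_get_lowest_dev(dev, sk) ?: dev;
        can = !!(lowest->features & NETIF_F_HW_TLS_TX);
        rcu_read_unlock();

        return can;
}
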
@@@ -4336,6 -4352,7 +4336,7 @@@ static inline void netif_tx_disable(str
  
        local_bh_disable();
        cpu = smp_processor_id();
+       spin_lock(&dev->tx_global_lock);
        for (i = 0; i < dev->num_tx_queues; i++) {
                struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
  
                netif_tx_stop_queue(txq);
                __netif_tx_unlock(txq);
        }
+       spin_unlock(&dev->tx_global_lock);
        local_bh_enable();
  }
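
The field reshuffle above also relocates the note that dev->mtu is read locklessly
while writers usually hold RTNL. A minimal sketch of the recommended annotation
pattern (hypothetical helpers, not part of this merge):

/* Sketch: lockless read vs. RTNL-protected write of dev->mtu, using the
 * READ_ONCE()/WRITE_ONCE() annotations the comment above asks for.
 */
static unsigned int example_read_mtu(const struct net_device *dev)
{
        return READ_ONCE(dev->mtu);             /* may run without any lock */
}

static void example_write_mtu(struct net_device *dev, unsigned int new_mtu)
{
        ASSERT_RTNL();                          /* writers usually hold RTNL */
        WRITE_ONCE(dev->mtu, new_mtu);
}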
  
diff --combined include/net/switchdev.h
index 88fcac1409667dc7e77d819023baf9865c5c387c,afdf8bd1b4fe52f4be39a0a15d1b36e08a9d90fe..6dcfc4c51a6e7fe0f95fc22a9ce4119ed22a5c87
  #define SWITCHDEV_F_SKIP_EOPNOTSUPP   BIT(1)
  #define SWITCHDEV_F_DEFER             BIT(2)
  
 -struct switchdev_trans {
 -      bool ph_prepare;
 -};
 -
 -static inline bool switchdev_trans_ph_prepare(struct switchdev_trans *trans)
 -{
 -      return trans && trans->ph_prepare;
 -}
 -
 -static inline bool switchdev_trans_ph_commit(struct switchdev_trans *trans)
 -{
 -      return trans && !trans->ph_prepare;
 -}
 -
  enum switchdev_attr_id {
        SWITCHDEV_ATTR_ID_UNDEFINED,
        SWITCHDEV_ATTR_ID_PORT_STP_STATE,
@@@ -28,7 -42,6 +28,6 @@@
        SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED,
        SWITCHDEV_ATTR_ID_BRIDGE_MROUTER,
  #if IS_ENABLED(CONFIG_BRIDGE_MRP)
-       SWITCHDEV_ATTR_ID_MRP_PORT_STATE,
        SWITCHDEV_ATTR_ID_MRP_PORT_ROLE,
  #endif
  };
@@@ -48,7 -61,6 +47,6 @@@ struct switchdev_attr 
                u16 vlan_protocol;                      /* BRIDGE_VLAN_PROTOCOL */
                bool mc_disabled;                       /* MC_DISABLED */
  #if IS_ENABLED(CONFIG_BRIDGE_MRP)
-               u8 mrp_port_state;                      /* MRP_PORT_STATE */
                u8 mrp_port_role;                       /* MRP_PORT_ROLE */
  #endif
        } u;
@@@ -83,7 -95,8 +81,7 @@@ struct switchdev_obj 
  struct switchdev_obj_port_vlan {
        struct switchdev_obj obj;
        u16 flags;
 -      u16 vid_begin;
 -      u16 vid_end;
 +      u16 vid;
  };
  
  #define SWITCHDEV_OBJ_PORT_VLAN(OBJ) \
@@@ -219,12 -232,14 +217,12 @@@ struct switchdev_notifier_fdb_info 
  struct switchdev_notifier_port_obj_info {
        struct switchdev_notifier_info info; /* must be first */
        const struct switchdev_obj *obj;
 -      struct switchdev_trans *trans;
        bool handled;
  };
  
  struct switchdev_notifier_port_attr_info {
        struct switchdev_notifier_info info; /* must be first */
        const struct switchdev_attr *attr;
 -      struct switchdev_trans *trans;
        bool handled;
  };
  
@@@ -272,6 -287,7 +270,6 @@@ int switchdev_handle_port_obj_add(struc
                        bool (*check_cb)(const struct net_device *dev),
                        int (*add_cb)(struct net_device *dev,
                                      const struct switchdev_obj *obj,
 -                                    struct switchdev_trans *trans,
                                      struct netlink_ext_ack *extack));
  int switchdev_handle_port_obj_del(struct net_device *dev,
                        struct switchdev_notifier_port_obj_info *port_obj_info,
@@@ -283,7 -299,8 +281,7 @@@ int switchdev_handle_port_attr_set(stru
                        struct switchdev_notifier_port_attr_info *port_attr_info,
                        bool (*check_cb)(const struct net_device *dev),
                        int (*set_cb)(struct net_device *dev,
 -                                    const struct switchdev_attr *attr,
 -                                    struct switchdev_trans *trans));
 +                                    const struct switchdev_attr *attr));
  #else
  
  static inline void switchdev_deferred_process(void)
@@@ -354,6 -371,7 +352,6 @@@ switchdev_handle_port_obj_add(struct ne
                        bool (*check_cb)(const struct net_device *dev),
                        int (*add_cb)(struct net_device *dev,
                                      const struct switchdev_obj *obj,
 -                                    struct switchdev_trans *trans,
                                      struct netlink_ext_ack *extack))
  {
        return 0;
@@@ -374,7 -392,8 +372,7 @@@ switchdev_handle_port_attr_set(struct n
                        struct switchdev_notifier_port_attr_info *port_attr_info,
                        bool (*check_cb)(const struct net_device *dev),
                        int (*set_cb)(struct net_device *dev,
 -                                    const struct switchdev_attr *attr,
 -                                    struct switchdev_trans *trans))
 +                                    const struct switchdev_attr *attr))
  {
        return 0;
  }
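
With struct switchdev_trans gone, the prepare/commit two-phase model is removed and
attribute handlers are called exactly once. A hedged sketch of a driver callback in
the new shape (hypothetical driver, only the STP case shown):

/* Sketch: a set_cb in the transaction-free form; the driver programs the
 * hardware immediately and reports failure directly.
 */
static int example_port_attr_set(struct net_device *dev,
                                 const struct switchdev_attr *attr)
{
        switch (attr->id) {
        case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
                /* apply attr->u.stp_state right away; no prepare phase */
                return 0;
        default:
                return -EOPNOTSUPP;
        }
}
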
diff --combined include/soc/mscc/ocelot.h
index d0d48e9620fb7e7c3d892e4f21c620d8ef04c434,c34b9ccb64722df8612232eaea34756d52dbff51..bfce3df61bfd071cdd470330418d2203af48e935
@@@ -98,7 -98,6 +98,7 @@@
  #define IFH_REW_OP_TWO_STEP_PTP               0x3
  #define IFH_REW_OP_ORIGIN_PTP         0x5
  
 +#define OCELOT_NUM_TC                 8
  #define OCELOT_TAG_LEN                        16
  #define OCELOT_SHORT_PREFIX_LEN               4
  #define OCELOT_LONG_PREFIX_LEN                16
@@@ -564,8 -563,6 +564,8 @@@ struct ocelot_ops 
        int (*netdev_to_port)(struct net_device *dev);
        int (*reset)(struct ocelot *ocelot);
        u16 (*wm_enc)(u16 value);
 +      u16 (*wm_dec)(u16 value);
 +      void (*wm_stat)(u32 val, u32 *inuse, u32 *maxuse);
  };
  
  struct ocelot_vcap_block {
@@@ -579,18 -576,6 +579,18 @@@ struct ocelot_vlan 
        u16 vid;
  };
  
 +enum ocelot_sb {
 +      OCELOT_SB_BUF,
 +      OCELOT_SB_REF,
 +      OCELOT_SB_NUM,
 +};
 +
 +enum ocelot_sb_pool {
 +      OCELOT_SB_POOL_ING,
 +      OCELOT_SB_POOL_EGR,
 +      OCELOT_SB_POOL_NUM,
 +};
 +
  struct ocelot_port {
        struct ocelot                   *ocelot;
  
        phy_interface_t                 phy_mode;
  
        u8                              *xmit_template;
 +      bool                            is_dsa_8021q_cpu;
 +
 +      struct net_device               *bond;
 +      bool                            lag_tx_active;
  };
  
  struct ocelot {
        struct device                   *dev;
 +      struct devlink                  *devlink;
 +      struct devlink_port             *devlink_ports;
  
        const struct ocelot_ops         *ops;
        struct regmap                   *targets[TARGET_MAX];
        const struct ocelot_stat_layout *stats_layout;
        unsigned int                    num_stats;
  
 -      int                             shared_queue_sz;
 +      u32                             pool_size[OCELOT_SB_NUM][OCELOT_SB_POOL_NUM];
 +      int                             packet_buffer_size;
 +      int                             num_frame_refs;
        int                             num_mact_rows;
  
        struct net_device               *hw_bridge_dev;
  
        int                             npi;
  
 -      enum ocelot_tag_prefix          inj_prefix;
 -      enum ocelot_tag_prefix          xtr_prefix;
 -
 -      u32                             *lags;
 +      enum ocelot_tag_prefix          npi_inj_prefix;
 +      enum ocelot_tag_prefix          npi_xtr_prefix;
  
        struct list_head                multicast;
        struct list_head                pgids;
@@@ -730,6 -709,7 +730,7 @@@ struct ocelot_policer 
  /* I/O */
  u32 ocelot_port_readl(struct ocelot_port *port, u32 reg);
  void ocelot_port_writel(struct ocelot_port *port, u32 val, u32 reg);
+ void ocelot_port_rmwl(struct ocelot_port *port, u32 val, u32 mask, u32 reg);
  u32 __ocelot_read_ix(struct ocelot *ocelot, u32 reg, u32 offset);
  void __ocelot_write_ix(struct ocelot *ocelot, u32 val, u32 reg, u32 offset);
  void __ocelot_rmw_ix(struct ocelot *ocelot, u32 val, u32 mask, u32 reg,
@@@ -758,11 -738,12 +759,12 @@@ int ocelot_get_sset_count(struct ocelo
  int ocelot_get_ts_info(struct ocelot *ocelot, int port,
                       struct ethtool_ts_info *info);
  void ocelot_set_ageing_time(struct ocelot *ocelot, unsigned int msecs);
+ int ocelot_port_flush(struct ocelot *ocelot, int port);
  void ocelot_adjust_link(struct ocelot *ocelot, int port,
                        struct phy_device *phydev);
 -int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, bool enabled,
 -                             struct switchdev_trans *trans);
 +int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, bool enabled);
  void ocelot_bridge_stp_state_set(struct ocelot *ocelot, int port, u8 state);
 +void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot);
  int ocelot_port_bridge_join(struct ocelot *ocelot, int port,
                            struct net_device *bridge);
  int ocelot_port_bridge_leave(struct ocelot *ocelot, int port,
@@@ -798,45 -779,5 +800,45 @@@ int ocelot_port_mdb_add(struct ocelot *
                        const struct switchdev_obj_port_mdb *mdb);
  int ocelot_port_mdb_del(struct ocelot *ocelot, int port,
                        const struct switchdev_obj_port_mdb *mdb);
 +int ocelot_port_lag_join(struct ocelot *ocelot, int port,
 +                       struct net_device *bond,
 +                       struct netdev_lag_upper_info *info);
 +void ocelot_port_lag_leave(struct ocelot *ocelot, int port,
 +                         struct net_device *bond);
 +void ocelot_port_lag_change(struct ocelot *ocelot, int port, bool lag_tx_active);
 +
 +int ocelot_devlink_sb_register(struct ocelot *ocelot);
 +void ocelot_devlink_sb_unregister(struct ocelot *ocelot);
 +int ocelot_sb_pool_get(struct ocelot *ocelot, unsigned int sb_index,
 +                     u16 pool_index,
 +                     struct devlink_sb_pool_info *pool_info);
 +int ocelot_sb_pool_set(struct ocelot *ocelot, unsigned int sb_index,
 +                     u16 pool_index, u32 size,
 +                     enum devlink_sb_threshold_type threshold_type,
 +                     struct netlink_ext_ack *extack);
 +int ocelot_sb_port_pool_get(struct ocelot *ocelot, int port,
 +                          unsigned int sb_index, u16 pool_index,
 +                          u32 *p_threshold);
 +int ocelot_sb_port_pool_set(struct ocelot *ocelot, int port,
 +                          unsigned int sb_index, u16 pool_index,
 +                          u32 threshold, struct netlink_ext_ack *extack);
 +int ocelot_sb_tc_pool_bind_get(struct ocelot *ocelot, int port,
 +                             unsigned int sb_index, u16 tc_index,
 +                             enum devlink_sb_pool_type pool_type,
 +                             u16 *p_pool_index, u32 *p_threshold);
 +int ocelot_sb_tc_pool_bind_set(struct ocelot *ocelot, int port,
 +                             unsigned int sb_index, u16 tc_index,
 +                             enum devlink_sb_pool_type pool_type,
 +                             u16 pool_index, u32 threshold,
 +                             struct netlink_ext_ack *extack);
 +int ocelot_sb_occ_snapshot(struct ocelot *ocelot, unsigned int sb_index);
 +int ocelot_sb_occ_max_clear(struct ocelot *ocelot, unsigned int sb_index);
 +int ocelot_sb_occ_port_pool_get(struct ocelot *ocelot, int port,
 +                              unsigned int sb_index, u16 pool_index,
 +                              u32 *p_cur, u32 *p_max);
 +int ocelot_sb_occ_tc_port_bind_get(struct ocelot *ocelot, int port,
 +                                 unsigned int sb_index, u16 tc_index,
 +                                 enum devlink_sb_pool_type pool_type,
 +                                 u32 *p_cur, u32 *p_max);
  
  #endif
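
The new enums and the pool_size matrix give the shared-buffer support a fixed
addressing scheme: buffer space vs. frame references, each split into ingress and
egress pools. A minimal sketch walking that matrix (hypothetical debug helper, not
exported by this patch):

/* Sketch: dump the configured size of every shared-buffer pool. */
static void example_dump_pool_sizes(struct ocelot *ocelot)
{
        int sb, pool;

        for (sb = 0; sb < OCELOT_SB_NUM; sb++)
                for (pool = 0; pool < OCELOT_SB_POOL_NUM; pool++)
                        dev_info(ocelot->dev, "sb %d pool %d: %u\n",
                                 sb, pool, ocelot->pool_size[sb][pool]);
}
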
diff --combined kernel/bpf/stackmap.c
index cabaf7db8efc09db4c0eb7437cec5bb8418b5403,bfafbf115bf306a2712c22dfb4eae1681c36fff1..be35bfb7fb13f31b921b39bc48fa78303bc7f66d
@@@ -7,9 -7,10 +7,9 @@@
  #include <linux/kernel.h>
  #include <linux/stacktrace.h>
  #include <linux/perf_event.h>
 -#include <linux/elf.h>
 -#include <linux/pagemap.h>
  #include <linux/irq_work.h>
  #include <linux/btf_ids.h>
 +#include <linux/buildid.h>
  #include "percpu_freelist.h"
  
  #define STACK_CREATE_FLAG_MASK                                        \
@@@ -114,6 -115,8 +114,8 @@@ static struct bpf_map *stack_map_alloc(
  
        /* hash table size must be power of 2 */
        n_buckets = roundup_pow_of_two(attr->max_entries);
+       if (!n_buckets)
+               return ERR_PTR(-E2BIG);
  
        cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap);
        cost += n_buckets * (value_size + sizeof(struct stack_map_bucket));
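
The added check covers the case where max_entries is so large that the next power
of two no longer fits in the 32-bit bucket count. A short sketch of the failure
mode (hypothetical helper, illustrative constant):

/* Sketch: on a 64-bit kernel, roundup_pow_of_two(0x80000001) is 1ULL << 32,
 * which truncates to 0 when stored in a u32, so a zero bucket count signals
 * overflow and is rejected with -E2BIG above.
 */
static bool example_bucket_count_ok(u32 max_entries, u32 *n_buckets)
{
        *n_buckets = roundup_pow_of_two(max_entries);

        return *n_buckets != 0;
}
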
@@@ -142,6 -145,140 +144,6 @@@ free_smap
        return ERR_PTR(err);
  }
  
 -#define BPF_BUILD_ID 3
 -/*
 - * Parse build id from the note segment. This logic can be shared between
 - * 32-bit and 64-bit system, because Elf32_Nhdr and Elf64_Nhdr are
 - * identical.
 - */
 -static inline int stack_map_parse_build_id(void *page_addr,
 -                                         unsigned char *build_id,
 -                                         void *note_start,
 -                                         Elf32_Word note_size)
 -{
 -      Elf32_Word note_offs = 0, new_offs;
 -
 -      /* check for overflow */
 -      if (note_start < page_addr || note_start + note_size < note_start)
 -              return -EINVAL;
 -
 -      /* only supports note that fits in the first page */
 -      if (note_start + note_size > page_addr + PAGE_SIZE)
 -              return -EINVAL;
 -
 -      while (note_offs + sizeof(Elf32_Nhdr) < note_size) {
 -              Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs);
 -
 -              if (nhdr->n_type == BPF_BUILD_ID &&
 -                  nhdr->n_namesz == sizeof("GNU") &&
 -                  nhdr->n_descsz > 0 &&
 -                  nhdr->n_descsz <= BPF_BUILD_ID_SIZE) {
 -                      memcpy(build_id,
 -                             note_start + note_offs +
 -                             ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr),
 -                             nhdr->n_descsz);
 -                      memset(build_id + nhdr->n_descsz, 0,
 -                             BPF_BUILD_ID_SIZE - nhdr->n_descsz);
 -                      return 0;
 -              }
 -              new_offs = note_offs + sizeof(Elf32_Nhdr) +
 -                      ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4);
 -              if (new_offs <= note_offs)  /* overflow */
 -                      break;
 -              note_offs = new_offs;
 -      }
 -      return -EINVAL;
 -}
 -
 -/* Parse build ID from 32-bit ELF */
 -static int stack_map_get_build_id_32(void *page_addr,
 -                                   unsigned char *build_id)
 -{
 -      Elf32_Ehdr *ehdr = (Elf32_Ehdr *)page_addr;
 -      Elf32_Phdr *phdr;
 -      int i;
 -
 -      /* only supports phdr that fits in one page */
 -      if (ehdr->e_phnum >
 -          (PAGE_SIZE - sizeof(Elf32_Ehdr)) / sizeof(Elf32_Phdr))
 -              return -EINVAL;
 -
 -      phdr = (Elf32_Phdr *)(page_addr + sizeof(Elf32_Ehdr));
 -
 -      for (i = 0; i < ehdr->e_phnum; ++i) {
 -              if (phdr[i].p_type == PT_NOTE &&
 -                  !stack_map_parse_build_id(page_addr, build_id,
 -                                            page_addr + phdr[i].p_offset,
 -                                            phdr[i].p_filesz))
 -                      return 0;
 -      }
 -      return -EINVAL;
 -}
 -
 -/* Parse build ID from 64-bit ELF */
 -static int stack_map_get_build_id_64(void *page_addr,
 -                                   unsigned char *build_id)
 -{
 -      Elf64_Ehdr *ehdr = (Elf64_Ehdr *)page_addr;
 -      Elf64_Phdr *phdr;
 -      int i;
 -
 -      /* only supports phdr that fits in one page */
 -      if (ehdr->e_phnum >
 -          (PAGE_SIZE - sizeof(Elf64_Ehdr)) / sizeof(Elf64_Phdr))
 -              return -EINVAL;
 -
 -      phdr = (Elf64_Phdr *)(page_addr + sizeof(Elf64_Ehdr));
 -
 -      for (i = 0; i < ehdr->e_phnum; ++i) {
 -              if (phdr[i].p_type == PT_NOTE &&
 -                  !stack_map_parse_build_id(page_addr, build_id,
 -                                            page_addr + phdr[i].p_offset,
 -                                            phdr[i].p_filesz))
 -                      return 0;
 -      }
 -      return -EINVAL;
 -}
 -
 -/* Parse build ID of ELF file mapped to vma */
 -static int stack_map_get_build_id(struct vm_area_struct *vma,
 -                                unsigned char *build_id)
 -{
 -      Elf32_Ehdr *ehdr;
 -      struct page *page;
 -      void *page_addr;
 -      int ret;
 -
 -      /* only works for page backed storage  */
 -      if (!vma->vm_file)
 -              return -EINVAL;
 -
 -      page = find_get_page(vma->vm_file->f_mapping, 0);
 -      if (!page)
 -              return -EFAULT; /* page not mapped */
 -
 -      ret = -EINVAL;
 -      page_addr = kmap_atomic(page);
 -      ehdr = (Elf32_Ehdr *)page_addr;
 -
 -      /* compare magic x7f "ELF" */
 -      if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0)
 -              goto out;
 -
 -      /* only support executable file and shared object file */
 -      if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN)
 -              goto out;
 -
 -      if (ehdr->e_ident[EI_CLASS] == ELFCLASS32)
 -              ret = stack_map_get_build_id_32(page_addr, build_id);
 -      else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64)
 -              ret = stack_map_get_build_id_64(page_addr, build_id);
 -out:
 -      kunmap_atomic(page_addr);
 -      put_page(page);
 -      return ret;
 -}
 -
  static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
                                          u64 *ips, u32 trace_nr, bool user)
  {
                for (i = 0; i < trace_nr; i++) {
                        id_offs[i].status = BPF_STACK_BUILD_ID_IP;
                        id_offs[i].ip = ips[i];
 -                      memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE);
 +                      memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX);
                }
                return;
        }
  
        for (i = 0; i < trace_nr; i++) {
                vma = find_vma(current->mm, ips[i]);
 -              if (!vma || stack_map_get_build_id(vma, id_offs[i].build_id)) {
 +              if (!vma || build_id_parse(vma, id_offs[i].build_id, NULL)) {
                        /* per entry fall back to ips */
                        id_offs[i].status = BPF_STACK_BUILD_ID_IP;
                        id_offs[i].ip = ips[i];
 -                      memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE);
 +                      memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX);
                        continue;
                }
                id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i]
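
The open-coded ELF note parser above is replaced by the shared build_id_parse()
helper from <linux/buildid.h>. A condensed sketch of the per-frame handling it
enables (mirrors the loop above; assumes the caller already holds the mmap lock):

/* Sketch: resolve one stack entry either to (build id, file offset) or, on
 * failure, to the raw instruction pointer.
 */
static void example_fill_build_id(struct bpf_stack_build_id *id_off, u64 ip)
{
        struct vm_area_struct *vma = find_vma(current->mm, ip);

        if (!vma || build_id_parse(vma, id_off->build_id, NULL)) {
                id_off->status = BPF_STACK_BUILD_ID_IP;
                id_off->ip = ip;
                return;
        }

        id_off->status = BPF_STACK_BUILD_ID_VALID;
        id_off->offset = (vma->vm_pgoff << PAGE_SHIFT) + ip - vma->vm_start;
}
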
diff --combined kernel/bpf/verifier.c
index 785d25392ead7a411be645e810f43c0307377b2f,37581919e050c8fc63afb74aa9812d7e40df8eea..1cffd4e847258d3e97f6f7e8b5fc42fd222aa3ac
@@@ -3606,30 -3606,13 +3606,30 @@@ static int check_mem_access(struct bpf_
        return err;
  }
  
 -static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
 +static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
  {
 +      int load_reg;
        int err;
  
 -      if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
 -          insn->imm != 0) {
 -              verbose(env, "BPF_XADD uses reserved fields\n");
 +      switch (insn->imm) {
 +      case BPF_ADD:
 +      case BPF_ADD | BPF_FETCH:
 +      case BPF_AND:
 +      case BPF_AND | BPF_FETCH:
 +      case BPF_OR:
 +      case BPF_OR | BPF_FETCH:
 +      case BPF_XOR:
 +      case BPF_XOR | BPF_FETCH:
 +      case BPF_XCHG:
 +      case BPF_CMPXCHG:
 +              break;
 +      default:
 +              verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm);
 +              return -EINVAL;
 +      }
 +
 +      if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
 +              verbose(env, "invalid atomic operand size\n");
                return -EINVAL;
        }
  
        if (err)
                return err;
  
 +      if (insn->imm == BPF_CMPXCHG) {
 +              /* Check comparison of R0 with memory location */
 +              err = check_reg_arg(env, BPF_REG_0, SRC_OP);
 +              if (err)
 +                      return err;
 +      }
 +
        if (is_pointer_value(env, insn->src_reg)) {
                verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
                return -EACCES;
            is_pkt_reg(env, insn->dst_reg) ||
            is_flow_key_reg(env, insn->dst_reg) ||
            is_sk_reg(env, insn->dst_reg)) {
 -              verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
 +              verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
                        insn->dst_reg,
                        reg_type_str[reg_state(env, insn->dst_reg)->type]);
                return -EACCES;
        }
  
 -      /* check whether atomic_add can read the memory */
 +      /* check whether we can read the memory */
        err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
                               BPF_SIZE(insn->code), BPF_READ, -1, true);
        if (err)
                return err;
  
 -      /* check whether atomic_add can write into the same memory */
 -      return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
 -                              BPF_SIZE(insn->code), BPF_WRITE, -1, true);
 +      /* check whether we can write into the same memory */
 +      err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
 +                             BPF_SIZE(insn->code), BPF_WRITE, -1, true);
 +      if (err)
 +              return err;
 +
 +      if (!(insn->imm & BPF_FETCH))
 +              return 0;
 +
 +      if (insn->imm == BPF_CMPXCHG)
 +              load_reg = BPF_REG_0;
 +      else
 +              load_reg = insn->src_reg;
 +
 +      /* check and record load of old value */
 +      err = check_reg_arg(env, load_reg, DST_OP);
 +      if (err)
 +              return err;
 +
 +      return 0;
  }
  
  static int __check_stack_boundary(struct bpf_verifier_env *env, u32 regno,
@@@ -4362,7 -4321,7 +4362,7 @@@ skip_type_check
                        err = mark_chain_precision(env, regno);
        } else if (arg_type_is_alloc_size(arg_type)) {
                if (!tnum_is_const(reg->var_off)) {
 -                      verbose(env, "R%d unbounded size, use 'var &= const' or 'if (var < const)'\n",
 +                      verbose(env, "R%d is not a known constant'\n",
                                regno);
                        return -EACCES;
                }
@@@ -6918,7 -6877,7 +6918,7 @@@ static int is_branch32_taken(struct bpf
        case BPF_JSGT:
                if (reg->s32_min_value > sval)
                        return 1;
-               else if (reg->s32_max_value < sval)
+               else if (reg->s32_max_value <= sval)
                        return 0;
                break;
        case BPF_JLT:
@@@ -6991,7 -6950,7 +6991,7 @@@ static int is_branch64_taken(struct bpf
        case BPF_JSGT:
                if (reg->smin_value > sval)
                        return 1;
-               else if (reg->smax_value < sval)
+               else if (reg->smax_value <= sval)
                        return 0;
                break;
        case BPF_JLT:
@@@ -8631,7 -8590,11 +8631,11 @@@ static bool range_within(struct bpf_reg
        return old->umin_value <= cur->umin_value &&
               old->umax_value >= cur->umax_value &&
               old->smin_value <= cur->smin_value &&
-              old->smax_value >= cur->smax_value;
+              old->smax_value >= cur->smax_value &&
+              old->u32_min_value <= cur->u32_min_value &&
+              old->u32_max_value >= cur->u32_max_value &&
+              old->s32_min_value <= cur->s32_min_value &&
+              old->s32_max_value >= cur->s32_max_value;
  }
  
  /* Maximum number of register states that can exist at once */
@@@ -9567,19 -9530,14 +9571,19 @@@ static int do_check(struct bpf_verifier
                } else if (class == BPF_STX) {
                        enum bpf_reg_type *prev_dst_type, dst_reg_type;
  
 -                      if (BPF_MODE(insn->code) == BPF_XADD) {
 -                              err = check_xadd(env, env->insn_idx, insn);
 +                      if (BPF_MODE(insn->code) == BPF_ATOMIC) {
 +                              err = check_atomic(env, env->insn_idx, insn);
                                if (err)
                                        return err;
                                env->insn_idx++;
                                continue;
                        }
  
 +                      if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
 +                              verbose(env, "BPF_STX uses reserved fields\n");
 +                              return -EINVAL;
 +                      }
 +
                        /* check src1 operand */
                        err = check_reg_arg(env, insn->src_reg, SRC_OP);
                        if (err)
@@@ -9751,36 -9709,6 +9755,36 @@@ process_bpf_exit
        return 0;
  }
  
 +static int find_btf_percpu_datasec(struct btf *btf)
 +{
 +      const struct btf_type *t;
 +      const char *tname;
 +      int i, n;
 +
 +      /*
 +       * Both vmlinux and module each have their own ".data..percpu"
 +       * DATASECs in BTF. So for module's case, we need to skip vmlinux BTF
 +       * types to look at only module's own BTF types.
 +       */
 +      n = btf_nr_types(btf);
 +      if (btf_is_module(btf))
 +              i = btf_nr_types(btf_vmlinux);
 +      else
 +              i = 1;
 +
 +      for(; i < n; i++) {
 +              t = btf_type_by_id(btf, i);
 +              if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
 +                      continue;
 +
 +              tname = btf_name_by_offset(btf, t->name_off);
 +              if (!strcmp(tname, ".data..percpu"))
 +                      return i;
 +      }
 +
 +      return -ENOENT;
 +}
 +
  /* replace pseudo btf_id with kernel symbol address */
  static int check_pseudo_btf_id(struct bpf_verifier_env *env,
                               struct bpf_insn *insn,
  {
        const struct btf_var_secinfo *vsi;
        const struct btf_type *datasec;
 +      struct btf_mod_pair *btf_mod;
        const struct btf_type *t;
        const char *sym_name;
        bool percpu = false;
        u32 type, id = insn->imm;
 +      struct btf *btf;
        s32 datasec_id;
        u64 addr;
 -      int i;
 -
 -      if (!btf_vmlinux) {
 -              verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
 -              return -EINVAL;
 -      }
 +      int i, btf_fd, err;
  
 -      if (insn[1].imm != 0) {
 -              verbose(env, "reserved field (insn[1].imm) is used in pseudo_btf_id ldimm64 insn.\n");
 -              return -EINVAL;
 +      btf_fd = insn[1].imm;
 +      if (btf_fd) {
 +              btf = btf_get_by_fd(btf_fd);
 +              if (IS_ERR(btf)) {
 +                      verbose(env, "invalid module BTF object FD specified.\n");
 +                      return -EINVAL;
 +              }
 +      } else {
 +              if (!btf_vmlinux) {
 +                      verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
 +                      return -EINVAL;
 +              }
 +              btf = btf_vmlinux;
 +              btf_get(btf);
        }
  
 -      t = btf_type_by_id(btf_vmlinux, id);
 +      t = btf_type_by_id(btf, id);
        if (!t) {
                verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
 -              return -ENOENT;
 +              err = -ENOENT;
 +              goto err_put;
        }
  
        if (!btf_type_is_var(t)) {
 -              verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n",
 -                      id);
 -              return -EINVAL;
 +              verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n", id);
 +              err = -EINVAL;
 +              goto err_put;
        }
  
 -      sym_name = btf_name_by_offset(btf_vmlinux, t->name_off);
 +      sym_name = btf_name_by_offset(btf, t->name_off);
        addr = kallsyms_lookup_name(sym_name);
        if (!addr) {
                verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
                        sym_name);
 -              return -ENOENT;
 +              err = -ENOENT;
 +              goto err_put;
        }
  
 -      datasec_id = btf_find_by_name_kind(btf_vmlinux, ".data..percpu",
 -                                         BTF_KIND_DATASEC);
 +      datasec_id = find_btf_percpu_datasec(btf);
        if (datasec_id > 0) {
 -              datasec = btf_type_by_id(btf_vmlinux, datasec_id);
 +              datasec = btf_type_by_id(btf, datasec_id);
                for_each_vsi(i, datasec, vsi) {
                        if (vsi->type == id) {
                                percpu = true;
        insn[1].imm = addr >> 32;
  
        type = t->type;
 -      t = btf_type_skip_modifiers(btf_vmlinux, type, NULL);
 +      t = btf_type_skip_modifiers(btf, type, NULL);
        if (percpu) {
                aux->btf_var.reg_type = PTR_TO_PERCPU_BTF_ID;
 -              aux->btf_var.btf = btf_vmlinux;
 +              aux->btf_var.btf = btf;
                aux->btf_var.btf_id = type;
        } else if (!btf_type_is_struct(t)) {
                const struct btf_type *ret;
                u32 tsize;
  
                /* resolve the type size of ksym. */
 -              ret = btf_resolve_size(btf_vmlinux, t, &tsize);
 +              ret = btf_resolve_size(btf, t, &tsize);
                if (IS_ERR(ret)) {
 -                      tname = btf_name_by_offset(btf_vmlinux, t->name_off);
 +                      tname = btf_name_by_offset(btf, t->name_off);
                        verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
                                tname, PTR_ERR(ret));
 -                      return -EINVAL;
 +                      err = -EINVAL;
 +                      goto err_put;
                }
                aux->btf_var.reg_type = PTR_TO_MEM;
                aux->btf_var.mem_size = tsize;
        } else {
                aux->btf_var.reg_type = PTR_TO_BTF_ID;
 -              aux->btf_var.btf = btf_vmlinux;
 +              aux->btf_var.btf = btf;
                aux->btf_var.btf_id = type;
        }
 +
 +      /* check whether we recorded this BTF (and maybe module) already */
 +      for (i = 0; i < env->used_btf_cnt; i++) {
 +              if (env->used_btfs[i].btf == btf) {
 +                      btf_put(btf);
 +                      return 0;
 +              }
 +      }
 +
 +      if (env->used_btf_cnt >= MAX_USED_BTFS) {
 +              err = -E2BIG;
 +              goto err_put;
 +      }
 +
 +      btf_mod = &env->used_btfs[env->used_btf_cnt];
 +      btf_mod->btf = btf;
 +      btf_mod->module = NULL;
 +
 +      /* if we reference variables from kernel module, bump its refcount */
 +      if (btf_is_module(btf)) {
 +              btf_mod->module = btf_try_get_module(btf);
 +              if (!btf_mod->module) {
 +                      err = -ENXIO;
 +                      goto err_put;
 +              }
 +      }
 +
 +      env->used_btf_cnt++;
 +
        return 0;
 +err_put:
 +      btf_put(btf);
 +      return err;
  }
  
  static int check_map_prealloc(struct bpf_map *map)
@@@ -10056,6 -9942,13 +10060,6 @@@ static int resolve_pseudo_ldimm64(struc
                        return -EINVAL;
                }
  
 -              if (BPF_CLASS(insn->code) == BPF_STX &&
 -                  ((BPF_MODE(insn->code) != BPF_MEM &&
 -                    BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
 -                      verbose(env, "BPF_STX uses reserved fields\n");
 -                      return -EINVAL;
 -              }
 -
                if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
                        struct bpf_insn_aux_data *aux;
                        struct bpf_map *map;
@@@ -10199,13 -10092,6 +10203,13 @@@ static void release_maps(struct bpf_ver
                             env->used_map_cnt);
  }
  
 +/* drop refcnt of BTFs used by the rejected program */
 +static void release_btfs(struct bpf_verifier_env *env)
 +{
 +      __bpf_free_used_btfs(env->prog->aux, env->used_btfs,
 +                           env->used_btf_cnt);
 +}
 +
  /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
  static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
  {
@@@ -11117,30 -11003,28 +11121,28 @@@ static int fixup_bpf_calls(struct bpf_v
                    insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
                    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
                        bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
-                       struct bpf_insn mask_and_div[] = {
-                               BPF_MOV32_REG(insn->src_reg, insn->src_reg),
+                       bool isdiv = BPF_OP(insn->code) == BPF_DIV;
+                       struct bpf_insn *patchlet;
+                       struct bpf_insn chk_and_div[] = {
                                /* Rx div 0 -> 0 */
-                               BPF_JMP_IMM(BPF_JNE, insn->src_reg, 0, 2),
+                               BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+                                            BPF_JNE | BPF_K, insn->src_reg,
+                                            0, 2, 0),
                                BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
                                BPF_JMP_IMM(BPF_JA, 0, 0, 1),
                                *insn,
                        };
-                       struct bpf_insn mask_and_mod[] = {
-                               BPF_MOV32_REG(insn->src_reg, insn->src_reg),
+                       struct bpf_insn chk_and_mod[] = {
                                /* Rx mod 0 -> Rx */
-                               BPF_JMP_IMM(BPF_JEQ, insn->src_reg, 0, 1),
+                               BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+                                            BPF_JEQ | BPF_K, insn->src_reg,
+                                            0, 1, 0),
                                *insn,
                        };
-                       struct bpf_insn *patchlet;
  
-                       if (insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
-                           insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
-                               patchlet = mask_and_div + (is64 ? 1 : 0);
-                               cnt = ARRAY_SIZE(mask_and_div) - (is64 ? 1 : 0);
-                       } else {
-                               patchlet = mask_and_mod + (is64 ? 1 : 0);
-                               cnt = ARRAY_SIZE(mask_and_mod) - (is64 ? 1 : 0);
-                       }
+                       patchlet = isdiv ? chk_and_div : chk_and_mod;
+                       cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
+                                     ARRAY_SIZE(chk_and_mod);
  
                        new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
                        if (!new_prog)
@@@ -12218,10 -12102,7 +12220,10 @@@ skip_full_check
                goto err_release_maps;
        }
  
 -      if (ret == 0 && env->used_map_cnt) {
 +      if (ret)
 +              goto err_release_maps;
 +
 +      if (env->used_map_cnt) {
                /* if program passed verifier, update used_maps in bpf_prog_info */
                env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
                                                          sizeof(env->used_maps[0]),
                memcpy(env->prog->aux->used_maps, env->used_maps,
                       sizeof(env->used_maps[0]) * env->used_map_cnt);
                env->prog->aux->used_map_cnt = env->used_map_cnt;
 +      }
 +      if (env->used_btf_cnt) {
 +              /* if program passed verifier, update used_btfs in bpf_prog_aux */
 +              env->prog->aux->used_btfs = kmalloc_array(env->used_btf_cnt,
 +                                                        sizeof(env->used_btfs[0]),
 +                                                        GFP_KERNEL);
 +              if (!env->prog->aux->used_btfs) {
 +                      ret = -ENOMEM;
 +                      goto err_release_maps;
 +              }
  
 +              memcpy(env->prog->aux->used_btfs, env->used_btfs,
 +                     sizeof(env->used_btfs[0]) * env->used_btf_cnt);
 +              env->prog->aux->used_btf_cnt = env->used_btf_cnt;
 +      }
 +      if (env->used_map_cnt || env->used_btf_cnt) {
                /* program is valid. Convert pseudo bpf_ld_imm64 into generic
                 * bpf_ld_imm64 instructions
                 */
                convert_pseudo_ld_imm64(env);
        }
  
 -      if (ret == 0)
 -              adjust_btf_func(env);
 +      adjust_btf_func(env);
  
  err_release_maps:
        if (!env->prog->aux->used_maps)
                 * them now. Otherwise free_used_maps() will release them.
                 */
                release_maps(env);
 +      if (!env->prog->aux->used_btfs)
 +              release_btfs(env);
  
        /* extension progs temporarily inherit the attach_type of their targets
           for verification purposes, so set it back to zero before returning
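
check_atomic() above accepts the new BPF_ATOMIC encodings and, for the fetching
variants, records which register receives the old value. A small sketch of that
decision (hypothetical helper mirroring the verifier logic):

/* Sketch: only fetching atomics load a value back; BPF_CMPXCHG always uses
 * R0, every other fetching op uses the source register.
 */
static int example_atomic_fetch_reg(const struct bpf_insn *insn)
{
        if (!(insn->imm & BPF_FETCH))
                return -1;      /* plain BPF_ADD/AND/OR/XOR: nothing loaded */

        return insn->imm == BPF_CMPXCHG ? BPF_REG_0 : insn->src_reg;
}
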
diff --combined net/bridge/br_mrp.c
index fc0a98874bfc7dae0af3d5d71a5ed83fb6e33a4e,5aeae6ad17b37d8beac099bfd7db44b841941516..01c67ed727a9cd393c333bffea4764970077314c
@@@ -557,19 -557,22 +557,22 @@@ int br_mrp_del(struct net_bridge *br, s
  int br_mrp_set_port_state(struct net_bridge_port *p,
                          enum br_mrp_port_state_type state)
  {
+       u32 port_state;
        if (!p || !(p->flags & BR_MRP_AWARE))
                return -EINVAL;
  
        spin_lock_bh(&p->br->lock);
  
        if (state == BR_MRP_PORT_STATE_FORWARDING)
-               p->state = BR_STATE_FORWARDING;
+               port_state = BR_STATE_FORWARDING;
        else
-               p->state = BR_STATE_BLOCKING;
+               port_state = BR_STATE_BLOCKING;
  
+       p->state = port_state;
        spin_unlock_bh(&p->br->lock);
  
-       br_mrp_port_switchdev_set_state(p, state);
+       br_mrp_port_switchdev_set_state(p, port_state);
  
        return 0;
  }
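
The fix computes the bridge port state once and reuses it for both the software
state and the switchdev notification, so the hardware is told a BR_STATE_* value
rather than an MRP-specific one. The mapping it applies, as a sketch (hypothetical
helper):

/* Sketch: the MRP-to-bridge state mapping now used for p->state and for
 * br_mrp_port_switchdev_set_state() alike.
 */
static u32 example_mrp_to_br_state(enum br_mrp_port_state_type state)
{
        return state == BR_MRP_PORT_STATE_FORWARDING ? BR_STATE_FORWARDING :
                                                       BR_STATE_BLOCKING;
}
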
@@@ -825,7 -828,7 +828,7 @@@ int br_mrp_start_in_test(struct net_bri
        return 0;
  }
  
 -/* Determin if the frame type is a ring frame */
 +/* Determine if the frame type is a ring frame */
  static bool br_mrp_ring_frame(struct sk_buff *skb)
  {
        const struct br_mrp_tlv_hdr *hdr;
        return false;
  }
  
 -/* Determin if the frame type is an interconnect frame */
 +/* Determine if the frame type is an interconnect frame */
  static bool br_mrp_in_frame(struct sk_buff *skb)
  {
        const struct br_mrp_tlv_hdr *hdr;
@@@ -894,7 -897,7 +897,7 @@@ static void br_mrp_mrm_process(struct b
                br_mrp_ring_port_open(port->dev, false);
  }
  
 -/* Determin if the test hdr has a better priority than the node */
 +/* Determine if the test hdr has a better priority than the node */
  static bool br_mrp_test_better_than_own(struct br_mrp *mrp,
                                        struct net_bridge *br,
                                        const struct br_mrp_ring_test_hdr *hdr)
diff --combined net/core/dev.c
index 7647278e46f0eb6b6c6270b3d36959232fccd469,449b45b843d40ece7dd1e2ed6a5996ee1db9f591..321d41a110e723e962cbf7e78141161c9b0d5532
@@@ -91,7 -91,6 +91,7 @@@
  #include <linux/etherdevice.h>
  #include <linux/ethtool.h>
  #include <linux/skbuff.h>
 +#include <linux/kthread.h>
  #include <linux/bpf.h>
  #include <linux/bpf_trace.h>
  #include <net/net_namespace.h>
  #include <net/dsa.h>
  #include <net/dst.h>
  #include <net/dst_metadata.h>
 +#include <net/gro.h>
  #include <net/pkt_sched.h>
  #include <net/pkt_cls.h>
  #include <net/checksum.h>
@@@ -1495,27 -1493,6 +1495,27 @@@ void netdev_notify_peers(struct net_dev
  }
  EXPORT_SYMBOL(netdev_notify_peers);
  
 +static int napi_threaded_poll(void *data);
 +
 +static int napi_kthread_create(struct napi_struct *n)
 +{
 +      int err = 0;
 +
 +      /* Create and wake up the kthread once to put it in
 +       * TASK_INTERRUPTIBLE mode to avoid the blocked task
 +       * warning and work with loadavg.
 +       */
 +      n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d",
 +                              n->dev->name, n->napi_id);
 +      if (IS_ERR(n->thread)) {
 +              err = PTR_ERR(n->thread);
 +              pr_err("kthread_run failed with err %d\n", err);
 +              n->thread = NULL;
 +      }
 +
 +      return err;
 +}
 +
  static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
  {
        const struct net_device_ops *ops = dev->netdev_ops;
@@@ -3640,22 -3617,11 +3640,22 @@@ static struct sk_buff *validate_xmit_vl
  int skb_csum_hwoffload_help(struct sk_buff *skb,
                            const netdev_features_t features)
  {
 -      if (unlikely(skb->csum_not_inet))
 +      if (unlikely(skb_csum_is_sctp(skb)))
                return !!(features & NETIF_F_SCTP_CRC) ? 0 :
                        skb_crc32c_csum_help(skb);
  
 -      return !!(features & NETIF_F_CSUM_MASK) ? 0 : skb_checksum_help(skb);
 +      if (features & NETIF_F_HW_CSUM)
 +              return 0;
 +
 +      if (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) {
 +              switch (skb->csum_offset) {
 +              case offsetof(struct tcphdr, check):
 +              case offsetof(struct udphdr, check):
 +                      return 0;
 +              }
 +      }
 +
 +      return skb_checksum_help(skb);
  }
  EXPORT_SYMBOL(skb_csum_hwoffload_help);
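
skb_csum_hwoffload_help() now trusts legacy NETIF_F_IP_CSUM/NETIF_F_IPV6_CSUM
devices only for TCP and UDP checksum offsets and resolves everything else in
software. A sketch of the same decision (hypothetical helper mirroring the code
above):

/* Sketch: can this skb's checksum be left to the hardware? */
static bool example_can_offload_csum(const struct sk_buff *skb,
                                     netdev_features_t features)
{
        if (features & NETIF_F_HW_CSUM)
                return true;            /* any csum_start/csum_offset is fine */

        if (!(features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)))
                return false;

        /* legacy IP checksum offload only understands TCP and UDP */
        return skb->csum_offset == offsetof(struct tcphdr, check) ||
               skb->csum_offset == offsetof(struct udphdr, check);
}
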
  
@@@ -3912,7 -3878,6 +3912,7 @@@ sch_handle_egress(struct sk_buff *skb, 
  
        /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
        qdisc_skb_cb(skb)->mru = 0;
 +      qdisc_skb_cb(skb)->post_ct = false;
        mini_qdisc_bstats_cpu_update(miniq, skb);
  
        switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
@@@ -4118,7 -4083,7 +4118,7 @@@ static int __dev_queue_xmit(struct sk_b
        skb_reset_mac_header(skb);
  
        if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP))
 -              __skb_tstamp_tx(skb, NULL, skb->sk, SCM_TSTAMP_SCHED);
 +              __skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SCHED);
  
        /* Disable soft irqs for various locks below. Also
         * stops preemption for RCU.
@@@ -4287,22 -4252,6 +4287,22 @@@ int gro_normal_batch __read_mostly = 8
  static inline void ____napi_schedule(struct softnet_data *sd,
                                     struct napi_struct *napi)
  {
 +      struct task_struct *thread;
 +
 +      if (test_bit(NAPI_STATE_THREADED, &napi->state)) {
 +              /* Paired with smp_mb__before_atomic() in
 +               * napi_enable()/dev_set_threaded().
 +               * Use READ_ONCE() to guarantee a complete
 +               * read on napi->thread. Only call
 +               * wake_up_process() when it's not NULL.
 +               */
 +              thread = READ_ONCE(napi->thread);
 +              if (thread) {
 +                      wake_up_process(thread);
 +                      return;
 +              }
 +      }
 +
        list_add_tail(&napi->poll_list, &sd->poll_list);
        __raise_softirq_irqoff(NET_RX_SOFTIRQ);
  }
@@@ -4654,14 -4603,14 +4654,14 @@@ static u32 netif_receive_generic_xdp(st
                                     struct xdp_buff *xdp,
                                     struct bpf_prog *xdp_prog)
  {
 +      void *orig_data, *orig_data_end, *hard_start;
        struct netdev_rx_queue *rxqueue;
 -      void *orig_data, *orig_data_end;
        u32 metalen, act = XDP_DROP;
 +      u32 mac_len, frame_sz;
        __be16 orig_eth_type;
        struct ethhdr *eth;
        bool orig_bcast;
 -      int hlen, off;
 -      u32 mac_len;
 +      int off;
  
        /* Reinjected packets coming from act_mirred or similar should
         * not get XDP generic processing.
         * header.
         */
        mac_len = skb->data - skb_mac_header(skb);
 -      hlen = skb_headlen(skb) + mac_len;
 -      xdp->data = skb->data - mac_len;
 -      xdp->data_meta = xdp->data;
 -      xdp->data_end = xdp->data + hlen;
 -      xdp->data_hard_start = skb->data - skb_headroom(skb);
 +      hard_start = skb->data - skb_headroom(skb);
  
        /* SKB "head" area always have tailroom for skb_shared_info */
 -      xdp->frame_sz  = (void *)skb_end_pointer(skb) - xdp->data_hard_start;
 -      xdp->frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 +      frame_sz = (void *)skb_end_pointer(skb) - hard_start;
 +      frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 +
 +      rxqueue = netif_get_rxqueue(skb);
 +      xdp_init_buff(xdp, frame_sz, &rxqueue->xdp_rxq);
 +      xdp_prepare_buff(xdp, hard_start, skb_headroom(skb) - mac_len,
 +                       skb_headlen(skb) + mac_len, true);
  
        orig_data_end = xdp->data_end;
        orig_data = xdp->data;
        orig_bcast = is_multicast_ether_addr_64bits(eth->h_dest);
        orig_eth_type = eth->h_proto;
  
 -      rxqueue = netif_get_rxqueue(skb);
 -      xdp->rxq = &rxqueue->xdp_rxq;
 -
        act = bpf_prog_run_xdp(xdp_prog, xdp);
  
        /* check if bpf_xdp_adjust_head was used */
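
Generic XDP now builds its xdp_buff with the shared xdp_init_buff() and
xdp_prepare_buff() helpers instead of filling the fields by hand. A minimal
driver-style sketch of the same pattern (illustrative headroom and length values):

/* Sketch: two-step xdp_buff setup around an RX frame. */
static void example_build_xdp_buff(struct xdp_buff *xdp, void *hard_start,
                                   u32 frame_sz, struct xdp_rxq_info *rxq)
{
        xdp_init_buff(xdp, frame_sz, rxq);
        /* 256 bytes of headroom, 1500-byte frame, no metadata yet */
        xdp_prepare_buff(xdp, hard_start, 256, 1500, false);
}
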
@@@ -5011,7 -4962,6 +5011,7 @@@ sch_handle_ingress(struct sk_buff *skb
  
        qdisc_skb_cb(skb)->pkt_len = skb->len;
        qdisc_skb_cb(skb)->mru = 0;
 +      qdisc_skb_cb(skb)->post_ct = false;
        skb->tc_at_ingress = 1;
        mini_qdisc_bstats_cpu_update(miniq, skb);
  
@@@ -5201,7 -5151,8 +5201,7 @@@ another_round
                skb_reset_mac_len(skb);
        }
  
 -      if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
 -          skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
 +      if (eth_type_vlan(skb->protocol)) {
                skb = skb_vlan_untag(skb);
                if (unlikely(!skb))
                        goto out;
@@@ -5285,7 -5236,8 +5285,7 @@@ check_vlan_id
                         * find vlan device.
                         */
                        skb->pkt_type = PACKET_OTHERHOST;
 -              } else if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
 -                         skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
 +              } else if (eth_type_vlan(skb->protocol)) {
                        /* Outer header is 802.1P with vlan 0, inner header is
                         * 802.1Q or 802.1AD and vlan_do_receive() above could
                         * not find vlan dev for vlan id 0.
@@@ -5761,7 -5713,7 +5761,7 @@@ static void flush_all_backlogs(void
        }
  
        /* we can have in flight packet[s] on the cpus we are not flushing,
 -       * synchronize_net() in rollback_registered_many() will take care of
 +       * synchronize_net() in unregister_netdevice_many() will take care of
         * them
         */
        for_each_cpu(cpu, &flush_cpus)
@@@ -5783,13 -5735,16 +5783,14 @@@ static void gro_normal_list(struct napi
  /* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded,
   * pass the whole batch up to the stack.
   */
- static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb)
+ static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, int segs)
  {
        list_add_tail(&skb->list, &napi->rx_list);
-       if (++napi->rx_count >= gro_normal_batch)
+       napi->rx_count += segs;
+       if (napi->rx_count >= gro_normal_batch)
                gro_normal_list(napi);
  }
  
 -INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int));
 -INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int));
  static int napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb)
  {
        struct packet_offload *ptype;
        }
  
  out:
-       gro_normal_one(napi, skb);
+       gro_normal_one(napi, skb, NAPI_GRO_CB(skb)->count);
        return NET_RX_SUCCESS;
  }
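
gro_normal_one() now advances the batch counter by the number of segments a GRO
super-packet carries instead of by one per SKB, so the flush threshold tracks real
packet counts. A brief sketch of the new decision (hypothetical helper, illustrative
numbers in the comment):

/* Sketch: with gro_normal_batch = 8, one super-packet that coalesced eight
 * TCP segments flushes the list on its own, where the old per-SKB count
 * would have waited for eight separate super-packets.
 */
static bool example_should_flush(unsigned int rx_count, unsigned int segs,
                                 unsigned int batch)
{
        return rx_count + segs >= batch;
}
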
  
@@@ -5958,6 -5913,10 +5959,6 @@@ static void gro_flush_oldest(struct nap
        napi_gro_complete(napi, oldest);
  }
  
 -INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *,
 -                                                         struct sk_buff *));
 -INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *,
 -                                                         struct sk_buff *));
  static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
  {
        u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
@@@ -6109,9 -6068,13 +6110,9 @@@ static gro_result_t napi_skb_finish(str
  {
        switch (ret) {
        case GRO_NORMAL:
-               gro_normal_one(napi, skb);
+               gro_normal_one(napi, skb, 1);
                break;
  
 -      case GRO_DROP:
 -              kfree_skb(skb);
 -              break;
 -
        case GRO_MERGED_FREE:
                if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
                        napi_skb_free_stolen_head(skb);
@@@ -6193,9 -6156,13 +6194,9 @@@ static gro_result_t napi_frags_finish(s
                __skb_push(skb, ETH_HLEN);
                skb->protocol = eth_type_trans(skb, skb->dev);
                if (ret == GRO_NORMAL)
-                       gro_normal_one(napi, skb);
+                       gro_normal_one(napi, skb, 1);
                break;
  
 -      case GRO_DROP:
 -              napi_reuse_skb(napi, skb);
 -              break;
 -
        case GRO_MERGED_FREE:
                if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
                        napi_skb_free_stolen_head(skb);
@@@ -6257,6 -6224,9 +6258,6 @@@ gro_result_t napi_gro_frags(struct napi
        gro_result_t ret;
        struct sk_buff *skb = napi_frags_skb(napi);
  
 -      if (!skb)
 -              return GRO_DROP;
 -
        trace_napi_gro_frags_entry(skb);
  
        ret = napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
@@@ -6739,49 -6709,6 +6740,49 @@@ static void init_gro_hash(struct napi_s
        napi->gro_bitmask = 0;
  }
  
 +int dev_set_threaded(struct net_device *dev, bool threaded)
 +{
 +      struct napi_struct *napi;
 +      int err = 0;
 +
 +      if (dev->threaded == threaded)
 +              return 0;
 +
 +      if (threaded) {
 +              list_for_each_entry(napi, &dev->napi_list, dev_list) {
 +                      if (!napi->thread) {
 +                              err = napi_kthread_create(napi);
 +                              if (err) {
 +                                      threaded = false;
 +                                      break;
 +                              }
 +                      }
 +              }
 +      }
 +
 +      dev->threaded = threaded;
 +
 +      /* Make sure kthread is created before THREADED bit
 +       * is set.
 +       */
 +      smp_mb__before_atomic();
 +
 +      /* Setting/unsetting threaded mode on a napi might not immediately
 +       * take effect, if the current napi instance is actively being
 +       * polled. In this case, the switch between threaded mode and
 +       * softirq mode will happen in the next round of napi_schedule().
 +       * This should not cause hiccups/stalls to the live traffic.
 +       */
 +      list_for_each_entry(napi, &dev->napi_list, dev_list) {
 +              if (threaded)
 +                      set_bit(NAPI_STATE_THREADED, &napi->state);
 +              else
 +                      clear_bit(NAPI_STATE_THREADED, &napi->state);
 +      }
 +
 +      return err;
 +}
 +
  void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
                    int (*poll)(struct napi_struct *, int), int weight)
  {
        set_bit(NAPI_STATE_NPSVC, &napi->state);
        list_add_rcu(&napi->dev_list, &dev->napi_list);
        napi_hash_add(napi);
 +      /* Create kthread for this napi if dev->threaded is set.
 +       * Clear dev->threaded if kthread creation failed so that
 +       * threaded mode will not be enabled in napi_enable().
 +       */
 +      if (dev->threaded && napi_kthread_create(napi))
 +              dev->threaded = 0;
  }
  EXPORT_SYMBOL(netif_napi_add);
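
dev_set_threaded() flips every NAPI context of a device between softirq and kthread
polling, creating the napi/<dev>-<id> threads on demand. A hedged sketch of a driver
opting in (hypothetical hook, not part of this merge):

/* Sketch: request threaded NAPI once the napi contexts are registered; on
 * failure the device simply stays in softirq mode.
 */
static int example_enable_threaded_napi(struct net_device *dev)
{
        int err;

        err = dev_set_threaded(dev, true);
        if (err)
                netdev_warn(dev, "threaded NAPI not available: %d\n", err);

        return err;
}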
  
@@@ -6832,28 -6753,9 +6833,28 @@@ void napi_disable(struct napi_struct *n
  
        clear_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state);
        clear_bit(NAPI_STATE_DISABLE, &n->state);
 +      clear_bit(NAPI_STATE_THREADED, &n->state);
  }
  EXPORT_SYMBOL(napi_disable);
  
 +/**
 + *    napi_enable - enable NAPI scheduling
 + *    @n: NAPI context
 + *
 + * Resume NAPI from being scheduled on this context.
 + * Must be paired with napi_disable.
 + */
 +void napi_enable(struct napi_struct *n)
 +{
 +      BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
 +      smp_mb__before_atomic();
 +      clear_bit(NAPI_STATE_SCHED, &n->state);
 +      clear_bit(NAPI_STATE_NPSVC, &n->state);
 +      if (n->dev->threaded && n->thread)
 +              set_bit(NAPI_STATE_THREADED, &n->state);
 +}
 +EXPORT_SYMBOL(napi_enable);
 +
  static void flush_gro_hash(struct napi_struct *napi)
  {
        int i;
@@@ -6879,18 -6781,18 +6880,18 @@@ void __netif_napi_del(struct napi_struc
  
        flush_gro_hash(napi);
        napi->gro_bitmask = 0;
 +
 +      if (napi->thread) {
 +              kthread_stop(napi->thread);
 +              napi->thread = NULL;
 +      }
  }
  EXPORT_SYMBOL(__netif_napi_del);
  
 -static int napi_poll(struct napi_struct *n, struct list_head *repoll)
 +static int __napi_poll(struct napi_struct *n, bool *repoll)
  {
 -      void *have;
        int work, weight;
  
 -      list_del_init(&n->poll_list);
 -
 -      have = netpoll_poll_lock(n);
 -
        weight = n->weight;
  
        /* This NAPI_STATE_SCHED test is for avoiding a race
                            n->poll, work, weight);
  
        if (likely(work < weight))
 -              goto out_unlock;
 +              return work;
  
        /* Drivers must not modify the NAPI state if they
         * consume the entire weight.  In such cases this code
         */
        if (unlikely(napi_disable_pending(n))) {
                napi_complete(n);
 -              goto out_unlock;
 +              return work;
        }
  
        /* The NAPI context has more processing work, but busy-polling
                         */
                        napi_schedule(n);
                }
 -              goto out_unlock;
 +              return work;
        }
  
        if (n->gro_bitmask) {
        if (unlikely(!list_empty(&n->poll_list))) {
                pr_warn_once("%s: Budget exhausted after napi rescheduled\n",
                             n->dev ? n->dev->name : "backlog");
 -              goto out_unlock;
 +              return work;
        }
  
 -      list_add_tail(&n->poll_list, repoll);
 +      *repoll = true;
 +
 +      return work;
 +}
 +
 +static int napi_poll(struct napi_struct *n, struct list_head *repoll)
 +{
 +      bool do_repoll = false;
 +      void *have;
 +      int work;
 +
 +      list_del_init(&n->poll_list);
 +
 +      have = netpoll_poll_lock(n);
 +
 +      work = __napi_poll(n, &do_repoll);
 +
 +      if (do_repoll)
 +              list_add_tail(&n->poll_list, repoll);
  
 -out_unlock:
        netpoll_poll_unlock(have);
  
        return work;
  }
  
 +static int napi_thread_wait(struct napi_struct *napi)
 +{
 +      set_current_state(TASK_INTERRUPTIBLE);
 +
 +      while (!kthread_should_stop() && !napi_disable_pending(napi)) {
 +              if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
 +                      WARN_ON(!list_empty(&napi->poll_list));
 +                      __set_current_state(TASK_RUNNING);
 +                      return 0;
 +              }
 +
 +              schedule();
 +              set_current_state(TASK_INTERRUPTIBLE);
 +      }
 +      __set_current_state(TASK_RUNNING);
 +      return -1;
 +}
 +
 +static int napi_threaded_poll(void *data)
 +{
 +      struct napi_struct *napi = data;
 +      void *have;
 +
 +      while (!napi_thread_wait(napi)) {
 +              for (;;) {
 +                      bool repoll = false;
 +
 +                      local_bh_disable();
 +
 +                      have = netpoll_poll_lock(napi);
 +                      __napi_poll(napi, &repoll);
 +                      netpoll_poll_unlock(have);
 +
 +                      __kfree_skb_flush();
 +                      local_bh_enable();
 +
 +                      if (!repoll)
 +                              break;
 +
 +                      cond_resched();
 +              }
 +      }
 +      return 0;
 +}
 +
  static __latent_entropy void net_rx_action(struct softirq_action *h)
  {
        struct softnet_data *sd = this_cpu_ptr(&softnet_data);
@@@ -8281,39 -8121,6 +8282,39 @@@ struct net_device *netdev_get_xmit_slav
  }
  EXPORT_SYMBOL(netdev_get_xmit_slave);
  
 +static struct net_device *netdev_sk_get_lower_dev(struct net_device *dev,
 +                                                struct sock *sk)
 +{
 +      const struct net_device_ops *ops = dev->netdev_ops;
 +
 +      if (!ops->ndo_sk_get_lower_dev)
 +              return NULL;
 +      return ops->ndo_sk_get_lower_dev(dev, sk);
 +}
 +
 +/**
 + * netdev_sk_get_lowest_dev - Get the lowest device in chain given device and socket
 + * @dev: device
 + * @sk: the socket
 + *
 + * %NULL is returned if no lower device is found.
 + */
 +
 +struct net_device *netdev_sk_get_lowest_dev(struct net_device *dev,
 +                                          struct sock *sk)
 +{
 +      struct net_device *lower;
 +
 +      lower = netdev_sk_get_lower_dev(dev, sk);
 +      while (lower) {
 +              dev = lower;
 +              lower = netdev_sk_get_lower_dev(dev, sk);
 +      }
 +
 +      return dev;
 +}
 +EXPORT_SYMBOL(netdev_sk_get_lowest_dev);
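
A hedged caller sketch: a stacked device such as a bond can ask which lower device will actually carry a given socket's traffic by walking ndo_sk_get_lower_dev() to the bottom of the chain. The example_* name is invented.

static struct net_device *example_tx_dev_for_sk(struct net_device *top,
                                                struct sock *sk)
{
        /* Follows ndo_sk_get_lower_dev() until no lower device is reported. */
        return netdev_sk_get_lowest_dev(top, sk);
}
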
 +
  static void netdev_adjacent_add_links(struct net_device *dev)
  {
        struct netdev_adjacent *iter;
@@@ -9635,6 -9442,106 +9636,6 @@@ static void net_set_todo(struct net_dev
        dev_net(dev)->dev_unreg_count++;
  }
  
 -static void rollback_registered_many(struct list_head *head)
 -{
 -      struct net_device *dev, *tmp;
 -      LIST_HEAD(close_head);
 -
 -      BUG_ON(dev_boot_phase);
 -      ASSERT_RTNL();
 -
 -      list_for_each_entry_safe(dev, tmp, head, unreg_list) {
 -              /* Some devices call without registering
 -               * for initialization unwind. Remove those
 -               * devices and proceed with the remaining.
 -               */
 -              if (dev->reg_state == NETREG_UNINITIALIZED) {
 -                      pr_debug("unregister_netdevice: device %s/%p never was registered\n",
 -                               dev->name, dev);
 -
 -                      WARN_ON(1);
 -                      list_del(&dev->unreg_list);
 -                      continue;
 -              }
 -              dev->dismantle = true;
 -              BUG_ON(dev->reg_state != NETREG_REGISTERED);
 -      }
 -
 -      /* If device is running, close it first. */
 -      list_for_each_entry(dev, head, unreg_list)
 -              list_add_tail(&dev->close_list, &close_head);
 -      dev_close_many(&close_head, true);
 -
 -      list_for_each_entry(dev, head, unreg_list) {
 -              /* And unlink it from device chain. */
 -              unlist_netdevice(dev);
 -
 -              dev->reg_state = NETREG_UNREGISTERING;
 -      }
 -      flush_all_backlogs();
 -
 -      synchronize_net();
 -
 -      list_for_each_entry(dev, head, unreg_list) {
 -              struct sk_buff *skb = NULL;
 -
 -              /* Shutdown queueing discipline. */
 -              dev_shutdown(dev);
 -
 -              dev_xdp_uninstall(dev);
 -
 -              /* Notify protocols, that we are about to destroy
 -               * this device. They should clean all the things.
 -               */
 -              call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
 -
 -              if (!dev->rtnl_link_ops ||
 -                  dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
 -                      skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
 -                                                   GFP_KERNEL, NULL, 0);
 -
 -              /*
 -               *      Flush the unicast and multicast chains
 -               */
 -              dev_uc_flush(dev);
 -              dev_mc_flush(dev);
 -
 -              netdev_name_node_alt_flush(dev);
 -              netdev_name_node_free(dev->name_node);
 -
 -              if (dev->netdev_ops->ndo_uninit)
 -                      dev->netdev_ops->ndo_uninit(dev);
 -
 -              if (skb)
 -                      rtmsg_ifinfo_send(skb, dev, GFP_KERNEL);
 -
 -              /* Notifier chain MUST detach us all upper devices. */
 -              WARN_ON(netdev_has_any_upper_dev(dev));
 -              WARN_ON(netdev_has_any_lower_dev(dev));
 -
 -              /* Remove entries from kobject tree */
 -              netdev_unregister_kobject(dev);
 -#ifdef CONFIG_XPS
 -              /* Remove XPS queueing entries */
 -              netif_reset_xps_queues_gt(dev, 0);
 -#endif
 -      }
 -
 -      synchronize_net();
 -
 -      list_for_each_entry(dev, head, unreg_list)
 -              dev_put(dev);
 -}
 -
 -static void rollback_registered(struct net_device *dev)
 -{
 -      LIST_HEAD(single);
 -
 -      list_add(&dev->unreg_list, &single);
 -      rollback_registered_many(&single);
 -      list_del(&single);
 -}
 -
  static netdev_features_t netdev_sync_upper_features(struct net_device *lower,
        struct net_device *upper, netdev_features_t features)
  {
@@@ -10107,7 -10014,7 +10108,7 @@@ int register_netdevice(struct net_devic
        dev->hw_features |= (NETIF_F_SOFT_FEATURES | NETIF_F_SOFT_FEATURES_OFF);
        dev->features |= NETIF_F_SOFT_FEATURES;
  
 -      if (dev->netdev_ops->ndo_udp_tunnel_add) {
 +      if (dev->udp_tunnel_nic_info) {
                dev->features |= NETIF_F_RX_UDP_TUNNEL_PORT;
                dev->hw_features |= NETIF_F_RX_UDP_TUNNEL_PORT;
        }
        if (ret) {
                /* Expect explicit free_netdev() on failure */
                dev->needs_free_netdev = false;
 -              rollback_registered(dev);
 -              net_set_todo(dev);
 +              unregister_netdevice_queue(dev, NULL);
                goto out;
        }
        /*
@@@ -10806,10 -10714,9 +10807,10 @@@ void unregister_netdevice_queue(struct 
        if (head) {
                list_move_tail(&dev->unreg_list, head);
        } else {
 -              rollback_registered(dev);
 -              /* Finish processing unregister after unlock */
 -              net_set_todo(dev);
 +              LIST_HEAD(single);
 +
 +              list_add(&dev->unreg_list, &single);
 +              unregister_netdevice_many(&single);
        }
  }
  EXPORT_SYMBOL(unregister_netdevice_queue);
   */
  void unregister_netdevice_many(struct list_head *head)
  {
 -      struct net_device *dev;
 +      struct net_device *dev, *tmp;
 +      LIST_HEAD(close_head);
 +
 +      BUG_ON(dev_boot_phase);
 +      ASSERT_RTNL();
 +
 +      if (list_empty(head))
 +              return;
 +
 +      list_for_each_entry_safe(dev, tmp, head, unreg_list) {
 +              /* Some devices call without registering
 +               * for initialization unwind. Remove those
 +               * devices and proceed with the remaining.
 +               */
 +              if (dev->reg_state == NETREG_UNINITIALIZED) {
 +                      pr_debug("unregister_netdevice: device %s/%p never was registered\n",
 +                               dev->name, dev);
 +
 +                      WARN_ON(1);
 +                      list_del(&dev->unreg_list);
 +                      continue;
 +              }
 +              dev->dismantle = true;
 +              BUG_ON(dev->reg_state != NETREG_REGISTERED);
 +      }
 +
 +      /* If device is running, close it first. */
 +      list_for_each_entry(dev, head, unreg_list)
 +              list_add_tail(&dev->close_list, &close_head);
 +      dev_close_many(&close_head, true);
 +
 +      list_for_each_entry(dev, head, unreg_list) {
 +              /* And unlink it from device chain. */
 +              unlist_netdevice(dev);
 +
 +              dev->reg_state = NETREG_UNREGISTERING;
 +      }
 +      flush_all_backlogs();
 +
 +      synchronize_net();
 +
 +      list_for_each_entry(dev, head, unreg_list) {
 +              struct sk_buff *skb = NULL;
 +
 +              /* Shutdown queueing discipline. */
 +              dev_shutdown(dev);
 +
 +              dev_xdp_uninstall(dev);
 +
 +              /* Notify protocols, that we are about to destroy
 +               * this device. They should clean all the things.
 +               */
 +              call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
 +
 +              if (!dev->rtnl_link_ops ||
 +                  dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
 +                      skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
 +                                                   GFP_KERNEL, NULL, 0);
 +
 +              /*
 +               *      Flush the unicast and multicast chains
 +               */
 +              dev_uc_flush(dev);
 +              dev_mc_flush(dev);
 +
 +              netdev_name_node_alt_flush(dev);
 +              netdev_name_node_free(dev->name_node);
 +
 +              if (dev->netdev_ops->ndo_uninit)
 +                      dev->netdev_ops->ndo_uninit(dev);
 +
 +              if (skb)
 +                      rtmsg_ifinfo_send(skb, dev, GFP_KERNEL);
 +
 +              /* Notifier chain MUST detach us all upper devices. */
 +              WARN_ON(netdev_has_any_upper_dev(dev));
 +              WARN_ON(netdev_has_any_lower_dev(dev));
 +
 +              /* Remove entries from kobject tree */
 +              netdev_unregister_kobject(dev);
 +#ifdef CONFIG_XPS
 +              /* Remove XPS queueing entries */
 +              netif_reset_xps_queues_gt(dev, 0);
 +#endif
 +      }
 +
 +      synchronize_net();
  
 -      if (!list_empty(head)) {
 -              rollback_registered_many(head);
 -              list_for_each_entry(dev, head, unreg_list)
 -                      net_set_todo(dev);
 -              list_del(head);
 +      list_for_each_entry(dev, head, unreg_list) {
 +              dev_put(dev);
 +              net_set_todo(dev);
        }
 +
 +      list_del(head);
  }
  EXPORT_SYMBOL(unregister_netdevice_many);
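
For context, a sketch of the batched teardown pattern these helpers serve (example_remove_ports() is hypothetical); with this change the per-device rollback work happens inside unregister_netdevice_many() itself rather than in rollback_registered_many():

/* Hypothetical driver removing several ports in one RTNL critical section. */
static void example_remove_ports(struct net_device **ports, int n)
{
        LIST_HEAD(list);
        int i;

        rtnl_lock();
        for (i = 0; i < n; i++)
                unregister_netdevice_queue(ports[i], &list);
        unregister_netdevice_many(&list);       /* closes, unlists, drops each dev */
        rtnl_unlock();
}
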
  
diff --combined net/dsa/dsa2.c
index 96249c4ad5f277df5c19208ac61062251e38a433,a04fd637b4cdcc3466f09bbdf2e80b5b41394e60..4d4956ed303b096e685b499c534128be8c5a91a9
  static DEFINE_MUTEX(dsa2_mutex);
  LIST_HEAD(dsa_tree_list);
  
 +/**
 + * dsa_tree_notify - Execute code for all switches in a DSA switch tree.
 + * @dst: collection of struct dsa_switch devices to notify.
 + * @e: event, must be of type DSA_NOTIFIER_*
 + * @v: event-specific value.
 + *
 + * Given a struct dsa_switch_tree, this can be used to run a function once for
 + * each member DSA switch. The other alternative of traversing the tree is only
 + * through its ports list, which does not uniquely list the switches.
 + */
 +int dsa_tree_notify(struct dsa_switch_tree *dst, unsigned long e, void *v)
 +{
 +      struct raw_notifier_head *nh = &dst->nh;
 +      int err;
 +
 +      err = raw_notifier_call_chain(nh, e, v);
 +
 +      return notifier_to_errno(err);
 +}
 +
 +/**
 + * dsa_broadcast - Notify all DSA trees in the system.
 + * @e: event, must be of type DSA_NOTIFIER_*
 + * @v: event-specific value.
 + *
 + * Can be used to notify the switching fabric of events such as cross-chip
 + * bridging between disjoint trees (such as islands of tagger-compatible
 + * switches bridged by an incompatible middle switch).
 + */
 +int dsa_broadcast(unsigned long e, void *v)
 +{
 +      struct dsa_switch_tree *dst;
 +      int err = 0;
 +
 +      list_for_each_entry(dst, &dsa_tree_list, list) {
 +              err = dsa_tree_notify(dst, e, v);
 +              if (err)
 +                      break;
 +      }
 +
 +      return err;
 +}
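
A hedged illustration of the two delivery scopes documented above; DSA_NOTIFIER_TAG_PROTO and struct dsa_notifier_tag_proto_info are the ones used later in this diff, example_notify_tag_proto() is an invented caller:

static int example_notify_tag_proto(struct dsa_switch_tree *dst,
                                    const struct dsa_device_ops *tag_ops)
{
        struct dsa_notifier_tag_proto_info info = {
                .tag_ops = tag_ops,
        };

        /* Per-tree delivery; dsa_broadcast(DSA_NOTIFIER_TAG_PROTO, &info)
         * would reach every tree on dsa_tree_list instead.
         */
        return dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO, &info);
}
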
 +
 +/**
 + * dsa_lag_map() - Map LAG netdev to a linear LAG ID
 + * @dst: Tree in which to record the mapping.
 + * @lag: Netdev that is to be mapped to an ID.
 + *
 + * dsa_lag_id/dsa_lag_dev can then be used to translate between the
 + * two spaces. The size of the mapping space is determined by the
 + * driver by setting ds->num_lag_ids. It is perfectly legal to leave
 + * it unset if it is not needed, in which case these functions become
 + * no-ops.
 + */
 +void dsa_lag_map(struct dsa_switch_tree *dst, struct net_device *lag)
 +{
 +      unsigned int id;
 +
 +      if (dsa_lag_id(dst, lag) >= 0)
 +              /* Already mapped */
 +              return;
 +
 +      for (id = 0; id < dst->lags_len; id++) {
 +              if (!dsa_lag_dev(dst, id)) {
 +                      dst->lags[id] = lag;
 +                      return;
 +              }
 +      }
 +
 +      /* No IDs left, which is OK. Some drivers do not need it. The
 +       * ones that do, e.g. mv88e6xxx, will discover that dsa_lag_id
 +       * returns an error for this device when joining the LAG. The
 +       * driver can then return -EOPNOTSUPP back to DSA, which will
 +       * fall back to a software LAG.
 +       */
 +}
 +
 +/**
 + * dsa_lag_unmap() - Remove a LAG ID mapping
 + * @dst: Tree in which the mapping is recorded.
 + * @lag: Netdev that was mapped.
 + *
 + * As there may be multiple users of the mapping, it is only removed
 + * if there are no other references to it.
 + */
 +void dsa_lag_unmap(struct dsa_switch_tree *dst, struct net_device *lag)
 +{
 +      struct dsa_port *dp;
 +      unsigned int id;
 +
 +      dsa_lag_foreach_port(dp, dst, lag)
 +              /* There are remaining users of this mapping */
 +              return;
 +
 +      dsa_lags_foreach_id(id, dst) {
 +              if (dsa_lag_dev(dst, id) == lag) {
 +                      dst->lags[id] = NULL;
 +                      break;
 +              }
 +      }
 +}
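
The mapping above only does real work once a driver sizes the LAG ID space. A sketch of that opt-in (example_switch_setup() is hypothetical and the value 16 is purely illustrative):

static int example_switch_setup(struct dsa_switch *ds)
{
        /* Leaving num_lag_ids at zero keeps dsa_lag_map()/dsa_lag_unmap()
         * as no-ops, as documented above.
         */
        ds->num_lag_ids = 16;   /* assumed hardware limit, illustrative only */

        return 0;
}
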
 +
  struct dsa_switch *dsa_switch_find(int tree_index, int sw_index)
  {
        struct dsa_switch_tree *dst;
@@@ -179,8 -77,6 +179,8 @@@ static struct dsa_switch_tree *dsa_tree
  
  static void dsa_tree_free(struct dsa_switch_tree *dst)
  {
 +      if (dst->tag_ops)
 +              dsa_tag_driver_put(dst->tag_ops);
        list_del(&dst->list);
        kfree(dst);
  }
@@@ -469,6 -365,7 +469,6 @@@ static void dsa_port_teardown(struct ds
                break;
        case DSA_PORT_TYPE_CPU:
                dsa_port_disable(dp);
 -              dsa_tag_driver_put(dp->tag_ops);
                dsa_port_link_unregister_of(dp);
                break;
        case DSA_PORT_TYPE_DSA:
@@@ -507,165 -404,8 +507,165 @@@ static int dsa_devlink_info_get(struct 
        return -EOPNOTSUPP;
  }
  
 +static int dsa_devlink_sb_pool_get(struct devlink *dl,
 +                                 unsigned int sb_index, u16 pool_index,
 +                                 struct devlink_sb_pool_info *pool_info)
 +{
 +      struct dsa_switch *ds = dsa_devlink_to_ds(dl);
 +
 +      if (!ds->ops->devlink_sb_pool_get)
 +              return -EOPNOTSUPP;
 +
 +      return ds->ops->devlink_sb_pool_get(ds, sb_index, pool_index,
 +                                          pool_info);
 +}
 +
 +static int dsa_devlink_sb_pool_set(struct devlink *dl, unsigned int sb_index,
 +                                 u16 pool_index, u32 size,
 +                                 enum devlink_sb_threshold_type threshold_type,
 +                                 struct netlink_ext_ack *extack)
 +{
 +      struct dsa_switch *ds = dsa_devlink_to_ds(dl);
 +
 +      if (!ds->ops->devlink_sb_pool_set)
 +              return -EOPNOTSUPP;
 +
 +      return ds->ops->devlink_sb_pool_set(ds, sb_index, pool_index, size,
 +                                          threshold_type, extack);
 +}
 +
 +static int dsa_devlink_sb_port_pool_get(struct devlink_port *dlp,
 +                                      unsigned int sb_index, u16 pool_index,
 +                                      u32 *p_threshold)
 +{
 +      struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
 +      int port = dsa_devlink_port_to_port(dlp);
 +
 +      if (!ds->ops->devlink_sb_port_pool_get)
 +              return -EOPNOTSUPP;
 +
 +      return ds->ops->devlink_sb_port_pool_get(ds, port, sb_index,
 +                                               pool_index, p_threshold);
 +}
 +
 +static int dsa_devlink_sb_port_pool_set(struct devlink_port *dlp,
 +                                      unsigned int sb_index, u16 pool_index,
 +                                      u32 threshold,
 +                                      struct netlink_ext_ack *extack)
 +{
 +      struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
 +      int port = dsa_devlink_port_to_port(dlp);
 +
 +      if (!ds->ops->devlink_sb_port_pool_set)
 +              return -EOPNOTSUPP;
 +
 +      return ds->ops->devlink_sb_port_pool_set(ds, port, sb_index,
 +                                               pool_index, threshold, extack);
 +}
 +
 +static int
 +dsa_devlink_sb_tc_pool_bind_get(struct devlink_port *dlp,
 +                              unsigned int sb_index, u16 tc_index,
 +                              enum devlink_sb_pool_type pool_type,
 +                              u16 *p_pool_index, u32 *p_threshold)
 +{
 +      struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
 +      int port = dsa_devlink_port_to_port(dlp);
 +
 +      if (!ds->ops->devlink_sb_tc_pool_bind_get)
 +              return -EOPNOTSUPP;
 +
 +      return ds->ops->devlink_sb_tc_pool_bind_get(ds, port, sb_index,
 +                                                  tc_index, pool_type,
 +                                                  p_pool_index, p_threshold);
 +}
 +
 +static int
 +dsa_devlink_sb_tc_pool_bind_set(struct devlink_port *dlp,
 +                              unsigned int sb_index, u16 tc_index,
 +                              enum devlink_sb_pool_type pool_type,
 +                              u16 pool_index, u32 threshold,
 +                              struct netlink_ext_ack *extack)
 +{
 +      struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
 +      int port = dsa_devlink_port_to_port(dlp);
 +
 +      if (!ds->ops->devlink_sb_tc_pool_bind_set)
 +              return -EOPNOTSUPP;
 +
 +      return ds->ops->devlink_sb_tc_pool_bind_set(ds, port, sb_index,
 +                                                  tc_index, pool_type,
 +                                                  pool_index, threshold,
 +                                                  extack);
 +}
 +
 +static int dsa_devlink_sb_occ_snapshot(struct devlink *dl,
 +                                     unsigned int sb_index)
 +{
 +      struct dsa_switch *ds = dsa_devlink_to_ds(dl);
 +
 +      if (!ds->ops->devlink_sb_occ_snapshot)
 +              return -EOPNOTSUPP;
 +
 +      return ds->ops->devlink_sb_occ_snapshot(ds, sb_index);
 +}
 +
 +static int dsa_devlink_sb_occ_max_clear(struct devlink *dl,
 +                                      unsigned int sb_index)
 +{
 +      struct dsa_switch *ds = dsa_devlink_to_ds(dl);
 +
 +      if (!ds->ops->devlink_sb_occ_max_clear)
 +              return -EOPNOTSUPP;
 +
 +      return ds->ops->devlink_sb_occ_max_clear(ds, sb_index);
 +}
 +
 +static int dsa_devlink_sb_occ_port_pool_get(struct devlink_port *dlp,
 +                                          unsigned int sb_index,
 +                                          u16 pool_index, u32 *p_cur,
 +                                          u32 *p_max)
 +{
 +      struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
 +      int port = dsa_devlink_port_to_port(dlp);
 +
 +      if (!ds->ops->devlink_sb_occ_port_pool_get)
 +              return -EOPNOTSUPP;
 +
 +      return ds->ops->devlink_sb_occ_port_pool_get(ds, port, sb_index,
 +                                                   pool_index, p_cur, p_max);
 +}
 +
 +static int
 +dsa_devlink_sb_occ_tc_port_bind_get(struct devlink_port *dlp,
 +                                  unsigned int sb_index, u16 tc_index,
 +                                  enum devlink_sb_pool_type pool_type,
 +                                  u32 *p_cur, u32 *p_max)
 +{
 +      struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
 +      int port = dsa_devlink_port_to_port(dlp);
 +
 +      if (!ds->ops->devlink_sb_occ_tc_port_bind_get)
 +              return -EOPNOTSUPP;
 +
 +      return ds->ops->devlink_sb_occ_tc_port_bind_get(ds, port,
 +                                                      sb_index, tc_index,
 +                                                      pool_type, p_cur,
 +                                                      p_max);
 +}
 +
  static const struct devlink_ops dsa_devlink_ops = {
 -      .info_get = dsa_devlink_info_get,
 +      .info_get                       = dsa_devlink_info_get,
 +      .sb_pool_get                    = dsa_devlink_sb_pool_get,
 +      .sb_pool_set                    = dsa_devlink_sb_pool_set,
 +      .sb_port_pool_get               = dsa_devlink_sb_port_pool_get,
 +      .sb_port_pool_set               = dsa_devlink_sb_port_pool_set,
 +      .sb_tc_pool_bind_get            = dsa_devlink_sb_tc_pool_bind_get,
 +      .sb_tc_pool_bind_set            = dsa_devlink_sb_tc_pool_bind_set,
 +      .sb_occ_snapshot                = dsa_devlink_sb_occ_snapshot,
 +      .sb_occ_max_clear               = dsa_devlink_sb_occ_max_clear,
 +      .sb_occ_port_pool_get           = dsa_devlink_sb_occ_port_pool_get,
 +      .sb_occ_tc_port_bind_get        = dsa_devlink_sb_occ_tc_port_bind_get,
  };
  
  static int dsa_switch_setup(struct dsa_switch *ds)
        if (err)
                goto unregister_devlink_ports;
  
 +      ds->configure_vlan_while_not_filtering = true;
 +
        err = ds->ops->setup(ds);
        if (err < 0)
                goto unregister_notifier;
                ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
                if (!ds->slave_mii_bus) {
                        err = -ENOMEM;
-                       goto unregister_notifier;
+                       goto teardown;
                }
  
                dsa_slave_mii_bus_init(ds);
  
                err = mdiobus_register(ds->slave_mii_bus);
                if (err < 0)
-                       goto unregister_notifier;
+                       goto teardown;
        }
  
        ds->setup = true;
  
        return 0;
  
+ teardown:
+       if (ds->ops->teardown)
+               ds->ops->teardown(ds);
  unregister_notifier:
        dsa_switch_unregister_notifier(ds);
  unregister_devlink_ports:
@@@ -844,32 -585,6 +847,32 @@@ static void dsa_tree_teardown_master(st
                        dsa_master_teardown(dp->master);
  }
  
 +static int dsa_tree_setup_lags(struct dsa_switch_tree *dst)
 +{
 +      unsigned int len = 0;
 +      struct dsa_port *dp;
 +
 +      list_for_each_entry(dp, &dst->ports, list) {
 +              if (dp->ds->num_lag_ids > len)
 +                      len = dp->ds->num_lag_ids;
 +      }
 +
 +      if (!len)
 +              return 0;
 +
 +      dst->lags = kcalloc(len, sizeof(*dst->lags), GFP_KERNEL);
 +      if (!dst->lags)
 +              return -ENOMEM;
 +
 +      dst->lags_len = len;
 +      return 0;
 +}
 +
 +static void dsa_tree_teardown_lags(struct dsa_switch_tree *dst)
 +{
 +      kfree(dst->lags);
 +}
 +
  static int dsa_tree_setup(struct dsa_switch_tree *dst)
  {
        bool complete;
        if (err)
                goto teardown_switches;
  
 +      err = dsa_tree_setup_lags(dst);
 +      if (err)
 +              goto teardown_master;
 +
        dst->setup = true;
  
        pr_info("DSA: tree %d setup\n", dst->index);
  
        return 0;
  
 +teardown_master:
 +      dsa_tree_teardown_master(dst);
  teardown_switches:
        dsa_tree_teardown_switches(dst);
  teardown_default_cpu:
@@@ -924,8 -633,6 +927,8 @@@ static void dsa_tree_teardown(struct ds
        if (!dst->setup)
                return;
  
 +      dsa_tree_teardown_lags(dst);
 +
        dsa_tree_teardown_master(dst);
  
        dsa_tree_teardown_switches(dst);
        dst->setup = false;
  }
  
 +/* Since the dsa/tagging sysfs device attribute is per master, the assumption
 + * is that all DSA switches within a tree share the same tagger, otherwise
 + * they would have formed disjoint trees (different "dsa,member" values).
 + */
 +int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
 +                            struct net_device *master,
 +                            const struct dsa_device_ops *tag_ops,
 +                            const struct dsa_device_ops *old_tag_ops)
 +{
 +      struct dsa_notifier_tag_proto_info info;
 +      struct dsa_port *dp;
 +      int err = -EBUSY;
 +
 +      if (!rtnl_trylock())
 +              return restart_syscall();
 +
 +      /* At the moment we don't allow changing the tag protocol under
 +       * traffic. The rtnl_mutex also happens to serialize concurrent
 +       * attempts to change the tagging protocol. If we ever lift the IFF_UP
 +       * restriction, there needs to be another mutex which serializes this.
 +       */
 +      if (master->flags & IFF_UP)
 +              goto out_unlock;
 +
 +      list_for_each_entry(dp, &dst->ports, list) {
 +              if (!dsa_is_user_port(dp->ds, dp->index))
 +                      continue;
 +
 +              if (dp->slave->flags & IFF_UP)
 +                      goto out_unlock;
 +      }
 +
 +      info.tag_ops = tag_ops;
 +      err = dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO, &info);
 +      if (err)
 +              goto out_unwind_tagger;
 +
 +      dst->tag_ops = tag_ops;
 +
 +      rtnl_unlock();
 +
 +      return 0;
 +
 +out_unwind_tagger:
 +      info.tag_ops = old_tag_ops;
 +      dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO, &info);
 +out_unlock:
 +      rtnl_unlock();
 +      return err;
 +}
 +
  static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index)
  {
        struct dsa_switch_tree *dst = ds->dst;
@@@ -1063,33 -719,24 +1066,33 @@@ static int dsa_port_parse_cpu(struct ds
  {
        struct dsa_switch *ds = dp->ds;
        struct dsa_switch_tree *dst = ds->dst;
 -      const struct dsa_device_ops *tag_ops;
        enum dsa_tag_protocol tag_protocol;
  
        tag_protocol = dsa_get_tag_protocol(dp, master);
 -      tag_ops = dsa_tag_driver_get(tag_protocol);
 -      if (IS_ERR(tag_ops)) {
 -              if (PTR_ERR(tag_ops) == -ENOPROTOOPT)
 -                      return -EPROBE_DEFER;
 -              dev_warn(ds->dev, "No tagger for this switch\n");
 -              dp->master = NULL;
 -              return PTR_ERR(tag_ops);
 +      if (dst->tag_ops) {
 +              if (dst->tag_ops->proto != tag_protocol) {
 +                      dev_err(ds->dev,
 +                              "A DSA switch tree can have only one tagging protocol\n");
 +                      return -EINVAL;
 +              }
 +              /* In the case of multiple CPU ports per switch, the tagging
 +               * protocol is still reference-counted only per switch tree, so
 +               * nothing to do here.
 +               */
 +      } else {
 +              dst->tag_ops = dsa_tag_driver_get(tag_protocol);
 +              if (IS_ERR(dst->tag_ops)) {
 +                      if (PTR_ERR(dst->tag_ops) == -ENOPROTOOPT)
 +                              return -EPROBE_DEFER;
 +                      dev_warn(ds->dev, "No tagger for this switch\n");
 +                      dp->master = NULL;
 +                      return PTR_ERR(dst->tag_ops);
 +              }
        }
  
        dp->master = master;
        dp->type = DSA_PORT_TYPE_CPU;
 -      dp->filter = tag_ops->filter;
 -      dp->rcv = tag_ops->rcv;
 -      dp->tag_ops = tag_ops;
 +      dsa_port_set_tag_protocol(dp, dst->tag_ops);
        dp->dst = dst;
  
        return 0;
@@@ -1143,8 -790,6 +1146,8 @@@ static int dsa_switch_parse_ports_of(st
                        goto out_put_node;
  
                if (reg >= ds->num_ports) {
 +                      dev_err(ds->dev, "port %pOF index %u exceeds num_ports (%zu)\n",
 +                              port, reg, ds->num_ports);
                        err = -EINVAL;
                        goto out_put_node;
                }
diff --combined net/netfilter/nf_flow_table_core.c
index 55fca71ace262888ecb2aa961142260a9a4174c1,4a4acbba78ff77292c2ddfa24e0ed7e7bde1090e..5fa657b8e03dff15a2fa828f22b09a364e7d5887
@@@ -191,14 -191,14 +191,14 @@@ static u32 flow_offload_hash(const voi
  {
        const struct flow_offload_tuple *tuple = data;
  
 -      return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
 +      return jhash(tuple, offsetof(struct flow_offload_tuple, __hash), seed);
  }
  
  static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
  {
        const struct flow_offload_tuple_rhash *tuplehash = data;
  
 -      return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
 +      return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, __hash), seed);
  }
  
  static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
        const struct flow_offload_tuple *tuple = arg->key;
        const struct flow_offload_tuple_rhash *x = ptr;
  
 -      if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
 +      if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, __hash)))
                return 1;
  
        return 0;
@@@ -399,7 -399,7 +399,7 @@@ static int nf_flow_nat_port_tcp(struct 
                return -1;
  
        tcph = (void *)(skb_network_header(skb) + thoff);
-       inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true);
+       inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false);
  
        return 0;
  }
@@@ -415,7 -415,7 +415,7 @@@ static int nf_flow_nat_port_udp(struct 
        udph = (void *)(skb_network_header(skb) + thoff);
        if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
                inet_proto_csum_replace2(&udph->check, skb, port,
-                                        new_port, true);
+                                        new_port, false);
                if (!udph->check)
                        udph->check = CSUM_MANGLED_0;
        }
diff --combined net/netfilter/nf_tables_api.c
index 01dc0a169a780a13b86ffaebea51ba6f8a551eaa,8ee9f40cc0ea2816839a3c7264fcb65eebd5c9e7..ab93a353651a657c2df8fb05d39f06f3310297ab
@@@ -4438,12 -4438,6 +4438,12 @@@ static int nf_tables_delset(struct net 
        return nft_delset(&ctx, set);
  }
  
 +static int nft_validate_register_store(const struct nft_ctx *ctx,
 +                                     enum nft_registers reg,
 +                                     const struct nft_data *data,
 +                                     enum nft_data_types type,
 +                                     unsigned int len);
 +
  static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx,
                                        struct nft_set *set,
                                        const struct nft_set_iter *iter,
@@@ -5287,6 -5281,7 +5287,7 @@@ static int nft_add_set_elem(struct nft_
        struct nft_expr *expr_array[NFT_SET_EXPR_MAX] = {};
        struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
        u8 genmask = nft_genmask_next(ctx->net);
+       u32 flags = 0, size = 0, num_exprs = 0;
        struct nft_set_ext_tmpl tmpl;
        struct nft_set_ext *ext, *ext2;
        struct nft_set_elem elem;
        struct nft_data_desc desc;
        enum nft_registers dreg;
        struct nft_trans *trans;
-       u32 flags = 0, size = 0;
        u64 timeout;
        u64 expiration;
        int err, i;
        if (nla[NFTA_SET_ELEM_EXPR]) {
                struct nft_expr *expr;
  
-               if (set->num_exprs != 1)
+               if (set->num_exprs && set->num_exprs != 1)
                        return -EOPNOTSUPP;
  
                expr = nft_set_elem_expr_alloc(ctx, set,
                        return PTR_ERR(expr);
  
                expr_array[0] = expr;
+               num_exprs = 1;
  
-               if (set->exprs[0] && set->exprs[0]->ops != expr->ops) {
+               if (set->num_exprs && set->exprs[0]->ops != expr->ops) {
                        err = -EOPNOTSUPP;
                        goto err_set_elem_expr;
                }
                struct nlattr *tmp;
                int left;
  
-               if (set->num_exprs == 0)
-                       return -EOPNOTSUPP;
                i = 0;
                nla_for_each_nested(tmp, nla[NFTA_SET_ELEM_EXPRESSIONS], left) {
-                       if (i == set->num_exprs) {
+                       if (i == NFT_SET_EXPR_MAX ||
+                           (set->num_exprs && set->num_exprs == i)) {
                                err = -E2BIG;
                                goto err_set_elem_expr;
                        }
                                goto err_set_elem_expr;
                        }
                        expr_array[i] = expr;
+                       num_exprs++;
  
-                       if (expr->ops != set->exprs[i]->ops) {
+                       if (set->num_exprs && expr->ops != set->exprs[i]->ops) {
                                err = -EOPNOTSUPP;
                                goto err_set_elem_expr;
                        }
                        i++;
                }
-               if (set->num_exprs != i) {
+               if (set->num_exprs && set->num_exprs != i) {
                        err = -EOPNOTSUPP;
                        goto err_set_elem_expr;
                }
                err = nft_set_elem_expr_clone(ctx, set, expr_array);
                if (err < 0)
                        goto err_set_elem_expr_clone;
+               num_exprs = set->num_exprs;
        }
  
        err = nft_setelem_parse_key(ctx, set, &elem.key.val,
                        nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
        }
  
-       if (set->num_exprs) {
-               for (i = 0; i < set->num_exprs; i++)
+       if (num_exprs) {
+               for (i = 0; i < num_exprs; i++)
                        size += expr_array[i]->ops->size;
  
                nft_set_ext_add_length(&tmpl, NFT_SET_EXT_EXPRESSIONS,
                *nft_set_ext_obj(ext) = obj;
                obj->use++;
        }
-       for (i = 0; i < set->num_exprs; i++)
+       for (i = 0; i < num_exprs; i++)
                nft_set_elem_expr_setup(ext, i, expr_array);
  
        trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set);
@@@ -5590,7 -5586,7 +5592,7 @@@ err_parse_key_end
  err_parse_key:
        nft_data_release(&elem.key.val, NFT_DATA_VALUE);
  err_set_elem_expr:
-       for (i = 0; i < set->num_exprs && expr_array[i]; i++)
+       for (i = 0; i < num_exprs && expr_array[i]; i++)
                nft_expr_destroy(ctx, expr_array[i]);
  err_set_elem_expr_clone:
        return err;
@@@ -8594,7 -8590,7 +8596,7 @@@ EXPORT_SYMBOL_GPL(nft_parse_u32_check)
   *    Registers used to be 128 bit wide, these register numbers will be
   *    mapped to the corresponding 32 bit register numbers.
   */
 -unsigned int nft_parse_register(const struct nlattr *attr)
 +static unsigned int nft_parse_register(const struct nlattr *attr)
  {
        unsigned int reg;
  
                return reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00;
        }
  }
 -EXPORT_SYMBOL_GPL(nft_parse_register);
  
  /**
   *    nft_dump_register - dump a register value to a netlink attribute
@@@ -8638,7 -8635,7 +8640,7 @@@ EXPORT_SYMBOL_GPL(nft_dump_register)
   *    Validate that the input register is one of the general purpose
   *    registers and that the length of the load is within the bounds.
   */
 -int nft_validate_register_load(enum nft_registers reg, unsigned int len)
 +static int nft_validate_register_load(enum nft_registers reg, unsigned int len)
  {
        if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE)
                return -EINVAL;
  
        return 0;
  }
 -EXPORT_SYMBOL_GPL(nft_validate_register_load);
 +
 +int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len)
 +{
 +      u32 reg;
 +      int err;
 +
 +      reg = nft_parse_register(attr);
 +      err = nft_validate_register_load(reg, len);
 +      if (err < 0)
 +              return err;
 +
 +      *sreg = reg;
 +      return 0;
 +}
 +EXPORT_SYMBOL_GPL(nft_parse_register_load);
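
A hedged sketch of how an expression ->init() callback would adopt the new helper in place of nft_parse_register() plus nft_validate_register_load(); nft_example, NFTA_EXAMPLE_SREG and the 4-byte length are invented for illustration:

struct nft_example {
        u8      sreg;   /* registers are now stored as u8 */
};

static int nft_example_init(const struct nft_ctx *ctx,
                            const struct nft_expr *expr,
                            const struct nlattr * const tb[])
{
        struct nft_example *priv = nft_expr_priv(expr);

        /* Parses the (invented) NFTA_EXAMPLE_SREG attribute and
         * bounds-checks a 4-byte load in one call.
         */
        return nft_parse_register_load(tb[NFTA_EXAMPLE_SREG], &priv->sreg,
                                       sizeof(u32));
}
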
  
  /**
   *    nft_validate_register_store - validate an expressions' register store
   *    A value of NULL for the data means that its runtime gathered
   *    data.
   */
 -int nft_validate_register_store(const struct nft_ctx *ctx,
 -                              enum nft_registers reg,
 -                              const struct nft_data *data,
 -                              enum nft_data_types type, unsigned int len)
 +static int nft_validate_register_store(const struct nft_ctx *ctx,
 +                                     enum nft_registers reg,
 +                                     const struct nft_data *data,
 +                                     enum nft_data_types type,
 +                                     unsigned int len)
  {
        int err;
  
                return 0;
        }
  }
 -EXPORT_SYMBOL_GPL(nft_validate_register_store);
 +
 +int nft_parse_register_store(const struct nft_ctx *ctx,
 +                           const struct nlattr *attr, u8 *dreg,
 +                           const struct nft_data *data,
 +                           enum nft_data_types type, unsigned int len)
 +{
 +      int err;
 +      u32 reg;
 +
 +      reg = nft_parse_register(attr);
 +      err = nft_validate_register_store(ctx, reg, data, type, len);
 +      if (err < 0)
 +              return err;
 +
 +      *dreg = reg;
 +      return 0;
 +}
 +EXPORT_SYMBOL_GPL(nft_parse_register_store);
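
And the store-side counterpart, again with invented names (nft_example_dst, NFTA_EXAMPLE_DREG); passing NULL data indicates the stored value is only known at runtime, as the comment above notes:

struct nft_example_dst {
        u8      dreg;
};

static int nft_example_dst_init(const struct nft_ctx *ctx,
                                const struct nft_expr *expr,
                                const struct nlattr * const tb[])
{
        struct nft_example_dst *priv = nft_expr_priv(expr);

        /* NULL data: the stored value is only gathered at runtime. */
        return nft_parse_register_store(ctx, tb[NFTA_EXAMPLE_DREG], &priv->dreg,
                                        NULL, NFT_DATA_VALUE, sizeof(u32));
}
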
  
  static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = {
        [NFTA_VERDICT_CODE]     = { .type = NLA_U32 },
@@@ -8986,6 -8951,17 +8988,17 @@@ int __nft_release_basechain(struct nft_
  }
  EXPORT_SYMBOL_GPL(__nft_release_basechain);
  
+ static void __nft_release_hooks(struct net *net)
+ {
+       struct nft_table *table;
+       struct nft_chain *chain;
+
+       list_for_each_entry(table, &net->nft.tables, list) {
+               list_for_each_entry(chain, &table->chains, list)
+                       nf_tables_unregister_hook(net, table, chain);
+       }
+ }
+
  static void __nft_release_tables(struct net *net)
  {
        struct nft_flowtable *flowtable, *nf;
  
        list_for_each_entry_safe(table, nt, &net->nft.tables, list) {
                ctx.family = table->family;
-               list_for_each_entry(chain, &table->chains, list)
-                       nf_tables_unregister_hook(net, table, chain);
-               /* No packets are walking on these chains anymore. */
                ctx.table = table;
                list_for_each_entry(chain, &table->chains, list) {
                        ctx.chain = chain;
@@@ -9053,6 -9025,11 +9062,11 @@@ static int __net_init nf_tables_init_ne
        return 0;
  }
  
+ static void __net_exit nf_tables_pre_exit_net(struct net *net)
+ {
+       __nft_release_hooks(net);
+ }
+
  static void __net_exit nf_tables_exit_net(struct net *net)
  {
        mutex_lock(&net->nft.commit_mutex);
  }
  
  static struct pernet_operations nf_tables_net_ops = {
-       .init   = nf_tables_init_net,
-       .exit   = nf_tables_exit_net,
+       .init           = nf_tables_init_net,
+       .pre_exit       = nf_tables_pre_exit_net,
+       .exit           = nf_tables_exit_net,
  };
  
  static int __init nf_tables_module_init(void)