Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
author     David S. Miller <[email protected]>
           Wed, 10 Feb 2021 21:30:12 +0000 (13:30 -0800)
committer  David S. Miller <[email protected]>
           Wed, 10 Feb 2021 21:30:12 +0000 (13:30 -0800)
23 files changed:
MAINTAINERS
Makefile
drivers/net/dsa/ocelot/felix.c
drivers/net/ethernet/amazon/ena/ena_netdev.c
drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
drivers/net/ethernet/ibm/ibmvnic.c
drivers/net/ethernet/mscc/ocelot.c
drivers/net/hyperv/netvsc.c
drivers/net/hyperv/rndis_filter.c
drivers/net/ipa/gsi.c
drivers/net/usb/qmi_wwan.c
include/linux/netdevice.h
include/net/switchdev.h
include/soc/mscc/ocelot.h
kernel/bpf/stackmap.c
kernel/bpf/verifier.c
net/bridge/br_mrp.c
net/core/dev.c
net/dsa/dsa2.c
net/netfilter/nf_flow_table_core.c
net/netfilter/nf_tables_api.c

diff --combined MAINTAINERS
index cbf4b94f89d4b7421f5e8e0757d3fe9965b8c89b,64c7169db617604f0b6d0490a9ef0371f14dd8ca..0bbd95b73c39392c1329119f27fa9871093610b6
@@@ -2616,8 -2616,8 +2616,8 @@@ S:      Maintaine
  F:    drivers/power/reset/keystone-reset.c
  
  ARM/TEXAS INSTRUMENTS K3 ARCHITECTURE
- M:    Tero Kristo <[email protected]>
  M:    Nishanth Menon <[email protected]>
+ M:    Tero Kristo <[email protected]>
  L:    [email protected] (moderated for non-subscribers)
  S:    Supported
  F:    Documentation/devicetree/bindings/arm/ti/k3.yaml
@@@ -2787,14 -2787,6 +2787,14 @@@ F:    arch/arm64
  F:    tools/testing/selftests/arm64/
  X:    arch/arm64/boot/dts/
  
 +ARROW SPEEDCHIPS XRS7000 SERIES ETHERNET SWITCH DRIVER
 +M:    George McCollister <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/net/dsa/arrow,xrs700x.yaml
 +F:    drivers/net/dsa/xrs700x/*
 +F:    net/dsa/tag_xrs700x.c
 +
  AS3645A LED FLASH CONTROLLER DRIVER
  M:    Sakari Ailus <[email protected]>
  L:    [email protected]
@@@ -3407,7 -3399,6 +3407,7 @@@ L:      [email protected] (sub
  S:    Supported
  F:    Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml
  F:    drivers/net/dsa/b53/*
 +F:    include/linux/dsa/brcm.h
  F:    include/linux/platform_data/b53.h
  
  BROADCOM BCM2711/BCM2835 ARM ARCHITECTURE
@@@ -3445,15 -3436,6 +3445,15 @@@ F:    Documentation/devicetree/bindings/mi
  F:    arch/mips/bcm47xx/*
  F:    arch/mips/include/asm/mach-bcm47xx/*
  
 +BROADCOM BCM4908 ETHERNET DRIVER
 +M:    Rafał Miłecki <[email protected]>
 +M:    [email protected]
 +L:    [email protected]
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/net/brcm,bcm4908enet.yaml
 +F:    drivers/net/ethernet/broadcom/bcm4908enet.*
 +F:    drivers/net/ethernet/broadcom/unimac.h
 +
  BROADCOM BCM5301X ARM ARCHITECTURE
  M:    Hauke Mehrtens <[email protected]>
  M:    Rafał Miłecki <[email protected]>
@@@ -3642,7 -3624,6 +3642,7 @@@ S:      Supporte
  F:    Documentation/devicetree/bindings/net/brcm,bcmgenet.txt
  F:    Documentation/devicetree/bindings/net/brcm,unimac-mdio.txt
  F:    drivers/net/ethernet/broadcom/genet/
 +F:    drivers/net/ethernet/broadcom/unimac.h
  F:    drivers/net/mdio/mdio-bcm-unimac.c
  F:    include/linux/platform_data/bcmgenet.h
  F:    include/linux/platform_data/mdio-bcm-unimac.h
@@@ -3676,15 -3657,6 +3676,15 @@@ N:    bcm8831
  N:    hr2
  N:    stingray
  
 +BROADCOM IPROC GBIT ETHERNET DRIVER
 +M:    Rafał Miłecki <[email protected]>
 +M:    [email protected]
 +L:    [email protected]
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/net/brcm,amac.txt
 +F:    drivers/net/ethernet/broadcom/bgmac*
 +F:    drivers/net/ethernet/broadcom/unimac.h
 +
  BROADCOM KONA GPIO DRIVER
  M:    Ray Jui <[email protected]>
  L:    [email protected]
@@@ -3764,7 -3736,6 +3764,7 @@@ L:      [email protected]
  L:    [email protected]
  S:    Supported
  F:    drivers/net/ethernet/broadcom/bcmsysport.*
 +F:    drivers/net/ethernet/broadcom/unimac.h
  
  BROADCOM TG3 GIGABIT ETHERNET DRIVER
  M:    Siva Reddy Kallam <[email protected]>
@@@ -3959,10 -3930,8 +3959,10 @@@ T:    git git://git.kernel.org/pub/scm/lin
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can-next.git
  F:    Documentation/devicetree/bindings/net/can/
  F:    drivers/net/can/
 +F:    include/linux/can/bittiming.h
  F:    include/linux/can/dev.h
  F:    include/linux/can/led.h
 +F:    include/linux/can/length.h
  F:    include/linux/can/platform/
  F:    include/linux/can/rx-offload.h
  F:    include/uapi/linux/can/error.h
@@@ -3978,7 -3947,6 +3978,7 @@@ W:      https://github.com/linux-ca
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can.git
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can-next.git
  F:    Documentation/networking/can.rst
 +F:    include/linux/can/can-ml.h
  F:    include/linux/can/core.h
  F:    include/linux/can/skb.h
  F:    include/net/netns/can.h
@@@ -4336,7 -4304,7 +4336,7 @@@ S:      Maintaine
  F:    .clang-format
  
  CLANG/LLVM BUILD SUPPORT
- M:    Nathan Chancellor <nat[email protected]>
+ M:    Nathan Chancellor <nat[email protected]>
  M:    Nick Desaulniers <[email protected]>
  L:    [email protected]
  S:    Supported
@@@ -6506,9 -6474,9 +6506,9 @@@ S:      Maintaine
  F:    drivers/edac/skx_*.[ch]
  
  EDAC-TI
- M:    Tero Kristo <[email protected]>
+ M:    Tero Kristo <[email protected]>
  L:    [email protected]
- S:    Maintained
+ S:    Odd Fixes
  F:    drivers/edac/ti_edac.c
  
  EDIROL UA-101/UA-1000 DRIVER
@@@ -9591,7 -9559,7 +9591,7 @@@ F:      Documentation/hwmon/k8temp.rs
  F:    drivers/hwmon/k8temp.c
  
  KASAN
- M:    Andrey Ryabinin <aryabinin@virtuozzo.com>
+ M:    Andrey Ryabinin <ryabinin.a.a@gmail.com>
  R:    Alexander Potapenko <[email protected]>
  R:    Dmitry Vyukov <[email protected]>
  L:    [email protected]
@@@ -12549,14 -12517,6 +12549,14 @@@ F: include/net/nfc
  F:    include/uapi/linux/nfc.h
  F:    net/nfc/
  
 +NFC VIRTUAL NCI DEVICE DRIVER
 +M:    Bongsu Jeon <[email protected]>
 +L:    [email protected]
 +L:    [email protected] (moderated for non-subscribers)
 +S:    Supported
 +F:    drivers/nfc/virtual_ncidev.c
 +F:    tools/testing/selftests/nci/
 +
  NFS, SUNRPC, AND LOCKD CLIENTS
  M:    Trond Myklebust <[email protected]>
  M:    Anna Schumaker <[email protected]>
@@@ -12868,7 -12828,6 +12868,7 @@@ F:   drivers/net/dsa/ocelot/
  F:    drivers/net/ethernet/mscc/
  F:    include/soc/mscc/ocelot*
  F:    net/dsa/tag_ocelot.c
 +F:    net/dsa/tag_ocelot_8021q.c
  F:    tools/testing/selftests/drivers/net/ocelot/*
  
  OCXL (Open Coherent Accelerator Processor Interface OpenCAPI) DRIVER
@@@ -17613,7 -17572,7 +17613,7 @@@ F:   include/linux/dma/k3-psil.
  
  TEXAS INSTRUMENTS' SYSTEM CONTROL INTERFACE (TISCI) PROTOCOL DRIVER
  M:    Nishanth Menon <[email protected]>
- M:    Tero Kristo <[email protected]>
+ M:    Tero Kristo <[email protected]>
  M:    Santosh Shilimkar <[email protected]>
  L:    [email protected]
  S:    Maintained
@@@ -17757,9 -17716,9 +17757,9 @@@ S:   Maintaine
  F:    drivers/clk/clk-cdce706.c
  
  TI CLOCK DRIVER
- M:    Tero Kristo <[email protected]>
+ M:    Tero Kristo <[email protected]>
  L:    [email protected]
- S:    Maintained
+ S:    Odd Fixes
  F:    drivers/clk/ti/
  F:    include/linux/clk/ti.h
  
@@@ -17885,7 -17844,7 +17885,7 @@@ M:   Dan Murphy <[email protected]
  L:    [email protected]
  S:    Maintained
  F:    Documentation/devicetree/bindings/net/can/tcan4x5x.txt
 -F:    drivers/net/can/m_can/tcan4x5x.c
 +F:    drivers/net/can/m_can/tcan4x5x*
  
  TI TRF7970A NFC DRIVER
  M:    Mark Greer <[email protected]>
diff --combined Makefile
index 54b1ae50c817ee6c7ddcc3dd7e9e9a794c8912ef,ade44ac4cc2ffa6b0d679e559d22a73907041e1a..b83df651018b065c9a5cda5484332afab1b08266
+++ b/Makefile
@@@ -2,7 -2,7 +2,7 @@@
  VERSION = 5
  PATCHLEVEL = 11
  SUBLEVEL = 0
- EXTRAVERSION = -rc6
+ EXTRAVERSION = -rc7
  NAME = Kleptomaniac Octopus
  
  # *DOCUMENTATION*
@@@ -452,7 -452,6 +452,6 @@@ AWK                = aw
  INSTALLKERNEL  := installkernel
  DEPMOD                = depmod
  PERL          = perl
- PYTHON                = python
  PYTHON3               = python3
  CHECK         = sparse
  BASH          = bash
@@@ -508,7 -507,7 +507,7 @@@ CLANG_FLAGS :
  
  export ARCH SRCARCH CONFIG_SHELL BASH HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE LD CC
  export CPP AR NM STRIP OBJCOPY OBJDUMP READELF PAHOLE RESOLVE_BTFIDS LEX YACC AWK INSTALLKERNEL
- export PERL PYTHON PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX
+ export PERL PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX
  export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ ZSTD
  export KBUILD_HOSTCXXFLAGS KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS LDFLAGS_MODULE
  
@@@ -649,8 -648,7 +648,8 @@@ ifeq ($(KBUILD_EXTMOD),
  core-y                := init/ usr/
  drivers-y     := drivers/ sound/
  drivers-$(CONFIG_SAMPLES) += samples/
 -drivers-y     += net/ virt/
 +drivers-$(CONFIG_NET) += net/
 +drivers-y     += virt/
  libs-y                := lib/
  endif # KBUILD_EXTMOD
  
@@@ -813,10 -811,12 +812,12 @@@ KBUILD_CFLAGS   += -ftrivial-auto-var-ini
  KBUILD_CFLAGS += -enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang
  endif
  
+ DEBUG_CFLAGS  :=
  # Workaround for GCC versions < 5.0
  # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61801
  ifdef CONFIG_CC_IS_GCC
- DEBUG_CFLAGS  := $(call cc-ifversion, -lt, 0500, $(call cc-option, -fno-var-tracking-assignments))
+ DEBUG_CFLAGS  += $(call cc-ifversion, -lt, 0500, $(call cc-option, -fno-var-tracking-assignments))
  endif
  
  ifdef CONFIG_DEBUG_INFO
@@@ -949,12 -949,6 +950,6 @@@ KBUILD_CFLAGS   += $(call cc-option,-We
  # change __FILE__ to the relative path from the srctree
  KBUILD_CPPFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
  
- # ensure -fcf-protection is disabled when using retpoline as it is
- # incompatible with -mindirect-branch=thunk-extern
- ifdef CONFIG_RETPOLINE
- KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none)
- endif
  # include additional Makefiles when needed
  include-y                     := scripts/Makefile.extrawarn
  include-$(CONFIG_KASAN)               += scripts/Makefile.kasan
index 1bd5aea12b252fb9f99a89875cdc09dfdbc0350f,45fdb1256dbfeb6160e1e3f6aafddc657677b786..386468e66c417c0ce62eca5e0ddc615861494da4
@@@ -1,5 -1,5 +1,5 @@@
  // SPDX-License-Identifier: GPL-2.0
 -/* Copyright 2019 NXP Semiconductors
 +/* Copyright 2019-2021 NXP Semiconductors
   *
   * This is an umbrella module for all network switches that are
   * register-compatible with Ocelot and that perform I/O to their host CPU
@@@ -13,7 -13,6 +13,7 @@@
  #include <soc/mscc/ocelot_ana.h>
  #include <soc/mscc/ocelot_ptp.h>
  #include <soc/mscc/ocelot.h>
 +#include <linux/dsa/8021q.h>
  #include <linux/platform_device.h>
  #include <linux/packing.h>
  #include <linux/module.h>
  #include <net/dsa.h>
  #include "felix.h"
  
 +static int felix_tag_8021q_rxvlan_add(struct felix *felix, int port, u16 vid,
 +                                    bool pvid, bool untagged)
 +{
 +      struct ocelot_vcap_filter *outer_tagging_rule;
 +      struct ocelot *ocelot = &felix->ocelot;
 +      struct dsa_switch *ds = felix->ds;
 +      int key_length, upstream, err;
 +
 +      /* We don't need to install the rxvlan into the other ports' filtering
 +       * tables, because we're just pushing the rxvlan when sending towards
 +       * the CPU
 +       */
 +      if (!pvid)
 +              return 0;
 +
 +      key_length = ocelot->vcap[VCAP_ES0].keys[VCAP_ES0_IGR_PORT].length;
 +      upstream = dsa_upstream_port(ds, port);
 +
 +      outer_tagging_rule = kzalloc(sizeof(struct ocelot_vcap_filter),
 +                                   GFP_KERNEL);
 +      if (!outer_tagging_rule)
 +              return -ENOMEM;
 +
 +      outer_tagging_rule->key_type = OCELOT_VCAP_KEY_ANY;
 +      outer_tagging_rule->prio = 1;
 +      outer_tagging_rule->id.cookie = port;
 +      outer_tagging_rule->id.tc_offload = false;
 +      outer_tagging_rule->block_id = VCAP_ES0;
 +      outer_tagging_rule->type = OCELOT_VCAP_FILTER_OFFLOAD;
 +      outer_tagging_rule->lookup = 0;
 +      outer_tagging_rule->ingress_port.value = port;
 +      outer_tagging_rule->ingress_port.mask = GENMASK(key_length - 1, 0);
 +      outer_tagging_rule->egress_port.value = upstream;
 +      outer_tagging_rule->egress_port.mask = GENMASK(key_length - 1, 0);
 +      outer_tagging_rule->action.push_outer_tag = OCELOT_ES0_TAG;
 +      outer_tagging_rule->action.tag_a_tpid_sel = OCELOT_TAG_TPID_SEL_8021AD;
 +      outer_tagging_rule->action.tag_a_vid_sel = 1;
 +      outer_tagging_rule->action.vid_a_val = vid;
 +
 +      err = ocelot_vcap_filter_add(ocelot, outer_tagging_rule, NULL);
 +      if (err)
 +              kfree(outer_tagging_rule);
 +
 +      return err;
 +}
 +
 +static int felix_tag_8021q_txvlan_add(struct felix *felix, int port, u16 vid,
 +                                    bool pvid, bool untagged)
 +{
 +      struct ocelot_vcap_filter *untagging_rule, *redirect_rule;
 +      struct ocelot *ocelot = &felix->ocelot;
 +      struct dsa_switch *ds = felix->ds;
 +      int upstream, err;
 +
 +      /* tag_8021q.c assumes we are implementing this via port VLAN
 +       * membership, which we aren't. So we don't need to add any VCAP filter
 +       * for the CPU port.
 +       */
 +      if (ocelot->ports[port]->is_dsa_8021q_cpu)
 +              return 0;
 +
 +      untagging_rule = kzalloc(sizeof(struct ocelot_vcap_filter), GFP_KERNEL);
 +      if (!untagging_rule)
 +              return -ENOMEM;
 +
 +      redirect_rule = kzalloc(sizeof(struct ocelot_vcap_filter), GFP_KERNEL);
 +      if (!redirect_rule) {
 +              kfree(untagging_rule);
 +              return -ENOMEM;
 +      }
 +
 +      upstream = dsa_upstream_port(ds, port);
 +
 +      untagging_rule->key_type = OCELOT_VCAP_KEY_ANY;
 +      untagging_rule->ingress_port_mask = BIT(upstream);
 +      untagging_rule->vlan.vid.value = vid;
 +      untagging_rule->vlan.vid.mask = VLAN_VID_MASK;
 +      untagging_rule->prio = 1;
 +      untagging_rule->id.cookie = port;
 +      untagging_rule->id.tc_offload = false;
 +      untagging_rule->block_id = VCAP_IS1;
 +      untagging_rule->type = OCELOT_VCAP_FILTER_OFFLOAD;
 +      untagging_rule->lookup = 0;
 +      untagging_rule->action.vlan_pop_cnt_ena = true;
 +      untagging_rule->action.vlan_pop_cnt = 1;
 +      untagging_rule->action.pag_override_mask = 0xff;
 +      untagging_rule->action.pag_val = port;
 +
 +      err = ocelot_vcap_filter_add(ocelot, untagging_rule, NULL);
 +      if (err) {
 +              kfree(untagging_rule);
 +              kfree(redirect_rule);
 +              return err;
 +      }
 +
 +      redirect_rule->key_type = OCELOT_VCAP_KEY_ANY;
 +      redirect_rule->ingress_port_mask = BIT(upstream);
 +      redirect_rule->pag = port;
 +      redirect_rule->prio = 1;
 +      redirect_rule->id.cookie = port;
 +      redirect_rule->id.tc_offload = false;
 +      redirect_rule->block_id = VCAP_IS2;
 +      redirect_rule->type = OCELOT_VCAP_FILTER_OFFLOAD;
 +      redirect_rule->lookup = 0;
 +      redirect_rule->action.mask_mode = OCELOT_MASK_MODE_REDIRECT;
 +      redirect_rule->action.port_mask = BIT(port);
 +
 +      err = ocelot_vcap_filter_add(ocelot, redirect_rule, NULL);
 +      if (err) {
 +              ocelot_vcap_filter_del(ocelot, untagging_rule);
 +              kfree(redirect_rule);
 +              return err;
 +      }
 +
 +      return 0;
 +}
 +
 +static int felix_tag_8021q_vlan_add(struct dsa_switch *ds, int port, u16 vid,
 +                                  u16 flags)
 +{
 +      bool untagged = flags & BRIDGE_VLAN_INFO_UNTAGGED;
 +      bool pvid = flags & BRIDGE_VLAN_INFO_PVID;
 +      struct ocelot *ocelot = ds->priv;
 +
 +      if (vid_is_dsa_8021q_rxvlan(vid))
 +              return felix_tag_8021q_rxvlan_add(ocelot_to_felix(ocelot),
 +                                                port, vid, pvid, untagged);
 +
 +      if (vid_is_dsa_8021q_txvlan(vid))
 +              return felix_tag_8021q_txvlan_add(ocelot_to_felix(ocelot),
 +                                                port, vid, pvid, untagged);
 +
 +      return 0;
 +}
 +
 +static int felix_tag_8021q_rxvlan_del(struct felix *felix, int port, u16 vid)
 +{
 +      struct ocelot_vcap_filter *outer_tagging_rule;
 +      struct ocelot_vcap_block *block_vcap_es0;
 +      struct ocelot *ocelot = &felix->ocelot;
 +
 +      block_vcap_es0 = &ocelot->block[VCAP_ES0];
 +
 +      outer_tagging_rule = ocelot_vcap_block_find_filter_by_id(block_vcap_es0,
 +                                                               port, false);
 +      /* In rxvlan_add, we had the "if (!pvid) return 0" logic to avoid
 +       * installing outer tagging ES0 rules where they weren't needed.
 +       * But in rxvlan_del, the API doesn't give us the "flags" anymore,
 +       * so that forces us to be slightly sloppy here, and just assume that
 +       * if we didn't find an outer_tagging_rule it means that there was
 +       * none in the first place, i.e. rxvlan_del is called on a non-pvid
 +       * port. This is most probably true though.
 +       */
 +      if (!outer_tagging_rule)
 +              return 0;
 +
 +      return ocelot_vcap_filter_del(ocelot, outer_tagging_rule);
 +}
 +
 +static int felix_tag_8021q_txvlan_del(struct felix *felix, int port, u16 vid)
 +{
 +      struct ocelot_vcap_filter *untagging_rule, *redirect_rule;
 +      struct ocelot_vcap_block *block_vcap_is1;
 +      struct ocelot_vcap_block *block_vcap_is2;
 +      struct ocelot *ocelot = &felix->ocelot;
 +      int err;
 +
 +      if (ocelot->ports[port]->is_dsa_8021q_cpu)
 +              return 0;
 +
 +      block_vcap_is1 = &ocelot->block[VCAP_IS1];
 +      block_vcap_is2 = &ocelot->block[VCAP_IS2];
 +
 +      untagging_rule = ocelot_vcap_block_find_filter_by_id(block_vcap_is1,
 +                                                           port, false);
 +      if (!untagging_rule)
 +              return 0;
 +
 +      err = ocelot_vcap_filter_del(ocelot, untagging_rule);
 +      if (err)
 +              return err;
 +
 +      redirect_rule = ocelot_vcap_block_find_filter_by_id(block_vcap_is2,
 +                                                          port, false);
 +      if (!redirect_rule)
 +              return 0;
 +
 +      return ocelot_vcap_filter_del(ocelot, redirect_rule);
 +}
 +
 +static int felix_tag_8021q_vlan_del(struct dsa_switch *ds, int port, u16 vid)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +
 +      if (vid_is_dsa_8021q_rxvlan(vid))
 +              return felix_tag_8021q_rxvlan_del(ocelot_to_felix(ocelot),
 +                                                port, vid);
 +
 +      if (vid_is_dsa_8021q_txvlan(vid))
 +              return felix_tag_8021q_txvlan_del(ocelot_to_felix(ocelot),
 +                                                port, vid);
 +
 +      return 0;
 +}
 +
 +static const struct dsa_8021q_ops felix_tag_8021q_ops = {
 +      .vlan_add       = felix_tag_8021q_vlan_add,
 +      .vlan_del       = felix_tag_8021q_vlan_del,
 +};
 +
 +/* Alternatively to using the NPI functionality, that same hardware MAC
 + * connected internally to the enetc or fman DSA master can be configured to
 + * use the software-defined tag_8021q frame format. As far as the hardware is
 + * concerned, it thinks it is a "dumb switch" - the queues of the CPU port
 + * module are now disconnected from it, but can still be accessed through
 + * register-based MMIO.
 + */
 +static void felix_8021q_cpu_port_init(struct ocelot *ocelot, int port)
 +{
 +      ocelot->ports[port]->is_dsa_8021q_cpu = true;
 +      ocelot->npi = -1;
 +
 +      /* Overwrite PGID_CPU with the non-tagging port */
 +      ocelot_write_rix(ocelot, BIT(port), ANA_PGID_PGID, PGID_CPU);
 +
 +      ocelot_apply_bridge_fwd_mask(ocelot);
 +}
 +
 +static void felix_8021q_cpu_port_deinit(struct ocelot *ocelot, int port)
 +{
 +      ocelot->ports[port]->is_dsa_8021q_cpu = false;
 +
 +      /* Restore PGID_CPU */
 +      ocelot_write_rix(ocelot, BIT(ocelot->num_phys_ports), ANA_PGID_PGID,
 +                       PGID_CPU);
 +
 +      ocelot_apply_bridge_fwd_mask(ocelot);
 +}
 +
 +static int felix_setup_tag_8021q(struct dsa_switch *ds, int cpu)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +      struct felix *felix = ocelot_to_felix(ocelot);
 +      unsigned long cpu_flood;
 +      int port, err;
 +
 +      felix_8021q_cpu_port_init(ocelot, cpu);
 +
 +      for (port = 0; port < ds->num_ports; port++) {
 +              if (dsa_is_unused_port(ds, port))
 +                      continue;
 +
 +              /* This overwrites ocelot_init():
 +               * Do not forward BPDU frames to the CPU port module,
 +               * for 2 reasons:
 +               * - When these packets are injected from the tag_8021q
 +               *   CPU port, we want them to go out, not loop back
 +               *   into the system.
 +               * - STP traffic ingressing on a user port should go to
 +               *   the tag_8021q CPU port, not to the hardware CPU
 +               *   port module.
 +               */
 +              ocelot_write_gix(ocelot,
 +                               ANA_PORT_CPU_FWD_BPDU_CFG_BPDU_REDIR_ENA(0),
 +                               ANA_PORT_CPU_FWD_BPDU_CFG, port);
 +      }
 +
 +      /* In tag_8021q mode, the CPU port module is unused. So we
 +       * want to disable flooding of any kind to the CPU port module,
 +       * since packets going there will end in a black hole.
 +       */
 +      cpu_flood = ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports));
 +      ocelot_rmw_rix(ocelot, 0, cpu_flood, ANA_PGID_PGID, PGID_UC);
 +      ocelot_rmw_rix(ocelot, 0, cpu_flood, ANA_PGID_PGID, PGID_MC);
 +
 +      felix->dsa_8021q_ctx = kzalloc(sizeof(*felix->dsa_8021q_ctx),
 +                                     GFP_KERNEL);
 +      if (!felix->dsa_8021q_ctx)
 +              return -ENOMEM;
 +
 +      felix->dsa_8021q_ctx->ops = &felix_tag_8021q_ops;
 +      felix->dsa_8021q_ctx->proto = htons(ETH_P_8021AD);
 +      felix->dsa_8021q_ctx->ds = ds;
 +
 +      err = dsa_8021q_setup(felix->dsa_8021q_ctx, true);
 +      if (err)
 +              goto out_free_dsa_8021_ctx;
 +
 +      return 0;
 +
 +out_free_dsa_8021_ctx:
 +      kfree(felix->dsa_8021q_ctx);
 +      return err;
 +}
 +
 +static void felix_teardown_tag_8021q(struct dsa_switch *ds, int cpu)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +      struct felix *felix = ocelot_to_felix(ocelot);
 +      int err, port;
 +
 +      err = dsa_8021q_setup(felix->dsa_8021q_ctx, false);
 +      if (err)
 +              dev_err(ds->dev, "dsa_8021q_setup returned %d", err);
 +
 +      kfree(felix->dsa_8021q_ctx);
 +
 +      for (port = 0; port < ds->num_ports; port++) {
 +              if (dsa_is_unused_port(ds, port))
 +                      continue;
 +
 +              /* Restore the logic from ocelot_init:
 +               * do not forward BPDU frames to the front ports.
 +               */
 +              ocelot_write_gix(ocelot,
 +                               ANA_PORT_CPU_FWD_BPDU_CFG_BPDU_REDIR_ENA(0xffff),
 +                               ANA_PORT_CPU_FWD_BPDU_CFG,
 +                               port);
 +      }
 +
 +      felix_8021q_cpu_port_deinit(ocelot, cpu);
 +}
 +
 +/* The CPU port module is connected to the Node Processor Interface (NPI). This
 + * is the mode through which frames can be injected from and extracted to an
 + * external CPU, over Ethernet. In NXP SoCs, the "external CPU" is the ARM CPU
 + * running Linux, and this forms a DSA setup together with the enetc or fman
 + * DSA master.
 + */
 +static void felix_npi_port_init(struct ocelot *ocelot, int port)
 +{
 +      ocelot->npi = port;
 +
 +      ocelot_write(ocelot, QSYS_EXT_CPU_CFG_EXT_CPUQ_MSK_M |
 +                   QSYS_EXT_CPU_CFG_EXT_CPU_PORT(port),
 +                   QSYS_EXT_CPU_CFG);
 +
 +      /* NPI port Injection/Extraction configuration */
 +      ocelot_fields_write(ocelot, port, SYS_PORT_MODE_INCL_XTR_HDR,
 +                          ocelot->npi_xtr_prefix);
 +      ocelot_fields_write(ocelot, port, SYS_PORT_MODE_INCL_INJ_HDR,
 +                          ocelot->npi_inj_prefix);
 +
 +      /* Disable transmission of pause frames */
 +      ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, 0);
 +}
 +
 +static void felix_npi_port_deinit(struct ocelot *ocelot, int port)
 +{
 +      /* Restore hardware defaults */
 +      int unused_port = ocelot->num_phys_ports + 2;
 +
 +      ocelot->npi = -1;
 +
 +      ocelot_write(ocelot, QSYS_EXT_CPU_CFG_EXT_CPU_PORT(unused_port),
 +                   QSYS_EXT_CPU_CFG);
 +
 +      ocelot_fields_write(ocelot, port, SYS_PORT_MODE_INCL_XTR_HDR,
 +                          OCELOT_TAG_PREFIX_DISABLED);
 +      ocelot_fields_write(ocelot, port, SYS_PORT_MODE_INCL_INJ_HDR,
 +                          OCELOT_TAG_PREFIX_DISABLED);
 +
 +      /* Enable transmission of pause frames */
 +      ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, 1);
 +}
 +
 +static int felix_setup_tag_npi(struct dsa_switch *ds, int cpu)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +      unsigned long cpu_flood;
 +
 +      felix_npi_port_init(ocelot, cpu);
 +
 +      /* Include the CPU port module (and indirectly, the NPI port)
 +       * in the forwarding mask for unknown unicast - the hardware
 +       * default value for ANA_FLOODING_FLD_UNICAST excludes
 +       * BIT(ocelot->num_phys_ports), and so does ocelot_init,
 +       * since Ocelot relies on whitelisting MAC addresses towards
 +       * PGID_CPU.
 +       * We do this because DSA does not yet perform RX filtering,
 +       * and the NPI port does not perform source address learning,
 +       * so traffic sent to Linux is effectively unknown from the
 +       * switch's perspective.
 +       */
 +      cpu_flood = ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports));
 +      ocelot_rmw_rix(ocelot, cpu_flood, cpu_flood, ANA_PGID_PGID, PGID_UC);
 +
 +      return 0;
 +}
 +
 +static void felix_teardown_tag_npi(struct dsa_switch *ds, int cpu)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +
 +      felix_npi_port_deinit(ocelot, cpu);
 +}
 +
 +static int felix_set_tag_protocol(struct dsa_switch *ds, int cpu,
 +                                enum dsa_tag_protocol proto)
 +{
 +      int err;
 +
 +      switch (proto) {
 +      case DSA_TAG_PROTO_OCELOT:
 +              err = felix_setup_tag_npi(ds, cpu);
 +              break;
 +      case DSA_TAG_PROTO_OCELOT_8021Q:
 +              err = felix_setup_tag_8021q(ds, cpu);
 +              break;
 +      default:
 +              err = -EPROTONOSUPPORT;
 +      }
 +
 +      return err;
 +}
 +
 +static void felix_del_tag_protocol(struct dsa_switch *ds, int cpu,
 +                                 enum dsa_tag_protocol proto)
 +{
 +      switch (proto) {
 +      case DSA_TAG_PROTO_OCELOT:
 +              felix_teardown_tag_npi(ds, cpu);
 +              break;
 +      case DSA_TAG_PROTO_OCELOT_8021Q:
 +              felix_teardown_tag_8021q(ds, cpu);
 +              break;
 +      default:
 +              break;
 +      }
 +}
 +
 +/* This always leaves the switch in a consistent state, because although the
 + * tag_8021q setup can fail, the NPI setup can't. So either the change is made,
 + * or the restoration is guaranteed to work.
 + */
 +static int felix_change_tag_protocol(struct dsa_switch *ds, int cpu,
 +                                   enum dsa_tag_protocol proto)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +      struct felix *felix = ocelot_to_felix(ocelot);
 +      enum dsa_tag_protocol old_proto = felix->tag_proto;
 +      int err;
 +
 +      if (proto != DSA_TAG_PROTO_OCELOT &&
 +          proto != DSA_TAG_PROTO_OCELOT_8021Q)
 +              return -EPROTONOSUPPORT;
 +
 +      felix_del_tag_protocol(ds, cpu, old_proto);
 +
 +      err = felix_set_tag_protocol(ds, cpu, proto);
 +      if (err) {
 +              felix_set_tag_protocol(ds, cpu, old_proto);
 +              return err;
 +      }
 +
 +      felix->tag_proto = proto;
 +
 +      return 0;
 +}
 +
  static enum dsa_tag_protocol felix_get_tag_protocol(struct dsa_switch *ds,
                                                    int port,
                                                    enum dsa_tag_protocol mp)
  {
 -      return DSA_TAG_PROTO_OCELOT;
 +      struct ocelot *ocelot = ds->priv;
 +      struct felix *felix = ocelot_to_felix(ocelot);
 +
 +      return felix->tag_proto;
  }
  
  static int felix_set_ageing_time(struct dsa_switch *ds,
@@@ -529,12 -65,19 +529,12 @@@ static int felix_fdb_del(struct dsa_swi
        return ocelot_fdb_del(ocelot, port, addr, vid);
  }
  
 -/* This callback needs to be present */
 -static int felix_mdb_prepare(struct dsa_switch *ds, int port,
 -                           const struct switchdev_obj_port_mdb *mdb)
 -{
 -      return 0;
 -}
 -
 -static void felix_mdb_add(struct dsa_switch *ds, int port,
 -                        const struct switchdev_obj_port_mdb *mdb)
 +static int felix_mdb_add(struct dsa_switch *ds, int port,
 +                       const struct switchdev_obj_port_mdb *mdb)
  {
        struct ocelot *ocelot = ds->priv;
  
 -      ocelot_port_mdb_add(ocelot, port, mdb);
 +      return ocelot_port_mdb_add(ocelot, port, mdb);
  }
  
  static int felix_mdb_del(struct dsa_switch *ds, int port,
@@@ -569,40 -112,12 +569,40 @@@ static void felix_bridge_leave(struct d
        ocelot_port_bridge_leave(ocelot, port, br);
  }
  
 +static int felix_lag_join(struct dsa_switch *ds, int port,
 +                        struct net_device *bond,
 +                        struct netdev_lag_upper_info *info)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +
 +      return ocelot_port_lag_join(ocelot, port, bond, info);
 +}
 +
 +static int felix_lag_leave(struct dsa_switch *ds, int port,
 +                         struct net_device *bond)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +
 +      ocelot_port_lag_leave(ocelot, port, bond);
 +
 +      return 0;
 +}
 +
 +static int felix_lag_change(struct dsa_switch *ds, int port)
 +{
 +      struct dsa_port *dp = dsa_to_port(ds, port);
 +      struct ocelot *ocelot = ds->priv;
 +
 +      ocelot_port_lag_change(ocelot, port, dp->lag_tx_enabled);
 +
 +      return 0;
 +}
 +
  static int felix_vlan_prepare(struct dsa_switch *ds, int port,
                              const struct switchdev_obj_port_vlan *vlan)
  {
        struct ocelot *ocelot = ds->priv;
 -      u16 vid, flags = vlan->flags;
 -      int err;
 +      u16 flags = vlan->flags;
  
        /* Ocelot switches copy frames as-is to the CPU, so the flags:
         * egress-untagged or not, pvid or not, make no difference. This
        if (port == ocelot->npi)
                return 0;
  
 -      for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) {
 -              err = ocelot_vlan_prepare(ocelot, port, vid,
 -                                        flags & BRIDGE_VLAN_INFO_PVID,
 -                                        flags & BRIDGE_VLAN_INFO_UNTAGGED);
 -              if (err)
 -                      return err;
 -      }
 -
 -      return 0;
 +      return ocelot_vlan_prepare(ocelot, port, vlan->vid,
 +                                 flags & BRIDGE_VLAN_INFO_PVID,
 +                                 flags & BRIDGE_VLAN_INFO_UNTAGGED);
  }
  
 -static int felix_vlan_filtering(struct dsa_switch *ds, int port, bool enabled,
 -                              struct switchdev_trans *trans)
 +static int felix_vlan_filtering(struct dsa_switch *ds, int port, bool enabled)
  {
        struct ocelot *ocelot = ds->priv;
  
 -      return ocelot_port_vlan_filtering(ocelot, port, enabled, trans);
 +      return ocelot_port_vlan_filtering(ocelot, port, enabled);
  }
  
 -static void felix_vlan_add(struct dsa_switch *ds, int port,
 -                         const struct switchdev_obj_port_vlan *vlan)
 +static int felix_vlan_add(struct dsa_switch *ds, int port,
 +                        const struct switchdev_obj_port_vlan *vlan)
  {
        struct ocelot *ocelot = ds->priv;
        u16 flags = vlan->flags;
 -      u16 vid;
        int err;
  
 -      for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) {
 -              err = ocelot_vlan_add(ocelot, port, vid,
 -                                    flags & BRIDGE_VLAN_INFO_PVID,
 -                                    flags & BRIDGE_VLAN_INFO_UNTAGGED);
 -              if (err) {
 -                      dev_err(ds->dev, "Failed to add VLAN %d to port %d: %d\n",
 -                              vid, port, err);
 -                      return;
 -              }
 -      }
 +      err = felix_vlan_prepare(ds, port, vlan);
 +      if (err)
 +              return err;
 +
 +      return ocelot_vlan_add(ocelot, port, vlan->vid,
 +                             flags & BRIDGE_VLAN_INFO_PVID,
 +                             flags & BRIDGE_VLAN_INFO_UNTAGGED);
  }
  
  static int felix_vlan_del(struct dsa_switch *ds, int port,
                          const struct switchdev_obj_port_vlan *vlan)
  {
        struct ocelot *ocelot = ds->priv;
 -      u16 vid;
 -      int err;
  
 -      for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) {
 -              err = ocelot_vlan_del(ocelot, port, vid);
 -              if (err) {
 -                      dev_err(ds->dev, "Failed to remove VLAN %d from port %d: %d\n",
 -                              vid, port, err);
 -                      return err;
 -              }
 -      }
 -      return 0;
 +      return ocelot_vlan_del(ocelot, port, vlan->vid);
  }
  
  static int felix_port_enable(struct dsa_switch *ds, int port,
@@@ -697,9 -233,24 +697,24 @@@ static void felix_phylink_mac_link_down
  {
        struct ocelot *ocelot = ds->priv;
        struct ocelot_port *ocelot_port = ocelot->ports[port];
+       int err;
+       ocelot_port_rmwl(ocelot_port, 0, DEV_MAC_ENA_CFG_RX_ENA,
+                        DEV_MAC_ENA_CFG);
  
-       ocelot_port_writel(ocelot_port, 0, DEV_MAC_ENA_CFG);
        ocelot_fields_write(ocelot, port, QSYS_SWITCH_PORT_MODE_PORT_ENA, 0);
+       err = ocelot_port_flush(ocelot, port);
+       if (err)
+               dev_err(ocelot->dev, "failed to flush port %d: %d\n",
+                       port, err);
+       /* Put the port in reset. */
+       ocelot_port_writel(ocelot_port,
+                          DEV_CLOCK_CFG_MAC_TX_RST |
+                          DEV_CLOCK_CFG_MAC_RX_RST |
+                          DEV_CLOCK_CFG_LINK_SPEED(OCELOT_SPEED_1000),
+                          DEV_CLOCK_CFG);
  }
  
  static void felix_phylink_mac_link_up(struct dsa_switch *ds, int port,
@@@ -792,7 -343,7 +807,7 @@@ static void felix_port_qos_map_init(str
                       ANA_PORT_QOS_CFG,
                       port);
  
 -      for (i = 0; i < FELIX_NUM_TC * 2; i++) {
 +      for (i = 0; i < OCELOT_NUM_TC * 2; i++) {
                ocelot_rmw_ix(ocelot,
                              (ANA_PORT_PCP_DEI_MAP_DP_PCP_DEI_VAL & i) |
                              ANA_PORT_PCP_DEI_MAP_QOS_PCP_DEI_VAL(i),
@@@ -915,12 -466,12 +930,12 @@@ static int felix_init_structs(struct fe
        ocelot->map             = felix->info->map;
        ocelot->stats_layout    = felix->info->stats_layout;
        ocelot->num_stats       = felix->info->num_stats;
 -      ocelot->shared_queue_sz = felix->info->shared_queue_sz;
        ocelot->num_mact_rows   = felix->info->num_mact_rows;
        ocelot->vcap            = felix->info->vcap;
        ocelot->ops             = felix->info->ops;
 -      ocelot->inj_prefix      = OCELOT_TAG_PREFIX_SHORT;
 -      ocelot->xtr_prefix      = OCELOT_TAG_PREFIX_SHORT;
 +      ocelot->npi_inj_prefix  = OCELOT_TAG_PREFIX_SHORT;
 +      ocelot->npi_xtr_prefix  = OCELOT_TAG_PREFIX_SHORT;
 +      ocelot->devlink         = felix->ds->devlink;
  
        port_phy_modes = kcalloc(num_phys_ports, sizeof(phy_interface_t),
                                 GFP_KERNEL);
        return 0;
  }
  
 -/* The CPU port module is connected to the Node Processor Interface (NPI). This
 - * is the mode through which frames can be injected from and extracted to an
 - * external CPU, over Ethernet.
 - */
 -static void felix_npi_port_init(struct ocelot *ocelot, int port)
 -{
 -      ocelot->npi = port;
 -
 -      ocelot_write(ocelot, QSYS_EXT_CPU_CFG_EXT_CPUQ_MSK_M |
 -                   QSYS_EXT_CPU_CFG_EXT_CPU_PORT(port),
 -                   QSYS_EXT_CPU_CFG);
 -
 -      /* NPI port Injection/Extraction configuration */
 -      ocelot_fields_write(ocelot, port, SYS_PORT_MODE_INCL_XTR_HDR,
 -                          ocelot->xtr_prefix);
 -      ocelot_fields_write(ocelot, port, SYS_PORT_MODE_INCL_INJ_HDR,
 -                          ocelot->inj_prefix);
 -
 -      /* Disable transmission of pause frames */
 -      ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, 0);
 -}
 -
  /* Hardware initialization done here so that we can allocate structures with
   * devm without fear of dsa_register_switch returning -EPROBE_DEFER and causing
   * us to allocate structures twice (leak memory) and map PCI memory twice
@@@ -1049,10 -622,10 +1064,10 @@@ static int felix_setup(struct dsa_switc
        }
  
        for (port = 0; port < ds->num_ports; port++) {
 -              ocelot_init_port(ocelot, port);
 +              if (dsa_is_unused_port(ds, port))
 +                      continue;
  
 -              if (dsa_is_cpu_port(ds, port))
 -                      felix_npi_port_init(ocelot, port);
 +              ocelot_init_port(ocelot, port);
  
                /* Set the default QoS Classification based on PCP and DEI
                 * bits of vlan tag.
                felix_port_qos_map_init(ocelot, port);
        }
  
 -      /* Include the CPU port module in the forwarding mask for unknown
 -       * unicast - the hardware default value for ANA_FLOODING_FLD_UNICAST
 -       * excludes BIT(ocelot->num_phys_ports), and so does ocelot_init, since
 -       * Ocelot relies on whitelisting MAC addresses towards PGID_CPU.
 -       */
 -      ocelot_write_rix(ocelot,
 -                       ANA_PGID_PGID_PGID(GENMASK(ocelot->num_phys_ports, 0)),
 -                       ANA_PGID_PGID, PGID_UC);
 +      err = ocelot_devlink_sb_register(ocelot);
 +      if (err)
 +              return err;
 +
 +      for (port = 0; port < ds->num_ports; port++) {
 +              if (!dsa_is_cpu_port(ds, port))
 +                      continue;
 +
 +              /* The initial tag protocol is NPI which always returns 0, so
 +               * there's no real point in checking for errors.
 +               */
 +              felix_set_tag_protocol(ds, port, felix->tag_proto);
 +      }
  
        ds->mtu_enforcement_ingress = true;
 -      ds->configure_vlan_while_not_filtering = true;
 +      ds->assisted_learning_on_cpu_port = true;
  
        return 0;
  }
@@@ -1086,22 -654,14 +1101,22 @@@ static void felix_teardown(struct dsa_s
        struct felix *felix = ocelot_to_felix(ocelot);
        int port;
  
 -      if (felix->info->mdio_bus_free)
 -              felix->info->mdio_bus_free(ocelot);
 +      for (port = 0; port < ds->num_ports; port++) {
 +              if (!dsa_is_cpu_port(ds, port))
 +                      continue;
  
 -      for (port = 0; port < ocelot->num_phys_ports; port++)
 -              ocelot_deinit_port(ocelot, port);
 +              felix_del_tag_protocol(ds, port, felix->tag_proto);
 +      }
 +
 +      ocelot_devlink_sb_unregister(ocelot);
        ocelot_deinit_timestamp(ocelot);
 -      /* stop workqueue thread */
        ocelot_deinit(ocelot);
 +
 +      for (port = 0; port < ocelot->num_phys_ports; port++)
 +              ocelot_deinit_port(ocelot, port);
 +
 +      if (felix->info->mdio_bus_free)
 +              felix->info->mdio_bus_free(ocelot);
  }
  
  static int felix_hwtstamp_get(struct dsa_switch *ds, int port,
@@@ -1235,160 -795,46 +1250,160 @@@ static int felix_port_setup_tc(struct d
                return -EOPNOTSUPP;
  }
  
 +static int felix_sb_pool_get(struct dsa_switch *ds, unsigned int sb_index,
 +                           u16 pool_index,
 +                           struct devlink_sb_pool_info *pool_info)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +
 +      return ocelot_sb_pool_get(ocelot, sb_index, pool_index, pool_info);
 +}
 +
 +static int felix_sb_pool_set(struct dsa_switch *ds, unsigned int sb_index,
 +                           u16 pool_index, u32 size,
 +                           enum devlink_sb_threshold_type threshold_type,
 +                           struct netlink_ext_ack *extack)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +
 +      return ocelot_sb_pool_set(ocelot, sb_index, pool_index, size,
 +                                threshold_type, extack);
 +}
 +
 +static int felix_sb_port_pool_get(struct dsa_switch *ds, int port,
 +                                unsigned int sb_index, u16 pool_index,
 +                                u32 *p_threshold)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +
 +      return ocelot_sb_port_pool_get(ocelot, port, sb_index, pool_index,
 +                                     p_threshold);
 +}
 +
 +static int felix_sb_port_pool_set(struct dsa_switch *ds, int port,
 +                                unsigned int sb_index, u16 pool_index,
 +                                u32 threshold, struct netlink_ext_ack *extack)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +
 +      return ocelot_sb_port_pool_set(ocelot, port, sb_index, pool_index,
 +                                     threshold, extack);
 +}
 +
 +static int felix_sb_tc_pool_bind_get(struct dsa_switch *ds, int port,
 +                                   unsigned int sb_index, u16 tc_index,
 +                                   enum devlink_sb_pool_type pool_type,
 +                                   u16 *p_pool_index, u32 *p_threshold)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +
 +      return ocelot_sb_tc_pool_bind_get(ocelot, port, sb_index, tc_index,
 +                                        pool_type, p_pool_index,
 +                                        p_threshold);
 +}
 +
 +static int felix_sb_tc_pool_bind_set(struct dsa_switch *ds, int port,
 +                                   unsigned int sb_index, u16 tc_index,
 +                                   enum devlink_sb_pool_type pool_type,
 +                                   u16 pool_index, u32 threshold,
 +                                   struct netlink_ext_ack *extack)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +
 +      return ocelot_sb_tc_pool_bind_set(ocelot, port, sb_index, tc_index,
 +                                        pool_type, pool_index, threshold,
 +                                        extack);
 +}
 +
 +static int felix_sb_occ_snapshot(struct dsa_switch *ds,
 +                               unsigned int sb_index)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +
 +      return ocelot_sb_occ_snapshot(ocelot, sb_index);
 +}
 +
 +static int felix_sb_occ_max_clear(struct dsa_switch *ds,
 +                                unsigned int sb_index)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +
 +      return ocelot_sb_occ_max_clear(ocelot, sb_index);
 +}
 +
 +static int felix_sb_occ_port_pool_get(struct dsa_switch *ds, int port,
 +                                    unsigned int sb_index, u16 pool_index,
 +                                    u32 *p_cur, u32 *p_max)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +
 +      return ocelot_sb_occ_port_pool_get(ocelot, port, sb_index, pool_index,
 +                                         p_cur, p_max);
 +}
 +
 +static int felix_sb_occ_tc_port_bind_get(struct dsa_switch *ds, int port,
 +                                       unsigned int sb_index, u16 tc_index,
 +                                       enum devlink_sb_pool_type pool_type,
 +                                       u32 *p_cur, u32 *p_max)
 +{
 +      struct ocelot *ocelot = ds->priv;
 +
 +      return ocelot_sb_occ_tc_port_bind_get(ocelot, port, sb_index, tc_index,
 +                                            pool_type, p_cur, p_max);
 +}
 +
  const struct dsa_switch_ops felix_switch_ops = {
 -      .get_tag_protocol       = felix_get_tag_protocol,
 -      .setup                  = felix_setup,
 -      .teardown               = felix_teardown,
 -      .set_ageing_time        = felix_set_ageing_time,
 -      .get_strings            = felix_get_strings,
 -      .get_ethtool_stats      = felix_get_ethtool_stats,
 -      .get_sset_count         = felix_get_sset_count,
 -      .get_ts_info            = felix_get_ts_info,
 -      .phylink_validate       = felix_phylink_validate,
 -      .phylink_mac_config     = felix_phylink_mac_config,
 -      .phylink_mac_link_down  = felix_phylink_mac_link_down,
 -      .phylink_mac_link_up    = felix_phylink_mac_link_up,
 -      .port_enable            = felix_port_enable,
 -      .port_disable           = felix_port_disable,
 -      .port_fdb_dump          = felix_fdb_dump,
 -      .port_fdb_add           = felix_fdb_add,
 -      .port_fdb_del           = felix_fdb_del,
 -      .port_mdb_prepare       = felix_mdb_prepare,
 -      .port_mdb_add           = felix_mdb_add,
 -      .port_mdb_del           = felix_mdb_del,
 -      .port_bridge_join       = felix_bridge_join,
 -      .port_bridge_leave      = felix_bridge_leave,
 -      .port_stp_state_set     = felix_bridge_stp_state_set,
 -      .port_vlan_prepare      = felix_vlan_prepare,
 -      .port_vlan_filtering    = felix_vlan_filtering,
 -      .port_vlan_add          = felix_vlan_add,
 -      .port_vlan_del          = felix_vlan_del,
 -      .port_hwtstamp_get      = felix_hwtstamp_get,
 -      .port_hwtstamp_set      = felix_hwtstamp_set,
 -      .port_rxtstamp          = felix_rxtstamp,
 -      .port_txtstamp          = felix_txtstamp,
 -      .port_change_mtu        = felix_change_mtu,
 -      .port_max_mtu           = felix_get_max_mtu,
 -      .port_policer_add       = felix_port_policer_add,
 -      .port_policer_del       = felix_port_policer_del,
 -      .cls_flower_add         = felix_cls_flower_add,
 -      .cls_flower_del         = felix_cls_flower_del,
 -      .cls_flower_stats       = felix_cls_flower_stats,
 -      .port_setup_tc          = felix_port_setup_tc,
 +      .get_tag_protocol               = felix_get_tag_protocol,
 +      .change_tag_protocol            = felix_change_tag_protocol,
 +      .setup                          = felix_setup,
 +      .teardown                       = felix_teardown,
 +      .set_ageing_time                = felix_set_ageing_time,
 +      .get_strings                    = felix_get_strings,
 +      .get_ethtool_stats              = felix_get_ethtool_stats,
 +      .get_sset_count                 = felix_get_sset_count,
 +      .get_ts_info                    = felix_get_ts_info,
 +      .phylink_validate               = felix_phylink_validate,
 +      .phylink_mac_config             = felix_phylink_mac_config,
 +      .phylink_mac_link_down          = felix_phylink_mac_link_down,
 +      .phylink_mac_link_up            = felix_phylink_mac_link_up,
 +      .port_enable                    = felix_port_enable,
 +      .port_disable                   = felix_port_disable,
 +      .port_fdb_dump                  = felix_fdb_dump,
 +      .port_fdb_add                   = felix_fdb_add,
 +      .port_fdb_del                   = felix_fdb_del,
 +      .port_mdb_add                   = felix_mdb_add,
 +      .port_mdb_del                   = felix_mdb_del,
 +      .port_bridge_join               = felix_bridge_join,
 +      .port_bridge_leave              = felix_bridge_leave,
 +      .port_lag_join                  = felix_lag_join,
 +      .port_lag_leave                 = felix_lag_leave,
 +      .port_lag_change                = felix_lag_change,
 +      .port_stp_state_set             = felix_bridge_stp_state_set,
 +      .port_vlan_filtering            = felix_vlan_filtering,
 +      .port_vlan_add                  = felix_vlan_add,
 +      .port_vlan_del                  = felix_vlan_del,
 +      .port_hwtstamp_get              = felix_hwtstamp_get,
 +      .port_hwtstamp_set              = felix_hwtstamp_set,
 +      .port_rxtstamp                  = felix_rxtstamp,
 +      .port_txtstamp                  = felix_txtstamp,
 +      .port_change_mtu                = felix_change_mtu,
 +      .port_max_mtu                   = felix_get_max_mtu,
 +      .port_policer_add               = felix_port_policer_add,
 +      .port_policer_del               = felix_port_policer_del,
 +      .cls_flower_add                 = felix_cls_flower_add,
 +      .cls_flower_del                 = felix_cls_flower_del,
 +      .cls_flower_stats               = felix_cls_flower_stats,
 +      .port_setup_tc                  = felix_port_setup_tc,
 +      .devlink_sb_pool_get            = felix_sb_pool_get,
 +      .devlink_sb_pool_set            = felix_sb_pool_set,
 +      .devlink_sb_port_pool_get       = felix_sb_port_pool_get,
 +      .devlink_sb_port_pool_set       = felix_sb_port_pool_set,
 +      .devlink_sb_tc_pool_bind_get    = felix_sb_tc_pool_bind_get,
 +      .devlink_sb_tc_pool_bind_set    = felix_sb_tc_pool_bind_set,
 +      .devlink_sb_occ_snapshot        = felix_sb_occ_snapshot,
 +      .devlink_sb_occ_max_clear       = felix_sb_occ_max_clear,
 +      .devlink_sb_occ_port_pool_get   = felix_sb_occ_port_pool_get,
 +      .devlink_sb_occ_tc_port_bind_get= felix_sb_occ_tc_port_bind_get,
  };
  
  struct net_device *felix_port_to_netdev(struct ocelot *ocelot, int port)
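The felix.c hunks above make the tagging protocol switchable at runtime: felix_change_tag_protocol() tears down the old tagger, tries to install the new one, and reinstalls the old one if the new setup fails (only the tag_8021q path can fail; the NPI path cannot, which is what keeps the switch in a consistent state). The following is a minimal userspace sketch of that rollback pattern only; change_proto(), setup_proto(), teardown_proto() and the PROTO_* constants are hypothetical stand-ins, not kernel or driver APIs.

    #include <errno.h>
    #include <stdio.h>

    enum proto { PROTO_NPI, PROTO_8021Q };

    /* Stand-ins: the NPI-like setup always succeeds, the 8021q-like one may fail. */
    static int setup_proto(enum proto p)
    {
            return p == PROTO_8021Q ? -ENOMEM : 0;
    }

    static void teardown_proto(enum proto p)
    {
            (void)p;
    }

    static int change_proto(enum proto *cur, enum proto want)
    {
            int err;

            teardown_proto(*cur);           /* the old tagger can always be removed */
            err = setup_proto(want);        /* the new tagger may fail */
            if (err) {
                    setup_proto(*cur);      /* restore; this path cannot fail */
                    return err;
            }
            *cur = want;
            return 0;
    }

    int main(void)
    {
            enum proto cur = PROTO_NPI;
            int err = change_proto(&cur, PROTO_8021Q);

            printf("switch failed with %d, still using proto %d\n", err, cur);
            return 0;
    }

Either way the caller ends up with a working tagger, which is why the diff's comment can claim the switch is always left in a consistent state.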
index 1db6cfd2b55c6ebf3f4f1b98f27db2a14b3df3ef,a0596c073dddc10d03f0625b58c24f2edbd91b50..102f2c91fdb855311e164186a2fddda0492f3ea2
@@@ -404,6 -404,7 +404,7 @@@ static int ena_xdp_execute(struct ena_r
                if (unlikely(!xdpf)) {
                        trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
                        xdp_stat = &rx_ring->rx_stats.xdp_aborted;
+                       verdict = XDP_ABORTED;
                        break;
                }
  
                        xdp_stat = &rx_ring->rx_stats.xdp_redirect;
                        break;
                }
-               fallthrough;
+               trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
+               xdp_stat = &rx_ring->rx_stats.xdp_aborted;
+               verdict = XDP_ABORTED;
+               break;
        case XDP_ABORTED:
                trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
                xdp_stat = &rx_ring->rx_stats.xdp_aborted;
@@@ -1585,9 -1589,10 +1589,9 @@@ static int ena_xdp_handle_buff(struct e
        int ret;
  
        rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id];
 -      xdp->data = page_address(rx_info->page) + rx_info->page_offset;
 -      xdp_set_data_meta_invalid(xdp);
 -      xdp->data_hard_start = page_address(rx_info->page);
 -      xdp->data_end = xdp->data + rx_ring->ena_bufs[0].len;
 +      xdp_prepare_buff(xdp, page_address(rx_info->page),
 +                       rx_info->page_offset,
 +                       rx_ring->ena_bufs[0].len, false);
        /* If for some reason we received a bigger packet than
         * we expect, then we simply drop it
         */
@@@ -1633,7 -1638,8 +1637,7 @@@ static int ena_clean_rx_irq(struct ena_
        netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
                  "%s qid %d\n", __func__, rx_ring->qid);
        res_budget = budget;
 -      xdp.rxq = &rx_ring->xdp_rxq;
 -      xdp.frame_sz = ENA_PAGE_SIZE;
 +      xdp_init_buff(&xdp, ENA_PAGE_SIZE, &rx_ring->xdp_rxq);
  
        do {
                xdp_verdict = XDP_PASS;
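The ena change above is an accounting fix: when buffer-to-frame conversion or the redirect step fails inside the XDP_TX/XDP_REDIRECT branches, the local verdict is overwritten with XDP_ABORTED so the exception trace and the counter that gets bumped describe what actually happened. A standalone sketch of that idea, with hypothetical names rather than the ena driver's structures:

    #include <stdbool.h>
    #include <stdio.h>

    enum verdict { XDP_PASS, XDP_TX, XDP_REDIRECT, XDP_DROP, XDP_ABORTED };

    struct stats { unsigned long tx, redirect, aborted; };

    /* Return the verdict to report: a failed TX/REDIRECT step becomes ABORTED. */
    static enum verdict account(enum verdict v, bool step_failed, struct stats *s)
    {
            if ((v == XDP_TX || v == XDP_REDIRECT) && step_failed) {
                    s->aborted++;           /* count what actually happened */
                    return XDP_ABORTED;
            }
            if (v == XDP_TX)
                    s->tx++;
            else if (v == XDP_REDIRECT)
                    s->redirect++;
            return v;
    }

    int main(void)
    {
            struct stats s = { 0 };
            enum verdict v = account(XDP_TX, true, &s);

            printf("reported verdict %d, aborted count %lu\n", v, s.aborted);
            return 0;
    }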
index d8e568f6caf303de90bf560d0e012138592a41f9,6faa20bed48858c7785934b0c78c6246d9699f04..ccfe52a50a665bb10b7411207a18b6bab05f9673
@@@ -2180,8 -2180,10 +2180,10 @@@ static int dpaa_a050385_wa_xdpf(struct 
                                struct xdp_frame **init_xdpf)
  {
        struct xdp_frame *new_xdpf, *xdpf = *init_xdpf;
-       void *new_buff;
+       void *new_buff, *aligned_data;
        struct page *p;
+       u32 data_shift;
+       int headroom;
  
        /* Check the data alignment and make sure the headroom is large
         * enough to store the xdpf backpointer. Use an aligned headroom
         * byte frame headroom. If the XDP program uses all of it, copy the
         * data to a new buffer and make room for storing the backpointer.
         */
-       if (PTR_IS_ALIGNED(xdpf->data, DPAA_A050385_ALIGN) &&
+       if (PTR_IS_ALIGNED(xdpf->data, DPAA_FD_DATA_ALIGNMENT) &&
            xdpf->headroom >= priv->tx_headroom) {
                xdpf->headroom = priv->tx_headroom;
                return 0;
        }
  
+       /* Try to move the data inside the buffer just enough to align it and
+        * store the xdpf backpointer. If the available headroom isn't large
+        * enough, resort to allocating a new buffer and copying the data.
+        */
+       aligned_data = PTR_ALIGN_DOWN(xdpf->data, DPAA_FD_DATA_ALIGNMENT);
+       data_shift = xdpf->data - aligned_data;
+       /* The XDP frame's headroom needs to be large enough to accommodate
+        * shifting the data as well as storing the xdpf backpointer.
+        */
+       if (xdpf->headroom  >= data_shift + priv->tx_headroom) {
+               memmove(aligned_data, xdpf->data, xdpf->len);
+               xdpf->data = aligned_data;
+               xdpf->headroom = priv->tx_headroom;
+               return 0;
+       }
+       /* The new xdp_frame is stored in the new buffer. Reserve enough space
+        * in the headroom for storing it along with the driver's private
+        * info. The headroom needs to be aligned to DPAA_FD_DATA_ALIGNMENT to
+        * guarantee the data's alignment in the buffer.
+        */
+       headroom = ALIGN(sizeof(*new_xdpf) + priv->tx_headroom,
+                        DPAA_FD_DATA_ALIGNMENT);
+       /* Assure the extended headroom and data don't overflow the buffer,
+        * while maintaining the mandatory tailroom.
+        */
+       if (headroom + xdpf->len > DPAA_BP_RAW_SIZE -
+                       SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+               return -ENOMEM;
        p = dev_alloc_pages(0);
        if (unlikely(!p))
                return -ENOMEM;
  
        /* Copy the data to the new buffer at a properly aligned offset */
        new_buff = page_address(p);
-       memcpy(new_buff + priv->tx_headroom, xdpf->data, xdpf->len);
+       memcpy(new_buff + headroom, xdpf->data, xdpf->len);
  
        /* Create an XDP frame around the new buffer in a similar fashion
         * to xdp_convert_buff_to_frame.
         */
        new_xdpf = new_buff;
-       new_xdpf->data = new_buff + priv->tx_headroom;
+       new_xdpf->data = new_buff + headroom;
        new_xdpf->len = xdpf->len;
        new_xdpf->headroom = priv->tx_headroom;
        new_xdpf->frame_sz = DPAA_BP_RAW_SIZE;
@@@ -2532,10 -2566,12 +2566,10 @@@ static u32 dpaa_run_xdp(struct dpaa_pri
                return XDP_PASS;
        }
  
 -      xdp.data = vaddr + fd_off;
 -      xdp.data_meta = xdp.data;
 -      xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM;
 -      xdp.data_end = xdp.data + qm_fd_get_length(fd);
 -      xdp.frame_sz = DPAA_BP_RAW_SIZE - DPAA_TX_PRIV_DATA_SIZE;
 -      xdp.rxq = &dpaa_fq->xdp_rxq;
 +      xdp_init_buff(&xdp, DPAA_BP_RAW_SIZE - DPAA_TX_PRIV_DATA_SIZE,
 +                    &dpaa_fq->xdp_rxq);
 +      xdp_prepare_buff(&xdp, vaddr + fd_off - XDP_PACKET_HEADROOM,
 +                       XDP_PACKET_HEADROOM, qm_fd_get_length(fd), true);
  
        /* We reserve a fixed headroom of 256 bytes under the erratum and we
         * offer it all to XDP programs to use. If no room is left for the
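The dpaa workaround above avoids a full copy when it can: it first tries to memmove() the payload down to the previous DPAA_FD_DATA_ALIGNMENT boundary, which only works when the XDP frame's headroom covers both that shift and the driver's tx_headroom; otherwise it allocates a fresh page with an alignment-rounded headroom. A small userspace sketch of the same arithmetic follows; the constants and the ALIGN_DOWN macro are illustrative stand-ins, not the driver's definitions.

    #include <stdint.h>
    #include <stdio.h>

    #define DATA_ALIGNMENT  16u     /* stand-in for DPAA_FD_DATA_ALIGNMENT */
    #define TX_HEADROOM     64u     /* stand-in for priv->tx_headroom */
    #define ALIGN_DOWN(x, a)        ((x) & ~((uintptr_t)(a) - 1))

    int main(void)
    {
            uintptr_t data = 0x1000 + 200;  /* unaligned start of the payload */
            uintptr_t aligned = ALIGN_DOWN(data, DATA_ALIGNMENT);
            uintptr_t shift = data - aligned;       /* bytes the payload moves down */
            unsigned int headroom = 210;    /* headroom available in this frame */

            if (headroom >= shift + TX_HEADROOM)
                    printf("in place: memmove payload down by %lu bytes\n",
                           (unsigned long)shift);
            else
                    printf("not enough headroom: copy into a new page\n");
            return 0;
    }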
index 64a80a5933cb37873d3bd8d281543f8be1db413c,48549db23c5241e6bcd04d3584c0b837c6f981a0..7d81ffed4dc0f8bb2e284136bda765d6ed1a06c4
@@@ -24,7 -24,7 +24,7 @@@
  #include "hnae3.h"
  
  #define HCLGE_NAME                    "hclge"
 -#define HCLGE_STATS_READ(p, offset) (*((u64 *)((u8 *)(p) + (offset))))
 +#define HCLGE_STATS_READ(p, offset) (*(u64 *)((u8 *)(p) + (offset)))
  #define HCLGE_MAC_STATS_FIELD_OFF(f) (offsetof(struct hclge_mac_stats, f))
  
  #define HCLGE_BUF_SIZE_UNIT   256U
@@@ -55,6 -55,8 +55,6 @@@
  
  #define HCLGE_LINK_STATUS_MS  10
  
 -#define HCLGE_VF_VPORT_START_NUM      1
 -
  static int hclge_set_mac_mtu(struct hclge_dev *hdev, int new_mps);
  static int hclge_init_vlan_config(struct hclge_dev *hdev);
  static void hclge_sync_vlan_filter(struct hclge_dev *hdev);
@@@ -626,7 -628,7 +626,7 @@@ static u8 *hclge_tqps_get_strings(struc
        for (i = 0; i < kinfo->num_tqps; i++) {
                struct hclge_tqp *tqp = container_of(handle->kinfo.tqp[i],
                        struct hclge_tqp, q);
 -              snprintf(buff, ETH_GSTRING_LEN, "txq%d_pktnum_rcd",
 +              snprintf(buff, ETH_GSTRING_LEN, "txq%u_pktnum_rcd",
                         tqp->index);
                buff = buff + ETH_GSTRING_LEN;
        }
        for (i = 0; i < kinfo->num_tqps; i++) {
                struct hclge_tqp *tqp = container_of(kinfo->tqp[i],
                        struct hclge_tqp, q);
 -              snprintf(buff, ETH_GSTRING_LEN, "rxq%d_pktnum_rcd",
 +              snprintf(buff, ETH_GSTRING_LEN, "rxq%u_pktnum_rcd",
                         tqp->index);
                buff = buff + ETH_GSTRING_LEN;
        }
@@@ -928,7 -930,7 +928,7 @@@ static int hclge_query_pf_resource(stru
        return 0;
  }
  
 -static int hclge_parse_speed(int speed_cmd, int *speed)
 +static int hclge_parse_speed(u8 speed_cmd, u32 *speed)
  {
        switch (speed_cmd) {
        case 6:
@@@ -1371,8 -1373,6 +1371,8 @@@ static void hclge_set_default_dev_specs
        ae_dev->dev_specs.rss_key_size = HCLGE_RSS_KEY_SIZE;
        ae_dev->dev_specs.max_tm_rate = HCLGE_ETHER_MAX_RATE;
        ae_dev->dev_specs.max_int_gl = HCLGE_DEF_MAX_INT_GL;
 +      ae_dev->dev_specs.max_frm_size = HCLGE_MAC_MAX_FRAME;
 +      ae_dev->dev_specs.max_qset_num = HCLGE_MAX_QSET_NUM;
  }
  
  static void hclge_parse_dev_specs(struct hclge_dev *hdev,
        ae_dev->dev_specs.int_ql_max = le16_to_cpu(req0->int_ql_max);
        ae_dev->dev_specs.rss_key_size = le16_to_cpu(req0->rss_key_size);
        ae_dev->dev_specs.max_tm_rate = le32_to_cpu(req0->max_tm_rate);
 +      ae_dev->dev_specs.max_qset_num = le16_to_cpu(req1->max_qset_num);
        ae_dev->dev_specs.max_int_gl = le16_to_cpu(req1->max_int_gl);
 +      ae_dev->dev_specs.max_frm_size = le16_to_cpu(req1->max_frm_size);
  }
  
  static void hclge_check_dev_specs(struct hclge_dev *hdev)
                dev_specs->rss_key_size = HCLGE_RSS_KEY_SIZE;
        if (!dev_specs->max_tm_rate)
                dev_specs->max_tm_rate = HCLGE_ETHER_MAX_RATE;
 +      if (!dev_specs->max_qset_num)
 +              dev_specs->max_qset_num = HCLGE_MAX_QSET_NUM;
        if (!dev_specs->max_int_gl)
                dev_specs->max_int_gl = HCLGE_DEF_MAX_INT_GL;
 +      if (!dev_specs->max_frm_size)
 +              dev_specs->max_frm_size = HCLGE_MAC_MAX_FRAME;
  }
  
  static int hclge_query_dev_specs(struct hclge_dev *hdev)
@@@ -4243,6 -4237,11 +4243,6 @@@ static u32 hclge_get_rss_key_size(struc
        return HCLGE_RSS_KEY_SIZE;
  }
  
 -static u32 hclge_get_rss_indir_size(struct hnae3_handle *handle)
 -{
 -      return HCLGE_RSS_IND_TBL_SIZE;
 -}
 -
  static int hclge_set_rss_algo_key(struct hclge_dev *hdev,
                                  const u8 hfunc, const u8 *key)
  {
@@@ -4284,7 -4283,6 +4284,7 @@@ static int hclge_set_rss_indir_table(st
  {
        struct hclge_rss_indirection_table_cmd *req;
        struct hclge_desc desc;
 +      int rss_cfg_tbl_num;
        u8 rss_msb_oft;
        u8 rss_msb_val;
        int ret;
        u32 j;
  
        req = (struct hclge_rss_indirection_table_cmd *)desc.data;
 +      rss_cfg_tbl_num = hdev->ae_dev->dev_specs.rss_ind_tbl_size /
 +                        HCLGE_RSS_CFG_TBL_SIZE;
  
 -      for (i = 0; i < HCLGE_RSS_CFG_TBL_NUM; i++) {
 +      for (i = 0; i < rss_cfg_tbl_num; i++) {
                hclge_cmd_setup_basic_desc
                        (&desc, HCLGE_OPC_RSS_INDIR_TABLE, false);
  
@@@ -4402,7 -4398,6 +4402,7 @@@ static int hclge_set_rss_input_tuple(st
  static int hclge_get_rss(struct hnae3_handle *handle, u32 *indir,
                         u8 *key, u8 *hfunc)
  {
 +      struct hnae3_ae_dev *ae_dev = pci_get_drvdata(handle->pdev);
        struct hclge_vport *vport = hclge_get_vport(handle);
        int i;
  
  
        /* Get indirect table */
        if (indir)
 -              for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++)
 +              for (i = 0; i < ae_dev->dev_specs.rss_ind_tbl_size; i++)
                        indir[i] =  vport->rss_indirection_tbl[i];
  
        return 0;
  static int hclge_set_rss(struct hnae3_handle *handle, const u32 *indir,
                         const  u8 *key, const  u8 hfunc)
  {
 +      struct hnae3_ae_dev *ae_dev = pci_get_drvdata(handle->pdev);
        struct hclge_vport *vport = hclge_get_vport(handle);
        struct hclge_dev *hdev = vport->back;
        u8 hash_algo;
        }
  
        /* Update the shadow RSS table with user specified qids */
 -      for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++)
 +      for (i = 0; i < ae_dev->dev_specs.rss_ind_tbl_size; i++)
                vport->rss_indirection_tbl[i] = indir[i];
  
        /* Update the hardware */
@@@ -4709,15 -4703,14 +4709,15 @@@ void hclge_rss_indir_init_cfg(struct hc
        int i, j;
  
        for (j = 0; j < hdev->num_vmdq_vport + 1; j++) {
 -              for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++)
 +              for (i = 0; i < hdev->ae_dev->dev_specs.rss_ind_tbl_size; i++)
                        vport[j].rss_indirection_tbl[i] =
                                i % vport[j].alloc_rss_size;
        }
  }
  
 -static void hclge_rss_init_cfg(struct hclge_dev *hdev)
 +static int hclge_rss_init_cfg(struct hclge_dev *hdev)
  {
 +      u16 rss_ind_tbl_size = hdev->ae_dev->dev_specs.rss_ind_tbl_size;
        int i, rss_algo = HCLGE_RSS_HASH_ALGO_TOEPLITZ;
        struct hclge_vport *vport = hdev->vport;
  
                rss_algo = HCLGE_RSS_HASH_ALGO_SIMPLE;
  
        for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
 +              u16 *rss_ind_tbl;
 +
                vport[i].rss_tuple_sets.ipv4_tcp_en =
                        HCLGE_RSS_INPUT_TUPLE_OTHER;
                vport[i].rss_tuple_sets.ipv4_udp_en =
  
                vport[i].rss_algo = rss_algo;
  
 +              rss_ind_tbl = devm_kcalloc(&hdev->pdev->dev, rss_ind_tbl_size,
 +                                         sizeof(*rss_ind_tbl), GFP_KERNEL);
 +              if (!rss_ind_tbl)
 +                      return -ENOMEM;
 +
 +              vport[i].rss_indirection_tbl = rss_ind_tbl;
                memcpy(vport[i].rss_hash_key, hclge_hash_key,
                       HCLGE_RSS_KEY_SIZE);
        }
  
        hclge_rss_indir_init_cfg(hdev);
 +
 +      return 0;
  }
  
  int hclge_bind_ring_with_vector(struct hclge_vport *vport,
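
[Editor's note: the hclge hunks above replace the compile-time HCLGE_RSS_IND_TBL_SIZE with a size reported by the device (dev_specs.rss_ind_tbl_size) and allocate each vport's indirection table at runtime. A minimal userspace sketch of that pattern follows; the table and queue sizes are illustrative, not the driver's.]

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Allocate an indirection table whose size comes from the device and
     * spread the queues over it round-robin, as hclge_rss_indir_init_cfg()
     * does in the hunk above. */
    static uint16_t *rss_indir_init(size_t tbl_size, uint16_t rss_size)
    {
            uint16_t *tbl = calloc(tbl_size, sizeof(*tbl));

            if (!tbl)
                    return NULL;    /* the driver returns -ENOMEM here */
            for (size_t i = 0; i < tbl_size; i++)
                    tbl[i] = i % rss_size;
            return tbl;
    }

    int main(void)
    {
            uint16_t *tbl = rss_indir_init(512, 16);        /* illustrative sizes */

            if (tbl) {
                    printf("entry 0 = %d, entry 17 = %d\n", tbl[0], tbl[17]);
                    free(tbl);
            }
            return 0;
    }
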
@@@ -5595,7 -5578,7 +5595,7 @@@ static int hclge_fd_check_ext_tuple(str
                if (fs->m_ext.vlan_tci &&
                    be16_to_cpu(fs->h_ext.vlan_tci) >= VLAN_N_VID) {
                        dev_err(&hdev->pdev->dev,
 -                              "failed to config vlan_tci, invalid vlan_tci: %u, max is %u.\n",
 +                              "failed to config vlan_tci, invalid vlan_tci: %u, max is %d.\n",
                                ntohs(fs->h_ext.vlan_tci), VLAN_N_VID - 1);
                        return -EINVAL;
                }
@@@ -9681,7 -9664,7 +9681,7 @@@ int hclge_set_vport_mtu(struct hclge_vp
        /* HW supports 2 layer vlan */
        max_frm_size = new_mtu + ETH_HLEN + ETH_FCS_LEN + 2 * VLAN_HLEN;
        if (max_frm_size < HCLGE_MAC_MIN_FRAME ||
 -          max_frm_size > HCLGE_MAC_MAX_FRAME)
 +          max_frm_size > hdev->ae_dev->dev_specs.max_frm_size)
                return -EINVAL;
  
        max_frm_size = max(max_frm_size, HCLGE_MAC_DEFAULT_FRAME);
@@@ -9830,12 -9813,19 +9830,19 @@@ int hclge_reset_tqp(struct hnae3_handl
  
  void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id)
  {
+       struct hnae3_handle *handle = &vport->nic;
        struct hclge_dev *hdev = vport->back;
        int reset_try_times = 0;
        int reset_status;
        u16 queue_gid;
        int ret;
  
+       if (queue_id >= handle->kinfo.num_tqps) {
+               dev_warn(&hdev->pdev->dev, "Invalid vf queue id(%u)\n",
+                        queue_id);
+               return;
+       }
+
        queue_gid = hclge_covert_handle_qid_global(&vport->nic, queue_id);
  
        ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, true);
@@@ -10598,12 -10588,7 +10605,12 @@@ static int hclge_init_ae_dev(struct hna
                goto err_mdiobus_unreg;
        }
  
 -      hclge_rss_init_cfg(hdev);
 +      ret = hclge_rss_init_cfg(hdev);
 +      if (ret) {
 +              dev_err(&pdev->dev, "failed to init rss cfg, ret = %d\n", ret);
 +              goto err_mdiobus_unreg;
 +      }
 +
        ret = hclge_rss_init_hw(hdev);
        if (ret) {
                dev_err(&pdev->dev, "Rss init fail, ret =%d\n", ret);
@@@ -10831,7 -10816,7 +10838,7 @@@ static void hclge_reset_vf_rate(struct 
        }
  }
  
 -static int hclge_vf_rate_param_check(struct hclge_dev *hdev, int vf,
 +static int hclge_vf_rate_param_check(struct hclge_dev *hdev,
                                     int min_tx_rate, int max_tx_rate)
  {
        if (min_tx_rate != 0 ||
@@@ -10852,7 -10837,7 +10859,7 @@@ static int hclge_set_vf_rate(struct hna
        struct hclge_dev *hdev = vport->back;
        int ret;
  
 -      ret = hclge_vf_rate_param_check(hdev, vf, min_tx_rate, max_tx_rate);
 +      ret = hclge_vf_rate_param_check(hdev, min_tx_rate, max_tx_rate);
        if (ret)
                return ret;
  
@@@ -11094,7 -11079,6 +11101,7 @@@ static void hclge_get_tqps_and_rss_info
  static int hclge_set_channels(struct hnae3_handle *handle, u32 new_tqps_num,
                              bool rxfh_configured)
  {
 +      struct hnae3_ae_dev *ae_dev = pci_get_drvdata(handle->pdev);
        struct hclge_vport *vport = hclge_get_vport(handle);
        struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo;
        u16 tc_offset[HCLGE_MAX_TC_NUM] = {0};
                goto out;
  
        /* Reinitializes the rss indirect table according to the new RSS size */
 -      rss_indir = kcalloc(HCLGE_RSS_IND_TBL_SIZE, sizeof(u32), GFP_KERNEL);
 +      rss_indir = kcalloc(ae_dev->dev_specs.rss_ind_tbl_size, sizeof(u32),
 +                          GFP_KERNEL);
        if (!rss_indir)
                return -ENOMEM;
  
 -      for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++)
 +      for (i = 0; i < ae_dev->dev_specs.rss_ind_tbl_size; i++)
                rss_indir[i] = i % kinfo->rss_size;
  
        ret = hclge_set_rss(handle, rss_indir, NULL, 0);
@@@ -11823,6 -11806,7 +11830,6 @@@ static const struct hnae3_ae_ops hclge_
        .get_fec = hclge_get_fec,
        .set_fec = hclge_set_fec,
        .get_rss_key_size = hclge_get_rss_key_size,
 -      .get_rss_indir_size = hclge_get_rss_indir_size,
        .get_rss = hclge_get_rss,
        .set_rss = hclge_set_rss,
        .set_rss_tuple = hclge_set_rss_tuple,
        .enable_fd = hclge_enable_fd,
        .add_arfs_entry = hclge_add_fd_entry_by_arfs,
        .dbg_run_cmd = hclge_dbg_run_cmd,
 +      .dbg_read_cmd = hclge_dbg_read_cmd,
        .handle_hw_ras_error = hclge_handle_hw_ras_error,
        .get_hw_reset_stat = hclge_get_hw_reset_stat,
        .ae_dev_resetting = hclge_ae_dev_resetting,
index 52a3737225891e250343e76ca3b3dad21dbf6205,ffb416e088a978800a9582abd80ab5c77cec4fad..51a36e74f0881f61db54d8c5c4dafaf30634f57b
@@@ -56,7 -56,7 +56,7 @@@ static int hclge_gen_resp_to_vf(struct 
                resp_pf_to_vf->msg.resp_status = resp;
        } else {
                dev_warn(&hdev->pdev->dev,
 -                       "failed to send response to VF, response status %d is out-of-bound\n",
 +                       "failed to send response to VF, response status %u is out-of-bound\n",
                         resp);
                resp_pf_to_vf->msg.resp_status = EIO;
        }
@@@ -158,21 -158,31 +158,31 @@@ static int hclge_get_ring_chain_from_mb
                        struct hclge_vport *vport)
  {
        struct hnae3_ring_chain_node *cur_chain, *new_chain;
+       struct hclge_dev *hdev = vport->back;
        int ring_num;
-       int i = 0;
+       int i;
  
        ring_num = req->msg.ring_num;
  
        if (ring_num > HCLGE_MBX_MAX_RING_CHAIN_PARAM_NUM)
                return -ENOMEM;
  
+       for (i = 0; i < ring_num; i++) {
+               if (req->msg.param[i].tqp_index >= vport->nic.kinfo.rss_size) {
+                       dev_err(&hdev->pdev->dev, "tqp index(%u) is out of range(0-%u)\n",
+                               req->msg.param[i].tqp_index,
+                               vport->nic.kinfo.rss_size - 1);
+                       return -EINVAL;
+               }
+       }
+
        hnae3_set_bit(ring_chain->flag, HNAE3_RING_TYPE_B,
-                     req->msg.param[i].ring_type);
+                     req->msg.param[0].ring_type);
        ring_chain->tqp_index =
                hclge_get_queue_id(vport->nic.kinfo.tqp
-                                  [req->msg.param[i].tqp_index]);
+                                  [req->msg.param[0].tqp_index]);
        hnae3_set_field(ring_chain->int_gl_idx, HNAE3_RING_GL_IDX_M,
-                       HNAE3_RING_GL_IDX_S, req->msg.param[i].int_gl_index);
+                       HNAE3_RING_GL_IDX_S, req->msg.param[0].int_gl_index);
  
        cur_chain = ring_chain;
  
@@@ -597,6 -607,17 +607,17 @@@ static void hclge_get_rss_key(struct hc
  
        index = mbx_req->msg.data[0];
  
+       /* Check the query index of rss_hash_key from VF, make sure it does
+        * not exceed the size of rss_hash_key.
+        */
+       if (((index + 1) * HCLGE_RSS_MBX_RESP_LEN) >
+             sizeof(vport[0].rss_hash_key)) {
+               dev_warn(&hdev->pdev->dev,
+                        "failed to get the rss hash key, the index(%u) is invalid\n",
+                        index);
+               return;
+       }
+
        memcpy(resp_msg->data,
               &hdev->vport[0].rss_hash_key[index * HCLGE_RSS_MBX_RESP_LEN],
               HCLGE_RSS_MBX_RESP_LEN);
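
[Editor's note: both mailbox hunks above add bounds checks on VF-supplied indices (the ring's tqp_index and the RSS hash-key chunk index) before they are used to address PF memory. Below is a minimal sketch of the hash-key check only; KEY_LEN and CHUNK_LEN are illustrative values, not the driver's.]

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define KEY_LEN    40   /* illustrative key length */
    #define CHUNK_LEN   8   /* models HCLGE_RSS_MBX_RESP_LEN */

    /* Refuse chunk indices that would read past the end of the key; the
     * driver warns and bails out in the same situation. */
    static int copy_key_chunk(uint8_t *dst, const uint8_t key[KEY_LEN],
                              unsigned int index)
    {
            if ((index + 1u) * CHUNK_LEN > KEY_LEN) {
                    fprintf(stderr, "invalid rss key index %u\n", index);
                    return -1;
            }
            memcpy(dst, key + index * CHUNK_LEN, CHUNK_LEN);
            return 0;
    }

    int main(void)
    {
            uint8_t key[KEY_LEN] = { 0 }, out[CHUNK_LEN];

            printf("index 4: %d, index 5: %d\n",
                   copy_key_chunk(out, key, 4), copy_key_chunk(out, key, 5));
            return 0;
    }
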
index 481bcedb391a13933d4f50518bd97bff663938a0,a536fdbf05e196b58603c47c2c7de7c1a2afb9fe..a1579cd4bfe1bd316c176eee695ac925f7a18f79
@@@ -1384,10 -1384,10 +1384,10 @@@ static int ibmvnic_close(struct net_dev
  
  /**
   * build_hdr_data - creates L2/L3/L4 header data buffer
 - * @hdr_field - bitfield determining needed headers
 - * @skb - socket buffer
 - * @hdr_len - array of header lengths
 - * @tot_len - total length of data
 + * @hdr_field: bitfield determining needed headers
 + * @skb: socket buffer
 + * @hdr_len: array of header lengths
 + * @hdr_data: buffer to write the header to
   *
   * Reads hdr_field to determine which headers are needed by firmware.
   * Builds a buffer containing these headers.  Saves individual header
@@@ -1444,11 -1444,11 +1444,11 @@@ static int build_hdr_data(u8 hdr_field
  
  /**
   * create_hdr_descs - create header and header extension descriptors
 - * @hdr_field - bitfield determining needed headers
 - * @data - buffer containing header data
 - * @len - length of data buffer
 - * @hdr_len - array of individual header lengths
 - * @scrq_arr - descriptor array
 + * @hdr_field: bitfield determining needed headers
 + * @hdr_data: buffer containing header data
 + * @len: length of data buffer
 + * @hdr_len: array of individual header lengths
 + * @scrq_arr: descriptor array
   *
   * Creates header and, if needed, header extension descriptors and
   * places them in a descriptor array, scrq_arr
@@@ -1496,9 -1496,10 +1496,9 @@@ static int create_hdr_descs(u8 hdr_fiel
  
  /**
   * build_hdr_descs_arr - build a header descriptor array
 - * @skb - socket buffer
 - * @num_entries - number of descriptors to be sent
 - * @subcrq - first TX descriptor
 - * @hdr_field - bit field determining which headers will be sent
 + * @txbuff: tx buffer
 + * @num_entries: number of descriptors to be sent
 + * @hdr_field: bit field determining which headers will be sent
   *
   * This function will build a TX descriptor array with applicable
   * L2/L3/L4 packet header descriptors to be sent by send_subcrq_indirect.
@@@ -1924,7 -1925,93 +1924,7 @@@ static int ibmvnic_set_mac(struct net_d
        return rc;
  }
  
 -/**
 - * do_change_param_reset returns zero if we are able to keep processing reset
 - * events, or non-zero if we hit a fatal error and must halt.
 - */
 -static int do_change_param_reset(struct ibmvnic_adapter *adapter,
 -                               struct ibmvnic_rwi *rwi,
 -                               u32 reset_state)
 -{
 -      struct net_device *netdev = adapter->netdev;
 -      int i, rc;
 -
 -      netdev_dbg(adapter->netdev, "Change param resetting driver (%d)\n",
 -                 rwi->reset_reason);
 -
 -      netif_carrier_off(netdev);
 -      adapter->reset_reason = rwi->reset_reason;
 -
 -      ibmvnic_cleanup(netdev);
 -
 -      if (reset_state == VNIC_OPEN) {
 -              rc = __ibmvnic_close(netdev);
 -              if (rc)
 -                      goto out;
 -      }
 -
 -      release_resources(adapter);
 -      release_sub_crqs(adapter, 1);
 -      release_crq_queue(adapter);
 -
 -      adapter->state = VNIC_PROBED;
 -
 -      rc = init_crq_queue(adapter);
 -
 -      if (rc) {
 -              netdev_err(adapter->netdev,
 -                         "Couldn't initialize crq. rc=%d\n", rc);
 -              return rc;
 -      }
 -
 -      rc = ibmvnic_reset_init(adapter, true);
 -      if (rc) {
 -              rc = IBMVNIC_INIT_FAILED;
 -              goto out;
 -      }
 -
 -      /* If the adapter was in PROBE state prior to the reset,
 -       * exit here.
 -       */
 -      if (reset_state == VNIC_PROBED)
 -              goto out;
 -
 -      rc = ibmvnic_login(netdev);
 -      if (rc) {
 -              goto out;
 -      }
 -
 -      rc = init_resources(adapter);
 -      if (rc)
 -              goto out;
 -
 -      ibmvnic_disable_irqs(adapter);
 -
 -      adapter->state = VNIC_CLOSED;
 -
 -      if (reset_state == VNIC_CLOSED)
 -              return 0;
 -
 -      rc = __ibmvnic_open(netdev);
 -      if (rc) {
 -              rc = IBMVNIC_OPEN_FAILED;
 -              goto out;
 -      }
 -
 -      /* refresh device's multicast list */
 -      ibmvnic_set_multi(netdev);
 -
 -      /* kick napi */
 -      for (i = 0; i < adapter->req_rx_queues; i++)
 -              napi_schedule(&adapter->napi[i]);
 -
 -out:
 -      if (rc)
 -              adapter->state = reset_state;
 -      return rc;
 -}
 -
 -/**
 +/*
   * do_reset returns zero if we are able to keep processing reset events, or
   * non-zero if we hit a fatal error and must halt.
   */
@@@ -1941,11 -2028,7 +1941,11 @@@ static int do_reset(struct ibmvnic_adap
                   adapter->state, adapter->failover_pending,
                   rwi->reset_reason, reset_state);
  
 -      rtnl_lock();
 +      adapter->reset_reason = rwi->reset_reason;
 +      /* requestor of VNIC_RESET_CHANGE_PARAM already has the rtnl lock */
 +      if (!(adapter->reset_reason == VNIC_RESET_CHANGE_PARAM))
 +              rtnl_lock();
 +
        /*
         * Now that we have the rtnl lock, clear any pending failover.
         * This will ensure ibmvnic_open() has either completed or will
                adapter->failover_pending = false;
  
        netif_carrier_off(netdev);
 -      adapter->reset_reason = rwi->reset_reason;
  
        old_num_rx_queues = adapter->req_rx_queues;
        old_num_tx_queues = adapter->req_tx_queues;
        if (reset_state == VNIC_OPEN &&
            adapter->reset_reason != VNIC_RESET_MOBILITY &&
            adapter->reset_reason != VNIC_RESET_FAILOVER) {
 -              adapter->state = VNIC_CLOSING;
 +              if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) {
 +                      rc = __ibmvnic_close(netdev);
 +                      if (rc)
 +                              goto out;
 +              } else {
 +                      adapter->state = VNIC_CLOSING;
  
 -              /* Release the RTNL lock before link state change and
 -               * re-acquire after the link state change to allow
 -               * linkwatch_event to grab the RTNL lock and run during
 -               * a reset.
 -               */
 -              rtnl_unlock();
 -              rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
 -              rtnl_lock();
 -              if (rc)
 -                      goto out;
 +                      /* Release the RTNL lock before link state change and
 +                       * re-acquire after the link state change to allow
 +                       * linkwatch_event to grab the RTNL lock and run during
 +                       * a reset.
 +                       */
 +                      rtnl_unlock();
 +                      rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
 +                      rtnl_lock();
 +                      if (rc)
 +                              goto out;
  
 -              if (adapter->state != VNIC_CLOSING) {
 -                      rc = -1;
 -                      goto out;
 +                      if (adapter->state != VNIC_CLOSING) {
 +                              rc = -1;
 +                              goto out;
 +                      }
 +
 +                      adapter->state = VNIC_CLOSED;
                }
 +      }
  
 -              adapter->state = VNIC_CLOSED;
 +      if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) {
 +              release_resources(adapter);
 +              release_sub_crqs(adapter, 1);
 +              release_crq_queue(adapter);
        }
  
        if (adapter->reset_reason != VNIC_RESET_NON_FATAL) {
                 */
                adapter->state = VNIC_PROBED;
  
 -              if (adapter->reset_reason == VNIC_RESET_MOBILITY) {
 +              if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) {
 +                      rc = init_crq_queue(adapter);
 +              } else if (adapter->reset_reason == VNIC_RESET_MOBILITY) {
                        rc = ibmvnic_reenable_crq_queue(adapter);
                        release_sub_crqs(adapter, 1);
                } else {
                        goto out;
                }
  
 -              if (adapter->req_rx_queues != old_num_rx_queues ||
 +              if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) {
 +                      rc = init_resources(adapter);
 +                      if (rc)
 +                              goto out;
 +              } else if (adapter->req_rx_queues != old_num_rx_queues ||
                    adapter->req_tx_queues != old_num_tx_queues ||
                    adapter->req_rx_add_entries_per_subcrq !=
                    old_num_rx_slots ||
@@@ -2115,9 -2181,7 +2115,9 @@@ out
        /* restore the adapter state if reset failed */
        if (rc)
                adapter->state = reset_state;
 -      rtnl_unlock();
 +      /* requestor of VNIC_RESET_CHANGE_PARAM should still hold the rtnl lock */
 +      if (!(adapter->reset_reason == VNIC_RESET_CHANGE_PARAM))
 +              rtnl_unlock();
  
        netdev_dbg(adapter->netdev, "[S:%d FOP:%d] Reset done, rc %d\n",
                   adapter->state, adapter->failover_pending, rc);
@@@ -2248,7 -2312,10 +2248,7 @@@ static void __ibmvnic_reset(struct work
                }
                spin_unlock_irqrestore(&adapter->state_lock, flags);
  
 -              if (rwi->reset_reason == VNIC_RESET_CHANGE_PARAM) {
 -                      /* CHANGE_PARAM requestor holds rtnl_lock */
 -                      rc = do_change_param_reset(adapter, rwi, reset_state);
 -              } else if (adapter->force_reset_recovery) {
 +              if (adapter->force_reset_recovery) {
                        /*
                         * Since we are doing a hard reset now, clear the
                         * failover_pending flag so we don't ignore any
@@@ -2444,6 -2511,12 +2444,6 @@@ restart_poll
  
                if (!pending_scrq(adapter, rx_scrq))
                        break;
 -              /* The queue entry at the current index is peeked at above
 -               * to determine that there is a valid descriptor awaiting
 -               * processing. We want to be sure that the current slot
 -               * holds a valid descriptor before reading its contents.
 -               */
 -              dma_rmb();
                next = ibmvnic_next_scrq(adapter, rx_scrq);
                rx_buff =
                    (struct ibmvnic_rx_buff *)be64_to_cpu(next->
                if (napi_complete_done(napi, frames_processed)) {
                        enable_scrq_irq(adapter, rx_scrq);
                        if (pending_scrq(adapter, rx_scrq)) {
 -                              rmb();
                                if (napi_reschedule(napi)) {
                                        disable_scrq_irq(adapter, rx_scrq);
                                        goto restart_poll;
@@@ -3182,6 -3256,13 +3182,6 @@@ restart_loop
                int total_bytes = 0;
                int num_packets = 0;
  
 -              /* The queue entry at the current index is peeked at above
 -               * to determine that there is a valid descriptor awaiting
 -               * processing. We want to be sure that the current slot
 -               * holds a valid descriptor before reading its contents.
 -               */
 -              dma_rmb();
 -
                next = ibmvnic_next_scrq(adapter, scrq);
                for (i = 0; i < next->tx_comp.num_comps; i++) {
                        if (next->tx_comp.rcs[i])
@@@ -3555,16 -3636,11 +3555,16 @@@ static int pending_scrq(struct ibmvnic_
                        struct ibmvnic_sub_crq_queue *scrq)
  {
        union sub_crq *entry = &scrq->msgs[scrq->cur];
 +      int rc;
  
 -      if (entry->generic.first & IBMVNIC_CRQ_CMD_RSP)
 -              return 1;
 -      else
 -              return 0;
 +      rc = !!(entry->generic.first & IBMVNIC_CRQ_CMD_RSP);
 +
 +      /* Ensure that the SCRQ valid flag is loaded prior to loading the
 +       * contents of the SCRQ descriptor
 +       */
 +      dma_rmb();
 +
 +      return rc;
  }
  
  static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *adapter,
        }
        spin_unlock_irqrestore(&scrq->lock, flags);
  
 -      /* Ensure that the entire buffer descriptor has been
 -       * loaded before reading its contents
 +      /* Ensure that the SCRQ valid flag is loaded prior to loading the
 +       * contents of the SCRQ descriptor
         */
        dma_rmb();
  
@@@ -4842,7 -4918,22 +4842,22 @@@ static void ibmvnic_handle_crq(union ib
                                complete(&adapter->init_done);
                                adapter->init_done_rc = -EIO;
                        }
-                       ibmvnic_reset(adapter, VNIC_RESET_FAILOVER);
+                       rc = ibmvnic_reset(adapter, VNIC_RESET_FAILOVER);
+                       if (rc && rc != -EBUSY) {
+                               /* We were unable to schedule the failover
+                                * reset either because the adapter was still
+                                * probing (eg: during kexec) or we could not
+                                * allocate memory. Clear the failover_pending
+                                * flag since no one else will. We ignore
+                                * EBUSY because it means either FAILOVER reset
+                                * is already scheduled or the adapter is
+                                * being removed.
+                                */
+                               netdev_err(netdev,
+                                          "Error %ld scheduling failover reset\n",
+                                          rc);
+                               adapter->failover_pending = false;
+                       }
                        break;
                case IBMVNIC_CRQ_INIT_COMPLETE:
                        dev_info(dev, "Partner initialization complete\n");
index f8b85ab8be5d220542023089ab37b1dba6780637,c072eb5c07646b66db85d8392177074c0f9eac3c..1654a6e22a7df91038edcfaf6b2aecf12e7736b6
@@@ -221,20 -221,25 +221,20 @@@ static void ocelot_port_set_pvid(struc
  }
  
  int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port,
 -                             bool vlan_aware, struct switchdev_trans *trans)
 +                             bool vlan_aware)
  {
 +      struct ocelot_vcap_block *block = &ocelot->block[VCAP_IS1];
        struct ocelot_port *ocelot_port = ocelot->ports[port];
 +      struct ocelot_vcap_filter *filter;
        u32 val;
  
 -      if (switchdev_trans_ph_prepare(trans)) {
 -              struct ocelot_vcap_block *block = &ocelot->block[VCAP_IS1];
 -              struct ocelot_vcap_filter *filter;
 -
 -              list_for_each_entry(filter, &block->rules, list) {
 -                      if (filter->ingress_port_mask & BIT(port) &&
 -                          filter->action.vid_replace_ena) {
 -                              dev_err(ocelot->dev,
 -                                      "Cannot change VLAN state with vlan modify rules active\n");
 -                              return -EBUSY;
 -                      }
 +      list_for_each_entry(filter, &block->rules, list) {
 +              if (filter->ingress_port_mask & BIT(port) &&
 +                  filter->action.vid_replace_ena) {
 +                      dev_err(ocelot->dev,
 +                              "Cannot change VLAN state with vlan modify rules active\n");
 +                      return -EBUSY;
                }
 -
 -              return 0;
        }
  
        ocelot_port->vlan_aware = vlan_aware;
@@@ -370,6 -375,60 +370,60 @@@ static void ocelot_vlan_init(struct oce
        }
  }
  
+ static u32 ocelot_read_eq_avail(struct ocelot *ocelot, int port)
+ {
+       return ocelot_read_rix(ocelot, QSYS_SW_STATUS, port);
+ }
+
+ int ocelot_port_flush(struct ocelot *ocelot, int port)
+ {
+       int err, val;
+
+       /* Disable dequeuing from the egress queues */
+       ocelot_rmw_rix(ocelot, QSYS_PORT_MODE_DEQUEUE_DIS,
+                      QSYS_PORT_MODE_DEQUEUE_DIS,
+                      QSYS_PORT_MODE, port);
+
+       /* Disable flow control */
+       ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, 0);
+
+       /* Disable priority flow control */
+       ocelot_fields_write(ocelot, port,
+                           QSYS_SWITCH_PORT_MODE_TX_PFC_ENA, 0);
+
+       /* Wait at least the time it takes to receive a frame of maximum length
+        * at the port.
+        * Worst-case delays for 10 kilobyte jumbo frames are:
+        * 8 ms on a 10M port
+        * 800 μs on a 100M port
+        * 80 μs on a 1G port
+        * 32 μs on a 2.5G port
+        */
+       usleep_range(8000, 10000);
+
+       /* Disable half duplex backpressure. */
+       ocelot_rmw_rix(ocelot, 0, SYS_FRONT_PORT_MODE_HDX_MODE,
+                      SYS_FRONT_PORT_MODE, port);
+
+       /* Flush the queues associated with the port. */
+       ocelot_rmw_gix(ocelot, REW_PORT_CFG_FLUSH_ENA, REW_PORT_CFG_FLUSH_ENA,
+                      REW_PORT_CFG, port);
+
+       /* Enable dequeuing from the egress queues. */
+       ocelot_rmw_rix(ocelot, 0, QSYS_PORT_MODE_DEQUEUE_DIS, QSYS_PORT_MODE,
+                      port);
+
+       /* Wait until flushing is complete. */
+       err = read_poll_timeout(ocelot_read_eq_avail, val, !val,
+                               100, 2000000, false, ocelot, port);
+
+       /* Clear flushing again. */
+       ocelot_rmw_gix(ocelot, 0, REW_PORT_CFG_FLUSH_ENA, REW_PORT_CFG, port);
+
+       return err;
+ }
+ EXPORT_SYMBOL(ocelot_port_flush);
+
  void ocelot_adjust_link(struct ocelot *ocelot, int port,
                        struct phy_device *phydev)
  {
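
[Editor's note: ocelot_port_flush() above relies on read_poll_timeout() from <linux/iopoll.h> to wait until the egress queues drain. Below is a minimal userspace model of that poll-with-timeout pattern, returning -ETIMEDOUT on expiry as the kernel macro does; no real sleeping happens, only the bookkeeping, and fake_eq_avail() is a made-up stand-in for the register read.]

    #include <errno.h>
    #include <stdio.h>

    static int poll_until_zero(unsigned int (*read_op)(void *), void *ctx,
                               unsigned long sleep_us, unsigned long timeout_us)
    {
            unsigned long waited = 0;

            for (;;) {
                    if (read_op(ctx) == 0)
                            return 0;
                    if (waited >= timeout_us)
                            return -ETIMEDOUT;
                    waited += sleep_us;     /* the kernel macro sleeps here */
            }
    }

    /* Pretends to be the register read: reports a few queued frames, then an
     * empty queue. */
    static unsigned int fake_eq_avail(void *ctx)
    {
            unsigned int *remaining = ctx;

            return *remaining ? (*remaining)-- : 0;
    }

    int main(void)
    {
            unsigned int frames_left = 3;

            printf("flush result: %d\n",
                   poll_until_zero(fake_eq_avail, &frames_left, 100, 2000000));
            return 0;
    }
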
@@@ -889,102 -948,10 +943,102 @@@ int ocelot_get_ts_info(struct ocelot *o
  }
  EXPORT_SYMBOL(ocelot_get_ts_info);
  
 +static u32 ocelot_get_bond_mask(struct ocelot *ocelot, struct net_device *bond,
 +                              bool only_active_ports)
 +{
 +      u32 mask = 0;
 +      int port;
 +
 +      for (port = 0; port < ocelot->num_phys_ports; port++) {
 +              struct ocelot_port *ocelot_port = ocelot->ports[port];
 +
 +              if (!ocelot_port)
 +                      continue;
 +
 +              if (ocelot_port->bond == bond) {
 +                      if (only_active_ports && !ocelot_port->lag_tx_active)
 +                              continue;
 +
 +                      mask |= BIT(port);
 +              }
 +      }
 +
 +      return mask;
 +}
 +
 +static u32 ocelot_get_dsa_8021q_cpu_mask(struct ocelot *ocelot)
 +{
 +      u32 mask = 0;
 +      int port;
 +
 +      for (port = 0; port < ocelot->num_phys_ports; port++) {
 +              struct ocelot_port *ocelot_port = ocelot->ports[port];
 +
 +              if (!ocelot_port)
 +                      continue;
 +
 +              if (ocelot_port->is_dsa_8021q_cpu)
 +                      mask |= BIT(port);
 +      }
 +
 +      return mask;
 +}
 +
 +void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot)
 +{
 +      unsigned long cpu_fwd_mask;
 +      int port;
 +
 +      /* If a DSA tag_8021q CPU exists, it needs to be included in the
 +       * regular forwarding path of the front ports regardless of whether
 +       * those are bridged or standalone.
 +       * If DSA tag_8021q is not used, this returns 0, which is fine because
 +       * the hardware-based CPU port module can be a destination for packets
 +       * even if it isn't part of PGID_SRC.
 +       */
 +      cpu_fwd_mask = ocelot_get_dsa_8021q_cpu_mask(ocelot);
 +
 +      /* Apply FWD mask. The loop is needed to add/remove the current port as
 +       * a source for the other ports.
 +       */
 +      for (port = 0; port < ocelot->num_phys_ports; port++) {
 +              struct ocelot_port *ocelot_port = ocelot->ports[port];
 +              unsigned long mask;
 +
 +              if (!ocelot_port) {
 +                      /* Unused ports can't send anywhere */
 +                      mask = 0;
 +              } else if (ocelot_port->is_dsa_8021q_cpu) {
 +                      /* The DSA tag_8021q CPU ports need to be able to
 +                       * forward packets to all other ports except for
 +                       * themselves
 +                       */
 +                      mask = GENMASK(ocelot->num_phys_ports - 1, 0);
 +                      mask &= ~cpu_fwd_mask;
 +              } else if (ocelot->bridge_fwd_mask & BIT(port)) {
 +                      struct net_device *bond = ocelot_port->bond;
 +
 +                      mask = ocelot->bridge_fwd_mask & ~BIT(port);
 +                      if (bond) {
 +                              mask &= ~ocelot_get_bond_mask(ocelot, bond,
 +                                                            false);
 +                      }
 +              } else {
 +                      /* Standalone ports forward only to DSA tag_8021q CPU
 +                       * ports (if those exist), or to the hardware CPU port
 +                       * module otherwise.
 +                       */
 +                      mask = cpu_fwd_mask;
 +              }
 +
 +              ocelot_write_rix(ocelot, mask, ANA_PGID_PGID, PGID_SRC + port);
 +      }
 +}
 +EXPORT_SYMBOL(ocelot_apply_bridge_fwd_mask);
 +
  void ocelot_bridge_stp_state_set(struct ocelot *ocelot, int port, u8 state)
  {
        u32 port_cfg;
 -      int p, i;
  
        if (!(BIT(port) & ocelot->bridge_mask))
                return;
  
        ocelot_write_gix(ocelot, port_cfg, ANA_PORT_PORT_CFG, port);
  
 -      /* Apply FWD mask. The loop is needed to add/remove the current port as
 -       * a source for the other ports.
 -       */
 -      for (p = 0; p < ocelot->num_phys_ports; p++) {
 -              if (ocelot->bridge_fwd_mask & BIT(p)) {
 -                      unsigned long mask = ocelot->bridge_fwd_mask & ~BIT(p);
 -
 -                      for (i = 0; i < ocelot->num_phys_ports; i++) {
 -                              unsigned long bond_mask = ocelot->lags[i];
 -
 -                              if (!bond_mask)
 -                                      continue;
 -
 -                              if (bond_mask & BIT(p)) {
 -                                      mask &= ~bond_mask;
 -                                      break;
 -                              }
 -                      }
 -
 -                      ocelot_write_rix(ocelot, mask,
 -                                       ANA_PGID_PGID, PGID_SRC + p);
 -              } else {
 -                      ocelot_write_rix(ocelot, 0,
 -                                       ANA_PGID_PGID, PGID_SRC + p);
 -              }
 -      }
 +      ocelot_apply_bridge_fwd_mask(ocelot);
  }
  EXPORT_SYMBOL(ocelot_bridge_stp_state_set);
  
@@@ -1254,6 -1246,7 +1308,6 @@@ int ocelot_port_bridge_leave(struct oce
                             struct net_device *bridge)
  {
        struct ocelot_vlan pvid = {0}, native_vlan = {0};
 -      struct switchdev_trans trans;
        int ret;
  
        ocelot->bridge_mask &= ~BIT(port);
        if (!ocelot->bridge_mask)
                ocelot->hw_bridge_dev = NULL;
  
 -      trans.ph_prepare = true;
 -      ret = ocelot_port_vlan_filtering(ocelot, port, false, &trans);
 -      if (ret)
 -              return ret;
 -
 -      trans.ph_prepare = false;
 -      ret = ocelot_port_vlan_filtering(ocelot, port, false, &trans);
 +      ret = ocelot_port_vlan_filtering(ocelot, port, false);
        if (ret)
                return ret;
  
@@@ -1274,7 -1273,6 +1328,7 @@@ EXPORT_SYMBOL(ocelot_port_bridge_leave)
  
  static void ocelot_set_aggr_pgids(struct ocelot *ocelot)
  {
 +      unsigned long visited = GENMASK(ocelot->num_phys_ports - 1, 0);
        int i, port, lag;
  
        /* Reset destination and aggregation PGIDS */
                ocelot_write_rix(ocelot, GENMASK(ocelot->num_phys_ports - 1, 0),
                                 ANA_PGID_PGID, i);
  
 -      /* Now, set PGIDs for each LAG */
 +      /* The visited ports bitmask holds the list of ports offloading any
 +       * bonding interface. Initially we mark all these ports as unvisited,
 +       * then every time we visit a port in this bitmask, we know that it is
 +       * the lowest numbered port, i.e. the one whose logical ID == physical
 +       * port ID == LAG ID. So we mark as visited all further ports in the
 +       * bitmask that are offloading the same bonding interface. This way,
 +       * we set up the aggregation PGIDs only once per bonding interface.
 +       */
 +      for (port = 0; port < ocelot->num_phys_ports; port++) {
 +              struct ocelot_port *ocelot_port = ocelot->ports[port];
 +
 +              if (!ocelot_port || !ocelot_port->bond)
 +                      continue;
 +
 +              visited &= ~BIT(port);
 +      }
 +
 +      /* Now, set PGIDs for each active LAG */
        for (lag = 0; lag < ocelot->num_phys_ports; lag++) {
 +              struct net_device *bond = ocelot->ports[lag]->bond;
 +              int num_active_ports = 0;
                unsigned long bond_mask;
 -              int aggr_count = 0;
                u8 aggr_idx[16];
  
 -              bond_mask = ocelot->lags[lag];
 -              if (!bond_mask)
 +              if (!bond || (visited & BIT(lag)))
                        continue;
  
 +              bond_mask = ocelot_get_bond_mask(ocelot, bond, true);
 +
                for_each_set_bit(port, &bond_mask, ocelot->num_phys_ports) {
                        // Destination mask
                        ocelot_write_rix(ocelot, bond_mask,
                                         ANA_PGID_PGID, port);
 -                      aggr_idx[aggr_count] = port;
 -                      aggr_count++;
 +                      aggr_idx[num_active_ports++] = port;
                }
  
                for_each_aggr_pgid(ocelot, i) {
  
                        ac = ocelot_read_rix(ocelot, ANA_PGID_PGID, i);
                        ac &= ~bond_mask;
 -                      ac |= BIT(aggr_idx[i % aggr_count]);
 +                      /* Don't do division by zero if there was no active
 +                       * port. Just make all aggregation codes zero.
 +                       */
 +                      if (num_active_ports)
 +                              ac |= BIT(aggr_idx[i % num_active_ports]);
                        ocelot_write_rix(ocelot, ac, ANA_PGID_PGID, i);
                }
 -      }
 -}
  
 -static void ocelot_setup_lag(struct ocelot *ocelot, int lag)
 -{
 -      unsigned long bond_mask = ocelot->lags[lag];
 -      unsigned int p;
 -
 -      for_each_set_bit(p, &bond_mask, ocelot->num_phys_ports) {
 -              u32 port_cfg = ocelot_read_gix(ocelot, ANA_PORT_PORT_CFG, p);
 +              /* Mark all ports in the same LAG as visited to avoid applying
 +               * the same config again.
 +               */
 +              for (port = lag; port < ocelot->num_phys_ports; port++) {
 +                      struct ocelot_port *ocelot_port = ocelot->ports[port];
  
 -              port_cfg &= ~ANA_PORT_PORT_CFG_PORTID_VAL_M;
 +                      if (!ocelot_port)
 +                              continue;
  
 -              /* Use lag port as logical port for port i */
 -              ocelot_write_gix(ocelot, port_cfg |
 -                               ANA_PORT_PORT_CFG_PORTID_VAL(lag),
 -                               ANA_PORT_PORT_CFG, p);
 +                      if (ocelot_port->bond == bond)
 +                              visited |= BIT(port);
 +              }
        }
  }
  
 -int ocelot_port_lag_join(struct ocelot *ocelot, int port,
 -                       struct net_device *bond)
 +/* When offloading a bonding interface, the switch ports configured under the
 + * same bond must have the same logical port ID, equal to the physical port ID
 + * of the lowest numbered physical port in that bond. Otherwise, in standalone/
 + * bridged mode, each port has a logical port ID equal to its physical port ID.
 + */
 +static void ocelot_setup_logical_port_ids(struct ocelot *ocelot)
  {
 -      struct net_device *ndev;
 -      u32 bond_mask = 0;
 -      int lag, lp;
 +      int port;
  
 -      rcu_read_lock();
 -      for_each_netdev_in_bond_rcu(bond, ndev) {
 -              struct ocelot_port_private *priv = netdev_priv(ndev);
 +      for (port = 0; port < ocelot->num_phys_ports; port++) {
 +              struct ocelot_port *ocelot_port = ocelot->ports[port];
 +              struct net_device *bond;
  
 -              bond_mask |= BIT(priv->chip_port);
 -      }
 -      rcu_read_unlock();
 +              if (!ocelot_port)
 +                      continue;
  
 -      lp = __ffs(bond_mask);
 +              bond = ocelot_port->bond;
 +              if (bond) {
 +                      int lag = __ffs(ocelot_get_bond_mask(ocelot, bond,
 +                                                           false));
  
 -      /* If the new port is the lowest one, use it as the logical port from
 -       * now on
 -       */
 -      if (port == lp) {
 -              lag = port;
 -              ocelot->lags[port] = bond_mask;
 -              bond_mask &= ~BIT(port);
 -              if (bond_mask) {
 -                      lp = __ffs(bond_mask);
 -                      ocelot->lags[lp] = 0;
 +                      ocelot_rmw_gix(ocelot,
 +                                     ANA_PORT_PORT_CFG_PORTID_VAL(lag),
 +                                     ANA_PORT_PORT_CFG_PORTID_VAL_M,
 +                                     ANA_PORT_PORT_CFG, port);
 +              } else {
 +                      ocelot_rmw_gix(ocelot,
 +                                     ANA_PORT_PORT_CFG_PORTID_VAL(port),
 +                                     ANA_PORT_PORT_CFG_PORTID_VAL_M,
 +                                     ANA_PORT_PORT_CFG, port);
                }
 -      } else {
 -              lag = lp;
 -              ocelot->lags[lp] |= BIT(port);
        }
 +}
  
 -      ocelot_setup_lag(ocelot, lag);
 +int ocelot_port_lag_join(struct ocelot *ocelot, int port,
 +                       struct net_device *bond,
 +                       struct netdev_lag_upper_info *info)
 +{
 +      if (info->tx_type != NETDEV_LAG_TX_TYPE_HASH)
 +              return -EOPNOTSUPP;
 +
 +      ocelot->ports[port]->bond = bond;
 +
 +      ocelot_setup_logical_port_ids(ocelot);
 +      ocelot_apply_bridge_fwd_mask(ocelot);
        ocelot_set_aggr_pgids(ocelot);
  
        return 0;
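
[Editor's note: a toy of the aggregation-PGID balancing done in ocelot_set_aggr_pgids() above: each aggregation code is given to one active LAG member in round-robin fashion, and to none at all when the bond has no active ports (the division-by-zero case the new driver comment guards against). The PGID count and port numbers are illustrative.]

    #include <stdint.h>
    #include <stdio.h>

    #define NUM_AGGR_PGIDS 16       /* illustrative; not the hardware's count */

    static void set_aggr_pgids(uint32_t pgid[NUM_AGGR_PGIDS],
                               const uint8_t *active_ports, int num_active)
    {
            for (int i = 0; i < NUM_AGGR_PGIDS; i++) {
                    pgid[i] = 0;
                    if (num_active)
                            pgid[i] |= 1u << active_ports[i % num_active];
            }
    }

    int main(void)
    {
            uint32_t pgid[NUM_AGGR_PGIDS];
            uint8_t active[] = { 2, 5 };    /* two active members of the bond */

            set_aggr_pgids(pgid, active, 2);
            printf("pgid[0] = 0x%x, pgid[1] = 0x%x\n", pgid[0], pgid[1]);
            return 0;
    }
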
@@@ -1403,24 -1372,33 +1457,24 @@@ EXPORT_SYMBOL(ocelot_port_lag_join)
  void ocelot_port_lag_leave(struct ocelot *ocelot, int port,
                           struct net_device *bond)
  {
 -      u32 port_cfg;
 -      int i;
 +      ocelot->ports[port]->bond = NULL;
  
 -      /* Remove port from any lag */
 -      for (i = 0; i < ocelot->num_phys_ports; i++)
 -              ocelot->lags[i] &= ~BIT(port);
 -
 -      /* if it was the logical port of the lag, move the lag config to the
 -       * next port
 -       */
 -      if (ocelot->lags[port]) {
 -              int n = __ffs(ocelot->lags[port]);
 -
 -              ocelot->lags[n] = ocelot->lags[port];
 -              ocelot->lags[port] = 0;
 +      ocelot_setup_logical_port_ids(ocelot);
 +      ocelot_apply_bridge_fwd_mask(ocelot);
 +      ocelot_set_aggr_pgids(ocelot);
 +}
 +EXPORT_SYMBOL(ocelot_port_lag_leave);
  
 -              ocelot_setup_lag(ocelot, n);
 -      }
 +void ocelot_port_lag_change(struct ocelot *ocelot, int port, bool lag_tx_active)
 +{
 +      struct ocelot_port *ocelot_port = ocelot->ports[port];
  
 -      port_cfg = ocelot_read_gix(ocelot, ANA_PORT_PORT_CFG, port);
 -      port_cfg &= ~ANA_PORT_PORT_CFG_PORTID_VAL_M;
 -      ocelot_write_gix(ocelot, port_cfg | ANA_PORT_PORT_CFG_PORTID_VAL(port),
 -                       ANA_PORT_PORT_CFG, port);
 +      ocelot_port->lag_tx_active = lag_tx_active;
  
 +      /* Rebalance the LAGs */
        ocelot_set_aggr_pgids(ocelot);
  }
 -EXPORT_SYMBOL(ocelot_port_lag_leave);
 +EXPORT_SYMBOL(ocelot_port_lag_change);
  
  /* Configure the maximum SDU (L2 payload) on RX to the value specified in @sdu.
   * The length of VLAN tags is accounted for automatically via DEV_MAC_TAGS_CFG.
@@@ -1438,9 -1416,9 +1492,9 @@@ void ocelot_port_set_maxlen(struct ocel
        if (port == ocelot->npi) {
                maxlen += OCELOT_TAG_LEN;
  
 -              if (ocelot->inj_prefix == OCELOT_TAG_PREFIX_SHORT)
 +              if (ocelot->npi_inj_prefix == OCELOT_TAG_PREFIX_SHORT)
                        maxlen += OCELOT_SHORT_PREFIX_LEN;
 -              else if (ocelot->inj_prefix == OCELOT_TAG_PREFIX_LONG)
 +              else if (ocelot->npi_inj_prefix == OCELOT_TAG_PREFIX_LONG)
                        maxlen += OCELOT_LONG_PREFIX_LEN;
        }
  
                            pause_stop);
  
        /* Tail dropping watermarks */
 -      atop_tot = (ocelot->shared_queue_sz - 9 * maxlen) /
 +      atop_tot = (ocelot->packet_buffer_size - 9 * maxlen) /
                   OCELOT_BUFFER_CELL_SZ;
        atop = (9 * maxlen) / OCELOT_BUFFER_CELL_SZ;
        ocelot_write_rix(ocelot, ocelot->ops->wm_enc(atop), SYS_ATOP, port);
@@@ -1470,9 -1448,9 +1524,9 @@@ int ocelot_get_max_mtu(struct ocelot *o
        if (port == ocelot->npi) {
                max_mtu -= OCELOT_TAG_LEN;
  
 -              if (ocelot->inj_prefix == OCELOT_TAG_PREFIX_SHORT)
 +              if (ocelot->npi_inj_prefix == OCELOT_TAG_PREFIX_SHORT)
                        max_mtu -= OCELOT_SHORT_PREFIX_LEN;
 -              else if (ocelot->inj_prefix == OCELOT_TAG_PREFIX_LONG)
 +              else if (ocelot->npi_inj_prefix == OCELOT_TAG_PREFIX_LONG)
                        max_mtu -= OCELOT_LONG_PREFIX_LEN;
        }
  
@@@ -1557,9 -1535,9 +1611,9 @@@ static void ocelot_cpu_port_init(struc
        ocelot_fields_write(ocelot, cpu, QSYS_SWITCH_PORT_MODE_PORT_ENA, 1);
        /* CPU port Injection/Extraction configuration */
        ocelot_fields_write(ocelot, cpu, SYS_PORT_MODE_INCL_XTR_HDR,
 -                          ocelot->xtr_prefix);
 +                          OCELOT_TAG_PREFIX_NONE);
        ocelot_fields_write(ocelot, cpu, SYS_PORT_MODE_INCL_INJ_HDR,
 -                          ocelot->inj_prefix);
 +                          OCELOT_TAG_PREFIX_NONE);
  
        /* Configure the CPU port to be VLAN aware */
        ocelot_write_gix(ocelot, ANA_PORT_VLAN_CFG_VLAN_VID(0) |
                         ANA_PORT_VLAN_CFG, cpu);
  }
  
 +static void ocelot_detect_features(struct ocelot *ocelot)
 +{
 +      int mmgt, eq_ctrl;
 +
 +      /* For Ocelot, Felix, Seville, Serval etc, SYS:MMGT:MMGT:FREECNT holds
 +       * the number of 240-byte free memory words (aka 4-cell chunks) and not
 +       * 192 bytes as the documentation incorrectly says.
 +       */
 +      mmgt = ocelot_read(ocelot, SYS_MMGT);
 +      ocelot->packet_buffer_size = 240 * SYS_MMGT_FREECNT(mmgt);
 +
 +      eq_ctrl = ocelot_read(ocelot, QSYS_EQ_CTRL);
 +      ocelot->num_frame_refs = QSYS_MMGT_EQ_CTRL_FP_FREE_CNT(eq_ctrl);
 +}
 +
  int ocelot_init(struct ocelot *ocelot)
  {
        char queue_name[32];
                }
        }
  
 -      ocelot->lags = devm_kcalloc(ocelot->dev, ocelot->num_phys_ports,
 -                                  sizeof(u32), GFP_KERNEL);
 -      if (!ocelot->lags)
 -              return -ENOMEM;
 -
        ocelot->stats = devm_kcalloc(ocelot->dev,
                                     ocelot->num_phys_ports * ocelot->num_stats,
                                     sizeof(u64), GFP_KERNEL);
  
        INIT_LIST_HEAD(&ocelot->multicast);
        INIT_LIST_HEAD(&ocelot->pgids);
 +      ocelot_detect_features(ocelot);
        ocelot_mact_init(ocelot);
        ocelot_vlan_init(ocelot);
        ocelot_vcap_init(ocelot);
        ocelot_write(ocelot, ANA_AGGR_CFG_AC_SMAC_ENA |
                             ANA_AGGR_CFG_AC_DMAC_ENA |
                             ANA_AGGR_CFG_AC_IP4_SIPDIP_ENA |
 -                           ANA_AGGR_CFG_AC_IP4_TCPUDP_ENA, ANA_AGGR_CFG);
 +                           ANA_AGGR_CFG_AC_IP4_TCPUDP_ENA |
 +                           ANA_AGGR_CFG_AC_IP6_FLOW_LBL_ENA |
 +                           ANA_AGGR_CFG_AC_IP6_TCPUDP_ENA,
 +                           ANA_AGGR_CFG);
  
        /* Set MAC age time to default value. The entry is aged after
         * 2*AGE_PERIOD
index 9db1ea3affbb37018d4e0c2cbaf56661064ae0df,13bd48a75db7692fb5ab221ea587d60c088e325f..dc3f73c3b33ef61189a02436949f38f6919f18f5
@@@ -37,10 -37,6 +37,10 @@@ void netvsc_switch_datapath(struct net_
        struct netvsc_device *nv_dev = rtnl_dereference(net_device_ctx->nvdev);
        struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt;
  
 +      /* Block sending traffic to VF if it's about to be gone */
 +      if (!vf)
 +              net_device_ctx->data_path_is_vf = vf;
 +
        memset(init_pkt, 0, sizeof(struct nvsp_message));
        init_pkt->hdr.msg_type = NVSP_MSG4_TYPE_SWITCH_DATA_PATH;
        if (vf)
  
        vmbus_sendpacket(dev->channel, init_pkt,
                               sizeof(struct nvsp_message),
 -                             VMBUS_RQST_ID_NO_RESPONSE,
 -                             VM_PKT_DATA_INBAND, 0);
 +                             (unsigned long)init_pkt,
 +                             VM_PKT_DATA_INBAND,
 +                             VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 +      wait_for_completion(&nv_dev->channel_init_wait);
 +      net_device_ctx->data_path_is_vf = vf;
  }
  
  /* Worker to setup sub channels on initial setup
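
[Editor's note: the netvsc hunk above turns the data-path switch into a request/response exchange: the NVSP message is sent with a completion requested and the caller blocks on channel_init_wait until the host acknowledges it. The sketch below only models that handshake, with an atomic flag standing in for the kernel's struct completion; names here are made up for illustration.]

    #include <stdatomic.h>
    #include <stdio.h>

    /* toy_complete() models complete(), toy_wait_for_completion() models
     * wait_for_completion() (which sleeps instead of spinning). */
    struct toy_completion {
            atomic_int done;
    };

    static void toy_complete(struct toy_completion *c)
    {
            atomic_store(&c->done, 1);
    }

    static void toy_wait_for_completion(struct toy_completion *c)
    {
            while (!atomic_load(&c->done))
                    ;
    }

    int main(void)
    {
            struct toy_completion switch_done = { 0 };

            /* "Send" the switch-datapath request with a completion requested,
             * then pretend the host's acknowledgement arrived. */
            toy_complete(&switch_done);
            toy_wait_for_completion(&switch_done);
            printf("data path state recorded only after the host acknowledged\n");
            return 0;
    }
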
@@@ -131,7 -124,6 +131,7 @@@ static void free_netvsc_device(struct r
  
        for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
                xdp_rxq_info_unreg(&nvdev->chan_table[i].xdp_rxq);
 +              kfree(nvdev->chan_table[i].recv_buf);
                vfree(nvdev->chan_table[i].mrc.slots);
        }
  
@@@ -311,7 -303,7 +311,7 @@@ static int netvsc_init_buf(struct hv_de
        struct nvsp_message *init_packet;
        unsigned int buf_size;
        size_t map_words;
 -      int ret = 0;
 +      int i, ret = 0;
  
        /* Get receive buffer area. */
        buf_size = device_info->recv_sections * device_info->recv_section_size;
                goto cleanup;
        }
  
 +      for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
 +              struct netvsc_channel *nvchan = &net_device->chan_table[i];
 +
 +              nvchan->recv_buf = kzalloc(net_device->recv_section_size, GFP_KERNEL);
 +              if (nvchan->recv_buf == NULL) {
 +                      ret = -ENOMEM;
 +                      goto cleanup;
 +              }
 +      }
 +
        /* Setup receive completion ring.
         * Add 1 to the recv_section_cnt because at least one entry in a
         * ring buffer has to be empty.
@@@ -772,31 -754,8 +772,31 @@@ static void netvsc_send_completion(stru
                                   const struct vmpacket_descriptor *desc,
                                   int budget)
  {
 -      const struct nvsp_message *nvsp_packet = hv_pkt_data(desc);
 +      const struct nvsp_message *nvsp_packet;
        u32 msglen = hv_pkt_datalen(desc);
 +      struct nvsp_message *pkt_rqst;
 +      u64 cmd_rqst;
 +
 +      /* First check if this is a VMBUS completion without data payload */
 +      if (!msglen) {
 +              cmd_rqst = vmbus_request_addr(&incoming_channel->requestor,
 +                                            (u64)desc->trans_id);
 +              if (cmd_rqst == VMBUS_RQST_ERROR) {
 +                      netdev_err(ndev, "Invalid transaction id\n");
 +                      return;
 +              }
 +
 +              pkt_rqst = (struct nvsp_message *)(uintptr_t)cmd_rqst;
 +              switch (pkt_rqst->hdr.msg_type) {
 +              case NVSP_MSG4_TYPE_SWITCH_DATA_PATH:
 +                      complete(&net_device->channel_init_wait);
 +                      break;
 +
 +              default:
 +                      netdev_err(ndev, "Unexpected VMBUS completion!!\n");
 +              }
 +              return;
 +      }
  
        /* Ensure packet is big enough to read header fields */
        if (msglen < sizeof(struct nvsp_message_header)) {
                return;
        }
  
 +      nvsp_packet = hv_pkt_data(desc);
        switch (nvsp_packet->hdr.msg_type) {
        case NVSP_MSG_TYPE_INIT_COMPLETE:
                if (msglen < sizeof(struct nvsp_message_header) +
@@@ -929,7 -887,6 +929,7 @@@ static inline int netvsc_send_pkt
        int ret;
        u32 ring_avail = hv_get_avail_to_write_percent(&out_channel->outbound);
  
 +      memset(&nvmsg, 0, sizeof(struct nvsp_message));
        nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
        if (skb)
                rpkt->channel_type = 0;         /* 0 is RMC_DATA */
@@@ -1295,19 -1252,6 +1295,19 @@@ static int netvsc_receive(struct net_de
                        continue;
                }
  
 +              /* We're going to copy (sections of) the packet into nvchan->recv_buf;
 +               * make sure that nvchan->recv_buf is large enough to hold the packet.
 +               */
 +              if (unlikely(buflen > net_device->recv_section_size)) {
 +                      nvchan->rsc.cnt = 0;
 +                      status = NVSP_STAT_FAIL;
 +                      netif_err(net_device_ctx, rx_err, ndev,
 +                                "Packet too big: buflen=%u recv_section_size=%u\n",
 +                                buflen, net_device->recv_section_size);
 +
 +                      continue;
 +              }
 +
                data = recv_buf + offset;
  
                nvchan->rsc.is_last = (i == count - 1);
                ret = rndis_filter_receive(ndev, net_device,
                                           nvchan, data, buflen);
  
-               if (unlikely(ret != NVSP_STAT_SUCCESS))
+               if (unlikely(ret != NVSP_STAT_SUCCESS)) {
+                       /* Drop incomplete packet */
+                       nvchan->rsc.cnt = 0;
                        status = NVSP_STAT_FAIL;
+               }
        }
  
        enq_receive_complete(ndev, net_device, q_idx,
@@@ -1362,7 -1309,7 +1365,7 @@@ static void netvsc_send_table(struct ne
                         sizeof(union nvsp_6_message_uber);
  
        /* Boundary check for all versions */
 -      if (offset > msglen - count * sizeof(u32)) {
 +      if (msglen < count * sizeof(u32) || offset > msglen - count * sizeof(u32)) {
                netdev_err(ndev, "Received send-table offset too big:%u\n",
                           offset);
                return;
index 0c2ebe7ac6554377453dc39a81b972ef5adcb781,3aab2b867fc0d082374683800e55b9a5ce418872..123cc9d25f5ed52fa66698348aab827a263b4975
@@@ -127,89 -127,70 +127,89 @@@ static void put_rndis_request(struct rn
  }
  
  static void dump_rndis_message(struct net_device *netdev,
 -                             const struct rndis_message *rndis_msg)
 +                             const struct rndis_message *rndis_msg,
 +                             const void *data)
  {
        switch (rndis_msg->ndis_msg_type) {
        case RNDIS_MSG_PACKET:
 -              netdev_dbg(netdev, "RNDIS_MSG_PACKET (len %u, "
 -                         "data offset %u data len %u, # oob %u, "
 -                         "oob offset %u, oob len %u, pkt offset %u, "
 -                         "pkt len %u\n",
 -                         rndis_msg->msg_len,
 -                         rndis_msg->msg.pkt.data_offset,
 -                         rndis_msg->msg.pkt.data_len,
 -                         rndis_msg->msg.pkt.num_oob_data_elements,
 -                         rndis_msg->msg.pkt.oob_data_offset,
 -                         rndis_msg->msg.pkt.oob_data_len,
 -                         rndis_msg->msg.pkt.per_pkt_info_offset,
 -                         rndis_msg->msg.pkt.per_pkt_info_len);
 +              if (rndis_msg->msg_len - RNDIS_HEADER_SIZE >= sizeof(struct rndis_packet)) {
 +                      const struct rndis_packet *pkt = data + RNDIS_HEADER_SIZE;
 +                      netdev_dbg(netdev, "RNDIS_MSG_PACKET (len %u, "
 +                                 "data offset %u data len %u, # oob %u, "
 +                                 "oob offset %u, oob len %u, pkt offset %u, "
 +                                 "pkt len %u\n",
 +                                 rndis_msg->msg_len,
 +                                 pkt->data_offset,
 +                                 pkt->data_len,
 +                                 pkt->num_oob_data_elements,
 +                                 pkt->oob_data_offset,
 +                                 pkt->oob_data_len,
 +                                 pkt->per_pkt_info_offset,
 +                                 pkt->per_pkt_info_len);
 +              }
                break;
  
        case RNDIS_MSG_INIT_C:
 -              netdev_dbg(netdev, "RNDIS_MSG_INIT_C "
 -                      "(len %u, id 0x%x, status 0x%x, major %d, minor %d, "
 -                      "device flags %d, max xfer size 0x%x, max pkts %u, "
 -                      "pkt aligned %u)\n",
 -                      rndis_msg->msg_len,
 -                      rndis_msg->msg.init_complete.req_id,
 -                      rndis_msg->msg.init_complete.status,
 -                      rndis_msg->msg.init_complete.major_ver,
 -                      rndis_msg->msg.init_complete.minor_ver,
 -                      rndis_msg->msg.init_complete.dev_flags,
 -                      rndis_msg->msg.init_complete.max_xfer_size,
 -                      rndis_msg->msg.init_complete.
 -                         max_pkt_per_msg,
 -                      rndis_msg->msg.init_complete.
 -                         pkt_alignment_factor);
 +              if (rndis_msg->msg_len - RNDIS_HEADER_SIZE >=
 +                              sizeof(struct rndis_initialize_complete)) {
 +                      const struct rndis_initialize_complete *init_complete =
 +                              data + RNDIS_HEADER_SIZE;
 +                      netdev_dbg(netdev, "RNDIS_MSG_INIT_C "
 +                              "(len %u, id 0x%x, status 0x%x, major %d, minor %d, "
 +                              "device flags %d, max xfer size 0x%x, max pkts %u, "
 +                              "pkt aligned %u)\n",
 +                              rndis_msg->msg_len,
 +                              init_complete->req_id,
 +                              init_complete->status,
 +                              init_complete->major_ver,
 +                              init_complete->minor_ver,
 +                              init_complete->dev_flags,
 +                              init_complete->max_xfer_size,
 +                              init_complete->max_pkt_per_msg,
 +                              init_complete->pkt_alignment_factor);
 +              }
                break;
  
        case RNDIS_MSG_QUERY_C:
 -              netdev_dbg(netdev, "RNDIS_MSG_QUERY_C "
 -                      "(len %u, id 0x%x, status 0x%x, buf len %u, "
 -                      "buf offset %u)\n",
 -                      rndis_msg->msg_len,
 -                      rndis_msg->msg.query_complete.req_id,
 -                      rndis_msg->msg.query_complete.status,
 -                      rndis_msg->msg.query_complete.
 -                         info_buflen,
 -                      rndis_msg->msg.query_complete.
 -                         info_buf_offset);
 +              if (rndis_msg->msg_len - RNDIS_HEADER_SIZE >=
 +                              sizeof(struct rndis_query_complete)) {
 +                      const struct rndis_query_complete *query_complete =
 +                              data + RNDIS_HEADER_SIZE;
 +                      netdev_dbg(netdev, "RNDIS_MSG_QUERY_C "
 +                              "(len %u, id 0x%x, status 0x%x, buf len %u, "
 +                              "buf offset %u)\n",
 +                              rndis_msg->msg_len,
 +                              query_complete->req_id,
 +                              query_complete->status,
 +                              query_complete->info_buflen,
 +                              query_complete->info_buf_offset);
 +              }
                break;
  
        case RNDIS_MSG_SET_C:
 -              netdev_dbg(netdev,
 -                      "RNDIS_MSG_SET_C (len %u, id 0x%x, status 0x%x)\n",
 -                      rndis_msg->msg_len,
 -                      rndis_msg->msg.set_complete.req_id,
 -                      rndis_msg->msg.set_complete.status);
 +              if (rndis_msg->msg_len - RNDIS_HEADER_SIZE >= sizeof(struct rndis_set_complete)) {
 +                      const struct rndis_set_complete *set_complete =
 +                              data + RNDIS_HEADER_SIZE;
 +                      netdev_dbg(netdev,
 +                              "RNDIS_MSG_SET_C (len %u, id 0x%x, status 0x%x)\n",
 +                              rndis_msg->msg_len,
 +                              set_complete->req_id,
 +                              set_complete->status);
 +              }
                break;
  
        case RNDIS_MSG_INDICATE:
 -              netdev_dbg(netdev, "RNDIS_MSG_INDICATE "
 -                      "(len %u, status 0x%x, buf len %u, buf offset %u)\n",
 -                      rndis_msg->msg_len,
 -                      rndis_msg->msg.indicate_status.status,
 -                      rndis_msg->msg.indicate_status.status_buflen,
 -                      rndis_msg->msg.indicate_status.status_buf_offset);
 +              if (rndis_msg->msg_len - RNDIS_HEADER_SIZE >=
 +                              sizeof(struct rndis_indicate_status)) {
 +                      const struct rndis_indicate_status *indicate_status =
 +                              data + RNDIS_HEADER_SIZE;
 +                      netdev_dbg(netdev, "RNDIS_MSG_INDICATE "
 +                              "(len %u, status 0x%x, buf len %u, buf offset %u)\n",
 +                              rndis_msg->msg_len,
 +                              indicate_status->status,
 +                              indicate_status->status_buflen,
 +                              indicate_status->status_buf_offset);
 +              }
                break;
  
        default:
@@@ -265,20 -246,11 +265,20 @@@ static void rndis_set_link_state(struc
  {
        u32 link_status;
        struct rndis_query_complete *query_complete;
 +      u32 msg_len = request->response_msg.msg_len;
 +
 +      /* Ensure the packet is big enough to access its fields */
 +      if (msg_len - RNDIS_HEADER_SIZE < sizeof(struct rndis_query_complete))
 +              return;
  
        query_complete = &request->response_msg.msg.query_complete;
  
        if (query_complete->status == RNDIS_STATUS_SUCCESS &&
 -          query_complete->info_buflen == sizeof(u32)) {
 +          query_complete->info_buflen >= sizeof(u32) &&
 +          query_complete->info_buf_offset >= sizeof(*query_complete) &&
 +          msg_len - RNDIS_HEADER_SIZE >= query_complete->info_buf_offset &&
 +          msg_len - RNDIS_HEADER_SIZE - query_complete->info_buf_offset
 +                      >= query_complete->info_buflen) {
                memcpy(&link_status, (void *)((unsigned long)query_complete +
                       query_complete->info_buf_offset), sizeof(u32));
                rdev->link_state = link_status != 0;
  
  static void rndis_filter_receive_response(struct net_device *ndev,
                                          struct netvsc_device *nvdev,
 -                                        const struct rndis_message *resp)
 +                                        struct rndis_message *resp,
 +                                        void *data)
  {
 +      u32 *req_id = &resp->msg.init_complete.req_id;
        struct rndis_device *dev = nvdev->extension;
        struct rndis_request *request = NULL;
        bool found = false;
                return;
        }
  
 +      /* Copy the request ID into nvchan->recv_buf */
 +      *req_id = *(u32 *)(data + RNDIS_HEADER_SIZE);
 +
        spin_lock_irqsave(&dev->request_lock, flags);
        list_for_each_entry(request, &dev->req_list, list_ent) {
                /*
                 * All request/response message contains RequestId as the 1st
                 * field
                 */
 -              if (request->request_msg.msg.init_req.req_id
 -                  == resp->msg.init_complete.req_id) {
 +              if (request->request_msg.msg.init_req.req_id == *req_id) {
                        found = true;
                        break;
                }
        if (found) {
                if (resp->msg_len <=
                    sizeof(struct rndis_message) + RNDIS_EXT_LEN) {
 -                      memcpy(&request->response_msg, resp,
 -                             resp->msg_len);
 +                      memcpy(&request->response_msg, resp, RNDIS_HEADER_SIZE + sizeof(*req_id));
 +                      memcpy((void *)&request->response_msg + RNDIS_HEADER_SIZE + sizeof(*req_id),
 +                             data + RNDIS_HEADER_SIZE + sizeof(*req_id),
 +                             resp->msg_len - RNDIS_HEADER_SIZE - sizeof(*req_id));
                        if (request->request_msg.ndis_msg_type ==
                            RNDIS_MSG_QUERY && request->request_msg.msg.
                            query_req.oid == RNDIS_OID_GEN_MEDIA_CONNECT_STATUS)
                netdev_err(ndev,
                        "no rndis request found for this response "
                        "(id 0x%x res type 0x%x)\n",
 -                      resp->msg.init_complete.req_id,
 +                      *req_id,
                        resp->ndis_msg_type);
        }
  }
   */
  static inline void *rndis_get_ppi(struct net_device *ndev,
                                  struct rndis_packet *rpkt,
 -                                u32 rpkt_len, u32 type, u8 internal)
 +                                u32 rpkt_len, u32 type, u8 internal,
 +                                u32 ppi_size, void *data)
  {
        struct rndis_per_packet_info *ppi;
        int len;
                return NULL;
        }
  
 -      if (rpkt->per_pkt_info_len > rpkt_len - rpkt->per_pkt_info_offset) {
 +      if (rpkt->per_pkt_info_len < sizeof(*ppi) ||
 +          rpkt->per_pkt_info_len > rpkt_len - rpkt->per_pkt_info_offset) {
                netdev_err(ndev, "Invalid per_pkt_info_len: %u\n",
                           rpkt->per_pkt_info_len);
                return NULL;
  
        ppi = (struct rndis_per_packet_info *)((ulong)rpkt +
                rpkt->per_pkt_info_offset);
 +      /* Copy the PPIs into nvchan->recv_buf */
 +      memcpy(ppi, data + RNDIS_HEADER_SIZE + rpkt->per_pkt_info_offset, rpkt->per_pkt_info_len);
        len = rpkt->per_pkt_info_len;
  
        while (len > 0) {
                        continue;
                }
  
 -              if (ppi->type == type && ppi->internal == internal)
 +              if (ppi->type == type && ppi->internal == internal) {
 +                      /* ppi->size should be big enough to hold the returned object. */
 +                      if (ppi->size - ppi->ppi_offset < ppi_size) {
 +                              netdev_err(ndev, "Invalid ppi: size %u ppi_offset %u\n",
 +                                         ppi->size, ppi->ppi_offset);
 +                              continue;
 +                      }
                        return (void *)((ulong)ppi + ppi->ppi_offset);
 +              }
                len -= ppi->size;
                ppi = (struct rndis_per_packet_info *)((ulong)ppi + ppi->size);
        }
@@@ -447,29 -402,10 +447,29 @@@ void rsc_add_data(struct netvsc_channe
        if (cnt) {
                nvchan->rsc.pktlen += len;
        } else {
 -              nvchan->rsc.vlan = vlan;
 -              nvchan->rsc.csum_info = csum_info;
 +              /* The data/values pointed by vlan, csum_info and hash_info are shared
 +               * across the different 'fragments' of the RSC packet; store them into
 +               * the packet itself.
 +               */
 +              if (vlan != NULL) {
 +                      memcpy(&nvchan->rsc.vlan, vlan, sizeof(*vlan));
 +                      nvchan->rsc.ppi_flags |= NVSC_RSC_VLAN;
 +              } else {
 +                      nvchan->rsc.ppi_flags &= ~NVSC_RSC_VLAN;
 +              }
 +              if (csum_info != NULL) {
 +                      memcpy(&nvchan->rsc.csum_info, csum_info, sizeof(*csum_info));
 +                      nvchan->rsc.ppi_flags |= NVSC_RSC_CSUM_INFO;
 +              } else {
 +                      nvchan->rsc.ppi_flags &= ~NVSC_RSC_CSUM_INFO;
 +              }
                nvchan->rsc.pktlen = len;
 -              nvchan->rsc.hash_info = hash_info;
 +              if (hash_info != NULL) {
 +                      nvchan->rsc.hash_info = *hash_info;
 +                      nvchan->rsc.ppi_flags |= NVSC_RSC_HASH_INFO;
 +              } else {
 +                      nvchan->rsc.ppi_flags &= ~NVSC_RSC_HASH_INFO;
 +              }
        }
  
        nvchan->rsc.data[cnt] = data;
@@@ -481,7 -417,7 +481,7 @@@ static int rndis_filter_receive_data(st
                                     struct netvsc_device *nvdev,
                                     struct netvsc_channel *nvchan,
                                     struct rndis_message *msg,
 -                                   u32 data_buflen)
 +                                   void *data, u32 data_buflen)
  {
        struct rndis_packet *rndis_pkt = &msg->msg.pkt;
        const struct ndis_tcp_ip_checksum_info *csum_info;
        const struct rndis_pktinfo_id *pktinfo_id;
        const u32 *hash_info;
        u32 data_offset, rpkt_len;
 -      void *data;
        bool rsc_more = false;
        int ret;
  
                return NVSP_STAT_FAIL;
        }
  
 +      /* Copy the RNDIS packet into nvchan->recv_buf */
 +      memcpy(rndis_pkt, data + RNDIS_HEADER_SIZE, sizeof(*rndis_pkt));
 +
        /* Validate rndis_pkt offset */
        if (rndis_pkt->data_offset >= data_buflen - RNDIS_HEADER_SIZE) {
                netdev_err(ndev, "invalid rndis packet offset: %u\n",
                return NVSP_STAT_FAIL;
        }
  
 -      vlan = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, IEEE_8021Q_INFO, 0);
 -
 -      csum_info = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, TCPIP_CHKSUM_PKTINFO, 0);
 +      vlan = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, IEEE_8021Q_INFO, 0, sizeof(*vlan),
 +                           data);
  
 -      hash_info = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, NBL_HASH_VALUE, 0);
 +      csum_info = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, TCPIP_CHKSUM_PKTINFO, 0,
 +                                sizeof(*csum_info), data);
  
 -      pktinfo_id = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, RNDIS_PKTINFO_ID, 1);
 +      hash_info = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, NBL_HASH_VALUE, 0,
 +                                sizeof(*hash_info), data);
  
 -      data = (void *)msg + data_offset;
 +      pktinfo_id = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, RNDIS_PKTINFO_ID, 1,
 +                                 sizeof(*pktinfo_id), data);
  
        /* Identify RSC frags, drop erroneous packets */
        if (pktinfo_id && (pktinfo_id->flag & RNDIS_PKTINFO_SUBALLOC)) {
         * the data packet to the stack, without the rndis trailer padding
         */
        rsc_add_data(nvchan, vlan, csum_info, hash_info,
 -                   data, rndis_pkt->data_len);
 +                   data + data_offset, rndis_pkt->data_len);
  
        if (rsc_more)
                return NVSP_STAT_SUCCESS;
        return ret;
  
  drop:
-       /* Drop incomplete packet */
-       nvchan->rsc.cnt = 0;
        return NVSP_STAT_FAIL;
  }
  
@@@ -588,41 -518,33 +586,41 @@@ int rndis_filter_receive(struct net_dev
                         void *data, u32 buflen)
  {
        struct net_device_context *net_device_ctx = netdev_priv(ndev);
 -      struct rndis_message *rndis_msg = data;
 +      struct rndis_message *rndis_msg = nvchan->recv_buf;
  
 -      if (netif_msg_rx_status(net_device_ctx))
 -              dump_rndis_message(ndev, rndis_msg);
 +      if (buflen < RNDIS_HEADER_SIZE) {
 +              netdev_err(ndev, "Invalid rndis_msg (buflen: %u)\n", buflen);
 +              return NVSP_STAT_FAIL;
 +      }
 +
 +      /* Copy the RNDIS msg header into nvchan->recv_buf */
 +      memcpy(rndis_msg, data, RNDIS_HEADER_SIZE);
  
        /* Validate incoming rndis_message packet */
 -      if (buflen < RNDIS_HEADER_SIZE || rndis_msg->msg_len < RNDIS_HEADER_SIZE ||
 +      if (rndis_msg->msg_len < RNDIS_HEADER_SIZE ||
            buflen < rndis_msg->msg_len) {
                netdev_err(ndev, "Invalid rndis_msg (buflen: %u, msg_len: %u)\n",
                           buflen, rndis_msg->msg_len);
                return NVSP_STAT_FAIL;
        }
  
 +      if (netif_msg_rx_status(net_device_ctx))
 +              dump_rndis_message(ndev, rndis_msg, data);
 +
        switch (rndis_msg->ndis_msg_type) {
        case RNDIS_MSG_PACKET:
                return rndis_filter_receive_data(ndev, net_dev, nvchan,
 -                                               rndis_msg, buflen);
 +                                               rndis_msg, data, buflen);
        case RNDIS_MSG_INIT_C:
        case RNDIS_MSG_QUERY_C:
        case RNDIS_MSG_SET_C:
                /* completion msgs */
 -              rndis_filter_receive_response(ndev, net_dev, rndis_msg);
 +              rndis_filter_receive_response(ndev, net_dev, rndis_msg, data);
                break;
  
        case RNDIS_MSG_INDICATE:
                /* notification msgs */
 -              netvsc_linkstatus_callback(ndev, rndis_msg);
 +              netvsc_linkstatus_callback(ndev, rndis_msg, data);
                break;
        default:
                netdev_err(ndev,
@@@ -643,7 -565,6 +641,7 @@@ static int rndis_filter_query_device(st
        u32 inresult_size = *result_size;
        struct rndis_query_request *query;
        struct rndis_query_complete *query_complete;
 +      u32 msg_len;
        int ret = 0;
  
        if (!result)
  
        /* Copy the response back */
        query_complete = &request->response_msg.msg.query_complete;
 +      msg_len = request->response_msg.msg_len;
 +
 +      /* Ensure the packet is big enough to access its fields */
 +      if (msg_len - RNDIS_HEADER_SIZE < sizeof(struct rndis_query_complete)) {
 +              ret = -1;
 +              goto cleanup;
 +      }
  
 -      if (query_complete->info_buflen > inresult_size) {
 +      if (query_complete->info_buflen > inresult_size ||
 +          query_complete->info_buf_offset < sizeof(*query_complete) ||
 +          msg_len - RNDIS_HEADER_SIZE < query_complete->info_buf_offset ||
 +          msg_len - RNDIS_HEADER_SIZE - query_complete->info_buf_offset
 +                      < query_complete->info_buflen) {
                ret = -1;
                goto cleanup;
        }
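
Each completion case above (and rndis_filter_query_device) repeats the same guard: once the caller has verified msg_len >= RNDIS_HEADER_SIZE, the remaining msg_len - RNDIS_HEADER_SIZE bytes must be large enough to hold the typed body before any field is read. A compact standalone sketch of that pattern, using a made-up header size and body struct rather than the driver's real definitions:

#include <stdint.h>
#include <stdio.h>

#define MSG_HDR_SIZE 8u                 /* stand-in for RNDIS_HEADER_SIZE */

struct set_complete_body {              /* stand-in for struct rndis_set_complete */
        uint32_t req_id;
        uint32_t status;
};

/* Return a pointer to the typed body, or NULL if the message is too short
 * to contain it.  Assumes the caller already checked msg_len >= MSG_HDR_SIZE,
 * as rndis_filter_receive() does before dispatching on the message type.
 */
static const struct set_complete_body *get_set_complete(const void *msg,
                                                        uint32_t msg_len)
{
        if (msg_len - MSG_HDR_SIZE < sizeof(struct set_complete_body))
                return NULL;            /* truncated: do not read the fields */

        return (const struct set_complete_body *)((const char *)msg + MSG_HDR_SIZE);
}

int main(void)
{
        unsigned char buf[16] = { 0 };

        printf("%s\n", get_set_complete(buf, sizeof(buf)) ? "body ok" : "too short");
        printf("%s\n", get_set_complete(buf, 10) ? "body ok" : "too short");
        return 0;
}
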
diff --combined drivers/net/ipa/gsi.c
index 511c94f66036c176fbdbb9c46d9bec945103a349,b77f5fef7aecab8325ddd29304500cd0b23fc531..4402136461888efc59a831449152362494ede684
@@@ -89,9 -89,9 +89,9 @@@
  /* Delay period for interrupt moderation (in 32KHz IPA internal timer ticks) */
  #define GSI_EVT_RING_INT_MODT         (32 * 1) /* 1ms under 32KHz clock */
  
 -#define GSI_CMD_TIMEOUT                       5       /* seconds */
 +#define GSI_CMD_TIMEOUT                       50      /* milliseconds */
  
 -#define GSI_CHANNEL_STOP_RX_RETRIES   10
 +#define GSI_CHANNEL_STOP_RETRIES      10
  #define GSI_CHANNEL_MODEM_HALT_RETRIES        10
  
  #define GSI_MHI_EVENT_ID_START                10      /* 1st reserved event id */
@@@ -220,59 -220,7 +220,59 @@@ static void gsi_irq_teardown(struct gs
        /* Nothing to do */
  }
  
 -static void gsi_irq_ieob_enable(struct gsi *gsi, u32 evt_ring_id)
 +/* Event ring commands are performed one at a time.  Their completion
 + * is signaled by the event ring control GSI interrupt type, which is
 + * only enabled when we issue an event ring command.  Only the event
 + * ring being operated on has this interrupt enabled.
 + */
 +static void gsi_irq_ev_ctrl_enable(struct gsi *gsi, u32 evt_ring_id)
 +{
 +      u32 val = BIT(evt_ring_id);
 +
 +      /* There's a small chance that a previous command completed
 +       * after the interrupt was disabled, so make sure we have no
 +       * pending interrupts before we enable them.
 +       */
 +      iowrite32(~0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_CLR_OFFSET);
 +
 +      iowrite32(val, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET);
 +      gsi_irq_type_enable(gsi, GSI_EV_CTRL);
 +}
 +
 +/* Disable event ring control interrupts */
 +static void gsi_irq_ev_ctrl_disable(struct gsi *gsi)
 +{
 +      gsi_irq_type_disable(gsi, GSI_EV_CTRL);
 +      iowrite32(0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET);
 +}
 +
 +/* Channel commands are performed one at a time.  Their completion is
 + * signaled by the channel control GSI interrupt type, which is only
 + * enabled when we issue a channel command.  Only the channel being
 + * operated on has this interrupt enabled.
 + */
 +static void gsi_irq_ch_ctrl_enable(struct gsi *gsi, u32 channel_id)
 +{
 +      u32 val = BIT(channel_id);
 +
 +      /* There's a small chance that a previous command completed
 +       * after the interrupt was disabled, so make sure we have no
 +       * pending interrupts before we enable them.
 +       */
 +      iowrite32(~0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_CLR_OFFSET);
 +
 +      iowrite32(val, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET);
 +      gsi_irq_type_enable(gsi, GSI_CH_CTRL);
 +}
 +
 +/* Disable channel control interrupts */
 +static void gsi_irq_ch_ctrl_disable(struct gsi *gsi)
 +{
 +      gsi_irq_type_disable(gsi, GSI_CH_CTRL);
 +      iowrite32(0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET);
 +}
 +
 +static void gsi_irq_ieob_enable_one(struct gsi *gsi, u32 evt_ring_id)
  {
        bool enable_ieob = !gsi->ieob_enabled_bitmap;
        u32 val;
                gsi_irq_type_enable(gsi, GSI_IEOB);
  }
  
 -static void gsi_irq_ieob_disable(struct gsi *gsi, u32 evt_ring_id)
 +static void gsi_irq_ieob_disable(struct gsi *gsi, u32 event_mask)
  {
        u32 val;
  
 -      gsi->ieob_enabled_bitmap &= ~BIT(evt_ring_id);
 +      gsi->ieob_enabled_bitmap &= ~event_mask;
  
        /* Disable the interrupt type if this was the last enabled channel */
        if (!gsi->ieob_enabled_bitmap)
        iowrite32(val, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_MSK_OFFSET);
  }
  
 +static void gsi_irq_ieob_disable_one(struct gsi *gsi, u32 evt_ring_id)
 +{
 +      gsi_irq_ieob_disable(gsi, BIT(evt_ring_id));
 +}
 +
  /* Enable all GSI interrupt types */
  static void gsi_irq_enable(struct gsi *gsi)
  {
@@@ -364,13 -307,11 +364,13 @@@ static u32 gsi_ring_index(struct gsi_ri
  static bool
  gsi_command(struct gsi *gsi, u32 reg, u32 val, struct completion *completion)
  {
 +      unsigned long timeout = msecs_to_jiffies(GSI_CMD_TIMEOUT);
 +
        reinit_completion(completion);
  
        iowrite32(val, gsi->virt + reg);
  
 -      return !!wait_for_completion_timeout(completion, GSI_CMD_TIMEOUT * HZ);
 +      return !!wait_for_completion_timeout(completion, timeout);
  }
  
  /* Return the hardware's notion of the current state of an event ring */
@@@ -385,54 -326,68 +385,54 @@@ gsi_evt_ring_state(struct gsi *gsi, u3
  }
  
  /* Issue an event ring command and wait for it to complete */
 -static void evt_ring_command(struct gsi *gsi, u32 evt_ring_id,
 -                           enum gsi_evt_cmd_opcode opcode)
 +static void gsi_evt_ring_command(struct gsi *gsi, u32 evt_ring_id,
 +                               enum gsi_evt_cmd_opcode opcode)
  {
        struct gsi_evt_ring *evt_ring = &gsi->evt_ring[evt_ring_id];
        struct completion *completion = &evt_ring->completion;
        struct device *dev = gsi->dev;
 -      bool success;
 +      bool timeout;
        u32 val;
  
 -      /* We only perform one event ring command at a time, and event
 -       * control interrupts should only occur when such a command
 -       * is issued here.  Only permit *this* event ring to trigger
 -       * an interrupt, and only enable the event control IRQ type
 -       * when we expect it to occur.
 -       *
 -       * There's a small chance that a previous command completed
 -       * after the interrupt was disabled, so make sure we have no
 -       * pending interrupts before we enable them.
 -       */
 -      iowrite32(~0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_CLR_OFFSET);
 -
 -      val = BIT(evt_ring_id);
 -      iowrite32(val, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET);
 -      gsi_irq_type_enable(gsi, GSI_EV_CTRL);
 +      /* Enable the completion interrupt for the command */
 +      gsi_irq_ev_ctrl_enable(gsi, evt_ring_id);
  
        val = u32_encode_bits(evt_ring_id, EV_CHID_FMASK);
        val |= u32_encode_bits(opcode, EV_OPCODE_FMASK);
  
 -      success = gsi_command(gsi, GSI_EV_CH_CMD_OFFSET, val, completion);
 +      timeout = !gsi_command(gsi, GSI_EV_CH_CMD_OFFSET, val, completion);
  
 -      /* Disable the interrupt again */
 -      gsi_irq_type_disable(gsi, GSI_EV_CTRL);
 -      iowrite32(0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET);
 +      gsi_irq_ev_ctrl_disable(gsi);
  
 -      if (success)
 +      if (!timeout)
                return;
  
        dev_err(dev, "GSI command %u for event ring %u timed out, state %u\n",
 -              opcode, evt_ring_id, evt_ring->state);
 +              opcode, evt_ring_id, gsi_evt_ring_state(gsi, evt_ring_id));
  }
  
  /* Allocate an event ring in NOT_ALLOCATED state */
  static int gsi_evt_ring_alloc_command(struct gsi *gsi, u32 evt_ring_id)
  {
 -      struct gsi_evt_ring *evt_ring = &gsi->evt_ring[evt_ring_id];
 +      enum gsi_evt_ring_state state;
  
        /* Get initial event ring state */
 -      evt_ring->state = gsi_evt_ring_state(gsi, evt_ring_id);
 -      if (evt_ring->state != GSI_EVT_RING_STATE_NOT_ALLOCATED) {
 +      state = gsi_evt_ring_state(gsi, evt_ring_id);
 +      if (state != GSI_EVT_RING_STATE_NOT_ALLOCATED) {
                dev_err(gsi->dev, "event ring %u bad state %u before alloc\n",
 -                      evt_ring_id, evt_ring->state);
 +                      evt_ring_id, state);
                return -EINVAL;
        }
  
 -      evt_ring_command(gsi, evt_ring_id, GSI_EVT_ALLOCATE);
 +      gsi_evt_ring_command(gsi, evt_ring_id, GSI_EVT_ALLOCATE);
  
        /* If successful the event ring state will have changed */
 -      if (evt_ring->state == GSI_EVT_RING_STATE_ALLOCATED)
 +      state = gsi_evt_ring_state(gsi, evt_ring_id);
 +      if (state == GSI_EVT_RING_STATE_ALLOCATED)
                return 0;
  
        dev_err(gsi->dev, "event ring %u bad state %u after alloc\n",
 -              evt_ring_id, evt_ring->state);
 +              evt_ring_id, state);
  
        return -EIO;
  }
  /* Reset a GSI event ring in ALLOCATED or ERROR state. */
  static void gsi_evt_ring_reset_command(struct gsi *gsi, u32 evt_ring_id)
  {
 -      struct gsi_evt_ring *evt_ring = &gsi->evt_ring[evt_ring_id];
 -      enum gsi_evt_ring_state state = evt_ring->state;
 +      enum gsi_evt_ring_state state;
  
 +      state = gsi_evt_ring_state(gsi, evt_ring_id);
        if (state != GSI_EVT_RING_STATE_ALLOCATED &&
            state != GSI_EVT_RING_STATE_ERROR) {
                dev_err(gsi->dev, "event ring %u bad state %u before reset\n",
 -                      evt_ring_id, evt_ring->state);
 +                      evt_ring_id, state);
                return;
        }
  
 -      evt_ring_command(gsi, evt_ring_id, GSI_EVT_RESET);
 +      gsi_evt_ring_command(gsi, evt_ring_id, GSI_EVT_RESET);
  
        /* If successful the event ring state will have changed */
 -      if (evt_ring->state == GSI_EVT_RING_STATE_ALLOCATED)
 +      state = gsi_evt_ring_state(gsi, evt_ring_id);
 +      if (state == GSI_EVT_RING_STATE_ALLOCATED)
                return;
  
        dev_err(gsi->dev, "event ring %u bad state %u after reset\n",
 -              evt_ring_id, evt_ring->state);
 +              evt_ring_id, state);
  }
  
  /* Issue a hardware de-allocation request for an allocated event ring */
  static void gsi_evt_ring_de_alloc_command(struct gsi *gsi, u32 evt_ring_id)
  {
 -      struct gsi_evt_ring *evt_ring = &gsi->evt_ring[evt_ring_id];
 +      enum gsi_evt_ring_state state;
  
 -      if (evt_ring->state != GSI_EVT_RING_STATE_ALLOCATED) {
 +      state = gsi_evt_ring_state(gsi, evt_ring_id);
 +      if (state != GSI_EVT_RING_STATE_ALLOCATED) {
                dev_err(gsi->dev, "event ring %u state %u before dealloc\n",
 -                      evt_ring_id, evt_ring->state);
 +                      evt_ring_id, state);
                return;
        }
  
 -      evt_ring_command(gsi, evt_ring_id, GSI_EVT_DE_ALLOC);
 +      gsi_evt_ring_command(gsi, evt_ring_id, GSI_EVT_DE_ALLOC);
  
        /* If successful the event ring state will have changed */
 -      if (evt_ring->state == GSI_EVT_RING_STATE_NOT_ALLOCATED)
 +      state = gsi_evt_ring_state(gsi, evt_ring_id);
 +      if (state == GSI_EVT_RING_STATE_NOT_ALLOCATED)
                return;
  
        dev_err(gsi->dev, "event ring %u bad state %u after dealloc\n",
 -              evt_ring_id, evt_ring->state);
 +              evt_ring_id, state);
  }
  
  /* Fetch the current state of a channel from hardware */
@@@ -504,19 -456,34 +504,19 @@@ gsi_channel_command(struct gsi_channel 
        u32 channel_id = gsi_channel_id(channel);
        struct gsi *gsi = channel->gsi;
        struct device *dev = gsi->dev;
 -      bool success;
 +      bool timeout;
        u32 val;
  
 -      /* We only perform one channel command at a time, and channel
 -       * control interrupts should only occur when such a command is
 -       * issued here.  So we only permit *this* channel to trigger
 -       * an interrupt and only enable the channel control IRQ type
 -       * when we expect it to occur.
 -       *
 -       * There's a small chance that a previous command completed
 -       * after the interrupt was disabled, so make sure we have no
 -       * pending interrupts before we enable them.
 -       */
 -      iowrite32(~0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_CLR_OFFSET);
 -
 -      val = BIT(channel_id);
 -      iowrite32(val, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET);
 -      gsi_irq_type_enable(gsi, GSI_CH_CTRL);
 +      /* Enable the completion interrupt for the command */
 +      gsi_irq_ch_ctrl_enable(gsi, channel_id);
  
        val = u32_encode_bits(channel_id, CH_CHID_FMASK);
        val |= u32_encode_bits(opcode, CH_OPCODE_FMASK);
 -      success = gsi_command(gsi, GSI_CH_CMD_OFFSET, val, completion);
 +      timeout = !gsi_command(gsi, GSI_CH_CMD_OFFSET, val, completion);
  
 -      /* Disable the interrupt again */
 -      gsi_irq_type_disable(gsi, GSI_CH_CTRL);
 -      iowrite32(0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET);
 +      gsi_irq_ch_ctrl_disable(gsi);
  
 -      if (success)
 +      if (!timeout)
                return;
  
        dev_err(dev, "GSI command %u for channel %u timed out, state %u\n",
@@@ -622,8 -589,7 +622,8 @@@ static void gsi_channel_reset_command(s
        struct device *dev = channel->gsi->dev;
        enum gsi_channel_state state;
  
 -      msleep(1);      /* A short delay is required before a RESET command */
 +      /* A short delay is required before a RESET command */
 +      usleep_range(USEC_PER_MSEC, 2 * USEC_PER_MSEC);
  
        state = gsi_channel_state(channel);
        if (state != GSI_CHANNEL_STATE_STOPPED &&
@@@ -729,38 -695,22 +729,38 @@@ static void gsi_evt_ring_program(struc
        gsi_evt_ring_doorbell(gsi, evt_ring_id, 0);
  }
  
 -/* Return the last (most recent) transaction completed on a channel. */
 +/* Find the transaction whose completion indicates a channel is quiesced */
  static struct gsi_trans *gsi_channel_trans_last(struct gsi_channel *channel)
  {
        struct gsi_trans_info *trans_info = &channel->trans_info;
 +      const struct list_head *list;
        struct gsi_trans *trans;
  
        spin_lock_bh(&trans_info->spinlock);
  
 -      if (!list_empty(&trans_info->complete))
 -              trans = list_last_entry(&trans_info->complete,
 -                                      struct gsi_trans, links);
 -      else if (!list_empty(&trans_info->polled))
 -              trans = list_last_entry(&trans_info->polled,
 -                                      struct gsi_trans, links);
 -      else
 -              trans = NULL;
 +      /* There is a small chance a TX transaction got allocated just
 +       * before we disabled transmits, so check for that.
 +       */
 +      if (channel->toward_ipa) {
 +              list = &trans_info->alloc;
 +              if (!list_empty(list))
 +                      goto done;
 +              list = &trans_info->pending;
 +              if (!list_empty(list))
 +                      goto done;
 +      }
 +
 +      /* Otherwise (TX or RX) we want to wait for anything that
 +       * has completed, or has been polled but not released yet.
 +       */
 +      list = &trans_info->complete;
 +      if (!list_empty(list))
 +              goto done;
 +      list = &trans_info->polled;
 +      if (list_empty(list))
 +              list = NULL;
 +done:
 +      trans = list ? list_last_entry(list, struct gsi_trans, links) : NULL;
  
        /* Caller will wait for this, so take a reference */
        if (trans)
@@@ -784,6 -734,24 +784,6 @@@ static void gsi_channel_trans_quiesce(s
        }
  }
  
 -/* Stop channel activity.  Transactions may not be allocated until thawed. */
 -static void gsi_channel_freeze(struct gsi_channel *channel)
 -{
 -      gsi_channel_trans_quiesce(channel);
 -
 -      napi_disable(&channel->napi);
 -
 -      gsi_irq_ieob_disable(channel->gsi, channel->evt_ring_id);
 -}
 -
 -/* Allow transactions to be used on the channel again. */
 -static void gsi_channel_thaw(struct gsi_channel *channel)
 -{
 -      gsi_irq_ieob_enable(channel->gsi, channel->evt_ring_id);
 -
 -      napi_enable(&channel->napi);
 -}
 -
  /* Program a channel for use */
  static void gsi_channel_program(struct gsi_channel *channel, bool doorbell)
  {
@@@ -875,92 -843,51 +875,92 @@@ static void gsi_channel_deprogram(struc
        /* Nothing to do */
  }
  
 -/* Start an allocated GSI channel */
 -int gsi_channel_start(struct gsi *gsi, u32 channel_id)
 +static int __gsi_channel_start(struct gsi_channel *channel, bool start)
  {
 -      struct gsi_channel *channel = &gsi->channel[channel_id];
 +      struct gsi *gsi = channel->gsi;
        int ret;
  
 +      if (!start)
 +              return 0;
 +
        mutex_lock(&gsi->mutex);
  
        ret = gsi_channel_start_command(channel);
  
        mutex_unlock(&gsi->mutex);
  
 -      gsi_channel_thaw(channel);
 -
        return ret;
  }
  
 -/* Stop a started channel */
 -int gsi_channel_stop(struct gsi *gsi, u32 channel_id)
 +/* Start an allocated GSI channel */
 +int gsi_channel_start(struct gsi *gsi, u32 channel_id)
  {
        struct gsi_channel *channel = &gsi->channel[channel_id];
 -      u32 retries;
        int ret;
  
 -      gsi_channel_freeze(channel);
 +      /* Enable NAPI and the completion interrupt */
 +      napi_enable(&channel->napi);
 +      gsi_irq_ieob_enable_one(gsi, channel->evt_ring_id);
  
 -      /* RX channels might require a little time to enter STOPPED state */
 -      retries = channel->toward_ipa ? 0 : GSI_CHANNEL_STOP_RX_RETRIES;
 +      ret = __gsi_channel_start(channel, true);
 +      if (ret) {
 +              gsi_irq_ieob_disable_one(gsi, channel->evt_ring_id);
 +              napi_disable(&channel->napi);
 +      }
  
 -      mutex_lock(&gsi->mutex);
 +      return ret;
 +}
 +
 +static int gsi_channel_stop_retry(struct gsi_channel *channel)
 +{
 +      u32 retries = GSI_CHANNEL_STOP_RETRIES;
 +      int ret;
  
        do {
                ret = gsi_channel_stop_command(channel);
                if (ret != -EAGAIN)
                        break;
 -              msleep(1);
 +              usleep_range(3 * USEC_PER_MSEC, 5 * USEC_PER_MSEC);
        } while (retries--);
  
 +      return ret;
 +}
 +
 +static int __gsi_channel_stop(struct gsi_channel *channel, bool stop)
 +{
 +      struct gsi *gsi = channel->gsi;
 +      int ret;
 +
 +      /* Wait for any underway transactions to complete before stopping. */
 +      gsi_channel_trans_quiesce(channel);
 +
 +      if (!stop)
 +              return 0;
 +
 +      mutex_lock(&gsi->mutex);
 +
 +      ret = gsi_channel_stop_retry(channel);
 +
        mutex_unlock(&gsi->mutex);
  
 -      /* Thaw the channel if we need to retry (or on error) */
 +      return ret;
 +}
 +
 +/* Stop a started channel */
 +int gsi_channel_stop(struct gsi *gsi, u32 channel_id)
 +{
 +      struct gsi_channel *channel = &gsi->channel[channel_id];
 +      int ret;
 +
 +      ret = __gsi_channel_stop(channel, true);
        if (ret)
 -              gsi_channel_thaw(channel);
 +              return ret;
  
 -      return ret;
 +      /* Disable the completion interrupt and NAPI if successful */
 +      gsi_irq_ieob_disable_one(gsi, channel->evt_ring_id);
 +      napi_disable(&channel->napi);
 +
 +      return 0;
  }
  
  /* Reset and reconfigure a channel, (possibly) enabling the doorbell engine */
@@@ -985,14 -912,11 +985,14 @@@ void gsi_channel_reset(struct gsi *gsi
  int gsi_channel_suspend(struct gsi *gsi, u32 channel_id, bool stop)
  {
        struct gsi_channel *channel = &gsi->channel[channel_id];
 +      int ret;
  
 -      if (stop)
 -              return gsi_channel_stop(gsi, channel_id);
 +      ret = __gsi_channel_stop(channel, stop);
 +      if (ret)
 +              return ret;
  
 -      gsi_channel_freeze(channel);
 +      /* Ensure NAPI polling has finished. */
 +      napi_synchronize(&channel->napi);
  
        return 0;
  }
@@@ -1002,7 -926,12 +1002,7 @@@ int gsi_channel_resume(struct gsi *gsi
  {
        struct gsi_channel *channel = &gsi->channel[channel_id];
  
 -      if (start)
 -              return gsi_channel_start(gsi, channel_id);
 -
 -      gsi_channel_thaw(channel);
 -
 -      return 0;
 +      return __gsi_channel_start(channel, start);
  }
  
  /**
@@@ -1111,6 -1040,7 +1111,6 @@@ static void gsi_isr_evt_ctrl(struct gs
                event_mask ^= BIT(evt_ring_id);
  
                evt_ring = &gsi->evt_ring[evt_ring_id];
 -              evt_ring->state = gsi_evt_ring_state(gsi, evt_ring_id);
  
                complete(&evt_ring->completion);
        }
@@@ -1248,7 -1178,6 +1248,7 @@@ static void gsi_isr_ieob(struct gsi *gs
        u32 event_mask;
  
        event_mask = ioread32(gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_OFFSET);
 +      gsi_irq_ieob_disable(gsi, event_mask);
        iowrite32(event_mask, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_CLR_OFFSET);
  
        while (event_mask) {
  
                event_mask ^= BIT(evt_ring_id);
  
 -              gsi_irq_ieob_disable(gsi, evt_ring_id);
                napi_schedule(&gsi->evt_ring[evt_ring_id].channel->napi);
        }
  }
@@@ -1500,7 -1430,7 +1500,7 @@@ void gsi_channel_doorbell(struct gsi_ch
  }
  
  /* Consult hardware, move any newly completed transactions to completed list */
 -static void gsi_channel_update(struct gsi_channel *channel)
 +static struct gsi_trans *gsi_channel_update(struct gsi_channel *channel)
  {
        u32 evt_ring_id = channel->evt_ring_id;
        struct gsi *gsi = channel->gsi;
        offset = GSI_EV_CH_E_CNTXT_4_OFFSET(evt_ring_id);
        index = gsi_ring_index(ring, ioread32(gsi->virt + offset));
        if (index == ring->index % ring->count)
 -              return;
 +              return NULL;
  
        /* Get the transaction for the latest completed event.  Take a
         * reference to keep it from completing before we give the events
        gsi_evt_ring_doorbell(channel->gsi, channel->evt_ring_id, index);
  
        gsi_trans_free(trans);
 +
 +      return gsi_channel_trans_complete(channel);
  }
  
  /**
@@@ -1566,8 -1494,11 +1566,8 @@@ static struct gsi_trans *gsi_channel_po
  
        /* Get the first transaction from the completed list */
        trans = gsi_channel_trans_complete(channel);
 -      if (!trans) {
 -              /* List is empty; see if there's more to do */
 -              gsi_channel_update(channel);
 -              trans = gsi_channel_trans_complete(channel);
 -      }
 +      if (!trans)     /* List is empty; see if there's more to do */
 +              trans = gsi_channel_update(channel);
  
        if (trans)
                gsi_trans_move_polled(trans);
  static int gsi_channel_poll(struct napi_struct *napi, int budget)
  {
        struct gsi_channel *channel;
 -      int count = 0;
 +      int count;
  
        channel = container_of(napi, struct gsi_channel, napi);
 -      while (count < budget) {
 +      for (count = 0; count < budget; count++) {
                struct gsi_trans *trans;
  
 -              count++;
                trans = gsi_channel_poll_one(channel);
                if (!trans)
                        break;
                gsi_trans_complete(trans);
        }
  
 -      if (count < budget) {
 -              napi_complete(&channel->napi);
 -              gsi_irq_ieob_enable(channel->gsi, channel->evt_ring_id);
 -      }
 +      if (count < budget && napi_complete(napi))
 +              gsi_irq_ieob_enable_one(channel->gsi, channel->evt_ring_id);
  
        return count;
  }
@@@ -1693,7 -1627,7 +1693,7 @@@ static int gsi_generic_command(struct g
                               enum gsi_generic_cmd_opcode opcode)
  {
        struct completion *completion = &gsi->completion;
 -      bool success;
 +      bool timeout;
        u32 val;
  
        /* The error global interrupt type is always enabled (until we
        val |= u32_encode_bits(channel_id, GENERIC_CHID_FMASK);
        val |= u32_encode_bits(GSI_EE_MODEM, GENERIC_EE_FMASK);
  
 -      success = gsi_command(gsi, GSI_GENERIC_CMD_OFFSET, val, completion);
 +      timeout = !gsi_command(gsi, GSI_GENERIC_CMD_OFFSET, val, completion);
  
        /* Disable the GP_INT1 IRQ type again */
        iowrite32(BIT(ERROR_INT), gsi->virt + GSI_CNTXT_GLOB_IRQ_EN_OFFSET);
  
 -      if (success)
 +      if (!timeout)
                return gsi->result;
  
        dev_err(gsi->dev, "GSI generic command %u to channel %u timed out\n",
@@@ -1776,6 -1710,7 +1776,7 @@@ static int gsi_channel_setup(struct gs
                if (!channel->gsi)
                        continue;       /* Ignore uninitialized channels */
  
+               ret = -EINVAL;
                dev_err(gsi->dev, "channel %u not supported by hardware\n",
                        channel_id - 1);
                channel_id = gsi->channel_count;
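
The reworked stop path above (gsi_channel_stop_retry) bounds the number of -EAGAIN retries and sleeps a few milliseconds between attempts with usleep_range() instead of msleep(1). A small userspace sketch of the same bounded-retry shape; issue_stop() here is a hypothetical stand-in for the real channel-stop command, not a kernel interface:

#include <errno.h>
#include <stdio.h>
#include <unistd.h>

#define STOP_RETRIES 10                 /* mirrors GSI_CHANNEL_STOP_RETRIES */

/* Hypothetical command: pretends the hardware needs a couple of tries
 * before it reaches the stopped state.
 */
static int issue_stop(void)
{
        static int calls;

        return ++calls < 3 ? -EAGAIN : 0;
}

static int stop_with_retries(void)
{
        unsigned int retries = STOP_RETRIES;
        int ret;

        do {
                ret = issue_stop();
                if (ret != -EAGAIN)
                        break;          /* success, or an error worth reporting */
                usleep(4000);           /* roughly the 3-5 ms usleep_range() above */
        } while (retries--);

        return ret;
}

int main(void)
{
        printf("stop returned %d\n", stop_with_retries());
        return 0;
}
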
diff --combined drivers/net/usb/qmi_wwan.c
index c8b2b60d2183416eebde1d4a4b4aa933de1cbe18,5a05add9b4e690e10c5027e10d59bd21f867ef91..6c3d8c2abd385609d22a7d6e101a08684189b3f8
@@@ -57,7 -57,6 +57,7 @@@ struct qmi_wwan_state 
  enum qmi_wwan_flags {
        QMI_WWAN_FLAG_RAWIP = 1 << 0,
        QMI_WWAN_FLAG_MUX = 1 << 1,
 +      QMI_WWAN_FLAG_PASS_THROUGH = 1 << 2,
  };
  
  enum qmi_wwan_quirks {
@@@ -187,7 -186,7 +187,7 @@@ static int qmimux_rx_fixup(struct usbne
                net = qmimux_find_dev(dev, hdr->mux_id);
                if (!net)
                        goto skip;
 -              skbn = netdev_alloc_skb(net, pkt_len);
 +              skbn = netdev_alloc_skb(net, pkt_len + LL_MAX_HEADER);
                if (!skbn)
                        return 0;
                skbn->dev = net;
                        goto skip;
                }
  
 +              skb_reserve(skbn, LL_MAX_HEADER);
                skb_put_data(skbn, skb->data + offset + qmimux_hdr_sz, pkt_len);
                if (netif_rx(skbn) != NET_RX_SUCCESS) {
                        net->stats.rx_errors++;
@@@ -219,28 -217,6 +219,28 @@@ skip
        return 1;
  }
  
 +static ssize_t mux_id_show(struct device *d, struct device_attribute *attr, char *buf)
 +{
 +      struct net_device *dev = to_net_dev(d);
 +      struct qmimux_priv *priv;
 +
 +      priv = netdev_priv(dev);
 +
 +      return sysfs_emit(buf, "0x%02x\n", priv->mux_id);
 +}
 +
 +static DEVICE_ATTR_RO(mux_id);
 +
 +static struct attribute *qmi_wwan_sysfs_qmimux_attrs[] = {
 +      &dev_attr_mux_id.attr,
 +      NULL,
 +};
 +
 +static struct attribute_group qmi_wwan_sysfs_qmimux_attr_group = {
 +      .name = "qmap",
 +      .attrs = qmi_wwan_sysfs_qmimux_attrs,
 +};
 +
  static int qmimux_register_device(struct net_device *real_dev, u8 mux_id)
  {
        struct net_device *new_dev;
                goto out_free_newdev;
        }
  
 +      new_dev->sysfs_groups[0] = &qmi_wwan_sysfs_qmimux_attr_group;
 +
        err = register_netdevice(new_dev);
        if (err < 0)
                goto out_free_newdev;
@@@ -351,13 -325,6 +351,13 @@@ static ssize_t raw_ip_store(struct devi
        if (enable == (info->flags & QMI_WWAN_FLAG_RAWIP))
                return len;
  
 +      /* ip mode cannot be cleared when pass through mode is set */
 +      if (!enable && (info->flags & QMI_WWAN_FLAG_PASS_THROUGH)) {
 +              netdev_err(dev->net,
 +                         "Cannot clear ip mode on pass through device\n");
 +              return -EINVAL;
 +      }
 +
        if (!rtnl_trylock())
                return restart_syscall();
  
@@@ -488,59 -455,14 +488,59 @@@ err
        return ret;
  }
  
 +static ssize_t pass_through_show(struct device *d,
 +                               struct device_attribute *attr, char *buf)
 +{
 +      struct usbnet *dev = netdev_priv(to_net_dev(d));
 +      struct qmi_wwan_state *info;
 +
 +      info = (void *)&dev->data;
 +      return sprintf(buf, "%c\n",
 +                     info->flags & QMI_WWAN_FLAG_PASS_THROUGH ? 'Y' : 'N');
 +}
 +
 +static ssize_t pass_through_store(struct device *d,
 +                                struct device_attribute *attr,
 +                                const char *buf, size_t len)
 +{
 +      struct usbnet *dev = netdev_priv(to_net_dev(d));
 +      struct qmi_wwan_state *info;
 +      bool enable;
 +
 +      if (strtobool(buf, &enable))
 +              return -EINVAL;
 +
 +      info = (void *)&dev->data;
 +
 +      /* no change? */
 +      if (enable == (info->flags & QMI_WWAN_FLAG_PASS_THROUGH))
 +              return len;
 +
 +      /* pass through mode can be set for raw ip devices only */
 +      if (!(info->flags & QMI_WWAN_FLAG_RAWIP)) {
 +              netdev_err(dev->net,
 +                         "Cannot set pass through mode on non ip device\n");
 +              return -EINVAL;
 +      }
 +
 +      if (enable)
 +              info->flags |= QMI_WWAN_FLAG_PASS_THROUGH;
 +      else
 +              info->flags &= ~QMI_WWAN_FLAG_PASS_THROUGH;
 +
 +      return len;
 +}
 +
  static DEVICE_ATTR_RW(raw_ip);
  static DEVICE_ATTR_RW(add_mux);
  static DEVICE_ATTR_RW(del_mux);
 +static DEVICE_ATTR_RW(pass_through);
  
  static struct attribute *qmi_wwan_sysfs_attrs[] = {
        &dev_attr_raw_ip.attr,
        &dev_attr_add_mux.attr,
        &dev_attr_del_mux.attr,
 +      &dev_attr_pass_through.attr,
        NULL,
  };
  
@@@ -587,11 -509,6 +587,11 @@@ static int qmi_wwan_rx_fixup(struct usb
        if (info->flags & QMI_WWAN_FLAG_MUX)
                return qmimux_rx_fixup(dev, skb);
  
 +      if (info->flags & QMI_WWAN_FLAG_PASS_THROUGH) {
 +              skb->protocol = htons(ETH_P_MAP);
 +              return (netif_rx(skb) == NET_RX_SUCCESS);
 +      }
 +
        switch (skb->data[0] & 0xf0) {
        case 0x40:
                proto = htons(ETH_P_IP);
@@@ -1392,6 -1309,7 +1392,7 @@@ static const struct usb_device_id produ
        {QMI_FIXED_INTF(0x1e2d, 0x0082, 5)},    /* Cinterion PHxx,PXxx (2 RmNet) */
        {QMI_FIXED_INTF(0x1e2d, 0x0083, 4)},    /* Cinterion PHxx,PXxx (1 RmNet + USB Audio)*/
        {QMI_QUIRK_SET_DTR(0x1e2d, 0x00b0, 4)}, /* Cinterion CLS8 */
+       {QMI_FIXED_INTF(0x1e2d, 0x00b7, 0)},    /* Cinterion MV31 RmNet */
        {QMI_FIXED_INTF(0x413c, 0x81a2, 8)},    /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */
        {QMI_FIXED_INTF(0x413c, 0x81a3, 8)},    /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */
        {QMI_FIXED_INTF(0x413c, 0x81a4, 8)},    /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */
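
The new pass_through attribute above only flips a flag bit, but the store handlers enforce an interlock: pass-through can only be set on a raw-ip device, and raw-ip cannot be cleared while pass-through is set. A standalone sketch of that interlock with stand-in flag values and function names, without the driver's enum or sysfs plumbing:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define FLAG_RAWIP              (1u << 0)   /* stand-in for QMI_WWAN_FLAG_RAWIP        */
#define FLAG_PASS_THROUGH       (1u << 2)   /* stand-in for QMI_WWAN_FLAG_PASS_THROUGH */

static int set_pass_through(unsigned int *flags, bool enable)
{
        if (enable == !!(*flags & FLAG_PASS_THROUGH))
                return 0;                       /* nothing to change */
        if (!(*flags & FLAG_RAWIP))
                return -EINVAL;                 /* pass-through needs raw-ip mode */
        if (enable)
                *flags |= FLAG_PASS_THROUGH;
        else
                *flags &= ~FLAG_PASS_THROUGH;
        return 0;
}

static int clear_rawip(unsigned int *flags)
{
        if (*flags & FLAG_PASS_THROUGH)
                return -EINVAL;                 /* must drop pass-through first */
        *flags &= ~FLAG_RAWIP;
        return 0;
}

int main(void)
{
        unsigned int flags = 0;

        printf("%d\n", set_pass_through(&flags, true));    /* -22: not raw-ip yet   */
        flags |= FLAG_RAWIP;
        printf("%d\n", set_pass_through(&flags, true));    /* 0                     */
        printf("%d\n", clear_rawip(&flags));                /* -22: pass-through on  */
        return 0;
}
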
diff --combined include/linux/netdevice.h
index 1340327f7abf448a6eb78cd3a05754f941f940fe,5ff27c12ce68833a2c142be425c82bffe8eecbd0..a20310ff5083b7dcb7bb1c2c6268888894086579
@@@ -347,7 -347,6 +347,7 @@@ struct napi_struct 
        struct list_head        dev_list;
        struct hlist_node       napi_hash_node;
        unsigned int            napi_id;
 +      struct task_struct      *thread;
  };
  
  enum {
        NAPI_STATE_NO_BUSY_POLL,        /* Do not add in napi_hash, no busy polling */
        NAPI_STATE_IN_BUSY_POLL,        /* sk_busy_loop() owns this NAPI */
        NAPI_STATE_PREFER_BUSY_POLL,    /* prefer busy-polling over softirq processing*/
 +      NAPI_STATE_THREADED,            /* The poll is performed inside its own thread*/
  };
  
  enum {
        NAPIF_STATE_NO_BUSY_POLL        = BIT(NAPI_STATE_NO_BUSY_POLL),
        NAPIF_STATE_IN_BUSY_POLL        = BIT(NAPI_STATE_IN_BUSY_POLL),
        NAPIF_STATE_PREFER_BUSY_POLL    = BIT(NAPI_STATE_PREFER_BUSY_POLL),
 +      NAPIF_STATE_THREADED            = BIT(NAPI_STATE_THREADED),
  };
  
  enum gro_result {
        GRO_MERGED_FREE,
        GRO_HELD,
        GRO_NORMAL,
 -      GRO_DROP,
        GRO_CONSUMED,
  };
  typedef enum gro_result gro_result_t;
@@@ -497,8 -495,6 +497,8 @@@ static inline bool napi_complete(struc
        return napi_complete_done(n, 0);
  }
  
 +int dev_set_threaded(struct net_device *dev, bool threaded);
 +
  /**
   *    napi_disable - prevent NAPI from scheduling
   *    @n: NAPI context
   */
  void napi_disable(struct napi_struct *n);
  
 -/**
 - *    napi_enable - enable NAPI scheduling
 - *    @n: NAPI context
 - *
 - * Resume NAPI from being scheduled on this context.
 - * Must be paired with napi_disable.
 - */
 -static inline void napi_enable(struct napi_struct *n)
 -{
 -      BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
 -      smp_mb__before_atomic();
 -      clear_bit(NAPI_STATE_SCHED, &n->state);
 -      clear_bit(NAPI_STATE_NPSVC, &n->state);
 -}
 +void napi_enable(struct napi_struct *n);
  
  /**
   *    napi_synchronize - wait until NAPI is not running
@@@ -850,7 -859,6 +850,7 @@@ enum tc_setup_type 
        TC_SETUP_QDISC_ETS,
        TC_SETUP_QDISC_TBF,
        TC_SETUP_QDISC_FIFO,
 +      TC_SETUP_QDISC_HTB,
  };
  
  /* These structures hold the attributes of bpf state that are being passed
@@@ -1205,6 -1213,19 +1205,6 @@@ struct netdev_net_notifier 
   *                             struct netdev_phys_item_id *ppid)
   *    Called to get the parent ID of the physical port of this device.
   *
 - * void (*ndo_udp_tunnel_add)(struct net_device *dev,
 - *                          struct udp_tunnel_info *ti);
 - *    Called by UDP tunnel to notify a driver about the UDP port and socket
 - *    address family that a UDP tunnel is listnening to. It is called only
 - *    when a new port starts listening. The operation is protected by the
 - *    RTNL.
 - *
 - * void (*ndo_udp_tunnel_del)(struct net_device *dev,
 - *                          struct udp_tunnel_info *ti);
 - *    Called by UDP tunnel to notify the driver about a UDP port and socket
 - *    address family that the UDP tunnel is not listening to anymore. The
 - *    operation is protected by the RTNL.
 - *
   * void* (*ndo_dfwd_add_station)(struct net_device *pdev,
   *                             struct net_device *dev)
   *    Called by upper layer devices to accelerate switching or other
@@@ -1391,8 -1412,6 +1391,8 @@@ struct net_device_ops 
        struct net_device*      (*ndo_get_xmit_slave)(struct net_device *dev,
                                                      struct sk_buff *skb,
                                                      bool all_slaves);
 +      struct net_device*      (*ndo_sk_get_lower_dev)(struct net_device *dev,
 +                                                      struct sock *sk);
        netdev_features_t       (*ndo_fix_features)(struct net_device *dev,
                                                    netdev_features_t features);
        int                     (*ndo_set_features)(struct net_device *dev,
                                                          struct netdev_phys_item_id *ppid);
        int                     (*ndo_get_phys_port_name)(struct net_device *dev,
                                                          char *name, size_t len);
 -      void                    (*ndo_udp_tunnel_add)(struct net_device *dev,
 -                                                    struct udp_tunnel_info *ti);
 -      void                    (*ndo_udp_tunnel_del)(struct net_device *dev,
 -                                                    struct udp_tunnel_info *ti);
        void*                   (*ndo_dfwd_add_station)(struct net_device *pdev,
                                                        struct net_device *dev);
        void                    (*ndo_dfwd_del_station)(struct net_device *pdev,
@@@ -1819,8 -1842,6 +1819,8 @@@ enum netdev_priv_flags 
   *
   *    @wol_enabled:   Wake-on-LAN is enabled
   *
 + *    @threaded:      napi threaded mode is enabled
 + *
   *    @net_notifier_list:     List of per-net netdev notifier block
   *                            that follow this device when it is moved
   *                            to another network namespace.
@@@ -1852,6 -1873,7 +1852,6 @@@ struct net_device 
        unsigned long           mem_end;
        unsigned long           mem_start;
        unsigned long           base_addr;
 -      int                     irq;
  
        /*
         *      Some hardware also needs these fields (state,dev_list,
                struct list_head lower;
        } adj_list;
  
 +      /* Read-mostly cache-line for fast-path access */
 +      unsigned int            flags;
 +      unsigned int            priv_flags;
 +      const struct net_device_ops *netdev_ops;
 +      int                     ifindex;
 +      unsigned short          gflags;
 +      unsigned short          hard_header_len;
 +
 +      /* Note : dev->mtu is often read without holding a lock.
 +       * Writers usually hold RTNL.
 +       * It is recommended to use READ_ONCE() to annotate the reads,
 +       * and to use WRITE_ONCE() to annotate the writes.
 +       */
 +      unsigned int            mtu;
 +      unsigned short          needed_headroom;
 +      unsigned short          needed_tailroom;
 +
        netdev_features_t       features;
        netdev_features_t       hw_features;
        netdev_features_t       wanted_features;
        netdev_features_t       mpls_features;
        netdev_features_t       gso_partial_features;
  
 -      int                     ifindex;
 +      unsigned int            min_mtu;
 +      unsigned int            max_mtu;
 +      unsigned short          type;
 +      unsigned char           min_header_len;
 +      unsigned char           name_assign_type;
 +
        int                     group;
  
 -      struct net_device_stats stats;
 +      struct net_device_stats stats; /* not used by modern drivers */
  
        atomic_long_t           rx_dropped;
        atomic_long_t           tx_dropped;
        const struct iw_handler_def *wireless_handlers;
        struct iw_public_data   *wireless_data;
  #endif
 -      const struct net_device_ops *netdev_ops;
        const struct ethtool_ops *ethtool_ops;
  #ifdef CONFIG_NET_L3_MASTER_DEV
        const struct l3mdev_ops *l3mdev_ops;
  
        const struct header_ops *header_ops;
  
 -      unsigned int            flags;
 -      unsigned int            priv_flags;
 -
 -      unsigned short          gflags;
 -      unsigned short          padded;
 -
        unsigned char           operstate;
        unsigned char           link_mode;
  
        unsigned char           if_port;
        unsigned char           dma;
  
 -      /* Note : dev->mtu is often read without holding a lock.
 -       * Writers usually hold RTNL.
 -       * It is recommended to use READ_ONCE() to annotate the reads,
 -       * and to use WRITE_ONCE() to annotate the writes.
 -       */
 -      unsigned int            mtu;
 -      unsigned int            min_mtu;
 -      unsigned int            max_mtu;
 -      unsigned short          type;
 -      unsigned short          hard_header_len;
 -      unsigned char           min_header_len;
 -      unsigned char           name_assign_type;
 -
 -      unsigned short          needed_headroom;
 -      unsigned short          needed_tailroom;
 -
        /* Interface address info. */
        unsigned char           perm_addr[MAX_ADDR_LEN];
        unsigned char           addr_assign_type;
        unsigned short          neigh_priv_len;
        unsigned short          dev_id;
        unsigned short          dev_port;
 +      unsigned short          padded;
 +
        spinlock_t              addr_list_lock;
 +      int                     irq;
  
        struct netdev_hw_addr_list      uc;
        struct netdev_hw_addr_list      mc;
        struct lock_class_key   *qdisc_running_key;
        bool                    proto_down;
        unsigned                wol_enabled:1;
 +      unsigned                threaded:1;
  
        struct list_head        net_notifier_list;
  
@@@ -2614,7 -2633,6 +2614,7 @@@ enum netdev_lag_hash 
        NETDEV_LAG_HASH_L23,
        NETDEV_LAG_HASH_E23,
        NETDEV_LAG_HASH_E34,
 +      NETDEV_LAG_HASH_VLAN_SRCMAC,
        NETDEV_LAG_HASH_UNKNOWN,
  };
  
@@@ -2858,8 -2876,6 +2858,8 @@@ int init_dummy_netdev(struct net_devic
  struct net_device *netdev_get_xmit_slave(struct net_device *dev,
                                         struct sk_buff *skb,
                                         bool all_slaves);
 +struct net_device *netdev_sk_get_lowest_dev(struct net_device *dev,
 +                                          struct sock *sk);
  struct net_device *dev_get_by_index(struct net *net, int ifindex);
  struct net_device *__dev_get_by_index(struct net *net, int ifindex);
  struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
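
The newly declared netdev_sk_get_lowest_dev() walks the lower-device chain for a
given socket until nothing lower is reported. A minimal usage sketch (hypothetical
helper name and feature check, not part of this merge), e.g. for an offload path
that wants the device that will actually transmit a socket's traffic:

/* Sketch: resolve through stacked devices (bonding, VLAN, ...) and check the
 * transmitting device's feature bits. RCU protection is assumed here.
 */
static bool example_sk_can_hw_tls(struct net_device *dev, struct sock *sk)
{
        struct net_device *lowest;
        bool can;

        rcu_read_lock();
        lowest = netdev_sk_get_lowest_dev(dev, sk) ?: dev;
        can = !!(lowest->features & NETIF_F_HW_TLS_TX);
        rcu_read_unlock();

        return can;
}
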
@@@ -4336,6 -4352,7 +4336,7 @@@ static inline void netif_tx_disable(str
  
        local_bh_disable();
        cpu = smp_processor_id();
+       spin_lock(&dev->tx_global_lock);
        for (i = 0; i < dev->num_tx_queues; i++) {
                struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
  
                netif_tx_stop_queue(txq);
                __netif_tx_unlock(txq);
        }
+       spin_unlock(&dev->tx_global_lock);
        local_bh_enable();
  }
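
The field reshuffle above also relocates the note that dev->mtu is read locklessly
while writers usually hold RTNL. A minimal sketch of the recommended annotation
pattern (hypothetical helpers, not part of this merge):

/* Sketch: lockless read vs. RTNL-protected write of dev->mtu, using the
 * READ_ONCE()/WRITE_ONCE() annotations the comment above asks for.
 */
static unsigned int example_read_mtu(const struct net_device *dev)
{
        return READ_ONCE(dev->mtu);             /* may run without any lock */
}

static void example_write_mtu(struct net_device *dev, unsigned int new_mtu)
{
        ASSERT_RTNL();                          /* writers usually hold RTNL */
        WRITE_ONCE(dev->mtu, new_mtu);
}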
  
diff --combined include/net/switchdev.h
index 88fcac1409667dc7e77d819023baf9865c5c387c,afdf8bd1b4fe52f4be39a0a15d1b36e08a9d90fe..6dcfc4c51a6e7fe0f95fc22a9ce4119ed22a5c87
  #define SWITCHDEV_F_SKIP_EOPNOTSUPP   BIT(1)
  #define SWITCHDEV_F_DEFER             BIT(2)
  
 -struct switchdev_trans {
 -      bool ph_prepare;
 -};
 -
 -static inline bool switchdev_trans_ph_prepare(struct switchdev_trans *trans)
 -{
 -      return trans && trans->ph_prepare;
 -}
 -
 -static inline bool switchdev_trans_ph_commit(struct switchdev_trans *trans)
 -{
 -      return trans && !trans->ph_prepare;
 -}
 -
  enum switchdev_attr_id {
        SWITCHDEV_ATTR_ID_UNDEFINED,
        SWITCHDEV_ATTR_ID_PORT_STP_STATE,
@@@ -28,7 -42,6 +28,6 @@@
        SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED,
        SWITCHDEV_ATTR_ID_BRIDGE_MROUTER,
  #if IS_ENABLED(CONFIG_BRIDGE_MRP)
-       SWITCHDEV_ATTR_ID_MRP_PORT_STATE,
        SWITCHDEV_ATTR_ID_MRP_PORT_ROLE,
  #endif
  };
@@@ -48,7 -61,6 +47,6 @@@ struct switchdev_attr 
                u16 vlan_protocol;                      /* BRIDGE_VLAN_PROTOCOL */
                bool mc_disabled;                       /* MC_DISABLED */
  #if IS_ENABLED(CONFIG_BRIDGE_MRP)
-               u8 mrp_port_state;                      /* MRP_PORT_STATE */
                u8 mrp_port_role;                       /* MRP_PORT_ROLE */
  #endif
        } u;
@@@ -83,7 -95,8 +81,7 @@@ struct switchdev_obj 
  struct switchdev_obj_port_vlan {
        struct switchdev_obj obj;
        u16 flags;
 -      u16 vid_begin;
 -      u16 vid_end;
 +      u16 vid;
  };
  
  #define SWITCHDEV_OBJ_PORT_VLAN(OBJ) \
@@@ -219,12 -232,14 +217,12 @@@ struct switchdev_notifier_fdb_info 
  struct switchdev_notifier_port_obj_info {
        struct switchdev_notifier_info info; /* must be first */
        const struct switchdev_obj *obj;
 -      struct switchdev_trans *trans;
        bool handled;
  };
  
  struct switchdev_notifier_port_attr_info {
        struct switchdev_notifier_info info; /* must be first */
        const struct switchdev_attr *attr;
 -      struct switchdev_trans *trans;
        bool handled;
  };
  
@@@ -272,6 -287,7 +270,6 @@@ int switchdev_handle_port_obj_add(struc
                        bool (*check_cb)(const struct net_device *dev),
                        int (*add_cb)(struct net_device *dev,
                                      const struct switchdev_obj *obj,
 -                                    struct switchdev_trans *trans,
                                      struct netlink_ext_ack *extack));
  int switchdev_handle_port_obj_del(struct net_device *dev,
                        struct switchdev_notifier_port_obj_info *port_obj_info,
@@@ -283,7 -299,8 +281,7 @@@ int switchdev_handle_port_attr_set(stru
                        struct switchdev_notifier_port_attr_info *port_attr_info,
                        bool (*check_cb)(const struct net_device *dev),
                        int (*set_cb)(struct net_device *dev,
 -                                    const struct switchdev_attr *attr,
 -                                    struct switchdev_trans *trans));
 +                                    const struct switchdev_attr *attr));
  #else
  
  static inline void switchdev_deferred_process(void)
@@@ -354,6 -371,7 +352,6 @@@ switchdev_handle_port_obj_add(struct ne
                        bool (*check_cb)(const struct net_device *dev),
                        int (*add_cb)(struct net_device *dev,
                                      const struct switchdev_obj *obj,
 -                                    struct switchdev_trans *trans,
                                      struct netlink_ext_ack *extack))
  {
        return 0;
@@@ -374,7 -392,8 +372,7 @@@ switchdev_handle_port_attr_set(struct n
                        struct switchdev_notifier_port_attr_info *port_attr_info,
                        bool (*check_cb)(const struct net_device *dev),
                        int (*set_cb)(struct net_device *dev,
 -                                    const struct switchdev_attr *attr,
 -                                    struct switchdev_trans *trans))
 +                                    const struct switchdev_attr *attr))
  {
        return 0;
  }
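
With struct switchdev_trans gone, the prepare/commit two-phase model is removed and
attribute handlers are called exactly once. A hedged sketch of a driver callback in
the new shape (hypothetical driver, only the STP case shown):

/* Sketch: a set_cb in the transaction-free form; the driver programs the
 * hardware immediately and reports failure directly.
 */
static int example_port_attr_set(struct net_device *dev,
                                 const struct switchdev_attr *attr)
{
        switch (attr->id) {
        case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
                /* apply attr->u.stp_state right away; no prepare phase */
                return 0;
        default:
                return -EOPNOTSUPP;
        }
}
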
diff --combined include/soc/mscc/ocelot.h
index d0d48e9620fb7e7c3d892e4f21c620d8ef04c434,c34b9ccb64722df8612232eaea34756d52dbff51..bfce3df61bfd071cdd470330418d2203af48e935
@@@ -98,7 -98,6 +98,7 @@@
  #define IFH_REW_OP_TWO_STEP_PTP               0x3
  #define IFH_REW_OP_ORIGIN_PTP         0x5
  
 +#define OCELOT_NUM_TC                 8
  #define OCELOT_TAG_LEN                        16
  #define OCELOT_SHORT_PREFIX_LEN               4
  #define OCELOT_LONG_PREFIX_LEN                16
@@@ -564,8 -563,6 +564,8 @@@ struct ocelot_ops 
        int (*netdev_to_port)(struct net_device *dev);
        int (*reset)(struct ocelot *ocelot);
        u16 (*wm_enc)(u16 value);
 +      u16 (*wm_dec)(u16 value);
 +      void (*wm_stat)(u32 val, u32 *inuse, u32 *maxuse);
  };
  
  struct ocelot_vcap_block {
@@@ -579,18 -576,6 +579,18 @@@ struct ocelot_vlan 
        u16 vid;
  };
  
 +enum ocelot_sb {
 +      OCELOT_SB_BUF,
 +      OCELOT_SB_REF,
 +      OCELOT_SB_NUM,
 +};
 +
 +enum ocelot_sb_pool {
 +      OCELOT_SB_POOL_ING,
 +      OCELOT_SB_POOL_EGR,
 +      OCELOT_SB_POOL_NUM,
 +};
 +
  struct ocelot_port {
        struct ocelot                   *ocelot;
  
        phy_interface_t                 phy_mode;
  
        u8                              *xmit_template;
 +      bool                            is_dsa_8021q_cpu;
 +
 +      struct net_device               *bond;
 +      bool                            lag_tx_active;
  };
  
  struct ocelot {
        struct device                   *dev;
 +      struct devlink                  *devlink;
 +      struct devlink_port             *devlink_ports;
  
        const struct ocelot_ops         *ops;
        struct regmap                   *targets[TARGET_MAX];
        const struct ocelot_stat_layout *stats_layout;
        unsigned int                    num_stats;
  
 -      int                             shared_queue_sz;
 +      u32                             pool_size[OCELOT_SB_NUM][OCELOT_SB_POOL_NUM];
 +      int                             packet_buffer_size;
 +      int                             num_frame_refs;
        int                             num_mact_rows;
  
        struct net_device               *hw_bridge_dev;
  
        int                             npi;
  
 -      enum ocelot_tag_prefix          inj_prefix;
 -      enum ocelot_tag_prefix          xtr_prefix;
 -
 -      u32                             *lags;
 +      enum ocelot_tag_prefix          npi_inj_prefix;
 +      enum ocelot_tag_prefix          npi_xtr_prefix;
  
        struct list_head                multicast;
        struct list_head                pgids;
@@@ -730,6 -709,7 +730,7 @@@ struct ocelot_policer 
  /* I/O */
  u32 ocelot_port_readl(struct ocelot_port *port, u32 reg);
  void ocelot_port_writel(struct ocelot_port *port, u32 val, u32 reg);
+ void ocelot_port_rmwl(struct ocelot_port *port, u32 val, u32 mask, u32 reg);
  u32 __ocelot_read_ix(struct ocelot *ocelot, u32 reg, u32 offset);
  void __ocelot_write_ix(struct ocelot *ocelot, u32 val, u32 reg, u32 offset);
  void __ocelot_rmw_ix(struct ocelot *ocelot, u32 val, u32 mask, u32 reg,
@@@ -758,11 -738,12 +759,12 @@@ int ocelot_get_sset_count(struct ocelo
  int ocelot_get_ts_info(struct ocelot *ocelot, int port,
                       struct ethtool_ts_info *info);
  void ocelot_set_ageing_time(struct ocelot *ocelot, unsigned int msecs);
+ int ocelot_port_flush(struct ocelot *ocelot, int port);
  void ocelot_adjust_link(struct ocelot *ocelot, int port,
                        struct phy_device *phydev);
 -int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, bool enabled,
 -                             struct switchdev_trans *trans);
 +int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, bool enabled);
  void ocelot_bridge_stp_state_set(struct ocelot *ocelot, int port, u8 state);
 +void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot);
  int ocelot_port_bridge_join(struct ocelot *ocelot, int port,
                            struct net_device *bridge);
  int ocelot_port_bridge_leave(struct ocelot *ocelot, int port,
@@@ -798,45 -779,5 +800,45 @@@ int ocelot_port_mdb_add(struct ocelot *
                        const struct switchdev_obj_port_mdb *mdb);
  int ocelot_port_mdb_del(struct ocelot *ocelot, int port,
                        const struct switchdev_obj_port_mdb *mdb);
 +int ocelot_port_lag_join(struct ocelot *ocelot, int port,
 +                       struct net_device *bond,
 +                       struct netdev_lag_upper_info *info);
 +void ocelot_port_lag_leave(struct ocelot *ocelot, int port,
 +                         struct net_device *bond);
 +void ocelot_port_lag_change(struct ocelot *ocelot, int port, bool lag_tx_active);
 +
 +int ocelot_devlink_sb_register(struct ocelot *ocelot);
 +void ocelot_devlink_sb_unregister(struct ocelot *ocelot);
 +int ocelot_sb_pool_get(struct ocelot *ocelot, unsigned int sb_index,
 +                     u16 pool_index,
 +                     struct devlink_sb_pool_info *pool_info);
 +int ocelot_sb_pool_set(struct ocelot *ocelot, unsigned int sb_index,
 +                     u16 pool_index, u32 size,
 +                     enum devlink_sb_threshold_type threshold_type,
 +                     struct netlink_ext_ack *extack);
 +int ocelot_sb_port_pool_get(struct ocelot *ocelot, int port,
 +                          unsigned int sb_index, u16 pool_index,
 +                          u32 *p_threshold);
 +int ocelot_sb_port_pool_set(struct ocelot *ocelot, int port,
 +                          unsigned int sb_index, u16 pool_index,
 +                          u32 threshold, struct netlink_ext_ack *extack);
 +int ocelot_sb_tc_pool_bind_get(struct ocelot *ocelot, int port,
 +                             unsigned int sb_index, u16 tc_index,
 +                             enum devlink_sb_pool_type pool_type,
 +                             u16 *p_pool_index, u32 *p_threshold);
 +int ocelot_sb_tc_pool_bind_set(struct ocelot *ocelot, int port,
 +                             unsigned int sb_index, u16 tc_index,
 +                             enum devlink_sb_pool_type pool_type,
 +                             u16 pool_index, u32 threshold,
 +                             struct netlink_ext_ack *extack);
 +int ocelot_sb_occ_snapshot(struct ocelot *ocelot, unsigned int sb_index);
 +int ocelot_sb_occ_max_clear(struct ocelot *ocelot, unsigned int sb_index);
 +int ocelot_sb_occ_port_pool_get(struct ocelot *ocelot, int port,
 +                              unsigned int sb_index, u16 pool_index,
 +                              u32 *p_cur, u32 *p_max);
 +int ocelot_sb_occ_tc_port_bind_get(struct ocelot *ocelot, int port,
 +                                 unsigned int sb_index, u16 tc_index,
 +                                 enum devlink_sb_pool_type pool_type,
 +                                 u32 *p_cur, u32 *p_max);
  
  #endif
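
The new enums and the pool_size matrix give the shared-buffer support a fixed
addressing scheme: buffer space vs. frame references, each split into ingress and
egress pools. A minimal sketch walking that matrix (hypothetical debug helper, not
exported by this patch):

/* Sketch: dump the configured size of every shared-buffer pool. */
static void example_dump_pool_sizes(struct ocelot *ocelot)
{
        int sb, pool;

        for (sb = 0; sb < OCELOT_SB_NUM; sb++)
                for (pool = 0; pool < OCELOT_SB_POOL_NUM; pool++)
                        dev_info(ocelot->dev, "sb %d pool %d: %u\n",
                                 sb, pool, ocelot->pool_size[sb][pool]);
}
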
diff --combined kernel/bpf/stackmap.c
index cabaf7db8efc09db4c0eb7437cec5bb8418b5403,bfafbf115bf306a2712c22dfb4eae1681c36fff1..be35bfb7fb13f31b921b39bc48fa78303bc7f66d
@@@ -7,9 -7,10 +7,9 @@@
  #include <linux/kernel.h>
  #include <linux/stacktrace.h>
  #include <linux/perf_event.h>
 -#include <linux/elf.h>
 -#include <linux/pagemap.h>
  #include <linux/irq_work.h>
  #include <linux/btf_ids.h>
 +#include <linux/buildid.h>
  #include "percpu_freelist.h"
  
  #define STACK_CREATE_FLAG_MASK                                        \
@@@ -114,6 -115,8 +114,8 @@@ static struct bpf_map *stack_map_alloc(
  
        /* hash table size must be power of 2 */
        n_buckets = roundup_pow_of_two(attr->max_entries);
+       if (!n_buckets)
+               return ERR_PTR(-E2BIG);
  
        cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap);
        cost += n_buckets * (value_size + sizeof(struct stack_map_bucket));
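
The added check covers the case where max_entries is so large that the next power
of two no longer fits in the 32-bit bucket count. A short sketch of the failure
mode (hypothetical helper, illustrative constant):

/* Sketch: on a 64-bit kernel, roundup_pow_of_two(0x80000001) is 1ULL << 32,
 * which truncates to 0 when stored in a u32, so a zero bucket count signals
 * overflow and is rejected with -E2BIG above.
 */
static bool example_bucket_count_ok(u32 max_entries, u32 *n_buckets)
{
        *n_buckets = roundup_pow_of_two(max_entries);

        return *n_buckets != 0;
}
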
@@@ -142,6 -145,140 +144,6 @@@ free_smap
        return ERR_PTR(err);
  }
  
 -#define BPF_BUILD_ID 3
 -/*
 - * Parse build id from the note segment. This logic can be shared between
 - * 32-bit and 64-bit system, because Elf32_Nhdr and Elf64_Nhdr are
 - * identical.
 - */
 -static inline int stack_map_parse_build_id(void *page_addr,
 -                                         unsigned char *build_id,
 -                                         void *note_start,
 -                                         Elf32_Word note_size)
 -{
 -      Elf32_Word note_offs = 0, new_offs;
 -
 -      /* check for overflow */
 -      if (note_start < page_addr || note_start + note_size < note_start)
 -              return -EINVAL;
 -
 -      /* only supports note that fits in the first page */
 -      if (note_start + note_size > page_addr + PAGE_SIZE)
 -              return -EINVAL;
 -
 -      while (note_offs + sizeof(Elf32_Nhdr) < note_size) {
 -              Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs);
 -
 -              if (nhdr->n_type == BPF_BUILD_ID &&
 -                  nhdr->n_namesz == sizeof("GNU") &&
 -                  nhdr->n_descsz > 0 &&
 -                  nhdr->n_descsz <= BPF_BUILD_ID_SIZE) {
 -                      memcpy(build_id,
 -                             note_start + note_offs +
 -                             ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr),
 -                             nhdr->n_descsz);
 -                      memset(build_id + nhdr->n_descsz, 0,
 -                             BPF_BUILD_ID_SIZE - nhdr->n_descsz);
 -                      return 0;
 -              }
 -              new_offs = note_offs + sizeof(Elf32_Nhdr) +
 -                      ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4);
 -              if (new_offs <= note_offs)  /* overflow */
 -                      break;
 -              note_offs = new_offs;
 -      }
 -      return -EINVAL;
 -}
 -
 -/* Parse build ID from 32-bit ELF */
 -static int stack_map_get_build_id_32(void *page_addr,
 -                                   unsigned char *build_id)
 -{
 -      Elf32_Ehdr *ehdr = (Elf32_Ehdr *)page_addr;
 -      Elf32_Phdr *phdr;
 -      int i;
 -
 -      /* only supports phdr that fits in one page */
 -      if (ehdr->e_phnum >
 -          (PAGE_SIZE - sizeof(Elf32_Ehdr)) / sizeof(Elf32_Phdr))
 -              return -EINVAL;
 -
 -      phdr = (Elf32_Phdr *)(page_addr + sizeof(Elf32_Ehdr));
 -
 -      for (i = 0; i < ehdr->e_phnum; ++i) {
 -              if (phdr[i].p_type == PT_NOTE &&
 -                  !stack_map_parse_build_id(page_addr, build_id,
 -                                            page_addr + phdr[i].p_offset,
 -                                            phdr[i].p_filesz))
 -                      return 0;
 -      }
 -      return -EINVAL;
 -}
 -
 -/* Parse build ID from 64-bit ELF */
 -static int stack_map_get_build_id_64(void *page_addr,
 -                                   unsigned char *build_id)
 -{
 -      Elf64_Ehdr *ehdr = (Elf64_Ehdr *)page_addr;
 -      Elf64_Phdr *phdr;
 -      int i;
 -
 -      /* only supports phdr that fits in one page */
 -      if (ehdr->e_phnum >
 -          (PAGE_SIZE - sizeof(Elf64_Ehdr)) / sizeof(Elf64_Phdr))
 -              return -EINVAL;
 -
 -      phdr = (Elf64_Phdr *)(page_addr + sizeof(Elf64_Ehdr));
 -
 -      for (i = 0; i < ehdr->e_phnum; ++i) {
 -              if (phdr[i].p_type == PT_NOTE &&
 -                  !stack_map_parse_build_id(page_addr, build_id,
 -                                            page_addr + phdr[i].p_offset,
 -                                            phdr[i].p_filesz))
 -                      return 0;
 -      }
 -      return -EINVAL;
 -}
 -
 -/* Parse build ID of ELF file mapped to vma */
 -static int stack_map_get_build_id(struct vm_area_struct *vma,
 -                                unsigned char *build_id)
 -{
 -      Elf32_Ehdr *ehdr;
 -      struct page *page;
 -      void *page_addr;
 -      int ret;
 -
 -      /* only works for page backed storage  */
 -      if (!vma->vm_file)
 -              return -EINVAL;
 -
 -      page = find_get_page(vma->vm_file->f_mapping, 0);
 -      if (!page)
 -              return -EFAULT; /* page not mapped */
 -
 -      ret = -EINVAL;
 -      page_addr = kmap_atomic(page);
 -      ehdr = (Elf32_Ehdr *)page_addr;
 -
 -      /* compare magic x7f "ELF" */
 -      if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0)
 -              goto out;
 -
 -      /* only support executable file and shared object file */
 -      if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN)
 -              goto out;
 -
 -      if (ehdr->e_ident[EI_CLASS] == ELFCLASS32)
 -              ret = stack_map_get_build_id_32(page_addr, build_id);
 -      else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64)
 -              ret = stack_map_get_build_id_64(page_addr, build_id);
 -out:
 -      kunmap_atomic(page_addr);
 -      put_page(page);
 -      return ret;
 -}
 -
  static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
                                          u64 *ips, u32 trace_nr, bool user)
  {
                for (i = 0; i < trace_nr; i++) {
                        id_offs[i].status = BPF_STACK_BUILD_ID_IP;
                        id_offs[i].ip = ips[i];
 -                      memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE);
 +                      memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX);
                }
                return;
        }
  
        for (i = 0; i < trace_nr; i++) {
                vma = find_vma(current->mm, ips[i]);
 -              if (!vma || stack_map_get_build_id(vma, id_offs[i].build_id)) {
 +              if (!vma || build_id_parse(vma, id_offs[i].build_id, NULL)) {
                        /* per entry fall back to ips */
                        id_offs[i].status = BPF_STACK_BUILD_ID_IP;
                        id_offs[i].ip = ips[i];
 -                      memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE);
 +                      memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX);
                        continue;
                }
                id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i]
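
The open-coded ELF note parser above is replaced by the shared build_id_parse()
helper from <linux/buildid.h>. A condensed sketch of the per-frame handling it
enables (mirrors the loop above; assumes the caller already holds the mmap lock):

/* Sketch: resolve one stack entry either to (build id, file offset) or, on
 * failure, to the raw instruction pointer.
 */
static void example_fill_build_id(struct bpf_stack_build_id *id_off, u64 ip)
{
        struct vm_area_struct *vma = find_vma(current->mm, ip);

        if (!vma || build_id_parse(vma, id_off->build_id, NULL)) {
                id_off->status = BPF_STACK_BUILD_ID_IP;
                id_off->ip = ip;
                return;
        }

        id_off->status = BPF_STACK_BUILD_ID_VALID;
        id_off->offset = (vma->vm_pgoff << PAGE_SHIFT) + ip - vma->vm_start;
}
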
diff --combined kernel/bpf/verifier.c
index 785d25392ead7a411be645e810f43c0307377b2f,37581919e050c8fc63afb74aa9812d7e40df8eea..1cffd4e847258d3e97f6f7e8b5fc42fd222aa3ac
@@@ -3606,30 -3606,13 +3606,30 @@@ static int check_mem_access(struct bpf_
        return err;
  }
  
 -static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
 +static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
  {
 +      int load_reg;
        int err;
  
 -      if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
 -          insn->imm != 0) {
 -              verbose(env, "BPF_XADD uses reserved fields\n");
 +      switch (insn->imm) {
 +      case BPF_ADD:
 +      case BPF_ADD | BPF_FETCH:
 +      case BPF_AND:
 +      case BPF_AND | BPF_FETCH:
 +      case BPF_OR:
 +      case BPF_OR | BPF_FETCH:
 +      case BPF_XOR:
 +      case BPF_XOR | BPF_FETCH:
 +      case BPF_XCHG:
 +      case BPF_CMPXCHG:
 +              break;
 +      default:
 +              verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm);
 +              return -EINVAL;
 +      }
 +
 +      if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
 +              verbose(env, "invalid atomic operand size\n");
                return -EINVAL;
        }
  
        if (err)
                return err;
  
 +      if (insn->imm == BPF_CMPXCHG) {
 +              /* Check comparison of R0 with memory location */
 +              err = check_reg_arg(env, BPF_REG_0, SRC_OP);
 +              if (err)
 +                      return err;
 +      }
 +
        if (is_pointer_value(env, insn->src_reg)) {
                verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
                return -EACCES;
            is_pkt_reg(env, insn->dst_reg) ||
            is_flow_key_reg(env, insn->dst_reg) ||
            is_sk_reg(env, insn->dst_reg)) {
 -              verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
 +              verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
                        insn->dst_reg,
                        reg_type_str[reg_state(env, insn->dst_reg)->type]);
                return -EACCES;
        }
  
 -      /* check whether atomic_add can read the memory */
 +      /* check whether we can read the memory */
        err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
                               BPF_SIZE(insn->code), BPF_READ, -1, true);
        if (err)
                return err;
  
 -      /* check whether atomic_add can write into the same memory */
 -      return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
 -                              BPF_SIZE(insn->code), BPF_WRITE, -1, true);
 +      /* check whether we can write into the same memory */
 +      err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
 +                             BPF_SIZE(insn->code), BPF_WRITE, -1, true);
 +      if (err)
 +              return err;
 +
 +      if (!(insn->imm & BPF_FETCH))
 +              return 0;
 +
 +      if (insn->imm == BPF_CMPXCHG)
 +              load_reg = BPF_REG_0;
 +      else
 +              load_reg = insn->src_reg;
 +
 +      /* check and record load of old value */
 +      err = check_reg_arg(env, load_reg, DST_OP);
 +      if (err)
 +              return err;
 +
 +      return 0;
  }
  
  static int __check_stack_boundary(struct bpf_verifier_env *env, u32 regno,
@@@ -4362,7 -4321,7 +4362,7 @@@ skip_type_check
                        err = mark_chain_precision(env, regno);
        } else if (arg_type_is_alloc_size(arg_type)) {
                if (!tnum_is_const(reg->var_off)) {
 -                      verbose(env, "R%d unbounded size, use 'var &= const' or 'if (var < const)'\n",
 +                      verbose(env, "R%d is not a known constant'\n",
                                regno);
                        return -EACCES;
                }
@@@ -6918,7 -6877,7 +6918,7 @@@ static int is_branch32_taken(struct bpf
        case BPF_JSGT:
                if (reg->s32_min_value > sval)
                        return 1;
-               else if (reg->s32_max_value < sval)
+               else if (reg->s32_max_value <= sval)
                        return 0;
                break;
        case BPF_JLT:
@@@ -6991,7 -6950,7 +6991,7 @@@ static int is_branch64_taken(struct bpf
        case BPF_JSGT:
                if (reg->smin_value > sval)
                        return 1;
-               else if (reg->smax_value < sval)
+               else if (reg->smax_value <= sval)
                        return 0;
                break;
        case BPF_JLT:
@@@ -8631,7 -8590,11 +8631,11 @@@ static bool range_within(struct bpf_reg
        return old->umin_value <= cur->umin_value &&
               old->umax_value >= cur->umax_value &&
               old->smin_value <= cur->smin_value &&
-              old->smax_value >= cur->smax_value;
+              old->smax_value >= cur->smax_value &&
+              old->u32_min_value <= cur->u32_min_value &&
+              old->u32_max_value >= cur->u32_max_value &&
+              old->s32_min_value <= cur->s32_min_value &&
+              old->s32_max_value >= cur->s32_max_value;
  }
  
  /* Maximum number of register states that can exist at once */
@@@ -9567,19 -9530,14 +9571,19 @@@ static int do_check(struct bpf_verifier
                } else if (class == BPF_STX) {
                        enum bpf_reg_type *prev_dst_type, dst_reg_type;
  
 -                      if (BPF_MODE(insn->code) == BPF_XADD) {
 -                              err = check_xadd(env, env->insn_idx, insn);
 +                      if (BPF_MODE(insn->code) == BPF_ATOMIC) {
 +                              err = check_atomic(env, env->insn_idx, insn);
                                if (err)
                                        return err;
                                env->insn_idx++;
                                continue;
                        }
  
 +                      if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
 +                              verbose(env, "BPF_STX uses reserved fields\n");
 +                              return -EINVAL;
 +                      }
 +
                        /* check src1 operand */
                        err = check_reg_arg(env, insn->src_reg, SRC_OP);
                        if (err)
@@@ -9751,36 -9709,6 +9755,36 @@@ process_bpf_exit
        return 0;
  }
  
 +static int find_btf_percpu_datasec(struct btf *btf)
 +{
 +      const struct btf_type *t;
 +      const char *tname;
 +      int i, n;
 +
 +      /*
 +       * Both vmlinux and module each have their own ".data..percpu"
 +       * DATASECs in BTF. So for module's case, we need to skip vmlinux BTF
 +       * types to look at only module's own BTF types.
 +       */
 +      n = btf_nr_types(btf);
 +      if (btf_is_module(btf))
 +              i = btf_nr_types(btf_vmlinux);
 +      else
 +              i = 1;
 +
 +      for(; i < n; i++) {
 +              t = btf_type_by_id(btf, i);
 +              if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
 +                      continue;
 +
 +              tname = btf_name_by_offset(btf, t->name_off);
 +              if (!strcmp(tname, ".data..percpu"))
 +                      return i;
 +      }
 +
 +      return -ENOENT;
 +}
 +
  /* replace pseudo btf_id with kernel symbol address */
  static int check_pseudo_btf_id(struct bpf_verifier_env *env,
                               struct bpf_insn *insn,
  {
        const struct btf_var_secinfo *vsi;
        const struct btf_type *datasec;
 +      struct btf_mod_pair *btf_mod;
        const struct btf_type *t;
        const char *sym_name;
        bool percpu = false;
        u32 type, id = insn->imm;
 +      struct btf *btf;
        s32 datasec_id;
        u64 addr;
 -      int i;
 -
 -      if (!btf_vmlinux) {
 -              verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
 -              return -EINVAL;
 -      }
 +      int i, btf_fd, err;
  
 -      if (insn[1].imm != 0) {
 -              verbose(env, "reserved field (insn[1].imm) is used in pseudo_btf_id ldimm64 insn.\n");
 -              return -EINVAL;
 +      btf_fd = insn[1].imm;
 +      if (btf_fd) {
 +              btf = btf_get_by_fd(btf_fd);
 +              if (IS_ERR(btf)) {
 +                      verbose(env, "invalid module BTF object FD specified.\n");
 +                      return -EINVAL;
 +              }
 +      } else {
 +              if (!btf_vmlinux) {
 +                      verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
 +                      return -EINVAL;
 +              }
 +              btf = btf_vmlinux;
 +              btf_get(btf);
        }
  
 -      t = btf_type_by_id(btf_vmlinux, id);
 +      t = btf_type_by_id(btf, id);
        if (!t) {
                verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
 -              return -ENOENT;
 +              err = -ENOENT;
 +              goto err_put;
        }
  
        if (!btf_type_is_var(t)) {
 -              verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n",
 -                      id);
 -              return -EINVAL;
 +              verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n", id);
 +              err = -EINVAL;
 +              goto err_put;
        }
  
 -      sym_name = btf_name_by_offset(btf_vmlinux, t->name_off);
 +      sym_name = btf_name_by_offset(btf, t->name_off);
        addr = kallsyms_lookup_name(sym_name);
        if (!addr) {
                verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
                        sym_name);
 -              return -ENOENT;
 +              err = -ENOENT;
 +              goto err_put;
        }
  
 -      datasec_id = btf_find_by_name_kind(btf_vmlinux, ".data..percpu",
 -                                         BTF_KIND_DATASEC);
 +      datasec_id = find_btf_percpu_datasec(btf);
        if (datasec_id > 0) {
 -              datasec = btf_type_by_id(btf_vmlinux, datasec_id);
 +              datasec = btf_type_by_id(btf, datasec_id);
                for_each_vsi(i, datasec, vsi) {
                        if (vsi->type == id) {
                                percpu = true;
        insn[1].imm = addr >> 32;
  
        type = t->type;
 -      t = btf_type_skip_modifiers(btf_vmlinux, type, NULL);
 +      t = btf_type_skip_modifiers(btf, type, NULL);
        if (percpu) {
                aux->btf_var.reg_type = PTR_TO_PERCPU_BTF_ID;
 -              aux->btf_var.btf = btf_vmlinux;
 +              aux->btf_var.btf = btf;
                aux->btf_var.btf_id = type;
        } else if (!btf_type_is_struct(t)) {
                const struct btf_type *ret;
                u32 tsize;
  
                /* resolve the type size of ksym. */
 -              ret = btf_resolve_size(btf_vmlinux, t, &tsize);
 +              ret = btf_resolve_size(btf, t, &tsize);
                if (IS_ERR(ret)) {
 -                      tname = btf_name_by_offset(btf_vmlinux, t->name_off);
 +                      tname = btf_name_by_offset(btf, t->name_off);
                        verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
                                tname, PTR_ERR(ret));
 -                      return -EINVAL;
 +                      err = -EINVAL;
 +                      goto err_put;
                }
                aux->btf_var.reg_type = PTR_TO_MEM;
                aux->btf_var.mem_size = tsize;
        } else {
                aux->btf_var.reg_type = PTR_TO_BTF_ID;
 -              aux->btf_var.btf = btf_vmlinux;
 +              aux->btf_var.btf = btf;
                aux->btf_var.btf_id = type;
        }
 +
 +      /* check whether we recorded this BTF (and maybe module) already */
 +      for (i = 0; i < env->used_btf_cnt; i++) {
 +              if (env->used_btfs[i].btf == btf) {
 +                      btf_put(btf);
 +                      return 0;
 +              }
 +      }
 +
 +      if (env->used_btf_cnt >= MAX_USED_BTFS) {
 +              err = -E2BIG;
 +              goto err_put;
 +      }
 +
 +      btf_mod = &env->used_btfs[env->used_btf_cnt];
 +      btf_mod->btf = btf;
 +      btf_mod->module = NULL;
 +
 +      /* if we reference variables from kernel module, bump its refcount */
 +      if (btf_is_module(btf)) {
 +              btf_mod->module = btf_try_get_module(btf);
 +              if (!btf_mod->module) {
 +                      err = -ENXIO;
 +                      goto err_put;
 +              }
 +      }
 +
 +      env->used_btf_cnt++;
 +
        return 0;
 +err_put:
 +      btf_put(btf);
 +      return err;
  }
  
  static int check_map_prealloc(struct bpf_map *map)
@@@ -10056,6 -9942,13 +10060,6 @@@ static int resolve_pseudo_ldimm64(struc
                        return -EINVAL;
                }
  
 -              if (BPF_CLASS(insn->code) == BPF_STX &&
 -                  ((BPF_MODE(insn->code) != BPF_MEM &&
 -                    BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
 -                      verbose(env, "BPF_STX uses reserved fields\n");
 -                      return -EINVAL;
 -              }
 -
                if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
                        struct bpf_insn_aux_data *aux;
                        struct bpf_map *map;
@@@ -10199,13 -10092,6 +10203,13 @@@ static void release_maps(struct bpf_ver
                             env->used_map_cnt);
  }
  
 +/* drop refcnt of BTFs used by the rejected program */
 +static void release_btfs(struct bpf_verifier_env *env)
 +{
 +      __bpf_free_used_btfs(env->prog->aux, env->used_btfs,
 +                           env->used_btf_cnt);
 +}
 +
  /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
  static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
  {
@@@ -11117,30 -11003,28 +11121,28 @@@ static int fixup_bpf_calls(struct bpf_v
                    insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
                    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
                        bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
-                       struct bpf_insn mask_and_div[] = {
-                               BPF_MOV32_REG(insn->src_reg, insn->src_reg),
+                       bool isdiv = BPF_OP(insn->code) == BPF_DIV;
+                       struct bpf_insn *patchlet;
+                       struct bpf_insn chk_and_div[] = {
                                /* Rx div 0 -> 0 */
-                               BPF_JMP_IMM(BPF_JNE, insn->src_reg, 0, 2),
+                               BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+                                            BPF_JNE | BPF_K, insn->src_reg,
+                                            0, 2, 0),
                                BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
                                BPF_JMP_IMM(BPF_JA, 0, 0, 1),
                                *insn,
                        };
-                       struct bpf_insn mask_and_mod[] = {
-                               BPF_MOV32_REG(insn->src_reg, insn->src_reg),
+                       struct bpf_insn chk_and_mod[] = {
                                /* Rx mod 0 -> Rx */
-                               BPF_JMP_IMM(BPF_JEQ, insn->src_reg, 0, 1),
+                               BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+                                            BPF_JEQ | BPF_K, insn->src_reg,
+                                            0, 1, 0),
                                *insn,
                        };
-                       struct bpf_insn *patchlet;
  
-                       if (insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
-                           insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
-                               patchlet = mask_and_div + (is64 ? 1 : 0);
-                               cnt = ARRAY_SIZE(mask_and_div) - (is64 ? 1 : 0);
-                       } else {
-                               patchlet = mask_and_mod + (is64 ? 1 : 0);
-                               cnt = ARRAY_SIZE(mask_and_mod) - (is64 ? 1 : 0);
-                       }
+                       patchlet = isdiv ? chk_and_div : chk_and_mod;
+                       cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
+                                     ARRAY_SIZE(chk_and_mod);
  
                        new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
                        if (!new_prog)
@@@ -12218,10 -12102,7 +12220,10 @@@ skip_full_check
                goto err_release_maps;
        }
  
 -      if (ret == 0 && env->used_map_cnt) {
 +      if (ret)
 +              goto err_release_maps;
 +
 +      if (env->used_map_cnt) {
                /* if program passed verifier, update used_maps in bpf_prog_info */
                env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
                                                          sizeof(env->used_maps[0]),
                memcpy(env->prog->aux->used_maps, env->used_maps,
                       sizeof(env->used_maps[0]) * env->used_map_cnt);
                env->prog->aux->used_map_cnt = env->used_map_cnt;
 +      }
 +      if (env->used_btf_cnt) {
 +              /* if program passed verifier, update used_btfs in bpf_prog_aux */
 +              env->prog->aux->used_btfs = kmalloc_array(env->used_btf_cnt,
 +                                                        sizeof(env->used_btfs[0]),
 +                                                        GFP_KERNEL);
 +              if (!env->prog->aux->used_btfs) {
 +                      ret = -ENOMEM;
 +                      goto err_release_maps;
 +              }
  
 +              memcpy(env->prog->aux->used_btfs, env->used_btfs,
 +                     sizeof(env->used_btfs[0]) * env->used_btf_cnt);
 +              env->prog->aux->used_btf_cnt = env->used_btf_cnt;
 +      }
 +      if (env->used_map_cnt || env->used_btf_cnt) {
                /* program is valid. Convert pseudo bpf_ld_imm64 into generic
                 * bpf_ld_imm64 instructions
                 */
                convert_pseudo_ld_imm64(env);
        }
  
 -      if (ret == 0)
 -              adjust_btf_func(env);
 +      adjust_btf_func(env);
  
  err_release_maps:
        if (!env->prog->aux->used_maps)
                 * them now. Otherwise free_used_maps() will release them.
                 */
                release_maps(env);
 +      if (!env->prog->aux->used_btfs)
 +              release_btfs(env);
  
        /* extension progs temporarily inherit the attach_type of their targets
           for verification purposes, so set it back to zero before returning
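
check_atomic() above accepts the new BPF_ATOMIC encodings and, for the fetching
variants, records which register receives the old value. A small sketch of that
decision (hypothetical helper mirroring the verifier logic):

/* Sketch: only fetching atomics load a value back; BPF_CMPXCHG always uses
 * R0, every other fetching op uses the source register.
 */
static int example_atomic_fetch_reg(const struct bpf_insn *insn)
{
        if (!(insn->imm & BPF_FETCH))
                return -1;      /* plain BPF_ADD/AND/OR/XOR: nothing loaded */

        return insn->imm == BPF_CMPXCHG ? BPF_REG_0 : insn->src_reg;
}
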
diff --combined net/bridge/br_mrp.c
index fc0a98874bfc7dae0af3d5d71a5ed83fb6e33a4e,5aeae6ad17b37d8beac099bfd7db44b841941516..01c67ed727a9cd393c333bffea4764970077314c
@@@ -557,19 -557,22 +557,22 @@@ int br_mrp_del(struct net_bridge *br, s
  int br_mrp_set_port_state(struct net_bridge_port *p,
                          enum br_mrp_port_state_type state)
  {
+       u32 port_state;
        if (!p || !(p->flags & BR_MRP_AWARE))
                return -EINVAL;
  
        spin_lock_bh(&p->br->lock);
  
        if (state == BR_MRP_PORT_STATE_FORWARDING)
-               p->state = BR_STATE_FORWARDING;
+               port_state = BR_STATE_FORWARDING;
        else
-               p->state = BR_STATE_BLOCKING;
+               port_state = BR_STATE_BLOCKING;
  
+       p->state = port_state;
        spin_unlock_bh(&p->br->lock);
  
-       br_mrp_port_switchdev_set_state(p, state);
+       br_mrp_port_switchdev_set_state(p, port_state);
  
        return 0;
  }
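
The fix computes the bridge port state once and reuses it for both the software
state and the switchdev notification, so the hardware is told a BR_STATE_* value
rather than an MRP-specific one. The mapping it applies, as a sketch (hypothetical
helper):

/* Sketch: the MRP-to-bridge state mapping now used for p->state and for
 * br_mrp_port_switchdev_set_state() alike.
 */
static u32 example_mrp_to_br_state(enum br_mrp_port_state_type state)
{
        return state == BR_MRP_PORT_STATE_FORWARDING ? BR_STATE_FORWARDING :
                                                       BR_STATE_BLOCKING;
}
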
@@@ -825,7 -828,7 +828,7 @@@ int br_mrp_start_in_test(struct net_bri
        return 0;
  }
  
 -/* Determin if the frame type is a ring frame */
 +/* Determine if the frame type is a ring frame */
  static bool br_mrp_ring_frame(struct sk_buff *skb)
  {
        const struct br_mrp_tlv_hdr *hdr;
        return false;
  }
  
 -/* Determin if the frame type is an interconnect frame */
 +/* Determine if the frame type is an interconnect frame */
  static bool br_mrp_in_frame(struct sk_buff *skb)
  {
        const struct br_mrp_tlv_hdr *hdr;
@@@ -894,7 -897,7 +897,7 @@@ static void br_mrp_mrm_process(struct b
                br_mrp_ring_port_open(port->dev, false);
  }
  
 -/* Determin if the test hdr has a better priority than the node */
 +/* Determine if the test hdr has a better priority than the node */
  static bool br_mrp_test_better_than_own(struct br_mrp *mrp,
                                        struct net_bridge *br,
                                        const struct br_mrp_ring_test_hdr *hdr)
diff --combined net/core/dev.c
index 7647278e46f0eb6b6c6270b3d36959232fccd469,449b45b843d40ece7dd1e2ed6a5996ee1db9f591..321d41a110e723e962cbf7e78141161c9b0d5532
@@@ -91,7 -91,6 +91,7 @@@
  #include <linux/etherdevice.h>
  #include <linux/ethtool.h>
  #include <linux/skbuff.h>
 +#include <linux/kthread.h>
  #include <linux/bpf.h>
  #include <linux/bpf_trace.h>
  #include <net/net_namespace.h>
  #include <net/dsa.h>
  #include <net/dst.h>
  #include <net/dst_metadata.h>
 +#include <net/gro.h>
  #include <net/pkt_sched.h>
  #include <net/pkt_cls.h>
  #include <net/checksum.h>
@@@ -1495,27 -1493,6 +1495,27 @@@ void netdev_notify_peers(struct net_dev
  }
  EXPORT_SYMBOL(netdev_notify_peers);
  
 +static int napi_threaded_poll(void *data);
 +
 +static int napi_kthread_create(struct napi_struct *n)
 +{
 +      int err = 0;
 +
 +      /* Create and wake up the kthread once to put it in
 +       * TASK_INTERRUPTIBLE mode to avoid the blocked task
 +       * warning and work with loadavg.
 +       */
 +      n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d",
 +                              n->dev->name, n->napi_id);
 +      if (IS_ERR(n->thread)) {
 +              err = PTR_ERR(n->thread);
 +              pr_err("kthread_run failed with err %d\n", err);
 +              n->thread = NULL;
 +      }
 +
 +      return err;
 +}
 +
  static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
  {
        const struct net_device_ops *ops = dev->netdev_ops;
@@@ -3640,22 -3617,11 +3640,22 @@@ static struct sk_buff *validate_xmit_vl
  int skb_csum_hwoffload_help(struct sk_buff *skb,
                            const netdev_features_t features)
  {
 -      if (unlikely(skb->csum_not_inet))
 +      if (unlikely(skb_csum_is_sctp(skb)))
                return !!(features & NETIF_F_SCTP_CRC) ? 0 :
                        skb_crc32c_csum_help(skb);
  
 -      return !!(features & NETIF_F_CSUM_MASK) ? 0 : skb_checksum_help(skb);
 +      if (features & NETIF_F_HW_CSUM)
 +              return 0;
 +
 +      if (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) {
 +              switch (skb->csum_offset) {
 +              case offsetof(struct tcphdr, check):
 +              case offsetof(struct udphdr, check):
 +                      return 0;
 +              }
 +      }
 +
 +      return skb_checksum_help(skb);
  }
  EXPORT_SYMBOL(skb_csum_hwoffload_help);
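
skb_csum_hwoffload_help() now trusts legacy NETIF_F_IP_CSUM/NETIF_F_IPV6_CSUM
devices only for TCP and UDP checksum offsets and resolves everything else in
software. A sketch of the same decision (hypothetical helper mirroring the code
above):

/* Sketch: can this skb's checksum be left to the hardware? */
static bool example_can_offload_csum(const struct sk_buff *skb,
                                     netdev_features_t features)
{
        if (features & NETIF_F_HW_CSUM)
                return true;            /* any csum_start/csum_offset is fine */

        if (!(features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)))
                return false;

        /* legacy IP checksum offload only understands TCP and UDP */
        return skb->csum_offset == offsetof(struct tcphdr, check) ||
               skb->csum_offset == offsetof(struct udphdr, check);
}
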
  
@@@ -3912,7 -3878,6 +3912,7 @@@ sch_handle_egress(struct sk_buff *skb, 
  
        /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
        qdisc_skb_cb(skb)->mru = 0;
 +      qdisc_skb_cb(skb)->post_ct = false;
        mini_qdisc_bstats_cpu_update(miniq, skb);
  
        switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
@@@ -4118,7 -4083,7 +4118,7 @@@ static int __dev_queue_xmit(struct sk_b
        skb_reset_mac_header(skb);
  
        if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP))
 -              __skb_tstamp_tx(skb, NULL, skb->sk, SCM_TSTAMP_SCHED);
 +              __skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SCHED);
  
        /* Disable soft irqs for various locks below. Also
         * stops preemption for RCU.
@@@ -4287,22 -4252,6 +4287,22 @@@ int gro_normal_batch __read_mostly = 8
  static inline void ____napi_schedule(struct softnet_data *sd,
                                     struct napi_struct *napi)
  {
 +      struct task_struct *thread;
 +
 +      if (test_bit(NAPI_STATE_THREADED, &napi->state)) {
 +              /* Paired with smp_mb__before_atomic() in
 +               * napi_enable()/dev_set_threaded().
 +               * Use READ_ONCE() to guarantee a complete
 +               * read on napi->thread. Only call
 +               * wake_up_process() when it's not NULL.
 +               */
 +              thread = READ_ONCE(napi->thread);
 +              if (thread) {
 +                      wake_up_process(thread);
 +                      return;
 +              }
 +      }
 +
        list_add_tail(&napi->poll_list, &sd->poll_list);
        __raise_softirq_irqoff(NET_RX_SOFTIRQ);
  }
@@@ -4654,14 -4603,14 +4654,14 @@@ static u32 netif_receive_generic_xdp(st
                                     struct xdp_buff *xdp,
                                     struct bpf_prog *xdp_prog)
  {
 +      void *orig_data, *orig_data_end, *hard_start;
        struct netdev_rx_queue *rxqueue;
 -      void *orig_data, *orig_data_end;
        u32 metalen, act = XDP_DROP;
 +      u32 mac_len, frame_sz;
        __be16 orig_eth_type;
        struct ethhdr *eth;
        bool orig_bcast;
 -      int hlen, off;
 -      u32 mac_len;
 +      int off;
  
        /* Reinjected packets coming from act_mirred or similar should
         * not get XDP generic processing.
         * header.
         */
        mac_len = skb->data - skb_mac_header(skb);
 -      hlen = skb_headlen(skb) + mac_len;
 -      xdp->data = skb->data - mac_len;
 -      xdp->data_meta = xdp->data;
 -      xdp->data_end = xdp->data + hlen;
 -      xdp->data_hard_start = skb->data - skb_headroom(skb);
 +      hard_start = skb->data - skb_headroom(skb);
  
        /* SKB "head" area always have tailroom for skb_shared_info */
 -      xdp->frame_sz  = (void *)skb_end_pointer(skb) - xdp->data_hard_start;
 -      xdp->frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 +      frame_sz = (void *)skb_end_pointer(skb) - hard_start;
 +      frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 +
 +      rxqueue = netif_get_rxqueue(skb);
 +      xdp_init_buff(xdp, frame_sz, &rxqueue->xdp_rxq);
 +      xdp_prepare_buff(xdp, hard_start, skb_headroom(skb) - mac_len,
 +                       skb_headlen(skb) + mac_len, true);
  
        orig_data_end = xdp->data_end;
        orig_data = xdp->data;
        orig_bcast = is_multicast_ether_addr_64bits(eth->h_dest);
        orig_eth_type = eth->h_proto;
  
 -      rxqueue = netif_get_rxqueue(skb);
 -      xdp->rxq = &rxqueue->xdp_rxq;
 -
        act = bpf_prog_run_xdp(xdp_prog, xdp);
  
        /* check if bpf_xdp_adjust_head was used */
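
Generic XDP now builds its xdp_buff with the shared xdp_init_buff() and
xdp_prepare_buff() helpers instead of filling the fields by hand. A minimal
driver-style sketch of the same pattern (illustrative headroom and length values):

/* Sketch: two-step xdp_buff setup around an RX frame. */
static void example_build_xdp_buff(struct xdp_buff *xdp, void *hard_start,
                                   u32 frame_sz, struct xdp_rxq_info *rxq)
{
        xdp_init_buff(xdp, frame_sz, rxq);
        /* 256 bytes of headroom, 1500-byte frame, no metadata yet */
        xdp_prepare_buff(xdp, hard_start, 256, 1500, false);
}
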
@@@ -5011,7 -4962,6 +5011,7 @@@ sch_handle_ingress(struct sk_buff *skb
  
        qdisc_skb_cb(skb)->pkt_len = skb->len;
        qdisc_skb_cb(skb)->mru = 0;
 +      qdisc_skb_cb(skb)->post_ct = false;
        skb->tc_at_ingress = 1;
        mini_qdisc_bstats_cpu_update(miniq, skb);
  
@@@ -5201,7 -5151,8 +5201,7 @@@ another_round
                skb_reset_mac_len(skb);
        }
  
 -      if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
 -          skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
 +      if (eth_type_vlan(skb->protocol)) {
                skb = skb_vlan_untag(skb);
                if (unlikely(!skb))
                        goto out;
@@@ -5285,7 -5236,8 +5285,7 @@@ check_vlan_id
                         * find vlan device.
                         */
                        skb->pkt_type = PACKET_OTHERHOST;
 -              } else if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
 -                         skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
 +              } else if (eth_type_vlan(skb->protocol)) {
                        /* Outer header is 802.1P with vlan 0, inner header is
                         * 802.1Q or 802.1AD and vlan_do_receive() above could
                         * not find vlan dev for vlan id 0.
@@@ -5761,7 -5713,7 +5761,7 @@@ static void flush_all_backlogs(void
        }
  
        /* we can have in flight packet[s] on the cpus we are not flushing,
 -       * synchronize_net() in rollback_registered_many() will take care of
 +       * synchronize_net() in unregister_netdevice_many() will take care of
         * them
         */
        for_each_cpu(cpu, &flush_cpus)
@@@ -5783,13 -5735,16 +5783,14 @@@ static void gro_normal_list(struct napi
  /* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded,
   * pass the whole batch up to the stack.
   */
- static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb)
+ static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, int segs)
  {
        list_add_tail(&skb->list, &napi->rx_list);
-       if (++napi->rx_count >= gro_normal_batch)
+       napi->rx_count += segs;
+       if (napi->rx_count >= gro_normal_batch)
                gro_normal_list(napi);
  }
  
 -INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int));
 -INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int));
  static int napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb)
  {
        struct packet_offload *ptype;
        }
  
  out:
-       gro_normal_one(napi, skb);
+       gro_normal_one(napi, skb, NAPI_GRO_CB(skb)->count);
        return NET_RX_SUCCESS;
  }
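
gro_normal_one() now advances the batch counter by the number of segments a GRO
super-packet carries instead of by one per SKB, so the flush threshold tracks real
packet counts. A brief sketch of the new decision (hypothetical helper, illustrative
numbers in the comment):

/* Sketch: with gro_normal_batch = 8, one super-packet that coalesced eight
 * TCP segments flushes the list on its own, where the old per-SKB count
 * would have waited for eight separate super-packets.
 */
static bool example_should_flush(unsigned int rx_count, unsigned int segs,
                                 unsigned int batch)
{
        return rx_count + segs >= batch;
}
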
  
@@@ -5958,6 -5913,10 +5959,6 @@@ static void gro_flush_oldest(struct nap
        napi_gro_complete(napi, oldest);
  }
  
 -INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *,
 -                                                         struct sk_buff *));
 -INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *,
 -                                                         struct sk_buff *));
  static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
  {
        u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
@@@ -6109,9 -6068,13 +6110,9 @@@ static gro_result_t napi_skb_finish(str
  {
        switch (ret) {
        case GRO_NORMAL:
-               gro_normal_one(napi, skb);
+               gro_normal_one(napi, skb, 1);
                break;
  
 -      case GRO_DROP:
 -              kfree_skb(skb);
 -              break;
 -
        case GRO_MERGED_FREE:
                if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
                        napi_skb_free_stolen_head(skb);
@@@ -6193,9 -6156,13 +6194,9 @@@ static gro_result_t napi_frags_finish(s
                __skb_push(skb, ETH_HLEN);
                skb->protocol = eth_type_trans(skb, skb->dev);
                if (ret == GRO_NORMAL)
-                       gro_normal_one(napi, skb);
+                       gro_normal_one(napi, skb, 1);
                break;
  
 -      case GRO_DROP:
 -              napi_reuse_skb(napi, skb);
 -              break;
 -
        case GRO_MERGED_FREE:
                if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
                        napi_skb_free_stolen_head(skb);
@@@ -6257,6 -6224,9 +6258,6 @@@ gro_result_t napi_gro_frags(struct napi
        gro_result_t ret;
        struct sk_buff *skb = napi_frags_skb(napi);
  
 -      if (!skb)
 -              return GRO_DROP;
 -
        trace_napi_gro_frags_entry(skb);
  
        ret = napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
@@@ -6739,49 -6709,6 +6740,49 @@@ static void init_gro_hash(struct napi_s
        napi->gro_bitmask = 0;
  }
  
 +int dev_set_threaded(struct net_device *dev, bool threaded)
 +{
 +      struct napi_struct *napi;
 +      int err = 0;
 +
 +      if (dev->threaded == threaded)
 +              return 0;
 +
 +      if (threaded) {
 +              list_for_each_entry(napi, &dev->napi_list, dev_list) {
 +                      if (!napi->thread) {
 +                              err = napi_kthread_create(napi);
 +                              if (err) {
 +                                      threaded = false;
 +                                      break;
 +                              }
 +                      }
 +              }
 +      }
 +
 +      dev->threaded = threaded;
 +
 +      /* Make sure kthread is created before THREADED bit
 +       * is set.
 +       */
 +      smp_mb__before_atomic();
 +
 +      /* Setting/unsetting threaded mode on a napi might not immediately
 +       * take effect, if the current napi instance is actively being
 +       * polled. In this case, the switch between threaded mode and
 +       * softirq mode will happen in the next round of napi_schedule().
 +       * This should not cause hiccups/stalls to the live traffic.
 +       */
 +      list_for_each_entry(napi, &dev->napi_list, dev_list) {
 +              if (threaded)
 +                      set_bit(NAPI_STATE_THREADED, &napi->state);
 +              else
 +                      clear_bit(NAPI_STATE_THREADED, &napi->state);
 +      }
 +
 +      return err;
 +}
 +
  void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
                    int (*poll)(struct napi_struct *, int), int weight)
  {
        set_bit(NAPI_STATE_NPSVC, &napi->state);
        list_add_rcu(&napi->dev_list, &dev->napi_list);
        napi_hash_add(napi);
 +      /* Create kthread for this napi if dev->threaded is set.
 +       * Clear dev->threaded if kthread creation failed so that
 +       * threaded mode will not be enabled in napi_enable().
 +       */
 +      if (dev->threaded && napi_kthread_create(napi))
 +              dev->threaded = 0;
  }
  EXPORT_SYMBOL(netif_napi_add);
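
dev_set_threaded() flips every NAPI context of a device between softirq and kthread
polling, creating the napi/<dev>-<id> threads on demand. A hedged sketch of a driver
opting in (hypothetical hook, not part of this merge):

/* Sketch: request threaded NAPI once the napi contexts are registered; on
 * failure the device simply stays in softirq mode.
 */
static int example_enable_threaded_napi(struct net_device *dev)
{
        int err;

        err = dev_set_threaded(dev, true);
        if (err)
                netdev_warn(dev, "threaded NAPI not available: %d\n", err);

        return err;
}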
  
@@@ -6832,28 -6753,9 +6833,28 @@@ void napi_disable(struct napi_struct *n
  
        clear_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state);
        clear_bit(NAPI_STATE_DISABLE, &n->state);
 +      clear_bit(NAPI_STATE_THREADED, &n->state);
  }
  EXPORT_SYMBOL(napi_disable);
  
 +/**
 + *    napi_enable - enable NAPI scheduling
 + *    @n: NAPI context
 + *
 + * Resume NAPI from being scheduled on this context.
 + * Must be paired with napi_disable.
 + */
 +void napi_enable(struct napi_struct *n)
 +{
 +      BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
 +      smp_mb__before_atomic();
 +      clear_bit(NAPI_STATE_SCHED, &n->state);
 +      clear_bit(NAPI_STATE_NPSVC, &n->state);
 +      if (n->dev->threaded && n->thread)
 +              set_bit(NAPI_STATE_THREADED, &n->state);
 +}
 +EXPORT_SYMBOL(napi_enable);
 +
  static void flush_gro_hash(struct napi_struct *napi)
  {
        int i;
@@@ -6879,18 -6781,18 +6880,18 @@@ void __netif_napi_del(struct napi_struc
  
        flush_gro_hash(napi);
        napi->gro_bitmask = 0;
 +
 +      if (napi->thread) {
 +              kthread_stop(napi->thread);
 +              napi->thread = NULL;
 +      }
  }
  EXPORT_SYMBOL(__netif_napi_del);
  
 -static int napi_poll(struct napi_struct *n, struct list_head *repoll)
 +static int __napi_poll(struct napi_struct *n, bool *repoll)
  {
 -      void *have;
        int work, weight;
  
 -      list_del_init(&n->poll_list);
 -
 -      have = netpoll_poll_lock(n);
 -
        weight = n->weight;
  
        /* This NAPI_STATE_SCHED test is for avoiding a race
                            n->poll, work, weight);
  
        if (likely(work < weight))
 -              goto out_unlock;
 +              return work;
  
        /* Drivers must not modify the NAPI state if they
         * consume the entire weight.  In such cases this code
         */
        if (unlikely(napi_disable_pending(n))) {
                napi_complete(n);
 -              goto out_unlock;
 +              return work;
        }
  
        /* The NAPI context has more processing work, but busy-polling
                         */
                        napi_schedule(n);
                }
 -              goto out_unlock;
 +              return work;
        }
  
        if (n->gro_bitmask) {
        if (unlikely(!list_empty(&n->poll_list))) {
                pr_warn_once("%s: Budget exhausted after napi rescheduled\n",
                             n->dev ? n->dev->name : "backlog");
 -              goto out_unlock;
 +              return work;
        }
  
 -      list_add_tail(&n->poll_list, repoll);
 +      *repoll = true;
 +
 +      return work;
 +}
 +
 +static int napi_poll(struct napi_struct *n, struct list_head *repoll)
 +{
 +      bool do_repoll = false;
 +      void *have;
 +      int work;
 +
 +      list_del_init(&n->poll_list);
 +
 +      have = netpoll_poll_lock(n);
 +
 +      work = __napi_poll(n, &do_repoll);
 +
 +      if (do_repoll)
 +              list_add_tail(&n->poll_list, repoll);
  
 -out_unlock:
        netpoll_poll_unlock(have);
  
        return work;
  }
  
 +static int napi_thread_wait(struct napi_struct *napi)
 +{
 +      set_current_state(TASK_INTERRUPTIBLE);
 +
 +      while (!kthread_should_stop() && !napi_disable_pending(napi)) {
 +              if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
 +                      WARN_ON(!list_empty(&napi->poll_list));
 +                      __set_current_state(TASK_RUNNING);
 +                      return 0;
 +              }
 +
 +              schedule();
 +              set_current_state(TASK_INTERRUPTIBLE);
 +      }
 +      __set_current_state(TASK_RUNNING);
 +      return -1;
 +}
 +
 +static int napi_threaded_poll(void *data)
 +{
 +      struct napi_struct *napi = data;
 +      void *have;
 +
 +      while (!napi_thread_wait(napi)) {
 +              for (;;) {
 +                      bool repoll = false;
 +
 +                      local_bh_disable();
 +
 +                      have = netpoll_poll_lock(napi);
 +                      __napi_poll(napi, &repoll);
 +                      netpoll_poll_unlock(have);
 +
 +                      __kfree_skb_flush();
 +                      local_bh_enable();
 +
 +                      if (!repoll)
 +                              break;
 +
 +                      cond_resched();
 +              }
 +      }
 +      return 0;
 +}
 +
  static __latent_entropy void net_rx_action(struct softirq_action *h)
  {
        struct softnet_data *sd = this_cpu_ptr(&softnet_data);
@@@ -8281,39 -8121,6 +8282,39 @@@ struct net_device *netdev_get_xmit_slav
  }
  EXPORT_SYMBOL(netdev_get_xmit_slave);
  
 +static struct net_device *netdev_sk_get_lower_dev(struct net_device *dev,
 +                                                struct sock *sk)
 +{
 +      const struct net_device_ops *ops = dev->netdev_ops;
 +
 +      if (!ops->ndo_sk_get_lower_dev)
 +              return NULL;
 +      return ops->ndo_sk_get_lower_dev(dev, sk);
 +}
 +
 +/**
 + * netdev_sk_get_lowest_dev - Get the lowest device in chain given device and socket
 + * @dev: device
 + * @sk: the socket
 + *
 + * %NULL is returned if no lower device is found.
 + */
 +
 +struct net_device *netdev_sk_get_lowest_dev(struct net_device *dev,
 +                                          struct sock *sk)
 +{
 +      struct net_device *lower;
 +
 +      lower = netdev_sk_get_lower_dev(dev, sk);
 +      while (lower) {
 +              dev = lower;
 +              lower = netdev_sk_get_lower_dev(dev, sk);
 +      }
 +
 +      return dev;
 +}
 +EXPORT_SYMBOL(netdev_sk_get_lowest_dev);
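
A hedged caller sketch: a stacked device such as a bond can ask which lower device will actually carry a given socket's traffic by walking ndo_sk_get_lower_dev() to the bottom of the chain. The example_* name is invented.

static struct net_device *example_tx_dev_for_sk(struct net_device *top,
                                                struct sock *sk)
{
        /* Follows ndo_sk_get_lower_dev() until no lower device is reported. */
        return netdev_sk_get_lowest_dev(top, sk);
}
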
 +
  static void netdev_adjacent_add_links(struct net_device *dev)
  {
        struct netdev_adjacent *iter;
@@@ -9635,6 -9442,106 +9636,6 @@@ static void net_set_todo(struct net_dev
        dev_net(dev)->dev_unreg_count++;
  }
  
 -static void rollback_registered_many(struct list_head *head)
 -{
 -      struct net_device *dev, *tmp;
 -      LIST_HEAD(close_head);
 -
 -      BUG_ON(dev_boot_phase);
 -      ASSERT_RTNL();
 -
 -      list_for_each_entry_safe(dev, tmp, head, unreg_list) {
 -              /* Some devices call without registering
 -               * for initialization unwind. Remove those
 -               * devices and proceed with the remaining.
 -               */
 -              if (dev->reg_state == NETREG_UNINITIALIZED) {
 -                      pr_debug("unregister_netdevice: device %s/%p never was registered\n",
 -                               dev->name, dev);
 -
 -                      WARN_ON(1);
 -                      list_del(&dev->unreg_list);
 -                      continue;
 -              }
 -              dev->dismantle = true;
 -              BUG_ON(dev->reg_state != NETREG_REGISTERED);
 -      }
 -
 -      /* If device is running, close it first. */
 -      list_for_each_entry(dev, head, unreg_list)
 -              list_add_tail(&dev->close_list, &close_head);
 -      dev_close_many(&close_head, true);
 -
 -      list_for_each_entry(dev, head, unreg_list) {
 -              /* And unlink it from device chain. */
 -              unlist_netdevice(dev);
 -
 -              dev->reg_state = NETREG_UNREGISTERING;
 -      }
 -      flush_all_backlogs();
 -
 -      synchronize_net();
 -
 -      list_for_each_entry(dev, head, unreg_list) {
 -              struct sk_buff *skb = NULL;
 -
 -              /* Shutdown queueing discipline. */
 -              dev_shutdown(dev);
 -
 -              dev_xdp_uninstall(dev);
 -
 -              /* Notify protocols, that we are about to destroy
 -               * this device. They should clean all the things.
 -               */
 -              call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
 -
 -              if (!dev->rtnl_link_ops ||
 -                  dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
 -                      skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
 -                                                   GFP_KERNEL, NULL, 0);
 -
 -              /*
 -               *      Flush the unicast and multicast chains
 -               */
 -              dev_uc_flush(dev);
 -              dev_mc_flush(dev);
 -
 -              netdev_name_node_alt_flush(dev);
 -              netdev_name_node_free(dev->name_node);
 -
 -              if (dev->netdev_ops->ndo_uninit)
 -                      dev->netdev_ops->ndo_uninit(dev);
 -
 -              if (skb)
 -                      rtmsg_ifinfo_send(skb, dev, GFP_KERNEL);
 -
 -              /* Notifier chain MUST detach us all upper devices. */
 -              WARN_ON(netdev_has_any_upper_dev(dev));
 -              WARN_ON(netdev_has_any_lower_dev(dev));
 -
 -              /* Remove entries from kobject tree */
 -              netdev_unregister_kobject(dev);
 -#ifdef CONFIG_XPS
 -              /* Remove XPS queueing entries */
 -              netif_reset_xps_queues_gt(dev, 0);
 -#endif
 -      }
 -
 -      synchronize_net();
 -
 -      list_for_each_entry(dev, head, unreg_list)
 -              dev_put(dev);
 -}
 -
 -static void rollback_registered(struct net_device *dev)
 -{
 -      LIST_HEAD(single);
 -
 -      list_add(&dev->unreg_list, &single);
 -      rollback_registered_many(&single);
 -      list_del(&single);
 -}
 -
  static netdev_features_t netdev_sync_upper_features(struct net_device *lower,
        struct net_device *upper, netdev_features_t features)
  {
@@@ -10107,7 -10014,7 +10108,7 @@@ int register_netdevice(struct net_devic
        dev->hw_features |= (NETIF_F_SOFT_FEATURES | NETIF_F_SOFT_FEATURES_OFF);
        dev->features |= NETIF_F_SOFT_FEATURES;
  
 -      if (dev->netdev_ops->ndo_udp_tunnel_add) {
 +      if (dev->udp_tunnel_nic_info) {
                dev->features |= NETIF_F_RX_UDP_TUNNEL_PORT;
                dev->hw_features |= NETIF_F_RX_UDP_TUNNEL_PORT;
        }
        if (ret) {
                /* Expect explicit free_netdev() on failure */
                dev->needs_free_netdev = false;
 -              rollback_registered(dev);
 -              net_set_todo(dev);
 +              unregister_netdevice_queue(dev, NULL);
                goto out;
        }
        /*
@@@ -10806,10 -10714,9 +10807,10 @@@ void unregister_netdevice_queue(struct 
        if (head) {
                list_move_tail(&dev->unreg_list, head);
        } else {
 -              rollback_registered(dev);
 -              /* Finish processing unregister after unlock */
 -              net_set_todo(dev);
 +              LIST_HEAD(single);
 +
 +              list_add(&dev->unreg_list, &single);
 +              unregister_netdevice_many(&single);
        }
  }
  EXPORT_SYMBOL(unregister_netdevice_queue);
   */
  void unregister_netdevice_many(struct list_head *head)
  {
 -      struct net_device *dev;
 +      struct net_device *dev, *tmp;
 +      LIST_HEAD(close_head);
 +
 +      BUG_ON(dev_boot_phase);
 +      ASSERT_RTNL();
 +
 +      if (list_empty(head))
 +              return;
 +
 +      list_for_each_entry_safe(dev, tmp, head, unreg_list) {
 +              /* Some devices call without registering
 +               * for initialization unwind. Remove those
 +               * devices and proceed with the remaining.
 +               */
 +              if (dev->reg_state == NETREG_UNINITIALIZED) {
 +                      pr_debug("unregister_netdevice: device %s/%p never was registered\n",
 +                               dev->name, dev);
 +
 +                      WARN_ON(1);
 +                      list_del(&dev->unreg_list);
 +                      continue;
 +              }
 +              dev->dismantle = true;
 +              BUG_ON(dev->reg_state != NETREG_REGISTERED);
 +      }
 +
 +      /* If device is running, close it first. */
 +      list_for_each_entry(dev, head, unreg_list)
 +              list_add_tail(&dev->close_list, &close_head);
 +      dev_close_many(&close_head, true);
 +
 +      list_for_each_entry(dev, head, unreg_list) {
 +              /* And unlink it from device chain. */
 +              unlist_netdevice(dev);
 +
 +              dev->reg_state = NETREG_UNREGISTERING;
 +      }
 +      flush_all_backlogs();
 +
 +      synchronize_net();
 +
 +      list_for_each_entry(dev, head, unreg_list) {
 +              struct sk_buff *skb = NULL;
 +
 +              /* Shutdown queueing discipline. */
 +              dev_shutdown(dev);
 +
 +              dev_xdp_uninstall(dev);
 +
 +              /* Notify protocols, that we are about to destroy
 +               * this device. They should clean all the things.
 +               */
 +              call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
 +
 +              if (!dev->rtnl_link_ops ||
 +                  dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
 +                      skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
 +                                                   GFP_KERNEL, NULL, 0);
 +
 +              /*
 +               *      Flush the unicast and multicast chains
 +               */
 +              dev_uc_flush(dev);
 +              dev_mc_flush(dev);
 +
 +              netdev_name_node_alt_flush(dev);
 +              netdev_name_node_free(dev->name_node);
 +
 +              if (dev->netdev_ops->ndo_uninit)
 +                      dev->netdev_ops->ndo_uninit(dev);
 +
 +              if (skb)
 +                      rtmsg_ifinfo_send(skb, dev, GFP_KERNEL);
 +
 +              /* Notifier chain MUST detach us all upper devices. */
 +              WARN_ON(netdev_has_any_upper_dev(dev));
 +              WARN_ON(netdev_has_any_lower_dev(dev));
 +
 +              /* Remove entries from kobject tree */
 +              netdev_unregister_kobject(dev);
 +#ifdef CONFIG_XPS
 +              /* Remove XPS queueing entries */
 +              netif_reset_xps_queues_gt(dev, 0);
 +#endif
 +      }
 +
 +      synchronize_net();
  
 -      if (!list_empty(head)) {
 -              rollback_registered_many(head);
 -              list_for_each_entry(dev, head, unreg_list)
 -                      net_set_todo(dev);
 -              list_del(head);
 +      list_for_each_entry(dev, head, unreg_list) {
 +              dev_put(dev);
 +              net_set_todo(dev);
        }
 +
 +      list_del(head);
  }
  EXPORT_SYMBOL(unregister_netdevice_many);
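
For context, a sketch of the batched teardown pattern these helpers serve (example_remove_ports() is hypothetical); with this change the per-device rollback work happens inside unregister_netdevice_many() itself rather than in rollback_registered_many():

/* Hypothetical driver removing several ports in one RTNL critical section. */
static void example_remove_ports(struct net_device **ports, int n)
{
        LIST_HEAD(list);
        int i;

        rtnl_lock();
        for (i = 0; i < n; i++)
                unregister_netdevice_queue(ports[i], &list);
        unregister_netdevice_many(&list);       /* closes, unlists, drops each dev */
        rtnl_unlock();
}
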
  
diff --combined net/dsa/dsa2.c
index 96249c4ad5f277df5c19208ac61062251e38a433,a04fd637b4cdcc3466f09bbdf2e80b5b41394e60..4d4956ed303b096e685b499c534128be8c5a91a9
  static DEFINE_MUTEX(dsa2_mutex);
  LIST_HEAD(dsa_tree_list);
  
 +/**
 + * dsa_tree_notify - Execute code for all switches in a DSA switch tree.
 + * @dst: collection of struct dsa_switch devices to notify.
 + * @e: event, must be of type DSA_NOTIFIER_*
 + * @v: event-specific value.
 + *
 + * Given a struct dsa_switch_tree, this can be used to run a function once for
 + * each member DSA switch. The other alternative of traversing the tree is only
 + * through its ports list, which does not uniquely list the switches.
 + */
 +int dsa_tree_notify(struct dsa_switch_tree *dst, unsigned long e, void *v)
 +{
 +      struct raw_notifier_head *nh = &dst->nh;
 +      int err;
 +
 +      err = raw_notifier_call_chain(nh, e, v);
 +
 +      return notifier_to_errno(err);
 +}
 +
 +/**
 + * dsa_broadcast - Notify all DSA trees in the system.
 + * @e: event, must be of type DSA_NOTIFIER_*
 + * @v: event-specific value.
 + *
 + * Can be used to notify the switching fabric of events such as cross-chip
 + * bridging between disjoint trees (such as islands of tagger-compatible
 + * switches bridged by an incompatible middle switch).
 + */
 +int dsa_broadcast(unsigned long e, void *v)
 +{
 +      struct dsa_switch_tree *dst;
 +      int err = 0;
 +
 +      list_for_each_entry(dst, &dsa_tree_list, list) {
 +              err = dsa_tree_notify(dst, e, v);
 +              if (err)
 +                      break;
 +      }
 +
 +      return err;
 +}
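
A hedged illustration of the two delivery scopes documented above; DSA_NOTIFIER_TAG_PROTO and struct dsa_notifier_tag_proto_info are the ones used later in this diff, example_notify_tag_proto() is an invented caller:

static int example_notify_tag_proto(struct dsa_switch_tree *dst,
                                    const struct dsa_device_ops *tag_ops)
{
        struct dsa_notifier_tag_proto_info info = {
                .tag_ops = tag_ops,
        };

        /* Per-tree delivery; dsa_broadcast(DSA_NOTIFIER_TAG_PROTO, &info)
         * would reach every tree on dsa_tree_list instead.
         */
        return dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO, &info);
}
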
 +
 +/**
 + * dsa_lag_map() - Map LAG netdev to a linear LAG ID
 + * @dst: Tree in which to record the mapping.
 + * @lag: Netdev that is to be mapped to an ID.
 + *
 + * dsa_lag_id/dsa_lag_dev can then be used to translate between the
 + * two spaces. The size of the mapping space is determined by the
 + * driver by setting ds->num_lag_ids. It is perfectly legal to leave
 + * it unset if it is not needed, in which case these functions become
 + * no-ops.
 + */
 +void dsa_lag_map(struct dsa_switch_tree *dst, struct net_device *lag)
 +{
 +      unsigned int id;
 +
 +      if (dsa_lag_id(dst, lag) >= 0)
 +              /* Already mapped */
 +              return;
 +
 +      for (id = 0; id < dst->lags_len; id++) {
 +              if (!dsa_lag_dev(dst, id)) {
 +                      dst->lags[id] = lag;
 +                      return;
 +              }
 +      }
 +
 +      /* No IDs left, which is OK. Some drivers do not need it. The
 +       * ones that do, e.g. mv88e6xxx, will discover that dsa_lag_id
 +       * returns an error for this device when joining the LAG. The
 +       * driver can then return -EOPNOTSUPP back to DSA, which will
 +       * fall back to a software LAG.
 +       */
 +}
 +
 +/**
 + * dsa_lag_unmap() - Remove a LAG ID mapping
 + * @dst: Tree in which the mapping is recorded.
 + * @lag: Netdev that was mapped.
 + *
 + * As there may be multiple users of the mapping, it is only removed
 + * if there are no other references to it.
 + */
 +void dsa_lag_unmap(struct dsa_switch_tree *dst, struct net_device *lag)
 +{
 +      struct dsa_port *dp;
 +      unsigned int id;
 +
 +      dsa_lag_foreach_port(dp, dst, lag)
 +              /* There are remaining users of this mapping */
 +              return;
 +
 +      dsa_lags_foreach_id(id, dst) {
 +              if (dsa_lag_dev(dst, id) == lag) {
 +                      dst->lags[id] = NULL;
 +                      break;
 +              }
 +      }
 +}
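
The mapping above only does real work once a driver sizes the LAG ID space. A sketch of that opt-in (example_switch_setup() is hypothetical and the value 16 is purely illustrative):

static int example_switch_setup(struct dsa_switch *ds)
{
        /* Leaving num_lag_ids at zero keeps dsa_lag_map()/dsa_lag_unmap()
         * as no-ops, as documented above.
         */
        ds->num_lag_ids = 16;   /* assumed hardware limit, illustrative only */

        return 0;
}
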
 +
  struct dsa_switch *dsa_switch_find(int tree_index, int sw_index)
  {
        struct dsa_switch_tree *dst;
@@@ -179,8 -77,6 +179,8 @@@ static struct dsa_switch_tree *dsa_tree
  
  static void dsa_tree_free(struct dsa_switch_tree *dst)
  {
 +      if (dst->tag_ops)
 +              dsa_tag_driver_put(dst->tag_ops);
        list_del(&dst->list);
        kfree(dst);
  }
@@@ -469,6 -365,7 +469,6 @@@ static void dsa_port_teardown(struct ds
                break;
        case DSA_PORT_TYPE_CPU:
                dsa_port_disable(dp);
 -              dsa_tag_driver_put(dp->tag_ops);
                dsa_port_link_unregister_of(dp);
                break;
        case DSA_PORT_TYPE_DSA:
@@@ -507,165 -404,8 +507,165 @@@ static int dsa_devlink_info_get(struct 
        return -EOPNOTSUPP;
  }
  
 +static int dsa_devlink_sb_pool_get(struct devlink *dl,
 +                                 unsigned int sb_index, u16 pool_index,
 +                                 struct devlink_sb_pool_info *pool_info)
 +{
 +      struct dsa_switch *ds = dsa_devlink_to_ds(dl);
 +
 +      if (!ds->ops->devlink_sb_pool_get)
 +              return -EOPNOTSUPP;
 +
 +      return ds->ops->devlink_sb_pool_get(ds, sb_index, pool_index,
 +                                          pool_info);
 +}
 +
 +static int dsa_devlink_sb_pool_set(struct devlink *dl, unsigned int sb_index,
 +                                 u16 pool_index, u32 size,
 +                                 enum devlink_sb_threshold_type threshold_type,
 +                                 struct netlink_ext_ack *extack)
 +{
 +      struct dsa_switch *ds = dsa_devlink_to_ds(dl);
 +
 +      if (!ds->ops->devlink_sb_pool_set)
 +              return -EOPNOTSUPP;
 +
 +      return ds->ops->devlink_sb_pool_set(ds, sb_index, pool_index, size,
 +                                          threshold_type, extack);
 +}
 +
 +static int dsa_devlink_sb_port_pool_get(struct devlink_port *dlp,
 +                                      unsigned int sb_index, u16 pool_index,
 +                                      u32 *p_threshold)
 +{
 +      struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
 +      int port = dsa_devlink_port_to_port(dlp);
 +
 +      if (!ds->ops->devlink_sb_port_pool_get)
 +              return -EOPNOTSUPP;
 +
 +      return ds->ops->devlink_sb_port_pool_get(ds, port, sb_index,
 +                                               pool_index, p_threshold);
 +}
 +
 +static int dsa_devlink_sb_port_pool_set(struct devlink_port *dlp,
 +                                      unsigned int sb_index, u16 pool_index,
 +                                      u32 threshold,
 +                                      struct netlink_ext_ack *extack)
 +{
 +      struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
 +      int port = dsa_devlink_port_to_port(dlp);
 +
 +      if (!ds->ops->devlink_sb_port_pool_set)
 +              return -EOPNOTSUPP;
 +
 +      return ds->ops->devlink_sb_port_pool_set(ds, port, sb_index,
 +                                               pool_index, threshold, extack);
 +}
 +
 +static int
 +dsa_devlink_sb_tc_pool_bind_get(struct devlink_port *dlp,
 +                              unsigned int sb_index, u16 tc_index,
 +                              enum devlink_sb_pool_type pool_type,
 +                              u16 *p_pool_index, u32 *p_threshold)
 +{
 +      struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
 +      int port = dsa_devlink_port_to_port(dlp);
 +
 +      if (!ds->ops->devlink_sb_tc_pool_bind_get)
 +              return -EOPNOTSUPP;
 +
 +      return ds->ops->devlink_sb_tc_pool_bind_get(ds, port, sb_index,
 +                                                  tc_index, pool_type,
 +                                                  p_pool_index, p_threshold);
 +}
 +
 +static int
 +dsa_devlink_sb_tc_pool_bind_set(struct devlink_port *dlp,
 +                              unsigned int sb_index, u16 tc_index,
 +                              enum devlink_sb_pool_type pool_type,
 +                              u16 pool_index, u32 threshold,
 +                              struct netlink_ext_ack *extack)
 +{
 +      struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
 +      int port = dsa_devlink_port_to_port(dlp);
 +
 +      if (!ds->ops->devlink_sb_tc_pool_bind_set)
 +              return -EOPNOTSUPP;
 +
 +      return ds->ops->devlink_sb_tc_pool_bind_set(ds, port, sb_index,
 +                                                  tc_index, pool_type,
 +                                                  pool_index, threshold,
 +                                                  extack);
 +}
 +
 +static int dsa_devlink_sb_occ_snapshot(struct devlink *dl,
 +                                     unsigned int sb_index)
 +{
 +      struct dsa_switch *ds = dsa_devlink_to_ds(dl);
 +
 +      if (!ds->ops->devlink_sb_occ_snapshot)
 +              return -EOPNOTSUPP;
 +
 +      return ds->ops->devlink_sb_occ_snapshot(ds, sb_index);
 +}
 +
 +static int dsa_devlink_sb_occ_max_clear(struct devlink *dl,
 +                                      unsigned int sb_index)
 +{
 +      struct dsa_switch *ds = dsa_devlink_to_ds(dl);
 +
 +      if (!ds->ops->devlink_sb_occ_max_clear)
 +              return -EOPNOTSUPP;
 +
 +      return ds->ops->devlink_sb_occ_max_clear(ds, sb_index);
 +}
 +
 +static int dsa_devlink_sb_occ_port_pool_get(struct devlink_port *dlp,
 +                                          unsigned int sb_index,
 +                                          u16 pool_index, u32 *p_cur,
 +                                          u32 *p_max)
 +{
 +      struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
 +      int port = dsa_devlink_port_to_port(dlp);
 +
 +      if (!ds->ops->devlink_sb_occ_port_pool_get)
 +              return -EOPNOTSUPP;
 +
 +      return ds->ops->devlink_sb_occ_port_pool_get(ds, port, sb_index,
 +                                                   pool_index, p_cur, p_max);
 +}
 +
 +static int
 +dsa_devlink_sb_occ_tc_port_bind_get(struct devlink_port *dlp,
 +                                  unsigned int sb_index, u16 tc_index,
 +                                  enum devlink_sb_pool_type pool_type,
 +                                  u32 *p_cur, u32 *p_max)
 +{
 +      struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
 +      int port = dsa_devlink_port_to_port(dlp);
 +
 +      if (!ds->ops->devlink_sb_occ_tc_port_bind_get)
 +              return -EOPNOTSUPP;
 +
 +      return ds->ops->devlink_sb_occ_tc_port_bind_get(ds, port,
 +                                                      sb_index, tc_index,
 +                                                      pool_type, p_cur,
 +                                                      p_max);
 +}
 +
  static const struct devlink_ops dsa_devlink_ops = {
 -      .info_get = dsa_devlink_info_get,
 +      .info_get                       = dsa_devlink_info_get,
 +      .sb_pool_get                    = dsa_devlink_sb_pool_get,
 +      .sb_pool_set                    = dsa_devlink_sb_pool_set,
 +      .sb_port_pool_get               = dsa_devlink_sb_port_pool_get,
 +      .sb_port_pool_set               = dsa_devlink_sb_port_pool_set,
 +      .sb_tc_pool_bind_get            = dsa_devlink_sb_tc_pool_bind_get,
 +      .sb_tc_pool_bind_set            = dsa_devlink_sb_tc_pool_bind_set,
 +      .sb_occ_snapshot                = dsa_devlink_sb_occ_snapshot,
 +      .sb_occ_max_clear               = dsa_devlink_sb_occ_max_clear,
 +      .sb_occ_port_pool_get           = dsa_devlink_sb_occ_port_pool_get,
 +      .sb_occ_tc_port_bind_get        = dsa_devlink_sb_occ_tc_port_bind_get,
  };
  
  static int dsa_switch_setup(struct dsa_switch *ds)
        if (err)
                goto unregister_devlink_ports;
  
 +      ds->configure_vlan_while_not_filtering = true;
 +
        err = ds->ops->setup(ds);
        if (err < 0)
                goto unregister_notifier;
                ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
                if (!ds->slave_mii_bus) {
                        err = -ENOMEM;
-                       goto unregister_notifier;
+                       goto teardown;
                }
  
                dsa_slave_mii_bus_init(ds);
  
                err = mdiobus_register(ds->slave_mii_bus);
                if (err < 0)
-                       goto unregister_notifier;
+                       goto teardown;
        }
  
        ds->setup = true;
  
        return 0;
  
+ teardown:
+       if (ds->ops->teardown)
+               ds->ops->teardown(ds);
  unregister_notifier:
        dsa_switch_unregister_notifier(ds);
  unregister_devlink_ports:
@@@ -844,32 -585,6 +847,32 @@@ static void dsa_tree_teardown_master(st
                        dsa_master_teardown(dp->master);
  }
  
 +static int dsa_tree_setup_lags(struct dsa_switch_tree *dst)
 +{
 +      unsigned int len = 0;
 +      struct dsa_port *dp;
 +
 +      list_for_each_entry(dp, &dst->ports, list) {
 +              if (dp->ds->num_lag_ids > len)
 +                      len = dp->ds->num_lag_ids;
 +      }
 +
 +      if (!len)
 +              return 0;
 +
 +      dst->lags = kcalloc(len, sizeof(*dst->lags), GFP_KERNEL);
 +      if (!dst->lags)
 +              return -ENOMEM;
 +
 +      dst->lags_len = len;
 +      return 0;
 +}
 +
 +static void dsa_tree_teardown_lags(struct dsa_switch_tree *dst)
 +{
 +      kfree(dst->lags);
 +}
 +
  static int dsa_tree_setup(struct dsa_switch_tree *dst)
  {
        bool complete;
        if (err)
                goto teardown_switches;
  
 +      err = dsa_tree_setup_lags(dst);
 +      if (err)
 +              goto teardown_master;
 +
        dst->setup = true;
  
        pr_info("DSA: tree %d setup\n", dst->index);
  
        return 0;
  
 +teardown_master:
 +      dsa_tree_teardown_master(dst);
  teardown_switches:
        dsa_tree_teardown_switches(dst);
  teardown_default_cpu:
@@@ -924,8 -633,6 +927,8 @@@ static void dsa_tree_teardown(struct ds
        if (!dst->setup)
                return;
  
 +      dsa_tree_teardown_lags(dst);
 +
        dsa_tree_teardown_master(dst);
  
        dsa_tree_teardown_switches(dst);
        dst->setup = false;
  }
  
 +/* Since the dsa/tagging sysfs device attribute is per master, the assumption
 + * is that all DSA switches within a tree share the same tagger, otherwise
 + * they would have formed disjoint trees (different "dsa,member" values).
 + */
 +int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
 +                            struct net_device *master,
 +                            const struct dsa_device_ops *tag_ops,
 +                            const struct dsa_device_ops *old_tag_ops)
 +{
 +      struct dsa_notifier_tag_proto_info info;
 +      struct dsa_port *dp;
 +      int err = -EBUSY;
 +
 +      if (!rtnl_trylock())
 +              return restart_syscall();
 +
 +      /* At the moment we don't allow changing the tag protocol under
 +       * traffic. The rtnl_mutex also happens to serialize concurrent
 +       * attempts to change the tagging protocol. If we ever lift the IFF_UP
 +       * restriction, there needs to be another mutex which serializes this.
 +       */
 +      if (master->flags & IFF_UP)
 +              goto out_unlock;
 +
 +      list_for_each_entry(dp, &dst->ports, list) {
 +              if (!dsa_is_user_port(dp->ds, dp->index))
 +                      continue;
 +
 +              if (dp->slave->flags & IFF_UP)
 +                      goto out_unlock;
 +      }
 +
 +      info.tag_ops = tag_ops;
 +      err = dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO, &info);
 +      if (err)
 +              goto out_unwind_tagger;
 +
 +      dst->tag_ops = tag_ops;
 +
 +      rtnl_unlock();
 +
 +      return 0;
 +
 +out_unwind_tagger:
 +      info.tag_ops = old_tag_ops;
 +      dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO, &info);
 +out_unlock:
 +      rtnl_unlock();
 +      return err;
 +}
 +
  static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index)
  {
        struct dsa_switch_tree *dst = ds->dst;
@@@ -1063,33 -719,24 +1066,33 @@@ static int dsa_port_parse_cpu(struct ds
  {
        struct dsa_switch *ds = dp->ds;
        struct dsa_switch_tree *dst = ds->dst;
 -      const struct dsa_device_ops *tag_ops;
        enum dsa_tag_protocol tag_protocol;
  
        tag_protocol = dsa_get_tag_protocol(dp, master);
 -      tag_ops = dsa_tag_driver_get(tag_protocol);
 -      if (IS_ERR(tag_ops)) {
 -              if (PTR_ERR(tag_ops) == -ENOPROTOOPT)
 -                      return -EPROBE_DEFER;
 -              dev_warn(ds->dev, "No tagger for this switch\n");
 -              dp->master = NULL;
 -              return PTR_ERR(tag_ops);
 +      if (dst->tag_ops) {
 +              if (dst->tag_ops->proto != tag_protocol) {
 +                      dev_err(ds->dev,
 +                              "A DSA switch tree can have only one tagging protocol\n");
 +                      return -EINVAL;
 +              }
 +              /* In the case of multiple CPU ports per switch, the tagging
 +               * protocol is still reference-counted only per switch tree, so
 +               * nothing to do here.
 +               */
 +      } else {
 +              dst->tag_ops = dsa_tag_driver_get(tag_protocol);
 +              if (IS_ERR(dst->tag_ops)) {
 +                      if (PTR_ERR(dst->tag_ops) == -ENOPROTOOPT)
 +                              return -EPROBE_DEFER;
 +                      dev_warn(ds->dev, "No tagger for this switch\n");
 +                      dp->master = NULL;
 +                      return PTR_ERR(dst->tag_ops);
 +              }
        }
  
        dp->master = master;
        dp->type = DSA_PORT_TYPE_CPU;
 -      dp->filter = tag_ops->filter;
 -      dp->rcv = tag_ops->rcv;
 -      dp->tag_ops = tag_ops;
 +      dsa_port_set_tag_protocol(dp, dst->tag_ops);
        dp->dst = dst;
  
        return 0;
@@@ -1143,8 -790,6 +1146,8 @@@ static int dsa_switch_parse_ports_of(st
                        goto out_put_node;
  
                if (reg >= ds->num_ports) {
 +                      dev_err(ds->dev, "port %pOF index %u exceeds num_ports (%zu)\n",
 +                              port, reg, ds->num_ports);
                        err = -EINVAL;
                        goto out_put_node;
                }
diff --combined net/netfilter/nf_flow_table_core.c
index 55fca71ace262888ecb2aa961142260a9a4174c1,4a4acbba78ff77292c2ddfa24e0ed7e7bde1090e..5fa657b8e03dff15a2fa828f22b09a364e7d5887
@@@ -191,14 -191,14 +191,14 @@@ static u32 flow_offload_hash(const voi
  {
        const struct flow_offload_tuple *tuple = data;
  
 -      return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
 +      return jhash(tuple, offsetof(struct flow_offload_tuple, __hash), seed);
  }
  
  static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
  {
        const struct flow_offload_tuple_rhash *tuplehash = data;
  
 -      return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
 +      return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, __hash), seed);
  }
  
  static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
        const struct flow_offload_tuple *tuple = arg->key;
        const struct flow_offload_tuple_rhash *x = ptr;
  
 -      if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
 +      if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, __hash)))
                return 1;
  
        return 0;
@@@ -399,7 -399,7 +399,7 @@@ static int nf_flow_nat_port_tcp(struct 
                return -1;
  
        tcph = (void *)(skb_network_header(skb) + thoff);
-       inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true);
+       inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false);
  
        return 0;
  }
@@@ -415,7 -415,7 +415,7 @@@ static int nf_flow_nat_port_udp(struct 
        udph = (void *)(skb_network_header(skb) + thoff);
        if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
                inet_proto_csum_replace2(&udph->check, skb, port,
-                                        new_port, true);
+                                        new_port, false);
                if (!udph->check)
                        udph->check = CSUM_MANGLED_0;
        }
diff --combined net/netfilter/nf_tables_api.c
index 01dc0a169a780a13b86ffaebea51ba6f8a551eaa,8ee9f40cc0ea2816839a3c7264fcb65eebd5c9e7..ab93a353651a657c2df8fb05d39f06f3310297ab
@@@ -4438,12 -4438,6 +4438,12 @@@ static int nf_tables_delset(struct net 
        return nft_delset(&ctx, set);
  }
  
 +static int nft_validate_register_store(const struct nft_ctx *ctx,
 +                                     enum nft_registers reg,
 +                                     const struct nft_data *data,
 +                                     enum nft_data_types type,
 +                                     unsigned int len);
 +
  static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx,
                                        struct nft_set *set,
                                        const struct nft_set_iter *iter,
@@@ -5287,6 -5281,7 +5287,7 @@@ static int nft_add_set_elem(struct nft_
        struct nft_expr *expr_array[NFT_SET_EXPR_MAX] = {};
        struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
        u8 genmask = nft_genmask_next(ctx->net);
+       u32 flags = 0, size = 0, num_exprs = 0;
        struct nft_set_ext_tmpl tmpl;
        struct nft_set_ext *ext, *ext2;
        struct nft_set_elem elem;
        struct nft_data_desc desc;
        enum nft_registers dreg;
        struct nft_trans *trans;
-       u32 flags = 0, size = 0;
        u64 timeout;
        u64 expiration;
        int err, i;
        if (nla[NFTA_SET_ELEM_EXPR]) {
                struct nft_expr *expr;
  
-               if (set->num_exprs != 1)
+               if (set->num_exprs && set->num_exprs != 1)
                        return -EOPNOTSUPP;
  
                expr = nft_set_elem_expr_alloc(ctx, set,
                        return PTR_ERR(expr);
  
                expr_array[0] = expr;
+               num_exprs = 1;
  
-               if (set->exprs[0] && set->exprs[0]->ops != expr->ops) {
+               if (set->num_exprs && set->exprs[0]->ops != expr->ops) {
                        err = -EOPNOTSUPP;
                        goto err_set_elem_expr;
                }
                struct nlattr *tmp;
                int left;
  
-               if (set->num_exprs == 0)
-                       return -EOPNOTSUPP;
                i = 0;
                nla_for_each_nested(tmp, nla[NFTA_SET_ELEM_EXPRESSIONS], left) {
-                       if (i == set->num_exprs) {
+                       if (i == NFT_SET_EXPR_MAX ||
+                           (set->num_exprs && set->num_exprs == i)) {
                                err = -E2BIG;
                                goto err_set_elem_expr;
                        }
                                goto err_set_elem_expr;
                        }
                        expr_array[i] = expr;
+                       num_exprs++;
  
-                       if (expr->ops != set->exprs[i]->ops) {
+                       if (set->num_exprs && expr->ops != set->exprs[i]->ops) {
                                err = -EOPNOTSUPP;
                                goto err_set_elem_expr;
                        }
                        i++;
                }
-               if (set->num_exprs != i) {
+               if (set->num_exprs && set->num_exprs != i) {
                        err = -EOPNOTSUPP;
                        goto err_set_elem_expr;
                }
                err = nft_set_elem_expr_clone(ctx, set, expr_array);
                if (err < 0)
                        goto err_set_elem_expr_clone;
+               num_exprs = set->num_exprs;
        }
  
        err = nft_setelem_parse_key(ctx, set, &elem.key.val,
                        nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
        }
  
-       if (set->num_exprs) {
-               for (i = 0; i < set->num_exprs; i++)
+       if (num_exprs) {
+               for (i = 0; i < num_exprs; i++)
                        size += expr_array[i]->ops->size;
  
                nft_set_ext_add_length(&tmpl, NFT_SET_EXT_EXPRESSIONS,
                *nft_set_ext_obj(ext) = obj;
                obj->use++;
        }
-       for (i = 0; i < set->num_exprs; i++)
+       for (i = 0; i < num_exprs; i++)
                nft_set_elem_expr_setup(ext, i, expr_array);
  
        trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set);
@@@ -5590,7 -5586,7 +5592,7 @@@ err_parse_key_end
  err_parse_key:
        nft_data_release(&elem.key.val, NFT_DATA_VALUE);
  err_set_elem_expr:
-       for (i = 0; i < set->num_exprs && expr_array[i]; i++)
+       for (i = 0; i < num_exprs && expr_array[i]; i++)
                nft_expr_destroy(ctx, expr_array[i]);
  err_set_elem_expr_clone:
        return err;
@@@ -8594,7 -8590,7 +8596,7 @@@ EXPORT_SYMBOL_GPL(nft_parse_u32_check)
   *    Registers used to be 128 bit wide, these register numbers will be
   *    mapped to the corresponding 32 bit register numbers.
   */
 -unsigned int nft_parse_register(const struct nlattr *attr)
 +static unsigned int nft_parse_register(const struct nlattr *attr)
  {
        unsigned int reg;
  
                return reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00;
        }
  }
 -EXPORT_SYMBOL_GPL(nft_parse_register);
  
  /**
   *    nft_dump_register - dump a register value to a netlink attribute
@@@ -8638,7 -8635,7 +8640,7 @@@ EXPORT_SYMBOL_GPL(nft_dump_register)
   *    Validate that the input register is one of the general purpose
   *    registers and that the length of the load is within the bounds.
   */
 -int nft_validate_register_load(enum nft_registers reg, unsigned int len)
 +static int nft_validate_register_load(enum nft_registers reg, unsigned int len)
  {
        if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE)
                return -EINVAL;
  
        return 0;
  }
 -EXPORT_SYMBOL_GPL(nft_validate_register_load);
 +
 +int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len)
 +{
 +      u32 reg;
 +      int err;
 +
 +      reg = nft_parse_register(attr);
 +      err = nft_validate_register_load(reg, len);
 +      if (err < 0)
 +              return err;
 +
 +      *sreg = reg;
 +      return 0;
 +}
 +EXPORT_SYMBOL_GPL(nft_parse_register_load);
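
A hedged sketch of how an expression ->init() callback would adopt the new helper in place of nft_parse_register() plus nft_validate_register_load(); nft_example, NFTA_EXAMPLE_SREG and the 4-byte length are invented for illustration:

struct nft_example {
        u8      sreg;   /* registers are now stored as u8 */
};

static int nft_example_init(const struct nft_ctx *ctx,
                            const struct nft_expr *expr,
                            const struct nlattr * const tb[])
{
        struct nft_example *priv = nft_expr_priv(expr);

        /* Parses the (invented) NFTA_EXAMPLE_SREG attribute and
         * bounds-checks a 4-byte load in one call.
         */
        return nft_parse_register_load(tb[NFTA_EXAMPLE_SREG], &priv->sreg,
                                       sizeof(u32));
}
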
  
  /**
   *    nft_validate_register_store - validate an expressions' register store
   *    A value of NULL for the data means that its runtime gathered
   *    data.
   */
 -int nft_validate_register_store(const struct nft_ctx *ctx,
 -                              enum nft_registers reg,
 -                              const struct nft_data *data,
 -                              enum nft_data_types type, unsigned int len)
 +static int nft_validate_register_store(const struct nft_ctx *ctx,
 +                                     enum nft_registers reg,
 +                                     const struct nft_data *data,
 +                                     enum nft_data_types type,
 +                                     unsigned int len)
  {
        int err;
  
                return 0;
        }
  }
 -EXPORT_SYMBOL_GPL(nft_validate_register_store);
 +
 +int nft_parse_register_store(const struct nft_ctx *ctx,
 +                           const struct nlattr *attr, u8 *dreg,
 +                           const struct nft_data *data,
 +                           enum nft_data_types type, unsigned int len)
 +{
 +      int err;
 +      u32 reg;
 +
 +      reg = nft_parse_register(attr);
 +      err = nft_validate_register_store(ctx, reg, data, type, len);
 +      if (err < 0)
 +              return err;
 +
 +      *dreg = reg;
 +      return 0;
 +}
 +EXPORT_SYMBOL_GPL(nft_parse_register_store);
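
And the store-side counterpart, again with invented names (nft_example_dst, NFTA_EXAMPLE_DREG); passing NULL data indicates the stored value is only known at runtime, as the comment above notes:

struct nft_example_dst {
        u8      dreg;
};

static int nft_example_dst_init(const struct nft_ctx *ctx,
                                const struct nft_expr *expr,
                                const struct nlattr * const tb[])
{
        struct nft_example_dst *priv = nft_expr_priv(expr);

        /* NULL data: the stored value is only gathered at runtime. */
        return nft_parse_register_store(ctx, tb[NFTA_EXAMPLE_DREG], &priv->dreg,
                                        NULL, NFT_DATA_VALUE, sizeof(u32));
}
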
  
  static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = {
        [NFTA_VERDICT_CODE]     = { .type = NLA_U32 },
@@@ -8986,6 -8951,17 +8988,17 @@@ int __nft_release_basechain(struct nft_
  }
  EXPORT_SYMBOL_GPL(__nft_release_basechain);
  
+ static void __nft_release_hooks(struct net *net)
+ {
+       struct nft_table *table;
+       struct nft_chain *chain;
+
+       list_for_each_entry(table, &net->nft.tables, list) {
+               list_for_each_entry(chain, &table->chains, list)
+                       nf_tables_unregister_hook(net, table, chain);
+       }
+ }
+
  static void __nft_release_tables(struct net *net)
  {
        struct nft_flowtable *flowtable, *nf;
  
        list_for_each_entry_safe(table, nt, &net->nft.tables, list) {
                ctx.family = table->family;
-               list_for_each_entry(chain, &table->chains, list)
-                       nf_tables_unregister_hook(net, table, chain);
-               /* No packets are walking on these chains anymore. */
                ctx.table = table;
                list_for_each_entry(chain, &table->chains, list) {
                        ctx.chain = chain;
@@@ -9053,6 -9025,11 +9062,11 @@@ static int __net_init nf_tables_init_ne
        return 0;
  }
  
+ static void __net_exit nf_tables_pre_exit_net(struct net *net)
+ {
+       __nft_release_hooks(net);
+ }
+
  static void __net_exit nf_tables_exit_net(struct net *net)
  {
        mutex_lock(&net->nft.commit_mutex);
  }
  
  static struct pernet_operations nf_tables_net_ops = {
-       .init   = nf_tables_init_net,
-       .exit   = nf_tables_exit_net,
+       .init           = nf_tables_init_net,
+       .pre_exit       = nf_tables_pre_exit_net,
+       .exit           = nf_tables_exit_net,
  };
  
  static int __init nf_tables_module_init(void)