F: drivers/power/reset/keystone-reset.c
ARM/TEXAS INSTRUMENTS K3 ARCHITECTURE
S: Supported
F: Documentation/devicetree/bindings/arm/ti/k3.yaml
F: tools/testing/selftests/arm64/
X: arch/arm64/boot/dts/
+ARROW SPEEDCHIPS XRS7000 SERIES ETHERNET SWITCH DRIVER
+S: Maintained
+F: Documentation/devicetree/bindings/net/dsa/arrow,xrs700x.yaml
+F: drivers/net/dsa/xrs700x/*
+F: net/dsa/tag_xrs700x.c
+
AS3645A LED FLASH CONTROLLER DRIVER
S: Supported
F: Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml
F: drivers/net/dsa/b53/*
+F: include/linux/dsa/brcm.h
F: include/linux/platform_data/b53.h
BROADCOM BCM2711/BCM2835 ARM ARCHITECTURE
F: arch/mips/bcm47xx/*
F: arch/mips/include/asm/mach-bcm47xx/*
+BROADCOM BCM4908 ETHERNET DRIVER
+S: Maintained
+F: Documentation/devicetree/bindings/net/brcm,bcm4908enet.yaml
+F: drivers/net/ethernet/broadcom/bcm4908enet.*
+F: drivers/net/ethernet/broadcom/unimac.h
+
BROADCOM BCM5301X ARM ARCHITECTURE
F: Documentation/devicetree/bindings/net/brcm,bcmgenet.txt
F: Documentation/devicetree/bindings/net/brcm,unimac-mdio.txt
F: drivers/net/ethernet/broadcom/genet/
+F: drivers/net/ethernet/broadcom/unimac.h
F: drivers/net/mdio/mdio-bcm-unimac.c
F: include/linux/platform_data/bcmgenet.h
F: include/linux/platform_data/mdio-bcm-unimac.h
N: hr2
N: stingray
+BROADCOM IPROC GBIT ETHERNET DRIVER
+S: Maintained
+F: Documentation/devicetree/bindings/net/brcm,amac.txt
+F: drivers/net/ethernet/broadcom/bgmac*
+F: drivers/net/ethernet/broadcom/unimac.h
+
BROADCOM KONA GPIO DRIVER
S: Supported
F: drivers/net/ethernet/broadcom/bcmsysport.*
+F: drivers/net/ethernet/broadcom/unimac.h
BROADCOM TG3 GIGABIT ETHERNET DRIVER
T: git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can-next.git
F: Documentation/devicetree/bindings/net/can/
F: drivers/net/can/
+F: include/linux/can/bittiming.h
F: include/linux/can/dev.h
F: include/linux/can/led.h
+F: include/linux/can/length.h
F: include/linux/can/platform/
F: include/linux/can/rx-offload.h
F: include/uapi/linux/can/error.h
T: git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can-next.git
F: Documentation/networking/can.rst
+F: include/linux/can/can-ml.h
F: include/linux/can/core.h
F: include/linux/can/skb.h
F: include/net/netns/can.h
F: .clang-format
CLANG/LLVM BUILD SUPPORT
S: Supported
F: drivers/edac/skx_*.[ch]
EDAC-TI
- S: Maintained
+ S: Odd Fixes
F: drivers/edac/ti_edac.c
EDIROL UA-101/UA-1000 DRIVER
F: drivers/hwmon/k8temp.c
KASAN
- M: Andrey Ryabinin <aryabinin@virtuozzo.com>
+ M: Andrey Ryabinin <ryabinin.a.a@gmail.com>
F: include/uapi/linux/nfc.h
F: net/nfc/
+NFC VIRTUAL NCI DEVICE DRIVER
+S: Supported
+F: drivers/nfc/virtual_ncidev.c
+F: tools/testing/selftests/nci/
+
NFS, SUNRPC, AND LOCKD CLIENTS
F: drivers/net/ethernet/mscc/
F: include/soc/mscc/ocelot*
F: net/dsa/tag_ocelot.c
+F: net/dsa/tag_ocelot_8021q.c
F: tools/testing/selftests/drivers/net/ocelot/*
OCXL (Open Coherent Accelerator Processor Interface OpenCAPI) DRIVER
TEXAS INSTRUMENTS' SYSTEM CONTROL INTERFACE (TISCI) PROTOCOL DRIVER
S: Maintained
F: drivers/clk/clk-cdce706.c
TI CLOCK DRIVER
- S: Maintained
+ S: Odd Fixes
F: drivers/clk/ti/
F: include/linux/clk/ti.h
S: Maintained
F: Documentation/devicetree/bindings/net/can/tcan4x5x.txt
-F: drivers/net/can/m_can/tcan4x5x.c
+F: drivers/net/can/m_can/tcan4x5x*
TI TRF7970A NFC DRIVER
VERSION = 5
PATCHLEVEL = 11
SUBLEVEL = 0
- EXTRAVERSION = -rc6
+ EXTRAVERSION = -rc7
NAME = Kleptomaniac Octopus
# *DOCUMENTATION*
INSTALLKERNEL := installkernel
DEPMOD = depmod
PERL = perl
- PYTHON = python
PYTHON3 = python3
CHECK = sparse
BASH = bash
export ARCH SRCARCH CONFIG_SHELL BASH HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE LD CC
export CPP AR NM STRIP OBJCOPY OBJDUMP READELF PAHOLE RESOLVE_BTFIDS LEX YACC AWK INSTALLKERNEL
- export PERL PYTHON PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX
+ export PERL PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX
export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ ZSTD
export KBUILD_HOSTCXXFLAGS KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS LDFLAGS_MODULE
core-y := init/ usr/
drivers-y := drivers/ sound/
drivers-$(CONFIG_SAMPLES) += samples/
-drivers-y += net/ virt/
+drivers-$(CONFIG_NET) += net/
+drivers-y += virt/
libs-y := lib/
endif # KBUILD_EXTMOD
KBUILD_CFLAGS += -enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang
endif
+ DEBUG_CFLAGS :=
+
# Workaround for GCC versions < 5.0
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61801
ifdef CONFIG_CC_IS_GCC
- DEBUG_CFLAGS := $(call cc-ifversion, -lt, 0500, $(call cc-option, -fno-var-tracking-assignments))
+ DEBUG_CFLAGS += $(call cc-ifversion, -lt, 0500, $(call cc-option, -fno-var-tracking-assignments))
endif
ifdef CONFIG_DEBUG_INFO
# change __FILE__ to the relative path from the srctree
KBUILD_CPPFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
- # ensure -fcf-protection is disabled when using retpoline as it is
- # incompatible with -mindirect-branch=thunk-extern
- ifdef CONFIG_RETPOLINE
- KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none)
- endif
-
# include additional Makefiles when needed
include-y := scripts/Makefile.extrawarn
include-$(CONFIG_KASAN) += scripts/Makefile.kasan
// SPDX-License-Identifier: GPL-2.0
-/* Copyright 2019 NXP Semiconductors
+/* Copyright 2019-2021 NXP Semiconductors
*
* This is an umbrella module for all network switches that are
* register-compatible with Ocelot and that perform I/O to their host CPU
#include <soc/mscc/ocelot_ana.h>
#include <soc/mscc/ocelot_ptp.h>
#include <soc/mscc/ocelot.h>
+#include <linux/dsa/8021q.h>
#include <linux/platform_device.h>
#include <linux/packing.h>
#include <linux/module.h>
#include <net/dsa.h>
#include "felix.h"
+/* Install the VCAP ES0 rule that pushes the tag_8021q RX VLAN @vid as an
+ * outer 802.1ad tag onto frames which ingressed on @port and are leaving
+ * through that port's upstream port.
+ *
+ * Nothing is installed unless @pvid is set; @untagged is not consulted.
+ *
+ * Returns 0 on success (or when there is nothing to do), -ENOMEM if the
+ * filter cannot be allocated, or the error from ocelot_vcap_filter_add(),
+ * in which case the filter is freed here.
+ */
+static int felix_tag_8021q_rxvlan_add(struct felix *felix, int port, u16 vid,
+				      bool pvid, bool untagged)
+{
+	struct ocelot *ocelot = &felix->ocelot;
+	struct ocelot_vcap_filter *rule;
+	int port_key_len, cpu_port, rc;
+
+	/* The rxvlan is only pushed on the way to the CPU, so the other
+	 * ports' filtering tables do not need an entry for it.
+	 */
+	if (!pvid)
+		return 0;
+
+	rule = kzalloc(sizeof(*rule), GFP_KERNEL);
+	if (!rule)
+		return -ENOMEM;
+
+	port_key_len = ocelot->vcap[VCAP_ES0].keys[VCAP_ES0_IGR_PORT].length;
+	cpu_port = dsa_upstream_port(felix->ds, port);
+
+	/* Rule identity and placement */
+	rule->block_id = VCAP_ES0;
+	rule->type = OCELOT_VCAP_FILTER_OFFLOAD;
+	rule->key_type = OCELOT_VCAP_KEY_ANY;
+	rule->lookup = 0;
+	rule->prio = 1;
+	rule->id.cookie = port;
+	rule->id.tc_offload = false;
+
+	/* Key: exact match on the (ingress port, egress port) pair */
+	rule->ingress_port.value = port;
+	rule->ingress_port.mask = GENMASK(port_key_len - 1, 0);
+	rule->egress_port.value = cpu_port;
+	rule->egress_port.mask = GENMASK(port_key_len - 1, 0);
+
+	/* Action: push an outer 802.1ad tag carrying @vid */
+	rule->action.push_outer_tag = OCELOT_ES0_TAG;
+	rule->action.tag_a_tpid_sel = OCELOT_TAG_TPID_SEL_8021AD;
+	rule->action.tag_a_vid_sel = 1;
+	rule->action.vid_a_val = vid;
+
+	rc = ocelot_vcap_filter_add(ocelot, rule, NULL);
+	if (rc)
+		kfree(rule);
+
+	return rc;
+}
+
+/* Install the pair of VCAP rules that steer a frame injected by the CPU with
+ * the tag_8021q TX VLAN @vid towards the front port @port:
+ *  - an IS1 rule matching @vid on the upstream port, which pops one VLAN tag
+ *    and sets the PAG to @port;
+ *  - an IS2 rule matching that PAG on the upstream port, which redirects the
+ *    frame to @port.
+ *
+ * @pvid and @untagged are not consulted. Nothing is installed for the
+ * tag_8021q CPU port itself.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or the error from
+ * ocelot_vcap_filter_add(). On failure no rule from this call is left
+ * installed.
+ */
+static int felix_tag_8021q_txvlan_add(struct felix *felix, int port, u16 vid,
+				      bool pvid, bool untagged)
+{
+	struct ocelot_vcap_filter *is1_rule, *is2_rule;
+	struct ocelot *ocelot = &felix->ocelot;
+	int cpu_port, rc;
+
+	/* tag_8021q.c expects this to be done through port VLAN membership,
+	 * which is not how it is implemented here, so the CPU port needs no
+	 * VCAP filter.
+	 */
+	if (ocelot->ports[port]->is_dsa_8021q_cpu)
+		return 0;
+
+	is1_rule = kzalloc(sizeof(*is1_rule), GFP_KERNEL);
+	if (!is1_rule)
+		return -ENOMEM;
+
+	is2_rule = kzalloc(sizeof(*is2_rule), GFP_KERNEL);
+	if (!is2_rule) {
+		kfree(is1_rule);
+		return -ENOMEM;
+	}
+
+	cpu_port = dsa_upstream_port(felix->ds, port);
+
+	/* IS1: strip the tag_8021q VLAN and remember the destination in PAG */
+	is1_rule->block_id = VCAP_IS1;
+	is1_rule->type = OCELOT_VCAP_FILTER_OFFLOAD;
+	is1_rule->key_type = OCELOT_VCAP_KEY_ANY;
+	is1_rule->lookup = 0;
+	is1_rule->prio = 1;
+	is1_rule->id.cookie = port;
+	is1_rule->id.tc_offload = false;
+	is1_rule->ingress_port_mask = BIT(cpu_port);
+	is1_rule->vlan.vid.value = vid;
+	is1_rule->vlan.vid.mask = VLAN_VID_MASK;
+	is1_rule->action.vlan_pop_cnt_ena = true;
+	is1_rule->action.vlan_pop_cnt = 1;
+	is1_rule->action.pag_override_mask = 0xff;
+	is1_rule->action.pag_val = port;
+
+	rc = ocelot_vcap_filter_add(ocelot, is1_rule, NULL);
+	if (rc)
+		goto err_free_both;
+
+	/* IS2: redirect whatever carries that PAG to the front port */
+	is2_rule->block_id = VCAP_IS2;
+	is2_rule->type = OCELOT_VCAP_FILTER_OFFLOAD;
+	is2_rule->key_type = OCELOT_VCAP_KEY_ANY;
+	is2_rule->lookup = 0;
+	is2_rule->prio = 1;
+	is2_rule->id.cookie = port;
+	is2_rule->id.tc_offload = false;
+	is2_rule->ingress_port_mask = BIT(cpu_port);
+	is2_rule->pag = port;
+	is2_rule->action.mask_mode = OCELOT_MASK_MODE_REDIRECT;
+	is2_rule->action.port_mask = BIT(port);
+
+	rc = ocelot_vcap_filter_add(ocelot, is2_rule, NULL);
+	if (rc)
+		goto err_del_is1;
+
+	return 0;
+
+err_del_is1:
+	/* The IS1 rule is already installed, so it is removed via the VCAP */
+	ocelot_vcap_filter_del(ocelot, is1_rule);
+	kfree(is2_rule);
+	return rc;
+
+err_free_both:
+	kfree(is1_rule);
+	kfree(is2_rule);
+	return rc;
+}
+
+/* tag_8021q .vlan_add callback: dispatch on whether @vid is an RX or a TX
+ * tag_8021q VLAN. Any other VLAN is accepted without doing anything.
+ *
+ * Returns the result of the RX/TX helper, or 0 for unrelated VLANs.
+ */
+static int felix_tag_8021q_vlan_add(struct dsa_switch *ds, int port, u16 vid,
+				    u16 flags)
+{
+	struct ocelot *ocelot = ds->priv;
+	struct felix *felix = ocelot_to_felix(ocelot);
+	bool is_untagged = flags & BRIDGE_VLAN_INFO_UNTAGGED;
+	bool is_pvid = flags & BRIDGE_VLAN_INFO_PVID;
+
+	if (vid_is_dsa_8021q_rxvlan(vid))
+		return felix_tag_8021q_rxvlan_add(felix, port, vid, is_pvid,
+						  is_untagged);
+	else if (vid_is_dsa_8021q_txvlan(vid))
+		return felix_tag_8021q_txvlan_add(felix, port, vid, is_pvid,
+						  is_untagged);
+
+	return 0;
+}
+
+/* Remove the ES0 outer tagging rule installed for @port by
+ * felix_tag_8021q_rxvlan_add(), if there is one. @vid is not consulted; the
+ * rule is looked up by its cookie, which is the port number.
+ *
+ * Returns 0 if no rule was found, otherwise the result of
+ * ocelot_vcap_filter_del().
+ */
+static int felix_tag_8021q_rxvlan_del(struct felix *felix, int port, u16 vid)
+{
+	struct ocelot *ocelot = &felix->ocelot;
+	struct ocelot_vcap_filter *rule;
+
+	rule = ocelot_vcap_block_find_filter_by_id(&ocelot->block[VCAP_ES0],
+						   port, false);
+	if (rule)
+		return ocelot_vcap_filter_del(ocelot, rule);
+
+	/* rxvlan_add skips non-pvid VLANs, but the delete API no longer
+	 * carries the "flags", so the two cases cannot be told apart here.
+	 * A missing rule is therefore taken to mean that none was ever
+	 * installed, i.e. that this is a delete on a non-pvid port. This is
+	 * a slightly sloppy assumption, but most probably a correct one.
+	 */
+	return 0;
+}
+
+/* Remove the IS1 untagging rule and the IS2 redirect rule installed for
+ * @port by felix_tag_8021q_txvlan_add(). Both are looked up by their cookie,
+ * which is the port number; @vid is not consulted.
+ *
+ * Nothing is done for the tag_8021q CPU port. If the IS1 rule is absent, the
+ * IS2 block is not searched at all.
+ *
+ * Returns 0 when there is nothing (left) to remove, otherwise the result of
+ * ocelot_vcap_filter_del().
+ */
+static int felix_tag_8021q_txvlan_del(struct felix *felix, int port, u16 vid)
+{
+	struct ocelot *ocelot = &felix->ocelot;
+	struct ocelot_vcap_filter *rule;
+	int rc;
+
+	if (ocelot->ports[port]->is_dsa_8021q_cpu)
+		return 0;
+
+	rule = ocelot_vcap_block_find_filter_by_id(&ocelot->block[VCAP_IS1],
+						   port, false);
+	if (!rule)
+		return 0;
+
+	rc = ocelot_vcap_filter_del(ocelot, rule);
+	if (rc)
+		return rc;
+
+	rule = ocelot_vcap_block_find_filter_by_id(&ocelot->block[VCAP_IS2],
+						   port, false);
+
+	return rule ? ocelot_vcap_filter_del(ocelot, rule) : 0;
+}
+
+/* tag_8021q .vlan_del callback: dispatch on whether @vid is an RX or a TX
+ * tag_8021q VLAN. Any other VLAN is ignored.
+ *
+ * Returns the result of the RX/TX helper, or 0 for unrelated VLANs.
+ */
+static int felix_tag_8021q_vlan_del(struct dsa_switch *ds, int port, u16 vid)
+{
+	struct ocelot *ocelot = ds->priv;
+	struct felix *felix = ocelot_to_felix(ocelot);
+
+	if (vid_is_dsa_8021q_rxvlan(vid))
+		return felix_tag_8021q_rxvlan_del(felix, port, vid);
+	else if (vid_is_dsa_8021q_txvlan(vid))
+		return felix_tag_8021q_txvlan_del(felix, port, vid);
+
+	return 0;
+}
+
+/* VLAN add/delete callbacks handed to the tag_8021q context that
+ * felix_setup_tag_8021q() creates.
+ */
+static const struct dsa_8021q_ops felix_tag_8021q_ops = {
+ .vlan_add = felix_tag_8021q_vlan_add,
+ .vlan_del = felix_tag_8021q_vlan_del,
+};
+
+/* Instead of the NPI functionality, the hardware MAC that is internally
+ * connected to the enetc or fman DSA master can be made to use the
+ * software-defined tag_8021q frame format. From the hardware's point of view
+ * it is then a "dumb switch": the queues of the CPU port module are no longer
+ * connected to that MAC, although they remain reachable through
+ * register-based MMIO.
+ *
+ * Marks @port as the tag_8021q CPU port, clears the NPI port, points PGID_CPU
+ * at @port and refreshes the bridge forwarding masks.
+ */
+static void felix_8021q_cpu_port_init(struct ocelot *ocelot, int port)
+{
+	struct ocelot_port *cpu_port = ocelot->ports[port];
+
+	ocelot->npi = -1;
+	cpu_port->is_dsa_8021q_cpu = true;
+
+	/* PGID_CPU now designates the non-tagging port */
+	ocelot_write_rix(ocelot, BIT(port), ANA_PGID_PGID, PGID_CPU);
+
+	ocelot_apply_bridge_fwd_mask(ocelot);
+}
+
+/* Undo felix_8021q_cpu_port_init(): @port stops being the tag_8021q CPU port,
+ * PGID_CPU points at BIT(ocelot->num_phys_ports) again and the bridge
+ * forwarding masks are refreshed. ocelot->npi is left as it is.
+ */
+static void felix_8021q_cpu_port_deinit(struct ocelot *ocelot, int port)
+{
+	struct ocelot_port *cpu_port = ocelot->ports[port];
+
+	cpu_port->is_dsa_8021q_cpu = false;
+
+	/* Put PGID_CPU back */
+	ocelot_write_rix(ocelot, BIT(ocelot->num_phys_ports), ANA_PGID_PGID,
+			 PGID_CPU);
+
+	ocelot_apply_bridge_fwd_mask(ocelot);
+}
+
+/* Switch the CPU port @cpu over to the tag_8021q frame format.
+ *
+ * The tag_8021q context is allocated before any hardware state is touched, so
+ * that running out of memory leaves the switch exactly as it was. If
+ * dsa_8021q_setup() fails afterwards, the BPDU redirection and the CPU port
+ * state are rolled back the same way felix_teardown_tag_8021q() does it, the
+ * context is freed and the pointer is cleared. This lets the caller fall back
+ * to the previous tagging protocol from a clean state.
+ *
+ * Returns 0 on success, -ENOMEM if the context cannot be allocated, or the
+ * error returned by dsa_8021q_setup().
+ */
+static int felix_setup_tag_8021q(struct dsa_switch *ds, int cpu)
+{
+	struct ocelot *ocelot = ds->priv;
+	struct felix *felix = ocelot_to_felix(ocelot);
+	unsigned long cpu_flood;
+	int port, err;
+
+	felix->dsa_8021q_ctx = kzalloc(sizeof(*felix->dsa_8021q_ctx),
+				       GFP_KERNEL);
+	if (!felix->dsa_8021q_ctx)
+		return -ENOMEM;
+
+	felix->dsa_8021q_ctx->ops = &felix_tag_8021q_ops;
+	felix->dsa_8021q_ctx->proto = htons(ETH_P_8021AD);
+	felix->dsa_8021q_ctx->ds = ds;
+
+	/* Has to happen before dsa_8021q_setup(): the txvlan callbacks in
+	 * felix_tag_8021q_ops test is_dsa_8021q_cpu (presumably they are
+	 * invoked from dsa_8021q_setup() - confirm against tag_8021q.c).
+	 */
+	felix_8021q_cpu_port_init(ocelot, cpu);
+
+	for (port = 0; port < ds->num_ports; port++) {
+		if (dsa_is_unused_port(ds, port))
+			continue;
+
+		/* This overwrites ocelot_init():
+		 * Do not forward BPDU frames to the CPU port module,
+		 * for 2 reasons:
+		 * - When these packets are injected from the tag_8021q
+		 *   CPU port, we want them to go out, not loop back
+		 *   into the system.
+		 * - STP traffic ingressing on a user port should go to
+		 *   the tag_8021q CPU port, not to the hardware CPU
+		 *   port module.
+		 */
+		ocelot_write_gix(ocelot,
+				 ANA_PORT_CPU_FWD_BPDU_CFG_BPDU_REDIR_ENA(0),
+				 ANA_PORT_CPU_FWD_BPDU_CFG, port);
+	}
+
+	/* In tag_8021q mode, the CPU port module is unused. So we
+	 * want to disable flooding of any kind to the CPU port module,
+	 * since packets going there will end in a black hole.
+	 */
+	cpu_flood = ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports));
+	ocelot_rmw_rix(ocelot, 0, cpu_flood, ANA_PGID_PGID, PGID_UC);
+	ocelot_rmw_rix(ocelot, 0, cpu_flood, ANA_PGID_PGID, PGID_MC);
+
+	err = dsa_8021q_setup(felix->dsa_8021q_ctx, true);
+	if (err)
+		goto out_restore_hw;
+
+	return 0;
+
+out_restore_hw:
+	/* Mirror felix_teardown_tag_8021q(): hand BPDUs back to the CPU port
+	 * module and give up the tag_8021q CPU port role.
+	 * NOTE(review): like the teardown path, this does not put the
+	 * PGID_UC/PGID_MC flooding bits back; felix_setup_tag_npi() sets the
+	 * PGID_UC one again when the NPI protocol is restored.
+	 */
+	for (port = 0; port < ds->num_ports; port++) {
+		if (dsa_is_unused_port(ds, port))
+			continue;
+
+		ocelot_write_gix(ocelot,
+				 ANA_PORT_CPU_FWD_BPDU_CFG_BPDU_REDIR_ENA(0xffff),
+				 ANA_PORT_CPU_FWD_BPDU_CFG, port);
+	}
+
+	felix_8021q_cpu_port_deinit(ocelot, cpu);
+
+	kfree(felix->dsa_8021q_ctx);
+	/* Do not leave a dangling pointer behind for a later teardown */
+	felix->dsa_8021q_ctx = NULL;
+
+	return err;
+}
+
+/* Leave tag_8021q mode on the CPU port @cpu: tear down the tag_8021q VLANs,
+ * release the context, hand BPDU frames back to the CPU port module and undo
+ * felix_8021q_cpu_port_init().
+ *
+ * A failure of dsa_8021q_setup() is only logged, since this function cannot
+ * report errors; the rest of the teardown is carried out regardless.
+ */
+static void felix_teardown_tag_8021q(struct dsa_switch *ds, int cpu)
+{
+	struct ocelot *ocelot = ds->priv;
+	struct felix *felix = ocelot_to_felix(ocelot);
+	int err, port;
+
+	err = dsa_8021q_setup(felix->dsa_8021q_ctx, false);
+	if (err)
+		dev_err(ds->dev, "dsa_8021q_setup returned %d\n", err);
+
+	kfree(felix->dsa_8021q_ctx);
+	/* Do not leave a dangling pointer behind in struct felix */
+	felix->dsa_8021q_ctx = NULL;
+
+	for (port = 0; port < ds->num_ports; port++) {
+		if (dsa_is_unused_port(ds, port))
+			continue;
+
+		/* Restore the logic from ocelot_init:
+		 * do not forward BPDU frames to the front ports.
+		 */
+		ocelot_write_gix(ocelot,
+				 ANA_PORT_CPU_FWD_BPDU_CFG_BPDU_REDIR_ENA(0xffff),
+				 ANA_PORT_CPU_FWD_BPDU_CFG,
+				 port);
+	}
+
+	felix_8021q_cpu_port_deinit(ocelot, cpu);
+}
+
+/* The CPU port module is connected to the Node Processor Interface (NPI). This
+ * is the mode through which frames can be injected from and extracted to an
+ * external CPU, over Ethernet. In NXP SoCs, the "external CPU" is the ARM CPU
+ * running Linux, and this forms a DSA setup together with the enetc or fman
+ * DSA master.
+ *
+ * Records @port as the NPI port in ocelot->npi, programs QSYS_EXT_CPU_CFG for
+ * it, applies the configured extraction/injection header prefixes and turns
+ * off pause frame transmission on that port.
+ */
+static void felix_npi_port_init(struct ocelot *ocelot, int port)
+{
+ ocelot->npi = port;
+
+ /* Select @port as the external CPU port, with the CPU queue mask set */
+ ocelot_write(ocelot, QSYS_EXT_CPU_CFG_EXT_CPUQ_MSK_M |
+ QSYS_EXT_CPU_CFG_EXT_CPU_PORT(port),
+ QSYS_EXT_CPU_CFG);
+
+ /* NPI port Injection/Extraction configuration */
+ ocelot_fields_write(ocelot, port, SYS_PORT_MODE_INCL_XTR_HDR,
+ ocelot->npi_xtr_prefix);
+ ocelot_fields_write(ocelot, port, SYS_PORT_MODE_INCL_INJ_HDR,
+ ocelot->npi_inj_prefix);
+
+ /* Disable transmission of pause frames */
+ ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, 0);
+}
+
+/* Undo felix_npi_port_init() for @port: clear ocelot->npi, point
+ * QSYS_EXT_CPU_CFG at a port number past the physical ports, disable the
+ * extraction/injection header prefixes and turn pause frame transmission
+ * back on.
+ */
+static void felix_npi_port_deinit(struct ocelot *ocelot, int port)
+{
+	/* Hardware default for the external CPU port field */
+	int hw_default_port = ocelot->num_phys_ports + 2;
+
+	ocelot->npi = -1;
+
+	ocelot_write(ocelot, QSYS_EXT_CPU_CFG_EXT_CPU_PORT(hw_default_port),
+		     QSYS_EXT_CPU_CFG);
+
+	/* No extraction or injection header on this port any more */
+	ocelot_fields_write(ocelot, port, SYS_PORT_MODE_INCL_XTR_HDR,
+			    OCELOT_TAG_PREFIX_DISABLED);
+	ocelot_fields_write(ocelot, port, SYS_PORT_MODE_INCL_INJ_HDR,
+			    OCELOT_TAG_PREFIX_DISABLED);
+
+	/* Pause frames may be transmitted again */
+	ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, 1);
+}
+
+/* Put the CPU port @cpu in NPI mode and let unknown unicast reach the CPU
+ * port module. Always returns 0.
+ */
+static int felix_setup_tag_npi(struct dsa_switch *ds, int cpu)
+{
+	struct ocelot *ocelot = ds->priv;
+	unsigned long cpu_module;
+
+	felix_npi_port_init(ocelot, cpu);
+
+	/* Add the CPU port module (and through it the NPI port) to the
+	 * unknown unicast forwarding mask. Both the hardware default of
+	 * ANA_FLOODING_FLD_UNICAST and ocelot_init leave
+	 * BIT(ocelot->num_phys_ports) out of it, because Ocelot whitelists
+	 * MAC addresses towards PGID_CPU instead.
+	 * This is needed since DSA does not perform RX filtering yet and the
+	 * NPI port does no source address learning, which makes traffic
+	 * destined to Linux effectively unknown to the switch.
+	 */
+	cpu_module = ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports));
+	ocelot_rmw_rix(ocelot, cpu_module, cpu_module, ANA_PGID_PGID, PGID_UC);
+
+	return 0;
+}
+
+/* Take the CPU port @cpu out of NPI mode; see felix_npi_port_deinit(). */
+static void felix_teardown_tag_npi(struct dsa_switch *ds, int cpu)
+{
+	felix_npi_port_deinit(ds->priv, cpu);
+}
+
+/* Bring up the tagging protocol @proto on the CPU port @cpu.
+ *
+ * Returns the result of the protocol-specific setup, or -EPROTONOSUPPORT for
+ * a protocol this driver does not implement.
+ */
+static int felix_set_tag_protocol(struct dsa_switch *ds, int cpu,
+				  enum dsa_tag_protocol proto)
+{
+	if (proto == DSA_TAG_PROTO_OCELOT)
+		return felix_setup_tag_npi(ds, cpu);
+
+	if (proto == DSA_TAG_PROTO_OCELOT_8021Q)
+		return felix_setup_tag_8021q(ds, cpu);
+
+	return -EPROTONOSUPPORT;
+}
+
+/* Tear down the tagging protocol @proto on the CPU port @cpu. Protocols this
+ * driver does not implement are silently ignored.
+ */
+static void felix_del_tag_protocol(struct dsa_switch *ds, int cpu,
+				   enum dsa_tag_protocol proto)
+{
+	if (proto == DSA_TAG_PROTO_OCELOT)
+		felix_teardown_tag_npi(ds, cpu);
+	else if (proto == DSA_TAG_PROTO_OCELOT_8021Q)
+		felix_teardown_tag_8021q(ds, cpu);
+}
+
+/* Replace the tagging protocol in use on the CPU port @cpu with @proto.
+ *
+ * The switch is always left in a consistent state: the tag_8021q setup may
+ * fail, but the NPI setup cannot, so either the change goes through or
+ * falling back to the previous protocol is guaranteed to work.
+ *
+ * Returns 0 on success, -EPROTONOSUPPORT for an unknown @proto (nothing is
+ * changed in that case), or the error from setting up @proto.
+ */
+static int felix_change_tag_protocol(struct dsa_switch *ds, int cpu,
+				     enum dsa_tag_protocol proto)
+{
+	struct ocelot *ocelot = ds->priv;
+	struct felix *felix = ocelot_to_felix(ocelot);
+	enum dsa_tag_protocol previous = felix->tag_proto;
+	int rc;
+
+	/* Reject unsupported protocols before tearing anything down */
+	switch (proto) {
+	case DSA_TAG_PROTO_OCELOT:
+	case DSA_TAG_PROTO_OCELOT_8021Q:
+		break;
+	default:
+		return -EPROTONOSUPPORT;
+	}
+
+	felix_del_tag_protocol(ds, cpu, previous);
+
+	rc = felix_set_tag_protocol(ds, cpu, proto);
+	if (!rc) {
+		felix->tag_proto = proto;
+		return 0;
+	}
+
+	/* Fall back to what was running before */
+	felix_set_tag_protocol(ds, cpu, previous);
+
+	return rc;
+}
+
static enum dsa_tag_protocol felix_get_tag_protocol(struct dsa_switch *ds,
int port,
enum dsa_tag_protocol mp)
{
- return DSA_TAG_PROTO_OCELOT;
+ struct ocelot *ocelot = ds->priv;
+ struct felix *felix = ocelot_to_felix(ocelot);
+
+ return felix->tag_proto;
}
static int felix_set_ageing_time(struct dsa_switch *ds,
return ocelot_fdb_del(ocelot, port, addr, vid);
}
-/* This callback needs to be present */
-static int felix_mdb_prepare(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_mdb *mdb)
-{
- return 0;
-}
-
-static void felix_mdb_add(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_mdb *mdb)
+static int felix_mdb_add(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_mdb *mdb)
{
struct ocelot *ocelot = ds->priv;
- ocelot_port_mdb_add(ocelot, port, mdb);
+ return ocelot_port_mdb_add(ocelot, port, mdb);
}
static int felix_mdb_del(struct dsa_switch *ds, int port,
ocelot_port_bridge_leave(ocelot, port, br);
}
+/* DSA .port_lag_join adapter: forwards to ocelot_port_lag_join() with the
+ * ocelot instance kept in ds->priv and returns its result.
+ */
+static int felix_lag_join(struct dsa_switch *ds, int port,
+			  struct net_device *bond,
+			  struct netdev_lag_upper_info *info)
+{
+	return ocelot_port_lag_join(ds->priv, port, bond, info);
+}
+
+/* DSA .port_lag_leave adapter: forwards to ocelot_port_lag_leave() with the
+ * ocelot instance kept in ds->priv. Always returns 0.
+ */
+static int felix_lag_leave(struct dsa_switch *ds, int port,
+			   struct net_device *bond)
+{
+	ocelot_port_lag_leave(ds->priv, port, bond);
+
+	return 0;
+}
+
+/* DSA .port_lag_change adapter: passes the port's current lag_tx_enabled
+ * state on to ocelot_port_lag_change(). Always returns 0.
+ */
+static int felix_lag_change(struct dsa_switch *ds, int port)
+{
+	ocelot_port_lag_change(ds->priv, port,
+			       dsa_to_port(ds, port)->lag_tx_enabled);
+
+	return 0;
+}
+
static int felix_vlan_prepare(struct dsa_switch *ds, int port,
const struct switchdev_obj_port_vlan *vlan)
{
struct ocelot *ocelot = ds->priv;
- u16 vid, flags = vlan->flags;
- int err;
+ u16 flags = vlan->flags;
/* Ocelot switches copy frames as-is to the CPU, so the flags:
* egress-untagged or not, pvid or not, make no difference. This
if (port == ocelot->npi)
return 0;
- for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) {
- err = ocelot_vlan_prepare(ocelot, port, vid,
- flags & BRIDGE_VLAN_INFO_PVID,
- flags & BRIDGE_VLAN_INFO_UNTAGGED);
- if (err)
- return err;
- }
-
- return 0;
+ return ocelot_vlan_prepare(ocelot, port, vlan->vid,
+ flags & BRIDGE_VLAN_INFO_PVID,
+ flags & BRIDGE_VLAN_INFO_UNTAGGED);
}
-static int felix_vlan_filtering(struct dsa_switch *ds, int port, bool enabled,
- struct switchdev_trans *trans)
+static int felix_vlan_filtering(struct dsa_switch *ds, int port, bool enabled)
{
struct ocelot *ocelot = ds->priv;
- return ocelot_port_vlan_filtering(ocelot, port, enabled, trans);
+ return ocelot_port_vlan_filtering(ocelot, port, enabled);
}
-static void felix_vlan_add(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_vlan *vlan)
+static int felix_vlan_add(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_vlan *vlan)
{
struct ocelot *ocelot = ds->priv;
u16 flags = vlan->flags;
- u16 vid;
int err;
- for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) {
- err = ocelot_vlan_add(ocelot, port, vid,
- flags & BRIDGE_VLAN_INFO_PVID,
- flags & BRIDGE_VLAN_INFO_UNTAGGED);
- if (err) {
- dev_err(ds->dev, "Failed to add VLAN %d to port %d: %d\n",
- vid, port, err);
- return;
- }
- }
+ err = felix_vlan_prepare(ds, port, vlan);
+ if (err)
+ return err;
+
+ return ocelot_vlan_add(ocelot, port, vlan->vid,
+ flags & BRIDGE_VLAN_INFO_PVID,
+ flags & BRIDGE_VLAN_INFO_UNTAGGED);
}
static int felix_vlan_del(struct dsa_switch *ds, int port,
const struct switchdev_obj_port_vlan *vlan)
{
struct ocelot *ocelot = ds->priv;
- u16 vid;
- int err;
- for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) {
- err = ocelot_vlan_del(ocelot, port, vid);
- if (err) {
- dev_err(ds->dev, "Failed to remove VLAN %d from port %d: %d\n",
- vid, port, err);
- return err;
- }
- }
- return 0;
+ return ocelot_vlan_del(ocelot, port, vlan->vid);
}
static int felix_port_enable(struct dsa_switch *ds, int port,
{
struct ocelot *ocelot = ds->priv;
struct ocelot_port *ocelot_port = ocelot->ports[port];
+ int err;
+
+ ocelot_port_rmwl(ocelot_port, 0, DEV_MAC_ENA_CFG_RX_ENA,
+ DEV_MAC_ENA_CFG);
- ocelot_port_writel(ocelot_port, 0, DEV_MAC_ENA_CFG);
ocelot_fields_write(ocelot, port, QSYS_SWITCH_PORT_MODE_PORT_ENA, 0);
+
+ err = ocelot_port_flush(ocelot, port);
+ if (err)
+ dev_err(ocelot->dev, "failed to flush port %d: %d\n",
+ port, err);
+
+ /* Put the port in reset. */
+ ocelot_port_writel(ocelot_port,
+ DEV_CLOCK_CFG_MAC_TX_RST |
+ DEV_CLOCK_CFG_MAC_RX_RST |
+ DEV_CLOCK_CFG_LINK_SPEED(OCELOT_SPEED_1000),
+ DEV_CLOCK_CFG);
}
static void felix_phylink_mac_link_up(struct dsa_switch *ds, int port,
ANA_PORT_QOS_CFG,
port);
- for (i = 0; i < FELIX_NUM_TC * 2; i++) {
+ for (i = 0; i < OCELOT_NUM_TC * 2; i++) {
ocelot_rmw_ix(ocelot,
(ANA_PORT_PCP_DEI_MAP_DP_PCP_DEI_VAL & i) |
ANA_PORT_PCP_DEI_MAP_QOS_PCP_DEI_VAL(i),
ocelot->map = felix->info->map;
ocelot->stats_layout = felix->info->stats_layout;
ocelot->num_stats = felix->info->num_stats;
- ocelot->shared_queue_sz = felix->info->shared_queue_sz;
ocelot->num_mact_rows = felix->info->num_mact_rows;
ocelot->vcap = felix->info->vcap;
ocelot->ops = felix->info->ops;
- ocelot->inj_prefix = OCELOT_TAG_PREFIX_SHORT;
- ocelot->xtr_prefix = OCELOT_TAG_PREFIX_SHORT;
+ ocelot->npi_inj_prefix = OCELOT_TAG_PREFIX_SHORT;
+ ocelot->npi_xtr_prefix = OCELOT_TAG_PREFIX_SHORT;
+ ocelot->devlink = felix->ds->devlink;
port_phy_modes = kcalloc(num_phys_ports, sizeof(phy_interface_t),
GFP_KERNEL);
return 0;
}
-/* The CPU port module is connected to the Node Processor Interface (NPI). This
- * is the mode through which frames can be injected from and extracted to an
- * external CPU, over Ethernet.
- */
-static void felix_npi_port_init(struct ocelot *ocelot, int port)
-{
- ocelot->npi = port;
-
- ocelot_write(ocelot, QSYS_EXT_CPU_CFG_EXT_CPUQ_MSK_M |
- QSYS_EXT_CPU_CFG_EXT_CPU_PORT(port),
- QSYS_EXT_CPU_CFG);
-
- /* NPI port Injection/Extraction configuration */
- ocelot_fields_write(ocelot, port, SYS_PORT_MODE_INCL_XTR_HDR,
- ocelot->xtr_prefix);
- ocelot_fields_write(ocelot, port, SYS_PORT_MODE_INCL_INJ_HDR,
- ocelot->inj_prefix);
-
- /* Disable transmission of pause frames */
- ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, 0);
-}
-
/* Hardware initialization done here so that we can allocate structures with
* devm without fear of dsa_register_switch returning -EPROBE_DEFER and causing
* us to allocate structures twice (leak memory) and map PCI memory twice
}
for (port = 0; port < ds->num_ports; port++) {
- ocelot_init_port(ocelot, port);
+ if (dsa_is_unused_port(ds, port))
+ continue;
- if (dsa_is_cpu_port(ds, port))
- felix_npi_port_init(ocelot, port);
+ ocelot_init_port(ocelot, port);
/* Set the default QoS Classification based on PCP and DEI
* bits of vlan tag.
felix_port_qos_map_init(ocelot, port);
}
- /* Include the CPU port module in the forwarding mask for unknown
- * unicast - the hardware default value for ANA_FLOODING_FLD_UNICAST
- * excludes BIT(ocelot->num_phys_ports), and so does ocelot_init, since
- * Ocelot relies on whitelisting MAC addresses towards PGID_CPU.
- */
- ocelot_write_rix(ocelot,
- ANA_PGID_PGID_PGID(GENMASK(ocelot->num_phys_ports, 0)),
- ANA_PGID_PGID, PGID_UC);
+ err = ocelot_devlink_sb_register(ocelot);
+ if (err)
+ return err;
+
+ for (port = 0; port < ds->num_ports; port++) {
+ if (!dsa_is_cpu_port(ds, port))
+ continue;
+
+ /* The initial tag protocol is NPI which always returns 0, so
+ * there's no real point in checking for errors.
+ */
+ felix_set_tag_protocol(ds, port, felix->tag_proto);
+ }
ds->mtu_enforcement_ingress = true;
- ds->configure_vlan_while_not_filtering = true;
+ ds->assisted_learning_on_cpu_port = true;
return 0;
}
struct felix *felix = ocelot_to_felix(ocelot);
int port;
- if (felix->info->mdio_bus_free)
- felix->info->mdio_bus_free(ocelot);
+ for (port = 0; port < ds->num_ports; port++) {
+ if (!dsa_is_cpu_port(ds, port))
+ continue;
- for (port = 0; port < ocelot->num_phys_ports; port++)
- ocelot_deinit_port(ocelot, port);
+ felix_del_tag_protocol(ds, port, felix->tag_proto);
+ }
+
+ ocelot_devlink_sb_unregister(ocelot);
ocelot_deinit_timestamp(ocelot);
- /* stop workqueue thread */
ocelot_deinit(ocelot);
+
+ for (port = 0; port < ocelot->num_phys_ports; port++)
+ ocelot_deinit_port(ocelot, port);
+
+ if (felix->info->mdio_bus_free)
+ felix->info->mdio_bus_free(ocelot);
}
static int felix_hwtstamp_get(struct dsa_switch *ds, int port,
return -EOPNOTSUPP;
}
+/* devlink shared buffer adapter: forwards to ocelot_sb_pool_get() with the
+ * ocelot instance kept in ds->priv and returns its result.
+ */
+static int felix_sb_pool_get(struct dsa_switch *ds, unsigned int sb_index,
+			     u16 pool_index,
+			     struct devlink_sb_pool_info *pool_info)
+{
+	return ocelot_sb_pool_get(ds->priv, sb_index, pool_index, pool_info);
+}
+
+/* devlink shared buffer adapter: forwards to ocelot_sb_pool_set() with the
+ * ocelot instance kept in ds->priv and returns its result.
+ */
+static int felix_sb_pool_set(struct dsa_switch *ds, unsigned int sb_index,
+			     u16 pool_index, u32 size,
+			     enum devlink_sb_threshold_type threshold_type,
+			     struct netlink_ext_ack *extack)
+{
+	return ocelot_sb_pool_set(ds->priv, sb_index, pool_index, size,
+				  threshold_type, extack);
+}
+
+/* devlink shared buffer adapter: forwards to ocelot_sb_port_pool_get() with
+ * the ocelot instance kept in ds->priv and returns its result.
+ */
+static int felix_sb_port_pool_get(struct dsa_switch *ds, int port,
+				  unsigned int sb_index, u16 pool_index,
+				  u32 *p_threshold)
+{
+	return ocelot_sb_port_pool_get(ds->priv, port, sb_index, pool_index,
+				       p_threshold);
+}
+
+/* devlink shared buffer adapter: forwards to ocelot_sb_port_pool_set() with
+ * the ocelot instance kept in ds->priv and returns its result.
+ */
+static int felix_sb_port_pool_set(struct dsa_switch *ds, int port,
+				  unsigned int sb_index, u16 pool_index,
+				  u32 threshold, struct netlink_ext_ack *extack)
+{
+	return ocelot_sb_port_pool_set(ds->priv, port, sb_index, pool_index,
+				       threshold, extack);
+}
+
+/* devlink shared buffer adapter: forwards to ocelot_sb_tc_pool_bind_get()
+ * with the ocelot instance kept in ds->priv and returns its result.
+ */
+static int felix_sb_tc_pool_bind_get(struct dsa_switch *ds, int port,
+				     unsigned int sb_index, u16 tc_index,
+				     enum devlink_sb_pool_type pool_type,
+				     u16 *p_pool_index, u32 *p_threshold)
+{
+	return ocelot_sb_tc_pool_bind_get(ds->priv, port, sb_index, tc_index,
+					  pool_type, p_pool_index,
+					  p_threshold);
+}
+
+/* devlink shared buffer adapter: forwards to ocelot_sb_tc_pool_bind_set()
+ * with the ocelot instance kept in ds->priv and returns its result.
+ */
+static int felix_sb_tc_pool_bind_set(struct dsa_switch *ds, int port,
+				     unsigned int sb_index, u16 tc_index,
+				     enum devlink_sb_pool_type pool_type,
+				     u16 pool_index, u32 threshold,
+				     struct netlink_ext_ack *extack)
+{
+	return ocelot_sb_tc_pool_bind_set(ds->priv, port, sb_index, tc_index,
+					  pool_type, pool_index, threshold,
+					  extack);
+}
+
+/* devlink shared buffer adapter: forwards to ocelot_sb_occ_snapshot() with
+ * the ocelot instance kept in ds->priv and returns its result.
+ */
+static int felix_sb_occ_snapshot(struct dsa_switch *ds,
+				 unsigned int sb_index)
+{
+	return ocelot_sb_occ_snapshot(ds->priv, sb_index);
+}
+
+/* devlink shared buffer adapter: forwards to ocelot_sb_occ_max_clear() with
+ * the ocelot instance kept in ds->priv and returns its result.
+ */
+static int felix_sb_occ_max_clear(struct dsa_switch *ds,
+				  unsigned int sb_index)
+{
+	return ocelot_sb_occ_max_clear(ds->priv, sb_index);
+}
+
+/* devlink shared buffer adapter: forwards to ocelot_sb_occ_port_pool_get()
+ * with the ocelot instance kept in ds->priv and returns its result.
+ */
+static int felix_sb_occ_port_pool_get(struct dsa_switch *ds, int port,
+				      unsigned int sb_index, u16 pool_index,
+				      u32 *p_cur, u32 *p_max)
+{
+	return ocelot_sb_occ_port_pool_get(ds->priv, port, sb_index,
+					   pool_index, p_cur, p_max);
+}
+
+/* devlink shared buffer adapter: forwards to ocelot_sb_occ_tc_port_bind_get()
+ * with the ocelot instance kept in ds->priv and returns its result.
+ */
+static int felix_sb_occ_tc_port_bind_get(struct dsa_switch *ds, int port,
+					 unsigned int sb_index, u16 tc_index,
+					 enum devlink_sb_pool_type pool_type,
+					 u32 *p_cur, u32 *p_max)
+{
+	return ocelot_sb_occ_tc_port_bind_get(ds->priv, port, sb_index,
+					      tc_index, pool_type, p_cur,
+					      p_max);
+}
+
const struct dsa_switch_ops felix_switch_ops = {
- .get_tag_protocol = felix_get_tag_protocol,
- .setup = felix_setup,
- .teardown = felix_teardown,
- .set_ageing_time = felix_set_ageing_time,
- .get_strings = felix_get_strings,
- .get_ethtool_stats = felix_get_ethtool_stats,
- .get_sset_count = felix_get_sset_count,
- .get_ts_info = felix_get_ts_info,
- .phylink_validate = felix_phylink_validate,
- .phylink_mac_config = felix_phylink_mac_config,
- .phylink_mac_link_down = felix_phylink_mac_link_down,
- .phylink_mac_link_up = felix_phylink_mac_link_up,
- .port_enable = felix_port_enable,
- .port_disable = felix_port_disable,
- .port_fdb_dump = felix_fdb_dump,
- .port_fdb_add = felix_fdb_add,
- .port_fdb_del = felix_fdb_del,
- .port_mdb_prepare = felix_mdb_prepare,
- .port_mdb_add = felix_mdb_add,
- .port_mdb_del = felix_mdb_del,
- .port_bridge_join = felix_bridge_join,
- .port_bridge_leave = felix_bridge_leave,
- .port_stp_state_set = felix_bridge_stp_state_set,
- .port_vlan_prepare = felix_vlan_prepare,
- .port_vlan_filtering = felix_vlan_filtering,
- .port_vlan_add = felix_vlan_add,
- .port_vlan_del = felix_vlan_del,
- .port_hwtstamp_get = felix_hwtstamp_get,
- .port_hwtstamp_set = felix_hwtstamp_set,
- .port_rxtstamp = felix_rxtstamp,
- .port_txtstamp = felix_txtstamp,
- .port_change_mtu = felix_change_mtu,
- .port_max_mtu = felix_get_max_mtu,
- .port_policer_add = felix_port_policer_add,
- .port_policer_del = felix_port_policer_del,
- .cls_flower_add = felix_cls_flower_add,
- .cls_flower_del = felix_cls_flower_del,
- .cls_flower_stats = felix_cls_flower_stats,
- .port_setup_tc = felix_port_setup_tc,
+ .get_tag_protocol = felix_get_tag_protocol,
+ .change_tag_protocol = felix_change_tag_protocol,
+ .setup = felix_setup,
+ .teardown = felix_teardown,
+ .set_ageing_time = felix_set_ageing_time,
+ .get_strings = felix_get_strings,
+ .get_ethtool_stats = felix_get_ethtool_stats,
+ .get_sset_count = felix_get_sset_count,
+ .get_ts_info = felix_get_ts_info,
+ .phylink_validate = felix_phylink_validate,
+ .phylink_mac_config = felix_phylink_mac_config,
+ .phylink_mac_link_down = felix_phylink_mac_link_down,
+ .phylink_mac_link_up = felix_phylink_mac_link_up,
+ .port_enable = felix_port_enable,
+ .port_disable = felix_port_disable,
+ .port_fdb_dump = felix_fdb_dump,
+ .port_fdb_add = felix_fdb_add,
+ .port_fdb_del = felix_fdb_del,
+ .port_mdb_add = felix_mdb_add,
+ .port_mdb_del = felix_mdb_del,
+ .port_bridge_join = felix_bridge_join,
+ .port_bridge_leave = felix_bridge_leave,
+ .port_lag_join = felix_lag_join,
+ .port_lag_leave = felix_lag_leave,
+ .port_lag_change = felix_lag_change,
+ .port_stp_state_set = felix_bridge_stp_state_set,
+ .port_vlan_filtering = felix_vlan_filtering,
+ .port_vlan_add = felix_vlan_add,
+ .port_vlan_del = felix_vlan_del,
+ .port_hwtstamp_get = felix_hwtstamp_get,
+ .port_hwtstamp_set = felix_hwtstamp_set,
+ .port_rxtstamp = felix_rxtstamp,
+ .port_txtstamp = felix_txtstamp,
+ .port_change_mtu = felix_change_mtu,
+ .port_max_mtu = felix_get_max_mtu,
+ .port_policer_add = felix_port_policer_add,
+ .port_policer_del = felix_port_policer_del,
+ .cls_flower_add = felix_cls_flower_add,
+ .cls_flower_del = felix_cls_flower_del,
+ .cls_flower_stats = felix_cls_flower_stats,
+ .port_setup_tc = felix_port_setup_tc,
+ .devlink_sb_pool_get = felix_sb_pool_get,
+ .devlink_sb_pool_set = felix_sb_pool_set,
+ .devlink_sb_port_pool_get = felix_sb_port_pool_get,
+ .devlink_sb_port_pool_set = felix_sb_port_pool_set, /* this hook and the devlink_sb_* hooks below pass ds->priv to the matching ocelot_sb_*() */
+ .devlink_sb_tc_pool_bind_get = felix_sb_tc_pool_bind_get,
+ .devlink_sb_tc_pool_bind_set = felix_sb_tc_pool_bind_set,
+ .devlink_sb_occ_snapshot = felix_sb_occ_snapshot,
+ .devlink_sb_occ_max_clear = felix_sb_occ_max_clear,
+ .devlink_sb_occ_port_pool_get = felix_sb_occ_port_pool_get,
+ .devlink_sb_occ_tc_port_bind_get= felix_sb_occ_tc_port_bind_get,
};
struct net_device *felix_port_to_netdev(struct ocelot *ocelot, int port)
if (unlikely(!xdpf)) {
trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
xdp_stat = &rx_ring->rx_stats.xdp_aborted;
+ verdict = XDP_ABORTED;
break;
}
xdp_stat = &rx_ring->rx_stats.xdp_redirect;
break;
}
- fallthrough;
+ trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
+ xdp_stat = &rx_ring->rx_stats.xdp_aborted;
+ verdict = XDP_ABORTED;
+ break;
case XDP_ABORTED:
trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
xdp_stat = &rx_ring->rx_stats.xdp_aborted;
int ret;
rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id];
- xdp->data = page_address(rx_info->page) + rx_info->page_offset;
- xdp_set_data_meta_invalid(xdp);
- xdp->data_hard_start = page_address(rx_info->page);
- xdp->data_end = xdp->data + rx_ring->ena_bufs[0].len;
+ xdp_prepare_buff(xdp, page_address(rx_info->page),
+ rx_info->page_offset,
+ rx_ring->ena_bufs[0].len, false);
/* If for some reason we received a bigger packet than
* we expect, then we simply drop it
*/
netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
"%s qid %d\n", __func__, rx_ring->qid);
res_budget = budget;
- xdp.rxq = &rx_ring->xdp_rxq;
- xdp.frame_sz = ENA_PAGE_SIZE;
+ xdp_init_buff(&xdp, ENA_PAGE_SIZE, &rx_ring->xdp_rxq);
do {
xdp_verdict = XDP_PASS;
struct xdp_frame **init_xdpf)
{
struct xdp_frame *new_xdpf, *xdpf = *init_xdpf;
- void *new_buff;
+ void *new_buff, *aligned_data;
struct page *p;
+ u32 data_shift;
+ int headroom;
/* Check the data alignment and make sure the headroom is large
* enough to store the xdpf backpointer. Use an aligned headroom
* byte frame headroom. If the XDP program uses all of it, copy the
* data to a new buffer and make room for storing the backpointer.
*/
- if (PTR_IS_ALIGNED(xdpf->data, DPAA_A050385_ALIGN) &&
+ if (PTR_IS_ALIGNED(xdpf->data, DPAA_FD_DATA_ALIGNMENT) &&
xdpf->headroom >= priv->tx_headroom) {
xdpf->headroom = priv->tx_headroom;
return 0;
}
+ /* Try to move the data inside the buffer just enough to align it and
+ * store the xdpf backpointer. If the available headroom isn't large
+ * enough, resort to allocating a new buffer and copying the data.
+ */
+ aligned_data = PTR_ALIGN_DOWN(xdpf->data, DPAA_FD_DATA_ALIGNMENT);
+ data_shift = xdpf->data - aligned_data;
+
+ /* The XDP frame's headroom needs to be large enough to accommodate
+ * shifting the data as well as storing the xdpf backpointer.
+ */
+ if (xdpf->headroom >= data_shift + priv->tx_headroom) {
+ memmove(aligned_data, xdpf->data, xdpf->len);
+ xdpf->data = aligned_data;
+ xdpf->headroom = priv->tx_headroom;
+ return 0;
+ }
+
+ /* The new xdp_frame is stored in the new buffer. Reserve enough space
+ * in the headroom for storing it along with the driver's private
+ * info. The headroom needs to be aligned to DPAA_FD_DATA_ALIGNMENT to
+ * guarantee the data's alignment in the buffer.
+ */
+ headroom = ALIGN(sizeof(*new_xdpf) + priv->tx_headroom,
+ DPAA_FD_DATA_ALIGNMENT);
+
+ /* Ensure the extended headroom and data don't overflow the buffer,
+ * while maintaining the mandatory tailroom.
+ */
+ if (headroom + xdpf->len > DPAA_BP_RAW_SIZE -
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+ return -ENOMEM;
+
p = dev_alloc_pages(0);
if (unlikely(!p))
return -ENOMEM;
/* Copy the data to the new buffer at a properly aligned offset */
new_buff = page_address(p);
- memcpy(new_buff + priv->tx_headroom, xdpf->data, xdpf->len);
+ memcpy(new_buff + headroom, xdpf->data, xdpf->len);
/* Create an XDP frame around the new buffer in a similar fashion
* to xdp_convert_buff_to_frame.
*/
new_xdpf = new_buff;
- new_xdpf->data = new_buff + priv->tx_headroom;
+ new_xdpf->data = new_buff + headroom;
new_xdpf->len = xdpf->len;
new_xdpf->headroom = priv->tx_headroom;
new_xdpf->frame_sz = DPAA_BP_RAW_SIZE;
return XDP_PASS;
}
- xdp.data = vaddr + fd_off;
- xdp.data_meta = xdp.data;
- xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM;
- xdp.data_end = xdp.data + qm_fd_get_length(fd);
- xdp.frame_sz = DPAA_BP_RAW_SIZE - DPAA_TX_PRIV_DATA_SIZE;
- xdp.rxq = &dpaa_fq->xdp_rxq;
+ xdp_init_buff(&xdp, DPAA_BP_RAW_SIZE - DPAA_TX_PRIV_DATA_SIZE,
+ &dpaa_fq->xdp_rxq);
+ xdp_prepare_buff(&xdp, vaddr + fd_off - XDP_PACKET_HEADROOM,
+ XDP_PACKET_HEADROOM, qm_fd_get_length(fd), true);
/* We reserve a fixed headroom of 256 bytes under the erratum and we
* offer it all to XDP programs to use. If no room is left for the
#include "hnae3.h"
#define HCLGE_NAME "hclge"
-#define HCLGE_STATS_READ(p, offset) (*((u64 *)((u8 *)(p) + (offset))))
+#define HCLGE_STATS_READ(p, offset) (*(u64 *)((u8 *)(p) + (offset)))
#define HCLGE_MAC_STATS_FIELD_OFF(f) (offsetof(struct hclge_mac_stats, f))
#define HCLGE_BUF_SIZE_UNIT 256U
#define HCLGE_LINK_STATUS_MS 10
-#define HCLGE_VF_VPORT_START_NUM 1
-
static int hclge_set_mac_mtu(struct hclge_dev *hdev, int new_mps);
static int hclge_init_vlan_config(struct hclge_dev *hdev);
static void hclge_sync_vlan_filter(struct hclge_dev *hdev);
for (i = 0; i < kinfo->num_tqps; i++) {
struct hclge_tqp *tqp = container_of(handle->kinfo.tqp[i],
struct hclge_tqp, q);
- snprintf(buff, ETH_GSTRING_LEN, "txq%d_pktnum_rcd",
+ snprintf(buff, ETH_GSTRING_LEN, "txq%u_pktnum_rcd",
tqp->index);
buff = buff + ETH_GSTRING_LEN;
}
for (i = 0; i < kinfo->num_tqps; i++) {
struct hclge_tqp *tqp = container_of(kinfo->tqp[i],
struct hclge_tqp, q);
- snprintf(buff, ETH_GSTRING_LEN, "rxq%d_pktnum_rcd",
+ snprintf(buff, ETH_GSTRING_LEN, "rxq%u_pktnum_rcd",
tqp->index);
buff = buff + ETH_GSTRING_LEN;
}
return 0;
}
-static int hclge_parse_speed(int speed_cmd, int *speed)
+static int hclge_parse_speed(u8 speed_cmd, u32 *speed)
{
switch (speed_cmd) {
case 6:
ae_dev->dev_specs.rss_key_size = HCLGE_RSS_KEY_SIZE;
ae_dev->dev_specs.max_tm_rate = HCLGE_ETHER_MAX_RATE;
ae_dev->dev_specs.max_int_gl = HCLGE_DEF_MAX_INT_GL;
+ ae_dev->dev_specs.max_frm_size = HCLGE_MAC_MAX_FRAME;
+ ae_dev->dev_specs.max_qset_num = HCLGE_MAX_QSET_NUM;
}
static void hclge_parse_dev_specs(struct hclge_dev *hdev,
ae_dev->dev_specs.int_ql_max = le16_to_cpu(req0->int_ql_max);
ae_dev->dev_specs.rss_key_size = le16_to_cpu(req0->rss_key_size);
ae_dev->dev_specs.max_tm_rate = le32_to_cpu(req0->max_tm_rate);
+ ae_dev->dev_specs.max_qset_num = le16_to_cpu(req1->max_qset_num);
ae_dev->dev_specs.max_int_gl = le16_to_cpu(req1->max_int_gl);
+ ae_dev->dev_specs.max_frm_size = le16_to_cpu(req1->max_frm_size);
}
static void hclge_check_dev_specs(struct hclge_dev *hdev)
dev_specs->rss_key_size = HCLGE_RSS_KEY_SIZE;
if (!dev_specs->max_tm_rate)
dev_specs->max_tm_rate = HCLGE_ETHER_MAX_RATE;
+ if (!dev_specs->max_qset_num)
+ dev_specs->max_qset_num = HCLGE_MAX_QSET_NUM;
if (!dev_specs->max_int_gl)
dev_specs->max_int_gl = HCLGE_DEF_MAX_INT_GL;
+ if (!dev_specs->max_frm_size)
+ dev_specs->max_frm_size = HCLGE_MAC_MAX_FRAME;
}
static int hclge_query_dev_specs(struct hclge_dev *hdev)
return HCLGE_RSS_KEY_SIZE;
}
-static u32 hclge_get_rss_indir_size(struct hnae3_handle *handle)
-{
- return HCLGE_RSS_IND_TBL_SIZE;
-}
-
static int hclge_set_rss_algo_key(struct hclge_dev *hdev,
const u8 hfunc, const u8 *key)
{
{
struct hclge_rss_indirection_table_cmd *req;
struct hclge_desc desc;
+ int rss_cfg_tbl_num;
u8 rss_msb_oft;
u8 rss_msb_val;
int ret;
u32 j;
req = (struct hclge_rss_indirection_table_cmd *)desc.data;
+ rss_cfg_tbl_num = hdev->ae_dev->dev_specs.rss_ind_tbl_size /
+ HCLGE_RSS_CFG_TBL_SIZE;
- for (i = 0; i < HCLGE_RSS_CFG_TBL_NUM; i++) {
+ for (i = 0; i < rss_cfg_tbl_num; i++) {
hclge_cmd_setup_basic_desc
(&desc, HCLGE_OPC_RSS_INDIR_TABLE, false);
static int hclge_get_rss(struct hnae3_handle *handle, u32 *indir,
u8 *key, u8 *hfunc)
{
+ struct hnae3_ae_dev *ae_dev = pci_get_drvdata(handle->pdev);
struct hclge_vport *vport = hclge_get_vport(handle);
int i;
/* Get indirect table */
if (indir)
- for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++)
+ for (i = 0; i < ae_dev->dev_specs.rss_ind_tbl_size; i++)
indir[i] = vport->rss_indirection_tbl[i];
return 0;
static int hclge_set_rss(struct hnae3_handle *handle, const u32 *indir,
const u8 *key, const u8 hfunc)
{
+ struct hnae3_ae_dev *ae_dev = pci_get_drvdata(handle->pdev);
struct hclge_vport *vport = hclge_get_vport(handle);
struct hclge_dev *hdev = vport->back;
u8 hash_algo;
}
/* Update the shadow RSS table with user specified qids */
- for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++)
+ for (i = 0; i < ae_dev->dev_specs.rss_ind_tbl_size; i++)
vport->rss_indirection_tbl[i] = indir[i];
/* Update the hardware */
int i, j;
for (j = 0; j < hdev->num_vmdq_vport + 1; j++) {
- for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++)
+ for (i = 0; i < hdev->ae_dev->dev_specs.rss_ind_tbl_size; i++)
vport[j].rss_indirection_tbl[i] =
i % vport[j].alloc_rss_size;
}
}
-static void hclge_rss_init_cfg(struct hclge_dev *hdev)
+static int hclge_rss_init_cfg(struct hclge_dev *hdev)
{
+ u16 rss_ind_tbl_size = hdev->ae_dev->dev_specs.rss_ind_tbl_size;
int i, rss_algo = HCLGE_RSS_HASH_ALGO_TOEPLITZ;
struct hclge_vport *vport = hdev->vport;
rss_algo = HCLGE_RSS_HASH_ALGO_SIMPLE;
for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
+ u16 *rss_ind_tbl;
+
vport[i].rss_tuple_sets.ipv4_tcp_en =
HCLGE_RSS_INPUT_TUPLE_OTHER;
vport[i].rss_tuple_sets.ipv4_udp_en =
vport[i].rss_algo = rss_algo;
+ rss_ind_tbl = devm_kcalloc(&hdev->pdev->dev, rss_ind_tbl_size,
+ sizeof(*rss_ind_tbl), GFP_KERNEL);
+ if (!rss_ind_tbl)
+ return -ENOMEM;
+
+ vport[i].rss_indirection_tbl = rss_ind_tbl;
memcpy(vport[i].rss_hash_key, hclge_hash_key,
HCLGE_RSS_KEY_SIZE);
}
hclge_rss_indir_init_cfg(hdev);
+
+ return 0;
}
int hclge_bind_ring_with_vector(struct hclge_vport *vport,
if (fs->m_ext.vlan_tci &&
be16_to_cpu(fs->h_ext.vlan_tci) >= VLAN_N_VID) {
dev_err(&hdev->pdev->dev,
- "failed to config vlan_tci, invalid vlan_tci: %u, max is %u.\n",
+ "failed to config vlan_tci, invalid vlan_tci: %u, max is %d.\n",
ntohs(fs->h_ext.vlan_tci), VLAN_N_VID - 1);
return -EINVAL;
}
/* HW supprt 2 layer vlan */
max_frm_size = new_mtu + ETH_HLEN + ETH_FCS_LEN + 2 * VLAN_HLEN;
if (max_frm_size < HCLGE_MAC_MIN_FRAME ||
- max_frm_size > HCLGE_MAC_MAX_FRAME)
+ max_frm_size > hdev->ae_dev->dev_specs.max_frm_size)
return -EINVAL;
max_frm_size = max(max_frm_size, HCLGE_MAC_DEFAULT_FRAME);
void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id)
{
+ struct hnae3_handle *handle = &vport->nic;
struct hclge_dev *hdev = vport->back;
int reset_try_times = 0;
int reset_status;
u16 queue_gid;
int ret;
+ if (queue_id >= handle->kinfo.num_tqps) {
+ dev_warn(&hdev->pdev->dev, "Invalid vf queue id(%u)\n",
+ queue_id);
+ return;
+ }
+
queue_gid = hclge_covert_handle_qid_global(&vport->nic, queue_id);
ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, true);
goto err_mdiobus_unreg;
}
- hclge_rss_init_cfg(hdev);
+ ret = hclge_rss_init_cfg(hdev);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to init rss cfg, ret = %d\n", ret);
+ goto err_mdiobus_unreg;
+ }
+
ret = hclge_rss_init_hw(hdev);
if (ret) {
dev_err(&pdev->dev, "Rss init fail, ret =%d\n", ret);
}
}
-static int hclge_vf_rate_param_check(struct hclge_dev *hdev, int vf,
+static int hclge_vf_rate_param_check(struct hclge_dev *hdev,
int min_tx_rate, int max_tx_rate)
{
if (min_tx_rate != 0 ||
struct hclge_dev *hdev = vport->back;
int ret;
- ret = hclge_vf_rate_param_check(hdev, vf, min_tx_rate, max_tx_rate);
+ ret = hclge_vf_rate_param_check(hdev, min_tx_rate, max_tx_rate);
if (ret)
return ret;
static int hclge_set_channels(struct hnae3_handle *handle, u32 new_tqps_num,
bool rxfh_configured)
{
+ struct hnae3_ae_dev *ae_dev = pci_get_drvdata(handle->pdev);
struct hclge_vport *vport = hclge_get_vport(handle);
struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo;
u16 tc_offset[HCLGE_MAX_TC_NUM] = {0};
goto out;
/* Reinitializes the rss indirect table according to the new RSS size */
- rss_indir = kcalloc(HCLGE_RSS_IND_TBL_SIZE, sizeof(u32), GFP_KERNEL);
+ rss_indir = kcalloc(ae_dev->dev_specs.rss_ind_tbl_size, sizeof(u32),
+ GFP_KERNEL);
if (!rss_indir)
return -ENOMEM;
- for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++)
+ for (i = 0; i < ae_dev->dev_specs.rss_ind_tbl_size; i++)
rss_indir[i] = i % kinfo->rss_size;
ret = hclge_set_rss(handle, rss_indir, NULL, 0);
.get_fec = hclge_get_fec,
.set_fec = hclge_set_fec,
.get_rss_key_size = hclge_get_rss_key_size,
- .get_rss_indir_size = hclge_get_rss_indir_size,
.get_rss = hclge_get_rss,
.set_rss = hclge_set_rss,
.set_rss_tuple = hclge_set_rss_tuple,
.enable_fd = hclge_enable_fd,
.add_arfs_entry = hclge_add_fd_entry_by_arfs,
.dbg_run_cmd = hclge_dbg_run_cmd,
+ .dbg_read_cmd = hclge_dbg_read_cmd,
.handle_hw_ras_error = hclge_handle_hw_ras_error,
.get_hw_reset_stat = hclge_get_hw_reset_stat,
.ae_dev_resetting = hclge_ae_dev_resetting,
resp_pf_to_vf->msg.resp_status = resp;
} else {
dev_warn(&hdev->pdev->dev,
- "failed to send response to VF, response status %d is out-of-bound\n",
+ "failed to send response to VF, response status %u is out-of-bound\n",
resp);
resp_pf_to_vf->msg.resp_status = EIO;
}
struct hclge_vport *vport)
{
struct hnae3_ring_chain_node *cur_chain, *new_chain;
+ struct hclge_dev *hdev = vport->back;
int ring_num;
- int i = 0;
+ int i;
ring_num = req->msg.ring_num;
if (ring_num > HCLGE_MBX_MAX_RING_CHAIN_PARAM_NUM)
return -ENOMEM;
+ for (i = 0; i < ring_num; i++) {
+ if (req->msg.param[i].tqp_index >= vport->nic.kinfo.rss_size) {
+ dev_err(&hdev->pdev->dev, "tqp index(%u) is out of range(0-%u)\n",
+ req->msg.param[i].tqp_index,
+ vport->nic.kinfo.rss_size - 1);
+ return -EINVAL;
+ }
+ }
+
hnae3_set_bit(ring_chain->flag, HNAE3_RING_TYPE_B,
- req->msg.param[i].ring_type);
+ req->msg.param[0].ring_type);
ring_chain->tqp_index =
hclge_get_queue_id(vport->nic.kinfo.tqp
- [req->msg.param[i].tqp_index]);
+ [req->msg.param[0].tqp_index]);
hnae3_set_field(ring_chain->int_gl_idx, HNAE3_RING_GL_IDX_M,
- HNAE3_RING_GL_IDX_S, req->msg.param[i].int_gl_index);
+ HNAE3_RING_GL_IDX_S, req->msg.param[0].int_gl_index);
cur_chain = ring_chain;
index = mbx_req->msg.data[0];
+ /* Validate the rss_hash_key query index sent by the VF so that the
+ * copy below never reads past the end of rss_hash_key.
+ */
+ if (((index + 1) * HCLGE_RSS_MBX_RESP_LEN) >
+ sizeof(vport[0].rss_hash_key)) {
+ dev_warn(&hdev->pdev->dev,
+ "failed to get the rss hash key, the index(%u) invalid !\n",
+ index);
+ return;
+ }
+
memcpy(resp_msg->data,
&hdev->vport[0].rss_hash_key[index * HCLGE_RSS_MBX_RESP_LEN],
HCLGE_RSS_MBX_RESP_LEN);
/**
* build_hdr_data - creates L2/L3/L4 header data buffer
- * @hdr_field - bitfield determining needed headers
- * @skb - socket buffer
- * @hdr_len - array of header lengths
- * @tot_len - total length of data
+ * @hdr_field: bitfield determining needed headers
+ * @skb: socket buffer
+ * @hdr_len: array of header lengths
+ * @hdr_data: buffer to write the header to
*
* Reads hdr_field to determine which headers are needed by firmware.
* Builds a buffer containing these headers. Saves individual header
/**
* create_hdr_descs - create header and header extension descriptors
- * @hdr_field - bitfield determining needed headers
- * @data - buffer containing header data
- * @len - length of data buffer
- * @hdr_len - array of individual header lengths
- * @scrq_arr - descriptor array
+ * @hdr_field: bitfield determining needed headers
+ * @hdr_data: buffer containing header data
+ * @len: length of data buffer
+ * @hdr_len: array of individual header lengths
+ * @scrq_arr: descriptor array
*
* Creates header and, if needed, header extension descriptors and
* places them in a descriptor array, scrq_arr
/**
* build_hdr_descs_arr - build a header descriptor array
- * @skb - socket buffer
- * @num_entries - number of descriptors to be sent
- * @subcrq - first TX descriptor
- * @hdr_field - bit field determining which headers will be sent
+ * @txbuff: tx buffer
+ * @num_entries: number of descriptors to be sent
+ * @hdr_field: bit field determining which headers will be sent
*
* This function will build a TX descriptor array with applicable
* L2/L3/L4 packet header descriptors to be sent by send_subcrq_indirect.
return rc;
}
-/**
- * do_change_param_reset returns zero if we are able to keep processing reset
- * events, or non-zero if we hit a fatal error and must halt.
- */
-static int do_change_param_reset(struct ibmvnic_adapter *adapter,
- struct ibmvnic_rwi *rwi,
- u32 reset_state)
-{
- struct net_device *netdev = adapter->netdev;
- int i, rc;
-
- netdev_dbg(adapter->netdev, "Change param resetting driver (%d)\n",
- rwi->reset_reason);
-
- netif_carrier_off(netdev);
- adapter->reset_reason = rwi->reset_reason;
-
- ibmvnic_cleanup(netdev);
-
- if (reset_state == VNIC_OPEN) {
- rc = __ibmvnic_close(netdev);
- if (rc)
- goto out;
- }
-
- release_resources(adapter);
- release_sub_crqs(adapter, 1);
- release_crq_queue(adapter);
-
- adapter->state = VNIC_PROBED;
-
- rc = init_crq_queue(adapter);
-
- if (rc) {
- netdev_err(adapter->netdev,
- "Couldn't initialize crq. rc=%d\n", rc);
- return rc;
- }
-
- rc = ibmvnic_reset_init(adapter, true);
- if (rc) {
- rc = IBMVNIC_INIT_FAILED;
- goto out;
- }
-
- /* If the adapter was in PROBE state prior to the reset,
- * exit here.
- */
- if (reset_state == VNIC_PROBED)
- goto out;
-
- rc = ibmvnic_login(netdev);
- if (rc) {
- goto out;
- }
-
- rc = init_resources(adapter);
- if (rc)
- goto out;
-
- ibmvnic_disable_irqs(adapter);
-
- adapter->state = VNIC_CLOSED;
-
- if (reset_state == VNIC_CLOSED)
- return 0;
-
- rc = __ibmvnic_open(netdev);
- if (rc) {
- rc = IBMVNIC_OPEN_FAILED;
- goto out;
- }
-
- /* refresh device's multicast list */
- ibmvnic_set_multi(netdev);
-
- /* kick napi */
- for (i = 0; i < adapter->req_rx_queues; i++)
- napi_schedule(&adapter->napi[i]);
-
-out:
- if (rc)
- adapter->state = reset_state;
- return rc;
-}
-
-/**
+/*
* do_reset returns zero if we are able to keep processing reset events, or
* non-zero if we hit a fatal error and must halt.
*/
adapter->state, adapter->failover_pending,
rwi->reset_reason, reset_state);
- rtnl_lock();
+ adapter->reset_reason = rwi->reset_reason;
+ /* requestor of VNIC_RESET_CHANGE_PARAM already has the rtnl lock */
+ if (!(adapter->reset_reason == VNIC_RESET_CHANGE_PARAM))
+ rtnl_lock();
+
/*
* Now that we have the rtnl lock, clear any pending failover.
* This will ensure ibmvnic_open() has either completed or will
adapter->failover_pending = false;
netif_carrier_off(netdev);
- adapter->reset_reason = rwi->reset_reason;
old_num_rx_queues = adapter->req_rx_queues;
old_num_tx_queues = adapter->req_tx_queues;
if (reset_state == VNIC_OPEN &&
adapter->reset_reason != VNIC_RESET_MOBILITY &&
adapter->reset_reason != VNIC_RESET_FAILOVER) {
- adapter->state = VNIC_CLOSING;
+ if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) {
+ rc = __ibmvnic_close(netdev);
+ if (rc)
+ goto out;
+ } else {
+ adapter->state = VNIC_CLOSING;
- /* Release the RTNL lock before link state change and
- * re-acquire after the link state change to allow
- * linkwatch_event to grab the RTNL lock and run during
- * a reset.
- */
- rtnl_unlock();
- rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
- rtnl_lock();
- if (rc)
- goto out;
+ /* Release the RTNL lock before link state change and
+ * re-acquire after the link state change to allow
+ * linkwatch_event to grab the RTNL lock and run during
+ * a reset.
+ */
+ rtnl_unlock();
+ rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
+ rtnl_lock();
+ if (rc)
+ goto out;
- if (adapter->state != VNIC_CLOSING) {
- rc = -1;
- goto out;
+ if (adapter->state != VNIC_CLOSING) {
+ rc = -1;
+ goto out;
+ }
+
+ adapter->state = VNIC_CLOSED;
}
+ }
- adapter->state = VNIC_CLOSED;
+ if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) {
+ release_resources(adapter);
+ release_sub_crqs(adapter, 1);
+ release_crq_queue(adapter);
}
if (adapter->reset_reason != VNIC_RESET_NON_FATAL) {
*/
adapter->state = VNIC_PROBED;
- if (adapter->reset_reason == VNIC_RESET_MOBILITY) {
+ if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) {
+ rc = init_crq_queue(adapter);
+ } else if (adapter->reset_reason == VNIC_RESET_MOBILITY) {
rc = ibmvnic_reenable_crq_queue(adapter);
release_sub_crqs(adapter, 1);
} else {
goto out;
}
- if (adapter->req_rx_queues != old_num_rx_queues ||
+ if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) {
+ rc = init_resources(adapter);
+ if (rc)
+ goto out;
+ } else if (adapter->req_rx_queues != old_num_rx_queues ||
adapter->req_tx_queues != old_num_tx_queues ||
adapter->req_rx_add_entries_per_subcrq !=
old_num_rx_slots ||
/* restore the adapter state if reset failed */
if (rc)
adapter->state = reset_state;
- rtnl_unlock();
+ /* requestor of VNIC_RESET_CHANGE_PARAM should still hold the rtnl lock */
+ if (!(adapter->reset_reason == VNIC_RESET_CHANGE_PARAM))
+ rtnl_unlock();
netdev_dbg(adapter->netdev, "[S:%d FOP:%d] Reset done, rc %d\n",
adapter->state, adapter->failover_pending, rc);
}
spin_unlock_irqrestore(&adapter->state_lock, flags);
- if (rwi->reset_reason == VNIC_RESET_CHANGE_PARAM) {
- /* CHANGE_PARAM requestor holds rtnl_lock */
- rc = do_change_param_reset(adapter, rwi, reset_state);
- } else if (adapter->force_reset_recovery) {
+ if (adapter->force_reset_recovery) {
/*
* Since we are doing a hard reset now, clear the
* failover_pending flag so we don't ignore any
if (!pending_scrq(adapter, rx_scrq))
break;
- /* The queue entry at the current index is peeked at above
- * to determine that there is a valid descriptor awaiting
- * processing. We want to be sure that the current slot
- * holds a valid descriptor before reading its contents.
- */
- dma_rmb();
next = ibmvnic_next_scrq(adapter, rx_scrq);
rx_buff =
(struct ibmvnic_rx_buff *)be64_to_cpu(next->
if (napi_complete_done(napi, frames_processed)) {
enable_scrq_irq(adapter, rx_scrq);
if (pending_scrq(adapter, rx_scrq)) {
- rmb();
if (napi_reschedule(napi)) {
disable_scrq_irq(adapter, rx_scrq);
goto restart_poll;
int total_bytes = 0;
int num_packets = 0;
- /* The queue entry at the current index is peeked at above
- * to determine that there is a valid descriptor awaiting
- * processing. We want to be sure that the current slot
- * holds a valid descriptor before reading its contents.
- */
- dma_rmb();
-
next = ibmvnic_next_scrq(adapter, scrq);
for (i = 0; i < next->tx_comp.num_comps; i++) {
if (next->tx_comp.rcs[i])
struct ibmvnic_sub_crq_queue *scrq)
{
union sub_crq *entry = &scrq->msgs[scrq->cur];
+ int rc;
- if (entry->generic.first & IBMVNIC_CRQ_CMD_RSP)
- return 1;
- else
- return 0;
+ rc = !!(entry->generic.first & IBMVNIC_CRQ_CMD_RSP);
+
+ /* Ensure that the SCRQ valid flag is loaded prior to loading the
+ * contents of the SCRQ descriptor
+ */
+ dma_rmb();
+
+ return rc;
}
static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *adapter,
}
spin_unlock_irqrestore(&scrq->lock, flags);
- /* Ensure that the entire buffer descriptor has been
- * loaded before reading its contents
+ /* Ensure that the SCRQ valid flag is loaded prior to loading the
+ * contents of the SCRQ descriptor
*/
dma_rmb();
complete(&adapter->init_done);
adapter->init_done_rc = -EIO;
}
- ibmvnic_reset(adapter, VNIC_RESET_FAILOVER);
+ rc = ibmvnic_reset(adapter, VNIC_RESET_FAILOVER);
+ if (rc && rc != -EBUSY) {
+ /* We were unable to schedule the failover
+ * reset either because the adapter was still
+ * probing (eg: during kexec) or we could not
+ * allocate memory. Clear the failover_pending
+ * flag since no one else will. We ignore
+ * EBUSY because it means either FAILOVER reset
+ * is already scheduled or the adapter is
+ * being removed.
+ */
+ netdev_err(netdev,
+ "Error %ld scheduling failover reset\n",
+ rc);
+ adapter->failover_pending = false;
+ }
break;
case IBMVNIC_CRQ_INIT_COMPLETE:
dev_info(dev, "Partner initialization complete\n");
}
int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port,
- bool vlan_aware, struct switchdev_trans *trans)
+ bool vlan_aware)
{
+ struct ocelot_vcap_block *block = &ocelot->block[VCAP_IS1];
struct ocelot_port *ocelot_port = ocelot->ports[port];
+ struct ocelot_vcap_filter *filter;
u32 val;
- if (switchdev_trans_ph_prepare(trans)) {
- struct ocelot_vcap_block *block = &ocelot->block[VCAP_IS1];
- struct ocelot_vcap_filter *filter;
-
- list_for_each_entry(filter, &block->rules, list) {
- if (filter->ingress_port_mask & BIT(port) &&
- filter->action.vid_replace_ena) {
- dev_err(ocelot->dev,
- "Cannot change VLAN state with vlan modify rules active\n");
- return -EBUSY;
- }
+ list_for_each_entry(filter, &block->rules, list) {
+ if (filter->ingress_port_mask & BIT(port) &&
+ filter->action.vid_replace_ena) {
+ dev_err(ocelot->dev,
+ "Cannot change VLAN state with vlan modify rules active\n");
+ return -EBUSY;
}
-
- return 0;
}
ocelot_port->vlan_aware = vlan_aware;
}
}
+ /* Poll helper for ocelot_port_flush(): return the current value of the
+  * QSYS_SW_STATUS register instance belonging to @port.
+  */
+ static u32 ocelot_read_eq_avail(struct ocelot *ocelot, int port)
+ {
+ 	u32 sw_status;
+
+ 	sw_status = ocelot_read_rix(ocelot, QSYS_SW_STATUS, port);
+
+ 	return sw_status;
+ }
+
+ /* Flush the egress queues associated with @port.
+  *
+  * Dequeuing, flow control, priority flow control and half duplex
+  * backpressure are turned off, the flush bit is set, dequeuing is turned
+  * back on so the queues can drain, and QSYS_SW_STATUS is polled until it
+  * reads zero. The flush bit is then cleared again.
+  *
+  * The SYS_PAUSE_CFG_PAUSE_ENA value found on entry is saved and written back
+  * before returning, so that a port which had flow control enabled does not
+  * come out of a flush with it silently disabled.
+  *
+  * May sleep (usleep_range() and a sleeping poll).
+  *
+  * Return: 0 once the queues have drained, otherwise the error reported by
+  * read_poll_timeout() when the 2 second poll budget expires.
+  */
+ int ocelot_port_flush(struct ocelot *ocelot, int port)
+ {
+ 	/* Defaults to "disabled" should the register read below fail */
+ 	unsigned int pause_ena = 0;
+ 	int err, val;
+
+ 	/* Disable dequeuing from the egress queues */
+ 	ocelot_rmw_rix(ocelot, QSYS_PORT_MODE_DEQUEUE_DIS,
+ 		       QSYS_PORT_MODE_DEQUEUE_DIS,
+ 		       QSYS_PORT_MODE, port);
+
+ 	/* Disable flow control, remembering the previous setting */
+ 	ocelot_fields_read(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, &pause_ena);
+ 	ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, 0);
+
+ 	/* Disable priority flow control */
+ 	ocelot_fields_write(ocelot, port,
+ 			    QSYS_SWITCH_PORT_MODE_TX_PFC_ENA, 0);
+
+ 	/* Wait at least the time it takes to receive a frame of maximum length
+ 	 * at the port.
+ 	 * Worst-case delays for 10 kilobyte jumbo frames are:
+ 	 * 8 ms on a 10M port
+ 	 * 800 μs on a 100M port
+ 	 * 80 μs on a 1G port
+ 	 * 32 μs on a 2.5G port
+ 	 */
+ 	usleep_range(8000, 10000);
+
+ 	/* Disable half duplex backpressure. */
+ 	ocelot_rmw_rix(ocelot, 0, SYS_FRONT_PORT_MODE_HDX_MODE,
+ 		       SYS_FRONT_PORT_MODE, port);
+
+ 	/* Flush the queues associated with the port. */
+ 	ocelot_rmw_gix(ocelot, REW_PORT_CFG_FLUSH_ENA, REW_PORT_CFG_FLUSH_ENA,
+ 		       REW_PORT_CFG, port);
+
+ 	/* Enable dequeuing from the egress queues. */
+ 	ocelot_rmw_rix(ocelot, 0, QSYS_PORT_MODE_DEQUEUE_DIS, QSYS_PORT_MODE,
+ 		       port);
+
+ 	/* Wait until flushing is complete. */
+ 	err = read_poll_timeout(ocelot_read_eq_avail, val, !val,
+ 				100, 2000000, false, ocelot, port);
+
+ 	/* Clear flushing again. */
+ 	ocelot_rmw_gix(ocelot, 0, REW_PORT_CFG_FLUSH_ENA, REW_PORT_CFG, port);
+
+ 	/* Re-enable flow control if it was enabled on entry */
+ 	ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, pause_ena);
+
+ 	return err;
+ }
+ EXPORT_SYMBOL(ocelot_port_flush);
+
void ocelot_adjust_link(struct ocelot *ocelot, int port,
struct phy_device *phydev)
{
}
EXPORT_SYMBOL(ocelot_get_ts_info);
+/* Build a bitmask with one bit set per physical port whose ->bond equals
+ * @bond. With @only_active_ports set, ports whose lag_tx_active flag is false
+ * are left out of the result. Unpopulated port slots are ignored.
+ */
+static u32 ocelot_get_bond_mask(struct ocelot *ocelot, struct net_device *bond,
+				bool only_active_ports)
+{
+	u32 members = 0;
+	int p;
+
+	for (p = 0; p < ocelot->num_phys_ports; p++) {
+		struct ocelot_port *op = ocelot->ports[p];
+
+		/* Skip empty slots and ports belonging to another bond (or none) */
+		if (!op || op->bond != bond)
+			continue;
+
+		if (only_active_ports && !op->lag_tx_active)
+			continue;
+
+		members |= BIT(p);
+	}
+
+	return members;
+}
+
+/* Build a bitmask of the physical ports that have is_dsa_8021q_cpu set.
+ * Returns 0 when no populated port carries that flag.
+ */
+static u32 ocelot_get_dsa_8021q_cpu_mask(struct ocelot *ocelot)
+{
+	u32 cpu_ports = 0;
+	int p;
+
+	for (p = 0; p < ocelot->num_phys_ports; p++) {
+		struct ocelot_port *op = ocelot->ports[p];
+
+		if (op && op->is_dsa_8021q_cpu)
+			cpu_ports |= BIT(p);
+	}
+
+	return cpu_ports;
+}
+
+/* Recompute and write the PGID_SRC entry of every physical port.
+ *
+ * Each entry is the set of ports that the given source port may forward to:
+ *  - unused port slot:        nothing
+ *  - DSA tag_8021q CPU port:  every physical port except the tag_8021q CPU
+ *                             ports themselves
+ *  - port in bridge_fwd_mask: the other ports in bridge_fwd_mask, minus all
+ *                             ports offloading the same bond (if bonded)
+ *  - any other port:          the DSA tag_8021q CPU ports only
+ */
+void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot)
+{
+	/* A tag_8021q CPU port must be reachable through the regular
+	 * forwarding path of the front ports, bridged or standalone. Without
+	 * tag_8021q this is 0, which is fine: the hardware CPU port module
+	 * can be a destination even when it is not part of PGID_SRC.
+	 */
+	unsigned long dsa_cpu_ports = ocelot_get_dsa_8021q_cpu_mask(ocelot);
+	int src;
+
+	/* Every port is rewritten, since a change to one port adds or removes
+	 * it as a destination for all the others.
+	 */
+	for (src = 0; src < ocelot->num_phys_ports; src++) {
+		struct ocelot_port *op = ocelot->ports[src];
+		unsigned long dest;
+
+		if (!op) {
+			/* Unused ports can't send anywhere */
+			dest = 0;
+		} else if (op->is_dsa_8021q_cpu) {
+			/* CPU ports reach all ports but themselves */
+			dest = GENMASK(ocelot->num_phys_ports - 1, 0);
+			dest &= ~dsa_cpu_ports;
+		} else if (!(ocelot->bridge_fwd_mask & BIT(src))) {
+			/* Standalone ports talk only to the tag_8021q CPU
+			 * ports (if any), or to the hardware CPU port module
+			 * otherwise.
+			 */
+			dest = dsa_cpu_ports;
+		} else {
+			struct net_device *bond = op->bond;
+
+			dest = ocelot->bridge_fwd_mask & ~BIT(src);
+			if (bond)
+				dest &= ~ocelot_get_bond_mask(ocelot, bond, false);
+		}
+
+		ocelot_write_rix(ocelot, dest, ANA_PGID_PGID, PGID_SRC + src);
+	}
+}
+EXPORT_SYMBOL(ocelot_apply_bridge_fwd_mask);
+
void ocelot_bridge_stp_state_set(struct ocelot *ocelot, int port, u8 state)
{
u32 port_cfg;
- int p, i;
if (!(BIT(port) & ocelot->bridge_mask))
return;
ocelot_write_gix(ocelot, port_cfg, ANA_PORT_PORT_CFG, port);
- /* Apply FWD mask. The loop is needed to add/remove the current port as
- * a source for the other ports.
- */
- for (p = 0; p < ocelot->num_phys_ports; p++) {
- if (ocelot->bridge_fwd_mask & BIT(p)) {
- unsigned long mask = ocelot->bridge_fwd_mask & ~BIT(p);
-
- for (i = 0; i < ocelot->num_phys_ports; i++) {
- unsigned long bond_mask = ocelot->lags[i];
-
- if (!bond_mask)
- continue;
-
- if (bond_mask & BIT(p)) {
- mask &= ~bond_mask;
- break;
- }
- }
-
- ocelot_write_rix(ocelot, mask,
- ANA_PGID_PGID, PGID_SRC + p);
- } else {
- ocelot_write_rix(ocelot, 0,
- ANA_PGID_PGID, PGID_SRC + p);
- }
- }
+ ocelot_apply_bridge_fwd_mask(ocelot);
}
EXPORT_SYMBOL(ocelot_bridge_stp_state_set);
struct net_device *bridge)
{
struct ocelot_vlan pvid = {0}, native_vlan = {0};
- struct switchdev_trans trans;
int ret;
ocelot->bridge_mask &= ~BIT(port);
if (!ocelot->bridge_mask)
ocelot->hw_bridge_dev = NULL;
- trans.ph_prepare = true;
- ret = ocelot_port_vlan_filtering(ocelot, port, false, &trans);
- if (ret)
- return ret;
-
- trans.ph_prepare = false;
- ret = ocelot_port_vlan_filtering(ocelot, port, false, &trans);
+ ret = ocelot_port_vlan_filtering(ocelot, port, false);
if (ret)
return ret;
static void ocelot_set_aggr_pgids(struct ocelot *ocelot)
{
+ unsigned long visited = GENMASK(ocelot->num_phys_ports - 1, 0);
int i, port, lag;
/* Reset destination and aggregation PGIDS */
ocelot_write_rix(ocelot, GENMASK(ocelot->num_phys_ports - 1, 0),
ANA_PGID_PGID, i);
- /* Now, set PGIDs for each LAG */
+ /* The visited ports bitmask holds the list of ports offloading any
+ * bonding interface. Initially we mark all these ports as unvisited,
+ * then every time we visit a port in this bitmask, we know that it is
+ * the lowest numbered port, i.e. the one whose logical ID == physical
+ * port ID == LAG ID. So we mark as visited all further ports in the
+ * bitmask that are offloading the same bonding interface. This way,
+ * we set up the aggregation PGIDs only once per bonding interface.
+ */
+ for (port = 0; port < ocelot->num_phys_ports; port++) {
+ struct ocelot_port *ocelot_port = ocelot->ports[port];
+
+ if (!ocelot_port || !ocelot_port->bond)
+ continue;
+
+ visited &= ~BIT(port);
+ }
+
+ /* Now, set PGIDs for each active LAG */
for (lag = 0; lag < ocelot->num_phys_ports; lag++) {
+		/* ocelot->ports[] holds NULL for unused ports (the other loops
+		 * in this function skip those entries), so don't dereference
+		 * it blindly. A NULL bond makes the check below skip the port.
+		 */
+		struct ocelot_port *lag_port = ocelot->ports[lag];
+		struct net_device *bond = lag_port ? lag_port->bond : NULL;
+		int num_active_ports = 0;
unsigned long bond_mask;
- int aggr_count = 0;
u8 aggr_idx[16];
- bond_mask = ocelot->lags[lag];
- if (!bond_mask)
+ if (!bond || (visited & BIT(lag)))
continue;
+ bond_mask = ocelot_get_bond_mask(ocelot, bond, true);
+
for_each_set_bit(port, &bond_mask, ocelot->num_phys_ports) {
// Destination mask
ocelot_write_rix(ocelot, bond_mask,
ANA_PGID_PGID, port);
- aggr_idx[aggr_count] = port;
- aggr_count++;
+ aggr_idx[num_active_ports++] = port;
}
for_each_aggr_pgid(ocelot, i) {
ac = ocelot_read_rix(ocelot, ANA_PGID_PGID, i);
ac &= ~bond_mask;
- ac |= BIT(aggr_idx[i % aggr_count]);
+ /* Don't do division by zero if there was no active
+ * port. Just make all aggregation codes zero.
+ */
+ if (num_active_ports)
+ ac |= BIT(aggr_idx[i % num_active_ports]);
ocelot_write_rix(ocelot, ac, ANA_PGID_PGID, i);
}
- }
-}
-static void ocelot_setup_lag(struct ocelot *ocelot, int lag)
-{
- unsigned long bond_mask = ocelot->lags[lag];
- unsigned int p;
-
- for_each_set_bit(p, &bond_mask, ocelot->num_phys_ports) {
- u32 port_cfg = ocelot_read_gix(ocelot, ANA_PORT_PORT_CFG, p);
+ /* Mark all ports in the same LAG as visited to avoid applying
+ * the same config again.
+ */
+ for (port = lag; port < ocelot->num_phys_ports; port++) {
+ struct ocelot_port *ocelot_port = ocelot->ports[port];
- port_cfg &= ~ANA_PORT_PORT_CFG_PORTID_VAL_M;
+ if (!ocelot_port)
+ continue;
- /* Use lag port as logical port for port i */
- ocelot_write_gix(ocelot, port_cfg |
- ANA_PORT_PORT_CFG_PORTID_VAL(lag),
- ANA_PORT_PORT_CFG, p);
+ if (ocelot_port->bond == bond)
+ visited |= BIT(port);
+ }
}
}
-int ocelot_port_lag_join(struct ocelot *ocelot, int port,
- struct net_device *bond)
+/* When offloading a bonding interface, the switch ports configured under the
+ * same bond must have the same logical port ID, equal to the physical port ID
+ * of the lowest numbered physical port in that bond. Otherwise, in standalone/
+ * bridged mode, each port has a logical port ID equal to its physical port ID.
+ */
+static void ocelot_setup_logical_port_ids(struct ocelot *ocelot)
{
- struct net_device *ndev;
- u32 bond_mask = 0;
- int lag, lp;
+ int port;
- rcu_read_lock();
- for_each_netdev_in_bond_rcu(bond, ndev) {
- struct ocelot_port_private *priv = netdev_priv(ndev);
+ for (port = 0; port < ocelot->num_phys_ports; port++) {
+ struct ocelot_port *ocelot_port = ocelot->ports[port];
+ struct net_device *bond;
- bond_mask |= BIT(priv->chip_port);
- }
- rcu_read_unlock();
+ if (!ocelot_port)
+ continue;
- lp = __ffs(bond_mask);
+ bond = ocelot_port->bond;
+ if (bond) {
+ int lag = __ffs(ocelot_get_bond_mask(ocelot, bond,
+ false));
- /* If the new port is the lowest one, use it as the logical port from
- * now on
- */
- if (port == lp) {
- lag = port;
- ocelot->lags[port] = bond_mask;
- bond_mask &= ~BIT(port);
- if (bond_mask) {
- lp = __ffs(bond_mask);
- ocelot->lags[lp] = 0;
+ ocelot_rmw_gix(ocelot,
+ ANA_PORT_PORT_CFG_PORTID_VAL(lag),
+ ANA_PORT_PORT_CFG_PORTID_VAL_M,
+ ANA_PORT_PORT_CFG, port);
+ } else {
+ ocelot_rmw_gix(ocelot,
+ ANA_PORT_PORT_CFG_PORTID_VAL(port),
+ ANA_PORT_PORT_CFG_PORTID_VAL_M,
+ ANA_PORT_PORT_CFG, port);
}
- } else {
- lag = lp;
- ocelot->lags[lp] |= BIT(port);
}
+}
- ocelot_setup_lag(ocelot, lag);
+int ocelot_port_lag_join(struct ocelot *ocelot, int port,
+ struct net_device *bond,
+ struct netdev_lag_upper_info *info)
+{
+ if (info->tx_type != NETDEV_LAG_TX_TYPE_HASH)
+ return -EOPNOTSUPP;
+
+ ocelot->ports[port]->bond = bond;
+
+ ocelot_setup_logical_port_ids(ocelot);
+ ocelot_apply_bridge_fwd_mask(ocelot);
ocelot_set_aggr_pgids(ocelot);
return 0;
void ocelot_port_lag_leave(struct ocelot *ocelot, int port,
struct net_device *bond)
{
- u32 port_cfg;
- int i;
+ ocelot->ports[port]->bond = NULL;
- /* Remove port from any lag */
- for (i = 0; i < ocelot->num_phys_ports; i++)
- ocelot->lags[i] &= ~BIT(port);
-
- /* if it was the logical port of the lag, move the lag config to the
- * next port
- */
- if (ocelot->lags[port]) {
- int n = __ffs(ocelot->lags[port]);
-
- ocelot->lags[n] = ocelot->lags[port];
- ocelot->lags[port] = 0;
+ ocelot_setup_logical_port_ids(ocelot);
+ ocelot_apply_bridge_fwd_mask(ocelot);
+ ocelot_set_aggr_pgids(ocelot);
+}
+EXPORT_SYMBOL(ocelot_port_lag_leave);
- ocelot_setup_lag(ocelot, n);
- }
+void ocelot_port_lag_change(struct ocelot *ocelot, int port, bool lag_tx_active)
+{
+ struct ocelot_port *ocelot_port = ocelot->ports[port];
- port_cfg = ocelot_read_gix(ocelot, ANA_PORT_PORT_CFG, port);
- port_cfg &= ~ANA_PORT_PORT_CFG_PORTID_VAL_M;
- ocelot_write_gix(ocelot, port_cfg | ANA_PORT_PORT_CFG_PORTID_VAL(port),
- ANA_PORT_PORT_CFG, port);
+ ocelot_port->lag_tx_active = lag_tx_active;
+ /* Rebalance the LAGs */
ocelot_set_aggr_pgids(ocelot);
}
-EXPORT_SYMBOL(ocelot_port_lag_leave);
+EXPORT_SYMBOL(ocelot_port_lag_change);
/* Configure the maximum SDU (L2 payload) on RX to the value specified in @sdu.
* The length of VLAN tags is accounted for automatically via DEV_MAC_TAGS_CFG.
if (port == ocelot->npi) {
maxlen += OCELOT_TAG_LEN;
- if (ocelot->inj_prefix == OCELOT_TAG_PREFIX_SHORT)
+ if (ocelot->npi_inj_prefix == OCELOT_TAG_PREFIX_SHORT)
maxlen += OCELOT_SHORT_PREFIX_LEN;
- else if (ocelot->inj_prefix == OCELOT_TAG_PREFIX_LONG)
+ else if (ocelot->npi_inj_prefix == OCELOT_TAG_PREFIX_LONG)
maxlen += OCELOT_LONG_PREFIX_LEN;
}
pause_stop);
/* Tail dropping watermarks */
- atop_tot = (ocelot->shared_queue_sz - 9 * maxlen) /
+ atop_tot = (ocelot->packet_buffer_size - 9 * maxlen) /
OCELOT_BUFFER_CELL_SZ;
atop = (9 * maxlen) / OCELOT_BUFFER_CELL_SZ;
ocelot_write_rix(ocelot, ocelot->ops->wm_enc(atop), SYS_ATOP, port);
if (port == ocelot->npi) {
max_mtu -= OCELOT_TAG_LEN;
- if (ocelot->inj_prefix == OCELOT_TAG_PREFIX_SHORT)
+ if (ocelot->npi_inj_prefix == OCELOT_TAG_PREFIX_SHORT)
max_mtu -= OCELOT_SHORT_PREFIX_LEN;
- else if (ocelot->inj_prefix == OCELOT_TAG_PREFIX_LONG)
+ else if (ocelot->npi_inj_prefix == OCELOT_TAG_PREFIX_LONG)
max_mtu -= OCELOT_LONG_PREFIX_LEN;
}
ocelot_fields_write(ocelot, cpu, QSYS_SWITCH_PORT_MODE_PORT_ENA, 1);
/* CPU port Injection/Extraction configuration */
ocelot_fields_write(ocelot, cpu, SYS_PORT_MODE_INCL_XTR_HDR,
- ocelot->xtr_prefix);
+ OCELOT_TAG_PREFIX_NONE);
ocelot_fields_write(ocelot, cpu, SYS_PORT_MODE_INCL_INJ_HDR,
- ocelot->inj_prefix);
+ OCELOT_TAG_PREFIX_NONE);
/* Configure the CPU port to be VLAN aware */
ocelot_write_gix(ocelot, ANA_PORT_VLAN_CFG_VLAN_VID(0) |
ANA_PORT_VLAN_CFG, cpu);
}
+/* Read the hardware resource counters and cache them in @ocelot:
+ * packet_buffer_size is derived from SYS_MMGT and num_frame_refs from
+ * QSYS_EQ_CTRL.
+ */
+static void ocelot_detect_features(struct ocelot *ocelot)
+{
+	int reg;
+
+	/* On Ocelot, Felix, Seville, Serval etc, SYS:MMGT:MMGT:FREECNT counts
+	 * free memory words of 240 bytes (aka 4-cell chunks), not 192 bytes as
+	 * the documentation incorrectly states.
+	 */
+	reg = ocelot_read(ocelot, SYS_MMGT);
+	ocelot->packet_buffer_size = 240 * SYS_MMGT_FREECNT(reg);
+
+	reg = ocelot_read(ocelot, QSYS_EQ_CTRL);
+	ocelot->num_frame_refs = QSYS_MMGT_EQ_CTRL_FP_FREE_CNT(reg);
+}
+
int ocelot_init(struct ocelot *ocelot)
{
char queue_name[32];
}
}
- ocelot->lags = devm_kcalloc(ocelot->dev, ocelot->num_phys_ports,
- sizeof(u32), GFP_KERNEL);
- if (!ocelot->lags)
- return -ENOMEM;
-
ocelot->stats = devm_kcalloc(ocelot->dev,
ocelot->num_phys_ports * ocelot->num_stats,
sizeof(u64), GFP_KERNEL);
INIT_LIST_HEAD(&ocelot->multicast);
INIT_LIST_HEAD(&ocelot->pgids);
+ ocelot_detect_features(ocelot);
ocelot_mact_init(ocelot);
ocelot_vlan_init(ocelot);
ocelot_vcap_init(ocelot);
ocelot_write(ocelot, ANA_AGGR_CFG_AC_SMAC_ENA |
ANA_AGGR_CFG_AC_DMAC_ENA |
ANA_AGGR_CFG_AC_IP4_SIPDIP_ENA |
- ANA_AGGR_CFG_AC_IP4_TCPUDP_ENA, ANA_AGGR_CFG);
+ ANA_AGGR_CFG_AC_IP4_TCPUDP_ENA |
+ ANA_AGGR_CFG_AC_IP6_FLOW_LBL_ENA |
+ ANA_AGGR_CFG_AC_IP6_TCPUDP_ENA,
+ ANA_AGGR_CFG);
/* Set MAC age time to default value. The entry is aged after
* 2*AGE_PERIOD
struct netvsc_device *nv_dev = rtnl_dereference(net_device_ctx->nvdev);
struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt;
+ /* Block sending traffic to VF if it's about to be gone */
+ if (!vf)
+ net_device_ctx->data_path_is_vf = vf;
+
memset(init_pkt, 0, sizeof(struct nvsp_message));
init_pkt->hdr.msg_type = NVSP_MSG4_TYPE_SWITCH_DATA_PATH;
if (vf)
vmbus_sendpacket(dev->channel, init_pkt,
sizeof(struct nvsp_message),
- VMBUS_RQST_ID_NO_RESPONSE,
- VM_PKT_DATA_INBAND, 0);
+ (unsigned long)init_pkt,
+ VM_PKT_DATA_INBAND,
+ VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+ wait_for_completion(&nv_dev->channel_init_wait);
+ net_device_ctx->data_path_is_vf = vf;
}
/* Worker to setup sub channels on initial setup
for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
xdp_rxq_info_unreg(&nvdev->chan_table[i].xdp_rxq);
+ kfree(nvdev->chan_table[i].recv_buf);
vfree(nvdev->chan_table[i].mrc.slots);
}
struct nvsp_message *init_packet;
unsigned int buf_size;
size_t map_words;
- int ret = 0;
+ int i, ret = 0;
/* Get receive buffer area. */
buf_size = device_info->recv_sections * device_info->recv_section_size;
goto cleanup;
}
+ for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
+ struct netvsc_channel *nvchan = &net_device->chan_table[i];
+
+ nvchan->recv_buf = kzalloc(net_device->recv_section_size, GFP_KERNEL);
+ if (nvchan->recv_buf == NULL) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+ }
+
/* Setup receive completion ring.
* Add 1 to the recv_section_cnt because at least one entry in a
* ring buffer has to be empty.
const struct vmpacket_descriptor *desc,
int budget)
{
- const struct nvsp_message *nvsp_packet = hv_pkt_data(desc);
+ const struct nvsp_message *nvsp_packet;
u32 msglen = hv_pkt_datalen(desc);
+ struct nvsp_message *pkt_rqst;
+ u64 cmd_rqst;
+
+ /* First check if this is a VMBUS completion without data payload */
+ if (!msglen) {
+ cmd_rqst = vmbus_request_addr(&incoming_channel->requestor,
+ (u64)desc->trans_id);
+ if (cmd_rqst == VMBUS_RQST_ERROR) {
+ netdev_err(ndev, "Invalid transaction id\n");
+ return;
+ }
+
+ pkt_rqst = (struct nvsp_message *)(uintptr_t)cmd_rqst;
+ switch (pkt_rqst->hdr.msg_type) {
+ case NVSP_MSG4_TYPE_SWITCH_DATA_PATH:
+ complete(&net_device->channel_init_wait);
+ break;
+
+ default:
+ netdev_err(ndev, "Unexpected VMBUS completion!!\n");
+ }
+ return;
+ }
/* Ensure packet is big enough to read header fields */
if (msglen < sizeof(struct nvsp_message_header)) {
return;
}
+ nvsp_packet = hv_pkt_data(desc);
switch (nvsp_packet->hdr.msg_type) {
case NVSP_MSG_TYPE_INIT_COMPLETE:
if (msglen < sizeof(struct nvsp_message_header) +
int ret;
u32 ring_avail = hv_get_avail_to_write_percent(&out_channel->outbound);
+ memset(&nvmsg, 0, sizeof(struct nvsp_message));
nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
if (skb)
rpkt->channel_type = 0; /* 0 is RMC_DATA */
continue;
}
+ /* We're going to copy (sections of) the packet into nvchan->recv_buf;
+ * make sure that nvchan->recv_buf is large enough to hold the packet.
+ */
+ if (unlikely(buflen > net_device->recv_section_size)) {
+ nvchan->rsc.cnt = 0;
+ status = NVSP_STAT_FAIL;
+ netif_err(net_device_ctx, rx_err, ndev,
+ "Packet too big: buflen=%u recv_section_size=%u\n",
+ buflen, net_device->recv_section_size);
+
+ continue;
+ }
+
data = recv_buf + offset;
nvchan->rsc.is_last = (i == count - 1);
ret = rndis_filter_receive(ndev, net_device,
nvchan, data, buflen);
- if (unlikely(ret != NVSP_STAT_SUCCESS))
+ if (unlikely(ret != NVSP_STAT_SUCCESS)) {
+ /* Drop incomplete packet */
+ nvchan->rsc.cnt = 0;
status = NVSP_STAT_FAIL;
+ }
}
enq_receive_complete(ndev, net_device, q_idx,
sizeof(union nvsp_6_message_uber);
/* Boundary check for all versions */
- if (offset > msglen - count * sizeof(u32)) {
+ if (msglen < count * sizeof(u32) || offset > msglen - count * sizeof(u32)) {
netdev_err(ndev, "Received send-table offset too big:%u\n",
offset);
return;
}
static void dump_rndis_message(struct net_device *netdev,
- const struct rndis_message *rndis_msg)
+ const struct rndis_message *rndis_msg,
+ const void *data)
{
switch (rndis_msg->ndis_msg_type) {
case RNDIS_MSG_PACKET:
- netdev_dbg(netdev, "RNDIS_MSG_PACKET (len %u, "
- "data offset %u data len %u, # oob %u, "
- "oob offset %u, oob len %u, pkt offset %u, "
- "pkt len %u\n",
- rndis_msg->msg_len,
- rndis_msg->msg.pkt.data_offset,
- rndis_msg->msg.pkt.data_len,
- rndis_msg->msg.pkt.num_oob_data_elements,
- rndis_msg->msg.pkt.oob_data_offset,
- rndis_msg->msg.pkt.oob_data_len,
- rndis_msg->msg.pkt.per_pkt_info_offset,
- rndis_msg->msg.pkt.per_pkt_info_len);
+ if (rndis_msg->msg_len - RNDIS_HEADER_SIZE >= sizeof(struct rndis_packet)) {
+ const struct rndis_packet *pkt = data + RNDIS_HEADER_SIZE;
+ netdev_dbg(netdev, "RNDIS_MSG_PACKET (len %u, "
+ "data offset %u data len %u, # oob %u, "
+ "oob offset %u, oob len %u, pkt offset %u, "
+ "pkt len %u\n",
+ rndis_msg->msg_len,
+ pkt->data_offset,
+ pkt->data_len,
+ pkt->num_oob_data_elements,
+ pkt->oob_data_offset,
+ pkt->oob_data_len,
+ pkt->per_pkt_info_offset,
+ pkt->per_pkt_info_len);
+ }
break;
case RNDIS_MSG_INIT_C:
- netdev_dbg(netdev, "RNDIS_MSG_INIT_C "
- "(len %u, id 0x%x, status 0x%x, major %d, minor %d, "
- "device flags %d, max xfer size 0x%x, max pkts %u, "
- "pkt aligned %u)\n",
- rndis_msg->msg_len,
- rndis_msg->msg.init_complete.req_id,
- rndis_msg->msg.init_complete.status,
- rndis_msg->msg.init_complete.major_ver,
- rndis_msg->msg.init_complete.minor_ver,
- rndis_msg->msg.init_complete.dev_flags,
- rndis_msg->msg.init_complete.max_xfer_size,
- rndis_msg->msg.init_complete.
- max_pkt_per_msg,
- rndis_msg->msg.init_complete.
- pkt_alignment_factor);
+ if (rndis_msg->msg_len - RNDIS_HEADER_SIZE >=
+ sizeof(struct rndis_initialize_complete)) {
+ const struct rndis_initialize_complete *init_complete =
+ data + RNDIS_HEADER_SIZE;
+ netdev_dbg(netdev, "RNDIS_MSG_INIT_C "
+ "(len %u, id 0x%x, status 0x%x, major %d, minor %d, "
+ "device flags %d, max xfer size 0x%x, max pkts %u, "
+ "pkt aligned %u)\n",
+ rndis_msg->msg_len,
+ init_complete->req_id,
+ init_complete->status,
+ init_complete->major_ver,
+ init_complete->minor_ver,
+ init_complete->dev_flags,
+ init_complete->max_xfer_size,
+ init_complete->max_pkt_per_msg,
+ init_complete->pkt_alignment_factor);
+ }
break;
case RNDIS_MSG_QUERY_C:
- netdev_dbg(netdev, "RNDIS_MSG_QUERY_C "
- "(len %u, id 0x%x, status 0x%x, buf len %u, "
- "buf offset %u)\n",
- rndis_msg->msg_len,
- rndis_msg->msg.query_complete.req_id,
- rndis_msg->msg.query_complete.status,
- rndis_msg->msg.query_complete.
- info_buflen,
- rndis_msg->msg.query_complete.
- info_buf_offset);
+ if (rndis_msg->msg_len - RNDIS_HEADER_SIZE >=
+ sizeof(struct rndis_query_complete)) {
+ const struct rndis_query_complete *query_complete =
+ data + RNDIS_HEADER_SIZE;
+ netdev_dbg(netdev, "RNDIS_MSG_QUERY_C "
+ "(len %u, id 0x%x, status 0x%x, buf len %u, "
+ "buf offset %u)\n",
+ rndis_msg->msg_len,
+ query_complete->req_id,
+ query_complete->status,
+ query_complete->info_buflen,
+ query_complete->info_buf_offset);
+ }
break;
case RNDIS_MSG_SET_C:
- netdev_dbg(netdev,
- "RNDIS_MSG_SET_C (len %u, id 0x%x, status 0x%x)\n",
- rndis_msg->msg_len,
- rndis_msg->msg.set_complete.req_id,
- rndis_msg->msg.set_complete.status);
+		/* Only dump the fields when the message is long enough to
+		 * contain a complete rndis_set_complete, like the other cases.
+		 */
+		if (rndis_msg->msg_len - RNDIS_HEADER_SIZE >=
+				sizeof(struct rndis_set_complete)) {
+			const struct rndis_set_complete *set_complete =
+				data + RNDIS_HEADER_SIZE;
+			netdev_dbg(netdev,
+				"RNDIS_MSG_SET_C (len %u, id 0x%x, status 0x%x)\n",
+				rndis_msg->msg_len,
+				set_complete->req_id,
+				set_complete->status);
+		}
break;
case RNDIS_MSG_INDICATE:
- netdev_dbg(netdev, "RNDIS_MSG_INDICATE "
- "(len %u, status 0x%x, buf len %u, buf offset %u)\n",
- rndis_msg->msg_len,
- rndis_msg->msg.indicate_status.status,
- rndis_msg->msg.indicate_status.status_buflen,
- rndis_msg->msg.indicate_status.status_buf_offset);
+ if (rndis_msg->msg_len - RNDIS_HEADER_SIZE >=
+ sizeof(struct rndis_indicate_status)) {
+ const struct rndis_indicate_status *indicate_status =
+ data + RNDIS_HEADER_SIZE;
+ netdev_dbg(netdev, "RNDIS_MSG_INDICATE "
+ "(len %u, status 0x%x, buf len %u, buf offset %u)\n",
+ rndis_msg->msg_len,
+ indicate_status->status,
+ indicate_status->status_buflen,
+ indicate_status->status_buf_offset);
+ }
break;
default:
{
u32 link_status;
struct rndis_query_complete *query_complete;
+ u32 msg_len = request->response_msg.msg_len;
+
+ /* Ensure the packet is big enough to access its fields */
+ if (msg_len - RNDIS_HEADER_SIZE < sizeof(struct rndis_query_complete))
+ return;
query_complete = &request->response_msg.msg.query_complete;
if (query_complete->status == RNDIS_STATUS_SUCCESS &&
- query_complete->info_buflen == sizeof(u32)) {
+ query_complete->info_buflen >= sizeof(u32) &&
+ query_complete->info_buf_offset >= sizeof(*query_complete) &&
+ msg_len - RNDIS_HEADER_SIZE >= query_complete->info_buf_offset &&
+ msg_len - RNDIS_HEADER_SIZE - query_complete->info_buf_offset
+ >= query_complete->info_buflen) {
memcpy(&link_status, (void *)((unsigned long)query_complete +
query_complete->info_buf_offset), sizeof(u32));
rdev->link_state = link_status != 0;
static void rndis_filter_receive_response(struct net_device *ndev,
struct netvsc_device *nvdev,
- const struct rndis_message *resp)
+ struct rndis_message *resp,
+ void *data)
{
+ u32 *req_id = &resp->msg.init_complete.req_id;
struct rndis_device *dev = nvdev->extension;
struct rndis_request *request = NULL;
bool found = false;
return;
}
+ /* Copy the request ID into nvchan->recv_buf */
+ *req_id = *(u32 *)(data + RNDIS_HEADER_SIZE);
+
spin_lock_irqsave(&dev->request_lock, flags);
list_for_each_entry(request, &dev->req_list, list_ent) {
/*
* All request/response message contains RequestId as the 1st
* field
*/
- if (request->request_msg.msg.init_req.req_id
- == resp->msg.init_complete.req_id) {
+ if (request->request_msg.msg.init_req.req_id == *req_id) {
found = true;
break;
}
if (found) {
if (resp->msg_len <=
sizeof(struct rndis_message) + RNDIS_EXT_LEN) {
- memcpy(&request->response_msg, resp,
- resp->msg_len);
+ memcpy(&request->response_msg, resp, RNDIS_HEADER_SIZE + sizeof(*req_id));
+ memcpy((void *)&request->response_msg + RNDIS_HEADER_SIZE + sizeof(*req_id),
+ data + RNDIS_HEADER_SIZE + sizeof(*req_id),
+ resp->msg_len - RNDIS_HEADER_SIZE - sizeof(*req_id));
if (request->request_msg.ndis_msg_type ==
RNDIS_MSG_QUERY && request->request_msg.msg.
query_req.oid == RNDIS_OID_GEN_MEDIA_CONNECT_STATUS)
netdev_err(ndev,
"no rndis request found for this response "
"(id 0x%x res type 0x%x)\n",
- resp->msg.init_complete.req_id,
+ *req_id,
resp->ndis_msg_type);
}
}
*/
static inline void *rndis_get_ppi(struct net_device *ndev,
struct rndis_packet *rpkt,
- u32 rpkt_len, u32 type, u8 internal)
+ u32 rpkt_len, u32 type, u8 internal,
+ u32 ppi_size, void *data)
{
struct rndis_per_packet_info *ppi;
int len;
return NULL;
}
- if (rpkt->per_pkt_info_len > rpkt_len - rpkt->per_pkt_info_offset) {
+ if (rpkt->per_pkt_info_len < sizeof(*ppi) ||
+ rpkt->per_pkt_info_len > rpkt_len - rpkt->per_pkt_info_offset) {
netdev_err(ndev, "Invalid per_pkt_info_len: %u\n",
rpkt->per_pkt_info_len);
return NULL;
ppi = (struct rndis_per_packet_info *)((ulong)rpkt +
rpkt->per_pkt_info_offset);
+ /* Copy the PPIs into nvchan->recv_buf */
+ memcpy(ppi, data + RNDIS_HEADER_SIZE + rpkt->per_pkt_info_offset, rpkt->per_pkt_info_len);
len = rpkt->per_pkt_info_len;
while (len > 0) {
continue;
}
- if (ppi->type == type && ppi->internal == internal)
+ if (ppi->type == type && ppi->internal == internal) {
+			/* ppi->size should be big enough to hold the returned
+			 * object. Give up on a malformed entry instead of using
+			 * 'continue': len and ppi are only advanced at the end
+			 * of the loop body, so retrying the same entry would
+			 * never terminate.
+			 */
+			if (ppi->size - ppi->ppi_offset < ppi_size) {
+				netdev_err(ndev, "Invalid ppi: size %u ppi_offset %u\n",
+					   ppi->size, ppi->ppi_offset);
+				return NULL;
+			}
return (void *)((ulong)ppi + ppi->ppi_offset);
+ }
len -= ppi->size;
ppi = (struct rndis_per_packet_info *)((ulong)ppi + ppi->size);
}
if (cnt) {
nvchan->rsc.pktlen += len;
} else {
- nvchan->rsc.vlan = vlan;
- nvchan->rsc.csum_info = csum_info;
+ /* The data/values pointed by vlan, csum_info and hash_info are shared
+ * across the different 'fragments' of the RSC packet; store them into
+ * the packet itself.
+ */
+ if (vlan != NULL) {
+ memcpy(&nvchan->rsc.vlan, vlan, sizeof(*vlan));
+ nvchan->rsc.ppi_flags |= NVSC_RSC_VLAN;
+ } else {
+ nvchan->rsc.ppi_flags &= ~NVSC_RSC_VLAN;
+ }
+ if (csum_info != NULL) {
+ memcpy(&nvchan->rsc.csum_info, csum_info, sizeof(*csum_info));
+ nvchan->rsc.ppi_flags |= NVSC_RSC_CSUM_INFO;
+ } else {
+ nvchan->rsc.ppi_flags &= ~NVSC_RSC_CSUM_INFO;
+ }
nvchan->rsc.pktlen = len;
- nvchan->rsc.hash_info = hash_info;
+ if (hash_info != NULL) {
+ nvchan->rsc.hash_info = *hash_info;
+ nvchan->rsc.ppi_flags |= NVSC_RSC_HASH_INFO;
+ } else {
+ nvchan->rsc.ppi_flags &= ~NVSC_RSC_HASH_INFO;
+ }
}
nvchan->rsc.data[cnt] = data;
struct netvsc_device *nvdev,
struct netvsc_channel *nvchan,
struct rndis_message *msg,
- u32 data_buflen)
+ void *data, u32 data_buflen)
{
struct rndis_packet *rndis_pkt = &msg->msg.pkt;
const struct ndis_tcp_ip_checksum_info *csum_info;
const struct rndis_pktinfo_id *pktinfo_id;
const u32 *hash_info;
u32 data_offset, rpkt_len;
- void *data;
bool rsc_more = false;
int ret;
return NVSP_STAT_FAIL;
}
+ /* Copy the RNDIS packet into nvchan->recv_buf */
+ memcpy(rndis_pkt, data + RNDIS_HEADER_SIZE, sizeof(*rndis_pkt));
+
/* Validate rndis_pkt offset */
if (rndis_pkt->data_offset >= data_buflen - RNDIS_HEADER_SIZE) {
netdev_err(ndev, "invalid rndis packet offset: %u\n",
return NVSP_STAT_FAIL;
}
- vlan = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, IEEE_8021Q_INFO, 0);
-
- csum_info = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, TCPIP_CHKSUM_PKTINFO, 0);
+ vlan = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, IEEE_8021Q_INFO, 0, sizeof(*vlan),
+ data);
- hash_info = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, NBL_HASH_VALUE, 0);
+ csum_info = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, TCPIP_CHKSUM_PKTINFO, 0,
+ sizeof(*csum_info), data);
- pktinfo_id = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, RNDIS_PKTINFO_ID, 1);
+ hash_info = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, NBL_HASH_VALUE, 0,
+ sizeof(*hash_info), data);
- data = (void *)msg + data_offset;
+ pktinfo_id = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, RNDIS_PKTINFO_ID, 1,
+ sizeof(*pktinfo_id), data);
/* Identify RSC frags, drop erroneous packets */
if (pktinfo_id && (pktinfo_id->flag & RNDIS_PKTINFO_SUBALLOC)) {
* the data packet to the stack, without the rndis trailer padding
*/
rsc_add_data(nvchan, vlan, csum_info, hash_info,
- data, rndis_pkt->data_len);
+ data + data_offset, rndis_pkt->data_len);
if (rsc_more)
return NVSP_STAT_SUCCESS;
return ret;
drop:
- /* Drop incomplete packet */
- nvchan->rsc.cnt = 0;
return NVSP_STAT_FAIL;
}
void *data, u32 buflen)
{
struct net_device_context *net_device_ctx = netdev_priv(ndev);
- struct rndis_message *rndis_msg = data;
+ struct rndis_message *rndis_msg = nvchan->recv_buf;
- if (netif_msg_rx_status(net_device_ctx))
- dump_rndis_message(ndev, rndis_msg);
+ if (buflen < RNDIS_HEADER_SIZE) {
+ netdev_err(ndev, "Invalid rndis_msg (buflen: %u)\n", buflen);
+ return NVSP_STAT_FAIL;
+ }
+
+ /* Copy the RNDIS msg header into nvchan->recv_buf */
+ memcpy(rndis_msg, data, RNDIS_HEADER_SIZE);
/* Validate incoming rndis_message packet */
- if (buflen < RNDIS_HEADER_SIZE || rndis_msg->msg_len < RNDIS_HEADER_SIZE ||
+ if (rndis_msg->msg_len < RNDIS_HEADER_SIZE ||
buflen < rndis_msg->msg_len) {
netdev_err(ndev, "Invalid rndis_msg (buflen: %u, msg_len: %u)\n",
buflen, rndis_msg->msg_len);
return NVSP_STAT_FAIL;
}
+ if (netif_msg_rx_status(net_device_ctx))
+ dump_rndis_message(ndev, rndis_msg, data);
+
switch (rndis_msg->ndis_msg_type) {
case RNDIS_MSG_PACKET:
return rndis_filter_receive_data(ndev, net_dev, nvchan,
- rndis_msg, buflen);
+ rndis_msg, data, buflen);
case RNDIS_MSG_INIT_C:
case RNDIS_MSG_QUERY_C:
case RNDIS_MSG_SET_C:
/* completion msgs */
- rndis_filter_receive_response(ndev, net_dev, rndis_msg);
+ rndis_filter_receive_response(ndev, net_dev, rndis_msg, data);
break;
case RNDIS_MSG_INDICATE:
/* notification msgs */
- netvsc_linkstatus_callback(ndev, rndis_msg);
+ netvsc_linkstatus_callback(ndev, rndis_msg, data);
break;
default:
netdev_err(ndev,
u32 inresult_size = *result_size;
struct rndis_query_request *query;
struct rndis_query_complete *query_complete;
+ u32 msg_len;
int ret = 0;
if (!result)
/* Copy the response back */
query_complete = &request->response_msg.msg.query_complete;
+ msg_len = request->response_msg.msg_len;
+
+ /* Ensure the packet is big enough to access its fields */
+ if (msg_len - RNDIS_HEADER_SIZE < sizeof(struct rndis_query_complete)) {
+ ret = -1;
+ goto cleanup;
+ }
- if (query_complete->info_buflen > inresult_size) {
+ if (query_complete->info_buflen > inresult_size ||
+ query_complete->info_buf_offset < sizeof(*query_complete) ||
+ msg_len - RNDIS_HEADER_SIZE < query_complete->info_buf_offset ||
+ msg_len - RNDIS_HEADER_SIZE - query_complete->info_buf_offset
+ < query_complete->info_buflen) {
ret = -1;
goto cleanup;
}
/* Delay period for interrupt moderation (in 32KHz IPA internal timer ticks) */
#define GSI_EVT_RING_INT_MODT (32 * 1) /* 1ms under 32KHz clock */
-#define GSI_CMD_TIMEOUT 5 /* seconds */
+#define GSI_CMD_TIMEOUT 50 /* milliseconds */
-#define GSI_CHANNEL_STOP_RX_RETRIES 10
+#define GSI_CHANNEL_STOP_RETRIES 10
#define GSI_CHANNEL_MODEM_HALT_RETRIES 10
#define GSI_MHI_EVENT_ID_START 10 /* 1st reserved event id */
/* Nothing to do */
}
-static void gsi_irq_ieob_enable(struct gsi *gsi, u32 evt_ring_id)
+/* Event ring commands are issued one at a time, and each one signals its
+ * completion through the event ring control GSI interrupt type. That
+ * interrupt type is enabled only while a command is outstanding, and only
+ * for the event ring the command operates on.
+ */
+static void gsi_irq_ev_ctrl_enable(struct gsi *gsi, u32 evt_ring_id)
+{
+	/* A previous command might have completed after its interrupt was
+	 * disabled; clear anything pending before unmasking.
+	 */
+	iowrite32(~0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_CLR_OFFSET);
+
+	/* Unmask this event ring only, then enable the interrupt type */
+	iowrite32(BIT(evt_ring_id),
+		  gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET);
+	gsi_irq_type_enable(gsi, GSI_EV_CTRL);
+}
+
+/* Disable event ring control interrupts: the GSI_EV_CTRL interrupt type is
+ * disabled first, then the per-event-ring interrupt mask register is
+ * written with 0 so that no event ring is left unmasked.
+ */
+static void gsi_irq_ev_ctrl_disable(struct gsi *gsi)
+{
+ gsi_irq_type_disable(gsi, GSI_EV_CTRL);
+ iowrite32(0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET);
+}
+
+/* Channel commands are issued one at a time, and each one signals its
+ * completion through the channel control GSI interrupt type. That
+ * interrupt type is enabled only while a command is outstanding, and only
+ * for the channel the command operates on.
+ */
+static void gsi_irq_ch_ctrl_enable(struct gsi *gsi, u32 channel_id)
+{
+	/* A previous command might have completed after its interrupt was
+	 * disabled; clear anything pending before unmasking.
+	 */
+	iowrite32(~0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_CLR_OFFSET);
+
+	/* Unmask this channel only, then enable the interrupt type */
+	iowrite32(BIT(channel_id),
+		  gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET);
+	gsi_irq_type_enable(gsi, GSI_CH_CTRL);
+}
+
+/* Disable channel control interrupts: the GSI_CH_CTRL interrupt type is
+ * disabled first, then the per-channel interrupt mask register is written
+ * with 0 so that no channel is left unmasked.
+ */
+static void gsi_irq_ch_ctrl_disable(struct gsi *gsi)
+{
+ gsi_irq_type_disable(gsi, GSI_CH_CTRL);
+ iowrite32(0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET);
+}
+
+static void gsi_irq_ieob_enable_one(struct gsi *gsi, u32 evt_ring_id)
{
bool enable_ieob = !gsi->ieob_enabled_bitmap;
u32 val;
gsi_irq_type_enable(gsi, GSI_IEOB);
}
-static void gsi_irq_ieob_disable(struct gsi *gsi, u32 evt_ring_id)
+static void gsi_irq_ieob_disable(struct gsi *gsi, u32 event_mask)
{
u32 val;
- gsi->ieob_enabled_bitmap &= ~BIT(evt_ring_id);
+ gsi->ieob_enabled_bitmap &= ~event_mask;
/* Disable the interrupt type if this was the last enabled channel */
if (!gsi->ieob_enabled_bitmap)
iowrite32(val, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_MSK_OFFSET);
}
+/* Disable the IEOB interrupt of a single event ring, by handing a one-bit
+ * mask for @evt_ring_id to gsi_irq_ieob_disable().
+ */
+static void gsi_irq_ieob_disable_one(struct gsi *gsi, u32 evt_ring_id)
+{
+	u32 event_mask = BIT(evt_ring_id);
+
+	gsi_irq_ieob_disable(gsi, event_mask);
+}
+
/* Enable all GSI_interrupt types */
static void gsi_irq_enable(struct gsi *gsi)
{
static bool
gsi_command(struct gsi *gsi, u32 reg, u32 val, struct completion *completion)
{
+ unsigned long timeout = msecs_to_jiffies(GSI_CMD_TIMEOUT);
+
reinit_completion(completion);
iowrite32(val, gsi->virt + reg);
- return !!wait_for_completion_timeout(completion, GSI_CMD_TIMEOUT * HZ);
+ return !!wait_for_completion_timeout(completion, timeout);
}
/* Return the hardware's notion of the current state of an event ring */
}
/* Issue an event ring command and wait for it to complete */
-static void evt_ring_command(struct gsi *gsi, u32 evt_ring_id,
- enum gsi_evt_cmd_opcode opcode)
+static void gsi_evt_ring_command(struct gsi *gsi, u32 evt_ring_id,
+ enum gsi_evt_cmd_opcode opcode)
{
struct gsi_evt_ring *evt_ring = &gsi->evt_ring[evt_ring_id];
struct completion *completion = &evt_ring->completion;
struct device *dev = gsi->dev;
- bool success;
+ bool timeout;
u32 val;
- /* We only perform one event ring command at a time, and event
- * control interrupts should only occur when such a command
- * is issued here. Only permit *this* event ring to trigger
- * an interrupt, and only enable the event control IRQ type
- * when we expect it to occur.
- *
- * There's a small chance that a previous command completed
- * after the interrupt was disabled, so make sure we have no
- * pending interrupts before we enable them.
- */
- iowrite32(~0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_CLR_OFFSET);
-
- val = BIT(evt_ring_id);
- iowrite32(val, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET);
- gsi_irq_type_enable(gsi, GSI_EV_CTRL);
+ /* Enable the completion interrupt for the command */
+ gsi_irq_ev_ctrl_enable(gsi, evt_ring_id);
val = u32_encode_bits(evt_ring_id, EV_CHID_FMASK);
val |= u32_encode_bits(opcode, EV_OPCODE_FMASK);
- success = gsi_command(gsi, GSI_EV_CH_CMD_OFFSET, val, completion);
+ timeout = !gsi_command(gsi, GSI_EV_CH_CMD_OFFSET, val, completion);
- /* Disable the interrupt again */
- gsi_irq_type_disable(gsi, GSI_EV_CTRL);
- iowrite32(0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET);
+ gsi_irq_ev_ctrl_disable(gsi);
- if (success)
+ if (!timeout)
return;
dev_err(dev, "GSI command %u for event ring %u timed out, state %u\n",
- opcode, evt_ring_id, evt_ring->state);
+ opcode, evt_ring_id, gsi_evt_ring_state(gsi, evt_ring_id));
}
/* Allocate an event ring in NOT_ALLOCATED state */
static int gsi_evt_ring_alloc_command(struct gsi *gsi, u32 evt_ring_id)
{
- struct gsi_evt_ring *evt_ring = &gsi->evt_ring[evt_ring_id];
+ enum gsi_evt_ring_state state;
/* Get initial event ring state */
- evt_ring->state = gsi_evt_ring_state(gsi, evt_ring_id);
- if (evt_ring->state != GSI_EVT_RING_STATE_NOT_ALLOCATED) {
+ state = gsi_evt_ring_state(gsi, evt_ring_id);
+ if (state != GSI_EVT_RING_STATE_NOT_ALLOCATED) {
dev_err(gsi->dev, "event ring %u bad state %u before alloc\n",
- evt_ring_id, evt_ring->state);
+ evt_ring_id, state);
return -EINVAL;
}
- evt_ring_command(gsi, evt_ring_id, GSI_EVT_ALLOCATE);
+ gsi_evt_ring_command(gsi, evt_ring_id, GSI_EVT_ALLOCATE);
/* If successful the event ring state will have changed */
- if (evt_ring->state == GSI_EVT_RING_STATE_ALLOCATED)
+ state = gsi_evt_ring_state(gsi, evt_ring_id);
+ if (state == GSI_EVT_RING_STATE_ALLOCATED)
return 0;
dev_err(gsi->dev, "event ring %u bad state %u after alloc\n",
- evt_ring_id, evt_ring->state);
+ evt_ring_id, state);
return -EIO;
}
/* Reset a GSI event ring in ALLOCATED or ERROR state. */
static void gsi_evt_ring_reset_command(struct gsi *gsi, u32 evt_ring_id)
{
- struct gsi_evt_ring *evt_ring = &gsi->evt_ring[evt_ring_id];
- enum gsi_evt_ring_state state = evt_ring->state;
+ enum gsi_evt_ring_state state;
+ state = gsi_evt_ring_state(gsi, evt_ring_id);
if (state != GSI_EVT_RING_STATE_ALLOCATED &&
state != GSI_EVT_RING_STATE_ERROR) {
dev_err(gsi->dev, "event ring %u bad state %u before reset\n",
- evt_ring_id, evt_ring->state);
+ evt_ring_id, state);
return;
}
- evt_ring_command(gsi, evt_ring_id, GSI_EVT_RESET);
+ gsi_evt_ring_command(gsi, evt_ring_id, GSI_EVT_RESET);
/* If successful the event ring state will have changed */
- if (evt_ring->state == GSI_EVT_RING_STATE_ALLOCATED)
+ state = gsi_evt_ring_state(gsi, evt_ring_id);
+ if (state == GSI_EVT_RING_STATE_ALLOCATED)
return;
dev_err(gsi->dev, "event ring %u bad state %u after reset\n",
- evt_ring_id, evt_ring->state);
+ evt_ring_id, state);
}
/* Issue a hardware de-allocation request for an allocated event ring */
static void gsi_evt_ring_de_alloc_command(struct gsi *gsi, u32 evt_ring_id)
{
- struct gsi_evt_ring *evt_ring = &gsi->evt_ring[evt_ring_id];
+ enum gsi_evt_ring_state state;
- if (evt_ring->state != GSI_EVT_RING_STATE_ALLOCATED) {
+ state = gsi_evt_ring_state(gsi, evt_ring_id);
+ if (state != GSI_EVT_RING_STATE_ALLOCATED) {
dev_err(gsi->dev, "event ring %u state %u before dealloc\n",
- evt_ring_id, evt_ring->state);
+ evt_ring_id, state);
return;
}
- evt_ring_command(gsi, evt_ring_id, GSI_EVT_DE_ALLOC);
+ gsi_evt_ring_command(gsi, evt_ring_id, GSI_EVT_DE_ALLOC);
/* If successful the event ring state will have changed */
- if (evt_ring->state == GSI_EVT_RING_STATE_NOT_ALLOCATED)
+ state = gsi_evt_ring_state(gsi, evt_ring_id);
+ if (state == GSI_EVT_RING_STATE_NOT_ALLOCATED)
return;
dev_err(gsi->dev, "event ring %u bad state %u after dealloc\n",
- evt_ring_id, evt_ring->state);
+ evt_ring_id, state);
}
/* Fetch the current state of a channel from hardware */
u32 channel_id = gsi_channel_id(channel);
struct gsi *gsi = channel->gsi;
struct device *dev = gsi->dev;
- bool success;
+ bool timeout;
u32 val;
- /* We only perform one channel command at a time, and channel
- * control interrupts should only occur when such a command is
- * issued here. So we only permit *this* channel to trigger
- * an interrupt and only enable the channel control IRQ type
- * when we expect it to occur.
- *
- * There's a small chance that a previous command completed
- * after the interrupt was disabled, so make sure we have no
- * pending interrupts before we enable them.
- */
- iowrite32(~0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_CLR_OFFSET);
-
- val = BIT(channel_id);
- iowrite32(val, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET);
- gsi_irq_type_enable(gsi, GSI_CH_CTRL);
+ /* Enable the completion interrupt for the command */
+ gsi_irq_ch_ctrl_enable(gsi, channel_id);
val = u32_encode_bits(channel_id, CH_CHID_FMASK);
val |= u32_encode_bits(opcode, CH_OPCODE_FMASK);
- success = gsi_command(gsi, GSI_CH_CMD_OFFSET, val, completion);
+ timeout = !gsi_command(gsi, GSI_CH_CMD_OFFSET, val, completion);
- /* Disable the interrupt again */
- gsi_irq_type_disable(gsi, GSI_CH_CTRL);
- iowrite32(0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET);
+ gsi_irq_ch_ctrl_disable(gsi);
- if (success)
+ if (!timeout)
return;
dev_err(dev, "GSI command %u for channel %u timed out, state %u\n",
struct device *dev = channel->gsi->dev;
enum gsi_channel_state state;
- msleep(1); /* A short delay is required before a RESET command */
+ /* A short delay is required before a RESET command */
+ usleep_range(USEC_PER_MSEC, 2 * USEC_PER_MSEC);
state = gsi_channel_state(channel);
if (state != GSI_CHANNEL_STATE_STOPPED &&
gsi_evt_ring_doorbell(gsi, evt_ring_id, 0);
}
-/* Return the last (most recent) transaction completed on a channel. */
+/* Find the transaction whose completion indicates a channel is quiesced */
static struct gsi_trans *gsi_channel_trans_last(struct gsi_channel *channel)
{
struct gsi_trans_info *trans_info = &channel->trans_info;
+ const struct list_head *list;
struct gsi_trans *trans;
spin_lock_bh(&trans_info->spinlock);
- if (!list_empty(&trans_info->complete))
- trans = list_last_entry(&trans_info->complete,
- struct gsi_trans, links);
- else if (!list_empty(&trans_info->polled))
- trans = list_last_entry(&trans_info->polled,
- struct gsi_trans, links);
- else
- trans = NULL;
+ /* There is a small chance a TX transaction got allocated just
+ * before we disabled transmits, so check for that.
+ */
+ if (channel->toward_ipa) {
+ list = &trans_info->alloc;
+ if (!list_empty(list))
+ goto done;
+ list = &trans_info->pending;
+ if (!list_empty(list))
+ goto done;
+ }
+
+ /* Otherwise (TX or RX) we want to wait for anything that
+ * has completed, or has been polled but not released yet.
+ */
+ list = &trans_info->complete;
+ if (!list_empty(list))
+ goto done;
+ list = &trans_info->polled;
+ if (list_empty(list))
+ list = NULL;
+done:
+ trans = list ? list_last_entry(list, struct gsi_trans, links) : NULL;
/* Caller will wait for this, so take a reference */
if (trans)
}
}
-/* Stop channel activity. Transactions may not be allocated until thawed. */
-static void gsi_channel_freeze(struct gsi_channel *channel)
-{
- gsi_channel_trans_quiesce(channel);
-
- napi_disable(&channel->napi);
-
- gsi_irq_ieob_disable(channel->gsi, channel->evt_ring_id);
-}
-
-/* Allow transactions to be used on the channel again. */
-static void gsi_channel_thaw(struct gsi_channel *channel)
-{
- gsi_irq_ieob_enable(channel->gsi, channel->evt_ring_id);
-
- napi_enable(&channel->napi);
-}
-
/* Program a channel for use */
static void gsi_channel_program(struct gsi_channel *channel, bool doorbell)
{
/* Nothing to do */
}
-/* Start an allocated GSI channel */
-int gsi_channel_start(struct gsi *gsi, u32 channel_id)
+static int __gsi_channel_start(struct gsi_channel *channel, bool start)
{
- struct gsi_channel *channel = &gsi->channel[channel_id];
+ struct gsi *gsi = channel->gsi;
int ret;
+ if (!start)
+ return 0;
+
mutex_lock(&gsi->mutex);
ret = gsi_channel_start_command(channel);
mutex_unlock(&gsi->mutex);
- gsi_channel_thaw(channel);
-
return ret;
}
-/* Stop a started channel */
-int gsi_channel_stop(struct gsi *gsi, u32 channel_id)
+/* Start an allocated GSI channel */
+int gsi_channel_start(struct gsi *gsi, u32 channel_id)
{
struct gsi_channel *channel = &gsi->channel[channel_id];
- u32 retries;
int ret;
- gsi_channel_freeze(channel);
+ /* Enable NAPI and the completion interrupt */
+ napi_enable(&channel->napi);
+ gsi_irq_ieob_enable_one(gsi, channel->evt_ring_id);
- /* RX channels might require a little time to enter STOPPED state */
- retries = channel->toward_ipa ? 0 : GSI_CHANNEL_STOP_RX_RETRIES;
+ ret = __gsi_channel_start(channel, true);
+ if (ret) {
+ gsi_irq_ieob_disable_one(gsi, channel->evt_ring_id);
+ napi_disable(&channel->napi);
+ }
- mutex_lock(&gsi->mutex);
+ return ret;
+}
+
+static int gsi_channel_stop_retry(struct gsi_channel *channel)
+{
+ u32 retries = GSI_CHANNEL_STOP_RETRIES;
+ int ret;
do {
ret = gsi_channel_stop_command(channel);
if (ret != -EAGAIN)
break;
- msleep(1);
+ usleep_range(3 * USEC_PER_MSEC, 5 * USEC_PER_MSEC);
} while (retries--);
+ return ret;
+}
+
+static int __gsi_channel_stop(struct gsi_channel *channel, bool stop)
+{
+ struct gsi *gsi = channel->gsi;
+ int ret;
+
+ /* Wait for any underway transactions to complete before stopping. */
+ gsi_channel_trans_quiesce(channel);
+
+ if (!stop)
+ return 0;
+
+ mutex_lock(&gsi->mutex);
+
+ ret = gsi_channel_stop_retry(channel);
+
mutex_unlock(&gsi->mutex);
- /* Thaw the channel if we need to retry (or on error) */
+ return ret;
+}
+
+/* Stop a started channel */
+int gsi_channel_stop(struct gsi *gsi, u32 channel_id)
+{
+ struct gsi_channel *channel = &gsi->channel[channel_id];
+ int ret;
+
+ ret = __gsi_channel_stop(channel, true);
if (ret)
- gsi_channel_thaw(channel);
+ return ret;
- return ret;
+ /* Disable the completion interrupt and NAPI if successful */
+ gsi_irq_ieob_disable_one(gsi, channel->evt_ring_id);
+ napi_disable(&channel->napi);
+
+ return 0;
}
/* Reset and reconfigure a channel, (possibly) enabling the doorbell engine */
int gsi_channel_suspend(struct gsi *gsi, u32 channel_id, bool stop)
{
struct gsi_channel *channel = &gsi->channel[channel_id];
+ int ret;
- if (stop)
- return gsi_channel_stop(gsi, channel_id);
+ ret = __gsi_channel_stop(channel, stop);
+ if (ret)
+ return ret;
- gsi_channel_freeze(channel);
+ /* Ensure NAPI polling has finished. */
+ napi_synchronize(&channel->napi);
return 0;
}
{
struct gsi_channel *channel = &gsi->channel[channel_id];
- if (start)
- return gsi_channel_start(gsi, channel_id);
-
- gsi_channel_thaw(channel);
-
- return 0;
+ return __gsi_channel_start(channel, start);
}
/**
event_mask ^= BIT(evt_ring_id);
evt_ring = &gsi->evt_ring[evt_ring_id];
- evt_ring->state = gsi_evt_ring_state(gsi, evt_ring_id);
complete(&evt_ring->completion);
}
u32 event_mask;
event_mask = ioread32(gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_OFFSET);
+ gsi_irq_ieob_disable(gsi, event_mask);
iowrite32(event_mask, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_CLR_OFFSET);
while (event_mask) {
event_mask ^= BIT(evt_ring_id);
- gsi_irq_ieob_disable(gsi, evt_ring_id);
napi_schedule(&gsi->evt_ring[evt_ring_id].channel->napi);
}
}
}
/* Consult hardware, move any newly completed transactions to completed list */
-static void gsi_channel_update(struct gsi_channel *channel)
+static struct gsi_trans *gsi_channel_update(struct gsi_channel *channel)
{
u32 evt_ring_id = channel->evt_ring_id;
struct gsi *gsi = channel->gsi;
offset = GSI_EV_CH_E_CNTXT_4_OFFSET(evt_ring_id);
index = gsi_ring_index(ring, ioread32(gsi->virt + offset));
if (index == ring->index % ring->count)
- return;
+ return NULL;
/* Get the transaction for the latest completed event. Take a
* reference to keep it from completing before we give the events
gsi_evt_ring_doorbell(channel->gsi, channel->evt_ring_id, index);
gsi_trans_free(trans);
+
+ return gsi_channel_trans_complete(channel);
}
/**
/* Get the first transaction from the completed list */
trans = gsi_channel_trans_complete(channel);
- if (!trans) {
- /* List is empty; see if there's more to do */
- gsi_channel_update(channel);
- trans = gsi_channel_trans_complete(channel);
- }
+ if (!trans) /* List is empty; see if there's more to do */
+ trans = gsi_channel_update(channel);
if (trans)
gsi_trans_move_polled(trans);
static int gsi_channel_poll(struct napi_struct *napi, int budget)
{
struct gsi_channel *channel;
- int count = 0;
+ int count;
channel = container_of(napi, struct gsi_channel, napi);
- while (count < budget) {
+ for (count = 0; count < budget; count++) {
struct gsi_trans *trans;
- count++;
trans = gsi_channel_poll_one(channel);
if (!trans)
break;
gsi_trans_complete(trans);
}
- if (count < budget) {
- napi_complete(&channel->napi);
- gsi_irq_ieob_enable(channel->gsi, channel->evt_ring_id);
- }
+ if (count < budget && napi_complete(napi))
+ gsi_irq_ieob_enable_one(channel->gsi, channel->evt_ring_id);
return count;
}
enum gsi_generic_cmd_opcode opcode)
{
struct completion *completion = &gsi->completion;
- bool success;
+ bool timeout;
u32 val;
/* The error global interrupt type is always enabled (until we
val |= u32_encode_bits(channel_id, GENERIC_CHID_FMASK);
val |= u32_encode_bits(GSI_EE_MODEM, GENERIC_EE_FMASK);
- success = gsi_command(gsi, GSI_GENERIC_CMD_OFFSET, val, completion);
+ timeout = !gsi_command(gsi, GSI_GENERIC_CMD_OFFSET, val, completion);
/* Disable the GP_INT1 IRQ type again */
iowrite32(BIT(ERROR_INT), gsi->virt + GSI_CNTXT_GLOB_IRQ_EN_OFFSET);
- if (success)
+ if (!timeout)
return gsi->result;
dev_err(gsi->dev, "GSI generic command %u to channel %u timed out\n",
if (!channel->gsi)
continue; /* Ignore uninitialized channels */
+ ret = -EINVAL;
dev_err(gsi->dev, "channel %u not supported by hardware\n",
channel_id - 1);
channel_id = gsi->channel_count;
enum qmi_wwan_flags {
QMI_WWAN_FLAG_RAWIP = 1 << 0,
QMI_WWAN_FLAG_MUX = 1 << 1,
+ QMI_WWAN_FLAG_PASS_THROUGH = 1 << 2,
};
enum qmi_wwan_quirks {
net = qmimux_find_dev(dev, hdr->mux_id);
if (!net)
goto skip;
- skbn = netdev_alloc_skb(net, pkt_len);
+ skbn = netdev_alloc_skb(net, pkt_len + LL_MAX_HEADER);
if (!skbn)
return 0;
skbn->dev = net;
goto skip;
}
+ skb_reserve(skbn, LL_MAX_HEADER);
skb_put_data(skbn, skb->data + offset + qmimux_hdr_sz, pkt_len);
if (netif_rx(skbn) != NET_RX_SUCCESS) {
net->stats.rx_errors++;
return 1;
}
+/* sysfs read handler for "mux_id": prints the mux_id stored in the
+ * netdev private data as a two-digit hex value followed by a newline.
+ */
+static ssize_t mux_id_show(struct device *d, struct device_attribute *attr, char *buf)
+{
+ struct qmimux_priv *mux = netdev_priv(to_net_dev(d));
+
+ return sysfs_emit(buf, "0x%02x\n", mux->mux_id);
+}
+
+static DEVICE_ATTR_RO(mux_id);
+
+static struct attribute *qmi_wwan_sysfs_qmimux_attrs[] = { /* attributes listed in the group below */
+ &dev_attr_mux_id.attr, /* read-only, backed by mux_id_show() */
+ NULL,
+};
+
+static struct attribute_group qmi_wwan_sysfs_qmimux_attr_group = { /* installed as new_dev->sysfs_groups[0] in qmimux_register_device() */
+ .name = "qmap",
+ .attrs = qmi_wwan_sysfs_qmimux_attrs,
+};
+
static int qmimux_register_device(struct net_device *real_dev, u8 mux_id)
{
struct net_device *new_dev;
goto out_free_newdev;
}
+ new_dev->sysfs_groups[0] = &qmi_wwan_sysfs_qmimux_attr_group;
+
err = register_netdevice(new_dev);
if (err < 0)
goto out_free_newdev;
if (enable == (info->flags & QMI_WWAN_FLAG_RAWIP))
return len;
+ /* ip mode cannot be cleared when pass through mode is set */
+ if (!enable && (info->flags & QMI_WWAN_FLAG_PASS_THROUGH)) {
+ netdev_err(dev->net,
+ "Cannot clear ip mode on pass through device\n");
+ return -EINVAL;
+ }
+
if (!rtnl_trylock())
return restart_syscall();
return ret;
}
+/* sysfs read handler for "pass_through": emits "Y\n" when
+ * QMI_WWAN_FLAG_PASS_THROUGH is set in the device state, "N\n" otherwise.
+ */
+static ssize_t pass_through_show(struct device *d,
+ struct device_attribute *attr, char *buf)
+{
+ struct usbnet *dev = netdev_priv(to_net_dev(d));
+ struct qmi_wwan_state *info;
+
+ info = (void *)&dev->data;
+ /* Use sysfs_emit() like mux_id_show() rather than a raw sprintf() */
+ return sysfs_emit(buf, "%c\n",
+ info->flags & QMI_WWAN_FLAG_PASS_THROUGH ? 'Y' : 'N');
+}
+
+/* sysfs write handler for "pass_through".
+ *
+ * Parses @buf as a boolean and sets or clears QMI_WWAN_FLAG_PASS_THROUGH
+ * in the device state.  Returns @len on success (including when the
+ * requested value is already in effect), or -EINVAL when @buf is not a
+ * boolean or the device is not in raw IP mode.
+ */
+static ssize_t pass_through_store(struct device *d,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct usbnet *dev = netdev_priv(to_net_dev(d));
+ struct qmi_wwan_state *info;
+ bool enable;
+
+ if (strtobool(buf, &enable))
+ return -EINVAL;
+
+ info = (void *)&dev->data;
+
+ /* no change?  The masked flag is 0 or (1 << 2), so normalize it
+ * to 0/1 before comparing with a bool; otherwise "already enabled"
+ * is never recognized.
+ */
+ if (enable == !!(info->flags & QMI_WWAN_FLAG_PASS_THROUGH))
+ return len;
+
+ /* pass through mode can be set for raw ip devices only */
+ if (!(info->flags & QMI_WWAN_FLAG_RAWIP)) {
+ netdev_err(dev->net,
+ "Cannot set pass through mode on non ip device\n");
+ return -EINVAL;
+ }
+
+ if (enable)
+ info->flags |= QMI_WWAN_FLAG_PASS_THROUGH;
+ else
+ info->flags &= ~QMI_WWAN_FLAG_PASS_THROUGH;
+
+ return len;
+}
+
static DEVICE_ATTR_RW(raw_ip);
static DEVICE_ATTR_RW(add_mux);
static DEVICE_ATTR_RW(del_mux);
+static DEVICE_ATTR_RW(pass_through);
static struct attribute *qmi_wwan_sysfs_attrs[] = {
&dev_attr_raw_ip.attr,
&dev_attr_add_mux.attr,
&dev_attr_del_mux.attr,
+ &dev_attr_pass_through.attr,
NULL,
};
if (info->flags & QMI_WWAN_FLAG_MUX)
return qmimux_rx_fixup(dev, skb);
+ if (info->flags & QMI_WWAN_FLAG_PASS_THROUGH) {
+ skb->protocol = htons(ETH_P_MAP);
+ return (netif_rx(skb) == NET_RX_SUCCESS);
+ }
+
switch (skb->data[0] & 0xf0) {
case 0x40:
proto = htons(ETH_P_IP);
{QMI_FIXED_INTF(0x1e2d, 0x0082, 5)}, /* Cinterion PHxx,PXxx (2 RmNet) */
{QMI_FIXED_INTF(0x1e2d, 0x0083, 4)}, /* Cinterion PHxx,PXxx (1 RmNet + USB Audio)*/
{QMI_QUIRK_SET_DTR(0x1e2d, 0x00b0, 4)}, /* Cinterion CLS8 */
+ {QMI_FIXED_INTF(0x1e2d, 0x00b7, 0)}, /* Cinterion MV31 RmNet */
{QMI_FIXED_INTF(0x413c, 0x81a2, 8)}, /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */
{QMI_FIXED_INTF(0x413c, 0x81a3, 8)}, /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */
{QMI_FIXED_INTF(0x413c, 0x81a4, 8)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */
struct list_head dev_list;
struct hlist_node napi_hash_node;
unsigned int napi_id;
+ struct task_struct *thread;
};
enum {
NAPI_STATE_NO_BUSY_POLL, /* Do not add in napi_hash, no busy polling */
NAPI_STATE_IN_BUSY_POLL, /* sk_busy_loop() owns this NAPI */
NAPI_STATE_PREFER_BUSY_POLL, /* prefer busy-polling over softirq processing*/
+ NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/
};
enum {
NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
NAPIF_STATE_PREFER_BUSY_POLL = BIT(NAPI_STATE_PREFER_BUSY_POLL),
+ NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED),
};
enum gro_result {
GRO_MERGED_FREE,
GRO_HELD,
GRO_NORMAL,
- GRO_DROP,
GRO_CONSUMED,
};
typedef enum gro_result gro_result_t;
return napi_complete_done(n, 0);
}
+int dev_set_threaded(struct net_device *dev, bool threaded);
+
/**
* napi_disable - prevent NAPI from scheduling
* @n: NAPI context
*/
void napi_disable(struct napi_struct *n);
-/**
- * napi_enable - enable NAPI scheduling
- * @n: NAPI context
- *
- * Resume NAPI from being scheduled on this context.
- * Must be paired with napi_disable.
- */
-static inline void napi_enable(struct napi_struct *n)
-{
- BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
- smp_mb__before_atomic();
- clear_bit(NAPI_STATE_SCHED, &n->state);
- clear_bit(NAPI_STATE_NPSVC, &n->state);
-}
+void napi_enable(struct napi_struct *n);
/**
* napi_synchronize - wait until NAPI is not running
TC_SETUP_QDISC_ETS,
TC_SETUP_QDISC_TBF,
TC_SETUP_QDISC_FIFO,
+ TC_SETUP_QDISC_HTB,
};
/* These structures hold the attributes of bpf state that are being passed
* struct netdev_phys_item_id *ppid)
* Called to get the parent ID of the physical port of this device.
*
- * void (*ndo_udp_tunnel_add)(struct net_device *dev,
- * struct udp_tunnel_info *ti);
- * Called by UDP tunnel to notify a driver about the UDP port and socket
- * address family that a UDP tunnel is listnening to. It is called only
- * when a new port starts listening. The operation is protected by the
- * RTNL.
- *
- * void (*ndo_udp_tunnel_del)(struct net_device *dev,
- * struct udp_tunnel_info *ti);
- * Called by UDP tunnel to notify the driver about a UDP port and socket
- * address family that the UDP tunnel is not listening to anymore. The
- * operation is protected by the RTNL.
- *
* void* (*ndo_dfwd_add_station)(struct net_device *pdev,
* struct net_device *dev)
* Called by upper layer devices to accelerate switching or other
struct net_device* (*ndo_get_xmit_slave)(struct net_device *dev,
struct sk_buff *skb,
bool all_slaves);
+ struct net_device* (*ndo_sk_get_lower_dev)(struct net_device *dev,
+ struct sock *sk);
netdev_features_t (*ndo_fix_features)(struct net_device *dev,
netdev_features_t features);
int (*ndo_set_features)(struct net_device *dev,
struct netdev_phys_item_id *ppid);
int (*ndo_get_phys_port_name)(struct net_device *dev,
char *name, size_t len);
- void (*ndo_udp_tunnel_add)(struct net_device *dev,
- struct udp_tunnel_info *ti);
- void (*ndo_udp_tunnel_del)(struct net_device *dev,
- struct udp_tunnel_info *ti);
void* (*ndo_dfwd_add_station)(struct net_device *pdev,
struct net_device *dev);
void (*ndo_dfwd_del_station)(struct net_device *pdev,
*
* @wol_enabled: Wake-on-LAN is enabled
*
+ * @threaded: napi threaded mode is enabled
+ *
* @net_notifier_list: List of per-net netdev notifier block
* that follow this device when it is moved
* to another network namespace.
unsigned long mem_end;
unsigned long mem_start;
unsigned long base_addr;
- int irq;
/*
* Some hardware also needs these fields (state,dev_list,
struct list_head lower;
} adj_list;
+ /* Read-mostly cache-line for fast-path access */
+ unsigned int flags;
+ unsigned int priv_flags;
+ const struct net_device_ops *netdev_ops;
+ int ifindex;
+ unsigned short gflags;
+ unsigned short hard_header_len;
+
+ /* Note : dev->mtu is often read without holding a lock.
+ * Writers usually hold RTNL.
+ * It is recommended to use READ_ONCE() to annotate the reads,
+ * and to use WRITE_ONCE() to annotate the writes.
+ */
+ unsigned int mtu;
+ unsigned short needed_headroom;
+ unsigned short needed_tailroom;
+
netdev_features_t features;
netdev_features_t hw_features;
netdev_features_t wanted_features;
netdev_features_t mpls_features;
netdev_features_t gso_partial_features;
- int ifindex;
+ unsigned int min_mtu;
+ unsigned int max_mtu;
+ unsigned short type;
+ unsigned char min_header_len;
+ unsigned char name_assign_type;
+
int group;
- struct net_device_stats stats;
+ struct net_device_stats stats; /* not used by modern drivers */
atomic_long_t rx_dropped;
atomic_long_t tx_dropped;
const struct iw_handler_def *wireless_handlers;
struct iw_public_data *wireless_data;
#endif
- const struct net_device_ops *netdev_ops;
const struct ethtool_ops *ethtool_ops;
#ifdef CONFIG_NET_L3_MASTER_DEV
const struct l3mdev_ops *l3mdev_ops;
const struct header_ops *header_ops;
- unsigned int flags;
- unsigned int priv_flags;
-
- unsigned short gflags;
- unsigned short padded;
-
unsigned char operstate;
unsigned char link_mode;
unsigned char if_port;
unsigned char dma;
- /* Note : dev->mtu is often read without holding a lock.
- * Writers usually hold RTNL.
- * It is recommended to use READ_ONCE() to annotate the reads,
- * and to use WRITE_ONCE() to annotate the writes.
- */
- unsigned int mtu;
- unsigned int min_mtu;
- unsigned int max_mtu;
- unsigned short type;
- unsigned short hard_header_len;
- unsigned char min_header_len;
- unsigned char name_assign_type;
-
- unsigned short needed_headroom;
- unsigned short needed_tailroom;
-
/* Interface address info. */
unsigned char perm_addr[MAX_ADDR_LEN];
unsigned char addr_assign_type;
unsigned short neigh_priv_len;
unsigned short dev_id;
unsigned short dev_port;
+ unsigned short padded;
+
spinlock_t addr_list_lock;
+ int irq;
struct netdev_hw_addr_list uc;
struct netdev_hw_addr_list mc;
struct lock_class_key *qdisc_running_key;
bool proto_down;
unsigned wol_enabled:1;
+ unsigned threaded:1;
struct list_head net_notifier_list;
NETDEV_LAG_HASH_L23,
NETDEV_LAG_HASH_E23,
NETDEV_LAG_HASH_E34,
+ NETDEV_LAG_HASH_VLAN_SRCMAC,
NETDEV_LAG_HASH_UNKNOWN,
};
struct net_device *netdev_get_xmit_slave(struct net_device *dev,
struct sk_buff *skb,
bool all_slaves);
+struct net_device *netdev_sk_get_lowest_dev(struct net_device *dev,
+ struct sock *sk);
struct net_device *dev_get_by_index(struct net *net, int ifindex);
struct net_device *__dev_get_by_index(struct net *net, int ifindex);
struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
local_bh_disable();
cpu = smp_processor_id();
+ spin_lock(&dev->tx_global_lock);
for (i = 0; i < dev->num_tx_queues; i++) {
struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
netif_tx_stop_queue(txq);
__netif_tx_unlock(txq);
}
+ spin_unlock(&dev->tx_global_lock);
local_bh_enable();
}
#define SWITCHDEV_F_SKIP_EOPNOTSUPP BIT(1)
#define SWITCHDEV_F_DEFER BIT(2)
-struct switchdev_trans {
- bool ph_prepare;
-};
-
-static inline bool switchdev_trans_ph_prepare(struct switchdev_trans *trans)
-{
- return trans && trans->ph_prepare;
-}
-
-static inline bool switchdev_trans_ph_commit(struct switchdev_trans *trans)
-{
- return trans && !trans->ph_prepare;
-}
-
enum switchdev_attr_id {
SWITCHDEV_ATTR_ID_UNDEFINED,
SWITCHDEV_ATTR_ID_PORT_STP_STATE,
SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED,
SWITCHDEV_ATTR_ID_BRIDGE_MROUTER,
#if IS_ENABLED(CONFIG_BRIDGE_MRP)
- SWITCHDEV_ATTR_ID_MRP_PORT_STATE,
SWITCHDEV_ATTR_ID_MRP_PORT_ROLE,
#endif
};
u16 vlan_protocol; /* BRIDGE_VLAN_PROTOCOL */
bool mc_disabled; /* MC_DISABLED */
#if IS_ENABLED(CONFIG_BRIDGE_MRP)
- u8 mrp_port_state; /* MRP_PORT_STATE */
u8 mrp_port_role; /* MRP_PORT_ROLE */
#endif
} u;
struct switchdev_obj_port_vlan {
struct switchdev_obj obj;
u16 flags;
- u16 vid_begin;
- u16 vid_end;
+ u16 vid;
};
#define SWITCHDEV_OBJ_PORT_VLAN(OBJ) \
struct switchdev_notifier_port_obj_info {
struct switchdev_notifier_info info; /* must be first */
const struct switchdev_obj *obj;
- struct switchdev_trans *trans;
bool handled;
};
struct switchdev_notifier_port_attr_info {
struct switchdev_notifier_info info; /* must be first */
const struct switchdev_attr *attr;
- struct switchdev_trans *trans;
bool handled;
};
bool (*check_cb)(const struct net_device *dev),
int (*add_cb)(struct net_device *dev,
const struct switchdev_obj *obj,
- struct switchdev_trans *trans,
struct netlink_ext_ack *extack));
int switchdev_handle_port_obj_del(struct net_device *dev,
struct switchdev_notifier_port_obj_info *port_obj_info,
struct switchdev_notifier_port_attr_info *port_attr_info,
bool (*check_cb)(const struct net_device *dev),
int (*set_cb)(struct net_device *dev,
- const struct switchdev_attr *attr,
- struct switchdev_trans *trans));
+ const struct switchdev_attr *attr));
#else
static inline void switchdev_deferred_process(void)
bool (*check_cb)(const struct net_device *dev),
int (*add_cb)(struct net_device *dev,
const struct switchdev_obj *obj,
- struct switchdev_trans *trans,
struct netlink_ext_ack *extack))
{
return 0;
struct switchdev_notifier_port_attr_info *port_attr_info,
bool (*check_cb)(const struct net_device *dev),
int (*set_cb)(struct net_device *dev,
- const struct switchdev_attr *attr,
- struct switchdev_trans *trans))
+ const struct switchdev_attr *attr))
{
return 0;
}
#define IFH_REW_OP_TWO_STEP_PTP 0x3
#define IFH_REW_OP_ORIGIN_PTP 0x5
+#define OCELOT_NUM_TC 8
#define OCELOT_TAG_LEN 16
#define OCELOT_SHORT_PREFIX_LEN 4
#define OCELOT_LONG_PREFIX_LEN 16
int (*netdev_to_port)(struct net_device *dev);
int (*reset)(struct ocelot *ocelot);
u16 (*wm_enc)(u16 value);
+ u16 (*wm_dec)(u16 value);
+ void (*wm_stat)(u32 val, u32 *inuse, u32 *maxuse);
};
struct ocelot_vcap_block {
u16 vid;
};
+enum ocelot_sb { /* first index of ocelot->pool_size[][] */
+ OCELOT_SB_BUF,
+ OCELOT_SB_REF,
+ OCELOT_SB_NUM, /* count of the entries above; used as an array dimension, not a valid index */
+};
+
+enum ocelot_sb_pool { /* second index of ocelot->pool_size[][] */
+ OCELOT_SB_POOL_ING,
+ OCELOT_SB_POOL_EGR,
+ OCELOT_SB_POOL_NUM, /* count of the entries above; used as an array dimension, not a valid index */
+};
+
struct ocelot_port {
struct ocelot *ocelot;
phy_interface_t phy_mode;
u8 *xmit_template;
+ bool is_dsa_8021q_cpu;
+
+ struct net_device *bond;
+ bool lag_tx_active;
};
struct ocelot {
struct device *dev;
+ struct devlink *devlink;
+ struct devlink_port *devlink_ports;
const struct ocelot_ops *ops;
struct regmap *targets[TARGET_MAX];
const struct ocelot_stat_layout *stats_layout;
unsigned int num_stats;
- int shared_queue_sz;
+ u32 pool_size[OCELOT_SB_NUM][OCELOT_SB_POOL_NUM];
+ int packet_buffer_size;
+ int num_frame_refs;
int num_mact_rows;
struct net_device *hw_bridge_dev;
int npi;
- enum ocelot_tag_prefix inj_prefix;
- enum ocelot_tag_prefix xtr_prefix;
-
- u32 *lags;
+ enum ocelot_tag_prefix npi_inj_prefix;
+ enum ocelot_tag_prefix npi_xtr_prefix;
struct list_head multicast;
struct list_head pgids;
/* I/O */
u32 ocelot_port_readl(struct ocelot_port *port, u32 reg);
void ocelot_port_writel(struct ocelot_port *port, u32 val, u32 reg);
+ void ocelot_port_rmwl(struct ocelot_port *port, u32 val, u32 mask, u32 reg);
u32 __ocelot_read_ix(struct ocelot *ocelot, u32 reg, u32 offset);
void __ocelot_write_ix(struct ocelot *ocelot, u32 val, u32 reg, u32 offset);
void __ocelot_rmw_ix(struct ocelot *ocelot, u32 val, u32 mask, u32 reg,
int ocelot_get_ts_info(struct ocelot *ocelot, int port,
struct ethtool_ts_info *info);
void ocelot_set_ageing_time(struct ocelot *ocelot, unsigned int msecs);
+ int ocelot_port_flush(struct ocelot *ocelot, int port);
void ocelot_adjust_link(struct ocelot *ocelot, int port,
struct phy_device *phydev);
-int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, bool enabled,
- struct switchdev_trans *trans);
+int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, bool enabled);
void ocelot_bridge_stp_state_set(struct ocelot *ocelot, int port, u8 state);
+void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot);
int ocelot_port_bridge_join(struct ocelot *ocelot, int port,
struct net_device *bridge);
int ocelot_port_bridge_leave(struct ocelot *ocelot, int port,
const struct switchdev_obj_port_mdb *mdb);
int ocelot_port_mdb_del(struct ocelot *ocelot, int port,
const struct switchdev_obj_port_mdb *mdb);
+int ocelot_port_lag_join(struct ocelot *ocelot, int port,
+ struct net_device *bond,
+ struct netdev_lag_upper_info *info);
+void ocelot_port_lag_leave(struct ocelot *ocelot, int port,
+ struct net_device *bond);
+void ocelot_port_lag_change(struct ocelot *ocelot, int port, bool lag_tx_active);
+
+int ocelot_devlink_sb_register(struct ocelot *ocelot);
+void ocelot_devlink_sb_unregister(struct ocelot *ocelot);
+int ocelot_sb_pool_get(struct ocelot *ocelot, unsigned int sb_index,
+ u16 pool_index,
+ struct devlink_sb_pool_info *pool_info);
+int ocelot_sb_pool_set(struct ocelot *ocelot, unsigned int sb_index,
+ u16 pool_index, u32 size,
+ enum devlink_sb_threshold_type threshold_type,
+ struct netlink_ext_ack *extack);
+int ocelot_sb_port_pool_get(struct ocelot *ocelot, int port,
+ unsigned int sb_index, u16 pool_index,
+ u32 *p_threshold);
+int ocelot_sb_port_pool_set(struct ocelot *ocelot, int port,
+ unsigned int sb_index, u16 pool_index,
+ u32 threshold, struct netlink_ext_ack *extack);
+int ocelot_sb_tc_pool_bind_get(struct ocelot *ocelot, int port,
+ unsigned int sb_index, u16 tc_index,
+ enum devlink_sb_pool_type pool_type,
+ u16 *p_pool_index, u32 *p_threshold);
+int ocelot_sb_tc_pool_bind_set(struct ocelot *ocelot, int port,
+ unsigned int sb_index, u16 tc_index,
+ enum devlink_sb_pool_type pool_type,
+ u16 pool_index, u32 threshold,
+ struct netlink_ext_ack *extack);
+int ocelot_sb_occ_snapshot(struct ocelot *ocelot, unsigned int sb_index);
+int ocelot_sb_occ_max_clear(struct ocelot *ocelot, unsigned int sb_index);
+int ocelot_sb_occ_port_pool_get(struct ocelot *ocelot, int port,
+ unsigned int sb_index, u16 pool_index,
+ u32 *p_cur, u32 *p_max);
+int ocelot_sb_occ_tc_port_bind_get(struct ocelot *ocelot, int port,
+ unsigned int sb_index, u16 tc_index,
+ enum devlink_sb_pool_type pool_type,
+ u32 *p_cur, u32 *p_max);
#endif
#include <linux/kernel.h>
#include <linux/stacktrace.h>
#include <linux/perf_event.h>
-#include <linux/elf.h>
-#include <linux/pagemap.h>
#include <linux/irq_work.h>
#include <linux/btf_ids.h>
+#include <linux/buildid.h>
#include "percpu_freelist.h"
#define STACK_CREATE_FLAG_MASK \
/* hash table size must be power of 2 */
n_buckets = roundup_pow_of_two(attr->max_entries);
+ if (!n_buckets)
+ return ERR_PTR(-E2BIG);
cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap);
cost += n_buckets * (value_size + sizeof(struct stack_map_bucket));
return ERR_PTR(err);
}
-#define BPF_BUILD_ID 3
-/*
- * Parse build id from the note segment. This logic can be shared between
- * 32-bit and 64-bit system, because Elf32_Nhdr and Elf64_Nhdr are
- * identical.
- */
-static inline int stack_map_parse_build_id(void *page_addr,
- unsigned char *build_id,
- void *note_start,
- Elf32_Word note_size)
-{
- Elf32_Word note_offs = 0, new_offs;
-
- /* check for overflow */
- if (note_start < page_addr || note_start + note_size < note_start)
- return -EINVAL;
-
- /* only supports note that fits in the first page */
- if (note_start + note_size > page_addr + PAGE_SIZE)
- return -EINVAL;
-
- while (note_offs + sizeof(Elf32_Nhdr) < note_size) {
- Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs);
-
- if (nhdr->n_type == BPF_BUILD_ID &&
- nhdr->n_namesz == sizeof("GNU") &&
- nhdr->n_descsz > 0 &&
- nhdr->n_descsz <= BPF_BUILD_ID_SIZE) {
- memcpy(build_id,
- note_start + note_offs +
- ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr),
- nhdr->n_descsz);
- memset(build_id + nhdr->n_descsz, 0,
- BPF_BUILD_ID_SIZE - nhdr->n_descsz);
- return 0;
- }
- new_offs = note_offs + sizeof(Elf32_Nhdr) +
- ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4);
- if (new_offs <= note_offs) /* overflow */
- break;
- note_offs = new_offs;
- }
- return -EINVAL;
-}
-
-/* Parse build ID from 32-bit ELF */
-static int stack_map_get_build_id_32(void *page_addr,
- unsigned char *build_id)
-{
- Elf32_Ehdr *ehdr = (Elf32_Ehdr *)page_addr;
- Elf32_Phdr *phdr;
- int i;
-
- /* only supports phdr that fits in one page */
- if (ehdr->e_phnum >
- (PAGE_SIZE - sizeof(Elf32_Ehdr)) / sizeof(Elf32_Phdr))
- return -EINVAL;
-
- phdr = (Elf32_Phdr *)(page_addr + sizeof(Elf32_Ehdr));
-
- for (i = 0; i < ehdr->e_phnum; ++i) {
- if (phdr[i].p_type == PT_NOTE &&
- !stack_map_parse_build_id(page_addr, build_id,
- page_addr + phdr[i].p_offset,
- phdr[i].p_filesz))
- return 0;
- }
- return -EINVAL;
-}
-
-/* Parse build ID from 64-bit ELF */
-static int stack_map_get_build_id_64(void *page_addr,
- unsigned char *build_id)
-{
- Elf64_Ehdr *ehdr = (Elf64_Ehdr *)page_addr;
- Elf64_Phdr *phdr;
- int i;
-
- /* only supports phdr that fits in one page */
- if (ehdr->e_phnum >
- (PAGE_SIZE - sizeof(Elf64_Ehdr)) / sizeof(Elf64_Phdr))
- return -EINVAL;
-
- phdr = (Elf64_Phdr *)(page_addr + sizeof(Elf64_Ehdr));
-
- for (i = 0; i < ehdr->e_phnum; ++i) {
- if (phdr[i].p_type == PT_NOTE &&
- !stack_map_parse_build_id(page_addr, build_id,
- page_addr + phdr[i].p_offset,
- phdr[i].p_filesz))
- return 0;
- }
- return -EINVAL;
-}
-
-/* Parse build ID of ELF file mapped to vma */
-static int stack_map_get_build_id(struct vm_area_struct *vma,
- unsigned char *build_id)
-{
- Elf32_Ehdr *ehdr;
- struct page *page;
- void *page_addr;
- int ret;
-
- /* only works for page backed storage */
- if (!vma->vm_file)
- return -EINVAL;
-
- page = find_get_page(vma->vm_file->f_mapping, 0);
- if (!page)
- return -EFAULT; /* page not mapped */
-
- ret = -EINVAL;
- page_addr = kmap_atomic(page);
- ehdr = (Elf32_Ehdr *)page_addr;
-
- /* compare magic x7f "ELF" */
- if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0)
- goto out;
-
- /* only support executable file and shared object file */
- if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN)
- goto out;
-
- if (ehdr->e_ident[EI_CLASS] == ELFCLASS32)
- ret = stack_map_get_build_id_32(page_addr, build_id);
- else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64)
- ret = stack_map_get_build_id_64(page_addr, build_id);
-out:
- kunmap_atomic(page_addr);
- put_page(page);
- return ret;
-}
-
static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
u64 *ips, u32 trace_nr, bool user)
{
for (i = 0; i < trace_nr; i++) {
id_offs[i].status = BPF_STACK_BUILD_ID_IP;
id_offs[i].ip = ips[i];
- memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE);
+ memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX);
}
return;
}
for (i = 0; i < trace_nr; i++) {
vma = find_vma(current->mm, ips[i]);
- if (!vma || stack_map_get_build_id(vma, id_offs[i].build_id)) {
+ if (!vma || build_id_parse(vma, id_offs[i].build_id, NULL)) {
/* per entry fall back to ips */
id_offs[i].status = BPF_STACK_BUILD_ID_IP;
id_offs[i].ip = ips[i];
- memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE);
+ memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX);
continue;
}
id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i]
return err;
}
-static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
+static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
{
+ int load_reg;
int err;
- if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
- insn->imm != 0) {
- verbose(env, "BPF_XADD uses reserved fields\n");
+ switch (insn->imm) {
+ case BPF_ADD:
+ case BPF_ADD | BPF_FETCH:
+ case BPF_AND:
+ case BPF_AND | BPF_FETCH:
+ case BPF_OR:
+ case BPF_OR | BPF_FETCH:
+ case BPF_XOR:
+ case BPF_XOR | BPF_FETCH:
+ case BPF_XCHG:
+ case BPF_CMPXCHG:
+ break;
+ default:
+ verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm);
+ return -EINVAL;
+ }
+
+ if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
+ verbose(env, "invalid atomic operand size\n");
return -EINVAL;
}
if (err)
return err;
+ if (insn->imm == BPF_CMPXCHG) {
+ /* Check comparison of R0 with memory location */
+ err = check_reg_arg(env, BPF_REG_0, SRC_OP);
+ if (err)
+ return err;
+ }
+
if (is_pointer_value(env, insn->src_reg)) {
verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
return -EACCES;
is_pkt_reg(env, insn->dst_reg) ||
is_flow_key_reg(env, insn->dst_reg) ||
is_sk_reg(env, insn->dst_reg)) {
- verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
+ verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
insn->dst_reg,
reg_type_str[reg_state(env, insn->dst_reg)->type]);
return -EACCES;
}
- /* check whether atomic_add can read the memory */
+ /* check whether we can read the memory */
err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
BPF_SIZE(insn->code), BPF_READ, -1, true);
if (err)
return err;
- /* check whether atomic_add can write into the same memory */
- return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
- BPF_SIZE(insn->code), BPF_WRITE, -1, true);
+ /* check whether we can write into the same memory */
+ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
+ BPF_SIZE(insn->code), BPF_WRITE, -1, true);
+ if (err)
+ return err;
+
+ if (!(insn->imm & BPF_FETCH))
+ return 0;
+
+ if (insn->imm == BPF_CMPXCHG)
+ load_reg = BPF_REG_0;
+ else
+ load_reg = insn->src_reg;
+
+ /* check and record load of old value */
+ err = check_reg_arg(env, load_reg, DST_OP);
+ if (err)
+ return err;
+
+ return 0;
}
static int __check_stack_boundary(struct bpf_verifier_env *env, u32 regno,
err = mark_chain_precision(env, regno);
} else if (arg_type_is_alloc_size(arg_type)) {
if (!tnum_is_const(reg->var_off)) {
- verbose(env, "R%d unbounded size, use 'var &= const' or 'if (var < const)'\n",
+ verbose(env, "R%d is not a known constant'\n",
regno);
return -EACCES;
}
case BPF_JSGT:
if (reg->s32_min_value > sval)
return 1;
- else if (reg->s32_max_value < sval)
+ else if (reg->s32_max_value <= sval)
return 0;
break;
case BPF_JLT:
case BPF_JSGT:
if (reg->smin_value > sval)
return 1;
- else if (reg->smax_value < sval)
+ else if (reg->smax_value <= sval)
return 0;
break;
case BPF_JLT:
return old->umin_value <= cur->umin_value &&
old->umax_value >= cur->umax_value &&
old->smin_value <= cur->smin_value &&
- old->smax_value >= cur->smax_value;
+ old->smax_value >= cur->smax_value &&
+ old->u32_min_value <= cur->u32_min_value &&
+ old->u32_max_value >= cur->u32_max_value &&
+ old->s32_min_value <= cur->s32_min_value &&
+ old->s32_max_value >= cur->s32_max_value;
}
/* Maximum number of register states that can exist at once */
} else if (class == BPF_STX) {
enum bpf_reg_type *prev_dst_type, dst_reg_type;
- if (BPF_MODE(insn->code) == BPF_XADD) {
- err = check_xadd(env, env->insn_idx, insn);
+ if (BPF_MODE(insn->code) == BPF_ATOMIC) {
+ err = check_atomic(env, env->insn_idx, insn);
if (err)
return err;
env->insn_idx++;
continue;
}
+ if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
+ verbose(env, "BPF_STX uses reserved fields\n");
+ return -EINVAL;
+ }
+
/* check src1 operand */
err = check_reg_arg(env, insn->src_reg, SRC_OP);
if (err)
return 0;
}
+/*
+ * Look up the ".data..percpu" DATASEC among the types owned by @btf.
+ *
+ * Returns the BTF type ID of that DATASEC, or -ENOENT when none of the
+ * scanned types is a DATASEC with that name.
+ */
+static int find_btf_percpu_datasec(struct btf *btf)
+{
+	int nr_types = btf_nr_types(btf);
+	int id;
+
+	/*
+	 * vmlinux and every module carry their own ".data..percpu" DATASEC
+	 * in BTF. For a module, start right after the vmlinux BTF types so
+	 * that only the module's own BTF types are examined.
+	 */
+	id = btf_is_module(btf) ? btf_nr_types(btf_vmlinux) : 1;
+
+	while (id < nr_types) {
+		const struct btf_type *type = btf_type_by_id(btf, id);
+
+		/* the name is only looked up for DATASEC types */
+		if (BTF_INFO_KIND(type->info) == BTF_KIND_DATASEC &&
+		    strcmp(btf_name_by_offset(btf, type->name_off),
+			   ".data..percpu") == 0)
+			return id;
+
+		id++;
+	}
+
+	return -ENOENT;
+}
+
/* replace pseudo btf_id with kernel symbol address */
static int check_pseudo_btf_id(struct bpf_verifier_env *env,
struct bpf_insn *insn,
{
const struct btf_var_secinfo *vsi;
const struct btf_type *datasec;
+ struct btf_mod_pair *btf_mod;
const struct btf_type *t;
const char *sym_name;
bool percpu = false;
u32 type, id = insn->imm;
+ struct btf *btf;
s32 datasec_id;
u64 addr;
- int i;
-
- if (!btf_vmlinux) {
- verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
- return -EINVAL;
- }
+ int i, btf_fd, err;
- if (insn[1].imm != 0) {
- verbose(env, "reserved field (insn[1].imm) is used in pseudo_btf_id ldimm64 insn.\n");
- return -EINVAL;
+ btf_fd = insn[1].imm;
+ if (btf_fd) {
+ btf = btf_get_by_fd(btf_fd);
+ if (IS_ERR(btf)) {
+ verbose(env, "invalid module BTF object FD specified.\n");
+ return -EINVAL;
+ }
+ } else {
+ if (!btf_vmlinux) {
+ verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
+ return -EINVAL;
+ }
+ btf = btf_vmlinux;
+ btf_get(btf);
}
- t = btf_type_by_id(btf_vmlinux, id);
+ t = btf_type_by_id(btf, id);
if (!t) {
verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
- return -ENOENT;
+ err = -ENOENT;
+ goto err_put;
}
if (!btf_type_is_var(t)) {
- verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n",
- id);
- return -EINVAL;
+ verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n", id);
+ err = -EINVAL;
+ goto err_put;
}
- sym_name = btf_name_by_offset(btf_vmlinux, t->name_off);
+ sym_name = btf_name_by_offset(btf, t->name_off);
addr = kallsyms_lookup_name(sym_name);
if (!addr) {
verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
sym_name);
- return -ENOENT;
+ err = -ENOENT;
+ goto err_put;
}
- datasec_id = btf_find_by_name_kind(btf_vmlinux, ".data..percpu",
- BTF_KIND_DATASEC);
+ datasec_id = find_btf_percpu_datasec(btf);
if (datasec_id > 0) {
- datasec = btf_type_by_id(btf_vmlinux, datasec_id);
+ datasec = btf_type_by_id(btf, datasec_id);
for_each_vsi(i, datasec, vsi) {
if (vsi->type == id) {
percpu = true;
insn[1].imm = addr >> 32;
type = t->type;
- t = btf_type_skip_modifiers(btf_vmlinux, type, NULL);
+ t = btf_type_skip_modifiers(btf, type, NULL);
if (percpu) {
aux->btf_var.reg_type = PTR_TO_PERCPU_BTF_ID;
- aux->btf_var.btf = btf_vmlinux;
+ aux->btf_var.btf = btf;
aux->btf_var.btf_id = type;
} else if (!btf_type_is_struct(t)) {
const struct btf_type *ret;
u32 tsize;
/* resolve the type size of ksym. */
- ret = btf_resolve_size(btf_vmlinux, t, &tsize);
+ ret = btf_resolve_size(btf, t, &tsize);
if (IS_ERR(ret)) {
- tname = btf_name_by_offset(btf_vmlinux, t->name_off);
+ tname = btf_name_by_offset(btf, t->name_off);
verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
tname, PTR_ERR(ret));
- return -EINVAL;
+ err = -EINVAL;
+ goto err_put;
}
aux->btf_var.reg_type = PTR_TO_MEM;
aux->btf_var.mem_size = tsize;
} else {
aux->btf_var.reg_type = PTR_TO_BTF_ID;
- aux->btf_var.btf = btf_vmlinux;
+ aux->btf_var.btf = btf;
aux->btf_var.btf_id = type;
}
+
+ /* check whether we recorded this BTF (and maybe module) already */
+ for (i = 0; i < env->used_btf_cnt; i++) {
+ if (env->used_btfs[i].btf == btf) {
+ btf_put(btf);
+ return 0;
+ }
+ }
+
+ if (env->used_btf_cnt >= MAX_USED_BTFS) {
+ err = -E2BIG;
+ goto err_put;
+ }
+
+ btf_mod = &env->used_btfs[env->used_btf_cnt];
+ btf_mod->btf = btf;
+ btf_mod->module = NULL;
+
+ /* if we reference variables from kernel module, bump its refcount */
+ if (btf_is_module(btf)) {
+ btf_mod->module = btf_try_get_module(btf);
+ if (!btf_mod->module) {
+ err = -ENXIO;
+ goto err_put;
+ }
+ }
+
+ env->used_btf_cnt++;
+
return 0;
+err_put:
+ btf_put(btf);
+ return err;
}
static int check_map_prealloc(struct bpf_map *map)
return -EINVAL;
}
- if (BPF_CLASS(insn->code) == BPF_STX &&
- ((BPF_MODE(insn->code) != BPF_MEM &&
- BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
- verbose(env, "BPF_STX uses reserved fields\n");
- return -EINVAL;
- }
-
if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
struct bpf_insn_aux_data *aux;
struct bpf_map *map;
env->used_map_cnt);
}
+/* drop refcnt of BTFs used by the rejected program; hands the entries
+ * recorded in env->used_btfs to __bpf_free_used_btfs()
+ */
+static void release_btfs(struct bpf_verifier_env *env)
+{
+ __bpf_free_used_btfs(env->prog->aux, env->used_btfs,
+ env->used_btf_cnt);
+}
+
/* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
{
insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
- struct bpf_insn mask_and_div[] = {
- BPF_MOV32_REG(insn->src_reg, insn->src_reg),
+ bool isdiv = BPF_OP(insn->code) == BPF_DIV;
+ struct bpf_insn *patchlet;
+ struct bpf_insn chk_and_div[] = {
/* Rx div 0 -> 0 */
- BPF_JMP_IMM(BPF_JNE, insn->src_reg, 0, 2),
+ BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+ BPF_JNE | BPF_K, insn->src_reg,
+ 0, 2, 0),
BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
BPF_JMP_IMM(BPF_JA, 0, 0, 1),
*insn,
};
- struct bpf_insn mask_and_mod[] = {
- BPF_MOV32_REG(insn->src_reg, insn->src_reg),
+ struct bpf_insn chk_and_mod[] = {
/* Rx mod 0 -> Rx */
- BPF_JMP_IMM(BPF_JEQ, insn->src_reg, 0, 1),
+ BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+ BPF_JEQ | BPF_K, insn->src_reg,
+ 0, 1, 0),
*insn,
};
- struct bpf_insn *patchlet;
- if (insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
- insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
- patchlet = mask_and_div + (is64 ? 1 : 0);
- cnt = ARRAY_SIZE(mask_and_div) - (is64 ? 1 : 0);
- } else {
- patchlet = mask_and_mod + (is64 ? 1 : 0);
- cnt = ARRAY_SIZE(mask_and_mod) - (is64 ? 1 : 0);
- }
+ patchlet = isdiv ? chk_and_div : chk_and_mod;
+ cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
+ ARRAY_SIZE(chk_and_mod);
new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
if (!new_prog)
goto err_release_maps;
}
- if (ret == 0 && env->used_map_cnt) {
+ if (ret)
+ goto err_release_maps;
+
+ if (env->used_map_cnt) {
/* if program passed verifier, update used_maps in bpf_prog_info */
env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
sizeof(env->used_maps[0]),
memcpy(env->prog->aux->used_maps, env->used_maps,
sizeof(env->used_maps[0]) * env->used_map_cnt);
env->prog->aux->used_map_cnt = env->used_map_cnt;
+ }
+ if (env->used_btf_cnt) {
+ /* if program passed verifier, update used_btfs in bpf_prog_aux */
+ env->prog->aux->used_btfs = kmalloc_array(env->used_btf_cnt,
+ sizeof(env->used_btfs[0]),
+ GFP_KERNEL);
+ if (!env->prog->aux->used_btfs) {
+ ret = -ENOMEM;
+ goto err_release_maps;
+ }
+ memcpy(env->prog->aux->used_btfs, env->used_btfs,
+ sizeof(env->used_btfs[0]) * env->used_btf_cnt);
+ env->prog->aux->used_btf_cnt = env->used_btf_cnt;
+ }
+ if (env->used_map_cnt || env->used_btf_cnt) {
/* program is valid. Convert pseudo bpf_ld_imm64 into generic
* bpf_ld_imm64 instructions
*/
convert_pseudo_ld_imm64(env);
}
- if (ret == 0)
- adjust_btf_func(env);
+ adjust_btf_func(env);
err_release_maps:
if (!env->prog->aux->used_maps)
* them now. Otherwise free_used_maps() will release them.
*/
release_maps(env);
+ if (!env->prog->aux->used_btfs)
+ release_btfs(env);
/* extension progs temporarily inherit the attach_type of their targets
for verification purposes, so set it back to zero before returning
int br_mrp_set_port_state(struct net_bridge_port *p,
enum br_mrp_port_state_type state)
{
+ u32 port_state;
+
if (!p || !(p->flags & BR_MRP_AWARE))
return -EINVAL;
spin_lock_bh(&p->br->lock);
if (state == BR_MRP_PORT_STATE_FORWARDING)
- p->state = BR_STATE_FORWARDING;
+ port_state = BR_STATE_FORWARDING;
else
- p->state = BR_STATE_BLOCKING;
+ port_state = BR_STATE_BLOCKING;
+ p->state = port_state;
spin_unlock_bh(&p->br->lock);
- br_mrp_port_switchdev_set_state(p, state);
+ br_mrp_port_switchdev_set_state(p, port_state);
return 0;
}
return 0;
}
-/* Determin if the frame type is a ring frame */
+/* Determine if the frame type is a ring frame */
static bool br_mrp_ring_frame(struct sk_buff *skb)
{
const struct br_mrp_tlv_hdr *hdr;
return false;
}
-/* Determin if the frame type is an interconnect frame */
+/* Determine if the frame type is an interconnect frame */
static bool br_mrp_in_frame(struct sk_buff *skb)
{
const struct br_mrp_tlv_hdr *hdr;
br_mrp_ring_port_open(port->dev, false);
}
-/* Determin if the test hdr has a better priority than the node */
+/* Determine if the test hdr has a better priority than the node */
static bool br_mrp_test_better_than_own(struct br_mrp *mrp,
struct net_bridge *br,
const struct br_mrp_ring_test_hdr *hdr)
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/skbuff.h>
+#include <linux/kthread.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <net/net_namespace.h>
#include <net/dsa.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
+#include <net/gro.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/checksum.h>
}
EXPORT_SYMBOL(netdev_notify_peers);
+static int napi_threaded_poll(void *data);
+
+/* Start the polling kthread for @n and record it in n->thread.
+ *
+ * Returns 0 on success. If kthread_run() fails, the error is logged,
+ * n->thread is reset to NULL and the PTR_ERR() value is returned.
+ */
+static int napi_kthread_create(struct napi_struct *n)
+{
+ int err = 0;
+
+ /* Create and wake up the kthread once to put it in
+ * TASK_INTERRUPTIBLE mode to avoid the blocked task
+ * warning and work with loadavg.
+ */
+ n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d",
+ n->dev->name, n->napi_id);
+ if (IS_ERR(n->thread)) {
+ err = PTR_ERR(n->thread);
+ pr_err("kthread_run failed with err %d\n", err);
+ /* do not leave an ERR_PTR behind: the users of n->thread
+ * visible in this file only test it against NULL
+ */
+ n->thread = NULL;
+ }
+
+ return err;
+}
+
static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
{
const struct net_device_ops *ops = dev->netdev_ops;
int skb_csum_hwoffload_help(struct sk_buff *skb,
const netdev_features_t features)
{
- if (unlikely(skb->csum_not_inet))
+ if (unlikely(skb_csum_is_sctp(skb)))
return !!(features & NETIF_F_SCTP_CRC) ? 0 :
skb_crc32c_csum_help(skb);
- return !!(features & NETIF_F_CSUM_MASK) ? 0 : skb_checksum_help(skb);
+ if (features & NETIF_F_HW_CSUM)
+ return 0;
+
+ if (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) {
+ switch (skb->csum_offset) {
+ case offsetof(struct tcphdr, check):
+ case offsetof(struct udphdr, check):
+ return 0;
+ }
+ }
+
+ return skb_checksum_help(skb);
}
EXPORT_SYMBOL(skb_csum_hwoffload_help);
/* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
qdisc_skb_cb(skb)->mru = 0;
+ qdisc_skb_cb(skb)->post_ct = false;
mini_qdisc_bstats_cpu_update(miniq, skb);
switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
skb_reset_mac_header(skb);
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP))
- __skb_tstamp_tx(skb, NULL, skb->sk, SCM_TSTAMP_SCHED);
+ __skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SCHED);
/* Disable soft irqs for various locks below. Also
* stops preemption for RCU.
static inline void ____napi_schedule(struct softnet_data *sd,
struct napi_struct *napi)
{
+ struct task_struct *thread;
+
+ if (test_bit(NAPI_STATE_THREADED, &napi->state)) {
+ /* Paired with smp_mb__before_atomic() in
+ * napi_enable()/dev_set_threaded().
+ * Use READ_ONCE() to guarantee a complete
+ * read on napi->thread. Only call
+ * wake_up_process() when it's not NULL.
+ */
+ thread = READ_ONCE(napi->thread);
+ if (thread) {
+ wake_up_process(thread);
+ return;
+ }
+ }
+
list_add_tail(&napi->poll_list, &sd->poll_list);
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
}
struct xdp_buff *xdp,
struct bpf_prog *xdp_prog)
{
+ void *orig_data, *orig_data_end, *hard_start;
struct netdev_rx_queue *rxqueue;
- void *orig_data, *orig_data_end;
u32 metalen, act = XDP_DROP;
+ u32 mac_len, frame_sz;
__be16 orig_eth_type;
struct ethhdr *eth;
bool orig_bcast;
- int hlen, off;
- u32 mac_len;
+ int off;
/* Reinjected packets coming from act_mirred or similar should
* not get XDP generic processing.
* header.
*/
mac_len = skb->data - skb_mac_header(skb);
- hlen = skb_headlen(skb) + mac_len;
- xdp->data = skb->data - mac_len;
- xdp->data_meta = xdp->data;
- xdp->data_end = xdp->data + hlen;
- xdp->data_hard_start = skb->data - skb_headroom(skb);
+ hard_start = skb->data - skb_headroom(skb);
/* SKB "head" area always have tailroom for skb_shared_info */
- xdp->frame_sz = (void *)skb_end_pointer(skb) - xdp->data_hard_start;
- xdp->frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ frame_sz = (void *)skb_end_pointer(skb) - hard_start;
+ frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
+ rxqueue = netif_get_rxqueue(skb);
+ xdp_init_buff(xdp, frame_sz, &rxqueue->xdp_rxq);
+ xdp_prepare_buff(xdp, hard_start, skb_headroom(skb) - mac_len,
+ skb_headlen(skb) + mac_len, true);
orig_data_end = xdp->data_end;
orig_data = xdp->data;
orig_bcast = is_multicast_ether_addr_64bits(eth->h_dest);
orig_eth_type = eth->h_proto;
- rxqueue = netif_get_rxqueue(skb);
- xdp->rxq = &rxqueue->xdp_rxq;
-
act = bpf_prog_run_xdp(xdp_prog, xdp);
/* check if bpf_xdp_adjust_head was used */
qdisc_skb_cb(skb)->pkt_len = skb->len;
qdisc_skb_cb(skb)->mru = 0;
+ qdisc_skb_cb(skb)->post_ct = false;
skb->tc_at_ingress = 1;
mini_qdisc_bstats_cpu_update(miniq, skb);
skb_reset_mac_len(skb);
}
- if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
- skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
+ if (eth_type_vlan(skb->protocol)) {
skb = skb_vlan_untag(skb);
if (unlikely(!skb))
goto out;
* find vlan device.
*/
skb->pkt_type = PACKET_OTHERHOST;
- } else if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
- skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
+ } else if (eth_type_vlan(skb->protocol)) {
/* Outer header is 802.1P with vlan 0, inner header is
* 802.1Q or 802.1AD and vlan_do_receive() above could
* not find vlan dev for vlan id 0.
}
/* we can have in flight packet[s] on the cpus we are not flushing,
- * synchronize_net() in rollback_registered_many() will take care of
+ * synchronize_net() in unregister_netdevice_many() will take care of
* them
*/
for_each_cpu(cpu, &flush_cpus)
/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded,
* pass the whole batch up to the stack.
*/
- static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb)
+ static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, int segs)
{
list_add_tail(&skb->list, &napi->rx_list);
- if (++napi->rx_count >= gro_normal_batch)
+ napi->rx_count += segs;
+ if (napi->rx_count >= gro_normal_batch)
gro_normal_list(napi);
}
-INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int));
-INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int));
static int napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb)
{
struct packet_offload *ptype;
}
out:
- gro_normal_one(napi, skb);
+ gro_normal_one(napi, skb, NAPI_GRO_CB(skb)->count);
return NET_RX_SUCCESS;
}
napi_gro_complete(napi, oldest);
}
-INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *,
- struct sk_buff *));
-INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *,
- struct sk_buff *));
static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
{
switch (ret) {
case GRO_NORMAL:
- gro_normal_one(napi, skb);
+ gro_normal_one(napi, skb, 1);
break;
- case GRO_DROP:
- kfree_skb(skb);
- break;
-
case GRO_MERGED_FREE:
if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
napi_skb_free_stolen_head(skb);
__skb_push(skb, ETH_HLEN);
skb->protocol = eth_type_trans(skb, skb->dev);
if (ret == GRO_NORMAL)
- gro_normal_one(napi, skb);
+ gro_normal_one(napi, skb, 1);
break;
- case GRO_DROP:
- napi_reuse_skb(napi, skb);
- break;
-
case GRO_MERGED_FREE:
if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
napi_skb_free_stolen_head(skb);
gro_result_t ret;
struct sk_buff *skb = napi_frags_skb(napi);
- if (!skb)
- return GRO_DROP;
-
trace_napi_gro_frags_entry(skb);
ret = napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
napi->gro_bitmask = 0;
}
+/* Switch every NAPI instance on @dev's napi_list between threaded polling
+ * (@threaded == true) and softirq polling (@threaded == false).
+ *
+ * Returns 0 on success, including when @dev is already in the requested
+ * mode, or the error returned by napi_kthread_create(). After such an error
+ * the device ends up in non-threaded mode; kthreads that were already
+ * created are not stopped by this function.
+ */
+int dev_set_threaded(struct net_device *dev, bool threaded)
+{
+ struct napi_struct *napi;
+ int err = 0;
+
+ if (dev->threaded == threaded)
+ return 0;
+
+ if (threaded) {
+ /* make sure every instance has a kthread before any bit is set */
+ list_for_each_entry(napi, &dev->napi_list, dev_list) {
+ if (!napi->thread) {
+ err = napi_kthread_create(napi);
+ if (err) {
+ /* fall back to softirq mode for the whole device */
+ threaded = false;
+ break;
+ }
+ }
+ }
+ }
+
+ dev->threaded = threaded;
+
+ /* Make sure kthread is created before THREADED bit
+ * is set.
+ */
+ smp_mb__before_atomic();
+
+ /* Setting/unsetting threaded mode on a napi might not immediately
+ * take effect, if the current napi instance is actively being
+ * polled. In this case, the switch between threaded mode and
+ * softirq mode will happen in the next round of napi_schedule().
+ * This should not cause hiccups/stalls to the live traffic.
+ */
+ list_for_each_entry(napi, &dev->napi_list, dev_list) {
+ if (threaded)
+ set_bit(NAPI_STATE_THREADED, &napi->state);
+ else
+ clear_bit(NAPI_STATE_THREADED, &napi->state);
+ }
+
+ return err;
+}
+
void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
int (*poll)(struct napi_struct *, int), int weight)
{
set_bit(NAPI_STATE_NPSVC, &napi->state);
list_add_rcu(&napi->dev_list, &dev->napi_list);
napi_hash_add(napi);
+ /* Create kthread for this napi if dev->threaded is set.
+ * Clear dev->threaded if kthread creation failed so that
+ * threaded mode will not be enabled in napi_enable().
+ */
+ if (dev->threaded && napi_kthread_create(napi))
+ dev->threaded = 0;
}
EXPORT_SYMBOL(netif_napi_add);
clear_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state);
clear_bit(NAPI_STATE_DISABLE, &n->state);
+ clear_bit(NAPI_STATE_THREADED, &n->state);
}
EXPORT_SYMBOL(napi_disable);
+/**
+ * napi_enable - enable NAPI scheduling
+ * @n: NAPI context
+ *
+ * Resume NAPI from being scheduled on this context.
+ * Must be paired with napi_disable.
+ *
+ * NAPI_STATE_SCHED must be set in @n->state on entry; this is enforced
+ * with BUG_ON().
+ */
+void napi_enable(struct napi_struct *n)
+{
+ /* NOTE(review): SCHED is presumably left set by napi_disable() -
+ * that part of napi_disable() is not visible here, confirm.
+ */
+ BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
+ smp_mb__before_atomic();
+ clear_bit(NAPI_STATE_SCHED, &n->state);
+ clear_bit(NAPI_STATE_NPSVC, &n->state);
+ /* only use threaded polling when the device requests it and this
+ * instance actually has a kthread
+ */
+ if (n->dev->threaded && n->thread)
+ set_bit(NAPI_STATE_THREADED, &n->state);
+}
+EXPORT_SYMBOL(napi_enable);
+
static void flush_gro_hash(struct napi_struct *napi)
{
int i;
flush_gro_hash(napi);
napi->gro_bitmask = 0;
+
+ if (napi->thread) {
+ kthread_stop(napi->thread);
+ napi->thread = NULL;
+ }
}
EXPORT_SYMBOL(__netif_napi_del);
-static int napi_poll(struct napi_struct *n, struct list_head *repoll)
+static int __napi_poll(struct napi_struct *n, bool *repoll)
{
- void *have;
int work, weight;
- list_del_init(&n->poll_list);
-
- have = netpoll_poll_lock(n);
-
weight = n->weight;
/* This NAPI_STATE_SCHED test is for avoiding a race
n->poll, work, weight);
if (likely(work < weight))
- goto out_unlock;
+ return work;
/* Drivers must not modify the NAPI state if they
* consume the entire weight. In such cases this code
*/
if (unlikely(napi_disable_pending(n))) {
napi_complete(n);
- goto out_unlock;
+ return work;
}
/* The NAPI context has more processing work, but busy-polling
*/
napi_schedule(n);
}
- goto out_unlock;
+ return work;
}
if (n->gro_bitmask) {
if (unlikely(!list_empty(&n->poll_list))) {
pr_warn_once("%s: Budget exhausted after napi rescheduled\n",
n->dev ? n->dev->name : "backlog");
- goto out_unlock;
+ return work;
}
- list_add_tail(&n->poll_list, repoll);
+ *repoll = true;
+
+ return work;
+}
+
+static int napi_poll(struct napi_struct *n, struct list_head *repoll)
+{
+ bool do_repoll = false;
+ void *have;
+ int work;
+
+ list_del_init(&n->poll_list);
+
+ have = netpoll_poll_lock(n);
+
+ work = __napi_poll(n, &do_repoll);
+
+ if (do_repoll)
+ list_add_tail(&n->poll_list, repoll);
-out_unlock:
netpoll_poll_unlock(have);
return work;
}
+/* Sleep until @napi has been scheduled or the kthread has to give up.
+ *
+ * Returns 0, in TASK_RUNNING state, once NAPI_STATE_SCHED is set in
+ * napi->state. Returns -1 when kthread_should_stop() or
+ * napi_disable_pending() becomes true.
+ */
+static int napi_thread_wait(struct napi_struct *napi)
+{
+ /* the task state is set before the conditions are tested, so a
+ * wake_up_process() that arrives between the test and schedule()
+ * is not lost
+ */
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ while (!kthread_should_stop() && !napi_disable_pending(napi)) {
+ if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
+ /* a scheduled threaded napi is not expected to sit on a
+ * softirq poll list as well
+ */
+ WARN_ON(!list_empty(&napi->poll_list));
+ __set_current_state(TASK_RUNNING);
+ return 0;
+ }
+
+ schedule();
+ set_current_state(TASK_INTERRUPTIBLE);
+ }
+ __set_current_state(TASK_RUNNING);
+ return -1;
+}
+
+/* Thread function passed to kthread_run() by napi_kthread_create().
+ * @data is the struct napi_struct to poll.
+ *
+ * Each time napi_thread_wait() reports that the napi was scheduled, the
+ * instance is polled repeatedly until __napi_poll() no longer asks for a
+ * repoll. The function returns 0 once napi_thread_wait() returns non-zero.
+ */
+static int napi_threaded_poll(void *data)
+{
+ struct napi_struct *napi = data;
+ void *have;
+
+ while (!napi_thread_wait(napi)) {
+ for (;;) {
+ bool repoll = false;
+
+ /* poll with bottom halves disabled and the netpoll
+ * poll lock held
+ */
+ local_bh_disable();
+
+ have = netpoll_poll_lock(napi);
+ __napi_poll(napi, &repoll);
+ netpoll_poll_unlock(have);
+
+ __kfree_skb_flush();
+ local_bh_enable();
+
+ if (!repoll)
+ break;
+
+ /* more work pending: let other tasks run between rounds */
+ cond_resched();
+ }
+ }
+ return 0;
+}
+
static __latent_entropy void net_rx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
}
EXPORT_SYMBOL(netdev_get_xmit_slave);
+/* Ask @dev's driver which lower device it selects for @sk.
+ * Returns NULL when the driver provides no ndo_sk_get_lower_dev callback;
+ * otherwise returns whatever the callback returns.
+ */
+static struct net_device *netdev_sk_get_lower_dev(struct net_device *dev,
+						  struct sock *sk)
+{
+	const struct net_device_ops *nd_ops = dev->netdev_ops;
+
+	if (nd_ops->ndo_sk_get_lower_dev)
+		return nd_ops->ndo_sk_get_lower_dev(dev, sk);
+
+	return NULL;
+}
+
+/**
+ * netdev_sk_get_lowest_dev - Get the lowest device in chain given device and socket
+ * @dev: device
+ * @sk: the socket
+ *
+ * Follows the chain of lower devices selected for @sk, starting at @dev,
+ * and returns the last device in that chain. @dev itself is returned if it
+ * has no lower device for @sk.
+ */
+struct net_device *netdev_sk_get_lowest_dev(struct net_device *dev,
+					    struct sock *sk)
+{
+	struct net_device *next;
+
+	/* descend until the current device reports no lower device */
+	while ((next = netdev_sk_get_lower_dev(dev, sk)) != NULL)
+		dev = next;
+
+	return dev;
+}
+EXPORT_SYMBOL(netdev_sk_get_lowest_dev);
+
static void netdev_adjacent_add_links(struct net_device *dev)
{
struct netdev_adjacent *iter;
dev_net(dev)->dev_unreg_count++;
}
-static void rollback_registered_many(struct list_head *head)
-{
- struct net_device *dev, *tmp;
- LIST_HEAD(close_head);
-
- BUG_ON(dev_boot_phase);
- ASSERT_RTNL();
-
- list_for_each_entry_safe(dev, tmp, head, unreg_list) {
- /* Some devices call without registering
- * for initialization unwind. Remove those
- * devices and proceed with the remaining.
- */
- if (dev->reg_state == NETREG_UNINITIALIZED) {
- pr_debug("unregister_netdevice: device %s/%p never was registered\n",
- dev->name, dev);
-
- WARN_ON(1);
- list_del(&dev->unreg_list);
- continue;
- }
- dev->dismantle = true;
- BUG_ON(dev->reg_state != NETREG_REGISTERED);
- }
-
- /* If device is running, close it first. */
- list_for_each_entry(dev, head, unreg_list)
- list_add_tail(&dev->close_list, &close_head);
- dev_close_many(&close_head, true);
-
- list_for_each_entry(dev, head, unreg_list) {
- /* And unlink it from device chain. */
- unlist_netdevice(dev);
-
- dev->reg_state = NETREG_UNREGISTERING;
- }
- flush_all_backlogs();
-
- synchronize_net();
-
- list_for_each_entry(dev, head, unreg_list) {
- struct sk_buff *skb = NULL;
-
- /* Shutdown queueing discipline. */
- dev_shutdown(dev);
-
- dev_xdp_uninstall(dev);
-
- /* Notify protocols, that we are about to destroy
- * this device. They should clean all the things.
- */
- call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
-
- if (!dev->rtnl_link_ops ||
- dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
- skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
- GFP_KERNEL, NULL, 0);
-
- /*
- * Flush the unicast and multicast chains
- */
- dev_uc_flush(dev);
- dev_mc_flush(dev);
-
- netdev_name_node_alt_flush(dev);
- netdev_name_node_free(dev->name_node);
-
- if (dev->netdev_ops->ndo_uninit)
- dev->netdev_ops->ndo_uninit(dev);
-
- if (skb)
- rtmsg_ifinfo_send(skb, dev, GFP_KERNEL);
-
- /* Notifier chain MUST detach us all upper devices. */
- WARN_ON(netdev_has_any_upper_dev(dev));
- WARN_ON(netdev_has_any_lower_dev(dev));
-
- /* Remove entries from kobject tree */
- netdev_unregister_kobject(dev);
-#ifdef CONFIG_XPS
- /* Remove XPS queueing entries */
- netif_reset_xps_queues_gt(dev, 0);
-#endif
- }
-
- synchronize_net();
-
- list_for_each_entry(dev, head, unreg_list)
- dev_put(dev);
-}
-
-static void rollback_registered(struct net_device *dev)
-{
- LIST_HEAD(single);
-
- list_add(&dev->unreg_list, &single);
- rollback_registered_many(&single);
- list_del(&single);
-}
-
static netdev_features_t netdev_sync_upper_features(struct net_device *lower,
struct net_device *upper, netdev_features_t features)
{
dev->hw_features |= (NETIF_F_SOFT_FEATURES | NETIF_F_SOFT_FEATURES_OFF);
dev->features |= NETIF_F_SOFT_FEATURES;
- if (dev->netdev_ops->ndo_udp_tunnel_add) {
+ if (dev->udp_tunnel_nic_info) {
dev->features |= NETIF_F_RX_UDP_TUNNEL_PORT;
dev->hw_features |= NETIF_F_RX_UDP_TUNNEL_PORT;
}
if (ret) {
/* Expect explicit free_netdev() on failure */
dev->needs_free_netdev = false;
- rollback_registered(dev);
- net_set_todo(dev);
+ unregister_netdevice_queue(dev, NULL);
goto out;
}
/*
if (head) {
list_move_tail(&dev->unreg_list, head);
} else {
- rollback_registered(dev);
- /* Finish processing unregister after unlock */
- net_set_todo(dev);
+ LIST_HEAD(single);
+
+ list_add(&dev->unreg_list, &single);
+ unregister_netdevice_many(&single);
}
}
EXPORT_SYMBOL(unregister_netdevice_queue);
*/
void unregister_netdevice_many(struct list_head *head)
{
- struct net_device *dev;
+ struct net_device *dev, *tmp;
+ LIST_HEAD(close_head);
+
+ BUG_ON(dev_boot_phase);
+ ASSERT_RTNL();
+
+ if (list_empty(head))
+ return;
+
+ list_for_each_entry_safe(dev, tmp, head, unreg_list) {
+ /* Some devices call without registering
+ * for initialization unwind. Remove those
+ * devices and proceed with the remaining.
+ */
+ if (dev->reg_state == NETREG_UNINITIALIZED) {
+ pr_debug("unregister_netdevice: device %s/%p never was registered\n",
+ dev->name, dev);
+
+ WARN_ON(1);
+ list_del(&dev->unreg_list);
+ continue;
+ }
+ dev->dismantle = true;
+ BUG_ON(dev->reg_state != NETREG_REGISTERED);
+ }
+
+ /* If device is running, close it first. */
+ list_for_each_entry(dev, head, unreg_list)
+ list_add_tail(&dev->close_list, &close_head);
+ dev_close_many(&close_head, true);
+
+ list_for_each_entry(dev, head, unreg_list) {
+ /* And unlink it from device chain. */
+ unlist_netdevice(dev);
+
+ dev->reg_state = NETREG_UNREGISTERING;
+ }
+ flush_all_backlogs();
+
+ synchronize_net();
+
+ list_for_each_entry(dev, head, unreg_list) {
+ struct sk_buff *skb = NULL;
+
+ /* Shutdown queueing discipline. */
+ dev_shutdown(dev);
+
+ dev_xdp_uninstall(dev);
+
+ /* Notify protocols, that we are about to destroy
+ * this device. They should clean all the things.
+ */
+ call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+
+ if (!dev->rtnl_link_ops ||
+ dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
+ skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
+ GFP_KERNEL, NULL, 0);
+
+ /*
+ * Flush the unicast and multicast chains
+ */
+ dev_uc_flush(dev);
+ dev_mc_flush(dev);
+
+ netdev_name_node_alt_flush(dev);
+ netdev_name_node_free(dev->name_node);
+
+ if (dev->netdev_ops->ndo_uninit)
+ dev->netdev_ops->ndo_uninit(dev);
+
+ if (skb)
+ rtmsg_ifinfo_send(skb, dev, GFP_KERNEL);
+
+ /* Notifier chain MUST detach us all upper devices. */
+ WARN_ON(netdev_has_any_upper_dev(dev));
+ WARN_ON(netdev_has_any_lower_dev(dev));
+
+ /* Remove entries from kobject tree */
+ netdev_unregister_kobject(dev);
+#ifdef CONFIG_XPS
+ /* Remove XPS queueing entries */
+ netif_reset_xps_queues_gt(dev, 0);
+#endif
+ }
+
+ synchronize_net();
- if (!list_empty(head)) {
- rollback_registered_many(head);
- list_for_each_entry(dev, head, unreg_list)
- net_set_todo(dev);
- list_del(head);
+ list_for_each_entry(dev, head, unreg_list) {
+ dev_put(dev);
+ net_set_todo(dev);
}
+
+ list_del(head);
}
EXPORT_SYMBOL(unregister_netdevice_many);
static DEFINE_MUTEX(dsa2_mutex);
LIST_HEAD(dsa_tree_list);
+/**
+ * dsa_tree_notify - Execute code for all switches in a DSA switch tree.
+ * @dst: collection of struct dsa_switch devices to notify.
+ * @e: event, must be of type DSA_NOTIFIER_*
+ * @v: event-specific value.
+ *
+ * Given a struct dsa_switch_tree, this can be used to run a function once for
+ * each member DSA switch. The other alternative of traversing the tree is only
+ * through its ports list, which does not uniquely list the switches.
+ *
+ * Return: the result of raw_notifier_call_chain() on the tree's notifier
+ * head (@dst->nh), converted with notifier_to_errno().
+ */
+int dsa_tree_notify(struct dsa_switch_tree *dst, unsigned long e, void *v)
+{
+ struct raw_notifier_head *nh = &dst->nh;
+ int err;
+
+ err = raw_notifier_call_chain(nh, e, v);
+
+ return notifier_to_errno(err);
+}
+
+/**
+ * dsa_broadcast - Notify all DSA trees in the system.
+ * @e: event, must be of type DSA_NOTIFIER_*
+ * @v: event-specific value.
+ *
+ * Can be used to notify the switching fabric of events such as cross-chip
+ * bridging between disjoint trees (such as islands of tagger-compatible
+ * switches bridged by an incompatible middle switch).
+ *
+ * Trees are visited in dsa_tree_list order. The walk stops at the first tree
+ * whose dsa_tree_notify() returns non-zero; trees after it are not notified.
+ *
+ * Return: 0 if every tree returned 0 (or the list is empty), otherwise the
+ * first non-zero value returned by dsa_tree_notify().
+ */
+int dsa_broadcast(unsigned long e, void *v)
+{
+ struct dsa_switch_tree *dst;
+ int err = 0;
+
+ list_for_each_entry(dst, &dsa_tree_list, list) {
+ err = dsa_tree_notify(dst, e, v);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+/**
+ * dsa_lag_map() - Map LAG netdev to a linear LAG ID
+ * @dst: Tree in which to record the mapping.
+ * @lag: Netdev that is to be mapped to an ID.
+ *
+ * dsa_lag_id/dsa_lag_dev can then be used to translate between the
+ * two spaces. The size of the mapping space is determined by the
+ * driver by setting ds->num_lag_ids. It is perfectly legal to leave
+ * it unset if it is not needed, in which case these functions become
+ * no-ops.
+ *
+ * The function returns without changing @dst->lags when @lag is
+ * already mapped or when no unused ID is left.
+ */
+void dsa_lag_map(struct dsa_switch_tree *dst, struct net_device *lag)
+{
+ unsigned int id;
+
+ if (dsa_lag_id(dst, lag) >= 0)
+ /* Already mapped */
+ return;
+
+ for (id = 0; id < dst->lags_len; id++) {
+ if (!dsa_lag_dev(dst, id)) {
+ dst->lags[id] = lag; /* take the lowest ID for which dsa_lag_dev() is NULL */
+ return;
+ }
+ }
+
+ /* No IDs left, which is OK. Some drivers do not need it. The
+ * ones that do, e.g. mv88e6xxx, will discover that dsa_lag_id
+ * returns an error for this device when joining the LAG. The
+ * driver can then return -EOPNOTSUPP back to DSA, which will
+ * fall back to a software LAG.
+ */
+}
+
+/**
+ * dsa_lag_unmap() - Remove a LAG ID mapping
+ * @dst: Tree in which the mapping is recorded.
+ * @lag: Netdev that was mapped.
+ *
+ * As there may be multiple users of the mapping, it is only removed
+ * if there are no other references to it. At most one entry of
+ * @dst->lags is cleared: the search stops at the first ID whose
+ * dsa_lag_dev() equals @lag.
+ */
+void dsa_lag_unmap(struct dsa_switch_tree *dst, struct net_device *lag)
+{
+ struct dsa_port *dp;
+ unsigned int id;
+
+ dsa_lag_foreach_port(dp, dst, lag)
+ /* There are remaining users of this mapping */
+ return;
+
+ dsa_lags_foreach_id(id, dst) {
+ if (dsa_lag_dev(dst, id) == lag) {
+ dst->lags[id] = NULL; /* release the ID that was held by @lag */
+ break;
+ }
+ }
+}
+
struct dsa_switch *dsa_switch_find(int tree_index, int sw_index)
{
struct dsa_switch_tree *dst;
static void dsa_tree_free(struct dsa_switch_tree *dst)
{
+ if (dst->tag_ops)
+ dsa_tag_driver_put(dst->tag_ops);
list_del(&dst->list);
kfree(dst);
}
break;
case DSA_PORT_TYPE_CPU:
dsa_port_disable(dp);
- dsa_tag_driver_put(dp->tag_ops);
dsa_port_link_unregister_of(dp);
break;
case DSA_PORT_TYPE_DSA:
return -EOPNOTSUPP;
}
+static int dsa_devlink_sb_pool_get(struct devlink *dl,
+ unsigned int sb_index, u16 pool_index,
+ struct devlink_sb_pool_info *pool_info)
+{ /* devlink .sb_pool_get callback: forwards to the switch driver's devlink_sb_pool_get op. */
+ struct dsa_switch *ds = dsa_devlink_to_ds(dl);
+
+ if (!ds->ops->devlink_sb_pool_get)
+ return -EOPNOTSUPP; /* switch driver does not provide this op */
+
+ return ds->ops->devlink_sb_pool_get(ds, sb_index, pool_index,
+ pool_info);
+}
+
+static int dsa_devlink_sb_pool_set(struct devlink *dl, unsigned int sb_index,
+ u16 pool_index, u32 size,
+ enum devlink_sb_threshold_type threshold_type,
+ struct netlink_ext_ack *extack)
+{ /* devlink .sb_pool_set callback: forwards to the switch driver's devlink_sb_pool_set op. */
+ struct dsa_switch *ds = dsa_devlink_to_ds(dl);
+
+ if (!ds->ops->devlink_sb_pool_set)
+ return -EOPNOTSUPP; /* switch driver does not provide this op */
+
+ return ds->ops->devlink_sb_pool_set(ds, sb_index, pool_index, size,
+ threshold_type, extack);
+}
+
+static int dsa_devlink_sb_port_pool_get(struct devlink_port *dlp,
+ unsigned int sb_index, u16 pool_index,
+ u32 *p_threshold)
+{ /* devlink .sb_port_pool_get callback: resolves switch and port from @dlp, then forwards to the driver op. */
+ struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
+ int port = dsa_devlink_port_to_port(dlp);
+
+ if (!ds->ops->devlink_sb_port_pool_get)
+ return -EOPNOTSUPP; /* switch driver does not provide this op */
+
+ return ds->ops->devlink_sb_port_pool_get(ds, port, sb_index,
+ pool_index, p_threshold);
+}
+
+static int dsa_devlink_sb_port_pool_set(struct devlink_port *dlp,
+ unsigned int sb_index, u16 pool_index,
+ u32 threshold,
+ struct netlink_ext_ack *extack)
+{ /* devlink .sb_port_pool_set callback: resolves switch and port from @dlp, then forwards to the driver op. */
+ struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
+ int port = dsa_devlink_port_to_port(dlp);
+
+ if (!ds->ops->devlink_sb_port_pool_set)
+ return -EOPNOTSUPP; /* switch driver does not provide this op */
+
+ return ds->ops->devlink_sb_port_pool_set(ds, port, sb_index,
+ pool_index, threshold, extack);
+}
+
+static int
+dsa_devlink_sb_tc_pool_bind_get(struct devlink_port *dlp,
+ unsigned int sb_index, u16 tc_index,
+ enum devlink_sb_pool_type pool_type,
+ u16 *p_pool_index, u32 *p_threshold)
+{ /* devlink .sb_tc_pool_bind_get callback: resolves switch and port from @dlp, then forwards to the driver op. */
+ struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
+ int port = dsa_devlink_port_to_port(dlp);
+
+ if (!ds->ops->devlink_sb_tc_pool_bind_get)
+ return -EOPNOTSUPP; /* switch driver does not provide this op */
+
+ return ds->ops->devlink_sb_tc_pool_bind_get(ds, port, sb_index,
+ tc_index, pool_type,
+ p_pool_index, p_threshold);
+}
+
+static int
+dsa_devlink_sb_tc_pool_bind_set(struct devlink_port *dlp,
+ unsigned int sb_index, u16 tc_index,
+ enum devlink_sb_pool_type pool_type,
+ u16 pool_index, u32 threshold,
+ struct netlink_ext_ack *extack)
+{ /* devlink .sb_tc_pool_bind_set callback: resolves switch and port from @dlp, then forwards to the driver op. */
+ struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
+ int port = dsa_devlink_port_to_port(dlp);
+
+ if (!ds->ops->devlink_sb_tc_pool_bind_set)
+ return -EOPNOTSUPP; /* switch driver does not provide this op */
+
+ return ds->ops->devlink_sb_tc_pool_bind_set(ds, port, sb_index,
+ tc_index, pool_type,
+ pool_index, threshold,
+ extack);
+}
+
+static int dsa_devlink_sb_occ_snapshot(struct devlink *dl,
+ unsigned int sb_index)
+{ /* devlink .sb_occ_snapshot callback: forwards to the switch driver's devlink_sb_occ_snapshot op. */
+ struct dsa_switch *ds = dsa_devlink_to_ds(dl);
+
+ if (!ds->ops->devlink_sb_occ_snapshot)
+ return -EOPNOTSUPP; /* switch driver does not provide this op */
+
+ return ds->ops->devlink_sb_occ_snapshot(ds, sb_index);
+}
+
+static int dsa_devlink_sb_occ_max_clear(struct devlink *dl,
+ unsigned int sb_index)
+{ /* devlink .sb_occ_max_clear callback: forwards to the switch driver's devlink_sb_occ_max_clear op. */
+ struct dsa_switch *ds = dsa_devlink_to_ds(dl);
+
+ if (!ds->ops->devlink_sb_occ_max_clear)
+ return -EOPNOTSUPP; /* switch driver does not provide this op */
+
+ return ds->ops->devlink_sb_occ_max_clear(ds, sb_index);
+}
+
+static int dsa_devlink_sb_occ_port_pool_get(struct devlink_port *dlp,
+ unsigned int sb_index,
+ u16 pool_index, u32 *p_cur,
+ u32 *p_max)
+{ /* devlink .sb_occ_port_pool_get callback: resolves switch and port from @dlp, then forwards to the driver op. */
+ struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
+ int port = dsa_devlink_port_to_port(dlp);
+
+ if (!ds->ops->devlink_sb_occ_port_pool_get)
+ return -EOPNOTSUPP; /* switch driver does not provide this op */
+
+ return ds->ops->devlink_sb_occ_port_pool_get(ds, port, sb_index,
+ pool_index, p_cur, p_max);
+}
+
+static int
+dsa_devlink_sb_occ_tc_port_bind_get(struct devlink_port *dlp,
+ unsigned int sb_index, u16 tc_index,
+ enum devlink_sb_pool_type pool_type,
+ u32 *p_cur, u32 *p_max)
+{ /* devlink .sb_occ_tc_port_bind_get callback: resolves switch and port from @dlp, then forwards to the driver op. */
+ struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
+ int port = dsa_devlink_port_to_port(dlp);
+
+ if (!ds->ops->devlink_sb_occ_tc_port_bind_get)
+ return -EOPNOTSUPP; /* switch driver does not provide this op */
+
+ return ds->ops->devlink_sb_occ_tc_port_bind_get(ds, port,
+ sb_index, tc_index,
+ pool_type, p_cur,
+ p_max);
+}
+
static const struct devlink_ops dsa_devlink_ops = {
- .info_get = dsa_devlink_info_get,
+ .info_get = dsa_devlink_info_get,
+ .sb_pool_get = dsa_devlink_sb_pool_get,
+ .sb_pool_set = dsa_devlink_sb_pool_set,
+ .sb_port_pool_get = dsa_devlink_sb_port_pool_get,
+ .sb_port_pool_set = dsa_devlink_sb_port_pool_set,
+ .sb_tc_pool_bind_get = dsa_devlink_sb_tc_pool_bind_get,
+ .sb_tc_pool_bind_set = dsa_devlink_sb_tc_pool_bind_set,
+ .sb_occ_snapshot = dsa_devlink_sb_occ_snapshot,
+ .sb_occ_max_clear = dsa_devlink_sb_occ_max_clear,
+ .sb_occ_port_pool_get = dsa_devlink_sb_occ_port_pool_get,
+ .sb_occ_tc_port_bind_get = dsa_devlink_sb_occ_tc_port_bind_get,
};
static int dsa_switch_setup(struct dsa_switch *ds)
if (err)
goto unregister_devlink_ports;
+ ds->configure_vlan_while_not_filtering = true;
+
err = ds->ops->setup(ds);
if (err < 0)
goto unregister_notifier;
ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
if (!ds->slave_mii_bus) {
err = -ENOMEM;
- goto unregister_notifier;
+ goto teardown;
}
dsa_slave_mii_bus_init(ds);
err = mdiobus_register(ds->slave_mii_bus);
if (err < 0)
- goto unregister_notifier;
+ goto teardown;
}
ds->setup = true;
return 0;
+ teardown:
+ if (ds->ops->teardown)
+ ds->ops->teardown(ds);
unregister_notifier:
dsa_switch_unregister_notifier(ds);
unregister_devlink_ports:
dsa_master_teardown(dp->master);
}
+static int dsa_tree_setup_lags(struct dsa_switch_tree *dst)
+{ /* Allocate dst->lags, sized to the largest num_lag_ids of any switch in the tree. Returns 0 or -ENOMEM. */
+ unsigned int len = 0;
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list) {
+ if (dp->ds->num_lag_ids > len)
+ len = dp->ds->num_lag_ids; /* keep the maximum seen so far */
+ }
+
+ if (!len)
+ return 0; /* no switch reports LAG IDs: nothing is allocated, dst->lags_len is not written */
+
+ dst->lags = kcalloc(len, sizeof(*dst->lags), GFP_KERNEL);
+ if (!dst->lags)
+ return -ENOMEM;
+
+ dst->lags_len = len; /* only recorded once the allocation succeeded */
+ return 0;
+}
+
+static void dsa_tree_teardown_lags(struct dsa_switch_tree *dst)
+{ /* Free the LAG table allocated by dsa_tree_setup_lags(). */
+ kfree(dst->lags); /* NOTE(review): dst->lags and dst->lags_len are left unchanged here - confirm nothing reads them after teardown */
+}
+
static int dsa_tree_setup(struct dsa_switch_tree *dst)
{
bool complete;
if (err)
goto teardown_switches;
+ err = dsa_tree_setup_lags(dst);
+ if (err)
+ goto teardown_master;
+
dst->setup = true;
pr_info("DSA: tree %d setup\n", dst->index);
return 0;
+teardown_master:
+ dsa_tree_teardown_master(dst);
teardown_switches:
dsa_tree_teardown_switches(dst);
teardown_default_cpu:
if (!dst->setup)
return;
+ dsa_tree_teardown_lags(dst);
+
dsa_tree_teardown_master(dst);
dsa_tree_teardown_switches(dst);
dst->setup = false;
}
+/* Since the dsa/tagging sysfs device attribute is per master, the assumption
+ * is that all DSA switches within a tree share the same tagger, otherwise
+ * they would have formed disjoint trees (different "dsa,member" values).
+ *
+ * Switches the tree @dst to @tag_ops by sending DSA_NOTIFIER_TAG_PROTO to the
+ * tree. Returns 0 on success, -EBUSY if @master or any user port is up, the
+ * notifier error if the switch-over failed (after re-notifying with
+ * @old_tag_ops), or the value of restart_syscall() if rtnl could not be taken.
+ */
+int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
+ struct net_device *master,
+ const struct dsa_device_ops *tag_ops,
+ const struct dsa_device_ops *old_tag_ops)
+{
+ struct dsa_notifier_tag_proto_info info;
+ struct dsa_port *dp;
+ int err = -EBUSY; /* result of the IFF_UP checks below when they fail */
+
+ if (!rtnl_trylock())
+ return restart_syscall(); /* rtnl not acquired without blocking */
+
+ /* At the moment we don't allow changing the tag protocol under
+ * traffic. The rtnl_mutex also happens to serialize concurrent
+ * attempts to change the tagging protocol. If we ever lift the IFF_UP
+ * restriction, there needs to be another mutex which serializes this.
+ */
+ if (master->flags & IFF_UP)
+ goto out_unlock;
+
+ list_for_each_entry(dp, &dst->ports, list) {
+ if (!dsa_is_user_port(dp->ds, dp->index))
+ continue; /* only user ports have their slave netdev checked */
+
+ if (dp->slave->flags & IFF_UP)
+ goto out_unlock;
+ }
+
+ info.tag_ops = tag_ops;
+ err = dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO, &info);
+ if (err)
+ goto out_unwind_tagger;
+
+ dst->tag_ops = tag_ops; /* recorded only after the notification succeeded */
+
+ rtnl_unlock();
+
+ return 0;
+
+out_unwind_tagger:
+ info.tag_ops = old_tag_ops;
+ dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO, &info); /* return value ignored; the original err is reported */
+out_unlock:
+ rtnl_unlock();
+ return err;
+}
+
static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index)
{
struct dsa_switch_tree *dst = ds->dst;
{
struct dsa_switch *ds = dp->ds;
struct dsa_switch_tree *dst = ds->dst;
- const struct dsa_device_ops *tag_ops;
enum dsa_tag_protocol tag_protocol;
tag_protocol = dsa_get_tag_protocol(dp, master);
- tag_ops = dsa_tag_driver_get(tag_protocol);
- if (IS_ERR(tag_ops)) {
- if (PTR_ERR(tag_ops) == -ENOPROTOOPT)
- return -EPROBE_DEFER;
- dev_warn(ds->dev, "No tagger for this switch\n");
- dp->master = NULL;
- return PTR_ERR(tag_ops);
+ if (dst->tag_ops) {
+ if (dst->tag_ops->proto != tag_protocol) {
+ dev_err(ds->dev,
+ "A DSA switch tree can have only one tagging protocol\n");
+ return -EINVAL;
+ }
+ /* In the case of multiple CPU ports per switch, the tagging
+ * protocol is still reference-counted only per switch tree, so
+ * nothing to do here.
+ */
+ } else {
+ dst->tag_ops = dsa_tag_driver_get(tag_protocol);
+ if (IS_ERR(dst->tag_ops)) {
+ if (PTR_ERR(dst->tag_ops) == -ENOPROTOOPT)
+ return -EPROBE_DEFER;
+ dev_warn(ds->dev, "No tagger for this switch\n");
+ dp->master = NULL;
+ return PTR_ERR(dst->tag_ops);
+ }
}
dp->master = master;
dp->type = DSA_PORT_TYPE_CPU;
- dp->filter = tag_ops->filter;
- dp->rcv = tag_ops->rcv;
- dp->tag_ops = tag_ops;
+ dsa_port_set_tag_protocol(dp, dst->tag_ops);
dp->dst = dst;
return 0;
goto out_put_node;
if (reg >= ds->num_ports) {
+ dev_err(ds->dev, "port %pOF index %u exceeds num_ports (%zu)\n",
+ port, reg, ds->num_ports);
err = -EINVAL;
goto out_put_node;
}
{
const struct flow_offload_tuple *tuple = data;
- return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
+ return jhash(tuple, offsetof(struct flow_offload_tuple, __hash), seed);
}
static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
{
const struct flow_offload_tuple_rhash *tuplehash = data;
- return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
+ return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, __hash), seed);
}
static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
const struct flow_offload_tuple *tuple = arg->key;
const struct flow_offload_tuple_rhash *x = ptr;
- if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
+ if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, __hash)))
return 1;
return 0;
return -1;
tcph = (void *)(skb_network_header(skb) + thoff);
- inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true);
+ inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false);
return 0;
}
udph = (void *)(skb_network_header(skb) + thoff);
if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
inet_proto_csum_replace2(&udph->check, skb, port,
- new_port, true);
+ new_port, false);
if (!udph->check)
udph->check = CSUM_MANGLED_0;
}
return nft_delset(&ctx, set);
}
+static int nft_validate_register_store(const struct nft_ctx *ctx,
+ enum nft_registers reg,
+ const struct nft_data *data,
+ enum nft_data_types type,
+ unsigned int len);
+
static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx,
struct nft_set *set,
const struct nft_set_iter *iter,
struct nft_expr *expr_array[NFT_SET_EXPR_MAX] = {};
struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
u8 genmask = nft_genmask_next(ctx->net);
+ u32 flags = 0, size = 0, num_exprs = 0;
struct nft_set_ext_tmpl tmpl;
struct nft_set_ext *ext, *ext2;
struct nft_set_elem elem;
struct nft_data_desc desc;
enum nft_registers dreg;
struct nft_trans *trans;
- u32 flags = 0, size = 0;
u64 timeout;
u64 expiration;
int err, i;
if (nla[NFTA_SET_ELEM_EXPR]) {
struct nft_expr *expr;
- if (set->num_exprs != 1)
+ if (set->num_exprs && set->num_exprs != 1)
return -EOPNOTSUPP;
expr = nft_set_elem_expr_alloc(ctx, set,
return PTR_ERR(expr);
expr_array[0] = expr;
+ num_exprs = 1;
- if (set->exprs[0] && set->exprs[0]->ops != expr->ops) {
+ if (set->num_exprs && set->exprs[0]->ops != expr->ops) {
err = -EOPNOTSUPP;
goto err_set_elem_expr;
}
struct nlattr *tmp;
int left;
- if (set->num_exprs == 0)
- return -EOPNOTSUPP;
-
i = 0;
nla_for_each_nested(tmp, nla[NFTA_SET_ELEM_EXPRESSIONS], left) {
- if (i == set->num_exprs) {
+ if (i == NFT_SET_EXPR_MAX ||
+ (set->num_exprs && set->num_exprs == i)) {
err = -E2BIG;
goto err_set_elem_expr;
}
goto err_set_elem_expr;
}
expr_array[i] = expr;
+ num_exprs++;
- if (expr->ops != set->exprs[i]->ops) {
+ if (set->num_exprs && expr->ops != set->exprs[i]->ops) {
err = -EOPNOTSUPP;
goto err_set_elem_expr;
}
i++;
}
- if (set->num_exprs != i) {
+ if (set->num_exprs && set->num_exprs != i) {
err = -EOPNOTSUPP;
goto err_set_elem_expr;
}
err = nft_set_elem_expr_clone(ctx, set, expr_array);
if (err < 0)
goto err_set_elem_expr_clone;
+
+ num_exprs = set->num_exprs;
}
err = nft_setelem_parse_key(ctx, set, &elem.key.val,
nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
}
- if (set->num_exprs) {
- for (i = 0; i < set->num_exprs; i++)
+ if (num_exprs) {
+ for (i = 0; i < num_exprs; i++)
size += expr_array[i]->ops->size;
nft_set_ext_add_length(&tmpl, NFT_SET_EXT_EXPRESSIONS,
*nft_set_ext_obj(ext) = obj;
obj->use++;
}
- for (i = 0; i < set->num_exprs; i++)
+ for (i = 0; i < num_exprs; i++)
nft_set_elem_expr_setup(ext, i, expr_array);
trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set);
err_parse_key:
nft_data_release(&elem.key.val, NFT_DATA_VALUE);
err_set_elem_expr:
- for (i = 0; i < set->num_exprs && expr_array[i]; i++)
+ for (i = 0; i < num_exprs && expr_array[i]; i++)
nft_expr_destroy(ctx, expr_array[i]);
err_set_elem_expr_clone:
return err;
* Registers used to be 128 bit wide, these register numbers will be
* mapped to the corresponding 32 bit register numbers.
*/
-unsigned int nft_parse_register(const struct nlattr *attr)
+static unsigned int nft_parse_register(const struct nlattr *attr)
{
unsigned int reg;
return reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00;
}
}
-EXPORT_SYMBOL_GPL(nft_parse_register);
/**
* nft_dump_register - dump a register value to a netlink attribute
* Validate that the input register is one of the general purpose
* registers and that the length of the load is within the bounds.
*/
-int nft_validate_register_load(enum nft_registers reg, unsigned int len)
+static int nft_validate_register_load(enum nft_registers reg, unsigned int len)
{
if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE)
return -EINVAL;
return 0;
}
-EXPORT_SYMBOL_GPL(nft_validate_register_load);
+
+int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len)
+{ /* Parse the register in @attr and validate a load of @len from it; on success store it in *@sreg and return 0. */
+ u32 reg;
+ int err;
+
+ reg = nft_parse_register(attr);
+ err = nft_validate_register_load(reg, len);
+ if (err < 0)
+ return err; /* *sreg is left untouched on failure */
+
+ *sreg = reg; /* u32 narrowed to u8; NOTE(review): assumes a validated register number fits in u8 - confirm */
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_parse_register_load);
/**
* nft_validate_register_store - validate an expressions' register store
* A value of NULL for the data means that its runtime gathered
* data.
*/
-int nft_validate_register_store(const struct nft_ctx *ctx,
- enum nft_registers reg,
- const struct nft_data *data,
- enum nft_data_types type, unsigned int len)
+static int nft_validate_register_store(const struct nft_ctx *ctx,
+ enum nft_registers reg,
+ const struct nft_data *data,
+ enum nft_data_types type,
+ unsigned int len)
{
int err;
return 0;
}
}
-EXPORT_SYMBOL_GPL(nft_validate_register_store);
+
+int nft_parse_register_store(const struct nft_ctx *ctx,
+ const struct nlattr *attr, u8 *dreg,
+ const struct nft_data *data,
+ enum nft_data_types type, unsigned int len)
+{ /* Parse the register in @attr and validate a store to it; on success store it in *@dreg and return 0. */
+ int err;
+ u32 reg;
+
+ reg = nft_parse_register(attr);
+ err = nft_validate_register_store(ctx, reg, data, type, len);
+ if (err < 0)
+ return err; /* *dreg is left untouched on failure */
+
+ *dreg = reg; /* u32 narrowed to u8; NOTE(review): assumes a validated register number fits in u8 - confirm */
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_parse_register_store);
static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = {
[NFTA_VERDICT_CODE] = { .type = NLA_U32 },
}
EXPORT_SYMBOL_GPL(__nft_release_basechain);
+ static void __nft_release_hooks(struct net *net)
+ { /* Call nf_tables_unregister_hook() for every chain of every table in @net; nothing is freed here. */
+ struct nft_table *table;
+ struct nft_chain *chain;
+
+ list_for_each_entry(table, &net->nft.tables, list) {
+ list_for_each_entry(chain, &table->chains, list)
+ nf_tables_unregister_hook(net, table, chain);
+ }
+ }
+
static void __nft_release_tables(struct net *net)
{
struct nft_flowtable *flowtable, *nf;
list_for_each_entry_safe(table, nt, &net->nft.tables, list) {
ctx.family = table->family;
-
- list_for_each_entry(chain, &table->chains, list)
- nf_tables_unregister_hook(net, table, chain);
- /* No packets are walking on these chains anymore. */
ctx.table = table;
list_for_each_entry(chain, &table->chains, list) {
ctx.chain = chain;
return 0;
}
+ static void __net_exit nf_tables_pre_exit_net(struct net *net)
+ { /* pernet .pre_exit handler (see nf_tables_net_ops): unregisters the hooks of @net via __nft_release_hooks(). */
+ __nft_release_hooks(net);
+ }
+
static void __net_exit nf_tables_exit_net(struct net *net)
{
mutex_lock(&net->nft.commit_mutex);
}
static struct pernet_operations nf_tables_net_ops = {
- .init = nf_tables_init_net,
- .exit = nf_tables_exit_net,
+ .init = nf_tables_init_net,
+ .pre_exit = nf_tables_pre_exit_net,
+ .exit = nf_tables_exit_net,
};
static int __init nf_tables_module_init(void)