ACPI FOR ARM64 (ACPI/arm64)
F: arch/arm/include/asm/floppy.h
ARM PMU PROFILING AND DEBUGGING
+ M: Will Deacon <will@kernel.org>
S: Maintained
F: drivers/irqchip/irq-vic.c
ARM SMMU DRIVERS
+ M: Will Deacon <will@kernel.org>
S: Maintained
F: drivers/usb/dwc3/dwc3-qcom.c
F: include/dt-bindings/*/qcom*
F: include/linux/*/qcom*
- T: git git://git.kernel.org/pub/scm/linux/kernel/git/agross/linux.git
+ T: git git://git.kernel.org/pub/scm/linux/kernel/git/qcom/linux.git
ARM/RADISYS ENP2611 MACHINE SUPPORT
ARM64 PORT (AARCH64 ARCHITECTURE)
+ M: Will Deacon <will@kernel.org>
T: git git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git
S: Maintained
F: drivers/net/wireless/atmel/atmel*
ATOMIC INFRASTRUCTURE
+ M: Will Deacon <will@kernel.org>
BROADCOM BCM2835 ARM ARCHITECTURE
T: git git://github.com/anholt/linux
S: Maintained
F: drivers/staging/fsl-dpaa2/ethsw
-DPAA2 PTP CLOCK DRIVER
-S: Maintained
-F: drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp*
-F: drivers/net/ethernet/freescale/dpaa2/dprtc*
-
DPT_I2O SCSI RAID DRIVER
K: fmc_d.*register
FPGA MANAGER FRAMEWORK
S: Maintained
S: Maintained
+F: drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp*
+F: drivers/net/ethernet/freescale/dpaa2/dprtc*
F: drivers/net/ethernet/freescale/enetc/enetc_ptp.c
F: drivers/ptp/ptp_qoriq.c
F: drivers/ptp/ptp_qoriq_debugfs.c
W: http://gigaset307x.sourceforge.net/
S: Odd Fixes
-F: Documentation/isdn/README.gigaset
-F: drivers/isdn/gigaset/
-F: include/uapi/linux/gigaset_dev.h
+F: drivers/staging/isdn/gigaset/
GNSS SUBSYSTEM
W: http://www.linux-iscsi.org
F: drivers/infiniband/ulp/isert
-ISDN SUBSYSTEM
+ISDN/mISDN SUBSYSTEM
W: http://www.isdn4linux.de
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/kkeil/isdn-2.6.git
S: Maintained
+F: drivers/isdn/mISDN
+F: drivers/isdn/hardware
+
+ISDN/CAPI SUBSYSTEM
+W: http://www.isdn4linux.de
+S: Odd Fixes
F: Documentation/isdn/
-F: drivers/isdn/
-F: include/linux/isdn.h
+F: drivers/isdn/capi/
+F: drivers/staging/isdn/
+F: net/bluetooth/cmtp/
F: include/linux/isdn/
-F: include/uapi/linux/isdn.h
F: include/uapi/linux/isdn/
IT87 HARDWARE MONITORING DRIVER
KERNEL NFSD, SUNRPC, AND LOCKD SERVERS
W: http://nfs.sourceforge.net/
T: git git://linux-nfs.org/~bfields/linux.git
LINUX KERNEL MEMORY CONSISTENCY MODEL (LKMM)
+ M: Will Deacon <will@kernel.org>
LOCKING PRIMITIVES
+ M: Will Deacon <will@kernel.org>
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core
S: Maintained
S: Supported
F: drivers/net/ethernet/mellanox/mlx5/core/
F: include/linux/mlx5/
+F: Documentation/networking/device_drivers/mellanox/
MELLANOX MLX5 IB driver
F: arch/arm/mach-mmp/
MMU GATHER AND TLB INVALIDATION
+ M: Will Deacon <will@kernel.org>
S: Supported
F: drivers/net/ethernet/qlogic/netxen/
+NEXTHOP
+S: Maintained
+F: include/net/nexthop.h
+F: include/uapi/linux/nexthop.h
+F: include/net/netns/nexthop.h
+F: net/ipv4/nexthop.c
+
NFC SUBSYSTEM
S: Orphan
F: drivers/pci/controller/dwc/*layerscape*
PCI DRIVER FOR GENERIC OF HOSTS
+ M: Will Deacon <will@kernel.org>
S: Maintained
F: include/linux/siphash.h
SIOX
- M: Gavin Schenk <g.schenk@eckelmann.de>
+ M: Thorsten Scherer <t.scherer@eckelmann.de>
S: Supported
return mv88e6xxx_g1_vtu_vid_read(chip, entry);
}
+int mv88e6250_g1_vtu_getnext(struct mv88e6xxx_chip *chip,
+ struct mv88e6xxx_vtu_entry *entry)
+{
+ u16 val;
+ int err;
+
+ err = mv88e6xxx_g1_vtu_getnext(chip, entry);
+ if (err)
+ return err;
+
+ if (entry->valid) {
+ err = mv88e6185_g1_vtu_data_read(chip, entry);
+ if (err)
+ return err;
+
+ /* VTU DBNum[3:0] are located in VTU Operation 3:0
+ * VTU DBNum[5:4] are located in VTU Operation 9:8
+ */
+ err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_VTU_OP, &val);
+ if (err)
+ return err;
+
+ entry->fid = val & 0x000f;
+ entry->fid |= (val & 0x0300) >> 4;
+ }
+
+ return 0;
+}
+
int mv88e6185_g1_vtu_getnext(struct mv88e6xxx_chip *chip,
struct mv88e6xxx_vtu_entry *entry)
{
return 0;
}
+int mv88e6250_g1_vtu_loadpurge(struct mv88e6xxx_chip *chip,
+ struct mv88e6xxx_vtu_entry *entry)
+{
+ u16 op = MV88E6XXX_G1_VTU_OP_VTU_LOAD_PURGE;
+ int err;
+
+ err = mv88e6xxx_g1_vtu_op_wait(chip);
+ if (err)
+ return err;
+
+ err = mv88e6xxx_g1_vtu_vid_write(chip, entry);
+ if (err)
+ return err;
+
+ if (entry->valid) {
+ err = mv88e6185_g1_vtu_data_write(chip, entry);
+ if (err)
+ return err;
+
+ /* VTU DBNum[3:0] are located in VTU Operation 3:0
+ * VTU DBNum[5:4] are located in VTU Operation 9:8
+ */
+ op |= entry->fid & 0x000f;
+ op |= (entry->fid & 0x0030) << 4;
+ }
+
+ return mv88e6xxx_g1_vtu_op(chip, op);
+}
+
int mv88e6185_g1_vtu_loadpurge(struct mv88e6xxx_chip *chip,
struct mv88e6xxx_vtu_entry *entry)
{
* VTU DBNum[7:4] are located in VTU Operation 11:8
*/
op |= entry->fid & 0x000f;
- op |= (entry->fid & 0x00f0) << 8;
+ op |= (entry->fid & 0x00f0) << 4;
}
return mv88e6xxx_g1_vtu_op(chip, op);
+ // SPDX-License-Identifier: GPL-2.0-only
/*
* Cadence MACB/GEM Ethernet Controller driver
*
* Copyright (C) 2004-2006 Atmel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/clk.h>
+#include <linux/clk-provider.h>
#include <linux/crc32.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/pm_runtime.h>
#include "macb.h"
+/* This structure is only used for MACB on SiFive FU540 devices */
+struct sifive_fu540_macb_mgmt {
+ void __iomem *reg;
+ unsigned long rate;
+ struct clk_hw hw;
+};
+
+static struct sifive_fu540_macb_mgmt *mgmt;
+
#define MACB_RX_BUFFER_SIZE 128
#define RX_BUFFER_MULTIPLE 64 /* bytes */
return 0;
}
+static unsigned long fu540_macb_tx_recalc_rate(struct clk_hw *hw,
+ unsigned long parent_rate)
+{
+ return mgmt->rate;
+}
+
+static long fu540_macb_tx_round_rate(struct clk_hw *hw, unsigned long rate,
+ unsigned long *parent_rate)
+{
+ if (WARN_ON(rate < 2500000))
+ return 2500000;
+ else if (rate == 2500000)
+ return 2500000;
+ else if (WARN_ON(rate < 13750000))
+ return 2500000;
+ else if (WARN_ON(rate < 25000000))
+ return 25000000;
+ else if (rate == 25000000)
+ return 25000000;
+ else if (WARN_ON(rate < 75000000))
+ return 25000000;
+ else if (WARN_ON(rate < 125000000))
+ return 125000000;
+ else if (rate == 125000000)
+ return 125000000;
+
+ WARN_ON(rate > 125000000);
+
+ return 125000000;
+}
+
+static int fu540_macb_tx_set_rate(struct clk_hw *hw, unsigned long rate,
+ unsigned long parent_rate)
+{
+ rate = fu540_macb_tx_round_rate(hw, rate, &parent_rate);
+ if (rate != 125000000)
+ iowrite32(1, mgmt->reg);
+ else
+ iowrite32(0, mgmt->reg);
+ mgmt->rate = rate;
+
+ return 0;
+}
+
+static const struct clk_ops fu540_c000_ops = {
+ .recalc_rate = fu540_macb_tx_recalc_rate,
+ .round_rate = fu540_macb_tx_round_rate,
+ .set_rate = fu540_macb_tx_set_rate,
+};
+
+static int fu540_c000_clk_init(struct platform_device *pdev, struct clk **pclk,
+ struct clk **hclk, struct clk **tx_clk,
+ struct clk **rx_clk, struct clk **tsu_clk)
+{
+ struct clk_init_data init;
+ int err = 0;
+
+ err = macb_clk_init(pdev, pclk, hclk, tx_clk, rx_clk, tsu_clk);
+ if (err)
+ return err;
+
+ mgmt = devm_kzalloc(&pdev->dev, sizeof(*mgmt), GFP_KERNEL);
+ if (!mgmt)
+ return -ENOMEM;
+
+ init.name = "sifive-gemgxl-mgmt";
+ init.ops = &fu540_c000_ops;
+ init.flags = 0;
+ init.num_parents = 0;
+
+ mgmt->rate = 0;
+ mgmt->hw.init = &init;
+
+ *tx_clk = clk_register(NULL, &mgmt->hw);
+ if (IS_ERR(*tx_clk))
+ return PTR_ERR(*tx_clk);
+
+ err = clk_prepare_enable(*tx_clk);
+ if (err)
+ dev_err(&pdev->dev, "failed to enable tx_clk (%u)\n", err);
+ else
+ dev_info(&pdev->dev, "Registered clk switch '%s'\n", init.name);
+
+ return 0;
+}
+
+static int fu540_c000_init(struct platform_device *pdev)
+{
+ struct resource *res;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ if (!res)
+ return -ENODEV;
+
+ mgmt->reg = ioremap(res->start, resource_size(res));
+ if (!mgmt->reg)
+ return -ENOMEM;
+
+ return macb_init(pdev);
+}
+
+static const struct macb_config fu540_c000_config = {
+ .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_JUMBO |
+ MACB_CAPS_GEM_HAS_PTP,
+ .dma_burst_length = 16,
+ .clk_init = fu540_c000_clk_init,
+ .init = fu540_c000_init,
+ .jumbo_max_len = 10240,
+};
+
static const struct macb_config at91sam9260_config = {
.caps = MACB_CAPS_USRIO_HAS_CLKEN | MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII,
.clk_init = macb_clk_init,
{ .compatible = "cdns,emac", .data = &emac_config },
{ .compatible = "cdns,zynqmp-gem", .data = &zynqmp_config},
{ .compatible = "cdns,zynq-gem", .data = &zynq_config },
+ { .compatible = "sifive,fu540-macb", .data = &fu540_c000_config },
{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, macb_dt_ids);
err_disable_clocks:
clk_disable_unprepare(tx_clk);
+ clk_unregister(tx_clk);
clk_disable_unprepare(hclk);
clk_disable_unprepare(pclk);
clk_disable_unprepare(rx_clk);
pm_runtime_dont_use_autosuspend(&pdev->dev);
if (!pm_runtime_suspended(&pdev->dev)) {
clk_disable_unprepare(bp->tx_clk);
+ clk_unregister(bp->tx_clk);
clk_disable_unprepare(bp->hclk);
clk_disable_unprepare(bp->pclk);
clk_disable_unprepare(bp->rx_clk);
#include <linux/dma-mapping.h>
#include <linux/etherdevice.h>
#include <linux/interrupt.h>
+#ifdef CONFIG_RFS_ACCEL
+#include <linux/cpu_rmap.h>
+#endif
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/sctp.h>
#include <linux/vermagic.h>
#include <net/gre.h>
+#include <net/ip6_checksum.h>
#include <net/pkt_cls.h>
#include <net/tcp.h>
#include <net/vxlan.h>
#define hns3_tx_bd_count(S) DIV_ROUND_UP(S, HNS3_MAX_BD_SIZE)
static void hns3_clear_all_ring(struct hnae3_handle *h);
-static void hns3_force_clear_all_rx_ring(struct hnae3_handle *h);
+static void hns3_force_clear_all_ring(struct hnae3_handle *h);
static void hns3_remove_hw_addr(struct net_device *netdev);
static const char hns3_driver_name[] = "hns3";
return IRQ_HANDLED;
}
-/* This callback function is used to set affinity changes to the irq affinity
- * masks when the irq_set_affinity_notifier function is used.
- */
-static void hns3_nic_irq_affinity_notify(struct irq_affinity_notify *notify,
- const cpumask_t *mask)
-{
- struct hns3_enet_tqp_vector *tqp_vectors =
- container_of(notify, struct hns3_enet_tqp_vector,
- affinity_notify);
-
- tqp_vectors->affinity_mask = *mask;
-}
-
-static void hns3_nic_irq_affinity_release(struct kref *ref)
-{
-}
-
static void hns3_nic_uninit_irq(struct hns3_nic_priv *priv)
{
struct hns3_enet_tqp_vector *tqp_vectors;
if (tqp_vectors->irq_init_flag != HNS3_VECTOR_INITED)
continue;
- /* clear the affinity notifier and affinity mask */
- irq_set_affinity_notifier(tqp_vectors->vector_irq, NULL);
+ /* clear the affinity mask */
irq_set_affinity_hint(tqp_vectors->vector_irq, NULL);
/* release the irq resource */
tqp_vectors->name[HNAE3_INT_NAME_LEN - 1] = '\0';
ret = request_irq(tqp_vectors->vector_irq, hns3_irq_handle, 0,
- tqp_vectors->name,
- tqp_vectors);
+ tqp_vectors->name, tqp_vectors);
if (ret) {
netdev_err(priv->netdev, "request irq(%d) fail\n",
tqp_vectors->vector_irq);
+ hns3_nic_uninit_irq(priv);
return ret;
}
- tqp_vectors->affinity_notify.notify =
- hns3_nic_irq_affinity_notify;
- tqp_vectors->affinity_notify.release =
- hns3_nic_irq_affinity_release;
- irq_set_affinity_notifier(tqp_vectors->vector_irq,
- &tqp_vectors->affinity_notify);
irq_set_affinity_hint(tqp_vectors->vector_irq,
&tqp_vectors->affinity_mask);
ret = netif_set_real_num_tx_queues(netdev, queue_size);
if (ret) {
netdev_err(netdev,
- "netif_set_real_num_tx_queues fail, ret=%d!\n",
- ret);
+ "netif_set_real_num_tx_queues fail, ret=%d!\n", ret);
return ret;
}
hns3_write_dev(tqp, HNS3_RING_EN_REG, rcb_reg);
}
+static void hns3_free_rx_cpu_rmap(struct net_device *netdev)
+{
+#ifdef CONFIG_RFS_ACCEL
+ free_irq_cpu_rmap(netdev->rx_cpu_rmap);
+ netdev->rx_cpu_rmap = NULL;
+#endif
+}
+
+static int hns3_set_rx_cpu_rmap(struct net_device *netdev)
+{
+#ifdef CONFIG_RFS_ACCEL
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ struct hns3_enet_tqp_vector *tqp_vector;
+ int i, ret;
+
+ if (!netdev->rx_cpu_rmap) {
+ netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(priv->vector_num);
+ if (!netdev->rx_cpu_rmap)
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < priv->vector_num; i++) {
+ tqp_vector = &priv->tqp_vector[i];
+ ret = irq_cpu_rmap_add(netdev->rx_cpu_rmap,
+ tqp_vector->vector_irq);
+ if (ret) {
+ hns3_free_rx_cpu_rmap(netdev);
+ return ret;
+ }
+ }
+#endif
+ return 0;
+}
+
static int hns3_nic_net_up(struct net_device *netdev)
{
struct hns3_nic_priv *priv = netdev_priv(netdev);
if (ret)
return ret;
+ /* the device can work without cpu rmap, only aRFS needs it */
+ ret = hns3_set_rx_cpu_rmap(netdev);
+ if (ret)
+ netdev_warn(netdev, "set rx cpu rmap fail, ret=%d!\n", ret);
+
/* get irq resource for all vectors */
ret = hns3_nic_init_irq(priv);
if (ret) {
- netdev_err(netdev, "hns init irq failed! ret=%d\n", ret);
- return ret;
+ netdev_err(netdev, "init irq failed! ret=%d\n", ret);
+ goto free_rmap;
}
clear_bit(HNS3_NIC_STATE_DOWN, &priv->state);
hns3_vector_disable(&priv->tqp_vector[j]);
hns3_nic_uninit_irq(priv);
-
+free_rmap:
+ hns3_free_rx_cpu_rmap(netdev);
return ret;
}
ret = hns3_nic_net_up(netdev);
if (ret) {
- netdev_err(netdev,
- "hns net up fail, ret=%d!\n", ret);
+ netdev_err(netdev, "net up fail, ret=%d!\n", ret);
return ret;
}
kinfo = &h->kinfo;
- for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) {
- netdev_set_prio_tc_map(netdev, i,
- kinfo->prio_tc[i]);
- }
+ for (i = 0; i < HNAE3_MAX_USER_PRIO; i++)
+ netdev_set_prio_tc_map(netdev, i, kinfo->prio_tc[i]);
if (h->ae_algo->ops->set_timer_task)
h->ae_algo->ops->set_timer_task(priv->ae_handle, true);
if (ops->stop)
ops->stop(priv->ae_handle);
+ hns3_free_rx_cpu_rmap(netdev);
+
/* free irq resources */
hns3_nic_uninit_irq(priv);
- hns3_clear_all_ring(priv->ae_handle);
+ /* delay ring buffer clearing to hns3_reset_notify_uninit_enet
+ * during reset process, because driver may not be able
+ * to disable the ring through firmware when downing the netdev.
+ */
+ if (!hns3_nic_resetting(netdev))
+ hns3_clear_all_ring(priv->ae_handle);
}
static int hns3_nic_net_stop(struct net_device *netdev)
if (l3.v4->version == 4)
l3.v4->check = 0;
- /* tunnel packet.*/
+ /* tunnel packet */
if (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE |
SKB_GSO_GRE_CSUM |
SKB_GSO_UDP_TUNNEL |
l3.v4->check = 0;
}
- /* normal or tunnel packet*/
+ /* normal or tunnel packet */
l4_offset = l4.hdr - skb->data;
hdr_len = (l4.tcp->doff << 2) + l4_offset;
- /* remove payload length from inner pseudo checksum when tso*/
+ /* remove payload length from inner pseudo checksum when tso */
l4_paylen = skb->len - l4_offset;
csum_replace_by_diff(&l4.tcp->check,
(__force __wsum)htonl(l4_paylen));
hns3_set_field(*ol_type_vlan_len_msec, HNS3_TXD_L3LEN_S, l3_len >> 2);
il2_hdr = skb_inner_mac_header(skb);
- /* compute OL4 header size, defined in 4 Bytes. */
+ /* compute OL4 header size, defined in 4 Bytes */
l4_len = il2_hdr - l4.hdr;
hns3_set_field(*ol_type_vlan_len_msec, HNS3_TXD_L4LEN_S, l4_len >> 2);
}
static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
- int size, int frag_end, enum hns_desc_type type)
+ unsigned int size, int frag_end,
+ enum hns_desc_type type)
{
struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use];
struct hns3_desc *desc = &ring->desc[ring->next_to_use];
/* Set txbd */
desc->tx.ol_type_vlan_len_msec =
cpu_to_le32(ol_type_vlan_len_msec);
- desc->tx.type_cs_vlan_tso_len =
- cpu_to_le32(type_cs_vlan_tso);
+ desc->tx.type_cs_vlan_tso_len = cpu_to_le32(type_cs_vlan_tso);
desc->tx.paylen = cpu_to_le32(paylen);
desc->tx.mss = cpu_to_le16(mss);
desc->tx.vlan_tag = cpu_to_le16(inner_vtag);
desc_cb->priv = priv;
desc_cb->dma = dma + HNS3_MAX_BD_SIZE * k;
desc_cb->type = (type == DESC_TYPE_SKB && !k) ?
- DESC_TYPE_SKB : DESC_TYPE_PAGE;
+ DESC_TYPE_SKB : DESC_TYPE_PAGE;
/* now, fill the descriptor */
desc->addr = cpu_to_le64(dma + HNS3_MAX_BD_SIZE * k);
desc->tx.send_size = cpu_to_le16((k == frag_buf_num - 1) ?
- (u16)sizeoflast : (u16)HNS3_MAX_BD_SIZE);
+ (u16)sizeoflast : (u16)HNS3_MAX_BD_SIZE);
hns3_set_txbd_baseinfo(&bdtp_fe_sc_vld_ra_ri,
frag_end && (k == frag_buf_num - 1) ?
1 : 0);
desc->tx.bdtp_fe_sc_vld_ra_ri =
cpu_to_le16(bdtp_fe_sc_vld_ra_ri);
- /* move ring pointer to next.*/
+ /* move ring pointer to next */
ring_ptr_move_fw(ring, next_to_use);
desc_cb = &ring->desc_cb[ring->next_to_use];
static int hns3_setup_tc(struct net_device *netdev, void *type_data)
{
struct tc_mqprio_qopt_offload *mqprio_qopt = type_data;
- struct hnae3_handle *h = hns3_get_handle(netdev);
- struct hnae3_knic_private_info *kinfo = &h->kinfo;
u8 *prio_tc = mqprio_qopt->qopt.prio_tc_map;
+ struct hnae3_knic_private_info *kinfo;
u8 tc = mqprio_qopt->qopt.num_tc;
u16 mode = mqprio_qopt->mode;
u8 hw = mqprio_qopt->qopt.hw;
+ struct hnae3_handle *h;
if (!((hw == TC_MQPRIO_HW_OFFLOAD_TCS &&
mode == TC_MQPRIO_MODE_CHANNEL) || (!hw && tc == 0)))
if (!netdev)
return -EINVAL;
+ h = hns3_get_handle(netdev);
+ kinfo = &h->kinfo;
+
return (kinfo->dcb_ops && kinfo->dcb_ops->setup_tc) ?
kinfo->dcb_ops->setup_tc(h, tc, prio_tc) : -EOPNOTSUPP;
}
__be16 proto, u16 vid)
{
struct hnae3_handle *h = hns3_get_handle(netdev);
- struct hns3_nic_priv *priv = netdev_priv(netdev);
int ret = -EIO;
if (h->ae_algo->ops->set_vlan_filter)
ret = h->ae_algo->ops->set_vlan_filter(h, proto, vid, false);
- if (!ret)
- set_bit(vid, priv->active_vlans);
-
return ret;
}
__be16 proto, u16 vid)
{
struct hnae3_handle *h = hns3_get_handle(netdev);
- struct hns3_nic_priv *priv = netdev_priv(netdev);
int ret = -EIO;
if (h->ae_algo->ops->set_vlan_filter)
ret = h->ae_algo->ops->set_vlan_filter(h, proto, vid, true);
- if (!ret)
- clear_bit(vid, priv->active_vlans);
-
- return ret;
-}
-
-static int hns3_restore_vlan(struct net_device *netdev)
-{
- struct hns3_nic_priv *priv = netdev_priv(netdev);
- int ret = 0;
- u16 vid;
-
- for_each_set_bit(vid, priv->active_vlans, VLAN_N_VID) {
- ret = hns3_vlan_rx_add_vid(netdev, htons(ETH_P_8021Q), vid);
- if (ret) {
- netdev_err(netdev, "Restore vlan: %d filter, ret:%d\n",
- vid, ret);
- return ret;
- }
- }
-
return ret;
}
if (h->ae_algo->ops->set_vf_vlan_filter)
ret = h->ae_algo->ops->set_vf_vlan_filter(h, vf, vlan,
- qos, vlan_proto);
+ qos, vlan_proto);
return ret;
}
h->ae_algo->ops->reset_event(h->pdev, h);
}
+#ifdef CONFIG_RFS_ACCEL
+static int hns3_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
+ u16 rxq_index, u32 flow_id)
+{
+ struct hnae3_handle *h = hns3_get_handle(dev);
+ struct flow_keys fkeys;
+
+ if (!h->ae_algo->ops->add_arfs_entry)
+ return -EOPNOTSUPP;
+
+ if (skb->encapsulation)
+ return -EPROTONOSUPPORT;
+
+ if (!skb_flow_dissect_flow_keys(skb, &fkeys, 0))
+ return -EPROTONOSUPPORT;
+
+ if ((fkeys.basic.n_proto != htons(ETH_P_IP) &&
+ fkeys.basic.n_proto != htons(ETH_P_IPV6)) ||
+ (fkeys.basic.ip_proto != IPPROTO_TCP &&
+ fkeys.basic.ip_proto != IPPROTO_UDP))
+ return -EPROTONOSUPPORT;
+
+ return h->ae_algo->ops->add_arfs_entry(h, rxq_index, flow_id, &fkeys);
+}
+#endif
+
static const struct net_device_ops hns3_nic_netdev_ops = {
.ndo_open = hns3_nic_net_open,
.ndo_stop = hns3_nic_net_stop,
.ndo_vlan_rx_add_vid = hns3_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = hns3_vlan_rx_kill_vid,
.ndo_set_vf_vlan = hns3_ndo_set_vf_vlan,
+#ifdef CONFIG_RFS_ACCEL
+ .ndo_rx_flow_steer = hns3_rx_flow_steer,
+#endif
+
};
bool hns3_is_phys_func(struct pci_dev *pdev)
struct hnae3_ae_dev *ae_dev;
int ret;
- ae_dev = devm_kzalloc(&pdev->dev, sizeof(*ae_dev),
- GFP_KERNEL);
+ ae_dev = devm_kzalloc(&pdev->dev, sizeof(*ae_dev), GFP_KERNEL);
if (!ae_dev) {
ret = -ENOMEM;
return ret;
ae_dev->pdev = pdev;
ae_dev->flag = ent->driver_data;
- ae_dev->dev_type = HNAE3_DEV_KNIC;
ae_dev->reset_type = HNAE3_NONE_RESET;
hns3_get_dev_capability(pdev, ae_dev);
pci_set_drvdata(pdev, ae_dev);
if (state == pci_channel_io_perm_failure)
return PCI_ERS_RESULT_DISCONNECT;
- if (!ae_dev) {
+ if (!ae_dev || !ae_dev->ops) {
dev_err(&pdev->dev,
- "Can't recover - error happened during device init\n");
+ "Can't recover - error happened before device initialized\n");
return PCI_ERS_RESULT_NONE;
}
static pci_ers_result_t hns3_slot_reset(struct pci_dev *pdev)
{
struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
+ const struct hnae3_ae_ops *ops;
+ enum hnae3_reset_type reset_type;
struct device *dev = &pdev->dev;
- dev_info(dev, "requesting reset due to PCI error\n");
+ if (!ae_dev || !ae_dev->ops)
+ return PCI_ERS_RESULT_NONE;
+ ops = ae_dev->ops;
/* request the reset */
- if (ae_dev->ops->reset_event) {
- if (!ae_dev->override_pci_need_reset)
- ae_dev->ops->reset_event(pdev, NULL);
+ if (ops->reset_event) {
+ if (!ae_dev->override_pci_need_reset) {
+ reset_type = ops->get_reset_level(ae_dev,
+ &ae_dev->hw_err_reset_req);
+ ops->set_default_reset_request(ae_dev, reset_type);
+ dev_info(dev, "requesting reset due to PCI error\n");
+ ops->reset_event(pdev, NULL);
+ }
return PCI_ERS_RESULT_RECOVERED;
}
return ret;
}
-/* detach a in-used buffer and replace with a reserved one */
+/* detach a in-used buffer and replace with a reserved one */
static void hns3_replace_buffer(struct hns3_enet_ring *ring, int i,
struct hns3_desc_cb *res_cb)
{
static void hns3_reuse_buffer(struct hns3_enet_ring *ring, int i)
{
ring->desc_cb[i].reuse_flag = 0;
- ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma
- + ring->desc_cb[i].page_offset);
+ ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma +
+ ring->desc_cb[i].page_offset);
ring->desc[i].rx.bd_base_info = 0;
}
return ((ntc >= ntu) ? 0 : ring->desc_num) + ntc - ntu;
}
-static void
-hns3_nic_alloc_rx_buffers(struct hns3_enet_ring *ring, int cleand_count)
+static void hns3_nic_alloc_rx_buffers(struct hns3_enet_ring *ring,
+ int cleand_count)
{
struct hns3_desc_cb *desc_cb;
struct hns3_desc_cb res_cbs;
/* Avoid re-using remote pages, or the stack is still using the page
* when page_offset rollback to zero, flag default unreuse
*/
- if (unlikely(page_to_nid(desc_cb->priv) != numa_node_id()) ||
+ if (unlikely(page_to_nid(desc_cb->priv) != numa_mem_id()) ||
(!desc_cb->page_offset && page_count(desc_cb->priv) > 1))
return;
if (desc_cb->page_offset + truesize <= hnae3_page_size(ring)) {
desc_cb->reuse_flag = 1;
- /* Bump ref count on page before it is given*/
+ /* Bump ref count on page before it is given */
get_page(desc_cb->priv);
} else if (page_count(desc_cb->priv) == 1) {
desc_cb->reuse_flag = 1;
}
}
-static int hns3_gro_complete(struct sk_buff *skb)
+static int hns3_gro_complete(struct sk_buff *skb, u32 l234info)
{
__be16 type = skb->protocol;
struct tcphdr *th;
int depth = 0;
- while (type == htons(ETH_P_8021Q)) {
+ while (eth_type_vlan(type)) {
struct vlan_hdr *vh;
if ((depth + VLAN_HLEN) > skb_headlen(skb))
depth += VLAN_HLEN;
}
+ skb_set_network_header(skb, depth);
+
if (type == htons(ETH_P_IP)) {
+ const struct iphdr *iph = ip_hdr(skb);
+
depth += sizeof(struct iphdr);
+ skb_set_transport_header(skb, depth);
+ th = tcp_hdr(skb);
+ th->check = ~tcp_v4_check(skb->len - depth, iph->saddr,
+ iph->daddr, 0);
} else if (type == htons(ETH_P_IPV6)) {
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+
depth += sizeof(struct ipv6hdr);
+ skb_set_transport_header(skb, depth);
+ th = tcp_hdr(skb);
+ th->check = ~tcp_v6_check(skb->len - depth, &iph->saddr,
+ &iph->daddr, 0);
} else {
netdev_err(skb->dev,
"Error: FW GRO supports only IPv4/IPv6, not 0x%04x, depth: %d\n",
return -EFAULT;
}
- th = (struct tcphdr *)(skb->data + depth);
skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
if (th->cwr)
skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
- skb->ip_summed = CHECKSUM_UNNECESSARY;
+ if (l234info & BIT(HNS3_RXD_GRO_FIXID_B))
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_FIXEDID;
+ skb->csum_start = (unsigned char *)th - skb->head;
+ skb->csum_offset = offsetof(struct tcphdr, check);
+ skb->ip_summed = CHECKSUM_PARTIAL;
return 0;
}
memcpy(__skb_put(skb, length), va, ALIGN(length, sizeof(long)));
/* We can reuse buffer as-is, just make sure it is local */
- if (likely(page_to_nid(desc_cb->priv) == numa_node_id()))
+ if (likely(page_to_nid(desc_cb->priv) == numa_mem_id()))
desc_cb->reuse_flag = 1;
else /* This page cannot be reused so discard it */
put_page(desc_cb->priv);
*/
if (pending) {
pre_bd = (ring->next_to_clean - 1 + ring->desc_num) %
- ring->desc_num;
+ ring->desc_num;
pre_desc = &ring->desc[pre_bd];
bd_base_info = le32_to_cpu(pre_desc->rx.bd_base_info);
} else {
struct sk_buff *skb, u32 l234info,
u32 bd_base_info, u32 ol_info)
{
- u16 gro_count;
u32 l3_type;
- gro_count = hnae3_get_field(l234info, HNS3_RXD_GRO_COUNT_M,
- HNS3_RXD_GRO_COUNT_S);
+ skb_shinfo(skb)->gso_size = hnae3_get_field(bd_base_info,
+ HNS3_RXD_GRO_SIZE_M,
+ HNS3_RXD_GRO_SIZE_S);
/* if there is no HW GRO, do not set gro params */
- if (!gro_count) {
+ if (!skb_shinfo(skb)->gso_size) {
hns3_rx_checksum(ring, skb, l234info, bd_base_info, ol_info);
return 0;
}
- NAPI_GRO_CB(skb)->count = gro_count;
+ NAPI_GRO_CB(skb)->count = hnae3_get_field(l234info,
+ HNS3_RXD_GRO_COUNT_M,
+ HNS3_RXD_GRO_COUNT_S);
- l3_type = hnae3_get_field(l234info, HNS3_RXD_L3ID_M,
- HNS3_RXD_L3ID_S);
+ l3_type = hnae3_get_field(l234info, HNS3_RXD_L3ID_M, HNS3_RXD_L3ID_S);
if (l3_type == HNS3_L3_TYPE_IPV4)
skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
else if (l3_type == HNS3_L3_TYPE_IPV6)
else
return -EFAULT;
- skb_shinfo(skb)->gso_size = hnae3_get_field(bd_base_info,
- HNS3_RXD_GRO_SIZE_M,
- HNS3_RXD_GRO_SIZE_S);
-
- return hns3_gro_complete(skb);
+ return hns3_gro_complete(skb, l234info);
}
static void hns3_set_rx_skb_rss_type(struct hns3_enet_ring *ring,
return ret;
}
+ skb_record_rx_queue(skb, ring->tqp->tqp_index);
*out_skb = skb;
return 0;
}
-int hns3_clean_rx_ring(
- struct hns3_enet_ring *ring, int budget,
- void (*rx_fn)(struct hns3_enet_ring *, struct sk_buff *))
+int hns3_clean_rx_ring(struct hns3_enet_ring *ring, int budget,
+ void (*rx_fn)(struct hns3_enet_ring *, struct sk_buff *))
{
#define RCB_NOF_ALLOC_RX_BUFF_ONCE 16
int recv_pkts, recv_bds, clean_count, err;
out:
/* Make all data has been write before submit */
if (clean_count + unused_count > 0)
- hns3_nic_alloc_rx_buffers(ring,
- clean_count + unused_count);
+ hns3_nic_alloc_rx_buffers(ring, clean_count + unused_count);
return recv_pkts;
}
-static bool hns3_get_new_int_gl(struct hns3_enet_ring_group *ring_group)
+static bool hns3_get_new_flow_lvl(struct hns3_enet_ring_group *ring_group)
{
- struct hns3_enet_tqp_vector *tqp_vector =
- ring_group->ring->tqp_vector;
+#define HNS3_RX_LOW_BYTE_RATE 10000
+#define HNS3_RX_MID_BYTE_RATE 20000
+#define HNS3_RX_ULTRA_PACKET_RATE 40
+
enum hns3_flow_level_range new_flow_level;
- int packets_per_msecs;
- int bytes_per_msecs;
+ struct hns3_enet_tqp_vector *tqp_vector;
+ int packets_per_msecs, bytes_per_msecs;
u32 time_passed_ms;
- u16 new_int_gl;
-
- if (!tqp_vector->last_jiffies)
- return false;
-
- if (ring_group->total_packets == 0) {
- ring_group->coal.int_gl = HNS3_INT_GL_50K;
- ring_group->coal.flow_level = HNS3_FLOW_LOW;
- return true;
- }
- /* Simple throttlerate management
- * 0-10MB/s lower (50000 ints/s)
- * 10-20MB/s middle (20000 ints/s)
- * 20-1249MB/s high (18000 ints/s)
- * > 40000pps ultra (8000 ints/s)
- */
- new_flow_level = ring_group->coal.flow_level;
- new_int_gl = ring_group->coal.int_gl;
+ tqp_vector = ring_group->ring->tqp_vector;
time_passed_ms =
jiffies_to_msecs(jiffies - tqp_vector->last_jiffies);
-
if (!time_passed_ms)
return false;
do_div(ring_group->total_bytes, time_passed_ms);
bytes_per_msecs = ring_group->total_bytes;
-#define HNS3_RX_LOW_BYTE_RATE 10000
-#define HNS3_RX_MID_BYTE_RATE 20000
+ new_flow_level = ring_group->coal.flow_level;
+ /* Simple throttlerate management
+ * 0-10MB/s lower (50000 ints/s)
+ * 10-20MB/s middle (20000 ints/s)
+ * 20-1249MB/s high (18000 ints/s)
+ * > 40000pps ultra (8000 ints/s)
+ */
switch (new_flow_level) {
case HNS3_FLOW_LOW:
if (bytes_per_msecs > HNS3_RX_LOW_BYTE_RATE)
break;
}
-#define HNS3_RX_ULTRA_PACKET_RATE 40
-
if (packets_per_msecs > HNS3_RX_ULTRA_PACKET_RATE &&
&tqp_vector->rx_group == ring_group)
new_flow_level = HNS3_FLOW_ULTRA;
- switch (new_flow_level) {
+ ring_group->total_bytes = 0;
+ ring_group->total_packets = 0;
+ ring_group->coal.flow_level = new_flow_level;
+
+ return true;
+}
+
+static bool hns3_get_new_int_gl(struct hns3_enet_ring_group *ring_group)
+{
+ struct hns3_enet_tqp_vector *tqp_vector;
+ u16 new_int_gl;
+
+ if (!ring_group->ring)
+ return false;
+
+ tqp_vector = ring_group->ring->tqp_vector;
+ if (!tqp_vector->last_jiffies)
+ return false;
+
+ if (ring_group->total_packets == 0) {
+ ring_group->coal.int_gl = HNS3_INT_GL_50K;
+ ring_group->coal.flow_level = HNS3_FLOW_LOW;
+ return true;
+ }
+
+ if (!hns3_get_new_flow_lvl(ring_group))
+ return false;
+
+ new_int_gl = ring_group->coal.int_gl;
+ switch (ring_group->coal.flow_level) {
case HNS3_FLOW_LOW:
new_int_gl = HNS3_INT_GL_50K;
break;
break;
}
- ring_group->total_bytes = 0;
- ring_group->total_packets = 0;
- ring_group->coal.flow_level = new_flow_level;
if (new_int_gl != ring_group->coal.int_gl) {
ring_group->coal.int_gl = new_int_gl;
return true;
if (!vector)
return -ENOMEM;
+ /* save the actual available vector number */
vector_num = h->ae_algo->ops->get_vector(h, vector_num, vector);
priv->vector_num = vector_num;
hns3_free_vector_ring_chain(tqp_vector, &vector_ring_chain);
if (tqp_vector->irq_init_flag == HNS3_VECTOR_INITED) {
- irq_set_affinity_notifier(tqp_vector->vector_irq,
- NULL);
irq_set_affinity_hint(tqp_vector->vector_irq, NULL);
free_irq(tqp_vector->vector_irq, tqp_vector);
tqp_vector->irq_init_flag = HNS3_VECTOR_NOT_INITED;
}
static int hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv,
- int ring_type)
+ unsigned int ring_type)
{
struct hns3_nic_ring_data *ring_data = priv->ring_data;
int queue_num = priv->ae_handle->kinfo.num_tqps;
struct hnae3_queue *q = ring->tqp;
if (!HNAE3_IS_TX_RING(ring)) {
- hns3_write_dev(q, HNS3_RING_RX_RING_BASEADDR_L_REG,
- (u32)dma);
+ hns3_write_dev(q, HNS3_RING_RX_RING_BASEADDR_L_REG, (u32)dma);
hns3_write_dev(q, HNS3_RING_RX_RING_BASEADDR_H_REG,
(u32)((dma >> 31) >> 1));
ret = hns3_client_start(handle);
if (ret) {
dev_err(priv->dev, "hns3_client_start fail! ret=%d\n", ret);
- goto out_client_start;
+ goto out_client_start;
}
hns3_dcbnl_setup(handle);
hns3_client_stop(handle);
+ hns3_uninit_phy(netdev);
+
if (!test_and_clear_bit(HNS3_NIC_STATE_INITED, &priv->state)) {
netdev_warn(netdev, "already uninitialized\n");
goto out_netdev_free;
hns3_del_all_fd_rules(netdev, true);
- hns3_force_clear_all_rx_ring(handle);
-
- hns3_uninit_phy(netdev);
+ hns3_force_clear_all_ring(handle);
hns3_nic_uninit_vector_data(priv);
ret);
return ret;
}
- hns3_replace_buffer(ring, ring->next_to_use,
- &res_cbs);
+ hns3_replace_buffer(ring, ring->next_to_use, &res_cbs);
}
ring_ptr_move_fw(ring, next_to_use);
}
}
}
-static void hns3_force_clear_all_rx_ring(struct hnae3_handle *h)
+static void hns3_force_clear_all_ring(struct hnae3_handle *h)
{
struct net_device *ndev = h->kinfo.netdev;
struct hns3_nic_priv *priv = netdev_priv(ndev);
u32 i;
for (i = 0; i < h->kinfo.num_tqps; i++) {
+ ring = priv->ring_data[i].ring;
+ hns3_clear_tx_ring(ring);
+
ring = priv->ring_data[i + h->kinfo.num_tqps].ring;
hns3_force_clear_rx_ring(ring);
}
if (ret) {
set_bit(HNS3_NIC_STATE_RESETTING, &priv->state);
netdev_err(kinfo->netdev,
- "hns net up fail, ret=%d!\n", ret);
+ "net up fail, ret=%d!\n", ret);
return ret;
}
}
vlan_filter_enable = netdev->flags & IFF_PROMISC ? false : true;
hns3_enable_vlan_filter(netdev, vlan_filter_enable);
- /* Hardware table is only clear when pf resets */
- if (!(handle->flags & HNAE3_SUPPORT_VF)) {
- ret = hns3_restore_vlan(netdev);
- if (ret)
- return ret;
- }
+ if (handle->ae_algo->ops->restore_vlan_table)
+ handle->ae_algo->ops->restore_vlan_table(handle);
return hns3_restore_fd_rules(netdev);
}
return 0;
}
- hns3_force_clear_all_rx_ring(handle);
+ hns3_clear_all_ring(handle);
+ hns3_force_clear_all_ring(handle);
hns3_nic_uninit_vector_data(priv);
+ // SPDX-License-Identifier: GPL-2.0-only
/* Copyright Altera Corporation (C) 2014. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* Adopted from dwmac-sti.c
*/
#define SYSMGR_EMACGRP_CTRL_PHYSEL_WIDTH 2
#define SYSMGR_EMACGRP_CTRL_PHYSEL_MASK 0x00000003
#define SYSMGR_EMACGRP_CTRL_PTP_REF_CLK_MASK 0x00000010
+#define SYSMGR_GEN10_EMACGRP_CTRL_PTP_REF_CLK_MASK 0x00000100
#define SYSMGR_FPGAGRP_MODULE_REG 0x00000028
#define SYSMGR_FPGAGRP_MODULE_EMAC 0x00000004
+#define SYSMGR_FPGAINTF_EMAC_REG 0x00000070
+#define SYSMGR_FPGAINTF_EMAC_BIT 0x1
#define EMAC_SPLITTER_CTRL_REG 0x0
#define EMAC_SPLITTER_CTRL_SPEED_MASK 0x3
#define EMAC_SPLITTER_CTRL_SPEED_100 0x3
#define EMAC_SPLITTER_CTRL_SPEED_1000 0x0
+struct socfpga_dwmac;
+struct socfpga_dwmac_ops {
+ int (*set_phy_mode)(struct socfpga_dwmac *dwmac_priv);
+};
+
struct socfpga_dwmac {
int interface;
u32 reg_offset;
void __iomem *splitter_base;
bool f2h_ptp_ref_clk;
struct tse_pcs pcs;
+ const struct socfpga_dwmac_ops *ops;
};
static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed)
return ret;
}
-static int socfpga_dwmac_set_phy_mode(struct socfpga_dwmac *dwmac)
+static int socfpga_set_phy_mode_common(int phymode, u32 *val)
{
- struct regmap *sys_mgr_base_addr = dwmac->sys_mgr_base_addr;
- int phymode = dwmac->interface;
- u32 reg_offset = dwmac->reg_offset;
- u32 reg_shift = dwmac->reg_shift;
- u32 ctrl, val, module;
-
switch (phymode) {
case PHY_INTERFACE_MODE_RGMII:
case PHY_INTERFACE_MODE_RGMII_ID:
- val = SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RGMII;
+ *val = SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RGMII;
break;
case PHY_INTERFACE_MODE_MII:
case PHY_INTERFACE_MODE_GMII:
case PHY_INTERFACE_MODE_SGMII:
- val = SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII;
+ *val = SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII;
+ break;
+ case PHY_INTERFACE_MODE_RMII:
+ *val = SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RMII;
break;
default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int socfpga_gen5_set_phy_mode(struct socfpga_dwmac *dwmac)
+{
+ struct regmap *sys_mgr_base_addr = dwmac->sys_mgr_base_addr;
+ int phymode = dwmac->interface;
+ u32 reg_offset = dwmac->reg_offset;
+ u32 reg_shift = dwmac->reg_shift;
+ u32 ctrl, val, module;
+
+ if (socfpga_set_phy_mode_common(phymode, &val)) {
dev_err(dwmac->dev, "bad phy mode %d\n", phymode);
return -EINVAL;
}
return 0;
}
+static int socfpga_gen10_set_phy_mode(struct socfpga_dwmac *dwmac)
+{
+ struct regmap *sys_mgr_base_addr = dwmac->sys_mgr_base_addr;
+ int phymode = dwmac->interface;
+ u32 reg_offset = dwmac->reg_offset;
+ u32 reg_shift = dwmac->reg_shift;
+ u32 ctrl, val, module;
+
+ if (socfpga_set_phy_mode_common(phymode, &val))
+ return -EINVAL;
+
+ /* Overwrite val to GMII if splitter core is enabled. The phymode here
+ * is the actual phy mode on phy hardware, but phy interface from
+ * EMAC core is GMII.
+ */
+ if (dwmac->splitter_base)
+ val = SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII;
+
+ /* Assert reset to the enet controller before changing the phy mode */
+ reset_control_assert(dwmac->stmmac_ocp_rst);
+ reset_control_assert(dwmac->stmmac_rst);
+
+ regmap_read(sys_mgr_base_addr, reg_offset, &ctrl);
+ ctrl &= ~(SYSMGR_EMACGRP_CTRL_PHYSEL_MASK);
+ ctrl |= val;
+
+ if (dwmac->f2h_ptp_ref_clk ||
+ phymode == PHY_INTERFACE_MODE_MII ||
+ phymode == PHY_INTERFACE_MODE_GMII ||
+ phymode == PHY_INTERFACE_MODE_SGMII) {
+ ctrl |= SYSMGR_GEN10_EMACGRP_CTRL_PTP_REF_CLK_MASK;
+ regmap_read(sys_mgr_base_addr, SYSMGR_FPGAINTF_EMAC_REG,
+ &module);
+ module |= (SYSMGR_FPGAINTF_EMAC_BIT << reg_shift);
+ regmap_write(sys_mgr_base_addr, SYSMGR_FPGAINTF_EMAC_REG,
+ module);
+ } else {
+ ctrl &= ~SYSMGR_GEN10_EMACGRP_CTRL_PTP_REF_CLK_MASK;
+ }
+
+ regmap_write(sys_mgr_base_addr, reg_offset, ctrl);
+
+ /* Deassert reset for the phy configuration to be sampled by
+ * the enet controller, and operation to start in requested mode
+ */
+ reset_control_deassert(dwmac->stmmac_ocp_rst);
+ reset_control_deassert(dwmac->stmmac_rst);
+ if (phymode == PHY_INTERFACE_MODE_SGMII) {
+ if (tse_pcs_init(dwmac->pcs.tse_pcs_base, &dwmac->pcs) != 0) {
+ dev_err(dwmac->dev, "Unable to initialize TSE PCS");
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
static int socfpga_dwmac_probe(struct platform_device *pdev)
{
struct plat_stmmacenet_data *plat_dat;
struct socfpga_dwmac *dwmac;
struct net_device *ndev;
struct stmmac_priv *stpriv;
+ const struct socfpga_dwmac_ops *ops;
+
+ ops = device_get_match_data(&pdev->dev);
+ if (!ops) {
+ dev_err(&pdev->dev, "no of match data provided\n");
+ return -EINVAL;
+ }
ret = stmmac_get_platform_resources(pdev, &stmmac_res);
if (ret)
goto err_remove_config_dt;
}
+ dwmac->ops = ops;
plat_dat->bsp_priv = dwmac;
plat_dat->fix_mac_speed = socfpga_dwmac_fix_mac_speed;
*/
dwmac->stmmac_rst = stpriv->plat->stmmac_rst;
- ret = socfpga_dwmac_set_phy_mode(dwmac);
+ ret = ops->set_phy_mode(dwmac);
if (ret)
goto err_dvr_remove;
{
struct net_device *ndev = dev_get_drvdata(dev);
struct stmmac_priv *priv = netdev_priv(ndev);
+ struct socfpga_dwmac *dwmac_priv = get_stmmac_bsp_priv(dev);
- socfpga_dwmac_set_phy_mode(priv->plat->bsp_priv);
+ dwmac_priv->ops->set_phy_mode(priv->plat->bsp_priv);
/* Before the enet controller is suspended, the phy is suspended.
* This causes the phy clock to be gated. The enet controller is
static SIMPLE_DEV_PM_OPS(socfpga_dwmac_pm_ops, stmmac_suspend,
socfpga_dwmac_resume);
+static const struct socfpga_dwmac_ops socfpga_gen5_ops = {
+ .set_phy_mode = socfpga_gen5_set_phy_mode,
+};
+
+static const struct socfpga_dwmac_ops socfpga_gen10_ops = {
+ .set_phy_mode = socfpga_gen10_set_phy_mode,
+};
+
static const struct of_device_id socfpga_dwmac_match[] = {
- { .compatible = "altr,socfpga-stmmac" },
+ { .compatible = "altr,socfpga-stmmac", .data = &socfpga_gen5_ops },
+ { .compatible = "altr,socfpga-stmmac-a10-s10", .data = &socfpga_gen10_ops },
{ }
};
MODULE_DEVICE_TABLE(of, socfpga_dwmac_match);
+ // SPDX-License-Identifier: GPL-2.0-only
/*
* VXLAN: Virtual eXtensible Local Area Network
*
* Copyright (c) 2012-2013 Vyatta Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
return jhash_2words(key, vni, vxlan_salt) & (FDB_HASH_SIZE - 1);
}
+static u32 fdb_head_index(struct vxlan_dev *vxlan, const u8 *mac, __be32 vni)
+{
+ if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA)
+ return eth_vni_hash(mac, vni);
+ else
+ return eth_hash(mac);
+}
+
/* Hash chain to use given mac address */
static inline struct hlist_head *vxlan_fdb_head(struct vxlan_dev *vxlan,
const u8 *mac, __be32 vni)
{
- if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA)
- return &vxlan->fdb_head[eth_vni_hash(mac, vni)];
- else
- return &vxlan->fdb_head[eth_hash(mac)];
+ return &vxlan->fdb_head[fdb_head_index(vxlan, mac, vni)];
}
/* Look up Ethernet address in forwarding table */
return -EINVAL;
vxlan = netdev_priv(dev);
- spin_lock_bh(&vxlan->hash_lock);
for (h = 0; h < FDB_HASH_SIZE; ++h) {
+ spin_lock_bh(&vxlan->hash_lock[h]);
hlist_for_each_entry(f, &vxlan->fdb_head[h], hlist) {
if (f->vni == vni) {
list_for_each_entry(rdst, &f->remotes, list) {
f, rdst,
extack);
if (rc)
- goto out;
+ goto unlock;
}
}
}
+ spin_unlock_bh(&vxlan->hash_lock[h]);
}
+ return 0;
-out:
- spin_unlock_bh(&vxlan->hash_lock);
+unlock:
+ spin_unlock_bh(&vxlan->hash_lock[h]);
return rc;
}
EXPORT_SYMBOL_GPL(vxlan_fdb_replay);
return;
vxlan = netdev_priv(dev);
- spin_lock_bh(&vxlan->hash_lock);
for (h = 0; h < FDB_HASH_SIZE; ++h) {
+ spin_lock_bh(&vxlan->hash_lock[h]);
hlist_for_each_entry(f, &vxlan->fdb_head[h], hlist)
if (f->vni == vni)
list_for_each_entry(rdst, &f->remotes, list)
rdst->offloaded = false;
+ spin_unlock_bh(&vxlan->hash_lock[h]);
}
- spin_unlock_bh(&vxlan->hash_lock);
+
}
EXPORT_SYMBOL_GPL(vxlan_fdb_clear_offload);
__be16 port;
__be32 src_vni, vni;
u32 ifindex;
+ u32 hash_index;
int err;
if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_REACHABLE))) {
if (vxlan->default_dst.remote_ip.sa.sa_family != ip.sa.sa_family)
return -EAFNOSUPPORT;
- spin_lock_bh(&vxlan->hash_lock);
+ hash_index = fdb_head_index(vxlan, addr, src_vni);
+ spin_lock_bh(&vxlan->hash_lock[hash_index]);
err = vxlan_fdb_update(vxlan, addr, &ip, ndm->ndm_state, flags,
port, src_vni, vni, ifindex,
ndm->ndm_flags | NTF_VXLAN_ADDED_BY_USER,
true, extack);
- spin_unlock_bh(&vxlan->hash_lock);
+ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
return err;
}
__be32 src_vni, vni;
__be16 port;
u32 ifindex;
+ u32 hash_index;
int err;
err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex);
if (err)
return err;
- spin_lock_bh(&vxlan->hash_lock);
+ hash_index = fdb_head_index(vxlan, addr, src_vni);
+ spin_lock_bh(&vxlan->hash_lock[hash_index]);
err = __vxlan_fdb_delete(vxlan, addr, ip, port, src_vni, vni, ifindex,
true);
- spin_unlock_bh(&vxlan->hash_lock);
+ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
return err;
}
f->updated = jiffies;
vxlan_fdb_notify(vxlan, f, rdst, RTM_NEWNEIGH, true, NULL);
} else {
+ u32 hash_index = fdb_head_index(vxlan, src_mac, vni);
+
/* learned new entry */
- spin_lock(&vxlan->hash_lock);
+ spin_lock(&vxlan->hash_lock[hash_index]);
/* close off race between vxlan_flush and incoming packets */
if (netif_running(dev))
vni,
vxlan->default_dst.remote_vni,
ifindex, NTF_SELF, true, NULL);
- spin_unlock(&vxlan->hash_lock);
+ spin_unlock(&vxlan->hash_lock[hash_index]);
}
return false;
fl4.fl4_sport = sport;
rt = ip_route_output_key(vxlan->net, &fl4);
- if (likely(!IS_ERR(rt))) {
+ if (!IS_ERR(rt)) {
if (rt->dst.dev == dev) {
netdev_dbg(dev, "circular route to %pI4\n", &daddr);
ip_rt_put(rt);
for (h = 0; h < FDB_HASH_SIZE; ++h) {
struct hlist_node *p, *n;
- spin_lock(&vxlan->hash_lock);
+ spin_lock(&vxlan->hash_lock[h]);
hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) {
struct vxlan_fdb *f
= container_of(p, struct vxlan_fdb, hlist);
} else if (time_before(timeout, next_timer))
next_timer = timeout;
}
- spin_unlock(&vxlan->hash_lock);
+ spin_unlock(&vxlan->hash_lock[h]);
}
mod_timer(&vxlan->age_timer, next_timer);
static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan, __be32 vni)
{
struct vxlan_fdb *f;
+ u32 hash_index = fdb_head_index(vxlan, all_zeros_mac, vni);
- spin_lock_bh(&vxlan->hash_lock);
+ spin_lock_bh(&vxlan->hash_lock[hash_index]);
f = __vxlan_find_mac(vxlan, all_zeros_mac, vni);
if (f)
vxlan_fdb_destroy(vxlan, f, true, true);
- spin_unlock_bh(&vxlan->hash_lock);
+ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
}
static void vxlan_uninit(struct net_device *dev)
{
unsigned int h;
- spin_lock_bh(&vxlan->hash_lock);
for (h = 0; h < FDB_HASH_SIZE; ++h) {
struct hlist_node *p, *n;
+
+ spin_lock_bh(&vxlan->hash_lock[h]);
hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) {
struct vxlan_fdb *f
= container_of(p, struct vxlan_fdb, hlist);
if (!is_zero_ether_addr(f->eth_addr))
vxlan_fdb_destroy(vxlan, f, true, true);
}
+ spin_unlock_bh(&vxlan->hash_lock[h]);
}
- spin_unlock_bh(&vxlan->hash_lock);
}
/* Cleanup timer and forwarding table on shutdown */
dev->max_mtu = ETH_MAX_MTU;
INIT_LIST_HEAD(&vxlan->next);
- spin_lock_init(&vxlan->hash_lock);
timer_setup(&vxlan->age_timer, vxlan_cleanup, TIMER_DEFERRABLE);
gro_cells_init(&vxlan->gro_cells, dev);
- for (h = 0; h < FDB_HASH_SIZE; ++h)
+ for (h = 0; h < FDB_HASH_SIZE; ++h) {
+ spin_lock_init(&vxlan->hash_lock[h]);
INIT_HLIST_HEAD(&vxlan->fdb_head[h]);
+ }
}
static void vxlan_ether_setup(struct net_device *dev)
/* handle default dst entry */
if (!vxlan_addr_equal(&conf.remote_ip, &dst->remote_ip)) {
- spin_lock_bh(&vxlan->hash_lock);
+ u32 hash_index = fdb_head_index(vxlan, all_zeros_mac, conf.vni);
+
+ spin_lock_bh(&vxlan->hash_lock[hash_index]);
if (!vxlan_addr_any(&conf.remote_ip)) {
err = vxlan_fdb_update(vxlan, all_zeros_mac,
&conf.remote_ip,
conf.remote_ifindex,
NTF_SELF, true, extack);
if (err) {
- spin_unlock_bh(&vxlan->hash_lock);
+ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
return err;
}
}
dst->remote_vni,
dst->remote_ifindex,
true);
- spin_unlock_bh(&vxlan->hash_lock);
+ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
}
if (conf.age_interval != vxlan->cfg.age_interval)
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_rdst *rdst;
struct vxlan_fdb *f;
+ u32 hash_index;
+
+ hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni);
- spin_lock_bh(&vxlan->hash_lock);
+ spin_lock_bh(&vxlan->hash_lock[hash_index]);
f = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni);
if (!f)
rdst->offloaded = fdb_info->offloaded;
out:
- spin_unlock_bh(&vxlan->hash_lock);
+ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
}
static int
{
struct vxlan_dev *vxlan = netdev_priv(dev);
struct netlink_ext_ack *extack;
+ u32 hash_index;
int err;
+ hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni);
extack = switchdev_notifier_info_to_extack(&fdb_info->info);
- spin_lock_bh(&vxlan->hash_lock);
+ spin_lock_bh(&vxlan->hash_lock[hash_index]);
err = vxlan_fdb_update(vxlan, fdb_info->eth_addr, &fdb_info->remote_ip,
NUD_REACHABLE,
NLM_F_CREATE | NLM_F_REPLACE,
fdb_info->remote_ifindex,
NTF_USE | NTF_SELF | NTF_EXT_LEARNED,
false, extack);
- spin_unlock_bh(&vxlan->hash_lock);
+ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
return err;
}
{
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_fdb *f;
+ u32 hash_index;
int err = 0;
- spin_lock_bh(&vxlan->hash_lock);
+ hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni);
+ spin_lock_bh(&vxlan->hash_lock[hash_index]);
f = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni);
if (!f)
fdb_info->remote_ifindex,
false);
- spin_unlock_bh(&vxlan->hash_lock);
+ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
return err;
}
+ // SPDX-License-Identifier: GPL-2.0-only
/*
* mac80211_hwsim - software simulator of 802.11 radio(s) for mac80211
* Copyright (c) 2016 - 2017 Intel Deutschland GmbH
* Copyright (C) 2018 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
/*
.subcmd = QCA_NL80211_SUBCMD_TEST },
.flags = WIPHY_VENDOR_CMD_NEED_NETDEV,
.doit = mac80211_hwsim_vendor_cmd_test,
+ .policy = hwsim_vendor_test_policy,
+ .maxattr = QCA_WLAN_VENDOR_ATTR_MAX,
}
};
+ // SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2016 Chelsio Communications, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include "cxgbit.h"
ret = cxgbi_ppm_init(lldi->iscsi_ppm, cdev->lldi.ports[0],
cdev->lldi.pdev, &cdev->lldi, &tformat,
- ppmax, lldi->iscsi_llimit,
- lldi->vr->iscsi.start, 2);
+ lldi->vr->iscsi.size, lldi->iscsi_llimit,
+ lldi->vr->iscsi.start, 2,
+ lldi->vr->ppod_edram.start,
+ lldi->vr->ppod_edram.size);
if (ret >= 0) {
struct cxgbi_ppm *ppm = (struct cxgbi_ppm *)(*lldi->iscsi_ppm);
+ // SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2009 Red Hat, Inc.
*
- * This work is licensed under the terms of the GNU GPL, version 2.
- *
* virtio-net server in host kernel.
*/
#include "vhost.h"
-static int experimental_zcopytx = 1;
+static int experimental_zcopytx = 0;
module_param(experimental_zcopytx, int, 0444);
MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
" 1 -Enable; 0 - Disable");
+ /* SPDX-License-Identifier: GPL-2.0-only */
/*
* IEEE 802.11 defines
*
* Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH
* Copyright (c) 2016 - 2017 Intel Deutschland GmbH
* Copyright (c) 2018 - 2019 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#ifndef LINUX_IEEE80211_H
#define FILS_ERP_MAX_RRK_LEN 64
#define PMK_MAX_LEN 64
+#define SAE_PASSWORD_MAX_LEN 128
/* Public action codes (IEEE Std 802.11-2016, 9.6.8.1, Table 9-307) */
enum ieee80211_pub_actioncode {
#define WLAN_EXT_CAPA10_TWT_REQUESTER_SUPPORT BIT(5)
#define WLAN_EXT_CAPA10_TWT_RESPONDER_SUPPORT BIT(6)
+/*
+ * When set, indicates that the AP is able to tolerate 26-tone RU UL
+ * OFDMA transmissions using HE TB PPDU from OBSS (not falsely classify the
+ * 26-tone RU UL OFDMA transmissions as radar pulses).
+ */
+#define WLAN_EXT_CAPA10_OBSS_NARROW_BW_RU_TOLERANCE_SUPPORT BIT(7)
+
/* Defines support for enhanced multi-bssid advertisement*/
#define WLAN_EXT_CAPA11_EMA_SUPPORT BIT(1)
+ /* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __NET_CFG80211_H
#define __NET_CFG80211_H
/*
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
* Copyright (C) 2018-2019 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/netdevice.h>
}
/**
- * ieee80211_get_he_sta_cap - return HE capabilities for an sband's STA
- * @sband: the sband to search for the STA on
+ * ieee80211_get_he_iftype_cap - return HE capabilities for an sband's iftype
+ * @sband: the sband to search for the iftype on
+ * @iftype: enum nl80211_iftype
*
* Return: pointer to the struct ieee80211_sta_he_cap, or NULL is none found
*/
static inline const struct ieee80211_sta_he_cap *
-ieee80211_get_he_sta_cap(const struct ieee80211_supported_band *sband)
+ieee80211_get_he_iftype_cap(const struct ieee80211_supported_band *sband,
+ u8 iftype)
{
const struct ieee80211_sband_iftype_data *data =
- ieee80211_get_sband_iftype_data(sband, NL80211_IFTYPE_STATION);
+ ieee80211_get_sband_iftype_data(sband, iftype);
if (data && data->he_cap.has_he)
return &data->he_cap;
return NULL;
}
+/**
+ * ieee80211_get_he_sta_cap - return HE capabilities for an sband's STA
+ * @sband: the sband to search for the STA on
+ *
+ * Return: pointer to the struct ieee80211_sta_he_cap, or NULL is none found
+ */
+static inline const struct ieee80211_sta_he_cap *
+ieee80211_get_he_sta_cap(const struct ieee80211_supported_band *sband)
+{
+ return ieee80211_get_he_iftype_cap(sband, NL80211_IFTYPE_STATION);
+}
+
/**
* wiphy_read_of_freq_limits - read frequency limits from device tree
*
* CFG80211_MAX_WEP_KEYS WEP keys
* @wep_tx_key: key index (0..3) of the default TX static WEP key
* @psk: PSK (for devices supporting 4-way-handshake offload)
+ * @sae_pwd: password for SAE authentication (for devices supporting SAE
+ * offload)
+ * @sae_pwd_len: length of SAE password (for devices supporting SAE offload)
*/
struct cfg80211_crypto_settings {
u32 wpa_versions;
struct key_params *wep_keys;
int wep_tx_key;
const u8 *psk;
+ const u8 *sae_pwd;
+ u8 sae_pwd_len;
};
/**
* @he_cap: HE capabilities (or %NULL if HE isn't enabled)
* @ht_required: stations must support HT
* @vht_required: stations must support VHT
+ * @twt_responder: Enable Target Wait Time
* @flags: flags, as defined in enum cfg80211_ap_settings_flags
*/
struct cfg80211_ap_settings {
const struct ieee80211_vht_cap *vht_cap;
const struct ieee80211_he_cap_elem *he_cap;
bool ht_required, vht_required;
+ bool twt_responder;
u32 flags;
};
u8 rx_nss;
};
+#define VENDOR_CMD_RAW_DATA ((const struct nla_policy *)ERR_PTR(-ENODATA))
+
/**
* struct wiphy_vendor_command - vendor command definition
* @info: vendor command identifying information, as used in nl80211
* @dumpit: dump callback, for transferring bigger/multiple items. The
* @storage points to cb->args[5], ie. is preserved over the multiple
* dumpit calls.
+ * @policy: policy pointer for attributes within %NL80211_ATTR_VENDOR_DATA.
+ * Set this to %VENDOR_CMD_RAW_DATA if no policy can be given and the
+ * attribute is just raw data (e.g. a firmware command).
+ * @maxattr: highest attribute number in policy
* It's recommended to not have the same sub command with both @doit and
* @dumpit, so that userspace can assume certain ones are get and others
* are used with dump requests.
int (*dumpit)(struct wiphy *wiphy, struct wireless_dev *wdev,
struct sk_buff *skb, const void *data, int data_len,
unsigned long *storage);
+ const struct nla_policy *policy;
+ unsigned int maxattr;
};
/**
*/
void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *bss);
+/**
+ * cfg80211_bss_iter - iterate all BSS entries
+ *
+ * This function iterates over the BSS entries associated with the given wiphy
+ * and calls the callback for the iterated BSS. The iterator function is not
+ * allowed to call functions that might modify the internal state of the BSS DB.
+ *
+ * @wiphy: the wiphy
+ * @chandef: if given, the iterator function will be called only if the channel
+ * of the currently iterated BSS is a subset of the given channel.
+ * @iter: the iterator function to call
+ * @iter_data: an argument to the iterator function
+ */
+void cfg80211_bss_iter(struct wiphy *wiphy,
+ struct cfg80211_chan_def *chandef,
+ void (*iter)(struct wiphy *wiphy,
+ struct cfg80211_bss *bss,
+ void *data),
+ void *iter_data);
+
static inline enum nl80211_bss_scan_width
cfg80211_chandef_to_scan_width(const struct cfg80211_chan_def *chandef)
{
* case.
* @bssid: The BSSID of the AP (may be %NULL)
* @bss: Entry of bss to which STA got connected to, can be obtained through
- * cfg80211_get_bss() (may be %NULL). Only one parameter among @bssid and
- * @bss needs to be specified.
+ * cfg80211_get_bss() (may be %NULL). But it is recommended to store the
+ * bss from the connect_request and hold a reference to it and return
+ * through this param to avoid a warning if the bss is expired during the
+ * connection, esp. for those drivers implementing connect op.
+ * Only one parameter among @bssid and @bss needs to be specified.
* @req_ie: Association request IEs (may be %NULL)
* @req_ie_len: Association request IEs length
* @resp_ie: Association response IEs (may be %NULL)
*
* @dev: network device
* @bssid: the BSSID of the AP
- * @bss: entry of bss to which STA got connected to, can be obtained
- * through cfg80211_get_bss (may be %NULL)
+ * @bss: Entry of bss to which STA got connected to, can be obtained through
+ * cfg80211_get_bss() (may be %NULL). But it is recommended to store the
+ * bss from the connect_request and hold a reference to it and return
+ * through this param to avoid a warning if the bss is expired during the
+ * connection, esp. for those drivers implementing connect op.
+ * Only one parameter among @bssid and @bss needs to be specified.
* @req_ie: association request IEs (maybe be %NULL)
* @req_ie_len: association request IEs length
* @resp_ie: association response IEs (may be %NULL)
struct ieee80211_channel *chan,
gfp_t gfp);
+/**
+ * cfg80211_tx_mgmt_expired - tx_mgmt duration expired
+ * @wdev: wireless device
+ * @cookie: the requested cookie
+ * @chan: The current channel (from tx_mgmt request)
+ * @gfp: allocation flags
+ */
+void cfg80211_tx_mgmt_expired(struct wireless_dev *wdev, u64 cookie,
+ struct ieee80211_channel *chan, gfp_t gfp);
+
/**
* cfg80211_sinfo_alloc_tid_stats - allocate per-tid statistics.
*
+ /* SPDX-License-Identifier: GPL-2.0-only */
/*
* mac80211 <-> driver interface
*
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
* Copyright (C) 2018 - 2019 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#ifndef MAC80211_H
* @BSS_CHANGED_MCAST_RATE: Multicast Rate setting changed for this interface
* @BSS_CHANGED_FTM_RESPONDER: fime timing reasurement request responder
* functionality changed for this BSS (AP mode).
+ * @BSS_CHANGED_TWT: TWT status changed
*
*/
enum ieee80211_bss_change {
BSS_CHANGED_KEEP_ALIVE = 1<<24,
BSS_CHANGED_MCAST_RATE = 1<<25,
BSS_CHANGED_FTM_RESPONDER = 1<<26,
+ BSS_CHANGED_TWT = 1<<27,
/* when adding here, make sure to change ieee80211_reconfig */
};
* @he_support: does this BSS support HE
* @twt_requester: does this BSS support TWT requester (relevant for managed
* mode only, set if the AP advertises TWT responder role)
+ * @twt_responder: does this BSS support TWT requester (relevant for managed
+ * mode only, set if the AP advertises TWT responder role)
* @assoc: association status
* @ibss_joined: indicates whether this station is part of an IBSS
* or not
u16 frame_time_rts_th;
bool he_support;
bool twt_requester;
+ bool twt_responder;
/* association related data */
bool assoc, ibss_joined;
bool ibss_creator;
* @IEEE80211_HW_EXT_KEY_ID_NATIVE: Driver and hardware are supporting Extended
* Key ID and can handle two unicast keys per station for Rx and Tx.
*
+ * @IEEE80211_HW_NO_AMPDU_KEYBORDER_SUPPORT: The card/driver can't handle
+ * active Tx A-MPDU sessions with Extended Key IDs during rekey.
+ *
* @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
*/
enum ieee80211_hw_flags {
IEEE80211_HW_SUPPORTS_MULTI_BSSID,
IEEE80211_HW_SUPPORTS_ONLY_HE_MULTI_BSSID,
IEEE80211_HW_EXT_KEY_ID_NATIVE,
+ IEEE80211_HW_NO_AMPDU_KEYBORDER_SUPPORT,
/* keep last, obviously */
NUM_IEEE80211_HW_FLAGS
return (sta == NULL || sta->supp_rates[band] & BIT(index));
}
-/**
- * rate_control_send_low - helper for drivers for management/no-ack frames
- *
- * Rate control algorithms that agree to use the lowest rate to
- * send management frames and NO_ACK data with the respective hw
- * retries should use this in the beginning of their mac80211 get_rate
- * callback. If true is returned the rate control can simply return.
- * If false is returned we guarantee that sta and sta and priv_sta is
- * not null.
- *
- * Rate control algorithms wishing to do more intelligent selection of
- * rate for multicast/broadcast frames may choose to not use this.
- *
- * @sta: &struct ieee80211_sta pointer to the target destination. Note
- * that this may be null.
- * @priv_sta: private rate control structure. This may be null.
- * @txrc: rate control information we sholud populate for mac80211.
- */
-bool rate_control_send_low(struct ieee80211_sta *sta,
- void *priv_sta,
- struct ieee80211_tx_rate_control *txrc);
-
-
static inline s8
rate_lowest_index(struct ieee80211_supported_band *sband,
struct ieee80211_sta *sta)
+ // SPDX-License-Identifier: GPL-2.0-only
/*
* Functions to manage eBPF programs attached to cgroups
*
* Copyright (c) 2016 Daniel Mack
- *
- * This file is subject to the terms and conditions of version 2 of the GNU
- * General Public License. See the file COPYING in the main directory of the
- * Linux distribution for more details.
*/
#include <linux/kernel.h>
DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
EXPORT_SYMBOL(cgroup_bpf_enabled_key);
+void cgroup_bpf_offline(struct cgroup *cgrp)
+{
+ cgroup_get(cgrp);
+ percpu_ref_kill(&cgrp->bpf.refcnt);
+}
+
/**
- * cgroup_bpf_put() - put references of all bpf programs
- * @cgrp: the cgroup to modify
+ * cgroup_bpf_release() - put references of all bpf programs and
+ * release all cgroup bpf data
+ * @work: work structure embedded into the cgroup to modify
*/
-void cgroup_bpf_put(struct cgroup *cgrp)
+static void cgroup_bpf_release(struct work_struct *work)
{
+ struct cgroup *cgrp = container_of(work, struct cgroup,
+ bpf.release_work);
enum bpf_cgroup_storage_type stype;
+ struct bpf_prog_array *old_array;
unsigned int type;
for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
kfree(pl);
static_branch_dec(&cgroup_bpf_enabled_key);
}
- bpf_prog_array_free(cgrp->bpf.effective[type]);
+ old_array = rcu_dereference_protected(
+ cgrp->bpf.effective[type],
+ percpu_ref_is_dying(&cgrp->bpf.refcnt));
+ bpf_prog_array_free(old_array);
}
+
+ percpu_ref_exit(&cgrp->bpf.refcnt);
+ cgroup_put(cgrp);
+}
+
+/**
+ * cgroup_bpf_release_fn() - callback used to schedule releasing
+ * of bpf cgroup data
+ * @ref: percpu ref counter structure
+ */
+static void cgroup_bpf_release_fn(struct percpu_ref *ref)
+{
+ struct cgroup *cgrp = container_of(ref, struct cgroup, bpf.refcnt);
+
+ INIT_WORK(&cgrp->bpf.release_work, cgroup_bpf_release);
+ queue_work(system_wq, &cgrp->bpf.release_work);
}
/* count number of elements in the list.
*/
static int compute_effective_progs(struct cgroup *cgrp,
enum bpf_attach_type type,
- struct bpf_prog_array __rcu **array)
+ struct bpf_prog_array **array)
{
enum bpf_cgroup_storage_type stype;
struct bpf_prog_array *progs;
}
} while ((p = cgroup_parent(p)));
- rcu_assign_pointer(*array, progs);
+ *array = progs;
return 0;
}
static void activate_effective_progs(struct cgroup *cgrp,
enum bpf_attach_type type,
- struct bpf_prog_array __rcu *array)
+ struct bpf_prog_array *old_array)
{
- struct bpf_prog_array __rcu *old_array;
-
- old_array = xchg(&cgrp->bpf.effective[type], array);
+ rcu_swap_protected(cgrp->bpf.effective[type], old_array,
+ lockdep_is_held(&cgroup_mutex));
/* free prog array after grace period, since __cgroup_bpf_run_*()
* might be still walking the array
*/
* that array below is variable length
*/
#define NR ARRAY_SIZE(cgrp->bpf.effective)
- struct bpf_prog_array __rcu *arrays[NR] = {};
- int i;
+ struct bpf_prog_array *arrays[NR] = {};
+ int ret, i;
+
+ ret = percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn, 0,
+ GFP_KERNEL);
+ if (ret)
+ return ret;
for (i = 0; i < NR; i++)
INIT_LIST_HEAD(&cgrp->bpf.progs[i]);
cleanup:
for (i = 0; i < NR; i++)
bpf_prog_array_free(arrays[i]);
+
+ percpu_ref_exit(&cgrp->bpf.refcnt);
+
return -ENOMEM;
}
enum bpf_attach_type type = attr->query.attach_type;
struct list_head *progs = &cgrp->bpf.progs[type];
u32 flags = cgrp->bpf.flags[type];
+ struct bpf_prog_array *effective;
int cnt, ret = 0, i;
+ effective = rcu_dereference_protected(cgrp->bpf.effective[type],
+ lockdep_is_held(&cgroup_mutex));
+
if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE)
- cnt = bpf_prog_array_length(cgrp->bpf.effective[type]);
+ cnt = bpf_prog_array_length(effective);
else
cnt = prog_list_length(progs);
}
if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
- return bpf_prog_array_copy_to_user(cgrp->bpf.effective[type],
- prog_ids, cnt);
+ return bpf_prog_array_copy_to_user(effective, prog_ids, cnt);
} else {
struct bpf_prog_list *pl;
u32 id;
* The program type passed in via @type must be suitable for network
* filtering. No further check is performed to assert that.
*
- * This function will return %-EPERM if any if an attached program was found
- * and if it returned != 1 during execution. In all other cases, 0 is returned.
+ * For egress packets, this function can return:
+ * NET_XMIT_SUCCESS (0) - continue with packet output
+ * NET_XMIT_DROP (1) - drop packet and notify TCP to call cwr
+ * NET_XMIT_CN (2) - continue with packet output and notify TCP
+ * to call cwr
+ * -EPERM - drop packet
+ *
+ * For ingress packets, this function will return -EPERM if any
+ * attached program was found and if it returned != 1 during execution.
+ * Otherwise 0 is returned.
*/
int __cgroup_bpf_run_filter_skb(struct sock *sk,
struct sk_buff *skb,
/* compute pointers for the bpf prog */
bpf_compute_and_save_data_end(skb, &saved_data_end);
- ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
- __bpf_prog_run_save_cb);
+ if (type == BPF_CGROUP_INET_EGRESS) {
+ ret = BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(
+ cgrp->bpf.effective[type], skb, __bpf_prog_run_save_cb);
+ } else {
+ ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
+ __bpf_prog_run_save_cb);
+ ret = (ret == 1 ? 0 : -EPERM);
+ }
bpf_restore_data_end(skb, saved_data_end);
__skb_pull(skb, offset);
skb->sk = save_sk;
- return ret == 1 ? 0 : -EPERM;
+
+ return ret;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
+ // SPDX-License-Identifier: GPL-2.0-only
/*
* Longest prefix match list implementation
*
* Copyright (c) 2016,2017 Daniel Mack
* Copyright (c) 2016 David Herrmann
- *
- * This file is subject to the terms and conditions of version 2 of the GNU
- * General Public License. See the file COPYING in the main directory of the
- * Linux distribution for more details.
*/
#include <linux/bpf.h>
cost_per_node = sizeof(struct lpm_trie_node) +
attr->value_size + trie->data_size;
cost += (u64) attr->max_entries * cost_per_node;
- if (cost >= U32_MAX - PAGE_SIZE) {
- ret = -E2BIG;
- goto out_err;
- }
-
- trie->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
- ret = bpf_map_precharge_memlock(trie->map.pages);
+ ret = bpf_map_charge_init(&trie->map.memory, cost);
if (ret)
goto out_err;
}
break;
+ case SO_DETACH_REUSEPORT_BPF:
+ ret = reuseport_detach_prog(sk);
+ break;
+
case SO_DETACH_FILTER:
ret = sk_detach_filter(sk);
break;
{
u32 meminfo[SK_MEMINFO_VARS];
- if (get_user(len, optlen))
- return -EFAULT;
-
sk_get_meminfo(sk, meminfo);
len = min_t(unsigned int, len, sizeof(meminfo));
EXPORT_SYMBOL(inet_rtx_syn_ack);
/* return true if req was found in the ehash table */
-static bool reqsk_queue_unlink(struct request_sock_queue *queue,
- struct request_sock *req)
+static bool reqsk_queue_unlink(struct request_sock *req)
{
struct inet_hashinfo *hashinfo = req_to_sk(req)->sk_prot->h.hashinfo;
bool found = false;
void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
{
- if (reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req)) {
+ if (reqsk_queue_unlink(req)) {
reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
reqsk_put(req);
}
static void reqsk_queue_hash_req(struct request_sock *req,
unsigned long timeout)
{
- req->num_retrans = 0;
- req->num_timeout = 0;
- req->sk = NULL;
-
timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
mod_timer(&req->rsk_timer, jiffies + timeout);
+ // SPDX-License-Identifier: GPL-2.0-only
/* iptables module for the IPv4 and TCP ECN bits, Version 1.5
*
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/in.h>
if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) {
__u8 oldtos;
- if (!skb_make_writable(skb, sizeof(struct iphdr)))
+ if (skb_ensure_writable(skb, sizeof(struct iphdr)))
return false;
iph = ip_hdr(skb);
oldtos = iph->tos;
tcph->cwr == einfo->proto.tcp.cwr))
return true;
- if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*tcph)))
+ if (skb_ensure_writable(skb, ip_hdrlen(skb) + sizeof(*tcph)))
return false;
tcph = (void *)ip_hdr(skb) + ip_hdrlen(skb);
+ // SPDX-License-Identifier: GPL-2.0-only
/*
* H.323 extension for NAT alteration.
*
*
- * This source code is licensed under General Public License version 2.
- *
* Based on the 'brute force' H.323 NAT module by
*/
net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_udp_packet error\n");
return -1;
}
- /* nf_nat_mangle_udp_packet uses skb_make_writable() to copy
+ /* nf_nat_mangle_udp_packet uses skb_ensure_writable() to copy
* or pull everything in a linear buffer, so we can safely
* use the skb pointers now */
*data = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr);
+ // SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2007-2008 BalaBit IT Ltd.
* Author: Krisztian Kovacs
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
*/
#include <net/netfilter/nf_tproxy.h>
__be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
{
+ const struct in_ifaddr *ifa;
struct in_device *indev;
__be32 laddr;
laddr = 0;
indev = __in_dev_get_rcu(skb->dev);
- for_primary_ifa(indev) {
+
+ in_dev_for_each_ifa_rcu(ifa, indev) {
+ if (ifa->ifa_flags & IFA_F_SECONDARY)
+ continue;
+
laddr = ifa->ifa_local;
break;
- } endfor_ifa(indev);
+ }
return laddr ? laddr : daddr;
}
#include <linux/tcp.h>
#include <linux/rcupdate.h>
#include <linux/rculist.h>
+#include <linux/siphash.h>
#include <net/inetpeer.h>
#include <net/tcp.h>
* for a valid cookie, so this is an acceptable risk.
*/
get_random_bytes(key, sizeof(key));
- tcp_fastopen_reset_cipher(net, NULL, key, sizeof(key));
+ tcp_fastopen_reset_cipher(net, NULL, key, NULL, sizeof(key));
}
static void tcp_fastopen_ctx_free(struct rcu_head *head)
{
struct tcp_fastopen_context *ctx =
container_of(head, struct tcp_fastopen_context, rcu);
- crypto_free_cipher(ctx->tfm);
- kfree(ctx);
+
+ kzfree(ctx);
}
void tcp_fastopen_destroy_cipher(struct sock *sk)
}
int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
- void *key, unsigned int len)
+ void *primary_key, void *backup_key,
+ unsigned int len)
{
struct tcp_fastopen_context *ctx, *octx;
struct fastopen_queue *q;
- int err;
+ int err = 0;
ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
- if (!ctx)
- return -ENOMEM;
- ctx->tfm = crypto_alloc_cipher("aes", 0, 0);
-
- if (IS_ERR(ctx->tfm)) {
- err = PTR_ERR(ctx->tfm);
-error: kfree(ctx);
- pr_err("TCP: TFO aes cipher alloc error: %d\n", err);
- return err;
+ if (!ctx) {
+ err = -ENOMEM;
+ goto out;
}
- err = crypto_cipher_setkey(ctx->tfm, key, len);
- if (err) {
- pr_err("TCP: TFO cipher key error: %d\n", err);
- crypto_free_cipher(ctx->tfm);
- goto error;
- }
- memcpy(ctx->key, key, len);
+ memcpy(ctx->key[0], primary_key, len);
+ if (backup_key) {
+ memcpy(ctx->key[1], backup_key, len);
+ ctx->num = 2;
+ } else {
+ ctx->num = 1;
+ }
spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
if (sk) {
if (octx)
call_rcu(&octx->rcu, tcp_fastopen_ctx_free);
+out:
return err;
}
-static bool __tcp_fastopen_cookie_gen(struct sock *sk, const void *path,
- struct tcp_fastopen_cookie *foc)
+static bool __tcp_fastopen_cookie_gen_cipher(struct request_sock *req,
+ struct sk_buff *syn,
+ const u8 *key,
+ struct tcp_fastopen_cookie *foc)
{
- struct tcp_fastopen_context *ctx;
- bool ok = false;
-
- rcu_read_lock();
-
- ctx = rcu_dereference(inet_csk(sk)->icsk_accept_queue.fastopenq.ctx);
- if (!ctx)
- ctx = rcu_dereference(sock_net(sk)->ipv4.tcp_fastopen_ctx);
-
- if (ctx) {
- crypto_cipher_encrypt_one(ctx->tfm, foc->val, path);
- foc->len = TCP_FASTOPEN_COOKIE_SIZE;
- ok = true;
- }
- rcu_read_unlock();
- return ok;
-}
+ BUILD_BUG_ON(TCP_FASTOPEN_KEY_LENGTH != sizeof(siphash_key_t));
+ BUILD_BUG_ON(TCP_FASTOPEN_COOKIE_SIZE != sizeof(u64));
-/* Generate the fastopen cookie by doing aes128 encryption on both
- * the source and destination addresses. Pad 0s for IPv4 or IPv4-mapped-IPv6
- * addresses. For the longer IPv6 addresses use CBC-MAC.
- *
- * XXX (TFO) - refactor when TCP_FASTOPEN_COOKIE_SIZE != AES_BLOCK_SIZE.
- */
-static bool tcp_fastopen_cookie_gen(struct sock *sk,
- struct request_sock *req,
- struct sk_buff *syn,
- struct tcp_fastopen_cookie *foc)
-{
if (req->rsk_ops->family == AF_INET) {
const struct iphdr *iph = ip_hdr(syn);
- __be32 path[4] = { iph->saddr, iph->daddr, 0, 0 };
- return __tcp_fastopen_cookie_gen(sk, path, foc);
+ foc->val[0] = siphash(&iph->saddr,
+ sizeof(iph->saddr) +
+ sizeof(iph->daddr),
+ (const siphash_key_t *)key);
+ foc->len = TCP_FASTOPEN_COOKIE_SIZE;
+ return true;
}
-
#if IS_ENABLED(CONFIG_IPV6)
if (req->rsk_ops->family == AF_INET6) {
const struct ipv6hdr *ip6h = ipv6_hdr(syn);
- struct tcp_fastopen_cookie tmp;
- if (__tcp_fastopen_cookie_gen(sk, &ip6h->saddr, &tmp)) {
- struct in6_addr *buf = &tmp.addr;
- int i;
-
- for (i = 0; i < 4; i++)
- buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i];
- return __tcp_fastopen_cookie_gen(sk, buf, foc);
- }
+ foc->val[0] = siphash(&ip6h->saddr,
+ sizeof(ip6h->saddr) +
+ sizeof(ip6h->daddr),
+ (const siphash_key_t *)key);
+ foc->len = TCP_FASTOPEN_COOKIE_SIZE;
+ return true;
}
#endif
return false;
}
+/* Generate the fastopen cookie by applying SipHash to both the source and
+ * destination addresses.
+ */
+static void tcp_fastopen_cookie_gen(struct sock *sk,
+ struct request_sock *req,
+ struct sk_buff *syn,
+ struct tcp_fastopen_cookie *foc)
+{
+ struct tcp_fastopen_context *ctx;
+
+ rcu_read_lock();
+ ctx = tcp_fastopen_get_ctx(sk);
+ if (ctx)
+ __tcp_fastopen_cookie_gen_cipher(req, syn, ctx->key[0], foc);
+ rcu_read_unlock();
+}
/* If an incoming SYN or SYNACK frame contains a payload and/or FIN,
* queue this additional data / FIN.
tcp_fin(sk);
}
+/* returns 0 - no key match, 1 for primary, 2 for backup */
+static int tcp_fastopen_cookie_gen_check(struct sock *sk,
+ struct request_sock *req,
+ struct sk_buff *syn,
+ struct tcp_fastopen_cookie *orig,
+ struct tcp_fastopen_cookie *valid_foc)
+{
+ struct tcp_fastopen_cookie search_foc = { .len = -1 };
+ struct tcp_fastopen_cookie *foc = valid_foc;
+ struct tcp_fastopen_context *ctx;
+ int i, ret = 0;
+
+ rcu_read_lock();
+ ctx = tcp_fastopen_get_ctx(sk);
+ if (!ctx)
+ goto out;
+ for (i = 0; i < tcp_fastopen_context_len(ctx); i++) {
+ __tcp_fastopen_cookie_gen_cipher(req, syn, ctx->key[i], foc);
+ if (tcp_fastopen_cookie_match(foc, orig)) {
+ ret = i + 1;
+ goto out;
+ }
+ foc = &search_foc;
+ }
+out:
+ rcu_read_unlock();
+ return ret;
+}
+
static struct sock *tcp_fastopen_create_child(struct sock *sk,
struct sk_buff *skb,
struct request_sock *req)
struct sock *child;
bool own_req;
- req->num_retrans = 0;
- req->num_timeout = 0;
- req->sk = NULL;
-
child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL,
NULL, &own_req);
if (!child)
int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
struct tcp_fastopen_cookie valid_foc = { .len = -1 };
struct sock *child;
+ int ret = 0;
if (foc->len == 0) /* Client requests a cookie */
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD);
tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD))
goto fastopen;
- if (foc->len >= 0 && /* Client presents or requests a cookie */
- tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc) &&
- foc->len == TCP_FASTOPEN_COOKIE_SIZE &&
- foc->len == valid_foc.len &&
- !memcmp(foc->val, valid_foc.val, foc->len)) {
- /* Cookie is valid. Create a (full) child socket to accept
- * the data in SYN before returning a SYN-ACK to ack the
- * data. If we fail to create the socket, fall back and
- * ack the ISN only but includes the same cookie.
- *
- * Note: Data-less SYN with valid cookie is allowed to send
- * data in SYN_RECV state.
- */
+ if (foc->len == 0) {
+ /* Client requests a cookie. */
+ tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc);
+ } else if (foc->len > 0) {
+ ret = tcp_fastopen_cookie_gen_check(sk, req, skb, foc,
+ &valid_foc);
+ if (!ret) {
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
+ } else {
+ /* Cookie is valid. Create a (full) child socket to
+ * accept the data in SYN before returning a SYN-ACK to
+ * ack the data. If we fail to create the socket, fall
+ * back and ack the ISN only but includes the same
+ * cookie.
+ *
+ * Note: Data-less SYN with valid cookie is allowed to
+ * send data in SYN_RECV state.
+ */
fastopen:
- child = tcp_fastopen_create_child(sk, skb, req);
- if (child) {
- foc->len = -1;
+ child = tcp_fastopen_create_child(sk, skb, req);
+ if (child) {
+ if (ret == 2) {
+ valid_foc.exp = foc->exp;
+ *foc = valid_foc;
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPFASTOPENPASSIVEALTKEY);
+ } else {
+ foc->len = -1;
+ }
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPFASTOPENPASSIVE);
+ return child;
+ }
NET_INC_STATS(sock_net(sk),
- LINUX_MIB_TCPFASTOPENPASSIVE);
- return child;
+ LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
}
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
- } else if (foc->len > 0) /* Client presents an invalid cookie */
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
-
+ }
valid_foc.exp = foc->exp;
*foc = valid_foc;
return NULL;
memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
sizeof(struct inet6_skb_parm)));
+ tcp_add_tx_delay(skb, tp);
+
err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
if (unlikely(err > 0)) {
if (nsize < 0)
nsize = 0;
- if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf)) {
+ if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf &&
+ tcp_queue != TCP_FRAG_IN_WRITE_QUEUE)) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG);
return -ENOMEM;
}
sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
limit <<= factor;
+ if (static_branch_unlikely(&tcp_tx_delay_enabled) &&
+ tcp_sk(sk)->tcp_tx_delay) {
+ u64 extra_bytes = (u64)sk->sk_pacing_rate * tcp_sk(sk)->tcp_tx_delay;
+
+ /* TSQ is based on skb truesize sum (sk_wmem_alloc), so we
+ * approximate our needs assuming an ~100% skb->truesize overhead.
+ * USEC_PER_SEC is approximated by 2^20.
+ * do_div(extra_bytes, USEC_PER_SEC/2) is replaced by a right shift.
+ */
+ extra_bytes >>= (20 - 1);
+ limit += extra_bytes;
+ }
if (refcount_read(&sk->sk_wmem_alloc) > limit) {
/* Always send skb if rtx queue is empty.
* No need to wait for TX completion to call us back,
int tcp_header_size;
struct tcphdr *th;
int mss;
+ u64 now;
skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
if (unlikely(!skb)) {
mss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
memset(&opts, 0, sizeof(opts));
+ now = tcp_clock_ns();
#ifdef CONFIG_SYN_COOKIES
if (unlikely(req->cookie_ts))
skb->skb_mstamp_ns = cookie_init_timestamp(req);
else
#endif
{
- skb->skb_mstamp_ns = tcp_clock_ns();
+ skb->skb_mstamp_ns = now;
if (!tcp_rsk(req)->snt_synack) /* Timestamp first SYNACK */
tcp_rsk(req)->snt_synack = tcp_skb_timestamp_us(skb);
}
rcu_read_unlock();
#endif
- /* Do not fool tcpdump (if any), clean our debris */
- skb->tstamp = 0;
+ skb->skb_mstamp_ns = now;
+ tcp_add_tx_delay(skb, tp);
+
return skb;
}
EXPORT_SYMBOL(tcp_make_synack);
gso_skb->destructor = NULL;
segs = skb_segment(gso_skb, features);
- if (unlikely(IS_ERR_OR_NULL(segs))) {
+ if (IS_ERR_OR_NULL(segs)) {
if (copy_dtor)
gso_skb->destructor = sock_wfree;
return segs;
seg = segs;
uh = udp_hdr(seg);
+ /* preserve TX timestamp flags and TS key for first segment */
+ skb_shinfo(seg)->tskey = skb_shinfo(gso_skb)->tskey;
+ skb_shinfo(seg)->tx_flags |=
+ (skb_shinfo(gso_skb)->tx_flags & SKBTX_ANY_TSTAMP);
+
/* compute checksum adjustment based on old length versus new */
newlen = htons(sizeof(*uh) + mss);
check = csum16_add(csum16_sub(uh->check, uh->len), newlen);
static struct ctl_table nf_ct_frag6_sysctl_table[] = {
{
.procname = "nf_conntrack_frag6_timeout",
- .data = &init_net.nf_frag.frags.timeout,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
{
.procname = "nf_conntrack_frag6_low_thresh",
- .data = &init_net.nf_frag.frags.low_thresh,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
- .extra2 = &init_net.nf_frag.frags.high_thresh
},
{
.procname = "nf_conntrack_frag6_high_thresh",
- .data = &init_net.nf_frag.frags.high_thresh,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
- .extra1 = &init_net.nf_frag.frags.low_thresh
},
{ }
};
GFP_KERNEL);
if (table == NULL)
goto err_alloc;
-
- table[0].data = &net->nf_frag.frags.timeout;
- table[1].data = &net->nf_frag.frags.low_thresh;
- table[1].extra2 = &net->nf_frag.frags.high_thresh;
- table[2].data = &net->nf_frag.frags.high_thresh;
- table[2].extra1 = &net->nf_frag.frags.low_thresh;
- table[2].extra2 = &init_net.nf_frag.frags.high_thresh;
}
+ table[0].data = &net->nf_frag.fqdir->timeout;
+ table[1].data = &net->nf_frag.fqdir->low_thresh;
+ table[1].extra2 = &net->nf_frag.fqdir->high_thresh;
+ table[2].data = &net->nf_frag.fqdir->high_thresh;
+ table[2].extra1 = &net->nf_frag.fqdir->low_thresh;
+ table[2].extra2 = &init_net.nf_frag.fqdir->high_thresh;
+
hdr = register_net_sysctl(net, "net/netfilter", table);
if (hdr == NULL)
goto err_reg;
{
struct inet_frag_queue *frag = from_timer(frag, t, timer);
struct frag_queue *fq;
- struct net *net;
fq = container_of(frag, struct frag_queue, q);
- net = container_of(fq->q.net, struct net, nf_frag.frags);
- ip6frag_expire_frag_queue(net, fq);
+ ip6frag_expire_frag_queue(fq->q.fqdir->net, fq);
}
/* Creation primitives. */
};
struct inet_frag_queue *q;
- q = inet_frag_find(&net->nf_frag.frags, &key);
+ q = inet_frag_find(net->nf_frag.fqdir, &key);
if (!q)
return NULL;
prev = fq->q.fragments_tail;
err = inet_frag_queue_insert(&fq->q, skb, offset, end);
- if (err)
+ if (err) {
+ if (err == IPFRAG_DUP) {
+ /* No error for duplicates, pretend they got queued. */
+ kfree_skb(skb);
+ return -EINPROGRESS;
+ }
goto insert_error;
+ }
if (dev)
fq->iif = dev->ifindex;
fq->ecn |= ecn;
if (payload_len > fq->q.max_size)
fq->q.max_size = payload_len;
- add_frag_mem_limit(fq->q.net, skb->truesize);
+ add_frag_mem_limit(fq->q.fqdir, skb->truesize);
/* The first fragment.
* nhoffset is obtained from the first fragment, of course.
skb->_skb_refdst = 0UL;
err = nf_ct_frag6_reasm(fq, skb, prev, dev);
skb->_skb_refdst = orefdst;
- return err;
+
+ /* After queue has assumed skb ownership, only 0 or
+ * -EINPROGRESS must be returned.
+ */
+ return err ? -EINPROGRESS : 0;
}
skb_dst_drop(skb);
return -EINPROGRESS;
insert_error:
- if (err == IPFRAG_DUP)
- goto err;
inet_frag_kill(&fq->q);
err:
skb_dst_drop(skb);
ret = 0;
}
- /* after queue has assumed skb ownership, only 0 or -EINPROGRESS
- * must be returned.
- */
- if (ret)
- ret = -EINPROGRESS;
-
spin_unlock_bh(&fq->q.lock);
inet_frag_put(&fq->q);
return ret;
{
int res;
- net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
- net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
- net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
- net->nf_frag.frags.f = &nf_frags;
-
- res = inet_frags_init_net(&net->nf_frag.frags);
+ res = fqdir_init(&net->nf_frag.fqdir, &nf_frags, net);
if (res < 0)
return res;
+
+ net->nf_frag.fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH;
+ net->nf_frag.fqdir->low_thresh = IPV6_FRAG_LOW_THRESH;
+ net->nf_frag.fqdir->timeout = IPV6_FRAG_TIMEOUT;
+
res = nf_ct_frag6_sysctl_register(net);
if (res < 0)
- inet_frags_exit_net(&net->nf_frag.frags);
+ fqdir_exit(net->nf_frag.fqdir);
return res;
}
+static void nf_ct_net_pre_exit(struct net *net)
+{
+ fqdir_pre_exit(net->nf_frag.fqdir);
+}
+
static void nf_ct_net_exit(struct net *net)
{
nf_ct_frags6_sysctl_unregister(net);
- inet_frags_exit_net(&net->nf_frag.frags);
+ fqdir_exit(net->nf_frag.fqdir);
}
static struct pernet_operations nf_ct_net_ops = {
- .init = nf_ct_net_init,
- .exit = nf_ct_net_exit,
+ .init = nf_ct_net_init,
+ .pre_exit = nf_ct_net_pre_exit,
+ .exit = nf_ct_net_exit,
};
static const struct rhashtable_params nfct_rhash_params = {
struct sk_buff *skb);
static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
int strict);
-static size_t rt6_nlmsg_size(struct fib6_info *rt);
+static size_t rt6_nlmsg_size(struct fib6_info *f6i);
static int rt6_fill_node(struct net *net, struct sk_buff *skb,
struct fib6_info *rt, struct dst_entry *dst,
struct in6_addr *dest, struct in6_addr *src,
struct fib6_info *sibling, *next_sibling;
struct fib6_info *match = res->f6i;
- if (!match->fib6_nsiblings || have_oif_match)
+ if ((!match->fib6_nsiblings && !match->nh) || have_oif_match)
goto out;
/* We might have already computed the hash for ICMPv6 errors. In such
* case it will always be non-zero. Otherwise now is the time to do it.
*/
- if (!fl6->mp_hash)
+ if (!fl6->mp_hash &&
+ (!match->nh || nexthop_is_multipath(match->nh)))
fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
- if (fl6->mp_hash <= atomic_read(&match->fib6_nh.fib_nh_upper_bound))
+ if (unlikely(match->nh)) {
+ nexthop_path_fib6_result(res, fl6->mp_hash);
+ return;
+ }
+
+ if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound))
goto out;
list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
fib6_siblings) {
- const struct fib6_nh *nh = &sibling->fib6_nh;
+ const struct fib6_nh *nh = sibling->fib6_nh;
int nh_upper_bound;
nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound);
out:
res->f6i = match;
- res->nh = &match->fib6_nh;
+ res->nh = match->fib6_nh;
}
/*
return false;
}
+struct fib6_nh_dm_arg {
+ struct net *net;
+ const struct in6_addr *saddr;
+ int oif;
+ int flags;
+ struct fib6_nh *nh;
+};
+
+static int __rt6_nh_dev_match(struct fib6_nh *nh, void *_arg)
+{
+ struct fib6_nh_dm_arg *arg = _arg;
+
+ arg->nh = nh;
+ return __rt6_device_match(arg->net, nh, arg->saddr, arg->oif,
+ arg->flags);
+}
+
+/* returns fib6_nh from nexthop or NULL */
+static struct fib6_nh *rt6_nh_dev_match(struct net *net, struct nexthop *nh,
+ struct fib6_result *res,
+ const struct in6_addr *saddr,
+ int oif, int flags)
+{
+ struct fib6_nh_dm_arg arg = {
+ .net = net,
+ .saddr = saddr,
+ .oif = oif,
+ .flags = flags,
+ };
+
+ if (nexthop_is_blackhole(nh))
+ return NULL;
+
+ if (nexthop_for_each_fib6_nh(nh, __rt6_nh_dev_match, &arg))
+ return arg.nh;
+
+ return NULL;
+}
+
static void rt6_device_match(struct net *net, struct fib6_result *res,
const struct in6_addr *saddr, int oif, int flags)
{
struct fib6_nh *nh;
if (!oif && ipv6_addr_any(saddr)) {
- nh = &f6i->fib6_nh;
+ if (unlikely(f6i->nh)) {
+ nh = nexthop_fib6_nh(f6i->nh);
+ if (nexthop_is_blackhole(f6i->nh))
+ goto out_blackhole;
+ } else {
+ nh = f6i->fib6_nh;
+ }
if (!(nh->fib_nh_flags & RTNH_F_DEAD))
goto out;
}
for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) {
- nh = &spf6i->fib6_nh;
- if (__rt6_device_match(net, nh, saddr, oif, flags)) {
+ bool matched = false;
+
+ if (unlikely(spf6i->nh)) {
+ nh = rt6_nh_dev_match(net, spf6i->nh, res, saddr,
+ oif, flags);
+ if (nh)
+ matched = true;
+ } else {
+ nh = spf6i->fib6_nh;
+ if (__rt6_device_match(net, nh, saddr, oif, flags))
+ matched = true;
+ }
+ if (matched) {
res->f6i = spf6i;
goto out;
}
if (oif && flags & RT6_LOOKUP_F_IFACE) {
res->f6i = net->ipv6.fib6_null_entry;
- nh = &res->f6i->fib6_nh;
+ nh = res->f6i->fib6_nh;
goto out;
}
- nh = &f6i->fib6_nh;
+ if (unlikely(f6i->nh)) {
+ nh = nexthop_fib6_nh(f6i->nh);
+ if (nexthop_is_blackhole(f6i->nh))
+ goto out_blackhole;
+ } else {
+ nh = f6i->fib6_nh;
+ }
+
if (nh->fib_nh_flags & RTNH_F_DEAD) {
res->f6i = net->ipv6.fib6_null_entry;
- nh = &res->f6i->fib6_nh;
+ nh = res->f6i->fib6_nh;
}
out:
res->nh = nh;
res->fib6_type = res->f6i->fib6_type;
res->fib6_flags = res->f6i->fib6_flags;
+ return;
+
+out_blackhole:
+ res->fib6_flags |= RTF_REJECT;
+ res->fib6_type = RTN_BLACKHOLE;
+ res->nh = nh;
}
#ifdef CONFIG_IPV6_ROUTER_PREF
return rc;
}
+struct fib6_nh_frl_arg {
+ u32 flags;
+ int oif;
+ int strict;
+ int *mpri;
+ bool *do_rr;
+ struct fib6_nh *nh;
+};
+
+static int rt6_nh_find_match(struct fib6_nh *nh, void *_arg)
+{
+ struct fib6_nh_frl_arg *arg = _arg;
+
+ arg->nh = nh;
+ return find_match(nh, arg->flags, arg->oif, arg->strict,
+ arg->mpri, arg->do_rr);
+}
+
static void __find_rr_leaf(struct fib6_info *f6i_start,
struct fib6_info *nomatch, u32 metric,
struct fib6_result *res, struct fib6_info **cont,
for (f6i = f6i_start;
f6i && f6i != nomatch;
f6i = rcu_dereference(f6i->fib6_next)) {
+ bool matched = false;
struct fib6_nh *nh;
if (cont && f6i->fib6_metric != metric) {
if (fib6_check_expired(f6i))
continue;
- nh = &f6i->fib6_nh;
- if (find_match(nh, f6i->fib6_flags, oif, strict, mpri, do_rr)) {
+ if (unlikely(f6i->nh)) {
+ struct fib6_nh_frl_arg arg = {
+ .flags = f6i->fib6_flags,
+ .oif = oif,
+ .strict = strict,
+ .mpri = mpri,
+ .do_rr = do_rr
+ };
+
+ if (nexthop_is_blackhole(f6i->nh)) {
+ res->fib6_flags = RTF_REJECT;
+ res->fib6_type = RTN_BLACKHOLE;
+ res->f6i = f6i;
+ res->nh = nexthop_fib6_nh(f6i->nh);
+ return;
+ }
+ if (nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_find_match,
+ &arg)) {
+ matched = true;
+ nh = arg.nh;
+ }
+ } else {
+ nh = f6i->fib6_nh;
+ if (find_match(nh, f6i->fib6_flags, oif, strict,
+ mpri, do_rr))
+ matched = true;
+ }
+ if (matched) {
res->f6i = f6i;
res->nh = nh;
res->fib6_flags = f6i->fib6_flags;
out:
if (!res->f6i) {
res->f6i = net->ipv6.fib6_null_entry;
- res->nh = &res->f6i->fib6_nh;
+ res->nh = res->f6i->fib6_nh;
res->fib6_flags = res->f6i->fib6_flags;
res->fib6_type = res->f6i->fib6_type;
}
rt = net->ipv6.ip6_null_entry;
dst_hold(&rt->dst);
goto out;
+ } else if (res.fib6_flags & RTF_REJECT) {
+ goto do_create;
}
fib6_select_path(net, &res, fl6, fl6->flowi6_oif,
if (ip6_hold_safe(net, &rt))
dst_use_noref(&rt->dst, jiffies);
} else {
+do_create:
rt = ip6_create_rt_rcu(&res);
}
/* It should be called with rcu_read_lock() acquired */
static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
{
- struct rt6_info *pcpu_rt, **p;
+ struct rt6_info *pcpu_rt;
- p = this_cpu_ptr(res->f6i->rt6i_pcpu);
- pcpu_rt = *p;
+ pcpu_rt = this_cpu_read(*res->nh->rt6i_pcpu);
if (pcpu_rt)
ip6_hold_safe(NULL, &pcpu_rt);
}
dst_hold(&pcpu_rt->dst);
- p = this_cpu_ptr(res->f6i->rt6i_pcpu);
+ p = this_cpu_ptr(res->nh->rt6i_pcpu);
prev = cmpxchg(p, NULL, pcpu_rt);
BUG_ON(prev);
return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
}
+#define FIB6_EXCEPTION_BUCKET_FLUSHED 0x1UL
+
+/* used when the flushed bit is not relevant, only access to the bucket
+ * (ie., all bucket users except rt6_insert_exception);
+ *
+ * called under rcu lock; sometimes called with rt6_exception_lock held
+ */
+static
+struct rt6_exception_bucket *fib6_nh_get_excptn_bucket(const struct fib6_nh *nh,
+ spinlock_t *lock)
+{
+ struct rt6_exception_bucket *bucket;
+
+ if (lock)
+ bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
+ lockdep_is_held(lock));
+ else
+ bucket = rcu_dereference(nh->rt6i_exception_bucket);
+
+ /* remove bucket flushed bit if set */
+ if (bucket) {
+ unsigned long p = (unsigned long)bucket;
+
+ p &= ~FIB6_EXCEPTION_BUCKET_FLUSHED;
+ bucket = (struct rt6_exception_bucket *)p;
+ }
+
+ return bucket;
+}
+
+static bool fib6_nh_excptn_bucket_flushed(struct rt6_exception_bucket *bucket)
+{
+ unsigned long p = (unsigned long)bucket;
+
+ return !!(p & FIB6_EXCEPTION_BUCKET_FLUSHED);
+}
+
+/* called with rt6_exception_lock held */
+static void fib6_nh_excptn_bucket_set_flushed(struct fib6_nh *nh,
+ spinlock_t *lock)
+{
+ struct rt6_exception_bucket *bucket;
+ unsigned long p;
+
+ bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
+ lockdep_is_held(lock));
+
+ p = (unsigned long)bucket;
+ p |= FIB6_EXCEPTION_BUCKET_FLUSHED;
+ bucket = (struct rt6_exception_bucket *)p;
+ rcu_assign_pointer(nh->rt6i_exception_bucket, bucket);
+}
+
static int rt6_insert_exception(struct rt6_info *nrt,
const struct fib6_result *res)
{
struct net *net = dev_net(nrt->dst.dev);
struct rt6_exception_bucket *bucket;
+ struct fib6_info *f6i = res->f6i;
struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
- struct fib6_info *f6i = res->f6i;
+ struct fib6_nh *nh = res->nh;
int err = 0;
spin_lock_bh(&rt6_exception_lock);
- if (f6i->exception_bucket_flushed) {
- err = -EINVAL;
- goto out;
- }
-
- bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket,
- lockdep_is_held(&rt6_exception_lock));
+ bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
+ lockdep_is_held(&rt6_exception_lock));
if (!bucket) {
bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
GFP_ATOMIC);
err = -ENOMEM;
goto out;
}
- rcu_assign_pointer(f6i->rt6i_exception_bucket, bucket);
+ rcu_assign_pointer(nh->rt6i_exception_bucket, bucket);
+ } else if (fib6_nh_excptn_bucket_flushed(bucket)) {
+ err = -EINVAL;
+ goto out;
}
#ifdef CONFIG_IPV6_SUBTREES
return err;
}
-void rt6_flush_exceptions(struct fib6_info *rt)
+static void fib6_nh_flush_exceptions(struct fib6_nh *nh, struct fib6_info *from)
{
struct rt6_exception_bucket *bucket;
struct rt6_exception *rt6_ex;
int i;
spin_lock_bh(&rt6_exception_lock);
- /* Prevent rt6_insert_exception() to recreate the bucket list */
- rt->exception_bucket_flushed = 1;
- bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
- lockdep_is_held(&rt6_exception_lock));
+ bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
if (!bucket)
goto out;
+ /* Prevent rt6_insert_exception() to recreate the bucket list */
+ if (!from)
+ fib6_nh_excptn_bucket_set_flushed(nh, &rt6_exception_lock);
+
for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
- hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
- rt6_remove_exception(bucket, rt6_ex);
- WARN_ON_ONCE(bucket->depth);
+ hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist) {
+ if (!from ||
+ rcu_access_pointer(rt6_ex->rt6i->from) == from)
+ rt6_remove_exception(bucket, rt6_ex);
+ }
+ WARN_ON_ONCE(!from && bucket->depth);
bucket++;
}
-
out:
spin_unlock_bh(&rt6_exception_lock);
}
+static int rt6_nh_flush_exceptions(struct fib6_nh *nh, void *arg)
+{
+ struct fib6_info *f6i = arg;
+
+ fib6_nh_flush_exceptions(nh, f6i);
+
+ return 0;
+}
+
+void rt6_flush_exceptions(struct fib6_info *f6i)
+{
+ if (f6i->nh)
+ nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_flush_exceptions,
+ f6i);
+ else
+ fib6_nh_flush_exceptions(f6i->fib6_nh, f6i);
+}
+
/* Find cached rt in the hash table inside passed in rt
* Caller has to hold rcu_read_lock()
*/
src_key = saddr;
find_ex:
#endif
- bucket = rcu_dereference(res->f6i->rt6i_exception_bucket);
+ bucket = fib6_nh_get_excptn_bucket(res->nh, NULL);
rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
}
/* Remove the passed in cached rt from the hash table that contains it */
-static int rt6_remove_exception_rt(struct rt6_info *rt)
+static int fib6_nh_remove_exception(const struct fib6_nh *nh, int plen,
+ const struct rt6_info *rt)
{
+ const struct in6_addr *src_key = NULL;
struct rt6_exception_bucket *bucket;
- struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
- struct fib6_info *from;
int err;
- from = rcu_dereference(rt->from);
- if (!from ||
- !(rt->rt6i_flags & RTF_CACHE))
- return -EINVAL;
-
- if (!rcu_access_pointer(from->rt6i_exception_bucket))
+ if (!rcu_access_pointer(nh->rt6i_exception_bucket))
return -ENOENT;
spin_lock_bh(&rt6_exception_lock);
- bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
- lockdep_is_held(&rt6_exception_lock));
+ bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
+
#ifdef CONFIG_IPV6_SUBTREES
/* rt6i_src.plen != 0 indicates 'from' is in subtree
* and exception table is indexed by a hash of
* Otherwise, the exception table is indexed by
* a hash of only rt6i_dst.
*/
- if (from->fib6_src.plen)
+ if (plen)
src_key = &rt->rt6i_src.addr;
#endif
rt6_ex = __rt6_find_exception_spinlock(&bucket,
return err;
}
-/* Find rt6_ex which contains the passed in rt cache and
- * refresh its stamp
- */
-static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
+struct fib6_nh_excptn_arg {
+ struct rt6_info *rt;
+ int plen;
+};
+
+static int rt6_nh_remove_exception_rt(struct fib6_nh *nh, void *_arg)
+{
+ struct fib6_nh_excptn_arg *arg = _arg;
+ int err;
+
+ err = fib6_nh_remove_exception(nh, arg->plen, arg->rt);
+ if (err == 0)
+ return 1;
+
+ return 0;
+}
+
+static int rt6_remove_exception_rt(struct rt6_info *rt)
{
- struct rt6_exception_bucket *bucket;
- struct in6_addr *src_key = NULL;
- struct rt6_exception *rt6_ex;
struct fib6_info *from;
- rcu_read_lock();
from = rcu_dereference(rt->from);
if (!from || !(rt->rt6i_flags & RTF_CACHE))
- goto unlock;
+ return -EINVAL;
+
+ if (from->nh) {
+ struct fib6_nh_excptn_arg arg = {
+ .rt = rt,
+ .plen = from->fib6_src.plen
+ };
+ int rc;
- bucket = rcu_dereference(from->rt6i_exception_bucket);
+ /* rc = 1 means an entry was found */
+ rc = nexthop_for_each_fib6_nh(from->nh,
+ rt6_nh_remove_exception_rt,
+ &arg);
+ return rc ? 0 : -ENOENT;
+ }
+
+ return fib6_nh_remove_exception(from->fib6_nh,
+ from->fib6_src.plen, rt);
+}
+/* Find rt6_ex which contains the passed in rt cache and
+ * refresh its stamp
+ */
+static void fib6_nh_update_exception(const struct fib6_nh *nh, int plen,
+ const struct rt6_info *rt)
+{
+ const struct in6_addr *src_key = NULL;
+ struct rt6_exception_bucket *bucket;
+ struct rt6_exception *rt6_ex;
+
+ bucket = fib6_nh_get_excptn_bucket(nh, NULL);
#ifdef CONFIG_IPV6_SUBTREES
/* rt6i_src.plen != 0 indicates 'from' is in subtree
* and exception table is indexed by a hash of
* Otherwise, the exception table is indexed by
* a hash of only rt6i_dst.
*/
- if (from->fib6_src.plen)
+ if (plen)
src_key = &rt->rt6i_src.addr;
#endif
- rt6_ex = __rt6_find_exception_rcu(&bucket,
- &rt->rt6i_dst.addr,
- src_key);
+ rt6_ex = __rt6_find_exception_rcu(&bucket, &rt->rt6i_dst.addr, src_key);
if (rt6_ex)
rt6_ex->stamp = jiffies;
+}
+
+struct fib6_nh_match_arg {
+ const struct net_device *dev;
+ const struct in6_addr *gw;
+ struct fib6_nh *match;
+};
+
+/* determine if fib6_nh has given device and gateway */
+static int fib6_nh_find_match(struct fib6_nh *nh, void *_arg)
+{
+ struct fib6_nh_match_arg *arg = _arg;
+
+ if (arg->dev != nh->fib_nh_dev ||
+ (arg->gw && !nh->fib_nh_gw_family) ||
+ (!arg->gw && nh->fib_nh_gw_family) ||
+ (arg->gw && !ipv6_addr_equal(arg->gw, &nh->fib_nh_gw6)))
+ return 0;
+
+ arg->match = nh;
+
+ /* found a match, break the loop */
+ return 1;
+}
+
+static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
+{
+ struct fib6_info *from;
+ struct fib6_nh *fib6_nh;
+
+ rcu_read_lock();
+
+ from = rcu_dereference(rt->from);
+ if (!from || !(rt->rt6i_flags & RTF_CACHE))
+ goto unlock;
+
+ if (from->nh) {
+ struct fib6_nh_match_arg arg = {
+ .dev = rt->dst.dev,
+ .gw = &rt->rt6i_gateway,
+ };
+
+ nexthop_for_each_fib6_nh(from->nh, fib6_nh_find_match, &arg);
+ if (!arg.match)
+ return;
+ fib6_nh = arg.match;
+ } else {
+ fib6_nh = from->fib6_nh;
+ }
+ fib6_nh_update_exception(fib6_nh, from->fib6_src.plen, rt);
unlock:
rcu_read_unlock();
}
}
static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
- struct fib6_info *rt, int mtu)
+ const struct fib6_nh *nh, int mtu)
{
struct rt6_exception_bucket *bucket;
struct rt6_exception *rt6_ex;
int i;
- bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
- lockdep_is_held(&rt6_exception_lock));
-
+ bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
if (!bucket)
return;
#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
-static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
- struct in6_addr *gateway)
+static void fib6_nh_exceptions_clean_tohost(const struct fib6_nh *nh,
+ const struct in6_addr *gateway)
{
struct rt6_exception_bucket *bucket;
struct rt6_exception *rt6_ex;
struct hlist_node *tmp;
int i;
- if (!rcu_access_pointer(rt->rt6i_exception_bucket))
+ if (!rcu_access_pointer(nh->rt6i_exception_bucket))
return;
spin_lock_bh(&rt6_exception_lock);
- bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
- lockdep_is_held(&rt6_exception_lock));
-
+ bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
if (bucket) {
for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
hlist_for_each_entry_safe(rt6_ex, tmp,
gc_args->more++;
}
-void rt6_age_exceptions(struct fib6_info *rt,
- struct fib6_gc_args *gc_args,
- unsigned long now)
+static void fib6_nh_age_exceptions(const struct fib6_nh *nh,
+ struct fib6_gc_args *gc_args,
+ unsigned long now)
{
struct rt6_exception_bucket *bucket;
struct rt6_exception *rt6_ex;
struct hlist_node *tmp;
int i;
- if (!rcu_access_pointer(rt->rt6i_exception_bucket))
+ if (!rcu_access_pointer(nh->rt6i_exception_bucket))
return;
rcu_read_lock_bh();
spin_lock(&rt6_exception_lock);
- bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
- lockdep_is_held(&rt6_exception_lock));
-
+ bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
if (bucket) {
for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
hlist_for_each_entry_safe(rt6_ex, tmp,
rcu_read_unlock_bh();
}
+struct fib6_nh_age_excptn_arg {
+ struct fib6_gc_args *gc_args;
+ unsigned long now;
+};
+
+static int rt6_nh_age_exceptions(struct fib6_nh *nh, void *_arg)
+{
+ struct fib6_nh_age_excptn_arg *arg = _arg;
+
+ fib6_nh_age_exceptions(nh, arg->gc_args, arg->now);
+ return 0;
+}
+
+void rt6_age_exceptions(struct fib6_info *f6i,
+ struct fib6_gc_args *gc_args,
+ unsigned long now)
+{
+ if (f6i->nh) {
+ struct fib6_nh_age_excptn_arg arg = {
+ .gc_args = gc_args,
+ .now = now
+ };
+
+ nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_age_exceptions,
+ &arg);
+ } else {
+ fib6_nh_age_exceptions(f6i->fib6_nh, gc_args, now);
+ }
+}
+
/* must be called with rcu lock held */
int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif,
struct flowi6 *fl6, struct fib6_result *res, int strict)
rcu_read_unlock();
return;
}
- res.nh = &res.f6i->fib6_nh;
res.fib6_flags = res.f6i->fib6_flags;
res.fib6_type = res.f6i->fib6_type;
+ if (res.f6i->nh) {
+ struct fib6_nh_match_arg arg = {
+ .dev = dst->dev,
+ .gw = &rt6->rt6i_gateway,
+ };
+
+ nexthop_for_each_fib6_nh(res.f6i->nh,
+ fib6_nh_find_match, &arg);
+
+ /* fib6_info uses a nexthop that does not have fib6_nh
+ * using the dst->dev + gw. Should be impossible.
+ */
+ if (!arg.match) {
+ rcu_read_unlock();
+ return;
+ }
+
+ res.nh = arg.match;
+ } else {
+ res.nh = res.f6i->fib6_nh;
+ }
+
nrt6 = ip6_rt_cache_alloc(&res, daddr, saddr);
if (nrt6) {
rt6_do_update_pmtu(nrt6, mtu);
return true;
}
+struct fib6_nh_rd_arg {
+ struct fib6_result *res;
+ struct flowi6 *fl6;
+ const struct in6_addr *gw;
+ struct rt6_info **ret;
+};
+
+static int fib6_nh_redirect_match(struct fib6_nh *nh, void *_arg)
+{
+ struct fib6_nh_rd_arg *arg = _arg;
+
+ arg->res->nh = nh;
+ return ip6_redirect_nh_match(arg->res, arg->fl6, arg->gw, arg->ret);
+}
+
/* Handle redirects */
struct ip6rd_flowi {
struct flowi6 fl6;
struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
struct rt6_info *ret = NULL;
struct fib6_result res = {};
+ struct fib6_nh_rd_arg arg = {
+ .res = &res,
+ .fl6 = fl6,
+ .gw = &rdfl->gateway,
+ .ret = &ret
+ };
struct fib6_info *rt;
struct fib6_node *fn;
restart:
for_each_fib6_node_rt_rcu(fn) {
res.f6i = rt;
- res.nh = &rt->fib6_nh;
-
if (fib6_check_expired(rt))
continue;
if (rt->fib6_flags & RTF_REJECT)
break;
- if (ip6_redirect_nh_match(&res, fl6, &rdfl->gateway, &ret))
- goto out;
+ if (unlikely(rt->nh)) {
+ if (nexthop_is_blackhole(rt->nh))
+ continue;
+ /* on match, res->nh is filled in and potentially ret */
+ if (nexthop_for_each_fib6_nh(rt->nh,
+ fib6_nh_redirect_match,
+ &arg))
+ goto out;
+ } else {
+ res.nh = rt->fib6_nh;
+ if (ip6_redirect_nh_match(&res, fl6, &rdfl->gateway,
+ &ret))
+ goto out;
+ }
}
if (!rt)
}
res.f6i = rt;
- res.nh = &rt->fib6_nh;
+ res.nh = rt->fib6_nh;
out:
if (ret) {
ip6_hold_safe(net, &ret);
goto out;
}
}
- goto set_dev;
+ goto pcpu_alloc;
}
if (cfg->fc_flags & RTF_GATEWAY) {
cfg->fc_encap_type, cfg, gfp_flags, extack);
if (err)
goto out;
-set_dev:
+
+pcpu_alloc:
+ fib6_nh->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags);
+ if (!fib6_nh->rt6i_pcpu) {
+ err = -ENOMEM;
+ goto out;
+ }
+
fib6_nh->fib_nh_dev = dev;
fib6_nh->fib_nh_oif = dev->ifindex;
err = 0;
void fib6_nh_release(struct fib6_nh *fib6_nh)
{
+ struct rt6_exception_bucket *bucket;
+
+ rcu_read_lock();
+
+ fib6_nh_flush_exceptions(fib6_nh, NULL);
+ bucket = fib6_nh_get_excptn_bucket(fib6_nh, NULL);
+ if (bucket) {
+ rcu_assign_pointer(fib6_nh->rt6i_exception_bucket, NULL);
+ kfree(bucket);
+ }
+
+ rcu_read_unlock();
+
+ if (fib6_nh->rt6i_pcpu) {
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct rt6_info **ppcpu_rt;
+ struct rt6_info *pcpu_rt;
+
+ ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
+ pcpu_rt = *ppcpu_rt;
+ if (pcpu_rt) {
+ dst_dev_put(&pcpu_rt->dst);
+ dst_release(&pcpu_rt->dst);
+ *ppcpu_rt = NULL;
+ }
+ }
+
+ free_percpu(fib6_nh->rt6i_pcpu);
+ }
+
fib_nh_common_release(&fib6_nh->nh_common);
}
{
struct net *net = cfg->fc_nlinfo.nl_net;
struct fib6_info *rt = NULL;
+ struct nexthop *nh = NULL;
struct fib6_table *table;
+ struct fib6_nh *fib6_nh;
int err = -EINVAL;
int addr_type;
goto out;
}
#endif
+ if (cfg->fc_nh_id) {
+ nh = nexthop_find_by_id(net, cfg->fc_nh_id);
+ if (!nh) {
+ NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
+ goto out;
+ }
+ err = fib6_check_nexthop(nh, cfg, extack);
+ if (err)
+ goto out;
+ }
err = -ENOBUFS;
if (cfg->fc_nlinfo.nlh &&
goto out;
err = -ENOMEM;
- rt = fib6_info_alloc(gfp_flags);
+ rt = fib6_info_alloc(gfp_flags, !nh);
if (!rt)
goto out;
rt->fib6_table = table;
rt->fib6_metric = cfg->fc_metric;
- rt->fib6_type = cfg->fc_type;
+ rt->fib6_type = cfg->fc_type ? : RTN_UNICAST;
rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY;
ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
rt->fib6_src.plen = cfg->fc_src_len;
#endif
- err = fib6_nh_init(net, &rt->fib6_nh, cfg, gfp_flags, extack);
- if (err)
- goto out;
+ if (nh) {
+ if (!nexthop_get(nh)) {
+ NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
+ goto out;
+ }
+ if (rt->fib6_src.plen) {
+ NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing");
+ goto out;
+ }
+ rt->nh = nh;
+ fib6_nh = nexthop_fib6_nh(rt->nh);
+ } else {
+ err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack);
+ if (err)
+ goto out;
- /* We cannot add true routes via loopback here,
- * they would result in kernel looping; promote them to reject routes
- */
- addr_type = ipv6_addr_type(&cfg->fc_dst);
- if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh.fib_nh_dev, addr_type))
- rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
+ fib6_nh = rt->fib6_nh;
+
+ /* We cannot add true routes via loopback here, they would
+ * result in kernel looping; promote them to reject routes
+ */
+ addr_type = ipv6_addr_type(&cfg->fc_dst);
+ if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh->fib_nh_dev,
+ addr_type))
+ rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
+ }
if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
- struct net_device *dev = fib6_info_nh_dev(rt);
+ struct net_device *dev = fib6_nh->fib_nh_dev;
if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
NL_SET_ERR_MSG(extack, "Invalid source address");
info->skip_notify = 1;
}
+ info->skip_notify_kernel = 1;
+ call_fib6_multipath_entry_notifiers(net,
+ FIB_EVENT_ENTRY_DEL,
+ rt,
+ rt->fib6_nsiblings,
+ NULL);
list_for_each_entry_safe(sibling, next_sibling,
&rt->fib6_siblings,
fib6_siblings) {
return err;
}
-static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
+static int __ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
{
int rc = -ESRCH;
return rc;
}
+static int ip6_del_cached_rt(struct fib6_config *cfg, struct fib6_info *rt,
+ struct fib6_nh *nh)
+{
+ struct fib6_result res = {
+ .f6i = rt,
+ .nh = nh,
+ };
+ struct rt6_info *rt_cache;
+
+ rt_cache = rt6_find_cached_rt(&res, &cfg->fc_dst, &cfg->fc_src);
+ if (rt_cache)
+ return __ip6_del_cached_rt(rt_cache, cfg);
+
+ return 0;
+}
+
+struct fib6_nh_del_cached_rt_arg {
+ struct fib6_config *cfg;
+ struct fib6_info *f6i;
+};
+
+static int fib6_nh_del_cached_rt(struct fib6_nh *nh, void *_arg)
+{
+ struct fib6_nh_del_cached_rt_arg *arg = _arg;
+ int rc;
+
+ rc = ip6_del_cached_rt(arg->cfg, arg->f6i, nh);
+ return rc != -ESRCH ? rc : 0;
+}
+
+static int ip6_del_cached_rt_nh(struct fib6_config *cfg, struct fib6_info *f6i)
+{
+ struct fib6_nh_del_cached_rt_arg arg = {
+ .cfg = cfg,
+ .f6i = f6i
+ };
+
+ return nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_del_cached_rt, &arg);
+}
+
static int ip6_route_del(struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
- struct rt6_info *rt_cache;
struct fib6_table *table;
struct fib6_info *rt;
struct fib6_node *fn;
for_each_fib6_node_rt_rcu(fn) {
struct fib6_nh *nh;
+ if (rt->nh && rt->nh->id != cfg->fc_nh_id)
+ continue;
+
if (cfg->fc_flags & RTF_CACHE) {
- struct fib6_result res = {
- .f6i = rt,
- };
- int rc;
-
- rt_cache = rt6_find_cached_rt(&res,
- &cfg->fc_dst,
- &cfg->fc_src);
- if (rt_cache) {
- rc = ip6_del_cached_rt(rt_cache, cfg);
- if (rc != -ESRCH) {
- rcu_read_unlock();
- return rc;
- }
+ int rc = 0;
+
+ if (rt->nh) {
+ rc = ip6_del_cached_rt_nh(cfg, rt);
+ } else if (cfg->fc_nh_id) {
+ continue;
+ } else {
+ nh = rt->fib6_nh;
+ rc = ip6_del_cached_rt(cfg, rt, nh);
+ }
+ if (rc != -ESRCH) {
+ rcu_read_unlock();
+ return rc;
}
continue;
}
- nh = &rt->fib6_nh;
+ if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
+ continue;
+ if (cfg->fc_protocol &&
+ cfg->fc_protocol != rt->fib6_protocol)
+ continue;
+
+ if (rt->nh) {
+ if (!fib6_info_hold_safe(rt))
+ continue;
+ rcu_read_unlock();
+
+ return __ip6_del_rt(rt, &cfg->fc_nlinfo);
+ }
+ if (cfg->fc_nh_id)
+ continue;
+
+ nh = rt->fib6_nh;
if (cfg->fc_ifindex &&
(!nh->fib_nh_dev ||
nh->fib_nh_dev->ifindex != cfg->fc_ifindex))
if (cfg->fc_flags & RTF_GATEWAY &&
!ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6))
continue;
- if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
- continue;
- if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
- continue;
if (!fib6_info_hold_safe(rt))
continue;
rcu_read_unlock();
if (!res.f6i)
goto out;
- res.nh = &res.f6i->fib6_nh;
+ if (res.f6i->nh) {
+ struct fib6_nh_match_arg arg = {
+ .dev = dst->dev,
+ .gw = &rt->rt6i_gateway,
+ };
+
+ nexthop_for_each_fib6_nh(res.f6i->nh,
+ fib6_nh_find_match, &arg);
+
+ /* fib6_info uses a nexthop that does not have fib6_nh
+ * using the dst->dev. Should be impossible
+ */
+ if (!arg.match)
+ goto out;
+ res.nh = arg.match;
+ } else {
+ res.nh = res.f6i->fib6_nh;
+ }
+
res.fib6_flags = res.f6i->fib6_flags;
res.fib6_type = res.f6i->fib6_type;
nrt = ip6_rt_cache_alloc(&res, &msg->dest, NULL);
goto out;
for_each_fib6_node_rt_rcu(fn) {
- if (rt->fib6_nh.fib_nh_dev->ifindex != ifindex)
+ /* these routes do not use nexthops */
+ if (rt->nh)
+ continue;
+ if (rt->fib6_nh->fib_nh_dev->ifindex != ifindex)
continue;
if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
- !rt->fib6_nh.fib_nh_gw_family)
+ !rt->fib6_nh->fib_nh_gw_family)
continue;
- if (!ipv6_addr_equal(&rt->fib6_nh.fib_nh_gw6, gwaddr))
+ if (!ipv6_addr_equal(&rt->fib6_nh->fib_nh_gw6, gwaddr))
continue;
if (!fib6_info_hold_safe(rt))
continue;
rcu_read_lock();
for_each_fib6_node_rt_rcu(&table->tb6_root) {
- struct fib6_nh *nh = &rt->fib6_nh;
+ struct fib6_nh *nh;
+
+ /* RA routes do not use nexthops */
+ if (rt->nh)
+ continue;
+ nh = rt->fib6_nh;
if (dev == nh->fib_nh_dev &&
((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
ipv6_addr_equal(&nh->fib_nh_gw6, addr))
struct net *net = ((struct arg_dev_net_ip *)arg)->net;
struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
- if (((void *)rt->fib6_nh.fib_nh_dev == dev || !dev) &&
+ if (!rt->nh &&
+ ((void *)rt->fib6_nh->fib_nh_dev == dev || !dev) &&
rt != net->ipv6.fib6_null_entry &&
ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
spin_lock_bh(&rt6_exception_lock);
static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
{
struct in6_addr *gateway = (struct in6_addr *)arg;
+ struct fib6_nh *nh;
+
+ /* RA routes do not use nexthops */
+ if (rt->nh)
+ return 0;
+ nh = rt->fib6_nh;
if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
- rt->fib6_nh.fib_nh_gw_family &&
- ipv6_addr_equal(gateway, &rt->fib6_nh.fib_nh_gw6)) {
+ nh->fib_nh_gw_family && ipv6_addr_equal(gateway, &nh->fib_nh_gw6))
return -1;
- }
/* Further clean up cached routes in exception table.
* This is needed because cached route may have a different
* gateway than its 'parent' in the case of an ip redirect.
*/
- rt6_exceptions_clean_tohost(rt, gateway);
+ fib6_nh_exceptions_clean_tohost(nh, gateway);
return 0;
}
return NULL;
}
+/* only called for fib entries with builtin fib6_nh */
static bool rt6_is_dead(const struct fib6_info *rt)
{
- if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD ||
- (rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN &&
- ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev)))
+ if (rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD ||
+ (rt->fib6_nh->fib_nh_flags & RTNH_F_LINKDOWN &&
+ ip6_ignore_linkdown(rt->fib6_nh->fib_nh_dev)))
return true;
return false;
int total = 0;
if (!rt6_is_dead(rt))
- total += rt->fib6_nh.fib_nh_weight;
+ total += rt->fib6_nh->fib_nh_weight;
list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
if (!rt6_is_dead(iter))
- total += iter->fib6_nh.fib_nh_weight;
+ total += iter->fib6_nh->fib_nh_weight;
}
return total;
int upper_bound = -1;
if (!rt6_is_dead(rt)) {
- *weight += rt->fib6_nh.fib_nh_weight;
+ *weight += rt->fib6_nh->fib_nh_weight;
upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
total) - 1;
}
- atomic_set(&rt->fib6_nh.fib_nh_upper_bound, upper_bound);
+ atomic_set(&rt->fib6_nh->fib_nh_upper_bound, upper_bound);
}
static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
const struct arg_netdev_event *arg = p_arg;
struct net *net = dev_net(arg->dev);
- if (rt != net->ipv6.fib6_null_entry &&
- rt->fib6_nh.fib_nh_dev == arg->dev) {
- rt->fib6_nh.fib_nh_flags &= ~arg->nh_flags;
+ if (rt != net->ipv6.fib6_null_entry && !rt->nh &&
+ rt->fib6_nh->fib_nh_dev == arg->dev) {
+ rt->fib6_nh->fib_nh_flags &= ~arg->nh_flags;
fib6_update_sernum_upto_root(net, rt);
rt6_multipath_rebalance(rt);
}
fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
}
+/* only called for fib entries with inline fib6_nh */
static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
const struct net_device *dev)
{
struct fib6_info *iter;
- if (rt->fib6_nh.fib_nh_dev == dev)
+ if (rt->fib6_nh->fib_nh_dev == dev)
return true;
list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
- if (iter->fib6_nh.fib_nh_dev == dev)
+ if (iter->fib6_nh->fib_nh_dev == dev)
return true;
return false;
struct fib6_info *iter;
unsigned int dead = 0;
- if (rt->fib6_nh.fib_nh_dev == down_dev ||
- rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
+ if (rt->fib6_nh->fib_nh_dev == down_dev ||
+ rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD)
dead++;
list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
- if (iter->fib6_nh.fib_nh_dev == down_dev ||
- iter->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
+ if (iter->fib6_nh->fib_nh_dev == down_dev ||
+ iter->fib6_nh->fib_nh_flags & RTNH_F_DEAD)
dead++;
return dead;
{
struct fib6_info *iter;
- if (rt->fib6_nh.fib_nh_dev == dev)
- rt->fib6_nh.fib_nh_flags |= nh_flags;
+ if (rt->fib6_nh->fib_nh_dev == dev)
+ rt->fib6_nh->fib_nh_flags |= nh_flags;
list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
- if (iter->fib6_nh.fib_nh_dev == dev)
- iter->fib6_nh.fib_nh_flags |= nh_flags;
+ if (iter->fib6_nh->fib_nh_dev == dev)
+ iter->fib6_nh->fib_nh_flags |= nh_flags;
}
/* called with write lock held for table with rt */
const struct net_device *dev = arg->dev;
struct net *net = dev_net(dev);
- if (rt == net->ipv6.fib6_null_entry)
+ if (rt == net->ipv6.fib6_null_entry || rt->nh)
return 0;
switch (arg->event) {
case NETDEV_UNREGISTER:
- return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
+ return rt->fib6_nh->fib_nh_dev == dev ? -1 : 0;
case NETDEV_DOWN:
if (rt->should_flush)
return -1;
if (!rt->fib6_nsiblings)
- return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
+ return rt->fib6_nh->fib_nh_dev == dev ? -1 : 0;
if (rt6_multipath_uses_dev(rt, dev)) {
unsigned int count;
}
return -2;
case NETDEV_CHANGE:
- if (rt->fib6_nh.fib_nh_dev != dev ||
+ if (rt->fib6_nh->fib_nh_dev != dev ||
rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
break;
- rt->fib6_nh.fib_nh_flags |= RTNH_F_LINKDOWN;
+ rt->fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
rt6_multipath_rebalance(rt);
break;
}
struct rt6_mtu_change_arg {
struct net_device *dev;
unsigned int mtu;
+ struct fib6_info *f6i;
};
-static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
+static int fib6_nh_mtu_change(struct fib6_nh *nh, void *_arg)
+{
+ struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *)_arg;
+ struct fib6_info *f6i = arg->f6i;
+
+ /* For administrative MTU increase, there is no way to discover
+ * IPv6 PMTU increase, so PMTU increase should be updated here.
+ * Since RFC 1981 doesn't include administrative MTU increase
+ * update PMTU increase is a MUST. (i.e. jumbo frame)
+ */
+ if (nh->fib_nh_dev == arg->dev) {
+ struct inet6_dev *idev = __in6_dev_get(arg->dev);
+ u32 mtu = f6i->fib6_pmtu;
+
+ if (mtu >= arg->mtu ||
+ (mtu < arg->mtu && mtu == idev->cnf.mtu6))
+ fib6_metric_set(f6i, RTAX_MTU, arg->mtu);
+
+ spin_lock_bh(&rt6_exception_lock);
+ rt6_exceptions_update_pmtu(idev, nh, arg->mtu);
+ spin_unlock_bh(&rt6_exception_lock);
+ }
+
+ return 0;
+}
+
+static int rt6_mtu_change_route(struct fib6_info *f6i, void *p_arg)
{
struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
struct inet6_dev *idev;
if (!idev)
return 0;
- /* For administrative MTU increase, there is no way to discover
- IPv6 PMTU increase, so PMTU increase should be updated here.
- Since RFC 1981 doesn't include administrative MTU increase
- update PMTU increase is a MUST. (i.e. jumbo frame)
- */
- if (rt->fib6_nh.fib_nh_dev == arg->dev &&
- !fib6_metric_locked(rt, RTAX_MTU)) {
- u32 mtu = rt->fib6_pmtu;
-
- if (mtu >= arg->mtu ||
- (mtu < arg->mtu && mtu == idev->cnf.mtu6))
- fib6_metric_set(rt, RTAX_MTU, arg->mtu);
+ if (fib6_metric_locked(f6i, RTAX_MTU))
+ return 0;
- spin_lock_bh(&rt6_exception_lock);
- rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
- spin_unlock_bh(&rt6_exception_lock);
+ arg->f6i = f6i;
+ if (f6i->nh) {
+ /* fib6_nh_mtu_change only returns 0, so this is safe */
+ return nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_mtu_change,
+ arg);
}
- return 0;
+
+ return fib6_nh_mtu_change(f6i->fib6_nh, arg);
}
void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
}
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
+ [RTA_UNSPEC] = { .strict_start_type = RTA_DPORT + 1 },
[RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
[RTA_PREFSRC] = { .len = sizeof(struct in6_addr) },
[RTA_OIF] = { .type = NLA_U32 },
[RTA_IP_PROTO] = { .type = NLA_U8 },
[RTA_SPORT] = { .type = NLA_U16 },
[RTA_DPORT] = { .type = NLA_U16 },
+ [RTA_NH_ID] = { .type = NLA_U32 },
};
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
+ if (tb[RTA_NH_ID]) {
+ if (tb[RTA_GATEWAY] || tb[RTA_OIF] ||
+ tb[RTA_MULTIPATH] || tb[RTA_ENCAP]) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop specification and nexthop id are mutually exclusive");
+ goto errout;
+ }
+ cfg->fc_nh_id = nla_get_u32(tb[RTA_NH_ID]);
+ }
+
if (tb[RTA_GATEWAY]) {
cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
cfg->fc_flags |= RTF_GATEWAY;
{
struct fib6_info *rt_notif = NULL, *rt_last = NULL;
struct nl_info *info = &cfg->fc_nlinfo;
+ enum fib_event_type event_type;
struct fib6_config r_cfg;
struct rtnexthop *rtnh;
struct fib6_info *rt;
goto cleanup;
}
- rt->fib6_nh.fib_nh_weight = rtnh->rtnh_hops + 1;
+ rt->fib6_nh->fib_nh_weight = rtnh->rtnh_hops + 1;
err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
rt, &r_cfg);
*/
info->skip_notify = 1;
+ /* For add and replace, send one notification with all nexthops. For
+ * append, send one notification with all appended nexthops.
+ */
+ info->skip_notify_kernel = 1;
+
err_nh = NULL;
list_for_each_entry(nh, &rt6_nh_list, next) {
err = __ip6_ins_rt(nh->fib6_info, info, extack);
nhn++;
}
+ event_type = replace ? FIB_EVENT_ENTRY_REPLACE : FIB_EVENT_ENTRY_ADD;
+ err = call_fib6_multipath_entry_notifiers(info->nl_net, event_type,
+ rt_notif, nhn - 1, extack);
+ if (err) {
+ /* Delete all the siblings that were just added */
+ err_nh = NULL;
+ goto add_errout;
+ }
+
/* success ... tell user about new route */
ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
goto cleanup;
if (err < 0)
return err;
+ if (cfg.fc_nh_id &&
+ !nexthop_find_by_id(sock_net(skb->sk), cfg.fc_nh_id)) {
+ NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
+ return -EINVAL;
+ }
+
if (cfg.fc_mp)
return ip6_route_multipath_del(&cfg, extack);
else {
return ip6_route_add(&cfg, GFP_KERNEL, extack);
}
-static size_t rt6_nlmsg_size(struct fib6_info *rt)
+/* add the overhead of this fib6_nh to nexthop_len */
+static int rt6_nh_nlmsg_size(struct fib6_nh *nh, void *arg)
{
- int nexthop_len = 0;
+ int *nexthop_len = arg;
- if (rt->fib6_nsiblings) {
- nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
- + NLA_ALIGN(sizeof(struct rtnexthop))
- + nla_total_size(16) /* RTA_GATEWAY */
- + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws);
+ *nexthop_len += nla_total_size(0) /* RTA_MULTIPATH */
+ + NLA_ALIGN(sizeof(struct rtnexthop))
+ + nla_total_size(16); /* RTA_GATEWAY */
- nexthop_len *= rt->fib6_nsiblings;
+ if (nh->fib_nh_lws) {
+ /* RTA_ENCAP_TYPE */
+ *nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
+ /* RTA_ENCAP */
+ *nexthop_len += nla_total_size(2);
+ }
+
+ return 0;
+}
+
+static size_t rt6_nlmsg_size(struct fib6_info *f6i)
+{
+ int nexthop_len;
+
+ if (f6i->nh) {
+ nexthop_len = nla_total_size(4); /* RTA_NH_ID */
+ nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_nlmsg_size,
+ &nexthop_len);
+ } else {
+ struct fib6_nh *nh = f6i->fib6_nh;
+
+ nexthop_len = 0;
+ if (f6i->fib6_nsiblings) {
+ nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
+ + NLA_ALIGN(sizeof(struct rtnexthop))
+ + nla_total_size(16) /* RTA_GATEWAY */
+ + lwtunnel_get_encap_size(nh->fib_nh_lws);
+
+ nexthop_len *= f6i->fib6_nsiblings;
+ }
+ nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
}
return NLMSG_ALIGN(sizeof(struct rtmsg))
+ nla_total_size(sizeof(struct rta_cacheinfo))
+ nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
+ nla_total_size(1) /* RTA_PREF */
- + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws)
+ nexthop_len;
}
+static int rt6_fill_node_nexthop(struct sk_buff *skb, struct nexthop *nh,
+ unsigned char *flags)
+{
+ if (nexthop_is_multipath(nh)) {
+ struct nlattr *mp;
+
+ mp = nla_nest_start(skb, RTA_MULTIPATH);
+ if (!mp)
+ goto nla_put_failure;
+
+ if (nexthop_mpath_fill_node(skb, nh))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, mp);
+ } else {
+ struct fib6_nh *fib6_nh;
+
+ fib6_nh = nexthop_fib6_nh(nh);
+ if (fib_nexthop_info(skb, &fib6_nh->nh_common,
+ flags, false) < 0)
+ goto nla_put_failure;
+ }
+
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
static int rt6_fill_node(struct net *net, struct sk_buff *skb,
struct fib6_info *rt, struct dst_entry *dst,
struct in6_addr *dest, struct in6_addr *src,
struct rt6_info *rt6 = (struct rt6_info *)dst;
struct rt6key *rt6_dst, *rt6_src;
u32 *pmetrics, table, rt6_flags;
+ unsigned char nh_flags = 0;
struct nlmsghdr *nlh;
struct rtmsg *rtm;
long expires = 0;
if (!mp)
goto nla_put_failure;
- if (fib_add_nexthop(skb, &rt->fib6_nh.nh_common,
- rt->fib6_nh.fib_nh_weight) < 0)
+ if (fib_add_nexthop(skb, &rt->fib6_nh->nh_common,
+ rt->fib6_nh->fib_nh_weight) < 0)
goto nla_put_failure;
list_for_each_entry_safe(sibling, next_sibling,
&rt->fib6_siblings, fib6_siblings) {
- if (fib_add_nexthop(skb, &sibling->fib6_nh.nh_common,
- sibling->fib6_nh.fib_nh_weight) < 0)
+ if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common,
+ sibling->fib6_nh->fib_nh_weight) < 0)
goto nla_put_failure;
}
nla_nest_end(skb, mp);
- } else {
- unsigned char nh_flags = 0;
+ } else if (rt->nh) {
+ if (nla_put_u32(skb, RTA_NH_ID, rt->nh->id))
+ goto nla_put_failure;
+
+ if (nexthop_is_blackhole(rt->nh))
+ rtm->rtm_type = RTN_BLACKHOLE;
- if (fib_nexthop_info(skb, &rt->fib6_nh.nh_common,
+ if (rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0)
+ goto nla_put_failure;
+
+ rtm->rtm_flags |= nh_flags;
+ } else {
+ if (fib_nexthop_info(skb, &rt->fib6_nh->nh_common,
&nh_flags, false) < 0)
goto nla_put_failure;
return -EMSGSIZE;
}
+static int fib6_info_nh_uses_dev(struct fib6_nh *nh, void *arg)
+{
+ const struct net_device *dev = arg;
+
+ if (nh->fib_nh_dev == dev)
+ return 1;
+
+ return 0;
+}
+
static bool fib6_info_uses_dev(const struct fib6_info *f6i,
const struct net_device *dev)
{
- if (f6i->fib6_nh.fib_nh_dev == dev)
+ if (f6i->nh) {
+ struct net_device *_dev = (struct net_device *)dev;
+
+ return !!nexthop_for_each_fib6_nh(f6i->nh,
+ fib6_info_nh_uses_dev,
+ _dev);
+ }
+
+ if (f6i->fib6_nh->fib_nh_dev == dev)
return true;
if (f6i->fib6_nsiblings) {
list_for_each_entry_safe(sibling, next_sibling,
&f6i->fib6_siblings, fib6_siblings) {
- if (sibling->fib6_nh.fib_nh_dev == dev)
+ if (sibling->fib6_nh->fib_nh_dev == dev)
return true;
}
}
rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
+void fib6_rt_update(struct net *net, struct fib6_info *rt,
+ struct nl_info *info)
+{
+ u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
+ struct sk_buff *skb;
+ int err = -ENOBUFS;
+
+ /* call_fib6_entry_notifiers will be removed when in-kernel notifier
+ * is implemented and supported for nexthop objects
+ */
+ call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, rt, NULL);
+
+ skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
+ if (!skb)
+ goto errout;
+
+ err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
+ RTM_NEWROUTE, info->portid, seq, NLM_F_REPLACE);
+ if (err < 0) {
+ /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
+ WARN_ON(err == -EMSGSIZE);
+ kfree_skb(skb);
+ goto errout;
+ }
+ rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
+ info->nlh, gfp_any());
+ return;
+errout:
+ if (err < 0)
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
+}
+
static int ip6_route_dev_notify(struct notifier_block *this,
unsigned long event, void *ptr)
{
return NOTIFY_OK;
if (event == NETDEV_REGISTER) {
- net->ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = dev;
+ net->ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = dev;
net->ipv6.ip6_null_entry->dst.dev = dev;
net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
goto out_ip6_dst_ops;
- net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
- sizeof(*net->ipv6.fib6_null_entry),
- GFP_KERNEL);
+ net->ipv6.fib6_null_entry = fib6_info_alloc(GFP_KERNEL, true);
if (!net->ipv6.fib6_null_entry)
goto out_ip6_dst_entries;
+ memcpy(net->ipv6.fib6_null_entry, &fib6_null_entry_template,
+ sizeof(*net->ipv6.fib6_null_entry));
net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
sizeof(*net->ipv6.ip6_null_entry),
/* Registering of the loopback is done before this portion of code,
* the loopback reference in rt6_info will not be taken, do it
* manually for init_net */
- init_net.ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = init_net.loopback_dev;
+ init_net.ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = init_net.loopback_dev;
init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+ // SPDX-License-Identifier: GPL-2.0-only
/*
* mac80211 debugfs for wireless PHYs
*
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2018 - 2019 Intel Corporation
- *
- * GPLv2
- *
*/
#include <linux/debugfs.h>
FLAG(SUPPORTS_MULTI_BSSID),
FLAG(SUPPORTS_ONLY_HE_MULTI_BSSID),
FLAG(EXT_KEY_ID_NATIVE),
+ FLAG(NO_AMPDU_KEYBORDER_SUPPORT),
#undef FLAG
};
+ // SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2003-2005 Devicescape Software, Inc.
* Copyright (C) 2015 Intel Deutschland GmbH
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/kobject.h>
key->debugfs.dir = debugfs_create_dir(buf,
key->local->debugfs.keys);
- if (!key->debugfs.dir)
- return;
-
sta = key->sta;
if (sta) {
sprintf(buf, "../../netdev:%s/stations/%pM",
+ // SPDX-License-Identifier: GPL-2.0-only
/*
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/kernel.h>
sprintf(buf, "netdev:%s", sdata->name);
sdata->vif.debugfs_dir = debugfs_create_dir(buf,
sdata->local->hw.wiphy->debugfsdir);
- if (sdata->vif.debugfs_dir)
- sdata->debugfs.subdir_stations = debugfs_create_dir("stations",
- sdata->vif.debugfs_dir);
+ sdata->debugfs.subdir_stations = debugfs_create_dir("stations",
+ sdata->vif.debugfs_dir);
add_files(sdata);
}
return;
sprintf(buf, "netdev:%s", sdata->name);
- if (!debugfs_rename(dir->d_parent, dir, dir->d_parent, buf))
- sdata_err(sdata,
- "debugfs: failed to rename debugfs dir to %s\n",
- buf);
+ debugfs_rename(dir->d_parent, dir, dir->d_parent, buf);
}
+ // SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2003-2005 Devicescape Software, Inc.
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright(c) 2016 Intel Deutschland GmbH
* Copyright (C) 2018 - 2019 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/debugfs.h>
* dir might still be around.
*/
sta->debugfs_dir = debugfs_create_dir(mac, stations_dir);
- if (!sta->debugfs_dir)
- return;
DEBUGFS_ADD(flags);
DEBUGFS_ADD(aid);
+ // SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2002-2005, Instant802 Networks, Inc.
* Copyright 2005-2006, Devicescape Software, Inc.
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/if_ether.h>
assert_key_lock(local);
sta->ptk_idx = key->conf.keyidx;
+
+ if (ieee80211_hw_check(&local->hw, NO_AMPDU_KEYBORDER_SUPPORT))
+ clear_sta_flag(sta, WLAN_STA_BLOCK_BA);
ieee80211_check_fast_xmit(sta);
return 0;
}
-static int ieee80211_hw_key_replace(struct ieee80211_key *old_key,
- struct ieee80211_key *new_key,
- bool pairwise)
+static void ieee80211_pairwise_rekey(struct ieee80211_key *old,
+ struct ieee80211_key *new)
{
- struct ieee80211_sub_if_data *sdata;
- struct ieee80211_local *local;
- struct sta_info *sta;
- int ret;
-
- /* Aggregation sessions are OK when running on SW crypto.
- * A broken remote STA may cause issues not observed with HW
- * crypto, though.
- */
- if (!(old_key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
- return 0;
+ struct ieee80211_local *local = new->local;
+ struct sta_info *sta = new->sta;
+ int i;
- assert_key_lock(old_key->local);
- sta = old_key->sta;
+ assert_key_lock(local);
- /* Unicast rekey without Extended Key ID needs special handling */
- if (new_key && sta && pairwise &&
- rcu_access_pointer(sta->ptk[sta->ptk_idx]) == old_key) {
- local = old_key->local;
- sdata = old_key->sdata;
+ if (new->conf.flags & IEEE80211_KEY_FLAG_NO_AUTO_TX) {
+ /* Extended Key ID key install, initial one or rekey */
+
+ if (sta->ptk_idx != INVALID_PTK_KEYIDX &&
+ ieee80211_hw_check(&local->hw,
+ NO_AMPDU_KEYBORDER_SUPPORT)) {
+ /* Aggregation Sessions with Extended Key ID must not
+ * mix MPDUs with different keyIDs within one A-MPDU.
+ * Tear down any running Tx aggregation and all new
+ * Rx/Tx aggregation request during rekey if the driver
+ * asks us to do so. (Blocking Tx only would be
+ * sufficient but WLAN_STA_BLOCK_BA gets the job done
+ * for the few ms we need it.)
+ */
+ set_sta_flag(sta, WLAN_STA_BLOCK_BA);
+ mutex_lock(&sta->ampdu_mlme.mtx);
+ for (i = 0; i < IEEE80211_NUM_TIDS; i++)
+ ___ieee80211_stop_tx_ba_session(sta, i,
+ AGG_STOP_LOCAL_REQUEST);
+ mutex_unlock(&sta->ampdu_mlme.mtx);
+ }
+ } else if (old) {
+ /* Rekey without Extended Key ID.
+ * Aggregation sessions are OK when running on SW crypto.
+ * A broken remote STA may cause issues not observed with HW
+ * crypto, though.
+ */
+ if (!(old->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
+ return;
- /* Stop TX till we are on the new key */
- old_key->flags |= KEY_FLAG_TAINTED;
+ /* Stop Tx till we are on the new key */
+ old->flags |= KEY_FLAG_TAINTED;
ieee80211_clear_fast_xmit(sta);
-
- /* Aggregation sessions during rekey are complicated due to the
- * reorder buffer and retransmits. Side step that by blocking
- * aggregation during rekey and tear down running sessions.
- */
if (ieee80211_hw_check(&local->hw, AMPDU_AGGREGATION)) {
set_sta_flag(sta, WLAN_STA_BLOCK_BA);
ieee80211_sta_tear_down_BA_sessions(sta,
AGG_STOP_LOCAL_REQUEST);
}
-
if (!wiphy_ext_feature_isset(local->hw.wiphy,
NL80211_EXT_FEATURE_CAN_REPLACE_PTK0)) {
pr_warn_ratelimited("Rekeying PTK for STA %pM but driver can't safely do that.",
/* Flushing the driver queues *may* help prevent
* the clear text leaks and freezes.
*/
- ieee80211_flush_queues(local, sdata, false);
+ ieee80211_flush_queues(local, old->sdata, false);
}
}
-
- ieee80211_key_disable_hw_accel(old_key);
-
- if (new_key)
- ret = ieee80211_key_enable_hw_accel(new_key);
- else
- ret = 0;
-
- return ret;
}
static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata,
mutex_unlock(&sdata->local->key_mtx);
}
-
static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta,
bool pairwise,
struct ieee80211_key *new)
{
int idx;
- int ret;
+ int ret = 0;
bool defunikey, defmultikey, defmgmtkey;
/* caller must provide at least one old/new */
WARN_ON(new && old && new->conf.keyidx != old->conf.keyidx);
+ if (new && sta && pairwise) {
+ /* Unicast rekey needs special handling. With Extended Key ID
+ * old is still NULL for the first rekey.
+ */
+ ieee80211_pairwise_rekey(old, new);
+ }
+
if (old) {
idx = old->conf.keyidx;
- ret = ieee80211_hw_key_replace(old, new, pairwise);
+
+ if (old->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) {
+ ieee80211_key_disable_hw_accel(old);
+
+ if (new)
+ ret = ieee80211_key_enable_hw_accel(new);
+ }
} else {
/* new must be provided in case old is not */
idx = new->conf.keyidx;
if (!new->local->wowlan)
ret = ieee80211_key_enable_hw_accel(new);
- else
- ret = 0;
}
if (ret)
+ // SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2002-2005, Instant802 Networks, Inc.
* Copyright 2005-2006, Devicescape Software, Inc.
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2017 Intel Deutschland GmbH
* Copyright (C) 2018 - 2019 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <net/mac80211.h>
sdata_lock(sdata);
/* Copy the addresses to the bss_conf list */
- ifa = idev->ifa_list;
+ ifa = rtnl_dereference(idev->ifa_list);
while (ifa) {
if (c < IEEE80211_BSS_ARP_ADDR_LIST_LEN)
bss_conf->arp_addr_list[c] = ifa->ifa_address;
- ifa = ifa->ifa_next;
+ ifa = rtnl_dereference(ifa->ifa_next);
c++;
}
+ // SPDX-License-Identifier: GPL-2.0-only
/*
* BSS client mode implementation
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
* Copyright (C) 2018 - 2019 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/delay.h>
IEEE80211_HE_MAC_CAP0_TWT_RES;
}
+static int ieee80211_recalc_twt_req(struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta,
+ struct ieee802_11_elems *elems)
+{
+ bool twt = ieee80211_twt_req_supported(sta, elems);
+
+ if (sdata->vif.bss_conf.twt_requester != twt) {
+ sdata->vif.bss_conf.twt_requester = twt;
+ return BSS_CHANGED_TWT;
+ }
+ return 0;
+}
+
static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
struct cfg80211_bss *cbss,
struct ieee80211_mgmt *mgmt, size_t len)
sta);
bss_conf->he_support = sta->sta.he_cap.has_he;
- bss_conf->twt_requester =
- ieee80211_twt_req_supported(sta, &elems);
+ changed |= ieee80211_recalc_twt_req(sdata, sta, &elems);
} else {
bss_conf->he_support = false;
bss_conf->twt_requester = false;
mutex_lock(&local->sta_mtx);
sta = sta_info_get(sdata, bssid);
+ changed |= ieee80211_recalc_twt_req(sdata, sta, &elems);
+
if (ieee80211_config_bw(sdata, sta,
elems.ht_cap_elem, elems.ht_operation,
elems.vht_operation, elems.he_operation,
basic_rates = BIT(min_rate_index);
}
- new_sta->sta.supp_rates[cbss->channel->band] = rates;
+ if (rates)
+ new_sta->sta.supp_rates[cbss->channel->band] = rates;
+ else
+ sdata_info(sdata,
+ "No rates found, keeping mandatory only\n");
+
sdata->vif.bss_conf.basic_rates = basic_rates;
/* cf. IEEE 802.11 9.2.12 */
+ // SPDX-License-Identifier: GPL-2.0-only
/*
* Off-channel operation helpers
*
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/export.h>
#include <net/mac80211.h>
cfg80211_remain_on_channel_expired(&roc->sdata->wdev,
roc->cookie, roc->chan,
GFP_KERNEL);
+ else
+ cfg80211_tx_mgmt_expired(&roc->sdata->wdev,
+ roc->mgmt_tx_cookie,
+ roc->chan, GFP_KERNEL);
list_del(&roc->list);
kfree(roc);
+ // SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2002-2005, Instant802 Networks, Inc.
* Copyright 2005-2006, Devicescape Software, Inc.
* Copyright 2017 Intel Deutschland GmbH
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/kernel.h>
break;
}
WARN_ONCE(i == sband->n_bitrates,
- "no supported rates (0x%x) in rate_mask 0x%x with flags 0x%x\n",
+ "no supported rates for sta %pM (0x%x, band %d) in rate_mask 0x%x with flags 0x%x\n",
+ sta ? sta->addr : NULL,
sta ? sta->supp_rates[sband->band] : -1,
+ sband->band,
rate_mask, rate_flags);
info->control.rates[0].count =
}
-bool rate_control_send_low(struct ieee80211_sta *pubsta,
- void *priv_sta,
- struct ieee80211_tx_rate_control *txrc)
+static bool rate_control_send_low(struct ieee80211_sta *pubsta,
+ struct ieee80211_tx_rate_control *txrc)
{
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb);
struct ieee80211_supported_band *sband = txrc->sband;
int mcast_rate;
bool use_basicrate = false;
- if (!pubsta || !priv_sta || rc_no_data_or_no_ack_use_min(txrc)) {
+ if (!pubsta || rc_no_data_or_no_ack_use_min(txrc)) {
__rate_control_send_low(txrc->hw, sband, pubsta, info,
txrc->rate_idx_mask);
}
return false;
}
-EXPORT_SYMBOL(rate_control_send_low);
static bool rate_idx_match_legacy_mask(s8 *rate_idx, int n_bitrates, u32 mask)
{
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb);
int i;
- if (sta && test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) {
- ista = &sta->sta;
- priv_sta = sta->rate_ctrl_priv;
- }
-
for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
info->control.rates[i].idx = -1;
info->control.rates[i].flags = 0;
info->control.rates[i].count = 0;
}
+ if (rate_control_send_low(sta ? &sta->sta : NULL, txrc))
+ return;
+
if (ieee80211_hw_check(&sdata->local->hw, HAS_RATE_CONTROL))
return;
+ if (sta && test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) {
+ ista = &sta->sta;
+ priv_sta = sta->rate_ctrl_priv;
+ }
+
if (ista) {
spin_lock_bh(&sta->rate_ctrl_lock);
ref->ops->get_rate(ref->priv, ista, priv_sta, txrc);
spin_unlock_bh(&sta->rate_ctrl_lock);
} else {
- ref->ops->get_rate(ref->priv, NULL, NULL, txrc);
+ rate_control_send_low(NULL, txrc);
}
if (ieee80211_hw_check(&sdata->local->hw, SUPPORTS_RC_TABLE))
+ // SPDX-License-Identifier: GPL-2.0-only
/*
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/netdevice.h>
#include <linux/types.h>
}
if (mp->hw->max_rates >= 2) {
- /*
- * At least 2 tx rates supported, use max_prob_rate next */
minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_prob_rate);
}
struct minstrel_priv *mp = priv;
int sample_idx;
- if (rate_control_send_low(sta, priv_sta, txrc))
- return;
-
if (!msp->is_ht)
return mac80211_minstrel.get_rate(priv, sta, &msp->legacy, txrc);
+ // SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2002-2005, Instant802 Networks, Inc.
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018-2019 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/module.h>
for (i = 0; i < IEEE80211_NUM_TIDS; i++)
sta->last_seq_ctrl[i] = cpu_to_le16(USHRT_MAX);
+ for (i = 0; i < NUM_NL80211_BANDS; i++) {
+ u32 mandatory = 0;
+ int r;
+
+ if (!hw->wiphy->bands[i])
+ continue;
+
+ switch (i) {
+ case NL80211_BAND_2GHZ:
+ /*
+ * We use both here, even if we cannot really know for
+ * sure the station will support both, but the only use
+ * for this is when we don't know anything yet and send
+ * management frames, and then we'll pick the lowest
+ * possible rate anyway.
+ * If we don't include _G here, we cannot find a rate
+ * in P2P, and thus trigger the WARN_ONCE() in rate.c
+ */
+ mandatory = IEEE80211_RATE_MANDATORY_B |
+ IEEE80211_RATE_MANDATORY_G;
+ break;
+ case NL80211_BAND_5GHZ:
+ mandatory = IEEE80211_RATE_MANDATORY_A;
+ break;
+ case NL80211_BAND_60GHZ:
+ WARN_ON(1);
+ mandatory = 0;
+ break;
+ }
+
+ for (r = 0; r < hw->wiphy->bands[i]->n_bitrates; r++) {
+ struct ieee80211_rate *rate;
+
+ rate = &hw->wiphy->bands[i]->bitrates[r];
+
+ if (!(rate->flags & mandatory))
+ continue;
+ sta->sta.supp_rates[i] |= BIT(r);
+ }
+ }
+
sta->sta.smps_mode = IEEE80211_SMPS_OFF;
if (sdata->vif.type == NL80211_IFTYPE_AP ||
sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
+ // SPDX-License-Identifier: GPL-2.0-only
/*
* Connection tracking protocol helper module for SCTP.
*
*
* SCTP is defined in RFC 2960. References to various sections in this code
* are to this RFC.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/types.h>
if (state->hook == NF_INET_PRE_ROUTING &&
state->net->ct.sysctl_checksum &&
skb->ip_summed == CHECKSUM_NONE) {
- if (!skb_make_writable(skb, dataoff + sizeof(struct sctphdr))) {
+ if (skb_ensure_writable(skb, dataoff + sizeof(*sh))) {
logmsg = "nf_ct_sctp: failed to read header ";
goto out_invalid;
}
+ // SPDX-License-Identifier: GPL-2.0-only
/* nf_nat_helper.c - generic support functions for NAT helpers
*
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/module.h>
#include <linux/gfp.h>
struct tcphdr *tcph;
int oldlen, datalen;
- if (!skb_make_writable(skb, skb->len))
+ if (skb_ensure_writable(skb, skb->len))
return false;
if (rep_len > match_len &&
struct udphdr *udph;
int datalen, oldlen;
- if (!skb_make_writable(skb, skb->len))
+ if (skb_ensure_writable(skb, skb->len))
return false;
if (rep_len > match_len &&
+ // SPDX-License-Identifier: GPL-2.0-only
/* (C) 1999-2001 Paul `Rusty' Russell
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/types.h>
struct udphdr *hdr;
bool do_csum;
- if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
+ if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
return false;
hdr = (struct udphdr *)(skb->data + hdroff);
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
struct udphdr *hdr;
- if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
+ if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
return false;
hdr = (struct udphdr *)(skb->data + hdroff);
if (skb->len >= hdroff + sizeof(*hdr))
hdrsize = sizeof(*hdr);
- if (!skb_make_writable(skb, hdroff + hdrsize))
+ if (skb_ensure_writable(skb, hdroff + hdrsize))
return false;
hdr = (struct sctphdr *)(skb->data + hdroff);
if (skb->len >= hdroff + sizeof(struct tcphdr))
hdrsize = sizeof(struct tcphdr);
- if (!skb_make_writable(skb, hdroff + hdrsize))
+ if (skb_ensure_writable(skb, hdroff + hdrsize))
return false;
hdr = (struct tcphdr *)(skb->data + hdroff);
if (skb->len >= hdroff + sizeof(struct dccp_hdr))
hdrsize = sizeof(struct dccp_hdr);
- if (!skb_make_writable(skb, hdroff + hdrsize))
+ if (skb_ensure_writable(skb, hdroff + hdrsize))
return false;
hdr = (struct dccp_hdr *)(skb->data + hdroff);
{
struct icmphdr *hdr;
- if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
+ if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
return false;
hdr = (struct icmphdr *)(skb->data + hdroff);
{
struct icmp6hdr *hdr;
- if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
+ if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
return false;
hdr = (struct icmp6hdr *)(skb->data + hdroff);
/* pgreh includes two optional 32bit fields which are not required
* to be there. That's where the magic '8' comes from */
- if (!skb_make_writable(skb, hdroff + sizeof(*pgreh) - 8))
+ if (skb_ensure_writable(skb, hdroff + sizeof(*pgreh) - 8))
return false;
greh = (void *)skb->data + hdroff;
struct iphdr *iph;
unsigned int hdroff;
- if (!skb_make_writable(skb, iphdroff + sizeof(*iph)))
+ if (skb_ensure_writable(skb, iphdroff + sizeof(*iph)))
return false;
iph = (void *)skb->data + iphdroff;
int hdroff;
u8 nexthdr;
- if (!skb_make_writable(skb, iphdroff + sizeof(*ipv6h)))
+ if (skb_ensure_writable(skb, iphdroff + sizeof(*ipv6h)))
return false;
ipv6h = (void *)skb->data + iphdroff;
WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
- if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
+ if (skb_ensure_writable(skb, hdrlen + sizeof(*inside)))
return 0;
if (nf_ip_checksum(skb, hooknum, hdrlen, 0))
return 0;
WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
- if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
+ if (skb_ensure_writable(skb, hdrlen + sizeof(*inside)))
return 0;
if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6))
return 0;
+ // SPDX-License-Identifier: GPL-2.0-only
/*
* (C) 1999-2001 Paul `Rusty' Russell
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
* Based on Rusty Russell's IPv4 REDIRECT target. Development of IPv6
* NAT funded by Astaro.
*/
if (hooknum == NF_INET_LOCAL_OUT) {
newdst = htonl(0x7F000001);
} else {
- struct in_device *indev;
- struct in_ifaddr *ifa;
+ const struct in_device *indev;
newdst = 0;
indev = __in_dev_get_rcu(skb->dev);
- if (indev && indev->ifa_list) {
- ifa = indev->ifa_list;
- newdst = ifa->ifa_local;
+ if (indev) {
+ const struct in_ifaddr *ifa;
+
+ ifa = rcu_dereference(indev->ifa_list);
+ if (ifa)
+ newdst = ifa->ifa_local;
}
if (!newdst)
+ // SPDX-License-Identifier: GPL-2.0-only
/* SIP extension for NAT alteration.
*
* based on RR's ip_nat_ftp.c and other modules.
* (C) 2007 United Security Providers
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/module.h>
if (dir == IP_CT_DIR_REPLY && ct_sip_info->forced_dport) {
struct udphdr *uh;
- if (!skb_make_writable(skb, skb->len)) {
+ if (skb_ensure_writable(skb, skb->len)) {
nf_ct_helper_log(skb, ct, "cannot mangle packet");
return NF_DROP;
}
+ // SPDX-License-Identifier: GPL-2.0-only
/*
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/module.h>
optoff = protoff + sizeof(struct tcphdr);
optend = protoff + th->doff * 4;
- if (!skb_make_writable(skb, optend))
+ if (skb_ensure_writable(skb, optend))
return 0;
while (optoff < optend) {
+ // SPDX-License-Identifier: GPL-2.0-only
/*
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
* Development of this code funded by Astaro AG (http://www.astaro.com/)
*/
return newstats;
}
-static void nft_chain_stats_replace(struct net *net,
- struct nft_base_chain *chain,
- struct nft_stats __percpu *newstats)
+static void nft_chain_stats_replace(struct nft_trans *trans)
{
- struct nft_stats __percpu *oldstats;
+ struct nft_base_chain *chain = nft_base_chain(trans->ctx.chain);
- if (newstats == NULL)
+ if (!nft_trans_chain_stats(trans))
return;
- if (rcu_access_pointer(chain->stats)) {
- oldstats = rcu_dereference_protected(chain->stats,
- lockdep_commit_lock_is_held(net));
- rcu_assign_pointer(chain->stats, newstats);
- synchronize_rcu();
- free_percpu(oldstats);
- } else {
- rcu_assign_pointer(chain->stats, newstats);
+ rcu_swap_protected(chain->stats, nft_trans_chain_stats(trans),
+ lockdep_commit_lock_is_held(trans->ctx.net));
+
+ if (!nft_trans_chain_stats(trans))
static_branch_inc(&nft_counters_enabled);
- }
}
static void nf_tables_chain_free_chain_rules(struct nft_chain *chain)
if (!nft_is_base_chain(trans->ctx.chain))
return;
+ nft_chain_stats_replace(trans);
+
basechain = nft_base_chain(trans->ctx.chain);
- nft_chain_stats_replace(trans->ctx.net, basechain,
- nft_trans_chain_stats(trans));
switch (nft_trans_chain_policy(trans)) {
case NF_DROP:
nf_tables_table_destroy(&trans->ctx);
break;
case NFT_MSG_NEWCHAIN:
+ free_percpu(nft_trans_chain_stats(trans));
kfree(nft_trans_chain_name(trans));
break;
case NFT_MSG_DELCHAIN:
+ // SPDX-License-Identifier: GPL-2.0-only
/*
* This is a module which is used for queueing packets and communicating with
* userspace via nfnetlink.
* Based on the old ipv4-only ip_queue.c:
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
}
skb_put(e->skb, diff);
}
- if (!skb_make_writable(e->skb, data_len))
+ if (skb_ensure_writable(e->skb, data_len))
return -ENOMEM;
skb_copy_to_linear_data(e->skb, data, data_len);
e->skb->ip_summed = CHECKSUM_NONE;
+ // SPDX-License-Identifier: GPL-2.0-only
/*
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
* Development of this code funded by Astaro AG (http://www.astaro.com/)
*/
if (i + optl > tcphdr_len || priv->len + priv->offset > optl)
return;
- if (!skb_make_writable(pkt->skb, pkt->xt.thoff + i + priv->len))
+ if (skb_ensure_writable(pkt->skb,
+ pkt->xt.thoff + i + priv->len))
return;
tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff,
+ // SPDX-License-Identifier: GPL-2.0-only
/*
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
* Development of this code funded by Astaro AG (http://www.astaro.com/)
*/
tsum));
}
- if (!skb_make_writable(skb, l4csum_offset + sizeof(sum)) ||
+ if (skb_ensure_writable(skb, l4csum_offset + sizeof(sum)) ||
skb_store_bits(skb, l4csum_offset, &sum, sizeof(sum)) < 0)
return -1;
return -1;
nft_csum_replace(&sum, fsum, tsum);
- if (!skb_make_writable(skb, csum_offset + sizeof(sum)) ||
+ if (skb_ensure_writable(skb, csum_offset + sizeof(sum)) ||
skb_store_bits(skb, csum_offset, &sum, sizeof(sum)) < 0)
return -1;
goto err;
}
- if (!skb_make_writable(skb, max(offset + priv->len, 0)) ||
+ if (skb_ensure_writable(skb, max(offset + priv->len, 0)) ||
skb_store_bits(skb, offset, src, priv->len) < 0)
goto err;
+ // SPDX-License-Identifier: GPL-2.0-only
/* x_tables module for setting the IPv4/IPv6 DSCP field, Version 1.8
*
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
* See RFC2474 for a description of the DSCP field within the IP Header.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
if (dscp != dinfo->dscp) {
- if (!skb_make_writable(skb, sizeof(struct iphdr)))
+ if (skb_ensure_writable(skb, sizeof(struct iphdr)))
return NF_DROP;
ipv4_change_dsfield(ip_hdr(skb),
u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
if (dscp != dinfo->dscp) {
- if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
+ if (skb_ensure_writable(skb, sizeof(struct ipv6hdr)))
return NF_DROP;
ipv6_change_dsfield(ipv6_hdr(skb),
nv = (orig & ~info->tos_mask) ^ info->tos_value;
if (orig != nv) {
- if (!skb_make_writable(skb, sizeof(struct iphdr)))
+ if (skb_ensure_writable(skb, sizeof(struct iphdr)))
return NF_DROP;
iph = ip_hdr(skb);
ipv4_change_dsfield(iph, 0, nv);
nv = (orig & ~info->tos_mask) ^ info->tos_value;
if (orig != nv) {
- if (!skb_make_writable(skb, sizeof(struct iphdr)))
+ if (skb_ensure_writable(skb, sizeof(struct iphdr)))
return NF_DROP;
iph = ipv6_hdr(skb);
ipv6_change_dsfield(iph, 0, nv);
+ // SPDX-License-Identifier: GPL-2.0-only
/*
* TTL modification target for IP tables
*
* Hop Limit modification target for ip6tables
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
const struct ipt_TTL_info *info = par->targinfo;
int new_ttl;
- if (!skb_make_writable(skb, skb->len))
+ if (skb_ensure_writable(skb, sizeof(*iph)))
return NF_DROP;
iph = ip_hdr(skb);
const struct ip6t_HL_info *info = par->targinfo;
int new_hl;
- if (!skb_make_writable(skb, skb->len))
+ if (skb_ensure_writable(skb, sizeof(*ip6h)))
return NF_DROP;
ip6h = ipv6_hdr(skb);
+ // SPDX-License-Identifier: GPL-2.0-only
/*
* This is a module which is used for setting the MSS option in TCP packets.
*
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
if (par->fragoff != 0)
return 0;
- if (!skb_make_writable(skb, skb->len))
+ if (skb_ensure_writable(skb, skb->len))
return -1;
len = skb->len - tcphoff;
+ // SPDX-License-Identifier: GPL-2.0-only
/*
* A module for stripping a specific TCP option from TCP packets.
*
* Copyright © CC Computer Consultants GmbH, 2007
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/module.h>
static unsigned int
tcpoptstrip_mangle_packet(struct sk_buff *skb,
const struct xt_action_param *par,
- unsigned int tcphoff, unsigned int minlen)
+ unsigned int tcphoff)
{
const struct xt_tcpoptstrip_target_info *info = par->targinfo;
+ struct tcphdr *tcph, _th;
unsigned int optl, i, j;
- struct tcphdr *tcph;
u_int16_t n, o;
u_int8_t *opt;
- int len, tcp_hdrlen;
+ int tcp_hdrlen;
/* This is a fragment, no TCP header is available */
if (par->fragoff != 0)
return XT_CONTINUE;
- if (!skb_make_writable(skb, skb->len))
+ tcph = skb_header_pointer(skb, tcphoff, sizeof(_th), &_th);
+ if (!tcph)
return NF_DROP;
- len = skb->len - tcphoff;
- if (len < (int)sizeof(struct tcphdr))
- return NF_DROP;
-
- tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
tcp_hdrlen = tcph->doff * 4;
+ if (tcp_hdrlen < sizeof(struct tcphdr))
+ return NF_DROP;
- if (len < tcp_hdrlen)
+ if (skb_ensure_writable(skb, tcphoff + tcp_hdrlen))
return NF_DROP;
- opt = (u_int8_t *)tcph;
+ /* must reload tcph, might have been moved */
+ tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
+ opt = (u8 *)tcph;
/*
* Walk through all TCP options - if we find some option to remove,
static unsigned int
tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_action_param *par)
{
- return tcpoptstrip_mangle_packet(skb, par, ip_hdrlen(skb),
- sizeof(struct iphdr) + sizeof(struct tcphdr));
+ return tcpoptstrip_mangle_packet(skb, par, ip_hdrlen(skb));
}
#if IS_ENABLED(CONFIG_IP6_NF_MANGLE)
if (tcphoff < 0)
return NF_DROP;
- return tcpoptstrip_mangle_packet(skb, par, tcphoff,
- sizeof(*ipv6h) + sizeof(struct tcphdr));
+ return tcpoptstrip_mangle_packet(skb, par, tcphoff);
}
#endif
+ // SPDX-License-Identifier: GPL-2.0-only
/*
* Kernel module to match various things tied to sockets associated with
* locally generated outgoing packets.
*
* Copyright © CC Computer Consultants GmbH, 2007 - 2008
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/module.h>
#include <linux/skbuff.h>
}
if (info->match & XT_OWNER_GID) {
+ unsigned int i, match = false;
kgid_t gid_min = make_kgid(net->user_ns, info->gid_min);
kgid_t gid_max = make_kgid(net->user_ns, info->gid_max);
- if ((gid_gte(filp->f_cred->fsgid, gid_min) &&
- gid_lte(filp->f_cred->fsgid, gid_max)) ^
- !(info->invert & XT_OWNER_GID))
+ struct group_info *gi = filp->f_cred->group_info;
+
+ if (gid_gte(filp->f_cred->fsgid, gid_min) &&
+ gid_lte(filp->f_cred->fsgid, gid_max))
+ match = true;
+
+ if (!match && (info->match & XT_OWNER_SUPPL_GROUPS) && gi) {
+ for (i = 0; i < gi->ngroups; ++i) {
+ kgid_t group = gi->gid[i];
+
+ if (gid_gte(group, gid_min) &&
+ gid_lte(group, gid_max)) {
+ match = true;
+ break;
+ }
+ }
+ }
+
+ if (match ^ !(info->invert & XT_OWNER_GID))
return false;
}
+ // SPDX-License-Identifier: GPL-2.0-only
/*
* Stream Parser
*
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
*/
#include <linux/bpf.h>
return 0;
}
- skb = alloc_skb(0, GFP_ATOMIC);
+ skb = alloc_skb_for_msg(head);
if (!skb) {
STRP_STATS_INCR(strp->stats.mem_fail);
desc->error = -ENOMEM;
return 0;
}
- skb->len = head->len;
- skb->data_len = head->len;
- skb->truesize = head->truesize;
- *_strp_msg(skb) = *_strp_msg(head);
+
strp->skb_nextp = &head->next;
- skb_shinfo(skb)->frag_list = head;
strp->skb_head = skb;
head = skb;
} else {
struct sk_buff_head *xmitq);
static bool tipc_link_release_pkts(struct tipc_link *l, u16 to);
static u16 tipc_build_gap_ack_blks(struct tipc_link *l, void *data);
-static void tipc_link_advance_transmq(struct tipc_link *l, u16 acked, u16 gap,
- struct tipc_gap_ack_blks *ga,
- struct sk_buff_head *xmitq);
+static int tipc_link_advance_transmq(struct tipc_link *l, u16 acked, u16 gap,
+ struct tipc_gap_ack_blks *ga,
+ struct sk_buff_head *xmitq);
/*
* Simple non-static link routines (i.e. referenced outside this file)
l->snd_nxt = seqno;
}
-static void link_retransmit_failure(struct tipc_link *l, struct sk_buff *skb)
+/**
+ * link_retransmit_failure() - Detect repeated retransmit failures
+ * @l: tipc link sender
+ * @r: tipc link receiver (= l in case of unicast)
+ * @from: seqno of the 1st packet in retransmit request
+ * @rc: returned code
+ *
+ * Return: true if the repeated retransmit failures happens, otherwise
+ * false
+ */
+static bool link_retransmit_failure(struct tipc_link *l, struct tipc_link *r,
+ u16 from, int *rc)
{
- struct tipc_msg *hdr = buf_msg(skb);
+ struct sk_buff *skb = skb_peek(&l->transmq);
+ struct tipc_msg *hdr;
+
+ if (!skb)
+ return false;
+ hdr = buf_msg(skb);
+
+ /* Detect repeated retransmit failures on same packet */
+ if (r->prev_from != from) {
+ r->prev_from = from;
+ r->stale_limit = jiffies + msecs_to_jiffies(r->tolerance);
+ r->stale_cnt = 0;
+ } else if (++r->stale_cnt > 99 && time_after(jiffies, r->stale_limit)) {
+ pr_warn("Retransmission failure on link <%s>\n", l->name);
+ link_print(l, "State of link ");
+ pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n",
+ msg_user(hdr), msg_type(hdr), msg_size(hdr),
+ msg_errcode(hdr));
+ pr_info("sqno %u, prev: %x, src: %x\n",
+ msg_seqno(hdr), msg_prevnode(hdr), msg_orignode(hdr));
+
+ trace_tipc_list_dump(&l->transmq, true, "retrans failure!");
+ trace_tipc_link_dump(l, TIPC_DUMP_NONE, "retrans failure!");
+ trace_tipc_link_dump(r, TIPC_DUMP_NONE, "retrans failure!");
+
+ if (link_is_bc_sndlink(l))
+ *rc = TIPC_LINK_DOWN_EVT;
+
+ *rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
+ return true;
+ }
- pr_warn("Retransmission failure on link <%s>\n", l->name);
- link_print(l, "State of link ");
- pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n",
- msg_user(hdr), msg_type(hdr), msg_size(hdr), msg_errcode(hdr));
- pr_info("sqno %u, prev: %x, src: %x\n",
- msg_seqno(hdr), msg_prevnode(hdr), msg_orignode(hdr));
+ return false;
}
-/* tipc_link_retrans() - retransmit one or more packets
+/* tipc_link_bc_retrans() - retransmit zero or more packets
* @l: the link to transmit on
* @r: the receiving link ordering the retransmit. Same as l if unicast
* @from: retransmit from (inclusive) this sequence number
* @to: retransmit to (inclusive) this sequence number
* xmitq: queue for accumulating the retransmitted packets
*/
-static int tipc_link_retrans(struct tipc_link *l, struct tipc_link *r,
- u16 from, u16 to, struct sk_buff_head *xmitq)
+static int tipc_link_bc_retrans(struct tipc_link *l, struct tipc_link *r,
+ u16 from, u16 to, struct sk_buff_head *xmitq)
{
struct sk_buff *_skb, *skb = skb_peek(&l->transmq);
u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
u16 ack = l->rcv_nxt - 1;
struct tipc_msg *hdr;
+ int rc = 0;
if (!skb)
return 0;
return 0;
trace_tipc_link_retrans(r, from, to, &l->transmq);
- /* Detect repeated retransmit failures on same packet */
- if (r->prev_from != from) {
- r->prev_from = from;
- r->stale_limit = jiffies + msecs_to_jiffies(r->tolerance);
- r->stale_cnt = 0;
- } else if (++r->stale_cnt > 99 && time_after(jiffies, r->stale_limit)) {
- link_retransmit_failure(l, skb);
- trace_tipc_list_dump(&l->transmq, true, "retrans failure!");
- trace_tipc_link_dump(l, TIPC_DUMP_NONE, "retrans failure!");
- trace_tipc_link_dump(r, TIPC_DUMP_NONE, "retrans failure!");
- if (link_is_bc_sndlink(l))
- return TIPC_LINK_DOWN_EVT;
- return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
- }
+
+ if (link_retransmit_failure(l, r, from, &rc))
+ return rc;
skb_queue_walk(&l->transmq, skb) {
hdr = buf_msg(skb);
* @gap: # of gap packets
* @ga: buffer pointer to Gap ACK blocks from peer
* @xmitq: queue for accumulating the retransmitted packets if any
+ *
+ * In case of a repeated retransmit failures, the call will return shortly
+ * with a returned code (e.g. TIPC_LINK_DOWN_EVT)
*/
-static void tipc_link_advance_transmq(struct tipc_link *l, u16 acked, u16 gap,
- struct tipc_gap_ack_blks *ga,
- struct sk_buff_head *xmitq)
+static int tipc_link_advance_transmq(struct tipc_link *l, u16 acked, u16 gap,
+ struct tipc_gap_ack_blks *ga,
+ struct sk_buff_head *xmitq)
{
struct sk_buff *skb, *_skb, *tmp;
struct tipc_msg *hdr;
u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
u16 ack = l->rcv_nxt - 1;
- u16 seqno;
- u16 n = 0;
+ u16 seqno, n = 0;
+ int rc = 0;
+
+ if (gap && link_retransmit_failure(l, l, acked + 1, &rc))
+ return rc;
skb_queue_walk_safe(&l->transmq, skb, tmp) {
seqno = buf_seqno(skb);
goto next_gap_ack;
}
}
+
+ return 0;
}
/* tipc_link_build_state_msg: prepare link state message for transmission
* node has entered SELF_DOWN_PEER_LEAVING and both peer nodes
* would have to start over from scratch instead.
*/
- WARN_ON(l && tipc_link_is_up(l));
tnl->drop_point = 1;
tnl->failover_reasm_skb = NULL;
tipc_link_build_proto_msg(l, STATE_MSG, 0, reply,
rcvgap, 0, 0, xmitq);
- tipc_link_advance_transmq(l, ack, gap, ga, xmitq);
+ rc |= tipc_link_advance_transmq(l, ack, gap, ga, xmitq);
/* If NACK, retransmit will now start at right position */
if (gap)
if (more(peers_snd_nxt, l->rcv_nxt + l->window))
return rc;
- rc = tipc_link_retrans(snd_l, l, from, to, xmitq);
+ rc = tipc_link_bc_retrans(snd_l, l, from, to, xmitq);
l->snd_nxt = peers_snd_nxt;
if (link_bc_rcv_gap(l))
if (dnode == tipc_own_addr(l->net)) {
tipc_link_bc_ack_rcv(l, acked, xmitq);
- rc = tipc_link_retrans(l->bc_sndlink, l, from, to, xmitq);
+ rc = tipc_link_bc_retrans(l->bc_sndlink, l, from, to, xmitq);
l->stats.recv_nacks++;
return rc;
}
#include <net/sock.h>
#include <net/af_vsock.h>
-/* The host side's design of the feature requires 6 exact 4KB pages for
- * recv/send rings respectively -- this is suboptimal considering memory
- * consumption, however unluckily we have to live with it, before the
- * host comes up with a better design in the future.
+/* Older (VMBUS version 'VERSION_WIN10' or before) Windows hosts have some
+ * stricter requirements on the hv_sock ring buffer size of six 4K pages. Newer
+ * hosts don't have this limitation; but, keep the defaults the same for compat.
*/
#define PAGE_SIZE_4K 4096
#define RINGBUFFER_HVS_RCV_SIZE (PAGE_SIZE_4K * 6)
#define RINGBUFFER_HVS_SND_SIZE (PAGE_SIZE_4K * 6)
+#define RINGBUFFER_HVS_MAX_SIZE (PAGE_SIZE_4K * 64)
/* The MTU is 16KB per the host side's design */
#define HVS_MTU_SIZE (1024 * 16)
};
/* We can send up to HVS_MTU_SIZE bytes of payload to the host, but let's use
- * a small size, i.e. HVS_SEND_BUF_SIZE, to minimize the dynamically-allocated
- * buffer, because tests show there is no significant performance difference.
+ * a smaller size, i.e. HVS_SEND_BUF_SIZE, to maximize concurrency between the
+ * guest and the host processing as one VMBUS packet is the smallest processing
+ * unit.
*
* Note: the buffer can be eliminated in the future when we add new VMBus
* ringbuffer APIs that allow us to directly copy data from userspace buffer
set_channel_pending_send_size(chan,
HVS_PKT_LEN(HVS_SEND_BUF_SIZE));
- /* See hvs_stream_has_space(): we must make sure the host has seen
- * the new pending send size, before we can re-check the writable
- * bytes.
- */
- virt_mb();
- }
-
- static void hvs_clear_channel_pending_send_size(struct vmbus_channel *chan)
- {
- set_channel_pending_send_size(chan, 0);
-
- /* Ditto */
virt_mb();
}
if (hvs_channel_readable(chan))
sk->sk_data_ready(sk);
- /* See hvs_stream_has_space(): when we reach here, the writable bytes
- * may be already less than HVS_PKT_LEN(HVS_SEND_BUF_SIZE).
- */
if (hv_get_bytes_to_write(&chan->outbound) > 0)
sk->sk_write_space(sk);
}
struct sockaddr_vm addr;
struct sock *sk, *new = NULL;
struct vsock_sock *vnew = NULL;
- struct hvsock *hvs, *hvs_new = NULL;
+ struct hvsock *hvs = NULL;
+ struct hvsock *hvs_new = NULL;
+ int rcvbuf;
int ret;
+ int sndbuf;
if_type = &chan->offermsg.offer.if_type;
if_instance = &chan->offermsg.offer.if_instance;
}
set_channel_read_mode(chan, HV_CALL_DIRECT);
- ret = vmbus_open(chan, RINGBUFFER_HVS_SND_SIZE,
- RINGBUFFER_HVS_RCV_SIZE, NULL, 0,
- hvs_channel_cb, conn_from_host ? new : sk);
+
+ /* Use the socket buffer sizes as hints for the VMBUS ring size. For
+ * server side sockets, 'sk' is the parent socket and thus, this will
+ * allow the child sockets to inherit the size from the parent. Keep
+ * the mins to the default value and align to page size as per VMBUS
+ * requirements.
+ * For the max, the socket core library will limit the socket buffer
+ * size that can be set by the user, but, since currently, the hv_sock
+ * VMBUS ring buffer is physically contiguous allocation, restrict it
+ * further.
+ * Older versions of hv_sock host side code cannot handle bigger VMBUS
+ * ring buffer size. Use the version number to limit the change to newer
+ * versions.
+ */
+ if (vmbus_proto_version < VERSION_WIN10_V5) {
+ sndbuf = RINGBUFFER_HVS_SND_SIZE;
+ rcvbuf = RINGBUFFER_HVS_RCV_SIZE;
+ } else {
+ sndbuf = max_t(int, sk->sk_sndbuf, RINGBUFFER_HVS_SND_SIZE);
+ sndbuf = min_t(int, sndbuf, RINGBUFFER_HVS_MAX_SIZE);
+ sndbuf = ALIGN(sndbuf, PAGE_SIZE);
+ rcvbuf = max_t(int, sk->sk_rcvbuf, RINGBUFFER_HVS_RCV_SIZE);
+ rcvbuf = min_t(int, rcvbuf, RINGBUFFER_HVS_MAX_SIZE);
+ rcvbuf = ALIGN(rcvbuf, PAGE_SIZE);
+ }
+
+ ret = vmbus_open(chan, sndbuf, rcvbuf, NULL, 0, hvs_channel_cb,
+ conn_from_host ? new : sk);
if (ret != 0) {
if (conn_from_host) {
hvs_new->chan = NULL;
set_per_channel_state(chan, conn_from_host ? new : sk);
vmbus_set_chn_rescind_callback(chan, hvs_close_connection);
+ /* Set the pending send size to max packet size to always get
+ * notifications from the host when there is enough writable space.
+ * The host is optimized to send notifications only when the pending
+ * size boundary is crossed, and not always.
+ */
+ hvs_set_channel_pending_send_size(chan);
+
if (conn_from_host) {
new->sk_state = TCP_ESTABLISHED;
sk->sk_ack_backlog++;
static int hvs_sock_init(struct vsock_sock *vsk, struct vsock_sock *psk)
{
struct hvsock *hvs;
+ struct sock *sk = sk_vsock(vsk);
hvs = kzalloc(sizeof(*hvs), GFP_KERNEL);
if (!hvs)
vsk->trans = hvs;
hvs->vsk = vsk;
-
+ sk->sk_sndbuf = RINGBUFFER_HVS_SND_SIZE;
+ sk->sk_rcvbuf = RINGBUFFER_HVS_RCV_SIZE;
return 0;
}
struct hvsock *hvs = vsk->trans;
struct vmbus_channel *chan = hvs->chan;
struct hvs_send_buf *send_buf;
- ssize_t to_write, max_writable, ret;
+ ssize_t to_write, max_writable;
+ ssize_t ret = 0;
+ ssize_t bytes_written = 0;
BUILD_BUG_ON(sizeof(*send_buf) != PAGE_SIZE_4K);
if (!send_buf)
return -ENOMEM;
- max_writable = hvs_channel_writable_bytes(chan);
- to_write = min_t(ssize_t, len, max_writable);
- to_write = min_t(ssize_t, to_write, HVS_SEND_BUF_SIZE);
-
- ret = memcpy_from_msg(send_buf->data, msg, to_write);
- if (ret < 0)
- goto out;
+ /* Reader(s) could be draining data from the channel as we write.
+ * Maximize bandwidth, by iterating until the channel is found to be
+ * full.
+ */
+ while (len) {
+ max_writable = hvs_channel_writable_bytes(chan);
+ if (!max_writable)
+ break;
+ to_write = min_t(ssize_t, len, max_writable);
+ to_write = min_t(ssize_t, to_write, HVS_SEND_BUF_SIZE);
+ /* memcpy_from_msg is safe for loop as it advances the offsets
+ * within the message iterator.
+ */
+ ret = memcpy_from_msg(send_buf->data, msg, to_write);
+ if (ret < 0)
+ goto out;
- ret = hvs_send_data(hvs->chan, send_buf, to_write);
- if (ret < 0)
- goto out;
+ ret = hvs_send_data(hvs->chan, send_buf, to_write);
+ if (ret < 0)
+ goto out;
- ret = to_write;
+ bytes_written += to_write;
+ len -= to_write;
+ }
out:
+ /* If any data has been sent, return that */
+ if (bytes_written)
+ ret = bytes_written;
kfree(send_buf);
return ret;
}
static s64 hvs_stream_has_space(struct vsock_sock *vsk)
{
struct hvsock *hvs = vsk->trans;
- struct vmbus_channel *chan = hvs->chan;
- s64 ret;
-
- ret = hvs_channel_writable_bytes(chan);
- if (ret > 0) {
- hvs_clear_channel_pending_send_size(chan);
- } else {
- /* See hvs_channel_cb() */
- hvs_set_channel_pending_send_size(chan);
-
- /* Re-check the writable bytes to avoid race */
- ret = hvs_channel_writable_bytes(chan);
- if (ret > 0)
- hvs_clear_channel_pending_send_size(chan);
- }
- return ret;
+ return hvs_channel_writable_bytes(hvs->chan);
}
static u64 hvs_stream_rcvhiwat(struct vsock_sock *vsk)
- /* GPLv2 Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
+ // SPDX-License-Identifier: GPL-2.0-only
+ /* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
*/
static const char *__doc__ =
" XDP redirect with a CPU-map type \"BPF_MAP_TYPE_CPUMAP\"";
#define MAX_PROG 6
#include <bpf/bpf.h>
-#include "bpf/libbpf.h"
+#include "libbpf.h"
#include "bpf_util.h"
+ // SPDX-License-Identifier: GPL-2.0-only
/*
* Netlink message type permission tables, for user generated messages.
*
*
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2,
- * as published by the Free Software Foundation.
*/
#include <linux/types.h>
#include <linux/kernel.h>
{ RTM_NEWCHAIN, NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
{ RTM_DELCHAIN, NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
{ RTM_GETCHAIN, NETLINK_ROUTE_SOCKET__NLMSG_READ },
+ { RTM_NEWNEXTHOP, NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
+ { RTM_DELNEXTHOP, NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
+ { RTM_GETNEXTHOP, NETLINK_ROUTE_SOCKET__NLMSG_READ },
};
static const struct nlmsg_perm nlmsg_tcpdiag_perms[] =
* structures at the top of this file with the new mappings
* before updating the BUILD_BUG_ON() macro!
*/
- BUILD_BUG_ON(RTM_MAX != (RTM_NEWCHAIN + 3));
+ BUILD_BUG_ON(RTM_MAX != (RTM_NEWNEXTHOP + 3));
err = nlmsg_perm(nlmsg_type, perm, nlmsg_route_perms,
sizeof(nlmsg_route_perms));
break;