DPAA2 ETHERNET DRIVER
-L: linux-kernel@vger.kernel.org
+L: netdev@vger.kernel.org
S: Maintained
-F: drivers/staging/fsl-dpaa2/ethernet
+F: drivers/net/ethernet/freescale/dpaa2/dpaa2-eth*
+F: drivers/net/ethernet/freescale/dpaa2/dpni*
+F: drivers/net/ethernet/freescale/dpaa2/dpkg.h
+F: drivers/net/ethernet/freescale/dpaa2/Makefile
+F: drivers/net/ethernet/freescale/dpaa2/Kconfig
DPAA2 ETHERNET SWITCH DRIVER
DPAA2 PTP CLOCK DRIVER
-L: linux-kernel@vger.kernel.org
+L: netdev@vger.kernel.org
S: Maintained
-F: drivers/staging/fsl-dpaa2/rtc
+F: drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp*
+F: drivers/net/ethernet/freescale/dpaa2/dprtc*
DPT_I2O SCSI RAID DRIVER
F: Documentation/networking/ixgbe.txt
F: Documentation/networking/ixgbevf.txt
F: Documentation/networking/i40e.txt
-F: Documentation/networking/i40evf.txt
+F: Documentation/networking/iavf.txt
F: Documentation/networking/ice.txt
F: drivers/net/ethernet/intel/
F: drivers/net/ethernet/intel/*/
F: net/l3mdev
F: include/net/l3mdev.h
+LANTIQ / INTEL Ethernet drivers
+S: Maintained
+F: net/dsa/tag_gswip.c
+F: drivers/net/ethernet/lantiq_xrx200.c
+F: drivers/net/dsa/lantiq_pce.h
+F: drivers/net/dsa/lantiq_gswip.c
+
LANTIQ MIPS ARCHITECTURE
S: Maintained
F: drivers/net/dsa/mv88e6xxx/
-F: linux/platform_data/mv88e6xxx.h
+F: include/linux/platform_data/mv88e6xxx.h
F: Documentation/devicetree/bindings/net/dsa/marvell.txt
MARVELL ARMADA DRM SUPPORT
F: drivers/mmc/host/sdhci-xenon*
F: Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.txt
+MARVELL OCTEONTX2 RVU ADMIN FUNCTION DRIVER
+S: Supported
+F: drivers/net/ethernet/marvell/octeontx2/af/
+
MATROX FRAMEBUFFER DRIVER
S: Orphan
S: Maintained
- F: arch/mips/loongson64/*{2e/2f}*
+ F: arch/mips/loongson64/fuloong-2e/
+ F: arch/mips/loongson64/lemote-2f/
F: arch/mips/include/asm/mach-loongson64/
F: drivers/*/*loongson2*
F: drivers/*/*/*loongson2*
S: Maintained
F: Documentation/ABI/testing/sysfs-class-mux*
F: Documentation/devicetree/bindings/mux/
- F: include/linux/dt-bindings/mux/
+ F: include/dt-bindings/mux/
F: include/linux/mux/
F: drivers/mux/
};
static int qca_power_setup(struct hci_uart *hu, bool on);
-static void qca_power_shutdown(struct hci_dev *hdev);
+static void qca_power_shutdown(struct hci_uart *hu);
+static int qca_power_off(struct hci_dev *hdev);
static void __serial_clock_on(struct tty_struct *tty)
{
hu->priv = qca;
if (hu->serdev) {
- serdev_device_open(hu->serdev);
qcadev = serdev_device_get_drvdata(hu->serdev);
if (qcadev->btsoc_type != QCA_WCN3990) {
if (hu->serdev) {
qcadev = serdev_device_get_drvdata(hu->serdev);
if (qcadev->btsoc_type == QCA_WCN3990)
- qca_power_shutdown(hu->hdev);
+ qca_power_shutdown(hu);
else
gpiod_set_value_cansleep(qcadev->bt_en, 0);
- serdev_device_close(hu->serdev);
}
kfree_skb(qca->rx_skb);
static int qca_wcn3990_init(struct hci_uart *hu)
{
struct hci_dev *hdev = hu->hdev;
+ struct qca_serdev *qcadev;
int ret;
+ /* Check for vregs status, may be hci down has turned
+ * off the voltage regulator.
+ */
+ qcadev = serdev_device_get_drvdata(hu->serdev);
+ if (!qcadev->bt_power->vregs_on) {
+ serdev_device_close(hu->serdev);
+ ret = qca_power_setup(hu, true);
+ if (ret)
+ return ret;
+
+ ret = serdev_device_open(hu->serdev);
+ if (ret) {
+ bt_dev_err(hu->hdev, "failed to open port");
+ return ret;
+ }
+ }
+
/* Forcefully enable wcn3990 to enter in to boot mode. */
host_set_baudrate(hu, 2400);
ret = qca_send_power_pulse(hdev, QCA_WCN3990_POWEROFF_PULSE);
if (qcadev->btsoc_type == QCA_WCN3990) {
bt_dev_info(hdev, "setting up wcn3990");
+
+ /* Enable NON_PERSISTENT_SETUP QUIRK to ensure to execute
+ * setup for every hci up.
+ */
+ set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks);
+ hu->hdev->shutdown = qca_power_off;
ret = qca_wcn3990_init(hu);
if (ret)
return ret;
.num_vregs = 4,
};
-static void qca_power_shutdown(struct hci_dev *hdev)
+static void qca_power_shutdown(struct hci_uart *hu)
{
- struct hci_uart *hu = hci_get_drvdata(hdev);
+ struct serdev_device *serdev = hu->serdev;
+ unsigned char cmd = QCA_WCN3990_POWEROFF_PULSE;
host_set_baudrate(hu, 2400);
- qca_send_power_pulse(hdev, QCA_WCN3990_POWEROFF_PULSE);
+ hci_uart_set_flow_control(hu, true);
+ serdev_device_write_buf(serdev, &cmd, sizeof(cmd));
+ hci_uart_set_flow_control(hu, false);
qca_power_setup(hu, false);
}
+static int qca_power_off(struct hci_dev *hdev)
+{
+ struct hci_uart *hu = hci_get_drvdata(hdev);
+
+ qca_power_shutdown(hu);
+ return 0;
+}
+
static int qca_enable_regulator(struct qca_vreg vregs,
struct regulator *regulator)
{
{
int i;
- qca->vreg_bulk = devm_kzalloc(qca->dev, num_vregs *
+ qca->vreg_bulk = devm_kcalloc(qca->dev, num_vregs,
sizeof(struct regulator_bulk_data),
GFP_KERNEL);
if (!qca->vreg_bulk)
struct qca_serdev *qcadev = serdev_device_get_drvdata(serdev);
if (qcadev->btsoc_type == QCA_WCN3990)
- qca_power_shutdown(qcadev->serdev_hu.hdev);
+ qca_power_shutdown(&qcadev->serdev_hu);
else
clk_disable_unprepare(qcadev->susclk);
return cdesc;
}
-static inline void ena_com_cq_inc_head(struct ena_com_io_cq *io_cq)
-{
- io_cq->head++;
-
- /* Switch phase bit in case of wrap around */
- if (unlikely((io_cq->head & (io_cq->q_depth - 1)) == 0))
- io_cq->phase ^= 1;
-}
-
-static inline void *get_sq_desc(struct ena_com_io_sq *io_sq)
+static inline void *get_sq_desc_regular_queue(struct ena_com_io_sq *io_sq)
{
u16 tail_masked;
u32 offset;
return (void *)((uintptr_t)io_sq->desc_addr.virt_addr + offset);
}
-static inline void ena_com_copy_curr_sq_desc_to_dev(struct ena_com_io_sq *io_sq)
+static inline int ena_com_write_bounce_buffer_to_dev(struct ena_com_io_sq *io_sq,
+ u8 *bounce_buffer)
{
- u16 tail_masked = io_sq->tail & (io_sq->q_depth - 1);
- u32 offset = tail_masked * io_sq->desc_entry_size;
+ struct ena_com_llq_info *llq_info = &io_sq->llq_info;
- /* In case this queue isn't a LLQ */
- if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
- return;
+ u16 dst_tail_mask;
+ u32 dst_offset;
- memcpy_toio(io_sq->desc_addr.pbuf_dev_addr + offset,
- io_sq->desc_addr.virt_addr + offset,
- io_sq->desc_entry_size);
-}
+ dst_tail_mask = io_sq->tail & (io_sq->q_depth - 1);
+ dst_offset = dst_tail_mask * llq_info->desc_list_entry_size;
+
+ /* Make sure everything was written into the bounce buffer before
+ * writing the bounce buffer to the device
+ */
+ wmb();
+
+ /* The line is completed. Copy it to dev */
+ __iowrite64_copy(io_sq->desc_addr.pbuf_dev_addr + dst_offset,
+ bounce_buffer, (llq_info->desc_list_entry_size) / 8);
-static inline void ena_com_sq_update_tail(struct ena_com_io_sq *io_sq)
-{
io_sq->tail++;
/* Switch phase bit in case of wrap around */
if (unlikely((io_sq->tail & (io_sq->q_depth - 1)) == 0))
io_sq->phase ^= 1;
+
+ return 0;
}
-static inline int ena_com_write_header(struct ena_com_io_sq *io_sq,
- u8 *head_src, u16 header_len)
+static inline int ena_com_write_header_to_bounce(struct ena_com_io_sq *io_sq,
+ u8 *header_src,
+ u16 header_len)
{
- u16 tail_masked = io_sq->tail & (io_sq->q_depth - 1);
- u8 __iomem *dev_head_addr =
- io_sq->header_addr + (tail_masked * io_sq->tx_max_header_size);
+ struct ena_com_llq_pkt_ctrl *pkt_ctrl = &io_sq->llq_buf_ctrl;
+ struct ena_com_llq_info *llq_info = &io_sq->llq_info;
+ u8 *bounce_buffer = pkt_ctrl->curr_bounce_buf;
+ u16 header_offset;
- if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
+ if (unlikely(io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST))
return 0;
- if (unlikely(!io_sq->header_addr)) {
- pr_err("Push buffer header ptr is NULL\n");
- return -EINVAL;
+ header_offset =
+ llq_info->descs_num_before_header * io_sq->desc_entry_size;
+
+ if (unlikely((header_offset + header_len) >
+ llq_info->desc_list_entry_size)) {
+ pr_err("trying to write header larger than llq entry can accommodate\n");
+ return -EFAULT;
+ }
+
+ if (unlikely(!bounce_buffer)) {
+ pr_err("bounce buffer is NULL\n");
+ return -EFAULT;
+ }
+
+ memcpy(bounce_buffer + header_offset, header_src, header_len);
+
+ return 0;
+}
+
+static inline void *get_sq_desc_llq(struct ena_com_io_sq *io_sq)
+{
+ struct ena_com_llq_pkt_ctrl *pkt_ctrl = &io_sq->llq_buf_ctrl;
+ u8 *bounce_buffer;
+ void *sq_desc;
+
+ bounce_buffer = pkt_ctrl->curr_bounce_buf;
+
+ if (unlikely(!bounce_buffer)) {
+ pr_err("bounce buffer is NULL\n");
+ return NULL;
+ }
+
+ sq_desc = bounce_buffer + pkt_ctrl->idx * io_sq->desc_entry_size;
+ pkt_ctrl->idx++;
+ pkt_ctrl->descs_left_in_line--;
+
+ return sq_desc;
+}
+
+static inline int ena_com_close_bounce_buffer(struct ena_com_io_sq *io_sq)
+{
+ struct ena_com_llq_pkt_ctrl *pkt_ctrl = &io_sq->llq_buf_ctrl;
+ struct ena_com_llq_info *llq_info = &io_sq->llq_info;
+ int rc;
+
+ if (unlikely(io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST))
+ return 0;
+
+ /* bounce buffer was used, so write it and get a new one */
+ if (pkt_ctrl->idx) {
+ rc = ena_com_write_bounce_buffer_to_dev(io_sq,
+ pkt_ctrl->curr_bounce_buf);
+ if (unlikely(rc))
+ return rc;
+
+ pkt_ctrl->curr_bounce_buf =
+ ena_com_get_next_bounce_buffer(&io_sq->bounce_buf_ctrl);
+ memset(io_sq->llq_buf_ctrl.curr_bounce_buf,
+ 0x0, llq_info->desc_list_entry_size);
+ }
+
+ pkt_ctrl->idx = 0;
+ pkt_ctrl->descs_left_in_line = llq_info->descs_num_before_header;
+ return 0;
+}
+
+static inline void *get_sq_desc(struct ena_com_io_sq *io_sq)
+{
+ if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
+ return get_sq_desc_llq(io_sq);
+
+ return get_sq_desc_regular_queue(io_sq);
+}
+
+static inline int ena_com_sq_update_llq_tail(struct ena_com_io_sq *io_sq)
+{
+ struct ena_com_llq_pkt_ctrl *pkt_ctrl = &io_sq->llq_buf_ctrl;
+ struct ena_com_llq_info *llq_info = &io_sq->llq_info;
+ int rc;
+
+ if (!pkt_ctrl->descs_left_in_line) {
+ rc = ena_com_write_bounce_buffer_to_dev(io_sq,
+ pkt_ctrl->curr_bounce_buf);
+ if (unlikely(rc))
+ return rc;
+
+ pkt_ctrl->curr_bounce_buf =
+ ena_com_get_next_bounce_buffer(&io_sq->bounce_buf_ctrl);
+ memset(io_sq->llq_buf_ctrl.curr_bounce_buf,
+ 0x0, llq_info->desc_list_entry_size);
+
+ pkt_ctrl->idx = 0;
+ if (unlikely(llq_info->desc_stride_ctrl == ENA_ADMIN_SINGLE_DESC_PER_ENTRY))
+ pkt_ctrl->descs_left_in_line = 1;
+ else
+ pkt_ctrl->descs_left_in_line =
+ llq_info->desc_list_entry_size / io_sq->desc_entry_size;
}
- memcpy_toio(dev_head_addr, head_src, header_len);
+ return 0;
+}
+
+static inline int ena_com_sq_update_tail(struct ena_com_io_sq *io_sq)
+{
+ if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
+ return ena_com_sq_update_llq_tail(io_sq);
+
+ io_sq->tail++;
+
+ /* Switch phase bit in case of wrap around */
+ if (unlikely((io_sq->tail & (io_sq->q_depth - 1)) == 0))
+ io_sq->phase ^= 1;
return 0;
}
return false;
}
-static inline void ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io_sq,
- struct ena_com_tx_ctx *ena_tx_ctx)
+static inline int ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io_sq,
+ struct ena_com_tx_ctx *ena_tx_ctx)
{
struct ena_eth_io_tx_meta_desc *meta_desc = NULL;
struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta;
memcpy(&io_sq->cached_tx_meta, ena_meta,
sizeof(struct ena_com_tx_meta));
- ena_com_copy_curr_sq_desc_to_dev(io_sq);
- ena_com_sq_update_tail(io_sq);
+ return ena_com_sq_update_tail(io_sq);
}
static inline void ena_com_rx_set_flags(struct ena_com_rx_ctx *ena_rx_ctx,
(cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_MASK) >>
ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_SHIFT;
ena_rx_ctx->l3_csum_err =
- (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_MASK) >>
- ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_SHIFT;
+ !!((cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_MASK) >>
+ ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_SHIFT);
ena_rx_ctx->l4_csum_err =
- (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK) >>
- ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT;
+ !!((cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK) >>
+ ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT);
+ ena_rx_ctx->l4_csum_checked =
+ !!((cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_CHECKED_MASK) >>
+ ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_CHECKED_SHIFT);
ena_rx_ctx->hash = cdesc->hash;
ena_rx_ctx->frag =
(cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_MASK) >>
{
struct ena_eth_io_tx_desc *desc = NULL;
struct ena_com_buf *ena_bufs = ena_tx_ctx->ena_bufs;
- void *push_header = ena_tx_ctx->push_header;
+ void *buffer_to_push = ena_tx_ctx->push_header;
u16 header_len = ena_tx_ctx->header_len;
u16 num_bufs = ena_tx_ctx->num_bufs;
- int total_desc, i, rc;
+ u16 start_tail = io_sq->tail;
+ int i, rc;
bool have_meta;
u64 addr_hi;
WARN(io_sq->direction != ENA_COM_IO_QUEUE_DIRECTION_TX, "wrong Q type");
/* num_bufs +1 for potential meta desc */
- if (ena_com_sq_empty_space(io_sq) < (num_bufs + 1)) {
- pr_err("Not enough space in the tx queue\n");
+ if (unlikely(!ena_com_sq_have_enough_space(io_sq, num_bufs + 1))) {
+ pr_debug("Not enough space in the tx queue\n");
return -ENOMEM;
}
return -EINVAL;
}
- /* start with pushing the header (if needed) */
- rc = ena_com_write_header(io_sq, push_header, header_len);
+ if (unlikely(io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV &&
+ !buffer_to_push))
+ return -EINVAL;
+
+ rc = ena_com_write_header_to_bounce(io_sq, buffer_to_push, header_len);
if (unlikely(rc))
return rc;
have_meta = ena_tx_ctx->meta_valid && ena_com_meta_desc_changed(io_sq,
ena_tx_ctx);
- if (have_meta)
- ena_com_create_and_store_tx_meta_desc(io_sq, ena_tx_ctx);
+ if (have_meta) {
+ rc = ena_com_create_and_store_tx_meta_desc(io_sq, ena_tx_ctx);
+ if (unlikely(rc))
+ return rc;
+ }
- /* If the caller doesn't want send packets */
+ /* If the caller doesn't want to send packets */
if (unlikely(!num_bufs && !header_len)) {
- *nb_hw_desc = have_meta ? 0 : 1;
- return 0;
+ rc = ena_com_close_bounce_buffer(io_sq);
+ *nb_hw_desc = io_sq->tail - start_tail;
+ return rc;
}
desc = get_sq_desc(io_sq);
+ if (unlikely(!desc))
+ return -EFAULT;
memset(desc, 0x0, sizeof(struct ena_eth_io_tx_desc));
/* Set first desc when we don't have meta descriptor */
for (i = 0; i < num_bufs; i++) {
/* The first desc share the same desc as the header */
if (likely(i != 0)) {
- ena_com_copy_curr_sq_desc_to_dev(io_sq);
- ena_com_sq_update_tail(io_sq);
+ rc = ena_com_sq_update_tail(io_sq);
+ if (unlikely(rc))
+ return rc;
desc = get_sq_desc(io_sq);
+ if (unlikely(!desc))
+ return -EFAULT;
+
memset(desc, 0x0, sizeof(struct ena_eth_io_tx_desc));
desc->len_ctrl |= (io_sq->phase <<
/* set the last desc indicator */
desc->len_ctrl |= ENA_ETH_IO_TX_DESC_LAST_MASK;
- ena_com_copy_curr_sq_desc_to_dev(io_sq);
-
- ena_com_sq_update_tail(io_sq);
+ rc = ena_com_sq_update_tail(io_sq);
+ if (unlikely(rc))
+ return rc;
- total_desc = max_t(u16, num_bufs, 1);
- total_desc += have_meta ? 1 : 0;
+ rc = ena_com_close_bounce_buffer(io_sq);
- *nb_hw_desc = total_desc;
- return 0;
+ *nb_hw_desc = io_sq->tail - start_tail;
+ return rc;
}
int ena_com_rx_pkt(struct ena_com_io_cq *io_cq,
WARN(io_sq->direction != ENA_COM_IO_QUEUE_DIRECTION_RX, "wrong Q type");
- if (unlikely(ena_com_sq_empty_space(io_sq) == 0))
+ if (unlikely(!ena_com_sq_have_enough_space(io_sq, 1)))
return -ENOSPC;
desc = get_sq_desc(io_sq);
+ if (unlikely(!desc))
+ return -EFAULT;
+
memset(desc, 0x0, sizeof(struct ena_eth_io_rx_desc));
desc->length = ena_buf->len;
- desc->ctrl |= ENA_ETH_IO_RX_DESC_FIRST_MASK;
+ desc->ctrl = ENA_ETH_IO_RX_DESC_FIRST_MASK;
desc->ctrl |= ENA_ETH_IO_RX_DESC_LAST_MASK;
desc->ctrl |= io_sq->phase & ENA_ETH_IO_RX_DESC_PHASE_MASK;
desc->ctrl |= ENA_ETH_IO_RX_DESC_COMP_REQ_MASK;
desc->buff_addr_hi =
((ena_buf->paddr & GENMASK_ULL(io_sq->dma_addr_bits - 1, 32)) >> 32);
- ena_com_sq_update_tail(io_sq);
-
- return 0;
-}
-
-int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq, u16 *req_id)
-{
- u8 expected_phase, cdesc_phase;
- struct ena_eth_io_tx_cdesc *cdesc;
- u16 masked_head;
-
- masked_head = io_cq->head & (io_cq->q_depth - 1);
- expected_phase = io_cq->phase;
-
- cdesc = (struct ena_eth_io_tx_cdesc *)
- ((uintptr_t)io_cq->cdesc_addr.virt_addr +
- (masked_head * io_cq->cdesc_entry_size_in_bytes));
-
- /* When the current completion descriptor phase isn't the same as the
- * expected, it mean that the device still didn't update
- * this completion.
- */
- cdesc_phase = READ_ONCE(cdesc->flags) & ENA_ETH_IO_TX_CDESC_PHASE_MASK;
- if (cdesc_phase != expected_phase)
- return -EAGAIN;
-
- dma_rmb();
- if (unlikely(cdesc->req_id >= io_cq->q_depth)) {
- pr_err("Invalid req id %d\n", cdesc->req_id);
- return -EINVAL;
- }
-
- ena_com_cq_inc_head(io_cq);
-
- *req_id = READ_ONCE(cdesc->req_id);
-
- return 0;
+ return ena_com_sq_update_tail(io_sq);
}
bool ena_com_cq_empty(struct ena_com_io_cq *io_cq)
#include <linux/if_vlan.h>
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/moduleparam.h>
#include <linux/numa.h>
#include <linux/pci.h>
#include <linux/utsname.h>
}
}
+ size = tx_ring->tx_max_header_size;
+ tx_ring->push_buf_intermediate_buf = vzalloc_node(size, node);
+ if (!tx_ring->push_buf_intermediate_buf) {
+ tx_ring->push_buf_intermediate_buf = vzalloc(size);
+ if (!tx_ring->push_buf_intermediate_buf) {
+ vfree(tx_ring->tx_buffer_info);
+ vfree(tx_ring->free_tx_ids);
+ return -ENOMEM;
+ }
+ }
+
/* Req id ring for TX out of order completions */
for (i = 0; i < tx_ring->ring_size; i++)
tx_ring->free_tx_ids[i] = i;
vfree(tx_ring->free_tx_ids);
tx_ring->free_tx_ids = NULL;
+
+ vfree(tx_ring->push_buf_intermediate_buf);
+ tx_ring->push_buf_intermediate_buf = NULL;
}
/* ena_setup_all_tx_resources - allocate I/O Tx queues resources for All queues
ena_free_rx_bufs(adapter, i);
}
+static inline void ena_unmap_tx_skb(struct ena_ring *tx_ring,
+ struct ena_tx_buffer *tx_info)
+{
+ struct ena_com_buf *ena_buf;
+ u32 cnt;
+ int i;
+
+ ena_buf = tx_info->bufs;
+ cnt = tx_info->num_of_bufs;
+
+ if (unlikely(!cnt))
+ return;
+
+ if (tx_info->map_linear_data) {
+ dma_unmap_single(tx_ring->dev,
+ dma_unmap_addr(ena_buf, paddr),
+ dma_unmap_len(ena_buf, len),
+ DMA_TO_DEVICE);
+ ena_buf++;
+ cnt--;
+ }
+
+ /* unmap remaining mapped pages */
+ for (i = 0; i < cnt; i++) {
+ dma_unmap_page(tx_ring->dev, dma_unmap_addr(ena_buf, paddr),
+ dma_unmap_len(ena_buf, len), DMA_TO_DEVICE);
+ ena_buf++;
+ }
+}
+
/* ena_free_tx_bufs - Free Tx Buffers per Queue
* @tx_ring: TX ring for which buffers be freed
*/
for (i = 0; i < tx_ring->ring_size; i++) {
struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];
- struct ena_com_buf *ena_buf;
- int nr_frags;
- int j;
if (!tx_info->skb)
continue;
tx_ring->qid, i);
}
- ena_buf = tx_info->bufs;
- dma_unmap_single(tx_ring->dev,
- ena_buf->paddr,
- ena_buf->len,
- DMA_TO_DEVICE);
-
- /* unmap remaining mapped pages */
- nr_frags = tx_info->num_of_bufs - 1;
- for (j = 0; j < nr_frags; j++) {
- ena_buf++;
- dma_unmap_page(tx_ring->dev,
- ena_buf->paddr,
- ena_buf->len,
- DMA_TO_DEVICE);
- }
+ ena_unmap_tx_skb(tx_ring, tx_info);
dev_kfree_skb_any(tx_info->skb);
}
while (tx_pkts < budget) {
struct ena_tx_buffer *tx_info;
struct sk_buff *skb;
- struct ena_com_buf *ena_buf;
- int i, nr_frags;
rc = ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq,
&req_id);
tx_info->skb = NULL;
tx_info->last_jiffies = 0;
- if (likely(tx_info->num_of_bufs != 0)) {
- ena_buf = tx_info->bufs;
-
- dma_unmap_single(tx_ring->dev,
- dma_unmap_addr(ena_buf, paddr),
- dma_unmap_len(ena_buf, len),
- DMA_TO_DEVICE);
-
- /* unmap remaining mapped pages */
- nr_frags = tx_info->num_of_bufs - 1;
- for (i = 0; i < nr_frags; i++) {
- ena_buf++;
- dma_unmap_page(tx_ring->dev,
- dma_unmap_addr(ena_buf, paddr),
- dma_unmap_len(ena_buf, len),
- DMA_TO_DEVICE);
- }
- }
+ ena_unmap_tx_skb(tx_ring, tx_info);
netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
"tx_poll: q %d skb %p completed\n", tx_ring->qid,
*/
smp_mb();
- above_thresh = ena_com_sq_empty_space(tx_ring->ena_com_io_sq) >
- ENA_TX_WAKEUP_THRESH;
+ above_thresh = ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
+ ENA_TX_WAKEUP_THRESH);
if (unlikely(netif_tx_queue_stopped(txq) && above_thresh)) {
__netif_tx_lock(txq, smp_processor_id());
- above_thresh = ena_com_sq_empty_space(tx_ring->ena_com_io_sq) >
- ENA_TX_WAKEUP_THRESH;
+ above_thresh =
+ ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
+ ENA_TX_WAKEUP_THRESH);
if (netif_tx_queue_stopped(txq) && above_thresh) {
netif_tx_wake_queue(txq);
u64_stats_update_begin(&tx_ring->syncp);
return;
}
- skb->ip_summed = CHECKSUM_UNNECESSARY;
+ if (likely(ena_rx_ctx->l4_csum_checked)) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ } else {
+ u64_stats_update_begin(&rx_ring->syncp);
+ rx_ring->rx_stats.csum_unchecked++;
+ u64_stats_update_end(&rx_ring->syncp);
+ skb->ip_summed = CHECKSUM_NONE;
+ }
+ } else {
+ skb->ip_summed = CHECKSUM_NONE;
+ return;
}
+
}
static void ena_set_rx_hash(struct ena_ring *rx_ring,
rx_ring->next_to_clean = next_to_clean;
- refill_required = ena_com_sq_empty_space(rx_ring->ena_com_io_sq);
- refill_threshold = rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER;
+ refill_required = ena_com_free_desc(rx_ring->ena_com_io_sq);
+ refill_threshold =
+ min_t(int, rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER,
+ ENA_RX_REFILL_THRESH_PACKET);
/* Optimization, try to batch new rx buffers */
if (refill_required > refill_threshold) {
/* Reserved the max msix vectors we might need */
msix_vecs = ENA_MAX_MSIX_VEC(num_queues);
-
netif_dbg(adapter, probe, adapter->netdev,
"trying to enable MSI-X, vectors %d\n", msix_vecs);
if (rc)
return rc;
- ena_init_napi(adapter);
-
ena_change_mtu(adapter->netdev, adapter->netdev->mtu);
ena_refill_all_rx_bufs(adapter);
static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid)
{
- struct ena_com_create_io_ctx ctx = { 0 };
+ struct ena_com_create_io_ctx ctx;
struct ena_com_dev *ena_dev;
struct ena_ring *tx_ring;
u32 msix_vector;
msix_vector = ENA_IO_IRQ_IDX(qid);
ena_qid = ENA_IO_TXQ_IDX(qid);
+ memset(&ctx, 0x0, sizeof(ctx));
+
ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
ctx.qid = ena_qid;
ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid)
{
struct ena_com_dev *ena_dev;
- struct ena_com_create_io_ctx ctx = { 0 };
+ struct ena_com_create_io_ctx ctx;
struct ena_ring *rx_ring;
u32 msix_vector;
u16 ena_qid;
msix_vector = ENA_IO_IRQ_IDX(qid);
ena_qid = ENA_IO_RXQ_IDX(qid);
+ memset(&ctx, 0x0, sizeof(ctx));
+
ctx.qid = ena_qid;
ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
ena_setup_io_intr(adapter);
+ /* napi poll functions should be initialized before running
+ * request_irq(), to handle a rare condition where there is a pending
+ * interrupt, causing the ISR to fire immediately while the poll
+ * function wasn't set yet, causing a null dereference
+ */
+ ena_init_napi(adapter);
+
rc = ena_request_io_irq(adapter);
if (rc)
goto err_req_irq;
return rc;
}
-/* Called with netif_tx_lock. */
-static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static int ena_tx_map_skb(struct ena_ring *tx_ring,
+ struct ena_tx_buffer *tx_info,
+ struct sk_buff *skb,
+ void **push_hdr,
+ u16 *header_len)
{
- struct ena_adapter *adapter = netdev_priv(dev);
- struct ena_tx_buffer *tx_info;
- struct ena_com_tx_ctx ena_tx_ctx;
- struct ena_ring *tx_ring;
- struct netdev_queue *txq;
+ struct ena_adapter *adapter = tx_ring->adapter;
struct ena_com_buf *ena_buf;
- void *push_hdr;
- u32 len, last_frag;
- u16 next_to_use;
- u16 req_id;
- u16 push_len;
- u16 header_len;
dma_addr_t dma;
- int qid, rc, nb_hw_desc;
- int i = -1;
-
- netif_dbg(adapter, tx_queued, dev, "%s skb %p\n", __func__, skb);
- /* Determine which tx ring we will be placed on */
- qid = skb_get_queue_mapping(skb);
- tx_ring = &adapter->tx_ring[qid];
- txq = netdev_get_tx_queue(dev, qid);
+ u32 skb_head_len, frag_len, last_frag;
+ u16 push_len = 0;
+ u16 delta = 0;
+ int i = 0;
- rc = ena_check_and_linearize_skb(tx_ring, skb);
- if (unlikely(rc))
- goto error_drop_packet;
-
- skb_tx_timestamp(skb);
- len = skb_headlen(skb);
-
- next_to_use = tx_ring->next_to_use;
- req_id = tx_ring->free_tx_ids[next_to_use];
- tx_info = &tx_ring->tx_buffer_info[req_id];
- tx_info->num_of_bufs = 0;
-
- WARN(tx_info->skb, "SKB isn't NULL req_id %d\n", req_id);
- ena_buf = tx_info->bufs;
+ skb_head_len = skb_headlen(skb);
tx_info->skb = skb;
+ ena_buf = tx_info->bufs;
if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
- /* prepared the push buffer */
- push_len = min_t(u32, len, tx_ring->tx_max_header_size);
- header_len = push_len;
- push_hdr = skb->data;
+ /* When the device is LLQ mode, the driver will copy
+ * the header into the device memory space.
+ * the ena_com layer assume the header is in a linear
+ * memory space.
+ * This assumption might be wrong since part of the header
+ * can be in the fragmented buffers.
+ * Use skb_header_pointer to make sure the header is in a
+ * linear memory space.
+ */
+
+ push_len = min_t(u32, skb->len, tx_ring->tx_max_header_size);
+ *push_hdr = skb_header_pointer(skb, 0, push_len,
+ tx_ring->push_buf_intermediate_buf);
+ *header_len = push_len;
+ if (unlikely(skb->data != *push_hdr)) {
+ u64_stats_update_begin(&tx_ring->syncp);
+ tx_ring->tx_stats.llq_buffer_copy++;
+ u64_stats_update_end(&tx_ring->syncp);
+
+ delta = push_len - skb_head_len;
+ }
} else {
- push_len = 0;
- header_len = min_t(u32, len, tx_ring->tx_max_header_size);
- push_hdr = NULL;
+ *push_hdr = NULL;
+ *header_len = min_t(u32, skb_head_len,
+ tx_ring->tx_max_header_size);
}
- netif_dbg(adapter, tx_queued, dev,
+ netif_dbg(adapter, tx_queued, adapter->netdev,
"skb: %p header_buf->vaddr: %p push_len: %d\n", skb,
- push_hdr, push_len);
+ *push_hdr, push_len);
- if (len > push_len) {
+ if (skb_head_len > push_len) {
dma = dma_map_single(tx_ring->dev, skb->data + push_len,
- len - push_len, DMA_TO_DEVICE);
- if (dma_mapping_error(tx_ring->dev, dma))
+ skb_head_len - push_len, DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(tx_ring->dev, dma)))
goto error_report_dma_error;
ena_buf->paddr = dma;
- ena_buf->len = len - push_len;
+ ena_buf->len = skb_head_len - push_len;
ena_buf++;
tx_info->num_of_bufs++;
+ tx_info->map_linear_data = 1;
+ } else {
+ tx_info->map_linear_data = 0;
}
last_frag = skb_shinfo(skb)->nr_frags;
for (i = 0; i < last_frag; i++) {
const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
- len = skb_frag_size(frag);
- dma = skb_frag_dma_map(tx_ring->dev, frag, 0, len,
- DMA_TO_DEVICE);
- if (dma_mapping_error(tx_ring->dev, dma))
+ frag_len = skb_frag_size(frag);
+
+ if (unlikely(delta >= frag_len)) {
+ delta -= frag_len;
+ continue;
+ }
+
+ dma = skb_frag_dma_map(tx_ring->dev, frag, delta,
+ frag_len - delta, DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(tx_ring->dev, dma)))
goto error_report_dma_error;
ena_buf->paddr = dma;
- ena_buf->len = len;
+ ena_buf->len = frag_len - delta;
ena_buf++;
+ tx_info->num_of_bufs++;
+ delta = 0;
}
- tx_info->num_of_bufs += last_frag;
+ return 0;
+
+error_report_dma_error:
+ u64_stats_update_begin(&tx_ring->syncp);
+ tx_ring->tx_stats.dma_mapping_err++;
+ u64_stats_update_end(&tx_ring->syncp);
+ netdev_warn(adapter->netdev, "failed to map skb\n");
+
+ tx_info->skb = NULL;
+
+ tx_info->num_of_bufs += i;
+ ena_unmap_tx_skb(tx_ring, tx_info);
+
+ return -EINVAL;
+}
+
+/* Called with netif_tx_lock. */
+static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ena_adapter *adapter = netdev_priv(dev);
+ struct ena_tx_buffer *tx_info;
+ struct ena_com_tx_ctx ena_tx_ctx;
+ struct ena_ring *tx_ring;
+ struct netdev_queue *txq;
+ void *push_hdr;
+ u16 next_to_use, req_id, header_len;
+ int qid, rc, nb_hw_desc;
+
+ netif_dbg(adapter, tx_queued, dev, "%s skb %p\n", __func__, skb);
+ /* Determine which tx ring we will be placed on */
+ qid = skb_get_queue_mapping(skb);
+ tx_ring = &adapter->tx_ring[qid];
+ txq = netdev_get_tx_queue(dev, qid);
+
+ rc = ena_check_and_linearize_skb(tx_ring, skb);
+ if (unlikely(rc))
+ goto error_drop_packet;
+
+ skb_tx_timestamp(skb);
+
+ next_to_use = tx_ring->next_to_use;
+ req_id = tx_ring->free_tx_ids[next_to_use];
+ tx_info = &tx_ring->tx_buffer_info[req_id];
+ tx_info->num_of_bufs = 0;
+
+ WARN(tx_info->skb, "SKB isn't NULL req_id %d\n", req_id);
+
+ rc = ena_tx_map_skb(tx_ring, tx_info, skb, &push_hdr, &header_len);
+ if (unlikely(rc))
+ goto error_drop_packet;
memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx));
ena_tx_ctx.ena_bufs = tx_info->bufs;
rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx,
&nb_hw_desc);
+ /* ena_com_prepare_tx() can't fail due to overflow of tx queue,
+ * since the number of free descriptors in the queue is checked
+ * after sending the previous packet. In case there isn't enough
+ * space in the queue for the next packet, it is stopped
+ * until there is again enough available space in the queue.
+ * All other failure reasons of ena_com_prepare_tx() are fatal
+ * and therefore require a device reset.
+ */
if (unlikely(rc)) {
netif_err(adapter, tx_queued, dev,
"failed to prepare tx bufs\n");
u64_stats_update_begin(&tx_ring->syncp);
- tx_ring->tx_stats.queue_stop++;
tx_ring->tx_stats.prepare_ctx_err++;
u64_stats_update_end(&tx_ring->syncp);
- netif_tx_stop_queue(txq);
+ adapter->reset_reason = ENA_REGS_RESET_DRIVER_INVALID_STATE;
+ set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
goto error_unmap_dma;
}
* to sgl_size + 2. one for the meta descriptor and one for header
* (if the header is larger than tx_max_header_size).
*/
- if (unlikely(ena_com_sq_empty_space(tx_ring->ena_com_io_sq) <
- (tx_ring->sgl_size + 2))) {
+ if (unlikely(!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
+ tx_ring->sgl_size + 2))) {
netif_dbg(adapter, tx_queued, dev, "%s stop queue %d\n",
__func__, qid);
*/
smp_mb();
- if (ena_com_sq_empty_space(tx_ring->ena_com_io_sq)
- > ENA_TX_WAKEUP_THRESH) {
+ if (ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
+ ENA_TX_WAKEUP_THRESH)) {
netif_tx_wake_queue(txq);
u64_stats_update_begin(&tx_ring->syncp);
tx_ring->tx_stats.queue_wakeup++;
return NETDEV_TX_OK;
-error_report_dma_error:
- u64_stats_update_begin(&tx_ring->syncp);
- tx_ring->tx_stats.dma_mapping_err++;
- u64_stats_update_end(&tx_ring->syncp);
- netdev_warn(adapter->netdev, "failed to map skb\n");
-
- tx_info->skb = NULL;
-
error_unmap_dma:
- if (i >= 0) {
- /* save value of frag that failed */
- last_frag = i;
-
- /* start back at beginning and unmap skb */
- tx_info->skb = NULL;
- ena_buf = tx_info->bufs;
- dma_unmap_single(tx_ring->dev, dma_unmap_addr(ena_buf, paddr),
- dma_unmap_len(ena_buf, len), DMA_TO_DEVICE);
-
- /* unmap remaining mapped pages */
- for (i = 0; i < last_frag; i++) {
- ena_buf++;
- dma_unmap_page(tx_ring->dev, dma_unmap_addr(ena_buf, paddr),
- dma_unmap_len(ena_buf, len), DMA_TO_DEVICE);
- }
- }
+ ena_unmap_tx_skb(tx_ring, tx_info);
+ tx_info->skb = NULL;
error_drop_packet:
-
dev_kfree_skb(skb);
return NETDEV_TX_OK;
}
return qid;
}
-static void ena_config_host_info(struct ena_com_dev *ena_dev)
+static void ena_config_host_info(struct ena_com_dev *ena_dev,
+ struct pci_dev *pdev)
{
struct ena_admin_host_info *host_info;
int rc;
host_info = ena_dev->host_attr.host_info;
+ host_info->bdf = (pdev->bus->number << 8) | pdev->devfn;
host_info->os_type = ENA_ADMIN_OS_LINUX;
host_info->kernel_ver = LINUX_VERSION_CODE;
strncpy(host_info->kernel_ver_str, utsname()->version,
host_info->driver_version =
(DRV_MODULE_VER_MAJOR) |
(DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
- (DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT);
+ (DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT) |
+ ("K"[0] << ENA_ADMIN_HOST_INFO_MODULE_TYPE_SHIFT);
+ host_info->num_cpus = num_online_cpus();
rc = ena_com_set_host_attributes(ena_dev);
if (rc) {
}
/* ENA admin level init */
- rc = ena_com_admin_init(ena_dev, &aenq_handlers, true);
+ rc = ena_com_admin_init(ena_dev, &aenq_handlers);
if (rc) {
dev_err(dev,
"Can not initialize ena admin queue with device\n");
*/
ena_com_set_admin_polling_mode(ena_dev, true);
- ena_config_host_info(ena_dev);
+ ena_config_host_info(ena_dev, pdev);
/* Get Device Attributes*/
rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
adapter->dev_up_before_reset = dev_up;
-
if (!graceful)
ena_com_set_admin_running_state(ena_dev, false);
if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
ena_down(adapter);
- /* Before releasing the ENA resources, a device reset is required.
- * (to prevent the device from accessing them).
+ /* Stop the device from sending AENQ events (in case reset flag is set
+ * and device is up, ena_close already reset the device
* In case the reset flag is set and the device is up, ena_down()
* already perform the reset, so it can be skipped.
*/
set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
- dev_err(&pdev->dev, "Device reset completed successfully\n");
+ dev_err(&pdev->dev,
+ "Device reset completed successfully, Driver info: %s\n",
+ version);
return rc;
err_disable_msix:
ena_free_mgmnt_irq(adapter);
ena_disable_msix(adapter);
err_device_destroy:
+ ena_com_abort_admin_commands(ena_dev);
+ ena_com_wait_for_abort_completion(ena_dev);
ena_com_admin_destroy(ena_dev);
+ ena_com_mmio_reg_read_request_destroy(ena_dev);
+ ena_com_dev_reset(ena_dev, ENA_REGS_RESET_DRIVER_INVALID_STATE);
err:
clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
rx_ring = &adapter->rx_ring[i];
refill_required =
- ena_com_sq_empty_space(rx_ring->ena_com_io_sq);
+ ena_com_free_desc(rx_ring->ena_com_io_sq);
if (unlikely(refill_required == (rx_ring->ring_size - 1))) {
rx_ring->empty_rx_queue++;
/* In case of LLQ use the llq number in the get feature cmd */
if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
- io_sq_num = get_feat_ctx->max_queues.max_llq_num;
+ io_sq_num = get_feat_ctx->max_queues.max_legacy_llq_num;
if (io_sq_num == 0) {
dev_err(&pdev->dev,
return io_queue_num;
}
-static void ena_set_push_mode(struct pci_dev *pdev, struct ena_com_dev *ena_dev,
- struct ena_com_dev_get_features_ctx *get_feat_ctx)
+static int ena_set_queues_placement_policy(struct pci_dev *pdev,
+ struct ena_com_dev *ena_dev,
+ struct ena_admin_feature_llq_desc *llq,
+ struct ena_llq_configurations *llq_default_configurations)
{
bool has_mem_bar;
+ int rc;
+ u32 llq_feature_mask;
+
+ llq_feature_mask = 1 << ENA_ADMIN_LLQ;
+ if (!(ena_dev->supported_features & llq_feature_mask)) {
+ dev_err(&pdev->dev,
+ "LLQ is not supported Fallback to host mode policy.\n");
+ ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
+ return 0;
+ }
has_mem_bar = pci_select_bars(pdev, IORESOURCE_MEM) & BIT(ENA_MEM_BAR);
- /* Enable push mode if device supports LLQ */
- if (has_mem_bar && (get_feat_ctx->max_queues.max_llq_num > 0))
- ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_DEV;
- else
+ rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations);
+ if (unlikely(rc)) {
+ dev_err(&pdev->dev,
+ "Failed to configure the device mode. Fallback to host mode policy.\n");
ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
+ return 0;
+ }
+
+ /* Nothing to config, exit */
+ if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
+ return 0;
+
+ if (!has_mem_bar) {
+ dev_err(&pdev->dev,
+ "ENA device does not expose LLQ bar. Fallback to host mode policy.\n");
+ ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
+ return 0;
+ }
+
+ ena_dev->mem_bar = devm_ioremap_wc(&pdev->dev,
+ pci_resource_start(pdev, ENA_MEM_BAR),
+ pci_resource_len(pdev, ENA_MEM_BAR));
+
+ if (!ena_dev->mem_bar)
+ return -EFAULT;
+
+ return 0;
}
static void ena_set_dev_offloads(struct ena_com_dev_get_features_ctx *feat,
static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
{
- int release_bars;
-
- if (ena_dev->mem_bar)
- devm_iounmap(&pdev->dev, ena_dev->mem_bar);
-
- if (ena_dev->reg_bar)
- devm_iounmap(&pdev->dev, ena_dev->reg_bar);
+ int release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
- release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
pci_release_selected_regions(pdev, release_bars);
}
+static inline void set_default_llq_configurations(struct ena_llq_configurations *llq_config)
+{
+ llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER;
+ llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_128B;
+ llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY;
+ llq_config->llq_num_decs_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2;
+ llq_config->llq_ring_entry_size_value = 128;
+}
+
static int ena_calc_queue_size(struct pci_dev *pdev,
struct ena_com_dev *ena_dev,
u16 *max_tx_sgl_size,
if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
queue_size = min_t(u32, queue_size,
- get_feat_ctx->max_queues.max_llq_depth);
+ get_feat_ctx->max_queues.max_legacy_llq_depth);
queue_size = rounddown_pow_of_two(queue_size);
static int version_printed;
struct net_device *netdev;
struct ena_adapter *adapter;
+ struct ena_llq_configurations llq_config;
struct ena_com_dev *ena_dev = NULL;
+ char *queue_type_str;
static int adapters_found;
int io_queue_num, bars, rc;
int queue_size;
goto err_free_region;
}
- ena_set_push_mode(pdev, ena_dev, &get_feat_ctx);
+ set_default_llq_configurations(&llq_config);
- if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
- ena_dev->mem_bar = devm_ioremap_wc(&pdev->dev,
- pci_resource_start(pdev, ENA_MEM_BAR),
- pci_resource_len(pdev, ENA_MEM_BAR));
- if (!ena_dev->mem_bar) {
- rc = -EFAULT;
- goto err_device_destroy;
- }
+ rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx.llq,
+ &llq_config);
+ if (rc) {
+ dev_err(&pdev->dev, "ena device init failed\n");
+ goto err_device_destroy;
}
/* initial Tx interrupt delay, Assumes 1 usec granularity.
goto err_device_destroy;
}
- dev_info(&pdev->dev, "creating %d io queues. queue size: %d\n",
- io_queue_num, queue_size);
+ dev_info(&pdev->dev, "creating %d io queues. queue size: %d. LLQ is %s\n",
+ io_queue_num, queue_size,
+ (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) ?
+ "ENABLED" : "DISABLED");
/* dev zeroed in init_etherdev */
netdev = alloc_etherdev_mq(sizeof(struct ena_adapter), io_queue_num);
timer_setup(&adapter->timer_service, ena_timer_service, 0);
mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
- dev_info(&pdev->dev, "%s found at mem %lx, mac addr %pM Queues %d\n",
+ if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
+ queue_type_str = "Regular";
+ else
+ queue_type_str = "Low Latency";
+
+ dev_info(&pdev->dev,
+ "%s found at mem %lx, mac addr %pM Queues %d, Placement policy: %s\n",
DEVICE_NAME, (long)pci_resource_start(pdev, 0),
- netdev->dev_addr, io_queue_num);
+ netdev->dev_addr, io_queue_num, queue_type_str);
set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
#define R8169_TX_RING_BYTES (NUM_TX_DESC * sizeof(struct TxDesc))
#define R8169_RX_RING_BYTES (NUM_RX_DESC * sizeof(struct RxDesc))
-#define RTL8169_TX_TIMEOUT (6*HZ)
-
/* write/read MMIO register */
#define RTL_W8(tp, reg, val8) writeb((val8), tp->mmio_addr + (reg))
#define RTL_W16(tp, reg, val16) writew((val16), tp->mmio_addr + (reg))
static void rtl8169_irq_mask_and_ack(struct rtl8169_private *tp)
{
rtl_irq_disable(tp);
- rtl_ack_events(tp, RTL_EVENT_NAPI | tp->event_slow);
+ rtl_ack_events(tp, 0xffff);
+ /* PCI commit */
RTL_R8(tp, ChipCmd);
}
rtl_hw_phy_config(dev);
if (tp->mac_version <= RTL_GIGA_MAC_VER_06) {
- netif_dbg(tp, drv, dev,
- "Set MAC Reg C+CR Offset 0x82h = 0x01h\n");
- RTL_W8(tp, 0x82, 0x01);
- }
-
- pci_write_config_byte(tp->pci_dev, PCI_LATENCY_TIMER, 0x40);
-
- if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
+ pci_write_config_byte(tp->pci_dev, PCI_LATENCY_TIMER, 0x40);
pci_write_config_byte(tp->pci_dev, PCI_CACHE_LINE_SIZE, 0x08);
-
- if (tp->mac_version == RTL_GIGA_MAC_VER_02) {
netif_dbg(tp, drv, dev,
"Set MAC Reg C+CR Offset 0x82h = 0x01h\n");
RTL_W8(tp, 0x82, 0x01);
- netif_dbg(tp, drv, dev,
- "Set PHY Reg 0x0bh = 0x00h\n");
- rtl_writephy(tp, 0x0b, 0x0000); //w 0x0b 15 0 0
}
/* We may have called phy_speed_down before */
RTL_W32(tp, RxConfig, RX_FIFO_THRESH | RX_DMA_BURST);
break;
case RTL_GIGA_MAC_VER_18 ... RTL_GIGA_MAC_VER_24:
- case RTL_GIGA_MAC_VER_34:
- case RTL_GIGA_MAC_VER_35:
+ case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_36:
+ case RTL_GIGA_MAC_VER_38:
RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST);
break;
case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51:
tp->cp_cmd = RTL_R16(tp, CPlusCmd);
- if ((sizeof(dma_addr_t) > 4) &&
- (use_dac == 1 || (use_dac == -1 && pci_is_pcie(pdev) &&
- tp->mac_version >= RTL_GIGA_MAC_VER_18)) &&
- !pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) &&
- !pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) {
+ if (sizeof(dma_addr_t) > 4 && (use_dac == 1 || (use_dac == -1 &&
+ tp->mac_version >= RTL_GIGA_MAC_VER_18)) &&
+ !dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) {
/* CPlusCmd Dual Access Cycle is only needed for non-PCIe */
if (!pci_is_pcie(pdev))
rtl_init_rxcfg(tp);
- rtl_irq_disable(tp);
+ rtl8169_irq_mask_and_ack(tp);
rtl_hw_initialize(tp);
rtl_hw_reset(tp);
- rtl_ack_events(tp, 0xffff);
-
pci_set_master(pdev);
rtl_init_mdio_ops(tp);
dev->dev_addr[i] = RTL_R8(tp, MAC0 + i);
dev->ethtool_ops = &rtl8169_ethtool_ops;
- dev->watchdog_timeo = RTL8169_TX_TIMEOUT;
netif_napi_add(dev, &tp->napi, rtl8169_poll, NAPI_POLL_WEIGHT);
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
+#include <linux/module.h>
#include "mt76.h"
#include "usb_trace.h"
#include "dma.h"
return ret;
}
+EXPORT_SYMBOL_GPL(mt76u_rr);
/* should be called with usb_ctrl_mtx locked */
static void __mt76u_wr(struct mt76_dev *dev, u32 addr, u32 val)
__mt76u_wr(dev, addr, val);
mutex_unlock(&dev->usb.usb_ctrl_mtx);
}
+EXPORT_SYMBOL_GPL(mt76u_wr);
static u32 mt76u_rmw(struct mt76_dev *dev, u32 addr,
u32 mask, u32 val)
}
EXPORT_SYMBOL_GPL(mt76u_single_wr);
+static int
+mt76u_req_wr_rp(struct mt76_dev *dev, u32 base,
+ const struct mt76_reg_pair *data, int len)
+{
+ struct mt76_usb *usb = &dev->usb;
+
+ mutex_lock(&usb->usb_ctrl_mtx);
+ while (len > 0) {
+ __mt76u_wr(dev, base + data->reg, data->value);
+ len--;
+ data++;
+ }
+ mutex_unlock(&usb->usb_ctrl_mtx);
+
+ return 0;
+}
+
+static int
+mt76u_wr_rp(struct mt76_dev *dev, u32 base,
+ const struct mt76_reg_pair *data, int n)
+{
+ if (test_bit(MT76_STATE_MCU_RUNNING, &dev->state))
+ return dev->mcu_ops->mcu_wr_rp(dev, base, data, n);
+ else
+ return mt76u_req_wr_rp(dev, base, data, n);
+}
+
+static int
+mt76u_req_rd_rp(struct mt76_dev *dev, u32 base, struct mt76_reg_pair *data,
+ int len)
+{
+ struct mt76_usb *usb = &dev->usb;
+
+ mutex_lock(&usb->usb_ctrl_mtx);
+ while (len > 0) {
+ data->value = __mt76u_rr(dev, base + data->reg);
+ len--;
+ data++;
+ }
+ mutex_unlock(&usb->usb_ctrl_mtx);
+
+ return 0;
+}
+
+static int
+mt76u_rd_rp(struct mt76_dev *dev, u32 base,
+ struct mt76_reg_pair *data, int n)
+{
+ if (test_bit(MT76_STATE_MCU_RUNNING, &dev->state))
+ return dev->mcu_ops->mcu_rd_rp(dev, base, data, n);
+ else
+ return mt76u_req_rd_rp(dev, base, data, n);
+}
+
static int
mt76u_set_endpoints(struct usb_interface *intf,
struct mt76_usb *usb)
mt76u_fill_rx_sg(struct mt76_dev *dev, struct mt76u_buf *buf,
int nsgs, int len, int sglen)
{
+ struct mt76_queue *q = &dev->q_rx[MT_RXQ_MAIN];
struct urb *urb = buf->urb;
int i;
+ spin_lock_bh(&q->rx_page_lock);
for (i = 0; i < nsgs; i++) {
struct page *page;
void *data;
int offset;
- data = netdev_alloc_frag(len);
+ data = page_frag_alloc(&q->rx_page, len, GFP_ATOMIC);
if (!data)
break;
offset = data - page_address(page);
sg_set_page(&urb->sg[i], page, sglen, offset);
}
+ spin_unlock_bh(&q->rx_page_lock);
if (i < nsgs) {
int j;
if (!buf->urb)
return -ENOMEM;
- buf->urb->sg = devm_kzalloc(dev->dev, nsgs * sizeof(*buf->urb->sg),
+ buf->urb->sg = devm_kcalloc(dev->dev, nsgs, sizeof(*buf->urb->sg),
gfp);
if (!buf->urb->sg)
return -ENOMEM;
min_len = MT_DMA_HDR_LEN + MT_RX_RXWI_LEN +
MT_FCE_INFO_LEN;
- if (data_len < min_len || WARN_ON(!dma_len) ||
- WARN_ON(dma_len + MT_DMA_HDR_LEN > data_len) ||
- WARN_ON(dma_len & 0x3))
+ if (data_len < min_len || !dma_len ||
+ dma_len + MT_DMA_HDR_LEN > data_len ||
+ (dma_len & 0x3))
return -EINVAL;
return dma_len;
}
struct mt76_queue *q = &dev->q_rx[MT_RXQ_MAIN];
int i, err, nsgs;
+ spin_lock_init(&q->rx_page_lock);
spin_lock_init(&q->lock);
- q->entry = devm_kzalloc(dev->dev,
- MT_NUM_RX_ENTRIES * sizeof(*q->entry),
+ q->entry = devm_kcalloc(dev->dev,
+ MT_NUM_RX_ENTRIES, sizeof(*q->entry),
GFP_KERNEL);
if (!q->entry)
return -ENOMEM;
static void mt76u_free_rx(struct mt76_dev *dev)
{
struct mt76_queue *q = &dev->q_rx[MT_RXQ_MAIN];
+ struct page *page;
int i;
for (i = 0; i < q->ndesc; i++)
mt76u_buf_free(&q->entry[i].ubuf);
+
+ spin_lock_bh(&q->rx_page_lock);
+ if (!q->rx_page.va)
+ goto out;
+
+ page = virt_to_page(q->rx_page.va);
+ __page_frag_cache_drain(page, q->rx_page.pagecnt_bias);
+ memset(&q->rx_page, 0, sizeof(q->rx_page));
+out:
+ spin_unlock_bh(&q->rx_page_lock);
}
static void mt76u_stop_rx(struct mt76_dev *dev)
usb_kill_urb(q->entry[i].ubuf.urb);
}
-int mt76u_skb_dma_info(struct sk_buff *skb, int port, u32 flags)
-{
- struct sk_buff *iter, *last = skb;
- u32 info, pad;
-
- /* Buffer layout:
- * | 4B | xfer len | pad | 4B |
- * | TXINFO | pkt/cmd | zero pad to 4B | zero |
- *
- * length field of TXINFO should be set to 'xfer len'.
- */
- info = FIELD_PREP(MT_TXD_INFO_LEN, round_up(skb->len, 4)) |
- FIELD_PREP(MT_TXD_INFO_DPORT, port) | flags;
- put_unaligned_le32(info, skb_push(skb, sizeof(info)));
-
- pad = round_up(skb->len, 4) + 4 - skb->len;
- skb_walk_frags(skb, iter) {
- last = iter;
- if (!iter->next) {
- skb->data_len += pad;
- skb->len += pad;
- break;
- }
- }
-
- if (unlikely(pad)) {
- if (__skb_pad(last, pad, true))
- return -ENOMEM;
- __skb_put(last, pad);
- }
- return 0;
-}
-EXPORT_SYMBOL_GPL(mt76u_skb_dma_info);
-
static void mt76u_tx_tasklet(unsigned long data)
{
struct mt76_dev *dev = (struct mt76_dev *)data;
q = &dev->q_tx[i];
spin_lock_init(&q->lock);
INIT_LIST_HEAD(&q->swq);
- q->hw_idx = q2hwq(i);
+ q->hw_idx = mt76_ac_to_hwq(i);
- q->entry = devm_kzalloc(dev->dev,
- MT_NUM_TX_ENTRIES * sizeof(*q->entry),
+ q->entry = devm_kcalloc(dev->dev,
+ MT_NUM_TX_ENTRIES, sizeof(*q->entry),
GFP_KERNEL);
if (!q->entry)
return -ENOMEM;
.wr = mt76u_wr,
.rmw = mt76u_rmw,
.copy = mt76u_copy,
+ .wr_rp = mt76u_wr_rp,
+ .rd_rp = mt76u_rd_rp,
};
struct mt76_usb *usb = &dev->usb;
struct rcu_head rcu; /* Must be first */
refcount_t usage;
u32 version; /* Version */
- unsigned short nr_addrs;
- unsigned short index; /* Address currently in use */
- unsigned short nr_ipv4; /* Number of IPv4 addresses */
+ unsigned char max_addrs;
+ unsigned char nr_addrs;
+ unsigned char index; /* Address currently in use */
+ unsigned char nr_ipv4; /* Number of IPv4 addresses */
unsigned long probed; /* Mask of servers that have been probed */
unsigned long yfs; /* Mask of servers that are YFS */
struct sockaddr_rxrpc addrs[];
+#define AFS_MAX_ADDRESSES ((unsigned int)(sizeof(unsigned long) * 8))
};
/*
seqlock_t cells_lock;
struct mutex proc_cells_lock;
- struct list_head proc_cells;
+ struct hlist_head proc_cells;
/* Known servers. Theoretically each fileserver can only be in one
* cell, but in practice, people create aliases and subsets and there's
struct afs_net *net;
struct key *anonymous_key; /* anonymous user key for this cell */
struct work_struct manager; /* Manager for init/deinit/dns */
- struct list_head proc_link; /* /proc cell list link */
+ struct hlist_node proc_link; /* /proc cell list link */
#ifdef CONFIG_AFS_FSCACHE
struct fscache_cookie *cache; /* caching cookie */
#endif
barrier();
}
+/**
+ * napi_if_scheduled_mark_missed - if napi is running, set the
+ * NAPIF_STATE_MISSED
+ * @n: NAPI context
+ *
+ * If napi is running, set the NAPIF_STATE_MISSED, and return true if
+ * NAPI is scheduled.
+ **/
+static inline bool napi_if_scheduled_mark_missed(struct napi_struct *n)
+{
+ unsigned long val, new;
+
+ do {
+ val = READ_ONCE(n->state);
+ if (val & NAPIF_STATE_DISABLE)
+ return true;
+
+ if (!(val & NAPIF_STATE_SCHED))
+ return false;
+
+ new = val | NAPIF_STATE_MISSED;
+ } while (cmpxchg(&n->state, val, new) != val);
+
+ return true;
+}
+
enum netdev_queue_state_t {
__QUEUE_STATE_DRV_XOFF,
__QUEUE_STATE_STACK_XOFF,
/* Subordinate device that the queue has been assigned to */
struct net_device *sb_dev;
+#ifdef CONFIG_XDP_SOCKETS
+ struct xdp_umem *umem;
+#endif
/*
* write-mostly part
*/
struct kobject kobj;
struct net_device *dev;
struct xdp_rxq_info xdp_rxq;
+#ifdef CONFIG_XDP_SOCKETS
+ struct xdp_umem *umem;
+#endif
} ____cacheline_aligned_in_smp;
/*
struct pcpu_lstats __percpu *lstats;
struct pcpu_sw_netstats __percpu *tstats;
struct pcpu_dstats __percpu *dstats;
- struct pcpu_vstats __percpu *vstats;
};
#if IS_ENABLED(CONFIG_GARP)
struct packet_type {
__be16 type; /* This is really htons(ether_type). */
+ bool ignore_outgoing;
struct net_device *dev; /* NULL is wildcarded here */
int (*func) (struct sk_buff *,
struct net_device *,
struct u64_stats_sync syncp;
};
+struct pcpu_lstats {
+ u64 packets;
+ u64 bytes;
+ struct u64_stats_sync syncp;
+};
+
#define __netdev_alloc_pcpu_stats(type, gfp) \
({ \
typeof(type) __percpu *pcpu_stats = alloc_percpu_gfp(type, gfp);\
struct netlink_ext_ack *extack;
};
+ struct netdev_notifier_info_ext {
+ struct netdev_notifier_info info; /* must be first */
+ union {
+ u32 mtu;
+ } ext;
+ };
+
struct netdev_notifier_change_info {
struct netdev_notifier_info info; /* must be first */
unsigned int flags_changed;
#define DEVLINK_RESOURCE_ID_PARENT_TOP 0
- #define DEVLINK_PARAM_MAX_STRING_VALUE 32
+ #define __DEVLINK_PARAM_MAX_STRING_VALUE 32
enum devlink_param_type {
DEVLINK_PARAM_TYPE_U8,
DEVLINK_PARAM_TYPE_U16,
u8 vu8;
u16 vu16;
u32 vu32;
- const char *vstr;
+ char vstr[__DEVLINK_PARAM_MAX_STRING_VALUE];
bool vbool;
};
DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
DEVLINK_PARAM_GENERIC_ID_ENABLE_SRIOV,
DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT,
+ DEVLINK_PARAM_GENERIC_ID_IGNORE_ARI,
+ DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX,
+ DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN,
/* add new param generic ids above here*/
__DEVLINK_PARAM_GENERIC_ID_MAX,
#define DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_NAME "region_snapshot_enable"
#define DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_TYPE DEVLINK_PARAM_TYPE_BOOL
+#define DEVLINK_PARAM_GENERIC_IGNORE_ARI_NAME "ignore_ari"
+#define DEVLINK_PARAM_GENERIC_IGNORE_ARI_TYPE DEVLINK_PARAM_TYPE_BOOL
+
+#define DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_NAME "msix_vec_per_pf_max"
+#define DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_TYPE DEVLINK_PARAM_TYPE_U32
+
+#define DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_NAME "msix_vec_per_pf_min"
+#define DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_TYPE DEVLINK_PARAM_TYPE_U32
+
#define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \
{ \
.id = DEVLINK_PARAM_GENERIC_ID_##_id, \
u32 *p_cur, u32 *p_max);
int (*eswitch_mode_get)(struct devlink *devlink, u16 *p_mode);
- int (*eswitch_mode_set)(struct devlink *devlink, u16 mode);
+ int (*eswitch_mode_set)(struct devlink *devlink, u16 mode,
+ struct netlink_ext_ack *extack);
int (*eswitch_inline_mode_get)(struct devlink *devlink, u8 *p_inline_mode);
- int (*eswitch_inline_mode_set)(struct devlink *devlink, u8 inline_mode);
+ int (*eswitch_inline_mode_set)(struct devlink *devlink, u8 inline_mode,
+ struct netlink_ext_ack *extack);
int (*eswitch_encap_mode_get)(struct devlink *devlink, u8 *p_encap_mode);
- int (*eswitch_encap_mode_set)(struct devlink *devlink, u8 encap_mode);
+ int (*eswitch_encap_mode_set)(struct devlink *devlink, u8 encap_mode,
+ struct netlink_ext_ack *extack);
};
static inline void *devlink_priv(struct devlink *devlink)
int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
union devlink_param_value init_val);
void devlink_param_value_changed(struct devlink *devlink, u32 param_id);
+ void devlink_param_value_str_fill(union devlink_param_value *dst_val,
+ const char *src);
struct devlink_region *devlink_region_create(struct devlink *devlink,
const char *region_name,
u32 region_max_snapshots,
{
}
+ static inline void
+ devlink_param_value_str_fill(union devlink_param_value *dst_val,
+ const char *src)
+ {
+ }
+
static inline struct devlink_region *
devlink_region_create(struct devlink *devlink,
const char *region_name,
extern const struct nla_policy rtm_ipv4_policy[];
void ip_fib_init(void);
__be32 fib_compute_spec_dst(struct sk_buff *skb);
+bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev);
int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
u8 tos, int oif, struct net_device *dev,
struct in_device *idev, u32 *itag);
int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force);
int fib_sync_down_addr(struct net_device *dev, __be32 local);
int fib_sync_up(struct net_device *dev, unsigned int nh_flags);
+ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr);
+int ip_valid_fib_dump_req(const struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack);
#endif /* _NET_FIB_H */
}
EXPORT_SYMBOL(call_netdevice_notifiers);
+ /**
+ * call_netdevice_notifiers_mtu - call all network notifier blocks
+ * @val: value passed unmodified to notifier function
+ * @dev: net_device pointer passed unmodified to notifier function
+ * @arg: additional u32 argument passed to the notifier function
+ *
+ * Call all network notifier blocks. Parameters and return value
+ * are as for raw_notifier_call_chain().
+ */
+ static int call_netdevice_notifiers_mtu(unsigned long val,
+ struct net_device *dev, u32 arg)
+ {
+ struct netdev_notifier_info_ext info = {
+ .info.dev = dev,
+ .ext.mtu = arg,
+ };
+
+ BUILD_BUG_ON(offsetof(struct netdev_notifier_info_ext, info) != 0);
+
+ return call_netdevice_notifiers_info(val, &info.info);
+ }
+
#ifdef CONFIG_NET_INGRESS
static DEFINE_STATIC_KEY_FALSE(ingress_needed_key);
rcu_read_lock();
again:
list_for_each_entry_rcu(ptype, ptype_list, list) {
+ if (ptype->ignore_outgoing)
+ continue;
+
/* Never send packets back to the socket
*/
while (skb) {
struct sk_buff *next = skb->next;
- skb->next = NULL;
+ skb_mark_not_on_list(skb);
rc = xmit_one(skb, dev, txq, next != NULL);
if (unlikely(!dev_xmit_complete(rc))) {
skb->next = next;
for (; skb != NULL; skb = next) {
next = skb->next;
- skb->next = NULL;
+ skb_mark_not_on_list(skb);
/* in case skb wont be segmented, point to itself */
skb->prev = skb;
list_for_each_entry_safe_reverse(skb, p, head, list) {
if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
return;
- list_del(&skb->list);
- skb->next = NULL;
+ skb_list_del_init(skb);
napi_gro_complete(skb);
napi->gro_hash[index].count--;
}
ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
if (pp) {
- list_del(&pp->list);
- pp->next = NULL;
+ skb_list_del_init(pp);
napi_gro_complete(pp);
napi->gro_hash[hash].count--;
}
err = __dev_set_mtu(dev, new_mtu);
if (!err) {
- err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
+ err = call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev,
+ orig_mtu);
err = notifier_to_errno(err);
if (err) {
/* setting mtu back and notifying everyone again,
* so that they have a chance to revert changes.
*/
__dev_set_mtu(dev, orig_mtu);
- call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
+ call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev,
+ new_mtu);
}
}
return err;
if (!ops->eswitch_mode_set)
return -EOPNOTSUPP;
mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]);
- err = ops->eswitch_mode_set(devlink, mode);
+ err = ops->eswitch_mode_set(devlink, mode, info->extack);
if (err)
return err;
}
return -EOPNOTSUPP;
inline_mode = nla_get_u8(
info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]);
- err = ops->eswitch_inline_mode_set(devlink, inline_mode);
+ err = ops->eswitch_inline_mode_set(devlink, inline_mode,
+ info->extack);
if (err)
return err;
}
if (!ops->eswitch_encap_mode_set)
return -EOPNOTSUPP;
encap_mode = nla_get_u8(info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]);
- err = ops->eswitch_encap_mode_set(devlink, encap_mode);
+ err = ops->eswitch_encap_mode_set(devlink, encap_mode,
+ info->extack);
if (err)
return err;
}
.name = DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_NAME,
.type = DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_TYPE,
},
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_IGNORE_ARI,
+ .name = DEVLINK_PARAM_GENERIC_IGNORE_ARI_NAME,
+ .type = DEVLINK_PARAM_GENERIC_IGNORE_ARI_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX,
+ .name = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_NAME,
+ .type = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN,
+ .name = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_NAME,
+ .type = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_TYPE,
+ },
};
static int devlink_param_generic_verify(const struct devlink_param *param)
struct genl_info *info,
union devlink_param_value *value)
{
+ int len;
+
if (param->type != DEVLINK_PARAM_TYPE_BOOL &&
!info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA])
return -EINVAL;
value->vu32 = nla_get_u32(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]);
break;
case DEVLINK_PARAM_TYPE_STRING:
- if (nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]) >
- DEVLINK_PARAM_MAX_STRING_VALUE)
+ len = strnlen(nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]),
+ nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]));
+ if (len == nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]) ||
+ len >= __DEVLINK_PARAM_MAX_STRING_VALUE)
return -EINVAL;
- value->vstr = nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]);
+ strcpy(value->vstr,
+ nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]));
break;
case DEVLINK_PARAM_TYPE_BOOL:
value->vbool = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA] ?
return -EOPNOTSUPP;
if (cmode == DEVLINK_PARAM_CMODE_DRIVERINIT) {
- param_item->driverinit_value = value;
+ if (param->type == DEVLINK_PARAM_TYPE_STRING)
+ strcpy(param_item->driverinit_value.vstr, value.vstr);
+ else
+ param_item->driverinit_value = value;
param_item->driverinit_value_valid = true;
} else {
if (!param->set)
start_offset = *((u64 *)&cb->args[0]);
err = nlmsg_parse(cb->nlh, GENL_HDRLEN + devlink_nl_family.hdrsize,
- attrs, DEVLINK_ATTR_MAX, ops->policy, NULL);
+ attrs, DEVLINK_ATTR_MAX, ops->policy, cb->extack);
if (err)
goto out;
DEVLINK_PARAM_CMODE_DRIVERINIT))
return -EOPNOTSUPP;
- *init_val = param_item->driverinit_value;
+ if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING)
+ strcpy(init_val->vstr, param_item->driverinit_value.vstr);
+ else
+ *init_val = param_item->driverinit_value;
return 0;
}
DEVLINK_PARAM_CMODE_DRIVERINIT))
return -EOPNOTSUPP;
- param_item->driverinit_value = init_val;
+ if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING)
+ strcpy(param_item->driverinit_value.vstr, init_val.vstr);
+ else
+ param_item->driverinit_value = init_val;
param_item->driverinit_value_valid = true;
devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW);
}
EXPORT_SYMBOL_GPL(devlink_param_value_changed);
+ /**
+ * devlink_param_value_str_fill - Safely fill-up the string preventing
+ * from overflow of the preallocated buffer
+ *
+ * @dst_val: destination devlink_param_value
+ * @src: source buffer
+ */
+ void devlink_param_value_str_fill(union devlink_param_value *dst_val,
+ const char *src)
+ {
+ size_t len;
+
+ len = strlcpy(dst_val->vstr, src, __DEVLINK_PARAM_MAX_STRING_VALUE);
+ WARN_ON(len >= __DEVLINK_PARAM_MAX_STRING_VALUE);
+ }
+ EXPORT_SYMBOL_GPL(devlink_param_value_str_fill);
+
/**
* devlink_region_create - create a new address region
*
}
EXPORT_SYMBOL(skb_find_text);
-/**
- * skb_append_datato_frags - append the user data to a skb
- * @sk: sock structure
- * @skb: skb structure to be appended with user data.
- * @getfrag: call back function to be used for getting the user data
- * @from: pointer to user message iov
- * @length: length of the iov message
- *
- * Description: This procedure append the user data in the fragment part
- * of the skb if any page alloc fails user this procedure returns -ENOMEM
- */
-int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
- int (*getfrag)(void *from, char *to, int offset,
- int len, int odd, struct sk_buff *skb),
- void *from, int length)
-{
- int frg_cnt = skb_shinfo(skb)->nr_frags;
- int copy;
- int offset = 0;
- int ret;
- struct page_frag *pfrag = ¤t->task_frag;
-
- do {
- /* Return error if we don't have space for new frag */
- if (frg_cnt >= MAX_SKB_FRAGS)
- return -EMSGSIZE;
-
- if (!sk_page_frag_refill(sk, pfrag))
- return -ENOMEM;
-
- /* copy the user data to page */
- copy = min_t(int, length, pfrag->size - pfrag->offset);
-
- ret = getfrag(from, page_address(pfrag->page) + pfrag->offset,
- offset, copy, 0, skb);
- if (ret < 0)
- return -EFAULT;
-
- /* copy was successful so update the size parameters */
- skb_fill_page_desc(skb, frg_cnt, pfrag->page, pfrag->offset,
- copy);
- frg_cnt++;
- pfrag->offset += copy;
- get_page(pfrag->page);
-
- skb->truesize += copy;
- refcount_add(copy, &sk->sk_wmem_alloc);
- skb->len += copy;
- skb->data_len += copy;
- offset += copy;
- length -= copy;
-
- } while (length > 0);
-
- return 0;
-}
-EXPORT_SYMBOL(skb_append_datato_frags);
-
int skb_append_pagefrags(struct sk_buff *skb, struct page *page,
int offset, size_t size)
{
*/
bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)
{
- if (unlikely(start > skb_headlen(skb)) ||
- unlikely((int)start + off > skb_headlen(skb) - 2)) {
- net_warn_ratelimited("bad partial csum: csum=%u/%u len=%u\n",
- start, off, skb_headlen(skb));
+ u32 csum_end = (u32)start + (u32)off + sizeof(__sum16);
+ u32 csum_start = skb_headroom(skb) + (u32)start;
+
+ if (unlikely(csum_start > U16_MAX || csum_end > skb_headlen(skb))) {
+ net_warn_ratelimited("bad partial csum: csum=%u/%u headroom=%u headlen=%u\n",
+ start, off, skb_headroom(skb), skb_headlen(skb));
return false;
}
skb->ip_summed = CHECKSUM_PARTIAL;
- skb->csum_start = skb_headroom(skb) + start;
+ skb->csum_start = csum_start;
skb->csum_offset = off;
skb_set_transport_header(skb, start);
return true;
return inet_select_addr(dev, ip_hdr(skb)->saddr, scope);
}
+bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev)
+{
+ bool dev_match = false;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ int ret;
+
+ for (ret = 0; ret < fi->fib_nhs; ret++) {
+ struct fib_nh *nh = &fi->fib_nh[ret];
+
+ if (nh->nh_dev == dev) {
+ dev_match = true;
+ break;
+ } else if (l3mdev_master_ifindex_rcu(nh->nh_dev) == dev->ifindex) {
+ dev_match = true;
+ break;
+ }
+ }
+#else
+ if (fi->fib_nh[0].nh_dev == dev)
+ dev_match = true;
+#endif
+
+ return dev_match;
+}
+EXPORT_SYMBOL_GPL(fib_info_nh_uses_dev);
+
/* Given (packet source, input interface) and optional (dst, oif, tos):
* - (main) check, that source is valid i.e. not broadcast or our local
* address.
(res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev)))
goto e_inval;
fib_combine_itag(itag, &res);
- dev_match = false;
-
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
- for (ret = 0; ret < res.fi->fib_nhs; ret++) {
- struct fib_nh *nh = &res.fi->fib_nh[ret];
- if (nh->nh_dev == dev) {
- dev_match = true;
- break;
- } else if (l3mdev_master_ifindex_rcu(nh->nh_dev) == dev->ifindex) {
- dev_match = true;
- break;
- }
- }
-#else
- if (FIB_RES_DEV(res) == dev)
- dev_match = true;
-#endif
+ dev_match = fib_info_nh_uses_dev(res.fi, dev);
if (dev_match) {
ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
return ret;
return err;
}
+int ip_valid_fib_dump_req(const struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct rtmsg *rtm;
+
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
+ NL_SET_ERR_MSG(extack, "Invalid header for FIB dump request");
+ return -EINVAL;
+ }
+
+ rtm = nlmsg_data(nlh);
+ if (rtm->rtm_dst_len || rtm->rtm_src_len || rtm->rtm_tos ||
+ rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
+ rtm->rtm_type) {
+ NL_SET_ERR_MSG(extack, "Invalid values in header for FIB dump request");
+ return -EINVAL;
+ }
+ if (rtm->rtm_flags & ~(RTM_F_CLONED | RTM_F_PREFIX)) {
+ NL_SET_ERR_MSG(extack, "Invalid flags for FIB dump request");
+ return -EINVAL;
+ }
+
+ if (nlmsg_attrlen(nlh, sizeof(*rtm))) {
+ NL_SET_ERR_MSG(extack, "Invalid data after header in FIB dump request");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ip_valid_fib_dump_req);
+
static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
+ const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
unsigned int h, s_h;
unsigned int e = 0, s_e;
struct hlist_head *head;
int dumped = 0, err;
- if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
- ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
+ if (cb->strict_check) {
+ err = ip_valid_fib_dump_req(nlh, cb->extack);
+ if (err < 0)
+ return err;
+ }
+
+ if (nlmsg_len(nlh) >= sizeof(struct rtmsg) &&
+ ((struct rtmsg *)nlmsg_data(nlh))->rtm_flags & RTM_F_CLONED)
return skb->len;
s_h = cb->args[0];
static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct netdev_notifier_changeupper_info *info;
+ struct netdev_notifier_changeupper_info *upper_info = ptr;
+ struct netdev_notifier_info_ext *info_ext = ptr;
struct in_device *in_dev;
struct net *net = dev_net(dev);
unsigned int flags;
fib_sync_up(dev, RTNH_F_LINKDOWN);
else
fib_sync_down_dev(dev, event, false);
- /* fall through */
+ rt_cache_flush(net);
+ break;
case NETDEV_CHANGEMTU:
+ fib_sync_mtu(dev, info_ext->ext.mtu);
rt_cache_flush(net);
break;
case NETDEV_CHANGEUPPER:
- info = ptr;
+ upper_info = ptr;
/* flush all routes if dev is linked to or unlinked from
* an L3 master device (e.g., VRF)
*/
- if (info->upper_dev && netif_is_l3_master(info->upper_dev))
+ if (upper_info->upper_dev &&
+ netif_is_l3_master(upper_info->upper_dev))
fib_disable_ip(dev, NETDEV_DOWN, true);
break;
}
static void free_fib_info_rcu(struct rcu_head *head)
{
struct fib_info *fi = container_of(head, struct fib_info, rcu);
- struct dst_metrics *m;
change_nexthops(fi) {
if (nexthop_nh->nh_dev)
rt_fibinfo_free(&nexthop_nh->nh_rth_input);
} endfor_nexthops(fi);
- m = fi->fib_metrics;
- if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
- kfree(m);
+ ip_fib_metrics_put(fi->fib_metrics);
+
kfree(fi);
}
return -EINVAL;
}
dev = __dev_get_by_index(net, nh->nh_oif);
- if (!dev)
+ if (!dev) {
+ NL_SET_ERR_MSG(extack, "Nexthop device required for onlink");
return -ENODEV;
+ }
if (!(dev->flags & IFF_UP)) {
NL_SET_ERR_MSG(extack,
"Nexthop device is not up");
return true;
}
-static int
-fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
-{
- return ip_metrics_convert(fi->fib_net, cfg->fc_mx, cfg->fc_mx_len,
- fi->fib_metrics->metrics);
-}
-
struct fib_info *fib_create_info(struct fib_config *cfg,
struct netlink_ext_ack *extack)
{
fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
if (!fi)
goto failure;
- if (cfg->fc_mx) {
- fi->fib_metrics = kzalloc(sizeof(*fi->fib_metrics), GFP_KERNEL);
- if (unlikely(!fi->fib_metrics)) {
- kfree(fi);
- return ERR_PTR(err);
- }
- refcount_set(&fi->fib_metrics->refcnt, 1);
- } else {
- fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics;
+ fi->fib_metrics = ip_fib_metrics_init(fi->fib_net, cfg->fc_mx,
+ cfg->fc_mx_len);
+ if (unlikely(IS_ERR(fi->fib_metrics))) {
+ err = PTR_ERR(fi->fib_metrics);
+ kfree(fi);
+ return ERR_PTR(err);
}
+
fib_info_cnt++;
fi->fib_net = net;
fi->fib_protocol = cfg->fc_protocol;
goto failure;
} endfor_nexthops(fi)
- err = fib_convert_metrics(fi, cfg);
- if (err)
- goto failure;
-
if (cfg->fc_mp) {
#ifdef CONFIG_IP_ROUTE_MULTIPATH
err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack);
return NOTIFY_DONE;
}
+ /* Update the PMTU of exceptions when:
+ * - the new MTU of the first hop becomes smaller than the PMTU
+ * - the old MTU was the same as the PMTU, and it limited discovery of
+ * larger MTUs on the path. With that limit raised, we can now
+ * discover larger MTUs
+ * A special case is locked exceptions, for which the PMTU is smaller
+ * than the minimal accepted PMTU:
+ * - if the new MTU is greater than the PMTU, don't make any change
+ * - otherwise, unlock and set PMTU
+ */
+ static void nh_update_mtu(struct fib_nh *nh, u32 new, u32 orig)
+ {
+ struct fnhe_hash_bucket *bucket;
+ int i;
+
+ bucket = rcu_dereference_protected(nh->nh_exceptions, 1);
+ if (!bucket)
+ return;
+
+ for (i = 0; i < FNHE_HASH_SIZE; i++) {
+ struct fib_nh_exception *fnhe;
+
+ for (fnhe = rcu_dereference_protected(bucket[i].chain, 1);
+ fnhe;
+ fnhe = rcu_dereference_protected(fnhe->fnhe_next, 1)) {
+ if (fnhe->fnhe_mtu_locked) {
+ if (new <= fnhe->fnhe_pmtu) {
+ fnhe->fnhe_pmtu = new;
+ fnhe->fnhe_mtu_locked = false;
+ }
+ } else if (new < fnhe->fnhe_pmtu ||
+ orig == fnhe->fnhe_pmtu) {
+ fnhe->fnhe_pmtu = new;
+ }
+ }
+ }
+ }
+
+ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
+ {
+ unsigned int hash = fib_devindex_hashfn(dev->ifindex);
+ struct hlist_head *head = &fib_info_devhash[hash];
+ struct fib_nh *nh;
+
+ hlist_for_each_entry(nh, head, nh_hash) {
+ if (nh->nh_dev == dev)
+ nh_update_mtu(nh, dev->mtu, orig_mtu);
+ }
+ }
+
/* Event force Flags Description
* NETDEV_CHANGE 0 LINKDOWN Carrier OFF, not for scope host
* NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host
static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
struct dst_entry *dst = &rt->dst;
+ u32 old_mtu = ipv4_mtu(dst);
struct fib_result res;
bool lock = false;
if (ip_mtu_locked(dst))
return;
- if (ipv4_mtu(dst) < mtu)
+ if (old_mtu < mtu)
return;
if (mtu < ip_rt_min_pmtu) {
lock = true;
- mtu = ip_rt_min_pmtu;
+ mtu = min(old_mtu, ip_rt_min_pmtu);
}
- if (rt->rt_pmtu == mtu &&
+ if (rt->rt_pmtu == mtu && !lock &&
time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
return;
}
void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
- int oif, u32 mark, u8 protocol, int flow_flags)
+ int oif, u8 protocol)
{
const struct iphdr *iph = (const struct iphdr *) skb->data;
struct flowi4 fl4;
struct rtable *rt;
-
- if (!mark)
- mark = IP4_REPLY_MARK(net, skb->mark);
+ u32 mark = IP4_REPLY_MARK(net, skb->mark);
__build_flow_key(net, &fl4, NULL, iph, oif,
- RT_TOS(iph->tos), protocol, mark, flow_flags);
+ RT_TOS(iph->tos), protocol, mark, 0);
rt = __ip_route_output_key(net, &fl4);
if (!IS_ERR(rt)) {
__ip_rt_update_pmtu(rt, &fl4, mtu);
EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
void ipv4_redirect(struct sk_buff *skb, struct net *net,
- int oif, u32 mark, u8 protocol, int flow_flags)
+ int oif, u8 protocol)
{
const struct iphdr *iph = (const struct iphdr *) skb->data;
struct flowi4 fl4;
struct rtable *rt;
__build_flow_key(net, &fl4, NULL, iph, oif,
- RT_TOS(iph->tos), protocol, mark, flow_flags);
+ RT_TOS(iph->tos), protocol, 0, 0);
rt = __ip_route_output_key(net, &fl4);
if (!IS_ERR(rt)) {
__ip_do_redirect(rt, skb, &fl4, false);
src = ip_hdr(skb)->saddr;
else {
struct fib_result res;
- struct flowi4 fl4;
- struct iphdr *iph;
-
- iph = ip_hdr(skb);
-
- memset(&fl4, 0, sizeof(fl4));
- fl4.daddr = iph->daddr;
- fl4.saddr = iph->saddr;
- fl4.flowi4_tos = RT_TOS(iph->tos);
- fl4.flowi4_oif = rt->dst.dev->ifindex;
- fl4.flowi4_iif = skb->dev->ifindex;
- fl4.flowi4_mark = skb->mark;
+ struct iphdr *iph = ip_hdr(skb);
+ struct flowi4 fl4 = {
+ .daddr = iph->daddr,
+ .saddr = iph->saddr,
+ .flowi4_tos = RT_TOS(iph->tos),
+ .flowi4_oif = rt->dst.dev->ifindex,
+ .flowi4_iif = skb->dev->ifindex,
+ .flowi4_mark = skb->mark,
+ };
rcu_read_lock();
if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0)
static void ipv4_dst_destroy(struct dst_entry *dst)
{
- struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
struct rtable *rt = (struct rtable *)dst;
- if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
- kfree(p);
-
+ ip_dst_metrics_put(dst);
rt_del_uncached_list(rt);
}
rt->rt_gateway = nh->nh_gw;
rt->rt_uses_gateway = 1;
}
- dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true);
- if (fi->fib_metrics != &dst_default_metrics) {
- rt->dst._metrics |= DST_METRICS_REFCOUNTED;
- refcount_inc(&fi->fib_metrics->refcnt);
- }
+ ip_dst_init_metrics(&rt->dst, fi->fib_metrics);
+
#ifdef CONFIG_IP_ROUTE_CLASSID
rt->dst.tclassid = nh->nh_tclassid;
#endif
struct rtable *rt = NULL;
struct sk_buff *skb;
struct rtmsg *rtm;
- struct flowi4 fl4;
+ struct flowi4 fl4 = {};
__be32 dst = 0;
__be32 src = 0;
kuid_t uid;
if (!skb)
return -ENOBUFS;
- memset(&fl4, 0, sizeof(fl4));
fl4.daddr = dst;
fl4.saddr = src;
fl4.flowi4_tos = rtm->rtm_tos;
}
if (ipv4_is_multicast(daddr)) {
- if (!ipc.oif)
+ if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif))
ipc.oif = inet->mc_index;
if (!saddr)
saddr = inet->mc_addr;
*err = error;
return NULL;
}
- EXPORT_SYMBOL_GPL(__skb_recv_udp);
+ EXPORT_SYMBOL(__skb_recv_udp);
/*
* This should be easy, if there is something there we
return 0;
}
-static DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
+DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
void udp_encap_enable(void)
{
static_branch_enable(&udp_encap_needed_key);
#include <linux/list.h>
#include <linux/slab.h>
+#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
int (*func)(struct fib6_info *, void *arg);
int sernum;
void *arg;
+ bool skip_notify;
};
#ifdef CONFIG_IPV6_SUBTREES
}
INIT_LIST_HEAD(&f6i->fib6_siblings);
- f6i->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
-
atomic_inc(&f6i->fib6_ref);
return f6i;
{
struct fib6_info *f6i = container_of(head, struct fib6_info, rcu);
struct rt6_exception_bucket *bucket;
- struct dst_metrics *m;
WARN_ON(f6i->fib6_node);
*ppcpu_rt = NULL;
}
}
+
+ free_percpu(f6i->rt6i_pcpu);
}
lwtstate_put(f6i->fib6_nh.nh_lwtstate);
if (f6i->fib6_nh.nh_dev)
dev_put(f6i->fib6_nh.nh_dev);
- m = f6i->fib6_metrics;
- if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
- kfree(m);
+ ip_fib_metrics_put(f6i->fib6_metrics);
kfree(f6i);
}
static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
+ const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
unsigned int h, s_h;
unsigned int e = 0, s_e;
struct hlist_head *head;
int res = 0;
+ if (cb->strict_check) {
+ int err = ip_valid_fib_dump_req(nlh, cb->extack);
+
+ if (err < 0)
+ return err;
+ }
+
s_h = cb->args[0];
s_e = cb->args[1];
struct fib6_cleaner *c = container_of(w, struct fib6_cleaner, w);
struct nl_info info = {
.nl_net = c->net,
+ .skip_notify = c->skip_notify,
};
if (c->sernum != FIB6_NO_SERNUM_CHANGE &&
static void fib6_clean_tree(struct net *net, struct fib6_node *root,
int (*func)(struct fib6_info *, void *arg),
- int sernum, void *arg)
+ int sernum, void *arg, bool skip_notify)
{
struct fib6_cleaner c;
c.sernum = sernum;
c.arg = arg;
c.net = net;
+ c.skip_notify = skip_notify;
fib6_walk(net, &c.w);
}
static void __fib6_clean_all(struct net *net,
int (*func)(struct fib6_info *, void *),
- int sernum, void *arg)
+ int sernum, void *arg, bool skip_notify)
{
struct fib6_table *table;
struct hlist_head *head;
hlist_for_each_entry_rcu(table, head, tb6_hlist) {
spin_lock_bh(&table->tb6_lock);
fib6_clean_tree(net, &table->tb6_root,
- func, sernum, arg);
+ func, sernum, arg, skip_notify);
spin_unlock_bh(&table->tb6_lock);
}
}
void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *),
void *arg)
{
- __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg);
+ __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg, false);
+}
+
+void fib6_clean_all_skip_notify(struct net *net,
+ int (*func)(struct fib6_info *, void *),
+ void *arg)
+{
+ __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg, true);
}
static void fib6_flush_trees(struct net *net)
{
int new_sernum = fib6_new_sernum(net);
- __fib6_clean_all(net, NULL, new_sernum, NULL);
+ __fib6_clean_all(net, NULL, new_sernum, NULL, false);
}
/*
/* calculated RTT cache */
#define RXRPC_RTT_CACHE_SIZE 32
+ spinlock_t rtt_input_lock; /* RTT lock for input routine */
ktime_t rtt_last_req; /* Time of last RTT request */
u64 rtt; /* Current RTT estimate (in nS) */
u64 rtt_sum; /* Sum of cache contents */
spinlock_t state_lock; /* state-change lock */
enum rxrpc_conn_cache_state cache_state;
enum rxrpc_conn_proto_state state; /* current state of connection */
- u32 local_abort; /* local abort code */
- u32 remote_abort; /* remote abort code */
+ u32 abort_code; /* Abort code of connection abort */
int debug_id; /* debug ID for printks */
atomic_t serial; /* packet serial number counter */
unsigned int hi_serial; /* highest serial number received */
u32 security_nonce; /* response re-use preventer */
- u16 service_id; /* Service ID, possibly upgraded */
+ u32 service_id; /* Service ID, possibly upgraded */
u8 size_align; /* data size alignment (for security) */
u8 security_size; /* security header size */
u8 security_ix; /* security type */
u8 out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */
+ short error; /* Local error code */
};
static inline bool rxrpc_to_server(const struct rxrpc_skb_priv *sp)
bool tx_phase; /* T if transmission phase, F if receive phase */
u8 nr_jumbo_bad; /* Number of jumbo dups/exceeds-windows */
+ spinlock_t input_lock; /* Lock for packet input to this call */
+
/* receive-phase ACK management */
u8 ackr_reason; /* reason to ACK */
u16 ackr_skew; /* skew on packet being ACK'd */
void rxrpc_discard_prealloc(struct rxrpc_sock *);
struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *,
struct rxrpc_sock *,
- struct rxrpc_peer *,
- struct rxrpc_connection *,
struct sk_buff *);
void rxrpc_accept_incoming_calls(struct rxrpc_local *);
struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, unsigned long,
extern struct idr rxrpc_client_conn_ids;
void rxrpc_destroy_client_conn_ids(void);
- int rxrpc_connect_call(struct rxrpc_call *, struct rxrpc_conn_parameters *,
- struct sockaddr_rxrpc *, gfp_t);
+ int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_call *,
+ struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *,
+ gfp_t);
void rxrpc_expose_client_call(struct rxrpc_call *);
void rxrpc_disconnect_client_call(struct rxrpc_call *);
void rxrpc_put_client_conn(struct rxrpc_connection *);
/*
* input.c
*/
- void rxrpc_data_ready(struct sock *);
+ int rxrpc_input_packet(struct sock *, struct sk_buff *);
/*
* insecure.c
*/
struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *,
const struct sockaddr_rxrpc *);
- struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *,
+ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *, struct rxrpc_local *,
struct sockaddr_rxrpc *, gfp_t);
struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t);
- void rxrpc_new_incoming_peer(struct rxrpc_local *, struct rxrpc_peer *);
+ void rxrpc_new_incoming_peer(struct rxrpc_sock *, struct rxrpc_local *,
+ struct rxrpc_peer *);
void rxrpc_destroy_all_peers(struct rxrpc_net *);
struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *);
struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *);
void rxrpc_see_skb(struct sk_buff *, enum rxrpc_skb_trace);
void rxrpc_get_skb(struct sk_buff *, enum rxrpc_skb_trace);
void rxrpc_free_skb(struct sk_buff *, enum rxrpc_skb_trace);
-void rxrpc_lose_skb(struct sk_buff *, enum rxrpc_skb_trace);
void rxrpc_purge_queue(struct sk_buff_head *);
/*
/*
* utils.c
*/
-int rxrpc_extract_addr_from_skb(struct rxrpc_local *, struct sockaddr_rxrpc *,
- struct sk_buff *);
+int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *);
static inline bool before(u32 seq1, u32 seq2)
{
peer = NULL;
if (!peer) {
peer = b->peer_backlog[peer_tail];
- if (rxrpc_extract_addr_from_skb(local, &peer->srx, skb) < 0)
+ if (rxrpc_extract_addr_from_skb(&peer->srx, skb) < 0)
return NULL;
b->peer_backlog[peer_tail] = NULL;
smp_store_release(&b->peer_backlog_tail,
(peer_tail + 1) &
(RXRPC_BACKLOG_MAX - 1));
- rxrpc_new_incoming_peer(local, peer);
+ rxrpc_new_incoming_peer(rx, local, peer);
}
/* Now allocate and set up the connection */
*/
struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local,
struct rxrpc_sock *rx,
- struct rxrpc_peer *peer,
- struct rxrpc_connection *conn,
struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rxrpc_connection *conn;
+ struct rxrpc_peer *peer;
struct rxrpc_call *call;
_enter("");
goto out;
}
+ /* The peer, connection and call may all have sprung into existence due
+ * to a duplicate packet being handled on another CPU in parallel, so
+ * we have to recheck the routing. However, we're now holding
+ * rx->incoming_lock, so the values should remain stable.
+ */
+ conn = rxrpc_find_connection_rcu(local, skb, &peer);
+
call = rxrpc_alloc_incoming_call(rx, local, peer, conn, skb);
if (!call) {
skb->mark = RXRPC_SKB_MARK_REJECT_BUSY;
case RXRPC_CONN_SERVICE:
write_lock(&call->state_lock);
- if (rx->discard_new_call)
- call->state = RXRPC_CALL_SERVER_RECV_REQUEST;
- else
- call->state = RXRPC_CALL_SERVER_ACCEPTING;
+ if (call->state < RXRPC_CALL_COMPLETE) {
+ if (rx->discard_new_call)
+ call->state = RXRPC_CALL_SERVER_RECV_REQUEST;
+ else
+ call->state = RXRPC_CALL_SERVER_ACCEPTING;
+ }
write_unlock(&call->state_lock);
break;
case RXRPC_CONN_REMOTELY_ABORTED:
rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
- conn->remote_abort, -ECONNABORTED);
+ conn->abort_code, conn->error);
break;
case RXRPC_CONN_LOCALLY_ABORTED:
rxrpc_abort_call("CON", call, sp->hdr.seq,
- conn->local_abort, -ECONNABORTED);
+ conn->abort_code, conn->error);
break;
default:
BUG();
/*
* Apply a hard ACK by advancing the Tx window.
*/
- static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
+ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
struct rxrpc_ack_summary *summary)
{
struct sk_buff *skb, *list = NULL;
+ bool rot_last = false;
int ix;
u8 annotation;
skb->next = list;
list = skb;
- if (annotation & RXRPC_TX_ANNO_LAST)
+ if (annotation & RXRPC_TX_ANNO_LAST) {
set_bit(RXRPC_CALL_TX_LAST, &call->flags);
+ rot_last = true;
+ }
if ((annotation & RXRPC_TX_ANNO_MASK) != RXRPC_TX_ANNO_ACK)
summary->nr_rot_new_acks++;
}
spin_unlock(&call->lock);
- trace_rxrpc_transmit(call, (test_bit(RXRPC_CALL_TX_LAST, &call->flags) ?
+ trace_rxrpc_transmit(call, (rot_last ?
rxrpc_transmit_rotate_last :
rxrpc_transmit_rotate));
wake_up(&call->waitq);
while (list) {
skb = list;
list = skb->next;
- skb->next = NULL;
+ skb_mark_not_on_list(skb);
rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
}
+
+ return rot_last;
}
/*
static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
const char *abort_why)
{
+ unsigned int state;
ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags));
write_lock(&call->state_lock);
- switch (call->state) {
+ state = call->state;
+ switch (state) {
case RXRPC_CALL_CLIENT_SEND_REQUEST:
case RXRPC_CALL_CLIENT_AWAIT_REPLY:
if (reply_begun)
- call->state = RXRPC_CALL_CLIENT_RECV_REPLY;
+ call->state = state = RXRPC_CALL_CLIENT_RECV_REPLY;
else
- call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
+ call->state = state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
break;
case RXRPC_CALL_SERVER_AWAIT_ACK:
__rxrpc_call_completed(call);
rxrpc_notify_socket(call);
+ state = call->state;
break;
default:
}
write_unlock(&call->state_lock);
- if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) {
+ if (state == RXRPC_CALL_CLIENT_AWAIT_REPLY)
trace_rxrpc_transmit(call, rxrpc_transmit_await_reply);
- } else {
+ else
trace_rxrpc_transmit(call, rxrpc_transmit_end);
- }
_leave(" = ok");
return true;
trace_rxrpc_timer(call, rxrpc_timer_init_for_reply, now);
}
- if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags))
- rxrpc_rotate_tx_window(call, top, &summary);
if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) {
- rxrpc_proto_abort("TXL", call, top);
- return false;
+ if (!rxrpc_rotate_tx_window(call, top, &summary)) {
+ rxrpc_proto_abort("TXL", call, top);
+ return false;
+ }
}
if (!rxrpc_end_tx_phase(call, true, "ETD"))
return false;
}
}
+ spin_lock(&call->input_lock);
+
/* Received data implicitly ACKs all of the request packets we sent
* when we're acting as a client.
*/
if ((state == RXRPC_CALL_CLIENT_SEND_REQUEST ||
state == RXRPC_CALL_CLIENT_AWAIT_REPLY) &&
!rxrpc_receiving_reply(call))
- return;
+ goto unlock;
call->ackr_prev_seq = seq;
if (flags & RXRPC_LAST_PACKET) {
if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) &&
- seq != call->rx_top)
- return rxrpc_proto_abort("LSN", call, seq);
+ seq != call->rx_top) {
+ rxrpc_proto_abort("LSN", call, seq);
+ goto unlock;
+ }
} else {
if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) &&
- after_eq(seq, call->rx_top))
- return rxrpc_proto_abort("LSA", call, seq);
+ after_eq(seq, call->rx_top)) {
+ rxrpc_proto_abort("LSA", call, seq);
+ goto unlock;
+ }
}
trace_rxrpc_rx_data(call->debug_id, seq, serial, flags, annotation);
skip:
offset += len;
if (flags & RXRPC_JUMBO_PACKET) {
- if (skb_copy_bits(skb, offset, &flags, 1) < 0)
- return rxrpc_proto_abort("XJF", call, seq);
+ if (skb_copy_bits(skb, offset, &flags, 1) < 0) {
+ rxrpc_proto_abort("XJF", call, seq);
+ goto unlock;
+ }
offset += sizeof(struct rxrpc_jumbo_header);
seq++;
serial++;
trace_rxrpc_notify_socket(call->debug_id, serial);
rxrpc_notify_socket(call);
}
+
+ unlock:
+ spin_unlock(&call->input_lock);
_leave(" [queued]");
}
ping_time = call->ping_time;
smp_rmb();
- ping_serial = call->ping_serial;
+ ping_serial = READ_ONCE(call->ping_serial);
if (orig_serial == call->acks_lost_ping)
rxrpc_input_check_for_lost_ack(call);
- if (!test_bit(RXRPC_CALL_PINGING, &call->flags) ||
- before(orig_serial, ping_serial))
+ if (before(orig_serial, ping_serial) ||
+ !test_and_clear_bit(RXRPC_CALL_PINGING, &call->flags))
return;
- clear_bit(RXRPC_CALL_PINGING, &call->flags);
if (after(orig_serial, ping_serial))
return;
rxrpc_propose_ack_respond_to_ack);
}
+ /* Discard any out-of-order or duplicate ACKs. */
+ if (before_eq(sp->hdr.serial, call->acks_latest))
+ return;
+
+ buf.info.rxMTU = 0;
ioffset = offset + nr_acks + 3;
- if (skb->len >= ioffset + sizeof(buf.info)) {
- if (skb_copy_bits(skb, ioffset, &buf.info, sizeof(buf.info)) < 0)
- return rxrpc_proto_abort("XAI", call, 0);
+ if (skb->len >= ioffset + sizeof(buf.info) &&
+ skb_copy_bits(skb, ioffset, &buf.info, sizeof(buf.info)) < 0)
+ return rxrpc_proto_abort("XAI", call, 0);
+
+ spin_lock(&call->input_lock);
+
+ /* Discard any out-of-order or duplicate ACKs. */
+ if (before_eq(sp->hdr.serial, call->acks_latest))
+ goto out;
+ call->acks_latest_ts = skb->tstamp;
+ call->acks_latest = sp->hdr.serial;
+
+ /* Parse rwind and mtu sizes if provided. */
+ if (buf.info.rxMTU)
rxrpc_input_ackinfo(call, skb, &buf.info);
- }
- if (first_soft_ack == 0)
- return rxrpc_proto_abort("AK0", call, 0);
+ if (first_soft_ack == 0) {
+ rxrpc_proto_abort("AK0", call, 0);
+ goto out;
+ }
/* Ignore ACKs unless we are or have just been transmitting. */
switch (READ_ONCE(call->state)) {
case RXRPC_CALL_SERVER_AWAIT_ACK:
break;
default:
- return;
- }
-
- /* Discard any out-of-order or duplicate ACKs. */
- if (before_eq(sp->hdr.serial, call->acks_latest)) {
- _debug("discard ACK %d <= %d",
- sp->hdr.serial, call->acks_latest);
- return;
+ goto out;
}
- call->acks_latest_ts = skb->tstamp;
- call->acks_latest = sp->hdr.serial;
if (before(hard_ack, call->tx_hard_ack) ||
- after(hard_ack, call->tx_top))
- return rxrpc_proto_abort("AKW", call, 0);
- if (nr_acks > call->tx_top - hard_ack)
- return rxrpc_proto_abort("AKN", call, 0);
+ after(hard_ack, call->tx_top)) {
+ rxrpc_proto_abort("AKW", call, 0);
+ goto out;
+ }
+ if (nr_acks > call->tx_top - hard_ack) {
+ rxrpc_proto_abort("AKN", call, 0);
+ goto out;
+ }
- if (after(hard_ack, call->tx_hard_ack))
- rxrpc_rotate_tx_window(call, hard_ack, &summary);
+ if (after(hard_ack, call->tx_hard_ack)) {
+ if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) {
+ rxrpc_end_tx_phase(call, false, "ETA");
+ goto out;
+ }
+ }
if (nr_acks > 0) {
- if (skb_copy_bits(skb, offset, buf.acks, nr_acks) < 0)
- return rxrpc_proto_abort("XSA", call, 0);
+ if (skb_copy_bits(skb, offset, buf.acks, nr_acks) < 0) {
+ rxrpc_proto_abort("XSA", call, 0);
+ goto out;
+ }
rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks,
&summary);
}
- if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) {
- rxrpc_end_tx_phase(call, false, "ETA");
- return;
- }
-
if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] &
RXRPC_TX_ANNO_LAST &&
summary.nr_acks == call->tx_top - hard_ack &&
false, true,
rxrpc_propose_ack_ping_for_lost_reply);
- return rxrpc_congestion_management(call, skb, &summary, acked_serial);
+ rxrpc_congestion_management(call, skb, &summary, acked_serial);
+ out:
+ spin_unlock(&call->input_lock);
}
/*
_proto("Rx ACKALL %%%u", sp->hdr.serial);
- rxrpc_rotate_tx_window(call, call->tx_top, &summary);
- if (test_bit(RXRPC_CALL_TX_LAST, &call->flags))
+ spin_lock(&call->input_lock);
+
+ if (rxrpc_rotate_tx_window(call, call->tx_top, &summary))
rxrpc_end_tx_phase(call, false, "ETL");
+
+ spin_unlock(&call->input_lock);
}
/*
}
/*
- * Handle a new call on a channel implicitly completing the preceding call on
- * that channel.
+ * Handle a new service call on a channel implicitly completing the preceding
+ * call on that channel. This does not apply to client conns.
*
* TODO: If callNumber > call_id + 1, renegotiate security.
*/
- static void rxrpc_input_implicit_end_call(struct rxrpc_connection *conn,
+ static void rxrpc_input_implicit_end_call(struct rxrpc_sock *rx,
+ struct rxrpc_connection *conn,
struct rxrpc_call *call)
{
switch (READ_ONCE(call->state)) {
case RXRPC_CALL_SERVER_AWAIT_ACK:
rxrpc_call_completed(call);
- break;
+ /* Fall through */
case RXRPC_CALL_COMPLETE:
break;
default:
set_bit(RXRPC_CALL_EV_ABORT, &call->events);
rxrpc_queue_call(call);
}
+ trace_rxrpc_improper_term(call);
break;
}
- trace_rxrpc_improper_term(call);
+ spin_lock(&rx->incoming_lock);
__rxrpc_disconnect_call(conn, call);
+ spin_unlock(&rx->incoming_lock);
rxrpc_notify_socket(call);
}
* The socket is locked by the caller and this prevents the socket from being
* shut down and the local endpoint from going away, thus sk_user_data will not
* be cleared until this function returns.
+ *
+ * Called with the RCU read lock held from the IP layer via UDP.
*/
- void rxrpc_data_ready(struct sock *udp_sk)
+ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
{
struct rxrpc_connection *conn;
struct rxrpc_channel *chan;
struct rxrpc_local *local = udp_sk->sk_user_data;
struct rxrpc_peer *peer = NULL;
struct rxrpc_sock *rx = NULL;
- struct sk_buff *skb;
unsigned int channel;
- int ret, skew = 0;
+ int skew = 0;
_enter("%p", udp_sk);
- ASSERT(!irqs_disabled());
-
- skb = skb_recv_udp(udp_sk, 0, 1, &ret);
- if (!skb) {
- if (ret == -EAGAIN)
- return;
- _debug("UDP socket error %d", ret);
- return;
- }
-
if (skb->tstamp == 0)
skb->tstamp = ktime_get_real();
rxrpc_new_skb(skb, rxrpc_skb_rx_received);
- _net("recv skb %p", skb);
-
- /* we'll probably need to checksum it (didn't call sock_recvmsg) */
- if (skb_checksum_complete(skb)) {
- rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
- __UDP_INC_STATS(&init_net, UDP_MIB_INERRORS, 0);
- _leave(" [CSUM failed]");
- return;
- }
-
- __UDP_INC_STATS(&init_net, UDP_MIB_INDATAGRAMS, 0);
+ skb_pull(skb, sizeof(struct udphdr));
/* The UDP protocol already released all skb resources;
* we are free to add our own data there.
if ((lose++ & 7) == 7) {
trace_rxrpc_rx_lose(sp);
rxrpc_free_skb(skb, rxrpc_skb_rx_lost);
- return;
+ return 0;
}
}
+ if (skb->tstamp == 0)
+ skb->tstamp = ktime_get_real();
trace_rxrpc_rx_packet(sp);
switch (sp->hdr.type) {
if (sp->hdr.serviceId == 0)
goto bad_message;
- rcu_read_lock();
-
if (rxrpc_to_server(sp)) {
/* Weed out packets to services we're not offering. Packets
* that would begin a call are explicitly rejected and the rest
if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA &&
sp->hdr.seq == 1)
goto unsupported_service;
- goto discard_unlock;
+ goto discard;
}
}
goto wrong_security;
if (sp->hdr.serviceId != conn->service_id) {
- if (!test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) ||
- conn->service_id != conn->params.service_id)
+ int old_id;
+
+ if (!test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags))
+ goto reupgrade;
+ old_id = cmpxchg(&conn->service_id, conn->params.service_id,
+ sp->hdr.serviceId);
+
+ if (old_id != conn->params.service_id &&
+ old_id != sp->hdr.serviceId)
goto reupgrade;
- conn->service_id = sp->hdr.serviceId;
}
if (sp->hdr.callNumber == 0) {
/* Connection-level packet */
_debug("CONN %p {%d}", conn, conn->debug_id);
rxrpc_post_packet_to_conn(conn, skb);
- goto out_unlock;
+ goto out;
}
/* Note the serial number skew here */
/* Ignore really old calls */
if (sp->hdr.callNumber < chan->last_call)
- goto discard_unlock;
+ goto discard;
if (sp->hdr.callNumber == chan->last_call) {
if (chan->call ||
sp->hdr.type == RXRPC_PACKET_TYPE_ABORT)
- goto discard_unlock;
+ goto discard;
/* For the previous service call, if completed
* successfully, we discard all further packets.
*/
if (rxrpc_conn_is_service(conn) &&
chan->last_type == RXRPC_PACKET_TYPE_ACK)
- goto discard_unlock;
+ goto discard;
/* But otherwise we need to retransmit the final packet
* from data cached in the connection record.
sp->hdr.serial,
sp->hdr.flags, 0);
rxrpc_post_packet_to_conn(conn, skb);
- goto out_unlock;
+ goto out;
}
call = rcu_dereference(chan->call);
if (sp->hdr.callNumber > chan->call_id) {
- if (rxrpc_to_client(sp)) {
- rcu_read_unlock();
+ if (rxrpc_to_client(sp))
goto reject_packet;
- }
if (call)
- rxrpc_input_implicit_end_call(conn, call);
+ rxrpc_input_implicit_end_call(rx, conn, call);
call = NULL;
}
if (!call || atomic_read(&call->usage) == 0) {
if (rxrpc_to_client(sp) ||
sp->hdr.type != RXRPC_PACKET_TYPE_DATA)
- goto bad_message_unlock;
+ goto bad_message;
if (sp->hdr.seq != 1)
- goto discard_unlock;
- call = rxrpc_new_incoming_call(local, rx, peer, conn, skb);
- if (!call) {
- rcu_read_unlock();
+ goto discard;
+ call = rxrpc_new_incoming_call(local, rx, skb);
+ if (!call)
goto reject_packet;
- }
rxrpc_send_ping(call, skb, skew);
mutex_unlock(&call->user_mutex);
}
rxrpc_input_call_packet(call, skb, skew);
- goto discard_unlock;
+ goto discard;
- discard_unlock:
- rcu_read_unlock();
discard:
rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
out:
trace_rxrpc_rx_done(0, 0);
- return;
-
- out_unlock:
- rcu_read_unlock();
- goto out;
+ return 0;
wrong_security:
- rcu_read_unlock();
trace_rxrpc_abort(0, "SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
RXKADINCONSISTENCY, EBADMSG);
skb->priority = RXKADINCONSISTENCY;
goto post_abort;
unsupported_service:
- rcu_read_unlock();
trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
RX_INVALID_OPERATION, EOPNOTSUPP);
skb->priority = RX_INVALID_OPERATION;
goto post_abort;
reupgrade:
- rcu_read_unlock();
trace_rxrpc_abort(0, "UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
RX_PROTOCOL_ERROR, EBADMSG);
goto protocol_error;
- bad_message_unlock:
- rcu_read_unlock();
bad_message:
trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
RX_PROTOCOL_ERROR, EBADMSG);
trace_rxrpc_rx_done(skb->mark, skb->priority);
rxrpc_reject_packet(local, skb);
_leave(" [badmsg]");
+ return 0;
}
*/
switch (srx->transport.family) {
case AF_INET:
+ srx->transport_len = sizeof(srx->transport.sin);
+ srx->transport.family = AF_INET;
srx->transport.sin.sin_port = serr->port;
switch (serr->ee.ee_origin) {
case SO_EE_ORIGIN_ICMP:
#ifdef CONFIG_AF_RXRPC_IPV6
case AF_INET6:
- srx->transport.sin6.sin6_port = serr->port;
switch (serr->ee.ee_origin) {
case SO_EE_ORIGIN_ICMP6:
_net("Rx ICMP6");
+ srx->transport.sin6.sin6_port = serr->port;
memcpy(&srx->transport.sin6.sin6_addr,
skb_network_header(skb) + serr->addr_offset,
sizeof(struct in6_addr));
break;
case SO_EE_ORIGIN_ICMP:
_net("Rx ICMP on v6 sock");
- srx->transport.sin6.sin6_addr.s6_addr32[0] = 0;
- srx->transport.sin6.sin6_addr.s6_addr32[1] = 0;
- srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
- memcpy(srx->transport.sin6.sin6_addr.s6_addr + 12,
+ srx->transport_len = sizeof(srx->transport.sin);
+ srx->transport.family = AF_INET;
+ srx->transport.sin.sin_port = serr->port;
+ memcpy(&srx->transport.sin.sin_addr,
skb_network_header(skb) + serr->addr_offset,
sizeof(struct in_addr));
break;
if (rtt < 0)
return;
+ spin_lock(&peer->rtt_input_lock);
+
/* Replace the oldest datum in the RTT buffer */
sum -= peer->rtt_cache[cursor];
sum += rtt;
peer->rtt_usage = usage;
}
+ spin_unlock(&peer->rtt_input_lock);
+
/* Now recalculate the average */
if (usage == RXRPC_RTT_CACHE_SIZE) {
avg = sum / RXRPC_RTT_CACHE_SIZE;
do_div(avg, usage);
}
+ /* Don't need to update this under lock */
peer->rtt = avg;
trace_rxrpc_rtt_rx(call, why, send_serial, resp_serial, rtt,
usage, avg);
u32 mask;
u32 __percpu *pcpu_success;
#endif
- struct tcf_proto *tp;
struct rcu_work rwork;
/* The 'sel' field MUST be the last field in structure to allow for
* tc_u32_keys allocated at end of structure.
struct tc_u_hnode __rcu *next;
u32 handle;
u32 prio;
- struct tc_u_common *tp_c;
int refcnt;
unsigned int divisor;
struct idr handle_idr;
+ bool is_root;
struct rcu_head rcu;
u32 flags;
/* The 'ht' field MUST be the last field in structure to allow for
int refcnt;
struct idr handle_idr;
struct hlist_node hnode;
- struct rcu_head rcu;
+ long knodes;
};
static inline unsigned int u32_hash_fold(__be32 key,
return block->q;
}
-static unsigned int tc_u_hash(const struct tcf_proto *tp)
+static struct hlist_head *tc_u_hash(void *key)
{
- return hash_ptr(tc_u_common_ptr(tp), U32_HASH_SHIFT);
+ return tc_u_common_hash + hash_ptr(key, U32_HASH_SHIFT);
}
-static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp)
+static struct tc_u_common *tc_u_common_find(void *key)
{
struct tc_u_common *tc;
- unsigned int h;
-
- h = tc_u_hash(tp);
- hlist_for_each_entry(tc, &tc_u_common_hash[h], hnode) {
- if (tc->ptr == tc_u_common_ptr(tp))
+ hlist_for_each_entry(tc, tc_u_hash(key), hnode) {
+ if (tc->ptr == key)
return tc;
}
return NULL;
static int u32_init(struct tcf_proto *tp)
{
struct tc_u_hnode *root_ht;
- struct tc_u_common *tp_c;
- unsigned int h;
-
- tp_c = tc_u_common_find(tp);
+ void *key = tc_u_common_ptr(tp);
+ struct tc_u_common *tp_c = tc_u_common_find(key);
root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL);
if (root_ht == NULL)
root_ht->refcnt++;
root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : 0x80000000;
root_ht->prio = tp->prio;
+ root_ht->is_root = true;
idr_init(&root_ht->handle_idr);
if (tp_c == NULL) {
kfree(root_ht);
return -ENOBUFS;
}
- tp_c->ptr = tc_u_common_ptr(tp);
+ tp_c->ptr = key;
INIT_HLIST_NODE(&tp_c->hnode);
idr_init(&tp_c->handle_idr);
- h = tc_u_hash(tp);
- hlist_add_head(&tp_c->hnode, &tc_u_common_hash[h]);
+ hlist_add_head(&tp_c->hnode, tc_u_hash(key));
}
tp_c->refcnt++;
RCU_INIT_POINTER(root_ht->next, tp_c->hlist);
rcu_assign_pointer(tp_c->hlist, root_ht);
- root_ht->tp_c = tp_c;
+ root_ht->refcnt++;
rcu_assign_pointer(tp->root, root_ht);
tp->data = tp_c;
return 0;
}
-static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n,
- bool free_pf)
+static int u32_destroy_key(struct tc_u_knode *n, bool free_pf)
{
struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
struct tc_u_knode,
rwork);
rtnl_lock();
- u32_destroy_key(key->tp, key, false);
+ u32_destroy_key(key, false);
rtnl_unlock();
}
struct tc_u_knode,
rwork);
rtnl_lock();
- u32_destroy_key(key->tp, key, true);
+ u32_destroy_key(key, true);
rtnl_unlock();
}
static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
{
+ struct tc_u_common *tp_c = tp->data;
struct tc_u_knode __rcu **kp;
struct tc_u_knode *pkp;
struct tc_u_hnode *ht = rtnl_dereference(key->ht_up);
kp = &pkp->next, pkp = rtnl_dereference(*kp)) {
if (pkp == key) {
RCU_INIT_POINTER(*kp, key->next);
+ tp_c->knodes--;
tcf_unbind_filter(tp, &key->res);
idr_remove(&ht->handle_idr, key->handle);
static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
struct netlink_ext_ack *extack)
{
+ struct tc_u_common *tp_c = tp->data;
struct tc_u_knode *n;
unsigned int h;
while ((n = rtnl_dereference(ht->ht[h])) != NULL) {
RCU_INIT_POINTER(ht->ht[h],
rtnl_dereference(n->next));
+ tp_c->knodes--;
tcf_unbind_filter(tp, &n->res);
u32_remove_hw_knode(tp, n, extack);
idr_remove(&ht->handle_idr, n->handle);
if (tcf_exts_get_net(&n->exts))
tcf_queue_work(&n->rwork, u32_delete_key_freepf_work);
else
- u32_destroy_key(n->tp, n, true);
+ u32_destroy_key(n, true);
}
}
}
struct tc_u_hnode __rcu **hn;
struct tc_u_hnode *phn;
- WARN_ON(ht->refcnt);
+ WARN_ON(--ht->refcnt);
u32_clear_hnode(tp, ht, extack);
return -ENOENT;
}
-static bool ht_empty(struct tc_u_hnode *ht)
-{
- unsigned int h;
-
- for (h = 0; h <= ht->divisor; h++)
- if (rcu_access_pointer(ht->ht[h]))
- return false;
-
- return true;
-}
-
static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct tc_u_common *tp_c = tp->data;
WARN_ON(root_ht == NULL);
- if (root_ht && --root_ht->refcnt == 0)
+ if (root_ht && --root_ht->refcnt == 1)
u32_destroy_hnode(tp, root_ht, extack);
if (--tp_c->refcnt == 0) {
struct netlink_ext_ack *extack)
{
struct tc_u_hnode *ht = arg;
- struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
struct tc_u_common *tp_c = tp->data;
int ret = 0;
- if (ht == NULL)
- goto out;
-
if (TC_U32_KEY(ht->handle)) {
u32_remove_hw_knode(tp, (struct tc_u_knode *)ht, extack);
ret = u32_delete_key(tp, (struct tc_u_knode *)ht);
goto out;
}
- if (root_ht == ht) {
+ if (ht->is_root) {
NL_SET_ERR_MSG_MOD(extack, "Not allowed to delete root node");
return -EINVAL;
}
if (ht->refcnt == 1) {
- ht->refcnt--;
u32_destroy_hnode(tp, ht, extack);
} else {
NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter");
}
out:
- *last = true;
- if (root_ht) {
- if (root_ht->refcnt > 2) {
- *last = false;
- goto ret;
- }
- if (root_ht->refcnt == 2) {
- if (!ht_empty(root_ht)) {
- *last = false;
- goto ret;
- }
- }
- }
-
- if (tp_c->refcnt > 1) {
- *last = false;
- goto ret;
- }
-
- if (tp_c->refcnt == 1) {
- struct tc_u_hnode *ht;
-
- for (ht = rtnl_dereference(tp_c->hlist);
- ht;
- ht = rtnl_dereference(ht->next))
- if (!ht_empty(ht)) {
- *last = false;
- break;
- }
- }
-
-ret:
+ *last = tp_c->refcnt == 1 && tp_c->knodes == 0;
return ret;
}
};
static int u32_set_parms(struct net *net, struct tcf_proto *tp,
- unsigned long base, struct tc_u_hnode *ht,
+ unsigned long base,
struct tc_u_knode *n, struct nlattr **tb,
struct nlattr *est, bool ovr,
struct netlink_ext_ack *extack)
}
if (handle) {
- ht_down = u32_lookup_ht(ht->tp_c, handle);
+ ht_down = u32_lookup_ht(tp->data, handle);
if (!ht_down) {
NL_SET_ERR_MSG_MOD(extack, "Link hash table not found");
return -EINVAL;
}
+ if (ht_down->is_root) {
+ NL_SET_ERR_MSG_MOD(extack, "Not linking to root node");
+ return -EINVAL;
+ }
ht_down->refcnt++;
}
/* Similarly success statistics must be moved as pointers */
new->pcpu_success = n->pcpu_success;
#endif
- new->tp = tp;
memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
if (tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE)) {
if (!new)
return -ENOMEM;
- err = u32_set_parms(net, tp, base,
- rtnl_dereference(n->ht_up), new, tb,
+ err = u32_set_parms(net, tp, base, new, tb,
tca[TCA_RATE], ovr, extack);
if (err) {
- u32_destroy_key(tp, new, false);
+ u32_destroy_key(new, false);
return err;
}
err = u32_replace_hw_knode(tp, new, flags, extack);
if (err) {
- u32_destroy_key(tp, new, false);
+ u32_destroy_key(new, false);
return err;
}
if (tb[TCA_U32_DIVISOR]) {
unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);
- if (--divisor > 0x100) {
+ if (!is_power_of_2(divisor)) {
+ NL_SET_ERR_MSG_MOD(extack, "Divisor is not a power of 2");
+ return -EINVAL;
+ }
+ if (divisor-- > 0x100) {
NL_SET_ERR_MSG_MOD(extack, "Exceeded maximum 256 hash buckets");
return -EINVAL;
}
return err;
}
}
- ht->tp_c = tp_c;
ht->refcnt = 1;
ht->divisor = divisor;
ht->handle = handle;
n->handle = handle;
n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
n->flags = flags;
- n->tp = tp;
err = tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE);
if (err < 0)
}
#endif
- err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr,
+ err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE], ovr,
extack);
if (err == 0) {
struct tc_u_knode __rcu **ins;
RCU_INIT_POINTER(n->next, pins);
rcu_assign_pointer(*ins, n);
+ tp_c->knodes++;
*arg = n;
return 0;
}
if (skb) {
flow->head = skb->next;
- skb->next = NULL;
+ skb_mark_not_on_list(skb);
}
return skb;
else
flow->head = elig_ack->next;
- elig_ack->next = NULL;
+ skb_mark_not_on_list(elig_ack);
return elig_ack;
}
while (segs) {
nskb = segs->next;
- segs->next = NULL;
+ skb_mark_not_on_list(segs);
qdisc_skb_cb(segs)->pkt_len = segs->len;
cobalt_set_enqueue_time(segs, now);
get_cobalt_cb(segs)->adjusted_len = cake_overhead(q,
for (i = 1; i <= CAKE_QUEUES; i++)
quantum_div[i] = 65535 / i;
- q->tins = kvzalloc(CAKE_MAX_TINS * sizeof(struct cake_tin_data),
+ q->tins = kvcalloc(CAKE_MAX_TINS, sizeof(struct cake_tin_data),
GFP_KERNEL);
if (!q->tins)
goto nomem;
#include "netlink.h"
#include "group.h"
-#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */
+#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */
#define CONN_PROBING_INTV msecs_to_jiffies(3600000) /* [ms] => 1 h */
#define TIPC_FWD_MSG 1
#define TIPC_MAX_PORT 0xffffffff
* @publications: list of publications for port
* @blocking_link: address of the congested link we are currently sleeping on
* @pub_count: total # of publications port has made during its lifetime
- * @probing_state:
* @conn_timeout: the time we can wait for an unresponded setup request
* @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
* @cong_link_cnt: number of congested links
struct list_head cong_links;
struct list_head publications;
u32 pub_count;
- uint conn_timeout;
atomic_t dupl_rcvcnt;
+ u16 conn_timeout;
bool probe_unacked;
u16 cong_link_cnt;
u16 snt_unacked;
tipc_wait_for_cond(sock, &timeout, (!tsk->cong_link_cnt &&
!tsk_conn_cong(tsk)));
+ /* Remove any pending SYN message */
+ __skb_queue_purge(&sk->sk_write_queue);
+
/* Reject all unreceived messages, except on an active connection
* (which disconnects locally & sends a 'FIN+' to peer).
*/
* @skb: pointer to message buffer.
*/
static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
+ struct sk_buff_head *inputq,
struct sk_buff_head *xmitq)
{
struct tipc_msg *hdr = buf_msg(skb);
tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk),
tsk_peer_port(tsk));
sk->sk_state_change(sk);
- goto exit;
+
+ /* State change is ignored if socket already awake,
+ * - convert msg to abort msg and add to inqueue
+ */
+ msg_set_user(hdr, TIPC_CRITICAL_IMPORTANCE);
+ msg_set_type(hdr, TIPC_CONN_MSG);
+ msg_set_size(hdr, BASIC_H_SIZE);
+ msg_set_hdr_sz(hdr, BASIC_H_SIZE);
+ __skb_queue_tail(inputq, skb);
+ return;
}
tsk->probe_unacked = false;
tsk->conn_type = dest->addr.name.name.type;
tsk->conn_instance = dest->addr.name.name.instance;
}
+ msg_set_syn(hdr, 1);
}
seq = &dest->addr.nameseq;
rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
if (unlikely(rc != dlen))
return rc;
+ if (unlikely(syn && !tipc_msg_skb_clone(&pkts, &sk->sk_write_queue)))
+ return -ENOMEM;
rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
if (unlikely(rc == -ELINKCONG)) {
struct net *net = sock_net(sk);
struct tipc_msg *msg = &tsk->phdr;
+ msg_set_syn(msg, 0);
msg_set_destnode(msg, peer_node);
msg_set_destport(msg, peer_port);
msg_set_type(msg, TIPC_CONN_MSG);
tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid);
tsk->peer_caps = tipc_node_get_capabilities(net, peer_node);
+ __skb_queue_purge(&sk->sk_write_queue);
if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
return;
switch (msg_user(hdr)) {
case CONN_MANAGER:
- tipc_sk_conn_proto_rcv(tsk, skb, xmitq);
+ tipc_sk_conn_proto_rcv(tsk, skb, inputq, xmitq);
return;
case SOCK_WAKEUP:
tipc_dest_del(&tsk->cong_links, msg_orignode(hdr), 0);
}
/**
- * tipc_filter_connect - Handle incoming message for a connection-based socket
+ * tipc_sk_filter_connect - check incoming message for a connection-based socket
* @tsk: TIPC socket
- * @skb: pointer to message buffer. Set to NULL if buffer is consumed
- *
- * Returns true if everything ok, false otherwise
+ * @skb: pointer to message buffer.
+ * Returns true if message should be added to receive queue, false otherwise
*/
static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
{
struct sock *sk = &tsk->sk;
struct net *net = sock_net(sk);
struct tipc_msg *hdr = buf_msg(skb);
- u32 pport = msg_origport(hdr);
- u32 pnode = msg_orignode(hdr);
+ bool con_msg = msg_connected(hdr);
+ u32 pport = tsk_peer_port(tsk);
+ u32 pnode = tsk_peer_node(tsk);
+ u32 oport = msg_origport(hdr);
+ u32 onode = msg_orignode(hdr);
+ int err = msg_errcode(hdr);
+ unsigned long delay;
if (unlikely(msg_mcast(hdr)))
return false;
switch (sk->sk_state) {
case TIPC_CONNECTING:
- /* Accept only ACK or NACK message */
- if (unlikely(!msg_connected(hdr))) {
- if (pport != tsk_peer_port(tsk) ||
- pnode != tsk_peer_node(tsk))
- return false;
-
- tipc_set_sk_state(sk, TIPC_DISCONNECTING);
- sk->sk_err = ECONNREFUSED;
- sk->sk_state_change(sk);
- return true;
- }
-
- if (unlikely(msg_errcode(hdr))) {
- tipc_set_sk_state(sk, TIPC_DISCONNECTING);
- sk->sk_err = ECONNREFUSED;
- sk->sk_state_change(sk);
- return true;
- }
-
- if (unlikely(!msg_isdata(hdr))) {
- tipc_set_sk_state(sk, TIPC_DISCONNECTING);
- sk->sk_err = EINVAL;
- sk->sk_state_change(sk);
- return true;
+ /* Setup ACK */
+ if (likely(con_msg)) {
+ if (err)
+ break;
+ tipc_sk_finish_conn(tsk, oport, onode);
+ msg_set_importance(&tsk->phdr, msg_importance(hdr));
+ /* ACK+ message with data is added to receive queue */
+ if (msg_data_sz(hdr))
+ return true;
+ /* Empty ACK-, - wake up sleeping connect() and drop */
+ sk->sk_data_ready(sk);
+ msg_set_dest_droppable(hdr, 1);
+ return false;
}
+ /* Ignore connectionless message if not from listening socket */
+ if (oport != pport || onode != pnode)
+ return false;
- tipc_sk_finish_conn(tsk, msg_origport(hdr), msg_orignode(hdr));
- msg_set_importance(&tsk->phdr, msg_importance(hdr));
-
- /* If 'ACK+' message, add to socket receive queue */
- if (msg_data_sz(hdr))
- return true;
-
- /* If empty 'ACK-' message, wake up sleeping connect() */
- sk->sk_data_ready(sk);
+ /* Rejected SYN */
+ if (err != TIPC_ERR_OVERLOAD)
+ break;
- /* 'ACK-' message is neither accepted nor rejected: */
- msg_set_dest_droppable(hdr, 1);
+ /* Prepare for new setup attempt if we have a SYN clone */
+ if (skb_queue_empty(&sk->sk_write_queue))
+ break;
+ get_random_bytes(&delay, 2);
+ delay %= (tsk->conn_timeout / 4);
+ delay = msecs_to_jiffies(delay + 100);
+ sk_reset_timer(sk, &sk->sk_timer, jiffies + delay);
return false;
-
case TIPC_OPEN:
case TIPC_DISCONNECTING:
- break;
+ return false;
case TIPC_LISTEN:
/* Accept only SYN message */
- if (!msg_connected(hdr) && !(msg_errcode(hdr)))
+ if (!msg_is_syn(hdr) &&
+ tipc_node_get_capabilities(net, onode) & TIPC_SYN_BIT)
+ return false;
+ if (!con_msg && !err)
return true;
- break;
+ return false;
case TIPC_ESTABLISHED:
/* Accept only connection-based messages sent by peer */
- if (unlikely(!tsk_peer_msg(tsk, hdr)))
+ if (likely(con_msg && !err && pport == oport && pnode == onode))
+ return true;
+ if (!tsk_peer_msg(tsk, hdr))
return false;
-
- if (unlikely(msg_errcode(hdr))) {
- tipc_set_sk_state(sk, TIPC_DISCONNECTING);
- /* Let timer expire on it's own */
- tipc_node_remove_conn(net, tsk_peer_node(tsk),
- tsk->portid);
- sk->sk_state_change(sk);
- }
+ if (!err)
+ return true;
+ tipc_set_sk_state(sk, TIPC_DISCONNECTING);
+ tipc_node_remove_conn(net, pnode, tsk->portid);
+ sk->sk_state_change(sk);
return true;
default:
pr_err("Unknown sk_state %u\n", sk->sk_state);
}
-
- return false;
+ /* Abort connection setup attempt */
+ tipc_set_sk_state(sk, TIPC_DISCONNECTING);
+ sk->sk_err = ECONNREFUSED;
+ sk->sk_state_change(sk);
+ return true;
}
/**
return res;
}
+static void tipc_sk_check_probing_state(struct sock *sk,
+ struct sk_buff_head *list)
+{
+ struct tipc_sock *tsk = tipc_sk(sk);
+ u32 pnode = tsk_peer_node(tsk);
+ u32 pport = tsk_peer_port(tsk);
+ u32 self = tsk_own_node(tsk);
+ u32 oport = tsk->portid;
+ struct sk_buff *skb;
+
+ if (tsk->probe_unacked) {
+ tipc_set_sk_state(sk, TIPC_DISCONNECTING);
+ sk->sk_err = ECONNABORTED;
+ tipc_node_remove_conn(sock_net(sk), pnode, pport);
+ sk->sk_state_change(sk);
+ return;
+ }
+ /* Prepare new probe */
+ skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0,
+ pnode, self, pport, oport, TIPC_OK);
+ if (skb)
+ __skb_queue_tail(list, skb);
+ tsk->probe_unacked = true;
+ sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV);
+}
+
+static void tipc_sk_retry_connect(struct sock *sk, struct sk_buff_head *list)
+{
+ struct tipc_sock *tsk = tipc_sk(sk);
+
+ /* Try again later if dest link is congested */
+ if (tsk->cong_link_cnt) {
+ sk_reset_timer(sk, &sk->sk_timer, msecs_to_jiffies(100));
+ return;
+ }
+ /* Prepare SYN for retransmit */
+ tipc_msg_skb_clone(&sk->sk_write_queue, list);
+}
+
static void tipc_sk_timeout(struct timer_list *t)
{
struct sock *sk = from_timer(sk, t, sk_timer);
struct tipc_sock *tsk = tipc_sk(sk);
- u32 peer_port = tsk_peer_port(tsk);
- u32 peer_node = tsk_peer_node(tsk);
- u32 own_node = tsk_own_node(tsk);
- u32 own_port = tsk->portid;
- struct net *net = sock_net(sk);
- struct sk_buff *skb = NULL;
+ u32 pnode = tsk_peer_node(tsk);
+ struct sk_buff_head list;
+ int rc = 0;
+ skb_queue_head_init(&list);
bh_lock_sock(sk);
- if (!tipc_sk_connected(sk))
- goto exit;
/* Try again later if socket is busy */
if (sock_owned_by_user(sk)) {
sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 20);
- goto exit;
+ bh_unlock_sock(sk);
+ return;
}
- if (tsk->probe_unacked) {
- tipc_set_sk_state(sk, TIPC_DISCONNECTING);
- tipc_node_remove_conn(net, peer_node, peer_port);
- sk->sk_state_change(sk);
- goto exit;
- }
- /* Send new probe */
- skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0,
- peer_node, own_node, peer_port, own_port,
- TIPC_OK);
- tsk->probe_unacked = true;
- sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV);
-exit:
+ if (sk->sk_state == TIPC_ESTABLISHED)
+ tipc_sk_check_probing_state(sk, &list);
+ else if (sk->sk_state == TIPC_CONNECTING)
+ tipc_sk_retry_connect(sk, &list);
+
bh_unlock_sock(sk);
- if (skb)
- tipc_node_xmit_skb(net, skb, peer_node, own_port);
+
+ if (!skb_queue_empty(&list))
+ rc = tipc_node_xmit(sock_net(sk), &list, pnode, tsk->portid);
+
+ /* SYN messages may cause link congestion */
+ if (rc == -ELINKCONG) {
+ tipc_dest_push(&tsk->cong_links, pnode, 0);
+ tsk->cong_link_cnt = 1;
+ }
sock_put(sk);
}