From: David S. Miller Date: Sat, 13 Oct 2018 04:38:46 +0000 (-0700) Subject: Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net X-Git-Tag: v4.20-rc1~27^2~84 X-Git-Url: https://repo.jachan.dev/J-linux.git/commitdiff_plain/d864991b220b7c62e81d21209e1fd978fd67352c?hp=-c Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net Conflicts were easy to resolve using immediate context mostly, except the cls_u32.c one where I simply took the entire HEAD chunk. Signed-off-by: David S. Miller --- d864991b220b7c62e81d21209e1fd978fd67352c diff --combined MAINTAINERS index fe223e606b9d,6ac000cc006d..6d5161def3f3 --- a/MAINTAINERS +++ b/MAINTAINERS @@@ -4527,13 -4527,9 +4527,13 @@@ F: drivers/soc/fsl/dpi DPAA2 ETHERNET DRIVER M: Ioana Radulescu -L: linux-kernel@vger.kernel.org +L: netdev@vger.kernel.org S: Maintained -F: drivers/staging/fsl-dpaa2/ethernet +F: drivers/net/ethernet/freescale/dpaa2/dpaa2-eth* +F: drivers/net/ethernet/freescale/dpaa2/dpni* +F: drivers/net/ethernet/freescale/dpaa2/dpkg.h +F: drivers/net/ethernet/freescale/dpaa2/Makefile +F: drivers/net/ethernet/freescale/dpaa2/Kconfig DPAA2 ETHERNET SWITCH DRIVER M: Ioana Radulescu @@@ -4544,10 -4540,9 +4544,10 @@@ F: drivers/staging/fsl-dpaa2/eths DPAA2 PTP CLOCK DRIVER M: Yangbo Lu -L: linux-kernel@vger.kernel.org +L: netdev@vger.kernel.org S: Maintained -F: drivers/staging/fsl-dpaa2/rtc +F: drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp* +F: drivers/net/ethernet/freescale/dpaa2/dprtc* DPT_I2O SCSI RAID DRIVER M: Adaptec OEM Raid Solutions @@@ -7353,7 -7348,7 +7353,7 @@@ F: Documentation/networking/ixgb.tx F: Documentation/networking/ixgbe.txt F: Documentation/networking/ixgbevf.txt F: Documentation/networking/i40e.txt -F: Documentation/networking/i40evf.txt +F: Documentation/networking/iavf.txt F: Documentation/networking/ice.txt F: drivers/net/ethernet/intel/ F: drivers/net/ethernet/intel/*/ @@@ -8188,15 -8183,6 +8188,15 @@@ S: Maintaine F: net/l3mdev F: include/net/l3mdev.h +LANTIQ / INTEL Ethernet drivers +M: Hauke Mehrtens +L: netdev@vger.kernel.org +S: Maintained +F: net/dsa/tag_gswip.c +F: drivers/net/ethernet/lantiq_xrx200.c +F: drivers/net/dsa/lantiq_pce.h +F: drivers/net/dsa/lantiq_gswip.c + LANTIQ MIPS ARCHITECTURE M: John Crispin L: linux-mips@linux-mips.org @@@ -8757,7 -8743,7 +8757,7 @@@ M: Vivien Didelot +M: Linu Cherian +M: Geetha sowjanya +M: Jerin Jacob +L: netdev@vger.kernel.org +S: Supported +F: drivers/net/ethernet/marvell/octeontx2/af/ + MATROX FRAMEBUFFER DRIVER L: linux-fbdev@vger.kernel.org S: Orphan @@@ -9680,7 -9657,8 +9680,8 @@@ MIPS/LOONGSON2 ARCHITECTUR M: Jiaxun Yang L: linux-mips@linux-mips.org S: Maintained - F: arch/mips/loongson64/*{2e/2f}* + F: arch/mips/loongson64/fuloong-2e/ + F: arch/mips/loongson64/lemote-2f/ F: arch/mips/include/asm/mach-loongson64/ F: drivers/*/*loongson2* F: drivers/*/*/*loongson2* @@@ -9887,7 -9865,7 +9888,7 @@@ M: Peter Rosin priv = qca; if (hu->serdev) { - serdev_device_open(hu->serdev); qcadev = serdev_device_get_drvdata(hu->serdev); if (qcadev->btsoc_type != QCA_WCN3990) { @@@ -609,10 -609,11 +609,10 @@@ static int qca_close(struct hci_uart *h if (hu->serdev) { qcadev = serdev_device_get_drvdata(hu->serdev); if (qcadev->btsoc_type == QCA_WCN3990) - qca_power_shutdown(hu->hdev); + qca_power_shutdown(hu); else gpiod_set_value_cansleep(qcadev->bt_en, 0); - serdev_device_close(hu->serdev); } kfree_skb(qca->rx_skb); @@@ -1100,26 -1101,8 +1100,26 @@@ static int qca_set_speed(struct hci_uar static int qca_wcn3990_init(struct hci_uart *hu) { struct hci_dev *hdev = 
hu->hdev; + struct qca_serdev *qcadev; int ret; + /* Check for vregs status, may be hci down has turned + * off the voltage regulator. + */ + qcadev = serdev_device_get_drvdata(hu->serdev); + if (!qcadev->bt_power->vregs_on) { + serdev_device_close(hu->serdev); + ret = qca_power_setup(hu, true); + if (ret) + return ret; + + ret = serdev_device_open(hu->serdev); + if (ret) { + bt_dev_err(hu->hdev, "failed to open port"); + return ret; + } + } + /* Forcefully enable wcn3990 to enter in to boot mode. */ host_set_baudrate(hu, 2400); ret = qca_send_power_pulse(hdev, QCA_WCN3990_POWEROFF_PULSE); @@@ -1171,12 -1154,6 +1171,12 @@@ static int qca_setup(struct hci_uart *h if (qcadev->btsoc_type == QCA_WCN3990) { bt_dev_info(hdev, "setting up wcn3990"); + + /* Enable NON_PERSISTENT_SETUP QUIRK to ensure to execute + * setup for every hci up. + */ + set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks); + hu->hdev->shutdown = qca_power_off; ret = qca_wcn3990_init(hu); if (ret) return ret; @@@ -1255,26 -1232,15 +1255,26 @@@ static const struct qca_vreg_data qca_s .num_vregs = 4, }; -static void qca_power_shutdown(struct hci_dev *hdev) +static void qca_power_shutdown(struct hci_uart *hu) { - struct hci_uart *hu = hci_get_drvdata(hdev); + struct serdev_device *serdev = hu->serdev; + unsigned char cmd = QCA_WCN3990_POWEROFF_PULSE; host_set_baudrate(hu, 2400); - qca_send_power_pulse(hdev, QCA_WCN3990_POWEROFF_PULSE); + hci_uart_set_flow_control(hu, true); + serdev_device_write_buf(serdev, &cmd, sizeof(cmd)); + hci_uart_set_flow_control(hu, false); qca_power_setup(hu, false); } +static int qca_power_off(struct hci_dev *hdev) +{ + struct hci_uart *hu = hci_get_drvdata(hdev); + + qca_power_shutdown(hu); + return 0; +} + static int qca_enable_regulator(struct qca_vreg vregs, struct regulator *regulator) { @@@ -1356,7 -1322,7 +1356,7 @@@ static int qca_init_regulators(struct q { int i; - qca->vreg_bulk = devm_kzalloc(qca->dev, num_vregs * + qca->vreg_bulk = devm_kcalloc(qca->dev, num_vregs, sizeof(struct regulator_bulk_data), GFP_KERNEL); if (!qca->vreg_bulk) @@@ -1447,7 -1413,7 +1447,7 @@@ static void qca_serdev_remove(struct se struct qca_serdev *qcadev = serdev_device_get_drvdata(serdev); if (qcadev->btsoc_type == QCA_WCN3990) - qca_power_shutdown(qcadev->serdev_hu.hdev); + qca_power_shutdown(&qcadev->serdev_hu); else clk_disable_unprepare(qcadev->susclk); diff --combined drivers/net/ethernet/amazon/ena/ena_eth_com.c index 6f8e15b9b3cf,2b3ff0c20155..f6c2d3855be8 --- a/drivers/net/ethernet/amazon/ena/ena_eth_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c @@@ -59,7 -59,16 +59,7 @@@ static inline struct ena_eth_io_rx_cdes return cdesc; } -static inline void ena_com_cq_inc_head(struct ena_com_io_cq *io_cq) -{ - io_cq->head++; - - /* Switch phase bit in case of wrap around */ - if (unlikely((io_cq->head & (io_cq->q_depth - 1)) == 0)) - io_cq->phase ^= 1; -} - -static inline void *get_sq_desc(struct ena_com_io_sq *io_sq) +static inline void *get_sq_desc_regular_queue(struct ena_com_io_sq *io_sq) { u16 tail_masked; u32 offset; @@@ -71,159 -80,45 +71,159 @@@ return (void *)((uintptr_t)io_sq->desc_addr.virt_addr + offset); } -static inline void ena_com_copy_curr_sq_desc_to_dev(struct ena_com_io_sq *io_sq) +static inline int ena_com_write_bounce_buffer_to_dev(struct ena_com_io_sq *io_sq, + u8 *bounce_buffer) { - u16 tail_masked = io_sq->tail & (io_sq->q_depth - 1); - u32 offset = tail_masked * io_sq->desc_entry_size; + struct ena_com_llq_info *llq_info = &io_sq->llq_info; - /* In case this queue isn't a 
LLQ */ - if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) - return; + u16 dst_tail_mask; + u32 dst_offset; - memcpy_toio(io_sq->desc_addr.pbuf_dev_addr + offset, - io_sq->desc_addr.virt_addr + offset, - io_sq->desc_entry_size); -} + dst_tail_mask = io_sq->tail & (io_sq->q_depth - 1); + dst_offset = dst_tail_mask * llq_info->desc_list_entry_size; + + /* Make sure everything was written into the bounce buffer before + * writing the bounce buffer to the device + */ + wmb(); + + /* The line is completed. Copy it to dev */ + __iowrite64_copy(io_sq->desc_addr.pbuf_dev_addr + dst_offset, + bounce_buffer, (llq_info->desc_list_entry_size) / 8); -static inline void ena_com_sq_update_tail(struct ena_com_io_sq *io_sq) -{ io_sq->tail++; /* Switch phase bit in case of wrap around */ if (unlikely((io_sq->tail & (io_sq->q_depth - 1)) == 0)) io_sq->phase ^= 1; + + return 0; } -static inline int ena_com_write_header(struct ena_com_io_sq *io_sq, - u8 *head_src, u16 header_len) +static inline int ena_com_write_header_to_bounce(struct ena_com_io_sq *io_sq, + u8 *header_src, + u16 header_len) { - u16 tail_masked = io_sq->tail & (io_sq->q_depth - 1); - u8 __iomem *dev_head_addr = - io_sq->header_addr + (tail_masked * io_sq->tx_max_header_size); + struct ena_com_llq_pkt_ctrl *pkt_ctrl = &io_sq->llq_buf_ctrl; + struct ena_com_llq_info *llq_info = &io_sq->llq_info; + u8 *bounce_buffer = pkt_ctrl->curr_bounce_buf; + u16 header_offset; - if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) + if (unlikely(io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)) return 0; - if (unlikely(!io_sq->header_addr)) { - pr_err("Push buffer header ptr is NULL\n"); - return -EINVAL; + header_offset = + llq_info->descs_num_before_header * io_sq->desc_entry_size; + + if (unlikely((header_offset + header_len) > + llq_info->desc_list_entry_size)) { + pr_err("trying to write header larger than llq entry can accommodate\n"); + return -EFAULT; + } + + if (unlikely(!bounce_buffer)) { + pr_err("bounce buffer is NULL\n"); + return -EFAULT; + } + + memcpy(bounce_buffer + header_offset, header_src, header_len); + + return 0; +} + +static inline void *get_sq_desc_llq(struct ena_com_io_sq *io_sq) +{ + struct ena_com_llq_pkt_ctrl *pkt_ctrl = &io_sq->llq_buf_ctrl; + u8 *bounce_buffer; + void *sq_desc; + + bounce_buffer = pkt_ctrl->curr_bounce_buf; + + if (unlikely(!bounce_buffer)) { + pr_err("bounce buffer is NULL\n"); + return NULL; + } + + sq_desc = bounce_buffer + pkt_ctrl->idx * io_sq->desc_entry_size; + pkt_ctrl->idx++; + pkt_ctrl->descs_left_in_line--; + + return sq_desc; +} + +static inline int ena_com_close_bounce_buffer(struct ena_com_io_sq *io_sq) +{ + struct ena_com_llq_pkt_ctrl *pkt_ctrl = &io_sq->llq_buf_ctrl; + struct ena_com_llq_info *llq_info = &io_sq->llq_info; + int rc; + + if (unlikely(io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)) + return 0; + + /* bounce buffer was used, so write it and get a new one */ + if (pkt_ctrl->idx) { + rc = ena_com_write_bounce_buffer_to_dev(io_sq, + pkt_ctrl->curr_bounce_buf); + if (unlikely(rc)) + return rc; + + pkt_ctrl->curr_bounce_buf = + ena_com_get_next_bounce_buffer(&io_sq->bounce_buf_ctrl); + memset(io_sq->llq_buf_ctrl.curr_bounce_buf, + 0x0, llq_info->desc_list_entry_size); + } + + pkt_ctrl->idx = 0; + pkt_ctrl->descs_left_in_line = llq_info->descs_num_before_header; + return 0; +} + +static inline void *get_sq_desc(struct ena_com_io_sq *io_sq) +{ + if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) + return get_sq_desc_llq(io_sq); + + return 
get_sq_desc_regular_queue(io_sq); +} + +static inline int ena_com_sq_update_llq_tail(struct ena_com_io_sq *io_sq) +{ + struct ena_com_llq_pkt_ctrl *pkt_ctrl = &io_sq->llq_buf_ctrl; + struct ena_com_llq_info *llq_info = &io_sq->llq_info; + int rc; + + if (!pkt_ctrl->descs_left_in_line) { + rc = ena_com_write_bounce_buffer_to_dev(io_sq, + pkt_ctrl->curr_bounce_buf); + if (unlikely(rc)) + return rc; + + pkt_ctrl->curr_bounce_buf = + ena_com_get_next_bounce_buffer(&io_sq->bounce_buf_ctrl); + memset(io_sq->llq_buf_ctrl.curr_bounce_buf, + 0x0, llq_info->desc_list_entry_size); + + pkt_ctrl->idx = 0; + if (unlikely(llq_info->desc_stride_ctrl == ENA_ADMIN_SINGLE_DESC_PER_ENTRY)) + pkt_ctrl->descs_left_in_line = 1; + else + pkt_ctrl->descs_left_in_line = + llq_info->desc_list_entry_size / io_sq->desc_entry_size; } - memcpy_toio(dev_head_addr, head_src, header_len); + return 0; +} + +static inline int ena_com_sq_update_tail(struct ena_com_io_sq *io_sq) +{ + if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) + return ena_com_sq_update_llq_tail(io_sq); + + io_sq->tail++; + + /* Switch phase bit in case of wrap around */ + if (unlikely((io_sq->tail & (io_sq->q_depth - 1)) == 0)) + io_sq->phase ^= 1; return 0; } @@@ -291,8 -186,8 +291,8 @@@ static inline bool ena_com_meta_desc_ch return false; } -static inline void ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io_sq, - struct ena_com_tx_ctx *ena_tx_ctx) +static inline int ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io_sq, + struct ena_com_tx_ctx *ena_tx_ctx) { struct ena_eth_io_tx_meta_desc *meta_desc = NULL; struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta; @@@ -337,7 -232,8 +337,7 @@@ memcpy(&io_sq->cached_tx_meta, ena_meta, sizeof(struct ena_com_tx_meta)); - ena_com_copy_curr_sq_desc_to_dev(io_sq); - ena_com_sq_update_tail(io_sq); + return ena_com_sq_update_tail(io_sq); } static inline void ena_com_rx_set_flags(struct ena_com_rx_ctx *ena_rx_ctx, @@@ -349,14 -245,11 +349,14 @@@ (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_MASK) >> ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_SHIFT; ena_rx_ctx->l3_csum_err = - (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_MASK) >> - ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_SHIFT; + !!((cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_SHIFT); ena_rx_ctx->l4_csum_err = - (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK) >> - ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT; + !!((cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT); + ena_rx_ctx->l4_csum_checked = + !!((cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_CHECKED_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_CHECKED_SHIFT); ena_rx_ctx->hash = cdesc->hash; ena_rx_ctx->frag = (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_MASK) >> @@@ -378,19 -271,18 +378,19 @@@ int ena_com_prepare_tx(struct ena_com_i { struct ena_eth_io_tx_desc *desc = NULL; struct ena_com_buf *ena_bufs = ena_tx_ctx->ena_bufs; - void *push_header = ena_tx_ctx->push_header; + void *buffer_to_push = ena_tx_ctx->push_header; u16 header_len = ena_tx_ctx->header_len; u16 num_bufs = ena_tx_ctx->num_bufs; - int total_desc, i, rc; + u16 start_tail = io_sq->tail; + int i, rc; bool have_meta; u64 addr_hi; WARN(io_sq->direction != ENA_COM_IO_QUEUE_DIRECTION_TX, "wrong Q type"); /* num_bufs +1 for potential meta desc */ - if (ena_com_sq_empty_space(io_sq) < (num_bufs + 1)) { - pr_err("Not enough space in the tx queue\n"); + if 
(unlikely(!ena_com_sq_have_enough_space(io_sq, num_bufs + 1))) { + pr_debug("Not enough space in the tx queue\n"); return -ENOMEM; } @@@ -400,32 -292,23 +400,32 @@@ return -EINVAL; } - /* start with pushing the header (if needed) */ - rc = ena_com_write_header(io_sq, push_header, header_len); + if (unlikely(io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV && + !buffer_to_push)) + return -EINVAL; + + rc = ena_com_write_header_to_bounce(io_sq, buffer_to_push, header_len); if (unlikely(rc)) return rc; have_meta = ena_tx_ctx->meta_valid && ena_com_meta_desc_changed(io_sq, ena_tx_ctx); - if (have_meta) - ena_com_create_and_store_tx_meta_desc(io_sq, ena_tx_ctx); + if (have_meta) { + rc = ena_com_create_and_store_tx_meta_desc(io_sq, ena_tx_ctx); + if (unlikely(rc)) + return rc; + } - /* If the caller doesn't want send packets */ + /* If the caller doesn't want to send packets */ if (unlikely(!num_bufs && !header_len)) { - *nb_hw_desc = have_meta ? 0 : 1; - return 0; + rc = ena_com_close_bounce_buffer(io_sq); + *nb_hw_desc = io_sq->tail - start_tail; + return rc; } desc = get_sq_desc(io_sq); + if (unlikely(!desc)) + return -EFAULT; memset(desc, 0x0, sizeof(struct ena_eth_io_tx_desc)); /* Set first desc when we don't have meta descriptor */ @@@ -477,14 -360,10 +477,14 @@@ for (i = 0; i < num_bufs; i++) { /* The first desc share the same desc as the header */ if (likely(i != 0)) { - ena_com_copy_curr_sq_desc_to_dev(io_sq); - ena_com_sq_update_tail(io_sq); + rc = ena_com_sq_update_tail(io_sq); + if (unlikely(rc)) + return rc; desc = get_sq_desc(io_sq); + if (unlikely(!desc)) + return -EFAULT; + memset(desc, 0x0, sizeof(struct ena_eth_io_tx_desc)); desc->len_ctrl |= (io_sq->phase << @@@ -507,14 -386,15 +507,14 @@@ /* set the last desc indicator */ desc->len_ctrl |= ENA_ETH_IO_TX_DESC_LAST_MASK; - ena_com_copy_curr_sq_desc_to_dev(io_sq); - - ena_com_sq_update_tail(io_sq); + rc = ena_com_sq_update_tail(io_sq); + if (unlikely(rc)) + return rc; - total_desc = max_t(u16, num_bufs, 1); - total_desc += have_meta ? 
1 : 0; + rc = ena_com_close_bounce_buffer(io_sq); - *nb_hw_desc = total_desc; - return 0; + *nb_hw_desc = io_sq->tail - start_tail; + return rc; } int ena_com_rx_pkt(struct ena_com_io_cq *io_cq, @@@ -573,18 -453,15 +573,18 @@@ int ena_com_add_single_rx_desc(struct e WARN(io_sq->direction != ENA_COM_IO_QUEUE_DIRECTION_RX, "wrong Q type"); - if (unlikely(ena_com_sq_empty_space(io_sq) == 0)) + if (unlikely(!ena_com_sq_have_enough_space(io_sq, 1))) return -ENOSPC; desc = get_sq_desc(io_sq); + if (unlikely(!desc)) + return -EFAULT; + memset(desc, 0x0, sizeof(struct ena_eth_io_rx_desc)); desc->length = ena_buf->len; - desc->ctrl |= ENA_ETH_IO_RX_DESC_FIRST_MASK; + desc->ctrl = ENA_ETH_IO_RX_DESC_FIRST_MASK; desc->ctrl |= ENA_ETH_IO_RX_DESC_LAST_MASK; desc->ctrl |= io_sq->phase & ENA_ETH_IO_RX_DESC_PHASE_MASK; desc->ctrl |= ENA_ETH_IO_RX_DESC_COMP_REQ_MASK; @@@ -595,7 -472,43 +595,7 @@@ desc->buff_addr_hi = ((ena_buf->paddr & GENMASK_ULL(io_sq->dma_addr_bits - 1, 32)) >> 32); - ena_com_sq_update_tail(io_sq); - - return 0; -} - -int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq, u16 *req_id) -{ - u8 expected_phase, cdesc_phase; - struct ena_eth_io_tx_cdesc *cdesc; - u16 masked_head; - - masked_head = io_cq->head & (io_cq->q_depth - 1); - expected_phase = io_cq->phase; - - cdesc = (struct ena_eth_io_tx_cdesc *) - ((uintptr_t)io_cq->cdesc_addr.virt_addr + - (masked_head * io_cq->cdesc_entry_size_in_bytes)); - - /* When the current completion descriptor phase isn't the same as the - * expected, it mean that the device still didn't update - * this completion. - */ - cdesc_phase = READ_ONCE(cdesc->flags) & ENA_ETH_IO_TX_CDESC_PHASE_MASK; - if (cdesc_phase != expected_phase) - return -EAGAIN; - - dma_rmb(); - if (unlikely(cdesc->req_id >= io_cq->q_depth)) { - pr_err("Invalid req id %d\n", cdesc->req_id); - return -EINVAL; - } - - ena_com_cq_inc_head(io_cq); - - *req_id = READ_ONCE(cdesc->req_id); - - return 0; + return ena_com_sq_update_tail(io_sq); } bool ena_com_cq_empty(struct ena_com_io_cq *io_cq) diff --combined drivers/net/ethernet/amazon/ena/ena_netdev.c index 3494d4ac9932,d906293ce07d..284a0a612131 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@@ -39,6 -39,7 +39,6 @@@ #include #include #include -#include #include #include #include @@@ -237,17 -238,6 +237,17 @@@ static int ena_setup_tx_resources(struc } } + size = tx_ring->tx_max_header_size; + tx_ring->push_buf_intermediate_buf = vzalloc_node(size, node); + if (!tx_ring->push_buf_intermediate_buf) { + tx_ring->push_buf_intermediate_buf = vzalloc(size); + if (!tx_ring->push_buf_intermediate_buf) { + vfree(tx_ring->tx_buffer_info); + vfree(tx_ring->free_tx_ids); + return -ENOMEM; + } + } + /* Req id ring for TX out of order completions */ for (i = 0; i < tx_ring->ring_size; i++) tx_ring->free_tx_ids[i] = i; @@@ -276,9 -266,6 +276,9 @@@ static void ena_free_tx_resources(struc vfree(tx_ring->free_tx_ids); tx_ring->free_tx_ids = NULL; + + vfree(tx_ring->push_buf_intermediate_buf); + tx_ring->push_buf_intermediate_buf = NULL; } /* ena_setup_all_tx_resources - allocate I/O Tx queues resources for All queues @@@ -616,36 -603,6 +616,36 @@@ static void ena_free_all_rx_bufs(struc ena_free_rx_bufs(adapter, i); } +static inline void ena_unmap_tx_skb(struct ena_ring *tx_ring, + struct ena_tx_buffer *tx_info) +{ + struct ena_com_buf *ena_buf; + u32 cnt; + int i; + + ena_buf = tx_info->bufs; + cnt = tx_info->num_of_bufs; + + if (unlikely(!cnt)) + return; + + if (tx_info->map_linear_data) { + 
dma_unmap_single(tx_ring->dev, + dma_unmap_addr(ena_buf, paddr), + dma_unmap_len(ena_buf, len), + DMA_TO_DEVICE); + ena_buf++; + cnt--; + } + + /* unmap remaining mapped pages */ + for (i = 0; i < cnt; i++) { + dma_unmap_page(tx_ring->dev, dma_unmap_addr(ena_buf, paddr), + dma_unmap_len(ena_buf, len), DMA_TO_DEVICE); + ena_buf++; + } +} + /* ena_free_tx_bufs - Free Tx Buffers per Queue * @tx_ring: TX ring for which buffers be freed */ @@@ -656,6 -613,9 +656,6 @@@ static void ena_free_tx_bufs(struct ena for (i = 0; i < tx_ring->ring_size; i++) { struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i]; - struct ena_com_buf *ena_buf; - int nr_frags; - int j; if (!tx_info->skb) continue; @@@ -671,7 -631,21 +671,7 @@@ tx_ring->qid, i); } - ena_buf = tx_info->bufs; - dma_unmap_single(tx_ring->dev, - ena_buf->paddr, - ena_buf->len, - DMA_TO_DEVICE); - - /* unmap remaining mapped pages */ - nr_frags = tx_info->num_of_bufs - 1; - for (j = 0; j < nr_frags; j++) { - ena_buf++; - dma_unmap_page(tx_ring->dev, - ena_buf->paddr, - ena_buf->len, - DMA_TO_DEVICE); - } + ena_unmap_tx_skb(tx_ring, tx_info); dev_kfree_skb_any(tx_info->skb); } @@@ -762,6 -736,8 +762,6 @@@ static int ena_clean_tx_irq(struct ena_ while (tx_pkts < budget) { struct ena_tx_buffer *tx_info; struct sk_buff *skb; - struct ena_com_buf *ena_buf; - int i, nr_frags; rc = ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq, &req_id); @@@ -781,7 -757,24 +781,7 @@@ tx_info->skb = NULL; tx_info->last_jiffies = 0; - if (likely(tx_info->num_of_bufs != 0)) { - ena_buf = tx_info->bufs; - - dma_unmap_single(tx_ring->dev, - dma_unmap_addr(ena_buf, paddr), - dma_unmap_len(ena_buf, len), - DMA_TO_DEVICE); - - /* unmap remaining mapped pages */ - nr_frags = tx_info->num_of_bufs - 1; - for (i = 0; i < nr_frags; i++) { - ena_buf++; - dma_unmap_page(tx_ring->dev, - dma_unmap_addr(ena_buf, paddr), - dma_unmap_len(ena_buf, len), - DMA_TO_DEVICE); - } - } + ena_unmap_tx_skb(tx_ring, tx_info); netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev, "tx_poll: q %d skb %p completed\n", tx_ring->qid, @@@ -812,13 -805,12 +812,13 @@@ */ smp_mb(); - above_thresh = ena_com_sq_empty_space(tx_ring->ena_com_io_sq) > - ENA_TX_WAKEUP_THRESH; + above_thresh = ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, + ENA_TX_WAKEUP_THRESH); if (unlikely(netif_tx_queue_stopped(txq) && above_thresh)) { __netif_tx_lock(txq, smp_processor_id()); - above_thresh = ena_com_sq_empty_space(tx_ring->ena_com_io_sq) > - ENA_TX_WAKEUP_THRESH; + above_thresh = + ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, + ENA_TX_WAKEUP_THRESH); if (netif_tx_queue_stopped(txq) && above_thresh) { netif_tx_wake_queue(txq); u64_stats_update_begin(&tx_ring->syncp); @@@ -994,19 -986,8 +994,19 @@@ static inline void ena_rx_checksum(stru return; } - skb->ip_summed = CHECKSUM_UNNECESSARY; + if (likely(ena_rx_ctx->l4_csum_checked)) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + } else { + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.csum_unchecked++; + u64_stats_update_end(&rx_ring->syncp); + skb->ip_summed = CHECKSUM_NONE; + } + } else { + skb->ip_summed = CHECKSUM_NONE; + return; } + } static void ena_set_rx_hash(struct ena_ring *rx_ring, @@@ -1121,10 -1102,8 +1121,10 @@@ static int ena_clean_rx_irq(struct ena_ rx_ring->next_to_clean = next_to_clean; - refill_required = ena_com_sq_empty_space(rx_ring->ena_com_io_sq); - refill_threshold = rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER; + refill_required = ena_com_free_desc(rx_ring->ena_com_io_sq); + refill_threshold = + min_t(int, 
rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER, + ENA_RX_REFILL_THRESH_PACKET); /* Optimization, try to batch new rx buffers */ if (refill_required > refill_threshold) { @@@ -1321,6 -1300,7 +1321,6 @@@ static int ena_enable_msix(struct ena_a /* Reserved the max msix vectors we might need */ msix_vecs = ENA_MAX_MSIX_VEC(num_queues); - netif_dbg(adapter, probe, adapter->netdev, "trying to enable MSI-X, vectors %d\n", msix_vecs); @@@ -1595,8 -1575,6 +1595,6 @@@ static int ena_up_complete(struct ena_a if (rc) return rc; - ena_init_napi(adapter); - ena_change_mtu(adapter->netdev, adapter->netdev->mtu); ena_refill_all_rx_bufs(adapter); @@@ -1613,7 -1591,7 +1611,7 @@@ static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid) { - struct ena_com_create_io_ctx ctx = { 0 }; + struct ena_com_create_io_ctx ctx; struct ena_com_dev *ena_dev; struct ena_ring *tx_ring; u32 msix_vector; @@@ -1626,8 -1604,6 +1624,8 @@@ msix_vector = ENA_IO_IRQ_IDX(qid); ena_qid = ENA_IO_TXQ_IDX(qid); + memset(&ctx, 0x0, sizeof(ctx)); + ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX; ctx.qid = ena_qid; ctx.mem_queue_type = ena_dev->tx_mem_queue_type; @@@ -1681,7 -1657,7 +1679,7 @@@ create_err static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid) { struct ena_com_dev *ena_dev; - struct ena_com_create_io_ctx ctx = { 0 }; + struct ena_com_create_io_ctx ctx; struct ena_ring *rx_ring; u32 msix_vector; u16 ena_qid; @@@ -1693,8 -1669,6 +1691,8 @@@ msix_vector = ENA_IO_IRQ_IDX(qid); ena_qid = ENA_IO_RXQ_IDX(qid); + memset(&ctx, 0x0, sizeof(ctx)); + ctx.qid = ena_qid; ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX; ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; @@@ -1754,6 -1728,13 +1752,13 @@@ static int ena_up(struct ena_adapter *a ena_setup_io_intr(adapter); + /* napi poll functions should be initialized before running + * request_irq(), to handle a rare condition where there is a pending + * interrupt, causing the ISR to fire immediately while the poll + * function wasn't set yet, causing a null dereference + */ + ena_init_napi(adapter); + rc = ena_request_io_irq(adapter); if (rc) goto err_req_irq; @@@ -2005,70 -1986,73 +2010,70 @@@ static int ena_check_and_linearize_skb( return rc; } -/* Called with netif_tx_lock. 
*/ -static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) +static int ena_tx_map_skb(struct ena_ring *tx_ring, + struct ena_tx_buffer *tx_info, + struct sk_buff *skb, + void **push_hdr, + u16 *header_len) { - struct ena_adapter *adapter = netdev_priv(dev); - struct ena_tx_buffer *tx_info; - struct ena_com_tx_ctx ena_tx_ctx; - struct ena_ring *tx_ring; - struct netdev_queue *txq; + struct ena_adapter *adapter = tx_ring->adapter; struct ena_com_buf *ena_buf; - void *push_hdr; - u32 len, last_frag; - u16 next_to_use; - u16 req_id; - u16 push_len; - u16 header_len; dma_addr_t dma; - int qid, rc, nb_hw_desc; - int i = -1; - - netif_dbg(adapter, tx_queued, dev, "%s skb %p\n", __func__, skb); - /* Determine which tx ring we will be placed on */ - qid = skb_get_queue_mapping(skb); - tx_ring = &adapter->tx_ring[qid]; - txq = netdev_get_tx_queue(dev, qid); + u32 skb_head_len, frag_len, last_frag; + u16 push_len = 0; + u16 delta = 0; + int i = 0; - rc = ena_check_and_linearize_skb(tx_ring, skb); - if (unlikely(rc)) - goto error_drop_packet; - - skb_tx_timestamp(skb); - len = skb_headlen(skb); - - next_to_use = tx_ring->next_to_use; - req_id = tx_ring->free_tx_ids[next_to_use]; - tx_info = &tx_ring->tx_buffer_info[req_id]; - tx_info->num_of_bufs = 0; - - WARN(tx_info->skb, "SKB isn't NULL req_id %d\n", req_id); - ena_buf = tx_info->bufs; + skb_head_len = skb_headlen(skb); tx_info->skb = skb; + ena_buf = tx_info->bufs; if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { - /* prepared the push buffer */ - push_len = min_t(u32, len, tx_ring->tx_max_header_size); - header_len = push_len; - push_hdr = skb->data; + /* When the device is LLQ mode, the driver will copy + * the header into the device memory space. + * the ena_com layer assume the header is in a linear + * memory space. + * This assumption might be wrong since part of the header + * can be in the fragmented buffers. + * Use skb_header_pointer to make sure the header is in a + * linear memory space. 
+ */ + + push_len = min_t(u32, skb->len, tx_ring->tx_max_header_size); + *push_hdr = skb_header_pointer(skb, 0, push_len, + tx_ring->push_buf_intermediate_buf); + *header_len = push_len; + if (unlikely(skb->data != *push_hdr)) { + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.llq_buffer_copy++; + u64_stats_update_end(&tx_ring->syncp); + + delta = push_len - skb_head_len; + } } else { - push_len = 0; - header_len = min_t(u32, len, tx_ring->tx_max_header_size); - push_hdr = NULL; + *push_hdr = NULL; + *header_len = min_t(u32, skb_head_len, + tx_ring->tx_max_header_size); } - netif_dbg(adapter, tx_queued, dev, + netif_dbg(adapter, tx_queued, adapter->netdev, "skb: %p header_buf->vaddr: %p push_len: %d\n", skb, - push_hdr, push_len); + *push_hdr, push_len); - if (len > push_len) { + if (skb_head_len > push_len) { dma = dma_map_single(tx_ring->dev, skb->data + push_len, - len - push_len, DMA_TO_DEVICE); - if (dma_mapping_error(tx_ring->dev, dma)) + skb_head_len - push_len, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(tx_ring->dev, dma))) goto error_report_dma_error; ena_buf->paddr = dma; - ena_buf->len = len - push_len; + ena_buf->len = skb_head_len - push_len; ena_buf++; tx_info->num_of_bufs++; + tx_info->map_linear_data = 1; + } else { + tx_info->map_linear_data = 0; } last_frag = skb_shinfo(skb)->nr_frags; @@@ -2076,75 -2060,18 +2081,75 @@@ for (i = 0; i < last_frag; i++) { const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - len = skb_frag_size(frag); - dma = skb_frag_dma_map(tx_ring->dev, frag, 0, len, - DMA_TO_DEVICE); - if (dma_mapping_error(tx_ring->dev, dma)) + frag_len = skb_frag_size(frag); + + if (unlikely(delta >= frag_len)) { + delta -= frag_len; + continue; + } + + dma = skb_frag_dma_map(tx_ring->dev, frag, delta, + frag_len - delta, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(tx_ring->dev, dma))) goto error_report_dma_error; ena_buf->paddr = dma; - ena_buf->len = len; + ena_buf->len = frag_len - delta; ena_buf++; + tx_info->num_of_bufs++; + delta = 0; } - tx_info->num_of_bufs += last_frag; + return 0; + +error_report_dma_error: + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.dma_mapping_err++; + u64_stats_update_end(&tx_ring->syncp); + netdev_warn(adapter->netdev, "failed to map skb\n"); + + tx_info->skb = NULL; + + tx_info->num_of_bufs += i; + ena_unmap_tx_skb(tx_ring, tx_info); + + return -EINVAL; +} + +/* Called with netif_tx_lock. 
*/ +static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct ena_adapter *adapter = netdev_priv(dev); + struct ena_tx_buffer *tx_info; + struct ena_com_tx_ctx ena_tx_ctx; + struct ena_ring *tx_ring; + struct netdev_queue *txq; + void *push_hdr; + u16 next_to_use, req_id, header_len; + int qid, rc, nb_hw_desc; + + netif_dbg(adapter, tx_queued, dev, "%s skb %p\n", __func__, skb); + /* Determine which tx ring we will be placed on */ + qid = skb_get_queue_mapping(skb); + tx_ring = &adapter->tx_ring[qid]; + txq = netdev_get_tx_queue(dev, qid); + + rc = ena_check_and_linearize_skb(tx_ring, skb); + if (unlikely(rc)) + goto error_drop_packet; + + skb_tx_timestamp(skb); + + next_to_use = tx_ring->next_to_use; + req_id = tx_ring->free_tx_ids[next_to_use]; + tx_info = &tx_ring->tx_buffer_info[req_id]; + tx_info->num_of_bufs = 0; + + WARN(tx_info->skb, "SKB isn't NULL req_id %d\n", req_id); + + rc = ena_tx_map_skb(tx_ring, tx_info, skb, &push_hdr, &header_len); + if (unlikely(rc)) + goto error_drop_packet; memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx)); ena_tx_ctx.ena_bufs = tx_info->bufs; @@@ -2160,22 -2087,14 +2165,22 @@@ rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx, &nb_hw_desc); + /* ena_com_prepare_tx() can't fail due to overflow of tx queue, + * since the number of free descriptors in the queue is checked + * after sending the previous packet. In case there isn't enough + * space in the queue for the next packet, it is stopped + * until there is again enough available space in the queue. + * All other failure reasons of ena_com_prepare_tx() are fatal + * and therefore require a device reset. + */ if (unlikely(rc)) { netif_err(adapter, tx_queued, dev, "failed to prepare tx bufs\n"); u64_stats_update_begin(&tx_ring->syncp); - tx_ring->tx_stats.queue_stop++; tx_ring->tx_stats.prepare_ctx_err++; u64_stats_update_end(&tx_ring->syncp); - netif_tx_stop_queue(txq); + adapter->reset_reason = ENA_REGS_RESET_DRIVER_INVALID_STATE; + set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); goto error_unmap_dma; } @@@ -2197,8 -2116,8 +2202,8 @@@ * to sgl_size + 2. one for the meta descriptor and one for header * (if the header is larger than tx_max_header_size). 
*/ - if (unlikely(ena_com_sq_empty_space(tx_ring->ena_com_io_sq) < - (tx_ring->sgl_size + 2))) { + if (unlikely(!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, + tx_ring->sgl_size + 2))) { netif_dbg(adapter, tx_queued, dev, "%s stop queue %d\n", __func__, qid); @@@ -2217,8 -2136,8 +2222,8 @@@ */ smp_mb(); - if (ena_com_sq_empty_space(tx_ring->ena_com_io_sq) - > ENA_TX_WAKEUP_THRESH) { + if (ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, + ENA_TX_WAKEUP_THRESH)) { netif_tx_wake_queue(txq); u64_stats_update_begin(&tx_ring->syncp); tx_ring->tx_stats.queue_wakeup++; @@@ -2238,11 -2157,35 +2243,11 @@@ return NETDEV_TX_OK; -error_report_dma_error: - u64_stats_update_begin(&tx_ring->syncp); - tx_ring->tx_stats.dma_mapping_err++; - u64_stats_update_end(&tx_ring->syncp); - netdev_warn(adapter->netdev, "failed to map skb\n"); - - tx_info->skb = NULL; - error_unmap_dma: - if (i >= 0) { - /* save value of frag that failed */ - last_frag = i; - - /* start back at beginning and unmap skb */ - tx_info->skb = NULL; - ena_buf = tx_info->bufs; - dma_unmap_single(tx_ring->dev, dma_unmap_addr(ena_buf, paddr), - dma_unmap_len(ena_buf, len), DMA_TO_DEVICE); - - /* unmap remaining mapped pages */ - for (i = 0; i < last_frag; i++) { - ena_buf++; - dma_unmap_page(tx_ring->dev, dma_unmap_addr(ena_buf, paddr), - dma_unmap_len(ena_buf, len), DMA_TO_DEVICE); - } - } + ena_unmap_tx_skb(tx_ring, tx_info); + tx_info->skb = NULL; error_drop_packet: - dev_kfree_skb(skb); return NETDEV_TX_OK; } @@@ -2264,8 -2207,7 +2269,8 @@@ static u16 ena_select_queue(struct net_ return qid; } -static void ena_config_host_info(struct ena_com_dev *ena_dev) +static void ena_config_host_info(struct ena_com_dev *ena_dev, + struct pci_dev *pdev) { struct ena_admin_host_info *host_info; int rc; @@@ -2279,7 -2221,6 +2284,7 @@@ host_info = ena_dev->host_attr.host_info; + host_info->bdf = (pdev->bus->number << 8) | pdev->devfn; host_info->os_type = ENA_ADMIN_OS_LINUX; host_info->kernel_ver = LINUX_VERSION_CODE; strncpy(host_info->kernel_ver_str, utsname()->version, @@@ -2290,9 -2231,7 +2295,9 @@@ host_info->driver_version = (DRV_MODULE_VER_MAJOR) | (DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) | - (DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT); + (DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT) | + ("K"[0] << ENA_ADMIN_HOST_INFO_MODULE_TYPE_SHIFT); + host_info->num_cpus = num_online_cpus(); rc = ena_com_set_host_attributes(ena_dev); if (rc) { @@@ -2503,7 -2442,7 +2508,7 @@@ static int ena_device_init(struct ena_c } /* ENA admin level init */ - rc = ena_com_admin_init(ena_dev, &aenq_handlers, true); + rc = ena_com_admin_init(ena_dev, &aenq_handlers); if (rc) { dev_err(dev, "Can not initialize ena admin queue with device\n"); @@@ -2516,7 -2455,7 +2521,7 @@@ */ ena_com_set_admin_polling_mode(ena_dev, true); - ena_config_host_info(ena_dev); + ena_config_host_info(ena_dev, pdev); /* Get Device Attributes*/ rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx); @@@ -2601,14 -2540,15 +2606,14 @@@ static void ena_destroy_device(struct e dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); adapter->dev_up_before_reset = dev_up; - if (!graceful) ena_com_set_admin_running_state(ena_dev, false); if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) ena_down(adapter); - /* Before releasing the ENA resources, a device reset is required. - * (to prevent the device from accessing them). 
+ /* Stop the device from sending AENQ events (in case reset flag is set + * and device is up, ena_close already reset the device * In case the reset flag is set and the device is up, ena_down() * already perform the reset, so it can be skipped. */ @@@ -2677,16 -2617,18 +2682,20 @@@ static int ena_restore_device(struct en set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags); mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ)); - dev_err(&pdev->dev, "Device reset completed successfully\n"); + dev_err(&pdev->dev, + "Device reset completed successfully, Driver info: %s\n", + version); return rc; err_disable_msix: ena_free_mgmnt_irq(adapter); ena_disable_msix(adapter); err_device_destroy: + ena_com_abort_admin_commands(ena_dev); + ena_com_wait_for_abort_completion(ena_dev); ena_com_admin_destroy(ena_dev); + ena_com_mmio_reg_read_request_destroy(ena_dev); + ena_com_dev_reset(ena_dev, ENA_REGS_RESET_DRIVER_INVALID_STATE); err: clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags); clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags); @@@ -2868,7 -2810,7 +2877,7 @@@ static void check_for_empty_rx_ring(str rx_ring = &adapter->rx_ring[i]; refill_required = - ena_com_sq_empty_space(rx_ring->ena_com_io_sq); + ena_com_free_desc(rx_ring->ena_com_io_sq); if (unlikely(refill_required == (rx_ring->ring_size - 1))) { rx_ring->empty_rx_queue++; @@@ -3014,7 -2956,7 +3023,7 @@@ static int ena_calc_io_queue_num(struc /* In case of LLQ use the llq number in the get feature cmd */ if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { - io_sq_num = get_feat_ctx->max_queues.max_llq_num; + io_sq_num = get_feat_ctx->max_queues.max_legacy_llq_num; if (io_sq_num == 0) { dev_err(&pdev->dev, @@@ -3042,52 -2984,18 +3051,52 @@@ return io_queue_num; } -static void ena_set_push_mode(struct pci_dev *pdev, struct ena_com_dev *ena_dev, - struct ena_com_dev_get_features_ctx *get_feat_ctx) +static int ena_set_queues_placement_policy(struct pci_dev *pdev, + struct ena_com_dev *ena_dev, + struct ena_admin_feature_llq_desc *llq, + struct ena_llq_configurations *llq_default_configurations) { bool has_mem_bar; + int rc; + u32 llq_feature_mask; + + llq_feature_mask = 1 << ENA_ADMIN_LLQ; + if (!(ena_dev->supported_features & llq_feature_mask)) { + dev_err(&pdev->dev, + "LLQ is not supported Fallback to host mode policy.\n"); + ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; + return 0; + } has_mem_bar = pci_select_bars(pdev, IORESOURCE_MEM) & BIT(ENA_MEM_BAR); - /* Enable push mode if device supports LLQ */ - if (has_mem_bar && (get_feat_ctx->max_queues.max_llq_num > 0)) - ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_DEV; - else + rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations); + if (unlikely(rc)) { + dev_err(&pdev->dev, + "Failed to configure the device mode. Fallback to host mode policy.\n"); ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; + return 0; + } + + /* Nothing to config, exit */ + if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) + return 0; + + if (!has_mem_bar) { + dev_err(&pdev->dev, + "ENA device does not expose LLQ bar. 
Fallback to host mode policy.\n"); + ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; + return 0; + } + + ena_dev->mem_bar = devm_ioremap_wc(&pdev->dev, + pci_resource_start(pdev, ENA_MEM_BAR), + pci_resource_len(pdev, ENA_MEM_BAR)); + + if (!ena_dev->mem_bar) + return -EFAULT; + + return 0; } static void ena_set_dev_offloads(struct ena_com_dev_get_features_ctx *feat, @@@ -3200,27 -3108,11 +3209,20 @@@ err_rss_init static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev) { - int release_bars; - - if (ena_dev->mem_bar) - devm_iounmap(&pdev->dev, ena_dev->mem_bar); - - if (ena_dev->reg_bar) - devm_iounmap(&pdev->dev, ena_dev->reg_bar); + int release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK; - release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK; pci_release_selected_regions(pdev, release_bars); } +static inline void set_default_llq_configurations(struct ena_llq_configurations *llq_config) +{ + llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER; + llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_128B; + llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY; + llq_config->llq_num_decs_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2; + llq_config->llq_ring_entry_size_value = 128; +} + static int ena_calc_queue_size(struct pci_dev *pdev, struct ena_com_dev *ena_dev, u16 *max_tx_sgl_size, @@@ -3236,7 -3128,7 +3238,7 @@@ if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) queue_size = min_t(u32, queue_size, - get_feat_ctx->max_queues.max_llq_depth); + get_feat_ctx->max_queues.max_legacy_llq_depth); queue_size = rounddown_pow_of_two(queue_size); @@@ -3269,9 -3161,7 +3271,9 @@@ static int ena_probe(struct pci_dev *pd static int version_printed; struct net_device *netdev; struct ena_adapter *adapter; + struct ena_llq_configurations llq_config; struct ena_com_dev *ena_dev = NULL; + char *queue_type_str; static int adapters_found; int io_queue_num, bars, rc; int queue_size; @@@ -3325,13 -3215,16 +3327,13 @@@ goto err_free_region; } - ena_set_push_mode(pdev, ena_dev, &get_feat_ctx); + set_default_llq_configurations(&llq_config); - if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { - ena_dev->mem_bar = devm_ioremap_wc(&pdev->dev, - pci_resource_start(pdev, ENA_MEM_BAR), - pci_resource_len(pdev, ENA_MEM_BAR)); - if (!ena_dev->mem_bar) { - rc = -EFAULT; - goto err_device_destroy; - } + rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx.llq, + &llq_config); + if (rc) { + dev_err(&pdev->dev, "ena device init failed\n"); + goto err_device_destroy; } /* initial Tx interrupt delay, Assumes 1 usec granularity. @@@ -3346,10 -3239,8 +3348,10 @@@ goto err_device_destroy; } - dev_info(&pdev->dev, "creating %d io queues. queue size: %d\n", - io_queue_num, queue_size); + dev_info(&pdev->dev, "creating %d io queues. queue size: %d. LLQ is %s\n", + io_queue_num, queue_size, + (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) ? 
+ "ENABLED" : "DISABLED"); /* dev zeroed in init_etherdev */ netdev = alloc_etherdev_mq(sizeof(struct ena_adapter), io_queue_num); @@@ -3439,15 -3330,9 +3441,15 @@@ timer_setup(&adapter->timer_service, ena_timer_service, 0); mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ)); - dev_info(&pdev->dev, "%s found at mem %lx, mac addr %pM Queues %d\n", + if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) + queue_type_str = "Regular"; + else + queue_type_str = "Low Latency"; + + dev_info(&pdev->dev, + "%s found at mem %lx, mac addr %pM Queues %d, Placement policy: %s\n", DEVICE_NAME, (long)pci_resource_start(pdev, 0), - netdev->dev_addr, io_queue_num); + netdev->dev_addr, io_queue_num, queue_type_str); set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags); diff --combined drivers/net/ethernet/realtek/r8169.c index 7d3f671e1bb3,3a5e6160bf0d..b68e32186d67 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@@ -77,6 -77,8 +77,6 @@@ static const int multicast_filter_limi #define R8169_TX_RING_BYTES (NUM_TX_DESC * sizeof(struct TxDesc)) #define R8169_RX_RING_BYTES (NUM_RX_DESC * sizeof(struct RxDesc)) -#define RTL8169_TX_TIMEOUT (6*HZ) - /* write/read MMIO register */ #define RTL_W8(tp, reg, val8) writeb((val8), tp->mmio_addr + (reg)) #define RTL_W16(tp, reg, val16) writew((val16), tp->mmio_addr + (reg)) @@@ -1352,8 -1354,7 +1352,8 @@@ static void rtl_irq_enable_all(struct r static void rtl8169_irq_mask_and_ack(struct rtl8169_private *tp) { rtl_irq_disable(tp); - rtl_ack_events(tp, RTL_EVENT_NAPI | tp->event_slow); + rtl_ack_events(tp, 0xffff); + /* PCI commit */ RTL_R8(tp, ChipCmd); } @@@ -4047,11 -4048,23 +4047,11 @@@ static void rtl8169_init_phy(struct net rtl_hw_phy_config(dev); if (tp->mac_version <= RTL_GIGA_MAC_VER_06) { - netif_dbg(tp, drv, dev, - "Set MAC Reg C+CR Offset 0x82h = 0x01h\n"); - RTL_W8(tp, 0x82, 0x01); - } - - pci_write_config_byte(tp->pci_dev, PCI_LATENCY_TIMER, 0x40); - - if (tp->mac_version <= RTL_GIGA_MAC_VER_06) + pci_write_config_byte(tp->pci_dev, PCI_LATENCY_TIMER, 0x40); pci_write_config_byte(tp->pci_dev, PCI_CACHE_LINE_SIZE, 0x08); - - if (tp->mac_version == RTL_GIGA_MAC_VER_02) { netif_dbg(tp, drv, dev, "Set MAC Reg C+CR Offset 0x82h = 0x01h\n"); RTL_W8(tp, 0x82, 0x01); - netif_dbg(tp, drv, dev, - "Set PHY Reg 0x0bh = 0x00h\n"); - rtl_writephy(tp, 0x0b, 0x0000); //w 0x0b 15 0 0 } /* We may have called phy_speed_down before */ @@@ -4269,8 -4282,8 +4269,8 @@@ static void rtl_init_rxcfg(struct rtl81 RTL_W32(tp, RxConfig, RX_FIFO_THRESH | RX_DMA_BURST); break; case RTL_GIGA_MAC_VER_18 ... RTL_GIGA_MAC_VER_24: - case RTL_GIGA_MAC_VER_34: - case RTL_GIGA_MAC_VER_35: + case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_36: + case RTL_GIGA_MAC_VER_38: RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST); break; case RTL_GIGA_MAC_VER_40 ... 
RTL_GIGA_MAC_VER_51: @@@ -7355,9 -7368,11 +7355,9 @@@ static int rtl_init_one(struct pci_dev tp->cp_cmd = RTL_R16(tp, CPlusCmd); - if ((sizeof(dma_addr_t) > 4) && - (use_dac == 1 || (use_dac == -1 && pci_is_pcie(pdev) && - tp->mac_version >= RTL_GIGA_MAC_VER_18)) && - !pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) && - !pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) { + if (sizeof(dma_addr_t) > 4 && (use_dac == 1 || (use_dac == -1 && + tp->mac_version >= RTL_GIGA_MAC_VER_18)) && + !dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) { /* CPlusCmd Dual Access Cycle is only needed for non-PCIe */ if (!pci_is_pcie(pdev)) @@@ -7373,12 -7388,14 +7373,12 @@@ rtl_init_rxcfg(tp); - rtl_irq_disable(tp); + rtl8169_irq_mask_and_ack(tp); rtl_hw_initialize(tp); rtl_hw_reset(tp); - rtl_ack_events(tp, 0xffff); - pci_set_master(pdev); rtl_init_mdio_ops(tp); @@@ -7418,6 -7435,7 +7418,6 @@@ dev->dev_addr[i] = RTL_R8(tp, MAC0 + i); dev->ethtool_ops = &rtl8169_ethtool_ops; - dev->watchdog_timeo = RTL8169_TX_TIMEOUT; netif_napi_add(dev, &tp->napi, rtl8169_poll, NAPI_POLL_WEIGHT); diff --combined drivers/net/wireless/mediatek/mt76/usb.c index 6b643ea701e3,79e59f2379a2..6a255643c1f0 --- a/drivers/net/wireless/mediatek/mt76/usb.c +++ b/drivers/net/wireless/mediatek/mt76/usb.c @@@ -14,7 -14,6 +14,7 @@@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +#include #include "mt76.h" #include "usb_trace.h" #include "dma.h" @@@ -110,7 -109,6 +110,7 @@@ u32 mt76u_rr(struct mt76_dev *dev, u32 return ret; } +EXPORT_SYMBOL_GPL(mt76u_rr); /* should be called with usb_ctrl_mtx locked */ static void __mt76u_wr(struct mt76_dev *dev, u32 addr, u32 val) @@@ -142,7 -140,6 +142,7 @@@ void mt76u_wr(struct mt76_dev *dev, u3 __mt76u_wr(dev, addr, val); mutex_unlock(&dev->usb.usb_ctrl_mtx); } +EXPORT_SYMBOL_GPL(mt76u_wr); static u32 mt76u_rmw(struct mt76_dev *dev, u32 addr, u32 mask, u32 val) @@@ -189,60 -186,6 +189,60 @@@ void mt76u_single_wr(struct mt76_dev *d } EXPORT_SYMBOL_GPL(mt76u_single_wr); +static int +mt76u_req_wr_rp(struct mt76_dev *dev, u32 base, + const struct mt76_reg_pair *data, int len) +{ + struct mt76_usb *usb = &dev->usb; + + mutex_lock(&usb->usb_ctrl_mtx); + while (len > 0) { + __mt76u_wr(dev, base + data->reg, data->value); + len--; + data++; + } + mutex_unlock(&usb->usb_ctrl_mtx); + + return 0; +} + +static int +mt76u_wr_rp(struct mt76_dev *dev, u32 base, + const struct mt76_reg_pair *data, int n) +{ + if (test_bit(MT76_STATE_MCU_RUNNING, &dev->state)) + return dev->mcu_ops->mcu_wr_rp(dev, base, data, n); + else + return mt76u_req_wr_rp(dev, base, data, n); +} + +static int +mt76u_req_rd_rp(struct mt76_dev *dev, u32 base, struct mt76_reg_pair *data, + int len) +{ + struct mt76_usb *usb = &dev->usb; + + mutex_lock(&usb->usb_ctrl_mtx); + while (len > 0) { + data->value = __mt76u_rr(dev, base + data->reg); + len--; + data++; + } + mutex_unlock(&usb->usb_ctrl_mtx); + + return 0; +} + +static int +mt76u_rd_rp(struct mt76_dev *dev, u32 base, + struct mt76_reg_pair *data, int n) +{ + if (test_bit(MT76_STATE_MCU_RUNNING, &dev->state)) + return dev->mcu_ops->mcu_rd_rp(dev, base, data, n); + else + return mt76u_req_rd_rp(dev, base, data, n); +} + static int mt76u_set_endpoints(struct usb_interface *intf, struct mt76_usb *usb) @@@ -276,17 -219,15 +276,17 @@@ static in mt76u_fill_rx_sg(struct mt76_dev *dev, struct mt76u_buf *buf, int nsgs, int len, int sglen) { + struct mt76_queue *q = &dev->q_rx[MT_RXQ_MAIN]; struct urb *urb = buf->urb; int i; + spin_lock_bh(&q->rx_page_lock); for (i = 0; i < nsgs; 
i++) { struct page *page; void *data; int offset; - data = netdev_alloc_frag(len); + data = page_frag_alloc(&q->rx_page, len, GFP_ATOMIC); if (!data) break; @@@ -294,7 -235,6 +294,7 @@@ offset = data - page_address(page); sg_set_page(&urb->sg[i], page, sglen, offset); } + spin_unlock_bh(&q->rx_page_lock); if (i < nsgs) { int j; @@@ -318,7 -258,7 +318,7 @@@ int mt76u_buf_alloc(struct mt76_dev *de if (!buf->urb) return -ENOMEM; - buf->urb->sg = devm_kzalloc(dev->dev, nsgs * sizeof(*buf->urb->sg), + buf->urb->sg = devm_kcalloc(dev->dev, nsgs, sizeof(*buf->urb->sg), gfp); if (!buf->urb->sg) return -ENOMEM; @@@ -386,9 -326,9 +386,9 @@@ static int mt76u_get_rx_entry_len(u8 *d min_len = MT_DMA_HDR_LEN + MT_RX_RXWI_LEN + MT_FCE_INFO_LEN; - if (data_len < min_len || WARN_ON(!dma_len) || - WARN_ON(dma_len + MT_DMA_HDR_LEN > data_len) || - WARN_ON(dma_len & 0x3)) + if (data_len < min_len || !dma_len || + dma_len + MT_DMA_HDR_LEN > data_len || + (dma_len & 0x3)) return -EINVAL; return dma_len; } @@@ -523,10 -463,9 +523,10 @@@ static int mt76u_alloc_rx(struct mt76_d struct mt76_queue *q = &dev->q_rx[MT_RXQ_MAIN]; int i, err, nsgs; + spin_lock_init(&q->rx_page_lock); spin_lock_init(&q->lock); - q->entry = devm_kzalloc(dev->dev, - MT_NUM_RX_ENTRIES * sizeof(*q->entry), + q->entry = devm_kcalloc(dev->dev, + MT_NUM_RX_ENTRIES, sizeof(*q->entry), GFP_KERNEL); if (!q->entry) return -ENOMEM; @@@ -555,21 -494,10 +555,21 @@@ static void mt76u_free_rx(struct mt76_dev *dev) { struct mt76_queue *q = &dev->q_rx[MT_RXQ_MAIN]; + struct page *page; int i; for (i = 0; i < q->ndesc; i++) mt76u_buf_free(&q->entry[i].ubuf); + + spin_lock_bh(&q->rx_page_lock); + if (!q->rx_page.va) + goto out; + + page = virt_to_page(q->rx_page.va); + __page_frag_cache_drain(page, q->rx_page.pagecnt_bias); + memset(&q->rx_page, 0, sizeof(q->rx_page)); +out: + spin_unlock_bh(&q->rx_page_lock); } static void mt76u_stop_rx(struct mt76_dev *dev) @@@ -581,6 -509,40 +581,6 @@@ usb_kill_urb(q->entry[i].ubuf.urb); } -int mt76u_skb_dma_info(struct sk_buff *skb, int port, u32 flags) -{ - struct sk_buff *iter, *last = skb; - u32 info, pad; - - /* Buffer layout: - * | 4B | xfer len | pad | 4B | - * | TXINFO | pkt/cmd | zero pad to 4B | zero | - * - * length field of TXINFO should be set to 'xfer len'. 
- */ - info = FIELD_PREP(MT_TXD_INFO_LEN, round_up(skb->len, 4)) | - FIELD_PREP(MT_TXD_INFO_DPORT, port) | flags; - put_unaligned_le32(info, skb_push(skb, sizeof(info))); - - pad = round_up(skb->len, 4) + 4 - skb->len; - skb_walk_frags(skb, iter) { - last = iter; - if (!iter->next) { - skb->data_len += pad; - skb->len += pad; - break; - } - } - - if (unlikely(pad)) { - if (__skb_pad(last, pad, true)) - return -ENOMEM; - __skb_put(last, pad); - } - return 0; -} -EXPORT_SYMBOL_GPL(mt76u_skb_dma_info); - static void mt76u_tx_tasklet(unsigned long data) { struct mt76_dev *dev = (struct mt76_dev *)data; @@@ -753,10 -715,10 +753,10 @@@ static int mt76u_alloc_tx(struct mt76_d q = &dev->q_tx[i]; spin_lock_init(&q->lock); INIT_LIST_HEAD(&q->swq); - q->hw_idx = q2hwq(i); + q->hw_idx = mt76_ac_to_hwq(i); - q->entry = devm_kzalloc(dev->dev, - MT_NUM_TX_ENTRIES * sizeof(*q->entry), + q->entry = devm_kcalloc(dev->dev, + MT_NUM_TX_ENTRIES, sizeof(*q->entry), GFP_KERNEL); if (!q->entry) return -ENOMEM; @@@ -860,8 -822,6 +860,8 @@@ int mt76u_init(struct mt76_dev *dev .wr = mt76u_wr, .rmw = mt76u_rmw, .copy = mt76u_copy, + .wr_rp = mt76u_wr_rp, + .rd_rp = mt76u_rd_rp, }; struct mt76_usb *usb = &dev->usb; diff --combined fs/afs/internal.h index 8ae4e2ebb99a,34c02fdcc25f..72de1f157d20 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@@ -73,14 -73,12 +73,14 @@@ struct afs_addr_list struct rcu_head rcu; /* Must be first */ refcount_t usage; u32 version; /* Version */ - unsigned short nr_addrs; - unsigned short index; /* Address currently in use */ - unsigned short nr_ipv4; /* Number of IPv4 addresses */ + unsigned char max_addrs; + unsigned char nr_addrs; + unsigned char index; /* Address currently in use */ + unsigned char nr_ipv4; /* Number of IPv4 addresses */ unsigned long probed; /* Mask of servers that have been probed */ unsigned long yfs; /* Mask of servers that are YFS */ struct sockaddr_rxrpc addrs[]; +#define AFS_MAX_ADDRESSES ((unsigned int)(sizeof(unsigned long) * 8)) }; /* @@@ -244,7 -242,7 +244,7 @@@ struct afs_net seqlock_t cells_lock; struct mutex proc_cells_lock; - struct list_head proc_cells; + struct hlist_head proc_cells; /* Known servers. Theoretically each fileserver can only be in one * cell, but in practice, people create aliases and subsets and there's @@@ -322,7 -320,7 +322,7 @@@ struct afs_cell struct afs_net *net; struct key *anonymous_key; /* anonymous user key for this cell */ struct work_struct manager; /* Manager for init/deinit/dns */ - struct list_head proc_link; /* /proc cell list link */ + struct hlist_node proc_link; /* /proc cell list link */ #ifdef CONFIG_AFS_FSCACHE struct fscache_cookie *cache; /* caching cookie */ #endif diff --combined include/linux/netdevice.h index 76603ee136a8,d837dad24b4c..22e4ef7bb701 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@@ -535,32 -535,6 +535,32 @@@ static inline void napi_synchronize(con barrier(); } +/** + * napi_if_scheduled_mark_missed - if napi is running, set the + * NAPIF_STATE_MISSED + * @n: NAPI context + * + * If napi is running, set the NAPIF_STATE_MISSED, and return true if + * NAPI is scheduled. 
+ **/ +static inline bool napi_if_scheduled_mark_missed(struct napi_struct *n) +{ + unsigned long val, new; + + do { + val = READ_ONCE(n->state); + if (val & NAPIF_STATE_DISABLE) + return true; + + if (!(val & NAPIF_STATE_SCHED)) + return false; + + new = val | NAPIF_STATE_MISSED; + } while (cmpxchg(&n->state, val, new) != val); + + return true; +} + enum netdev_queue_state_t { __QUEUE_STATE_DRV_XOFF, __QUEUE_STATE_STACK_XOFF, @@@ -609,9 -583,6 +609,9 @@@ struct netdev_queue /* Subordinate device that the queue has been assigned to */ struct net_device *sb_dev; +#ifdef CONFIG_XDP_SOCKETS + struct xdp_umem *umem; +#endif /* * write-mostly part */ @@@ -741,9 -712,6 +741,9 @@@ struct netdev_rx_queue struct kobject kobj; struct net_device *dev; struct xdp_rxq_info xdp_rxq; +#ifdef CONFIG_XDP_SOCKETS + struct xdp_umem *umem; +#endif } ____cacheline_aligned_in_smp; /* @@@ -2008,6 -1976,7 +2008,6 @@@ struct net_device struct pcpu_lstats __percpu *lstats; struct pcpu_sw_netstats __percpu *tstats; struct pcpu_dstats __percpu *dstats; - struct pcpu_vstats __percpu *vstats; }; #if IS_ENABLED(CONFIG_GARP) @@@ -2351,7 -2320,6 +2351,7 @@@ static inline struct sk_buff *call_gro_ struct packet_type { __be16 type; /* This is really htons(ether_type). */ + bool ignore_outgoing; struct net_device *dev; /* NULL is wildcarded here */ int (*func) (struct sk_buff *, struct net_device *, @@@ -2390,12 -2358,6 +2390,12 @@@ struct pcpu_sw_netstats struct u64_stats_sync syncp; }; +struct pcpu_lstats { + u64 packets; + u64 bytes; + struct u64_stats_sync syncp; +}; + #define __netdev_alloc_pcpu_stats(type, gfp) \ ({ \ typeof(type) __percpu *pcpu_stats = alloc_percpu_gfp(type, gfp);\ @@@ -2496,6 -2458,13 +2496,13 @@@ struct netdev_notifier_info struct netlink_ext_ack *extack; }; + struct netdev_notifier_info_ext { + struct netdev_notifier_info info; /* must be first */ + union { + u32 mtu; + } ext; + }; + struct netdev_notifier_change_info { struct netdev_notifier_info info; /* must be first */ unsigned int flags_changed; diff --combined include/net/devlink.h index 9a70755ad1c2,99efc156a309..45db0c79462d --- a/include/net/devlink.h +++ b/include/net/devlink.h @@@ -298,7 -298,7 +298,7 @@@ struct devlink_resource #define DEVLINK_RESOURCE_ID_PARENT_TOP 0 - #define DEVLINK_PARAM_MAX_STRING_VALUE 32 + #define __DEVLINK_PARAM_MAX_STRING_VALUE 32 enum devlink_param_type { DEVLINK_PARAM_TYPE_U8, DEVLINK_PARAM_TYPE_U16, @@@ -311,7 -311,7 +311,7 @@@ union devlink_param_value u8 vu8; u16 vu16; u32 vu32; - const char *vstr; + char vstr[__DEVLINK_PARAM_MAX_STRING_VALUE]; bool vbool; }; @@@ -362,9 -362,6 +362,9 @@@ enum devlink_param_generic_id DEVLINK_PARAM_GENERIC_ID_MAX_MACS, DEVLINK_PARAM_GENERIC_ID_ENABLE_SRIOV, DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT, + DEVLINK_PARAM_GENERIC_ID_IGNORE_ARI, + DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX, + DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN, /* add new param generic ids above here*/ __DEVLINK_PARAM_GENERIC_ID_MAX, @@@ -383,15 -380,6 +383,15 @@@ #define DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_NAME "region_snapshot_enable" #define DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_TYPE DEVLINK_PARAM_TYPE_BOOL +#define DEVLINK_PARAM_GENERIC_IGNORE_ARI_NAME "ignore_ari" +#define DEVLINK_PARAM_GENERIC_IGNORE_ARI_TYPE DEVLINK_PARAM_TYPE_BOOL + +#define DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_NAME "msix_vec_per_pf_max" +#define DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_TYPE DEVLINK_PARAM_TYPE_U32 + +#define DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_NAME "msix_vec_per_pf_min" +#define 
DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_TYPE DEVLINK_PARAM_TYPE_U32 + #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \ { \ .id = DEVLINK_PARAM_GENERIC_ID_##_id, \ @@@ -463,14 -451,11 +463,14 @@@ struct devlink_ops u32 *p_cur, u32 *p_max); int (*eswitch_mode_get)(struct devlink *devlink, u16 *p_mode); - int (*eswitch_mode_set)(struct devlink *devlink, u16 mode); + int (*eswitch_mode_set)(struct devlink *devlink, u16 mode, + struct netlink_ext_ack *extack); int (*eswitch_inline_mode_get)(struct devlink *devlink, u8 *p_inline_mode); - int (*eswitch_inline_mode_set)(struct devlink *devlink, u8 inline_mode); + int (*eswitch_inline_mode_set)(struct devlink *devlink, u8 inline_mode, + struct netlink_ext_ack *extack); int (*eswitch_encap_mode_get)(struct devlink *devlink, u8 *p_encap_mode); - int (*eswitch_encap_mode_set)(struct devlink *devlink, u8 encap_mode); + int (*eswitch_encap_mode_set)(struct devlink *devlink, u8 encap_mode, + struct netlink_ext_ack *extack); }; static inline void *devlink_priv(struct devlink *devlink) @@@ -568,6 -553,8 +568,8 @@@ int devlink_param_driverinit_value_get( int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id, union devlink_param_value init_val); void devlink_param_value_changed(struct devlink *devlink, u32 param_id); + void devlink_param_value_str_fill(union devlink_param_value *dst_val, + const char *src); struct devlink_region *devlink_region_create(struct devlink *devlink, const char *region_name, u32 region_max_snapshots, @@@ -804,6 -791,12 +806,12 @@@ devlink_param_value_changed(struct devl { } + static inline void + devlink_param_value_str_fill(union devlink_param_value *dst_val, + const char *src) + { + } + static inline struct devlink_region * devlink_region_create(struct devlink *devlink, const char *region_name, diff --combined include/net/ip_fib.h index 9846b79c9ee1,c9b7b136939d..852e4ebf2209 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@@ -373,7 -373,6 +373,7 @@@ static inline bool fib4_rules_early_flo extern const struct nla_policy rtm_ipv4_policy[]; void ip_fib_init(void); __be32 fib_compute_spec_dst(struct sk_buff *skb); +bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev); int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, int oif, struct net_device *dev, struct in_device *idev, u32 *itag); @@@ -395,6 -394,7 +395,7 @@@ int ip_fib_check_default(__be32 gw, str int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force); int fib_sync_down_addr(struct net_device *dev, __be32 local); int fib_sync_up(struct net_device *dev, unsigned int nh_flags); + void fib_sync_mtu(struct net_device *dev, u32 orig_mtu); #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, @@@ -452,6 -452,4 +453,6 @@@ static inline void fib_proc_exit(struc u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr); +int ip_valid_fib_dump_req(const struct nlmsghdr *nlh, + struct netlink_ext_ack *extack); #endif /* _NET_FIB_H */ diff --combined net/core/dev.c index 0b2d777e5b9e,93243479085f..a4d39b87b4e5 --- a/net/core/dev.c +++ b/net/core/dev.c @@@ -1752,6 -1752,28 +1752,28 @@@ int call_netdevice_notifiers(unsigned l } EXPORT_SYMBOL(call_netdevice_notifiers); + /** + * call_netdevice_notifiers_mtu - call all network notifier blocks + * @val: value passed unmodified to notifier function + * @dev: net_device pointer passed unmodified to notifier function + * @arg: additional u32 argument passed to the 
notifier function + * + * Call all network notifier blocks. Parameters and return value + * are as for raw_notifier_call_chain(). + */ + static int call_netdevice_notifiers_mtu(unsigned long val, + struct net_device *dev, u32 arg) + { + struct netdev_notifier_info_ext info = { + .info.dev = dev, + .ext.mtu = arg, + }; + + BUILD_BUG_ON(offsetof(struct netdev_notifier_info_ext, info) != 0); + + return call_netdevice_notifiers_info(val, &info.info); + } + #ifdef CONFIG_NET_INGRESS static DEFINE_STATIC_KEY_FALSE(ingress_needed_key); @@@ -1969,9 -1991,6 +1991,9 @@@ void dev_queue_xmit_nit(struct sk_buff rcu_read_lock(); again: list_for_each_entry_rcu(ptype, ptype_list, list) { + if (ptype->ignore_outgoing) + continue; + /* Never send packets back to the socket * they originated from - MvS (miquels@drinkel.ow.org) */ @@@ -3231,7 -3250,7 +3253,7 @@@ struct sk_buff *dev_hard_start_xmit(str while (skb) { struct sk_buff *next = skb->next; - skb->next = NULL; + skb_mark_not_on_list(skb); rc = xmit_one(skb, dev, txq, next != NULL); if (unlikely(!dev_xmit_complete(rc))) { skb->next = next; @@@ -3331,7 -3350,7 +3353,7 @@@ struct sk_buff *validate_xmit_skb_list( for (; skb != NULL; skb = next) { next = skb->next; - skb->next = NULL; + skb_mark_not_on_list(skb); /* in case skb wont be segmented, point to itself */ skb->prev = skb; @@@ -5295,7 -5314,8 +5317,7 @@@ static void __napi_gro_flush_chain(stru list_for_each_entry_safe_reverse(skb, p, head, list) { if (flush_old && NAPI_GRO_CB(skb)->age == jiffies) return; - list_del(&skb->list); - skb->next = NULL; + skb_list_del_init(skb); napi_gro_complete(skb); napi->gro_hash[index].count--; } @@@ -5480,7 -5500,8 +5502,7 @@@ static enum gro_result dev_gro_receive( ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED; if (pp) { - list_del(&pp->list); - pp->next = NULL; + skb_list_del_init(pp); napi_gro_complete(pp); napi->gro_hash[hash].count--; } @@@ -7575,14 -7596,16 +7597,16 @@@ int dev_set_mtu_ext(struct net_device * err = __dev_set_mtu(dev, new_mtu); if (!err) { - err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); + err = call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev, + orig_mtu); err = notifier_to_errno(err); if (err) { /* setting mtu back and notifying everyone again, * so that they have a chance to revert changes. 
*/ __dev_set_mtu(dev, orig_mtu); - call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); + call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev, + new_mtu); } } return err; diff --combined net/core/devlink.c index 6dae81d65d5c,6bc42933be4a..3a4b29a13d31 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@@ -1626,7 -1626,7 +1626,7 @@@ static int devlink_nl_cmd_eswitch_set_d if (!ops->eswitch_mode_set) return -EOPNOTSUPP; mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]); - err = ops->eswitch_mode_set(devlink, mode); + err = ops->eswitch_mode_set(devlink, mode, info->extack); if (err) return err; } @@@ -1636,8 -1636,7 +1636,8 @@@ return -EOPNOTSUPP; inline_mode = nla_get_u8( info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]); - err = ops->eswitch_inline_mode_set(devlink, inline_mode); + err = ops->eswitch_inline_mode_set(devlink, inline_mode, + info->extack); if (err) return err; } @@@ -1646,8 -1645,7 +1646,8 @@@ if (!ops->eswitch_encap_mode_set) return -EOPNOTSUPP; encap_mode = nla_get_u8(info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]); - err = ops->eswitch_encap_mode_set(devlink, encap_mode); + err = ops->eswitch_encap_mode_set(devlink, encap_mode, + info->extack); if (err) return err; } @@@ -2677,21 -2675,6 +2677,21 @@@ static const struct devlink_param devli .name = DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_NAME, .type = DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_TYPE, }, + { + .id = DEVLINK_PARAM_GENERIC_ID_IGNORE_ARI, + .name = DEVLINK_PARAM_GENERIC_IGNORE_ARI_NAME, + .type = DEVLINK_PARAM_GENERIC_IGNORE_ARI_TYPE, + }, + { + .id = DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX, + .name = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_NAME, + .type = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_TYPE, + }, + { + .id = DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN, + .name = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_NAME, + .type = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_TYPE, + }, }; static int devlink_param_generic_verify(const struct devlink_param *param) @@@ -3012,6 -2995,8 +3012,8 @@@ devlink_param_value_get_from_info(cons struct genl_info *info, union devlink_param_value *value) { + int len; + if (param->type != DEVLINK_PARAM_TYPE_BOOL && !info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]) return -EINVAL; @@@ -3027,10 -3012,13 +3029,13 @@@ value->vu32 = nla_get_u32(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]); break; case DEVLINK_PARAM_TYPE_STRING: - if (nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]) > - DEVLINK_PARAM_MAX_STRING_VALUE) + len = strnlen(nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]), + nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA])); + if (len == nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]) || + len >= __DEVLINK_PARAM_MAX_STRING_VALUE) return -EINVAL; - value->vstr = nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]); + strcpy(value->vstr, + nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA])); break; case DEVLINK_PARAM_TYPE_BOOL: value->vbool = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA] ? 
@@@ -3117,7 -3105,10 +3122,10 @@@ static int devlink_nl_cmd_param_set_doi return -EOPNOTSUPP; if (cmode == DEVLINK_PARAM_CMODE_DRIVERINIT) { - param_item->driverinit_value = value; + if (param->type == DEVLINK_PARAM_TYPE_STRING) + strcpy(param_item->driverinit_value.vstr, value.vstr); + else + param_item->driverinit_value = value; param_item->driverinit_value_valid = true; } else { if (!param->set) @@@ -3504,7 -3495,7 +3512,7 @@@ static int devlink_nl_cmd_region_read_d start_offset = *((u64 *)&cb->args[0]); err = nlmsg_parse(cb->nlh, GENL_HDRLEN + devlink_nl_family.hdrsize, - attrs, DEVLINK_ATTR_MAX, ops->policy, NULL); + attrs, DEVLINK_ATTR_MAX, ops->policy, cb->extack); if (err) goto out; @@@ -4557,7 -4548,10 +4565,10 @@@ int devlink_param_driverinit_value_get( DEVLINK_PARAM_CMODE_DRIVERINIT)) return -EOPNOTSUPP; - *init_val = param_item->driverinit_value; + if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING) + strcpy(init_val->vstr, param_item->driverinit_value.vstr); + else + *init_val = param_item->driverinit_value; return 0; } @@@ -4588,7 -4582,10 +4599,10 @@@ int devlink_param_driverinit_value_set( DEVLINK_PARAM_CMODE_DRIVERINIT)) return -EOPNOTSUPP; - param_item->driverinit_value = init_val; + if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING) + strcpy(param_item->driverinit_value.vstr, init_val.vstr); + else + param_item->driverinit_value = init_val; param_item->driverinit_value_valid = true; devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW); @@@ -4620,6 -4617,23 +4634,23 @@@ void devlink_param_value_changed(struc } EXPORT_SYMBOL_GPL(devlink_param_value_changed); + /** + * devlink_param_value_str_fill - Safely fill-up the string preventing + * from overflow of the preallocated buffer + * + * @dst_val: destination devlink_param_value + * @src: source buffer + */ + void devlink_param_value_str_fill(union devlink_param_value *dst_val, + const char *src) + { + size_t len; + + len = strlcpy(dst_val->vstr, src, __DEVLINK_PARAM_MAX_STRING_VALUE); + WARN_ON(len >= __DEVLINK_PARAM_MAX_STRING_VALUE); + } + EXPORT_SYMBOL_GPL(devlink_param_value_str_fill); + /** * devlink_region_create - create a new address region * diff --combined net/core/skbuff.c index 0e937d3d85b5,428094b577fc..54b961de9538 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@@ -3381,6 -3381,64 +3381,6 @@@ unsigned int skb_find_text(struct sk_bu } EXPORT_SYMBOL(skb_find_text); -/** - * skb_append_datato_frags - append the user data to a skb - * @sk: sock structure - * @skb: skb structure to be appended with user data. 
- * @getfrag: call back function to be used for getting the user data - * @from: pointer to user message iov - * @length: length of the iov message - * - * Description: This procedure append the user data in the fragment part - * of the skb if any page alloc fails user this procedure returns -ENOMEM - */ -int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, - int (*getfrag)(void *from, char *to, int offset, - int len, int odd, struct sk_buff *skb), - void *from, int length) -{ - int frg_cnt = skb_shinfo(skb)->nr_frags; - int copy; - int offset = 0; - int ret; - struct page_frag *pfrag = &current->task_frag; - - do { - /* Return error if we don't have space for new frag */ - if (frg_cnt >= MAX_SKB_FRAGS) - return -EMSGSIZE; - - if (!sk_page_frag_refill(sk, pfrag)) - return -ENOMEM; - - /* copy the user data to page */ - copy = min_t(int, length, pfrag->size - pfrag->offset); - - ret = getfrag(from, page_address(pfrag->page) + pfrag->offset, - offset, copy, 0, skb); - if (ret < 0) - return -EFAULT; - - /* copy was successful so update the size parameters */ - skb_fill_page_desc(skb, frg_cnt, pfrag->page, pfrag->offset, - copy); - frg_cnt++; - pfrag->offset += copy; - get_page(pfrag->page); - - skb->truesize += copy; - refcount_add(copy, &sk->sk_wmem_alloc); - skb->len += copy; - skb->data_len += copy; - offset += copy; - length -= copy; - - } while (length > 0); - - return 0; -} -EXPORT_SYMBOL(skb_append_datato_frags); - int skb_append_pagefrags(struct sk_buff *skb, struct page *page, int offset, size_t size) { @@@ -4394,14 -4452,16 +4394,16 @@@ EXPORT_SYMBOL_GPL(skb_complete_wifi_ack */ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off) { - if (unlikely(start > skb_headlen(skb)) || - unlikely((int)start + off > skb_headlen(skb) - 2)) { - net_warn_ratelimited("bad partial csum: csum=%u/%u len=%u\n", - start, off, skb_headlen(skb)); + u32 csum_end = (u32)start + (u32)off + sizeof(__sum16); + u32 csum_start = skb_headroom(skb) + (u32)start; + + if (unlikely(csum_start > U16_MAX || csum_end > skb_headlen(skb))) { + net_warn_ratelimited("bad partial csum: csum=%u/%u headroom=%u headlen=%u\n", + start, off, skb_headroom(skb), skb_headlen(skb)); return false; } skb->ip_summed = CHECKSUM_PARTIAL; - skb->csum_start = skb_headroom(skb) + start; + skb->csum_start = csum_start; skb->csum_offset = off; skb_set_transport_header(skb, start); return true; } diff --combined net/ipv4/fib_frontend.c index 038f511c73fa,0113993e9b2c..0f1beceb47d5 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@@ -315,32 -315,6 +315,32 @@@ __be32 fib_compute_spec_dst(struct sk_b return inet_select_addr(dev, ip_hdr(skb)->saddr, scope); } +bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev) +{ + bool dev_match = false; +#ifdef CONFIG_IP_ROUTE_MULTIPATH + int ret; + + for (ret = 0; ret < fi->fib_nhs; ret++) { + struct fib_nh *nh = &fi->fib_nh[ret]; + + if (nh->nh_dev == dev) { + dev_match = true; + break; + } else if (l3mdev_master_ifindex_rcu(nh->nh_dev) == dev->ifindex) { + dev_match = true; + break; + } + } +#else + if (fi->fib_nh[0].nh_dev == dev) + dev_match = true; +#endif + + return dev_match; +} +EXPORT_SYMBOL_GPL(fib_info_nh_uses_dev); + /* Given (packet source, input interface) and optional (dst, oif, tos): * - (main) check, that source is valid i.e. not broadcast or our local * address. 
@@@ -387,8 -361,24 +387,8 @@@ static int __fib_validate_source(struc (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev))) goto e_inval; fib_combine_itag(itag, &res); - dev_match = false; - -#ifdef CONFIG_IP_ROUTE_MULTIPATH - for (ret = 0; ret < res.fi->fib_nhs; ret++) { - struct fib_nh *nh = &res.fi->fib_nh[ret]; - if (nh->nh_dev == dev) { - dev_match = true; - break; - } else if (l3mdev_master_ifindex_rcu(nh->nh_dev) == dev->ifindex) { - dev_match = true; - break; - } - } -#else - if (FIB_RES_DEV(res) == dev) - dev_match = true; -#endif + dev_match = fib_info_nh_uses_dev(res.fi, dev); if (dev_match) { ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; return ret; @@@ -802,40 -792,8 +802,40 @@@ errout return err; } +int ip_valid_fib_dump_req(const struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct rtmsg *rtm; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { + NL_SET_ERR_MSG(extack, "Invalid header for FIB dump request"); + return -EINVAL; + } + + rtm = nlmsg_data(nlh); + if (rtm->rtm_dst_len || rtm->rtm_src_len || rtm->rtm_tos || + rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope || + rtm->rtm_type) { + NL_SET_ERR_MSG(extack, "Invalid values in header for FIB dump request"); + return -EINVAL; + } + if (rtm->rtm_flags & ~(RTM_F_CLONED | RTM_F_PREFIX)) { + NL_SET_ERR_MSG(extack, "Invalid flags for FIB dump request"); + return -EINVAL; + } + + if (nlmsg_attrlen(nlh, sizeof(*rtm))) { + NL_SET_ERR_MSG(extack, "Invalid data after header in FIB dump request"); + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL_GPL(ip_valid_fib_dump_req); + static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) { + const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); unsigned int h, s_h; unsigned int e = 0, s_e; @@@ -843,14 -801,8 +843,14 @@@ struct hlist_head *head; int dumped = 0, err; - if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) && - ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED) + if (cb->strict_check) { + err = ip_valid_fib_dump_req(nlh, cb->extack); + if (err < 0) + return err; + } + + if (nlmsg_len(nlh) >= sizeof(struct rtmsg) && + ((struct rtmsg *)nlmsg_data(nlh))->rtm_flags & RTM_F_CLONED) return skb->len; s_h = cb->args[0]; @@@ -1291,7 -1243,8 +1291,8 @@@ static int fib_inetaddr_event(struct no static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct netdev_notifier_changeupper_info *info; + struct netdev_notifier_changeupper_info *upper_info = ptr; + struct netdev_notifier_info_ext *info_ext = ptr; struct in_device *in_dev; struct net *net = dev_net(dev); unsigned int flags; @@@ -1326,16 -1279,19 +1327,19 @@@ fib_sync_up(dev, RTNH_F_LINKDOWN); else fib_sync_down_dev(dev, event, false); - /* fall through */ + rt_cache_flush(net); + break; case NETDEV_CHANGEMTU: + fib_sync_mtu(dev, info_ext->ext.mtu); rt_cache_flush(net); break; case NETDEV_CHANGEUPPER: - info = ptr; + upper_info = ptr; /* flush all routes if dev is linked to or unlinked from * an L3 master device (e.g., VRF) */ - if (info->upper_dev && netif_is_l3_master(info->upper_dev)) + if (upper_info->upper_dev && + netif_is_l3_master(upper_info->upper_dev)) fib_disable_ip(dev, NETDEV_DOWN, true); break; } diff --combined net/ipv4/fib_semantics.c index f8c7ec8171a8,446204ca7406..b5c3937ca6ec --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@@ -208,6 -208,7 +208,6 @@@ static void rt_fibinfo_free_cpus(struc static void 
free_fib_info_rcu(struct rcu_head *head) { struct fib_info *fi = container_of(head, struct fib_info, rcu); - struct dst_metrics *m; change_nexthops(fi) { if (nexthop_nh->nh_dev) @@@ -218,8 -219,9 +218,8 @@@ rt_fibinfo_free(&nexthop_nh->nh_rth_input); } endfor_nexthops(fi); - m = fi->fib_metrics; - if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt)) - kfree(m); + ip_fib_metrics_put(fi->fib_metrics); + kfree(fi); } @@@ -795,10 -797,8 +795,10 @@@ static int fib_check_nh(struct fib_conf return -EINVAL; } dev = __dev_get_by_index(net, nh->nh_oif); - if (!dev) + if (!dev) { + NL_SET_ERR_MSG(extack, "Nexthop device required for onlink"); return -ENODEV; + } if (!(dev->flags & IFF_UP)) { NL_SET_ERR_MSG(extack, "Nexthop device is not up"); @@@ -1018,6 -1018,13 +1018,6 @@@ static bool fib_valid_prefsrc(struct fi return true; } -static int -fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg) -{ - return ip_metrics_convert(fi->fib_net, cfg->fc_mx, cfg->fc_mx_len, - fi->fib_metrics->metrics); -} - struct fib_info *fib_create_info(struct fib_config *cfg, struct netlink_ext_ack *extack) { @@@ -1075,14 -1082,16 +1075,14 @@@ fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); if (!fi) goto failure; - if (cfg->fc_mx) { - fi->fib_metrics = kzalloc(sizeof(*fi->fib_metrics), GFP_KERNEL); - if (unlikely(!fi->fib_metrics)) { - kfree(fi); - return ERR_PTR(err); - } - refcount_set(&fi->fib_metrics->refcnt, 1); - } else { - fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics; + fi->fib_metrics = ip_fib_metrics_init(fi->fib_net, cfg->fc_mx, + cfg->fc_mx_len); + if (unlikely(IS_ERR(fi->fib_metrics))) { + err = PTR_ERR(fi->fib_metrics); + kfree(fi); + return ERR_PTR(err); } + fib_info_cnt++; fi->fib_net = net; fi->fib_protocol = cfg->fc_protocol; @@@ -1101,6 -1110,10 +1101,6 @@@ goto failure; } endfor_nexthops(fi) - err = fib_convert_metrics(fi, cfg); - if (err) - goto failure; - if (cfg->fc_mp) { #ifdef CONFIG_IP_ROUTE_MULTIPATH err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack); @@@ -1457,6 -1470,56 +1457,56 @@@ static int call_fib_nh_notifiers(struc return NOTIFY_DONE; } + /* Update the PMTU of exceptions when: + * - the new MTU of the first hop becomes smaller than the PMTU + * - the old MTU was the same as the PMTU, and it limited discovery of + * larger MTUs on the path. 
With that limit raised, we can now + * discover larger MTUs + * A special case is locked exceptions, for which the PMTU is smaller + * than the minimal accepted PMTU: + * - if the new MTU is greater than the PMTU, don't make any change + * - otherwise, unlock and set PMTU + */ + static void nh_update_mtu(struct fib_nh *nh, u32 new, u32 orig) + { + struct fnhe_hash_bucket *bucket; + int i; + + bucket = rcu_dereference_protected(nh->nh_exceptions, 1); + if (!bucket) + return; + + for (i = 0; i < FNHE_HASH_SIZE; i++) { + struct fib_nh_exception *fnhe; + + for (fnhe = rcu_dereference_protected(bucket[i].chain, 1); + fnhe; + fnhe = rcu_dereference_protected(fnhe->fnhe_next, 1)) { + if (fnhe->fnhe_mtu_locked) { + if (new <= fnhe->fnhe_pmtu) { + fnhe->fnhe_pmtu = new; + fnhe->fnhe_mtu_locked = false; + } + } else if (new < fnhe->fnhe_pmtu || + orig == fnhe->fnhe_pmtu) { + fnhe->fnhe_pmtu = new; + } + } + } + } + + void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) + { + unsigned int hash = fib_devindex_hashfn(dev->ifindex); + struct hlist_head *head = &fib_info_devhash[hash]; + struct fib_nh *nh; + + hlist_for_each_entry(nh, head, nh_hash) { + if (nh->nh_dev == dev) + nh_update_mtu(nh, dev->mtu, orig_mtu); + } + } + /* Event force Flags Description * NETDEV_CHANGE 0 LINKDOWN Carrier OFF, not for scope host * NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host diff --combined net/ipv4/route.c index f71d2395c428,8501554e96a4..c0a9d26c06ce --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@@ -1001,21 -1001,22 +1001,22 @@@ out: kfree_skb(skb) static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) { struct dst_entry *dst = &rt->dst; + u32 old_mtu = ipv4_mtu(dst); struct fib_result res; bool lock = false; if (ip_mtu_locked(dst)) return; - if (ipv4_mtu(dst) < mtu) + if (old_mtu < mtu) return; if (mtu < ip_rt_min_pmtu) { lock = true; - mtu = ip_rt_min_pmtu; + mtu = min(old_mtu, ip_rt_min_pmtu); } - if (rt->rt_pmtu == mtu && + if (rt->rt_pmtu == mtu && !lock && time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2)) return; @@@ -1040,15 -1041,17 +1041,15 @@@ static void ip_rt_update_pmtu(struct ds } void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, - int oif, u32 mark, u8 protocol, int flow_flags) + int oif, u8 protocol) { const struct iphdr *iph = (const struct iphdr *) skb->data; struct flowi4 fl4; struct rtable *rt; - - if (!mark) - mark = IP4_REPLY_MARK(net, skb->mark); + u32 mark = IP4_REPLY_MARK(net, skb->mark); __build_flow_key(net, &fl4, NULL, iph, oif, - RT_TOS(iph->tos), protocol, mark, flow_flags); + RT_TOS(iph->tos), protocol, mark, 0); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { __ip_rt_update_pmtu(rt, &fl4, mtu); @@@ -1130,14 -1133,14 +1131,14 @@@ out EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu); void ipv4_redirect(struct sk_buff *skb, struct net *net, - int oif, u32 mark, u8 protocol, int flow_flags) + int oif, u8 protocol) { const struct iphdr *iph = (const struct iphdr *) skb->data; struct flowi4 fl4; struct rtable *rt; __build_flow_key(net, &fl4, NULL, iph, oif, - RT_TOS(iph->tos), protocol, mark, flow_flags); + RT_TOS(iph->tos), protocol, 0, 0); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { __ip_do_redirect(rt, skb, &fl4, false); @@@ -1217,15 -1220,18 +1218,15 @@@ void ip_rt_get_source(u8 *addr, struct src = ip_hdr(skb)->saddr; else { struct fib_result res; - struct flowi4 fl4; - struct iphdr *iph; - - iph = ip_hdr(skb); - - memset(&fl4, 0, sizeof(fl4)); - fl4.daddr = iph->daddr; - fl4.saddr = iph->saddr; - 
fl4.flowi4_tos = RT_TOS(iph->tos); - fl4.flowi4_oif = rt->dst.dev->ifindex; - fl4.flowi4_iif = skb->dev->ifindex; - fl4.flowi4_mark = skb->mark; + struct iphdr *iph = ip_hdr(skb); + struct flowi4 fl4 = { + .daddr = iph->daddr, + .saddr = iph->saddr, + .flowi4_tos = RT_TOS(iph->tos), + .flowi4_oif = rt->dst.dev->ifindex, + .flowi4_iif = skb->dev->ifindex, + .flowi4_mark = skb->mark, + }; rcu_read_lock(); if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0) @@@ -1476,9 -1482,12 +1477,9 @@@ void rt_del_uncached_list(struct rtabl static void ipv4_dst_destroy(struct dst_entry *dst) { - struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst); struct rtable *rt = (struct rtable *)dst; - if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt)) - kfree(p); - + ip_dst_metrics_put(dst); rt_del_uncached_list(rt); } @@@ -1525,8 -1534,11 +1526,8 @@@ static void rt_set_nexthop(struct rtabl rt->rt_gateway = nh->nh_gw; rt->rt_uses_gateway = 1; } - dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true); - if (fi->fib_metrics != &dst_default_metrics) { - rt->dst._metrics |= DST_METRICS_REFCOUNTED; - refcount_inc(&fi->fib_metrics->refcnt); - } + ip_dst_init_metrics(&rt->dst, fi->fib_metrics); + #ifdef CONFIG_IP_ROUTE_CLASSID rt->dst.tclassid = nh->nh_tclassid; #endif @@@ -2774,7 -2786,7 +2775,7 @@@ static int inet_rtm_getroute(struct sk_ struct rtable *rt = NULL; struct sk_buff *skb; struct rtmsg *rtm; - struct flowi4 fl4; + struct flowi4 fl4 = {}; __be32 dst = 0; __be32 src = 0; kuid_t uid; @@@ -2814,6 -2826,7 +2815,6 @@@ if (!skb) return -ENOBUFS; - memset(&fl4, 0, sizeof(fl4)); fl4.daddr = dst; fl4.saddr = src; fl4.flowi4_tos = rtm->rtm_tos; diff --combined net/ipv4/udp.c index 1bec2203d558,c32a4c16b7ff..cf8252d05a01 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@@ -1042,7 -1042,7 +1042,7 @@@ int udp_sendmsg(struct sock *sk, struc } if (ipv4_is_multicast(daddr)) { - if (!ipc.oif) + if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif)) ipc.oif = inet->mc_index; if (!saddr) saddr = inet->mc_addr; @@@ -1627,7 -1627,7 +1627,7 @@@ busy_check *err = error; return NULL; } - EXPORT_SYMBOL_GPL(__skb_recv_udp); + EXPORT_SYMBOL(__skb_recv_udp); /* * This should be easy, if there is something there we @@@ -1889,7 -1889,7 +1889,7 @@@ static int __udp_queue_rcv_skb(struct s return 0; } -static DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key); +DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key); void udp_encap_enable(void) { static_branch_enable(&udp_encap_needed_key); diff --combined net/ipv6/ip6_fib.c index 9ba72d94d60f,cbe46175bb59..0783af11b0b7 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@@ -29,7 -29,6 +29,7 @@@ #include #include +#include #include #include #include @@@ -47,7 -46,6 +47,7 @@@ struct fib6_cleaner int (*func)(struct fib6_info *, void *arg); int sernum; void *arg; + bool skip_notify; }; #ifdef CONFIG_IPV6_SUBTREES @@@ -162,6 -160,8 +162,6 @@@ struct fib6_info *fib6_info_alloc(gfp_ } INIT_LIST_HEAD(&f6i->fib6_siblings); - f6i->fib6_metrics = (struct dst_metrics *)&dst_default_metrics; - atomic_inc(&f6i->fib6_ref); return f6i; @@@ -171,6 -171,7 +171,6 @@@ void fib6_info_destroy_rcu(struct rcu_h { struct fib6_info *f6i = container_of(head, struct fib6_info, rcu); struct rt6_exception_bucket *bucket; - struct dst_metrics *m; WARN_ON(f6i->fib6_node); @@@ -195,6 -196,8 +195,8 @@@ *ppcpu_rt = NULL; } } + + free_percpu(f6i->rt6i_pcpu); } lwtstate_put(f6i->fib6_nh.nh_lwtstate); @@@ -202,7 -205,9 +204,7 @@@ if (f6i->fib6_nh.nh_dev) dev_put(f6i->fib6_nh.nh_dev); - m = 
f6i->fib6_metrics; - if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt)) - kfree(m); + ip_fib_metrics_put(f6i->fib6_metrics); kfree(f6i); } @@@ -565,7 -570,6 +567,7 @@@ static int fib6_dump_table(struct fib6_ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) { + const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); unsigned int h, s_h; unsigned int e = 0, s_e; @@@ -575,13 -579,6 +577,13 @@@ struct hlist_head *head; int res = 0; + if (cb->strict_check) { + int err = ip_valid_fib_dump_req(nlh, cb->extack); + + if (err < 0) + return err; + } + s_h = cb->args[0]; s_e = cb->args[1]; @@@ -1957,7 -1954,6 +1959,7 @@@ static int fib6_clean_node(struct fib6_ struct fib6_cleaner *c = container_of(w, struct fib6_cleaner, w); struct nl_info info = { .nl_net = c->net, + .skip_notify = c->skip_notify, }; if (c->sernum != FIB6_NO_SERNUM_CHANGE && @@@ -2009,7 -2005,7 +2011,7 @@@ static void fib6_clean_tree(struct net *net, struct fib6_node *root, int (*func)(struct fib6_info *, void *arg), - int sernum, void *arg) + int sernum, void *arg, bool skip_notify) { struct fib6_cleaner c; @@@ -2021,14 -2017,13 +2023,14 @@@ c.sernum = sernum; c.arg = arg; c.net = net; + c.skip_notify = skip_notify; fib6_walk(net, &c.w); } static void __fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *), - int sernum, void *arg) + int sernum, void *arg, bool skip_notify) { struct fib6_table *table; struct hlist_head *head; @@@ -2040,7 -2035,7 +2042,7 @@@ hlist_for_each_entry_rcu(table, head, tb6_hlist) { spin_lock_bh(&table->tb6_lock); fib6_clean_tree(net, &table->tb6_root, - func, sernum, arg); + func, sernum, arg, skip_notify); spin_unlock_bh(&table->tb6_lock); } } @@@ -2050,21 -2045,14 +2052,21 @@@ void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *), void *arg) { - __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg); + __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg, false); +} + +void fib6_clean_all_skip_notify(struct net *net, + int (*func)(struct fib6_info *, void *), + void *arg) +{ + __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg, true); } static void fib6_flush_trees(struct net *net) { int new_sernum = fib6_new_sernum(net); - __fib6_clean_all(net, NULL, new_sernum, NULL); + __fib6_clean_all(net, NULL, new_sernum, NULL, false); } /* diff --combined net/rxrpc/ar-internal.h index 76569c178915,a6e6cae82c30..8cee7644965c --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@@ -302,6 -302,7 +302,7 @@@ struct rxrpc_peer /* calculated RTT cache */ #define RXRPC_RTT_CACHE_SIZE 32 + spinlock_t rtt_input_lock; /* RTT lock for input routine */ ktime_t rtt_last_req; /* Time of last RTT request */ u64 rtt; /* Current RTT estimate (in nS) */ u64 rtt_sum; /* Sum of cache contents */ @@@ -442,17 -443,17 +443,17 @@@ struct rxrpc_connection spinlock_t state_lock; /* state-change lock */ enum rxrpc_conn_cache_state cache_state; enum rxrpc_conn_proto_state state; /* current state of connection */ - u32 local_abort; /* local abort code */ - u32 remote_abort; /* remote abort code */ + u32 abort_code; /* Abort code of connection abort */ int debug_id; /* debug ID for printks */ atomic_t serial; /* packet serial number counter */ unsigned int hi_serial; /* highest serial number received */ u32 security_nonce; /* response re-use preventer */ - u16 service_id; /* Service ID, possibly upgraded */ + u32 service_id; /* Service ID, possibly upgraded */ u8 size_align; /* data size alignment (for security) */ u8 
security_size; /* security header size */ u8 security_ix; /* security type */ u8 out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */ + short error; /* Local error code */ }; static inline bool rxrpc_to_server(const struct rxrpc_skb_priv *sp) @@@ -635,6 -636,8 +636,8 @@@ struct rxrpc_call bool tx_phase; /* T if transmission phase, F if receive phase */ u8 nr_jumbo_bad; /* Number of jumbo dups/exceeds-windows */ + spinlock_t input_lock; /* Lock for packet input to this call */ + /* receive-phase ACK management */ u8 ackr_reason; /* reason to ACK */ u16 ackr_skew; /* skew on packet being ACK'd */ @@@ -720,8 -723,6 +723,6 @@@ int rxrpc_service_prealloc(struct rxrpc void rxrpc_discard_prealloc(struct rxrpc_sock *); struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *, struct rxrpc_sock *, - struct rxrpc_peer *, - struct rxrpc_connection *, struct sk_buff *); void rxrpc_accept_incoming_calls(struct rxrpc_local *); struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, unsigned long, @@@ -891,8 -892,9 +892,9 @@@ extern unsigned long rxrpc_conn_idle_cl extern struct idr rxrpc_client_conn_ids; void rxrpc_destroy_client_conn_ids(void); - int rxrpc_connect_call(struct rxrpc_call *, struct rxrpc_conn_parameters *, - struct sockaddr_rxrpc *, gfp_t); + int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_call *, + struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *, + gfp_t); void rxrpc_expose_client_call(struct rxrpc_call *); void rxrpc_disconnect_client_call(struct rxrpc_call *); void rxrpc_put_client_conn(struct rxrpc_connection *); @@@ -965,7 -967,7 +967,7 @@@ void rxrpc_unpublish_service_conn(struc /* * input.c */ - void rxrpc_data_ready(struct sock *); + int rxrpc_input_packet(struct sock *, struct sk_buff *); /* * insecure.c @@@ -1045,10 -1047,11 +1047,11 @@@ void rxrpc_peer_keepalive_worker(struc */ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *, const struct sockaddr_rxrpc *); - struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *, + struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *, struct rxrpc_local *, struct sockaddr_rxrpc *, gfp_t); struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t); - void rxrpc_new_incoming_peer(struct rxrpc_local *, struct rxrpc_peer *); + void rxrpc_new_incoming_peer(struct rxrpc_sock *, struct rxrpc_local *, + struct rxrpc_peer *); void rxrpc_destroy_all_peers(struct rxrpc_net *); struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *); struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *); @@@ -1095,6 -1098,7 +1098,6 @@@ void rxrpc_new_skb(struct sk_buff *, en void rxrpc_see_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_get_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_free_skb(struct sk_buff *, enum rxrpc_skb_trace); -void rxrpc_lose_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_purge_queue(struct sk_buff_head *); /* @@@ -1111,7 -1115,8 +1114,7 @@@ static inline void rxrpc_sysctl_exit(vo /* * utils.c */ -int rxrpc_extract_addr_from_skb(struct rxrpc_local *, struct sockaddr_rxrpc *, - struct sk_buff *); +int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *); static inline bool before(u32 seq1, u32 seq2) { diff --combined net/rxrpc/call_accept.c index 8354cadbb839,652e314de38e..e0d8ca03169a --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@@ -280,14 -280,14 +280,14 @@@ static struct rxrpc_call *rxrpc_alloc_i peer = NULL; if (!peer) { peer = b->peer_backlog[peer_tail]; - if (rxrpc_extract_addr_from_skb(local, &peer->srx, skb) < 
0) + if (rxrpc_extract_addr_from_skb(&peer->srx, skb) < 0) return NULL; b->peer_backlog[peer_tail] = NULL; smp_store_release(&b->peer_backlog_tail, (peer_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); - rxrpc_new_incoming_peer(local, peer); + rxrpc_new_incoming_peer(rx, local, peer); } /* Now allocate and set up the connection */ @@@ -333,11 -333,11 +333,11 @@@ */ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, struct rxrpc_sock *rx, - struct rxrpc_peer *peer, - struct rxrpc_connection *conn, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_connection *conn; + struct rxrpc_peer *peer; struct rxrpc_call *call; _enter(""); @@@ -354,6 -354,13 +354,13 @@@ goto out; } + /* The peer, connection and call may all have sprung into existence due + * to a duplicate packet being handled on another CPU in parallel, so + * we have to recheck the routing. However, we're now holding + * rx->incoming_lock, so the values should remain stable. + */ + conn = rxrpc_find_connection_rcu(local, skb, &peer); + call = rxrpc_alloc_incoming_call(rx, local, peer, conn, skb); if (!call) { skb->mark = RXRPC_SKB_MARK_REJECT_BUSY; @@@ -396,20 -403,22 +403,22 @@@ case RXRPC_CONN_SERVICE: write_lock(&call->state_lock); - if (rx->discard_new_call) - call->state = RXRPC_CALL_SERVER_RECV_REQUEST; - else - call->state = RXRPC_CALL_SERVER_ACCEPTING; + if (call->state < RXRPC_CALL_COMPLETE) { + if (rx->discard_new_call) + call->state = RXRPC_CALL_SERVER_RECV_REQUEST; + else + call->state = RXRPC_CALL_SERVER_ACCEPTING; + } write_unlock(&call->state_lock); break; case RXRPC_CONN_REMOTELY_ABORTED: rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, - conn->remote_abort, -ECONNABORTED); + conn->abort_code, conn->error); break; case RXRPC_CONN_LOCALLY_ABORTED: rxrpc_abort_call("CON", call, sp->hdr.seq, - conn->local_abort, -ECONNABORTED); + conn->abort_code, conn->error); break; default: BUG(); diff --combined net/rxrpc/input.c index 5b2626929822,570b49d2da42..9128aa0e40aa --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@@ -216,10 -216,11 +216,11 @@@ static void rxrpc_send_ping(struct rxrp /* * Apply a hard ACK by advancing the Tx window. */ - static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, + static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, struct rxrpc_ack_summary *summary) { struct sk_buff *skb, *list = NULL; + bool rot_last = false; int ix; u8 annotation; @@@ -243,15 -244,17 +244,17 @@@ skb->next = list; list = skb; - if (annotation & RXRPC_TX_ANNO_LAST) + if (annotation & RXRPC_TX_ANNO_LAST) { set_bit(RXRPC_CALL_TX_LAST, &call->flags); + rot_last = true; + } if ((annotation & RXRPC_TX_ANNO_MASK) != RXRPC_TX_ANNO_ACK) summary->nr_rot_new_acks++; } spin_unlock(&call->lock); - trace_rxrpc_transmit(call, (test_bit(RXRPC_CALL_TX_LAST, &call->flags) ? + trace_rxrpc_transmit(call, (rot_last ? 
rxrpc_transmit_rotate_last : rxrpc_transmit_rotate)); wake_up(&call->waitq); @@@ -259,9 -262,11 +262,11 @@@ while (list) { skb = list; list = skb->next; - skb->next = NULL; + skb_mark_not_on_list(skb); rxrpc_free_skb(skb, rxrpc_skb_tx_freed); } + + return rot_last; } /* @@@ -273,23 -278,26 +278,26 @@@ static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, const char *abort_why) { + unsigned int state; ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags)); write_lock(&call->state_lock); - switch (call->state) { + state = call->state; + switch (state) { case RXRPC_CALL_CLIENT_SEND_REQUEST: case RXRPC_CALL_CLIENT_AWAIT_REPLY: if (reply_begun) - call->state = RXRPC_CALL_CLIENT_RECV_REPLY; + call->state = state = RXRPC_CALL_CLIENT_RECV_REPLY; else - call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY; + call->state = state = RXRPC_CALL_CLIENT_AWAIT_REPLY; break; case RXRPC_CALL_SERVER_AWAIT_ACK: __rxrpc_call_completed(call); rxrpc_notify_socket(call); + state = call->state; break; default: @@@ -297,11 -305,10 +305,10 @@@ } write_unlock(&call->state_lock); - if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) { + if (state == RXRPC_CALL_CLIENT_AWAIT_REPLY) trace_rxrpc_transmit(call, rxrpc_transmit_await_reply); - } else { + else trace_rxrpc_transmit(call, rxrpc_transmit_end); - } _leave(" = ok"); return true; @@@ -332,11 -339,11 +339,11 @@@ static bool rxrpc_receiving_reply(struc trace_rxrpc_timer(call, rxrpc_timer_init_for_reply, now); } - if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) - rxrpc_rotate_tx_window(call, top, &summary); if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { - rxrpc_proto_abort("TXL", call, top); - return false; + if (!rxrpc_rotate_tx_window(call, top, &summary)) { + rxrpc_proto_abort("TXL", call, top); + return false; + } } if (!rxrpc_end_tx_phase(call, true, "ETD")) return false; @@@ -452,13 -459,15 +459,15 @@@ static void rxrpc_input_data(struct rxr } } + spin_lock(&call->input_lock); + /* Received data implicitly ACKs all of the request packets we sent * when we're acting as a client. 
*/ if ((state == RXRPC_CALL_CLIENT_SEND_REQUEST || state == RXRPC_CALL_CLIENT_AWAIT_REPLY) && !rxrpc_receiving_reply(call)) - return; + goto unlock; call->ackr_prev_seq = seq; @@@ -488,12 -497,16 +497,16 @@@ next_subpacket if (flags & RXRPC_LAST_PACKET) { if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && - seq != call->rx_top) - return rxrpc_proto_abort("LSN", call, seq); + seq != call->rx_top) { + rxrpc_proto_abort("LSN", call, seq); + goto unlock; + } } else { if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && - after_eq(seq, call->rx_top)) - return rxrpc_proto_abort("LSA", call, seq); + after_eq(seq, call->rx_top)) { + rxrpc_proto_abort("LSA", call, seq); + goto unlock; + } } trace_rxrpc_rx_data(call->debug_id, seq, serial, flags, annotation); @@@ -560,8 -573,10 +573,10 @@@ skip: offset += len; if (flags & RXRPC_JUMBO_PACKET) { - if (skb_copy_bits(skb, offset, &flags, 1) < 0) - return rxrpc_proto_abort("XJF", call, seq); + if (skb_copy_bits(skb, offset, &flags, 1) < 0) { + rxrpc_proto_abort("XJF", call, seq); + goto unlock; + } offset += sizeof(struct rxrpc_jumbo_header); seq++; serial++; @@@ -601,6 -616,9 +616,9 @@@ ack trace_rxrpc_notify_socket(call->debug_id, serial); rxrpc_notify_socket(call); } + + unlock: + spin_unlock(&call->input_lock); _leave(" [queued]"); } @@@ -687,15 -705,14 +705,14 @@@ static void rxrpc_input_ping_response(s ping_time = call->ping_time; smp_rmb(); - ping_serial = call->ping_serial; + ping_serial = READ_ONCE(call->ping_serial); if (orig_serial == call->acks_lost_ping) rxrpc_input_check_for_lost_ack(call); - if (!test_bit(RXRPC_CALL_PINGING, &call->flags) || - before(orig_serial, ping_serial)) + if (before(orig_serial, ping_serial) || + !test_and_clear_bit(RXRPC_CALL_PINGING, &call->flags)) return; - clear_bit(RXRPC_CALL_PINGING, &call->flags); if (after(orig_serial, ping_serial)) return; @@@ -861,15 -878,32 +878,32 @@@ static void rxrpc_input_ack(struct rxrp rxrpc_propose_ack_respond_to_ack); } + /* Discard any out-of-order or duplicate ACKs. */ + if (before_eq(sp->hdr.serial, call->acks_latest)) + return; + + buf.info.rxMTU = 0; ioffset = offset + nr_acks + 3; - if (skb->len >= ioffset + sizeof(buf.info)) { - if (skb_copy_bits(skb, ioffset, &buf.info, sizeof(buf.info)) < 0) - return rxrpc_proto_abort("XAI", call, 0); + if (skb->len >= ioffset + sizeof(buf.info) && + skb_copy_bits(skb, ioffset, &buf.info, sizeof(buf.info)) < 0) + return rxrpc_proto_abort("XAI", call, 0); + + spin_lock(&call->input_lock); + + /* Discard any out-of-order or duplicate ACKs. */ + if (before_eq(sp->hdr.serial, call->acks_latest)) + goto out; + call->acks_latest_ts = skb->tstamp; + call->acks_latest = sp->hdr.serial; + + /* Parse rwind and mtu sizes if provided. */ + if (buf.info.rxMTU) rxrpc_input_ackinfo(call, skb, &buf.info); - } - if (first_soft_ack == 0) - return rxrpc_proto_abort("AK0", call, 0); + if (first_soft_ack == 0) { + rxrpc_proto_abort("AK0", call, 0); + goto out; + } /* Ignore ACKs unless we are or have just been transmitting. */ switch (READ_ONCE(call->state)) { @@@ -879,39 -913,35 +913,35 @@@ case RXRPC_CALL_SERVER_AWAIT_ACK: break; default: - return; - } - - /* Discard any out-of-order or duplicate ACKs. 
*/ - if (before_eq(sp->hdr.serial, call->acks_latest)) { - _debug("discard ACK %d <= %d", - sp->hdr.serial, call->acks_latest); - return; + goto out; } - call->acks_latest_ts = skb->tstamp; - call->acks_latest = sp->hdr.serial; if (before(hard_ack, call->tx_hard_ack) || - after(hard_ack, call->tx_top)) - return rxrpc_proto_abort("AKW", call, 0); - if (nr_acks > call->tx_top - hard_ack) - return rxrpc_proto_abort("AKN", call, 0); + after(hard_ack, call->tx_top)) { + rxrpc_proto_abort("AKW", call, 0); + goto out; + } + if (nr_acks > call->tx_top - hard_ack) { + rxrpc_proto_abort("AKN", call, 0); + goto out; + } - if (after(hard_ack, call->tx_hard_ack)) - rxrpc_rotate_tx_window(call, hard_ack, &summary); + if (after(hard_ack, call->tx_hard_ack)) { + if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) { + rxrpc_end_tx_phase(call, false, "ETA"); + goto out; + } + } if (nr_acks > 0) { - if (skb_copy_bits(skb, offset, buf.acks, nr_acks) < 0) - return rxrpc_proto_abort("XSA", call, 0); + if (skb_copy_bits(skb, offset, buf.acks, nr_acks) < 0) { + rxrpc_proto_abort("XSA", call, 0); + goto out; + } rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks, &summary); } - if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { - rxrpc_end_tx_phase(call, false, "ETA"); - return; - } - if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] & RXRPC_TX_ANNO_LAST && summary.nr_acks == call->tx_top - hard_ack && @@@ -920,7 -950,9 +950,9 @@@ false, true, rxrpc_propose_ack_ping_for_lost_reply); - return rxrpc_congestion_management(call, skb, &summary, acked_serial); + rxrpc_congestion_management(call, skb, &summary, acked_serial); + out: + spin_unlock(&call->input_lock); } /* @@@ -933,9 -965,12 +965,12 @@@ static void rxrpc_input_ackall(struct r _proto("Rx ACKALL %%%u", sp->hdr.serial); - rxrpc_rotate_tx_window(call, call->tx_top, &summary); - if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) + spin_lock(&call->input_lock); + + if (rxrpc_rotate_tx_window(call, call->tx_top, &summary)) rxrpc_end_tx_phase(call, false, "ETL"); + + spin_unlock(&call->input_lock); } /* @@@ -1018,18 -1053,19 +1053,19 @@@ static void rxrpc_input_call_packet(str } /* - * Handle a new call on a channel implicitly completing the preceding call on - * that channel. + * Handle a new service call on a channel implicitly completing the preceding + * call on that channel. This does not apply to client conns. * * TODO: If callNumber > call_id + 1, renegotiate security. */ - static void rxrpc_input_implicit_end_call(struct rxrpc_connection *conn, + static void rxrpc_input_implicit_end_call(struct rxrpc_sock *rx, + struct rxrpc_connection *conn, struct rxrpc_call *call) { switch (READ_ONCE(call->state)) { case RXRPC_CALL_SERVER_AWAIT_ACK: rxrpc_call_completed(call); - break; + /* Fall through */ case RXRPC_CALL_COMPLETE: break; default: @@@ -1037,11 -1073,13 +1073,13 @@@ set_bit(RXRPC_CALL_EV_ABORT, &call->events); rxrpc_queue_call(call); } + trace_rxrpc_improper_term(call); break; } - trace_rxrpc_improper_term(call); + spin_lock(&rx->incoming_lock); __rxrpc_disconnect_call(conn, call); + spin_unlock(&rx->incoming_lock); rxrpc_notify_socket(call); } @@@ -1120,8 -1158,10 +1158,10 @@@ int rxrpc_extract_header(struct rxrpc_s * The socket is locked by the caller and this prevents the socket from being * shut down and the local endpoint from going away, thus sk_user_data will not * be cleared until this function returns. + * + * Called with the RCU read lock held from the IP layer via UDP. 
*/ - void rxrpc_data_ready(struct sock *udp_sk) + int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) { struct rxrpc_connection *conn; struct rxrpc_channel *chan; @@@ -1130,38 -1170,17 +1170,17 @@@ struct rxrpc_local *local = udp_sk->sk_user_data; struct rxrpc_peer *peer = NULL; struct rxrpc_sock *rx = NULL; - struct sk_buff *skb; unsigned int channel; - int ret, skew = 0; + int skew = 0; _enter("%p", udp_sk); - ASSERT(!irqs_disabled()); - - skb = skb_recv_udp(udp_sk, 0, 1, &ret); - if (!skb) { - if (ret == -EAGAIN) - return; - _debug("UDP socket error %d", ret); - return; - } - if (skb->tstamp == 0) skb->tstamp = ktime_get_real(); rxrpc_new_skb(skb, rxrpc_skb_rx_received); - _net("recv skb %p", skb); - - /* we'll probably need to checksum it (didn't call sock_recvmsg) */ - if (skb_checksum_complete(skb)) { - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); - __UDP_INC_STATS(&init_net, UDP_MIB_INERRORS, 0); - _leave(" [CSUM failed]"); - return; - } - - __UDP_INC_STATS(&init_net, UDP_MIB_INDATAGRAMS, 0); + skb_pull(skb, sizeof(struct udphdr)); /* The UDP protocol already released all skb resources; * we are free to add our own data there. @@@ -1177,10 -1196,12 +1196,12 @@@ if ((lose++ & 7) == 7) { trace_rxrpc_rx_lose(sp); rxrpc_free_skb(skb, rxrpc_skb_rx_lost); - return; + return 0; } } + if (skb->tstamp == 0) + skb->tstamp = ktime_get_real(); trace_rxrpc_rx_packet(sp); switch (sp->hdr.type) { @@@ -1234,8 -1255,6 +1255,6 @@@ if (sp->hdr.serviceId == 0) goto bad_message; - rcu_read_lock(); - if (rxrpc_to_server(sp)) { /* Weed out packets to services we're not offering. Packets * that would begin a call are explicitly rejected and the rest @@@ -1247,7 -1266,7 +1266,7 @@@ if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && sp->hdr.seq == 1) goto unsupported_service; - goto discard_unlock; + goto discard; } } @@@ -1257,17 -1276,23 +1276,23 @@@ goto wrong_security; if (sp->hdr.serviceId != conn->service_id) { - if (!test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) || - conn->service_id != conn->params.service_id) + int old_id; + + if (!test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags)) + goto reupgrade; + old_id = cmpxchg(&conn->service_id, conn->params.service_id, + sp->hdr.serviceId); + + if (old_id != conn->params.service_id && + old_id != sp->hdr.serviceId) goto reupgrade; - conn->service_id = sp->hdr.serviceId; } if (sp->hdr.callNumber == 0) { /* Connection-level packet */ _debug("CONN %p {%d}", conn, conn->debug_id); rxrpc_post_packet_to_conn(conn, skb); - goto out_unlock; + goto out; } /* Note the serial number skew here */ @@@ -1286,19 -1311,19 +1311,19 @@@ /* Ignore really old calls */ if (sp->hdr.callNumber < chan->last_call) - goto discard_unlock; + goto discard; if (sp->hdr.callNumber == chan->last_call) { if (chan->call || sp->hdr.type == RXRPC_PACKET_TYPE_ABORT) - goto discard_unlock; + goto discard; /* For the previous service call, if completed * successfully, we discard all further packets. */ if (rxrpc_conn_is_service(conn) && chan->last_type == RXRPC_PACKET_TYPE_ACK) - goto discard_unlock; + goto discard; /* But otherwise we need to retransmit the final packet * from data cached in the connection record. 
@@@ -1309,18 -1334,16 +1334,16 @@@ sp->hdr.serial, sp->hdr.flags, 0); rxrpc_post_packet_to_conn(conn, skb); - goto out_unlock; + goto out; } call = rcu_dereference(chan->call); if (sp->hdr.callNumber > chan->call_id) { - if (rxrpc_to_client(sp)) { - rcu_read_unlock(); + if (rxrpc_to_client(sp)) goto reject_packet; - } if (call) - rxrpc_input_implicit_end_call(conn, call); + rxrpc_input_implicit_end_call(rx, conn, call); call = NULL; } @@@ -1337,55 -1360,42 +1360,42 @@@ if (!call || atomic_read(&call->usage) == 0) { if (rxrpc_to_client(sp) || sp->hdr.type != RXRPC_PACKET_TYPE_DATA) - goto bad_message_unlock; + goto bad_message; if (sp->hdr.seq != 1) - goto discard_unlock; - call = rxrpc_new_incoming_call(local, rx, peer, conn, skb); - if (!call) { - rcu_read_unlock(); + goto discard; + call = rxrpc_new_incoming_call(local, rx, skb); + if (!call) goto reject_packet; - } rxrpc_send_ping(call, skb, skew); mutex_unlock(&call->user_mutex); } rxrpc_input_call_packet(call, skb, skew); - goto discard_unlock; + goto discard; - discard_unlock: - rcu_read_unlock(); discard: rxrpc_free_skb(skb, rxrpc_skb_rx_freed); out: trace_rxrpc_rx_done(0, 0); - return; - - out_unlock: - rcu_read_unlock(); - goto out; + return 0; wrong_security: - rcu_read_unlock(); trace_rxrpc_abort(0, "SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RXKADINCONSISTENCY, EBADMSG); skb->priority = RXKADINCONSISTENCY; goto post_abort; unsupported_service: - rcu_read_unlock(); trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RX_INVALID_OPERATION, EOPNOTSUPP); skb->priority = RX_INVALID_OPERATION; goto post_abort; reupgrade: - rcu_read_unlock(); trace_rxrpc_abort(0, "UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RX_PROTOCOL_ERROR, EBADMSG); goto protocol_error; - bad_message_unlock: - rcu_read_unlock(); bad_message: trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RX_PROTOCOL_ERROR, EBADMSG); @@@ -1397,4 -1407,5 +1407,5 @@@ reject_packet trace_rxrpc_rx_done(skb->mark, skb->priority); rxrpc_reject_packet(local, skb); _leave(" [badmsg]"); + return 0; } diff --combined net/rxrpc/peer_event.c index 81a7869325a6,05b51bdbdd41..7feb611c7258 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@@ -47,8 -47,6 +47,8 @@@ static struct rxrpc_peer *rxrpc_lookup_ */ switch (srx->transport.family) { case AF_INET: + srx->transport_len = sizeof(srx->transport.sin); + srx->transport.family = AF_INET; srx->transport.sin.sin_port = serr->port; switch (serr->ee.ee_origin) { case SO_EE_ORIGIN_ICMP: @@@ -72,20 -70,20 +72,20 @@@ #ifdef CONFIG_AF_RXRPC_IPV6 case AF_INET6: - srx->transport.sin6.sin6_port = serr->port; switch (serr->ee.ee_origin) { case SO_EE_ORIGIN_ICMP6: _net("Rx ICMP6"); + srx->transport.sin6.sin6_port = serr->port; memcpy(&srx->transport.sin6.sin6_addr, skb_network_header(skb) + serr->addr_offset, sizeof(struct in6_addr)); break; case SO_EE_ORIGIN_ICMP: _net("Rx ICMP on v6 sock"); - srx->transport.sin6.sin6_addr.s6_addr32[0] = 0; - srx->transport.sin6.sin6_addr.s6_addr32[1] = 0; - srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); - memcpy(srx->transport.sin6.sin6_addr.s6_addr + 12, + srx->transport_len = sizeof(srx->transport.sin); + srx->transport.family = AF_INET; + srx->transport.sin.sin_port = serr->port; + memcpy(&srx->transport.sin.sin_addr, skb_network_header(skb) + serr->addr_offset, sizeof(struct in_addr)); break; @@@ -303,6 -301,8 +303,8 @@@ void rxrpc_peer_add_rtt(struct rxrpc_ca if (rtt < 0) return; + spin_lock(&peer->rtt_input_lock); + /* Replace the 
oldest datum in the RTT buffer */ sum -= peer->rtt_cache[cursor]; sum += rtt; @@@ -314,6 -314,8 +316,8 @@@ peer->rtt_usage = usage; } + spin_unlock(&peer->rtt_input_lock); + /* Now recalculate the average */ if (usage == RXRPC_RTT_CACHE_SIZE) { avg = sum / RXRPC_RTT_CACHE_SIZE; @@@ -322,6 -324,7 +326,7 @@@ do_div(avg, usage); } + /* Don't need to update this under lock */ peer->rtt = avg; trace_rxrpc_rtt_rx(call, why, send_serial, resp_serial, rtt, usage, avg); diff --combined net/sched/cls_u32.c index ac79a40a0392,b2c3406a2cf2..4b28fd44576d --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@@ -68,6 -68,7 +68,6 @@@ struct tc_u_knode u32 mask; u32 __percpu *pcpu_success; #endif - struct tcf_proto *tp; struct rcu_work rwork; /* The 'sel' field MUST be the last field in structure to allow for * tc_u32_keys allocated at end of structure. @@@ -79,10 -80,10 +79,10 @@@ struct tc_u_hnode struct tc_u_hnode __rcu *next; u32 handle; u32 prio; - struct tc_u_common *tp_c; int refcnt; unsigned int divisor; struct idr handle_idr; + bool is_root; struct rcu_head rcu; u32 flags; /* The 'ht' field MUST be the last field in structure to allow for @@@ -97,7 -98,7 +97,7 @@@ struct tc_u_common int refcnt; struct idr handle_idr; struct hlist_node hnode; - struct rcu_head rcu; + long knodes; }; static inline unsigned int u32_hash_fold(__be32 key, @@@ -343,16 -344,19 +343,16 @@@ static void *tc_u_common_ptr(const stru return block->q; } -static unsigned int tc_u_hash(const struct tcf_proto *tp) +static struct hlist_head *tc_u_hash(void *key) { - return hash_ptr(tc_u_common_ptr(tp), U32_HASH_SHIFT); + return tc_u_common_hash + hash_ptr(key, U32_HASH_SHIFT); } -static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp) +static struct tc_u_common *tc_u_common_find(void *key) { struct tc_u_common *tc; - unsigned int h; - - h = tc_u_hash(tp); - hlist_for_each_entry(tc, &tc_u_common_hash[h], hnode) { - if (tc->ptr == tc_u_common_ptr(tp)) + hlist_for_each_entry(tc, tc_u_hash(key), hnode) { + if (tc->ptr == key) return tc; } return NULL; @@@ -361,8 -365,10 +361,8 @@@ static int u32_init(struct tcf_proto *tp) { struct tc_u_hnode *root_ht; - struct tc_u_common *tp_c; - unsigned int h; - - tp_c = tc_u_common_find(tp); + void *key = tc_u_common_ptr(tp); + struct tc_u_common *tp_c = tc_u_common_find(key); root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL); if (root_ht == NULL) @@@ -371,7 -377,6 +371,7 @@@ root_ht->refcnt++; root_ht->handle = tp_c ? 
gen_new_htid(tp_c, root_ht) : 0x80000000; root_ht->prio = tp->prio; + root_ht->is_root = true; idr_init(&root_ht->handle_idr); if (tp_c == NULL) { @@@ -380,23 -385,27 +380,24 @@@ kfree(root_ht); return -ENOBUFS; } - tp_c->ptr = tc_u_common_ptr(tp); + tp_c->ptr = key; INIT_HLIST_NODE(&tp_c->hnode); idr_init(&tp_c->handle_idr); - h = tc_u_hash(tp); - hlist_add_head(&tp_c->hnode, &tc_u_common_hash[h]); + hlist_add_head(&tp_c->hnode, tc_u_hash(key)); } tp_c->refcnt++; RCU_INIT_POINTER(root_ht->next, tp_c->hlist); rcu_assign_pointer(tp_c->hlist, root_ht); - root_ht->tp_c = tp_c; + root_ht->refcnt++; rcu_assign_pointer(tp->root, root_ht); tp->data = tp_c; return 0; } -static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n, - bool free_pf) +static int u32_destroy_key(struct tc_u_knode *n, bool free_pf) { struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); @@@ -430,7 -439,7 +431,7 @@@ static void u32_delete_key_work(struct struct tc_u_knode, rwork); rtnl_lock(); - u32_destroy_key(key->tp, key, false); + u32_destroy_key(key, false); rtnl_unlock(); } @@@ -447,13 -456,12 +448,13 @@@ static void u32_delete_key_freepf_work( struct tc_u_knode, rwork); rtnl_lock(); - u32_destroy_key(key->tp, key, true); + u32_destroy_key(key, true); rtnl_unlock(); } static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) { + struct tc_u_common *tp_c = tp->data; struct tc_u_knode __rcu **kp; struct tc_u_knode *pkp; struct tc_u_hnode *ht = rtnl_dereference(key->ht_up); @@@ -464,7 -472,6 +465,7 @@@ kp = &pkp->next, pkp = rtnl_dereference(*kp)) { if (pkp == key) { RCU_INIT_POINTER(*kp, key->next); + tp_c->knodes--; tcf_unbind_filter(tp, &key->res); idr_remove(&ht->handle_idr, key->handle); @@@ -579,7 -586,6 +580,7 @@@ static int u32_replace_hw_knode(struct static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, struct netlink_ext_ack *extack) { + struct tc_u_common *tp_c = tp->data; struct tc_u_knode *n; unsigned int h; @@@ -587,14 -593,13 +588,14 @@@ while ((n = rtnl_dereference(ht->ht[h])) != NULL) { RCU_INIT_POINTER(ht->ht[h], rtnl_dereference(n->next)); + tp_c->knodes--; tcf_unbind_filter(tp, &n->res); u32_remove_hw_knode(tp, n, extack); idr_remove(&ht->handle_idr, n->handle); if (tcf_exts_get_net(&n->exts)) tcf_queue_work(&n->rwork, u32_delete_key_freepf_work); else - u32_destroy_key(n->tp, n, true); + u32_destroy_key(n, true); } } } @@@ -606,7 -611,7 +607,7 @@@ static int u32_destroy_hnode(struct tcf struct tc_u_hnode __rcu **hn; struct tc_u_hnode *phn; - WARN_ON(ht->refcnt); + WARN_ON(--ht->refcnt); u32_clear_hnode(tp, ht, extack); @@@ -627,6 -632,17 +628,6 @@@ return -ENOENT; } -static bool ht_empty(struct tc_u_hnode *ht) -{ - unsigned int h; - - for (h = 0; h <= ht->divisor; h++) - if (rcu_access_pointer(ht->ht[h])) - return false; - - return true; -} - static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_u_common *tp_c = tp->data; @@@ -634,7 -650,7 +635,7 @@@ WARN_ON(root_ht == NULL); - if (root_ht && --root_ht->refcnt == 0) + if (root_ht && --root_ht->refcnt == 1) u32_destroy_hnode(tp, root_ht, extack); if (--tp_c->refcnt == 0) { @@@ -664,22 -680,25 +665,21 @@@ static int u32_delete(struct tcf_proto struct netlink_ext_ack *extack) { struct tc_u_hnode *ht = arg; - struct tc_u_hnode *root_ht = rtnl_dereference(tp->root); struct tc_u_common *tp_c = tp->data; int ret = 0; - if (ht == NULL) - goto out; - if (TC_U32_KEY(ht->handle)) { u32_remove_hw_knode(tp, (struct tc_u_knode *)ht, extack); ret = u32_delete_key(tp, (struct tc_u_knode 
*)ht); goto out; } - if (root_ht == ht) { + if (ht->is_root) { NL_SET_ERR_MSG_MOD(extack, "Not allowed to delete root node"); return -EINVAL; } if (ht->refcnt == 1) { - ht->refcnt--; u32_destroy_hnode(tp, ht, extack); } else { NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter"); @@@ -687,7 -706,38 +687,7 @@@ } out: - *last = true; - if (root_ht) { - if (root_ht->refcnt > 2) { - *last = false; - goto ret; - } - if (root_ht->refcnt == 2) { - if (!ht_empty(root_ht)) { - *last = false; - goto ret; - } - } - } - - if (tp_c->refcnt > 1) { - *last = false; - goto ret; - } - - if (tp_c->refcnt == 1) { - struct tc_u_hnode *ht; - - for (ht = rtnl_dereference(tp_c->hlist); - ht; - ht = rtnl_dereference(ht->next)) - if (!ht_empty(ht)) { - *last = false; - break; - } - } - -ret: + *last = tp_c->refcnt == 1 && tp_c->knodes == 0; return ret; } @@@ -718,7 -768,7 +718,7 @@@ static const struct nla_policy u32_poli }; static int u32_set_parms(struct net *net, struct tcf_proto *tp, - unsigned long base, struct tc_u_hnode *ht, + unsigned long base, struct tc_u_knode *n, struct nlattr **tb, struct nlattr *est, bool ovr, struct netlink_ext_ack *extack) @@@ -739,16 -789,12 +739,16 @@@ } if (handle) { - ht_down = u32_lookup_ht(ht->tp_c, handle); + ht_down = u32_lookup_ht(tp->data, handle); if (!ht_down) { NL_SET_ERR_MSG_MOD(extack, "Link hash table not found"); return -EINVAL; } + if (ht_down->is_root) { + NL_SET_ERR_MSG_MOD(extack, "Not linking to root node"); + return -EINVAL; + } ht_down->refcnt++; } @@@ -845,6 -891,7 +845,6 @@@ static struct tc_u_knode *u32_init_knod /* Similarly success statistics must be moved as pointers */ new->pcpu_success = n->pcpu_success; #endif - new->tp = tp; memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key)); if (tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE)) { @@@ -913,17 -960,18 +913,17 @@@ static int u32_change(struct net *net, if (!new) return -ENOMEM; - err = u32_set_parms(net, tp, base, - rtnl_dereference(n->ht_up), new, tb, + err = u32_set_parms(net, tp, base, new, tb, tca[TCA_RATE], ovr, extack); if (err) { - u32_destroy_key(tp, new, false); + u32_destroy_key(new, false); return err; } err = u32_replace_hw_knode(tp, new, flags, extack); if (err) { - u32_destroy_key(tp, new, false); + u32_destroy_key(new, false); return err; } @@@ -940,11 -988,7 +940,11 @@@ if (tb[TCA_U32_DIVISOR]) { unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]); - if (--divisor > 0x100) { + if (!is_power_of_2(divisor)) { + NL_SET_ERR_MSG_MOD(extack, "Divisor is not a power of 2"); + return -EINVAL; + } + if (divisor-- > 0x100) { NL_SET_ERR_MSG_MOD(extack, "Exceeded maximum 256 hash buckets"); return -EINVAL; } @@@ -969,6 -1013,7 +969,6 @@@ return err; } } - ht->tp_c = tp_c; ht->refcnt = 1; ht->divisor = divisor; ht->handle = handle; @@@ -1058,6 -1103,7 +1058,6 @@@ n->handle = handle; n->fshift = s->hmask ? 
ffs(ntohl(s->hmask)) - 1 : 0; n->flags = flags; - n->tp = tp; err = tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE); if (err < 0) @@@ -1079,7 -1125,7 +1079,7 @@@ } #endif - err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr, + err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE], ovr, extack); if (err == 0) { struct tc_u_knode __rcu **ins; @@@ -1100,7 -1146,6 +1100,7 @@@ RCU_INIT_POINTER(n->next, pins); rcu_assign_pointer(*ins, n); + tp_c->knodes++; *arg = n; return 0; } diff --combined net/sched/sch_cake.c index dc539295ae65,793016d722ec..b910cd5c56f7 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@@ -812,7 -812,7 +812,7 @@@ static struct sk_buff *dequeue_head(str if (skb) { flow->head = skb->next; - skb->next = NULL; + skb_mark_not_on_list(skb); } return skb; @@@ -1252,7 -1252,7 +1252,7 @@@ found else flow->head = elig_ack->next; - elig_ack->next = NULL; + skb_mark_not_on_list(elig_ack); return elig_ack; } @@@ -1675,7 -1675,7 +1675,7 @@@ static s32 cake_enqueue(struct sk_buff while (segs) { nskb = segs->next; - segs->next = NULL; + skb_mark_not_on_list(segs); qdisc_skb_cb(segs)->pkt_len = segs->len; cobalt_set_enqueue_time(segs, now); get_cobalt_cb(segs)->adjusted_len = cake_overhead(q, @@@ -2644,7 -2644,7 +2644,7 @@@ static int cake_init(struct Qdisc *sch for (i = 1; i <= CAKE_QUEUES; i++) quantum_div[i] = 65535 / i; - q->tins = kvzalloc(CAKE_MAX_TINS * sizeof(struct cake_tin_data), + q->tins = kvcalloc(CAKE_MAX_TINS, sizeof(struct cake_tin_data), GFP_KERNEL); if (!q->tins) goto nomem; diff --combined net/tipc/socket.c index db148c4a916a,49810fdff4c5..de09f514428c --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@@ -47,7 -47,7 +47,7 @@@ #include "netlink.h" #include "group.h" -#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ +#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ #define CONN_PROBING_INTV msecs_to_jiffies(3600000) /* [ms] => 1 h */ #define TIPC_FWD_MSG 1 #define TIPC_MAX_PORT 0xffffffff @@@ -80,6 -80,7 +80,6 @@@ struct sockaddr_pair * @publications: list of publications for port * @blocking_link: address of the congested link we are currently sleeping on * @pub_count: total # of publications port has made during its lifetime - * @probing_state: * @conn_timeout: the time we can wait for an unresponded setup request * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue * @cong_link_cnt: number of congested links @@@ -101,8 -102,8 +101,8 @@@ struct tipc_sock struct list_head cong_links; struct list_head publications; u32 pub_count; - uint conn_timeout; atomic_t dupl_rcvcnt; + u16 conn_timeout; bool probe_unacked; u16 cong_link_cnt; u16 snt_unacked; @@@ -506,9 -507,6 +506,9 @@@ static void __tipc_shutdown(struct sock tipc_wait_for_cond(sock, &timeout, (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))); + /* Remove any pending SYN message */ + __skb_queue_purge(&sk->sk_write_queue); + /* Reject all unreceived messages, except on an active connection * (which disconnects locally & sends a 'FIN+' to peer). */ @@@ -1198,6 -1196,7 +1198,7 @@@ void tipc_sk_mcast_rcv(struct net *net * @skb: pointer to message buffer. 
*/ static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, + struct sk_buff_head *inputq, struct sk_buff_head *xmitq) { struct tipc_msg *hdr = buf_msg(skb); @@@ -1215,7 -1214,16 +1216,16 @@@ tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk), tsk_peer_port(tsk)); sk->sk_state_change(sk); - goto exit; + + /* State change is ignored if socket already awake, + * - convert msg to abort msg and add to inqueue + */ + msg_set_user(hdr, TIPC_CRITICAL_IMPORTANCE); + msg_set_type(hdr, TIPC_CONN_MSG); + msg_set_size(hdr, BASIC_H_SIZE); + msg_set_hdr_sz(hdr, BASIC_H_SIZE); + __skb_queue_tail(inputq, skb); + return; } tsk->probe_unacked = false; @@@ -1321,7 -1329,6 +1331,7 @@@ static int __tipc_sendmsg(struct socke tsk->conn_type = dest->addr.name.name.type; tsk->conn_instance = dest->addr.name.name.instance; } + msg_set_syn(hdr, 1); } seq = &dest->addr.nameseq; @@@ -1364,8 -1371,6 +1374,8 @@@ rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts); if (unlikely(rc != dlen)) return rc; + if (unlikely(syn && !tipc_msg_skb_clone(&pkts, &sk->sk_write_queue))) + return -ENOMEM; rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid); if (unlikely(rc == -ELINKCONG)) { @@@ -1485,7 -1490,6 +1495,7 @@@ static void tipc_sk_finish_conn(struct struct net *net = sock_net(sk); struct tipc_msg *msg = &tsk->phdr; + msg_set_syn(msg, 0); msg_set_destnode(msg, peer_node); msg_set_destport(msg, peer_port); msg_set_type(msg, TIPC_CONN_MSG); @@@ -1497,7 -1501,6 +1507,7 @@@ tipc_node_add_conn(net, peer_node, tsk->portid, peer_port); tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid); tsk->peer_caps = tipc_node_get_capabilities(net, peer_node); + __skb_queue_purge(&sk->sk_write_queue); if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) return; @@@ -1943,7 -1946,7 +1953,7 @@@ static void tipc_sk_proto_rcv(struct so switch (msg_user(hdr)) { case CONN_MANAGER: - tipc_sk_conn_proto_rcv(tsk, skb, xmitq); + tipc_sk_conn_proto_rcv(tsk, skb, inputq, xmitq); return; case SOCK_WAKEUP: tipc_dest_del(&tsk->cong_links, msg_orignode(hdr), 0); @@@ -1968,90 -1971,91 +1978,90 @@@ } /** - * tipc_filter_connect - Handle incoming message for a connection-based socket + * tipc_sk_filter_connect - check incoming message for a connection-based socket * @tsk: TIPC socket - * @skb: pointer to message buffer. Set to NULL if buffer is consumed - * - * Returns true if everything ok, false otherwise + * @skb: pointer to message buffer. 
+ * Returns true if message should be added to receive queue, false otherwise */ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) { struct sock *sk = &tsk->sk; struct net *net = sock_net(sk); struct tipc_msg *hdr = buf_msg(skb); - u32 pport = msg_origport(hdr); - u32 pnode = msg_orignode(hdr); + bool con_msg = msg_connected(hdr); + u32 pport = tsk_peer_port(tsk); + u32 pnode = tsk_peer_node(tsk); + u32 oport = msg_origport(hdr); + u32 onode = msg_orignode(hdr); + int err = msg_errcode(hdr); + unsigned long delay; if (unlikely(msg_mcast(hdr))) return false; switch (sk->sk_state) { case TIPC_CONNECTING: - /* Accept only ACK or NACK message */ - if (unlikely(!msg_connected(hdr))) { - if (pport != tsk_peer_port(tsk) || - pnode != tsk_peer_node(tsk)) - return false; - - tipc_set_sk_state(sk, TIPC_DISCONNECTING); - sk->sk_err = ECONNREFUSED; - sk->sk_state_change(sk); - return true; - } - - if (unlikely(msg_errcode(hdr))) { - tipc_set_sk_state(sk, TIPC_DISCONNECTING); - sk->sk_err = ECONNREFUSED; - sk->sk_state_change(sk); - return true; - } - - if (unlikely(!msg_isdata(hdr))) { - tipc_set_sk_state(sk, TIPC_DISCONNECTING); - sk->sk_err = EINVAL; - sk->sk_state_change(sk); - return true; + /* Setup ACK */ + if (likely(con_msg)) { + if (err) + break; + tipc_sk_finish_conn(tsk, oport, onode); + msg_set_importance(&tsk->phdr, msg_importance(hdr)); + /* ACK+ message with data is added to receive queue */ + if (msg_data_sz(hdr)) + return true; + /* Empty ACK-, - wake up sleeping connect() and drop */ + sk->sk_data_ready(sk); + msg_set_dest_droppable(hdr, 1); + return false; } + /* Ignore connectionless message if not from listening socket */ + if (oport != pport || onode != pnode) + return false; - tipc_sk_finish_conn(tsk, msg_origport(hdr), msg_orignode(hdr)); - msg_set_importance(&tsk->phdr, msg_importance(hdr)); - - /* If 'ACK+' message, add to socket receive queue */ - if (msg_data_sz(hdr)) - return true; - - /* If empty 'ACK-' message, wake up sleeping connect() */ - sk->sk_data_ready(sk); + /* Rejected SYN */ + if (err != TIPC_ERR_OVERLOAD) + break; - /* 'ACK-' message is neither accepted nor rejected: */ - msg_set_dest_droppable(hdr, 1); + /* Prepare for new setup attempt if we have a SYN clone */ + if (skb_queue_empty(&sk->sk_write_queue)) + break; + get_random_bytes(&delay, 2); + delay %= (tsk->conn_timeout / 4); + delay = msecs_to_jiffies(delay + 100); + sk_reset_timer(sk, &sk->sk_timer, jiffies + delay); return false; - case TIPC_OPEN: case TIPC_DISCONNECTING: - break; + return false; case TIPC_LISTEN: /* Accept only SYN message */ - if (!msg_connected(hdr) && !(msg_errcode(hdr))) + if (!msg_is_syn(hdr) && + tipc_node_get_capabilities(net, onode) & TIPC_SYN_BIT) + return false; + if (!con_msg && !err) return true; - break; + return false; case TIPC_ESTABLISHED: /* Accept only connection-based messages sent by peer */ - if (unlikely(!tsk_peer_msg(tsk, hdr))) + if (likely(con_msg && !err && pport == oport && pnode == onode)) + return true; + if (!tsk_peer_msg(tsk, hdr)) return false; - - if (unlikely(msg_errcode(hdr))) { - tipc_set_sk_state(sk, TIPC_DISCONNECTING); - /* Let timer expire on it's own */ - tipc_node_remove_conn(net, tsk_peer_node(tsk), - tsk->portid); - sk->sk_state_change(sk); - } + if (!err) + return true; + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + tipc_node_remove_conn(net, pnode, tsk->portid); + sk->sk_state_change(sk); return true; default: pr_err("Unknown sk_state %u\n", sk->sk_state); } - - return false; + /* Abort connection setup 
attempt */ + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + sk->sk_err = ECONNREFUSED; + sk->sk_state_change(sk); + return true; } /** @@@ -2553,78 -2557,43 +2563,78 @@@ static int tipc_shutdown(struct socket return res; } +static void tipc_sk_check_probing_state(struct sock *sk, + struct sk_buff_head *list) +{ + struct tipc_sock *tsk = tipc_sk(sk); + u32 pnode = tsk_peer_node(tsk); + u32 pport = tsk_peer_port(tsk); + u32 self = tsk_own_node(tsk); + u32 oport = tsk->portid; + struct sk_buff *skb; + + if (tsk->probe_unacked) { + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + sk->sk_err = ECONNABORTED; + tipc_node_remove_conn(sock_net(sk), pnode, pport); + sk->sk_state_change(sk); + return; + } + /* Prepare new probe */ + skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0, + pnode, self, pport, oport, TIPC_OK); + if (skb) + __skb_queue_tail(list, skb); + tsk->probe_unacked = true; + sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV); +} + +static void tipc_sk_retry_connect(struct sock *sk, struct sk_buff_head *list) +{ + struct tipc_sock *tsk = tipc_sk(sk); + + /* Try again later if dest link is congested */ + if (tsk->cong_link_cnt) { + sk_reset_timer(sk, &sk->sk_timer, msecs_to_jiffies(100)); + return; + } + /* Prepare SYN for retransmit */ + tipc_msg_skb_clone(&sk->sk_write_queue, list); +} + static void tipc_sk_timeout(struct timer_list *t) { struct sock *sk = from_timer(sk, t, sk_timer); struct tipc_sock *tsk = tipc_sk(sk); - u32 peer_port = tsk_peer_port(tsk); - u32 peer_node = tsk_peer_node(tsk); - u32 own_node = tsk_own_node(tsk); - u32 own_port = tsk->portid; - struct net *net = sock_net(sk); - struct sk_buff *skb = NULL; + u32 pnode = tsk_peer_node(tsk); + struct sk_buff_head list; + int rc = 0; + skb_queue_head_init(&list); bh_lock_sock(sk); - if (!tipc_sk_connected(sk)) - goto exit; /* Try again later if socket is busy */ if (sock_owned_by_user(sk)) { sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 20); - goto exit; + bh_unlock_sock(sk); + return; } - if (tsk->probe_unacked) { - tipc_set_sk_state(sk, TIPC_DISCONNECTING); - tipc_node_remove_conn(net, peer_node, peer_port); - sk->sk_state_change(sk); - goto exit; - } - /* Send new probe */ - skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0, - peer_node, own_node, peer_port, own_port, - TIPC_OK); - tsk->probe_unacked = true; - sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV); -exit: + if (sk->sk_state == TIPC_ESTABLISHED) + tipc_sk_check_probing_state(sk, &list); + else if (sk->sk_state == TIPC_CONNECTING) + tipc_sk_retry_connect(sk, &list); + bh_unlock_sock(sk); - if (skb) - tipc_node_xmit_skb(net, skb, peer_node, own_port); + + if (!skb_queue_empty(&list)) + rc = tipc_node_xmit(sock_net(sk), &list, pnode, tsk->portid); + + /* SYN messages may cause link congestion */ + if (rc == -ELINKCONG) { + tipc_dest_push(&tsk->cong_links, pnode, 0); + tsk->cong_link_cnt = 1; + } sock_put(sk); }
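
Note on the cls_u32 hunks above: they drop the ht_empty() scans in favour of a per-block knode counter (tp_c->knodes++ on insert, tp_c->knodes-- on unlink), so the "last filter" test in u32_delete() reduces to the constant-time check "*last = tp_c->refcnt == 1 && tp_c->knodes == 0;". A minimal user-space sketch of that bookkeeping follows; it is illustrative only, and the struct and function names are hypothetical stand-ins, not the real tc_u_common/tc_u_knode kernel structures.

/* Minimal sketch, not kernel code: models the constant-time "last" test
 * used above instead of walking every hash bucket with ht_empty().
 */
#include <stdbool.h>

struct u32_common_sketch {
	int refcnt;            /* tcf_proto instances sharing this block  */
	unsigned int knodes;   /* live key nodes across all hash tables   */
};

/* Mirrors "tp_c->knodes++" when a key node is linked in. */
static void sketch_knode_insert(struct u32_common_sketch *c)
{
	c->knodes++;
}

/* Mirrors "tp_c->knodes--" when a key node is unlinked. */
static void sketch_knode_remove(struct u32_common_sketch *c)
{
	c->knodes--;
}

/* True when only the calling tcf_proto remains and no key nodes are left,
 * i.e. the equivalent of: *last = tp_c->refcnt == 1 && tp_c->knodes == 0;
 */
static bool sketch_is_last(const struct u32_common_sketch *c)
{
	return c->refcnt == 1 && c->knodes == 0;
}

Keeping the counter on the shared block means each delete answers "was that the last filter?" without an O(buckets) walk over every hash table, which is what the removed ht_empty() loop used to do.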